vortex_sparse/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use itertools::Itertools;
7use num_traits::NumCast;
8use vortex_array::Array;
9use vortex_array::Canonical;
10use vortex_array::ToCanonical;
11use vortex_array::arrays::BoolArray;
12use vortex_array::arrays::ConstantArray;
13use vortex_array::arrays::FixedSizeListArray;
14use vortex_array::arrays::ListViewArray;
15use vortex_array::arrays::NullArray;
16use vortex_array::arrays::PrimitiveArray;
17use vortex_array::arrays::StructArray;
18use vortex_array::arrays::VarBinViewArray;
19use vortex_array::builders::ArrayBuilder;
20use vortex_array::builders::DecimalBuilder;
21use vortex_array::builders::ListViewBuilder;
22use vortex_array::builders::builder_with_capacity;
23use vortex_array::patches::Patches;
24use vortex_array::validity::Validity;
25use vortex_array::vtable::CanonicalVTable;
26use vortex_array::vtable::ValidityHelper;
27use vortex_buffer::BitBuffer;
28use vortex_buffer::Buffer;
29use vortex_buffer::BufferString;
30use vortex_buffer::ByteBuffer;
31use vortex_buffer::buffer;
32use vortex_buffer::buffer_mut;
33use vortex_dtype::DType;
34use vortex_dtype::DecimalDType;
35use vortex_dtype::DecimalType;
36use vortex_dtype::IntegerPType;
37use vortex_dtype::NativeDecimalType;
38use vortex_dtype::NativePType;
39use vortex_dtype::Nullability;
40use vortex_dtype::StructFields;
41use vortex_dtype::match_each_decimal_value_type;
42use vortex_dtype::match_each_integer_ptype;
43use vortex_dtype::match_each_native_ptype;
44use vortex_dtype::match_smallest_offset_type;
45use vortex_error::VortexError;
46use vortex_error::VortexExpect;
47use vortex_error::vortex_panic;
48use vortex_scalar::DecimalScalar;
49use vortex_scalar::ListScalar;
50use vortex_scalar::Scalar;
51use vortex_scalar::StructScalar;
52use vortex_vector::binaryview::BinaryView;
53
54use crate::SparseArray;
55use crate::SparseVTable;
56
57impl CanonicalVTable<SparseVTable> for SparseVTable {
58    fn canonicalize(array: &SparseArray) -> Canonical {
59        if array.patches().num_patches() == 0 {
60            return ConstantArray::new(array.fill_scalar().clone(), array.len()).to_canonical();
61        }
62
63        match array.dtype() {
64            DType::Null => {
65                assert!(array.fill_scalar().is_null());
66                Canonical::Null(NullArray::new(array.len()))
67            }
68            DType::Bool(..) => {
69                let resolved_patches = array.resolved_patches();
70                canonicalize_sparse_bools(&resolved_patches, array.fill_scalar())
71            }
72            DType::Primitive(ptype, ..) => {
73                let resolved_patches = array.resolved_patches();
74                match_each_native_ptype!(ptype, |P| {
75                    canonicalize_sparse_primitives::<P>(&resolved_patches, array.fill_scalar())
76                })
77            }
78            DType::Struct(struct_fields, ..) => canonicalize_sparse_struct(
79                struct_fields,
80                array.fill_scalar().as_struct(),
81                array.dtype(),
82                array.patches(),
83                array.len(),
84            ),
85            DType::Decimal(decimal_dtype, nullability) => {
86                let canonical_decimal_value_type =
87                    DecimalType::smallest_decimal_value_type(decimal_dtype);
88                let fill_value = array.fill_scalar().as_decimal();
89                match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
90                    canonicalize_sparse_decimal::<D>(
91                        *decimal_dtype,
92                        *nullability,
93                        fill_value,
94                        array.patches(),
95                        array.len(),
96                    )
97                })
98            }
99            dtype @ DType::Utf8(..) => {
100                let fill_value = array.fill_scalar().as_utf8().value();
101                let fill_value = fill_value.map(BufferString::into_inner);
102                canonicalize_varbin(array, dtype.clone(), fill_value)
103            }
104            dtype @ DType::Binary(..) => {
105                let fill_value = array.fill_scalar().as_binary().value();
106                canonicalize_varbin(array, dtype.clone(), fill_value)
107            }
108            DType::List(values_dtype, nullability) => {
109                canonicalize_sparse_lists(array, values_dtype.clone(), *nullability)
110            }
111            DType::FixedSizeList(.., nullability) => {
112                canonicalize_sparse_fixed_size_list(array, *nullability)
113            }
114            DType::Extension(_ext_dtype) => todo!(),
115        }
116    }
117}
118
119#[expect(
120    clippy::cognitive_complexity,
121    reason = "complexity is from nested match_smallest_offset_type macro"
122)]
123fn canonicalize_sparse_lists(
124    array: &SparseArray,
125    values_dtype: Arc<DType>,
126    nullability: Nullability,
127) -> Canonical {
128    let resolved_patches = array.resolved_patches();
129
130    let indices = resolved_patches.indices().to_primitive();
131    let values = resolved_patches.values().to_listview();
132    let fill_value = array.fill_scalar().as_list();
133
134    let n_filled = array.len() - resolved_patches.num_patches();
135    let total_canonical_values = values.elements().len() + fill_value.len() * n_filled;
136
137    let validity = Validity::from_mask(array.validity_mask(), nullability);
138
139    match_each_integer_ptype!(indices.ptype(), |I| {
140        match_smallest_offset_type!(total_canonical_values, |O| {
141            canonicalize_sparse_lists_inner::<I, O>(
142                indices.as_slice(),
143                values,
144                fill_value,
145                values_dtype,
146                array.len(),
147                total_canonical_values,
148                validity,
149            )
150        })
151    })
152}
153
154fn canonicalize_sparse_lists_inner<I: IntegerPType, O: IntegerPType>(
155    patch_indices: &[I],
156    patch_values: ListViewArray,
157    fill_value: ListScalar,
158    values_dtype: Arc<DType>,
159    len: usize,
160    total_canonical_values: usize,
161    validity: Validity,
162) -> Canonical {
163    // Create the builder with appropriate types. It is easy to just use the same type for both
164    // `offsets` and `sizes` since we have no other constraints.
165    let mut builder = ListViewBuilder::<O, O>::with_capacity(
166        values_dtype,
167        validity.nullability(),
168        total_canonical_values,
169        len,
170    );
171
172    let mut patch_idx = 0;
173
174    // Loop over the patch indices and set them to the corresponding scalar values. For positions
175    // that are not patched, use the fill value.
176    for position in 0..len {
177        let position_is_patched = patch_idx < patch_indices.len()
178            && patch_indices[patch_idx]
179                .to_usize()
180                .vortex_expect("patch index must fit in usize")
181                == position;
182
183        if position_is_patched {
184            // Set with the patch value.
185            builder
186                .append_value(patch_values.scalar_at(patch_idx).as_list())
187                .vortex_expect("Failed to append sparse value");
188            patch_idx += 1;
189        } else {
190            // Set with the fill value.
191            builder
192                .append_value(fill_value.clone())
193                .vortex_expect("Failed to append fill value");
194        }
195    }
196
197    builder.finish_into_canonical()
198}
199
200/// Canonicalize a sparse [`FixedSizeListArray`] by expanding it into a dense representation.
201fn canonicalize_sparse_fixed_size_list(array: &SparseArray, nullability: Nullability) -> Canonical {
202    let resolved_patches = array.resolved_patches();
203    let indices = resolved_patches.indices().to_primitive();
204    let values = resolved_patches.values().to_fixed_size_list();
205    let fill_value = array.fill_scalar().as_list();
206
207    let validity = Validity::from_mask(array.validity_mask(), nullability);
208
209    match_each_integer_ptype!(indices.ptype(), |I| {
210        canonicalize_sparse_fixed_size_list_inner::<I>(
211            indices.as_slice(),
212            values,
213            fill_value,
214            array.len(),
215            validity,
216        )
217    })
218}
219
220/// Build a canonical [`FixedSizeListArray`] from sparse patches by interleaving patch values with
221/// fill values.
222///
223/// This algorithm walks through the sparse indices sequentially, filling gaps with the fill value's
224/// elements (or defaults if null). Since all lists have the same size, we can directly append
225/// elements without tracking offsets.
226fn canonicalize_sparse_fixed_size_list_inner<I: IntegerPType>(
227    indices: &[I],
228    values: FixedSizeListArray,
229    fill_value: ListScalar,
230    array_len: usize,
231    validity: Validity,
232) -> Canonical {
233    let list_size = values.list_size();
234    let element_dtype = values.elements().dtype();
235    let total_elements = array_len * list_size as usize;
236    let mut builder = builder_with_capacity(element_dtype, total_elements);
237    let fill_elements = fill_value.elements();
238
239    let mut next_index = 0;
240    let indices = indices
241        .iter()
242        .map(|x| (*x).to_usize().vortex_expect("index must fit in usize"));
243
244    for (patch_idx, sparse_idx) in indices.enumerate() {
245        // Fill gap before this patch with fill values.
246        append_n_lists(
247            &mut *builder,
248            fill_elements.as_deref(),
249            list_size,
250            sparse_idx - next_index,
251        );
252
253        // Append the patch value, handling null patches by appending defaults.
254        if values.validity().is_valid(patch_idx) {
255            let patch_list = values.fixed_size_list_elements_at(patch_idx);
256            for i in 0..list_size as usize {
257                builder
258                    .append_scalar(&patch_list.scalar_at(i))
259                    .vortex_expect("element dtype must match");
260            }
261        } else {
262            builder.append_defaults(list_size as usize);
263        }
264
265        next_index = sparse_idx + 1;
266    }
267
268    // Fill remaining positions after last patch.
269    append_n_lists(
270        &mut *builder,
271        fill_elements.as_deref(),
272        list_size,
273        array_len - next_index,
274    );
275
276    let elements = builder.finish();
277
278    // SAFETY: elements.len() == array_len * list_size, validity length matches array_len.
279    Canonical::FixedSizeList(unsafe {
280        FixedSizeListArray::new_unchecked(elements, list_size, validity, array_len)
281    })
282}
283
284/// Append `count` copies of a fixed-size list to the builder.
285///
286/// If `fill_elements` is `Some`, appends those elements `count` times.
287/// If `fill_elements` is `None` (null fill), appends `list_size` default elements `count` times.
288fn append_n_lists(
289    builder: &mut dyn ArrayBuilder,
290    fill_elements: Option<&[Scalar]>,
291    list_size: u32,
292    count: usize,
293) {
294    for _ in 0..count {
295        if let Some(fill_elems) = fill_elements {
296            for elem in fill_elems {
297                builder
298                    .append_scalar(elem)
299                    .vortex_expect("element dtype must match");
300            }
301        } else {
302            builder.append_defaults(list_size as usize);
303        }
304    }
305}
306
307fn canonicalize_sparse_bools(patches: &Patches, fill_value: &Scalar) -> Canonical {
308    let (fill_bool, validity) = if fill_value.is_null() {
309        (false, Validity::AllInvalid)
310    } else {
311        (
312            fill_value
313                .try_into()
314                .vortex_expect("Fill value must convert to bool"),
315            if patches.dtype().nullability() == Nullability::NonNullable {
316                Validity::NonNullable
317            } else {
318                Validity::AllValid
319            },
320        )
321    };
322
323    let bools =
324        BoolArray::from_bit_buffer(BitBuffer::full(fill_bool, patches.array_len()), validity);
325
326    Canonical::Bool(bools.patch(patches))
327}
328
329fn canonicalize_sparse_primitives<
330    T: NativePType + for<'a> TryFrom<&'a Scalar, Error = VortexError>,
331>(
332    patches: &Patches,
333    fill_value: &Scalar,
334) -> Canonical {
335    let (primitive_fill, validity) = if fill_value.is_null() {
336        (T::default(), Validity::AllInvalid)
337    } else {
338        (
339            fill_value
340                .try_into()
341                .vortex_expect("Fill value must convert to target T"),
342            if patches.dtype().nullability() == Nullability::NonNullable {
343                Validity::NonNullable
344            } else {
345                Validity::AllValid
346            },
347        )
348    };
349
350    let parray = PrimitiveArray::new(buffer![primitive_fill; patches.array_len()], validity);
351
352    Canonical::Primitive(parray.patch(patches))
353}
354
355fn canonicalize_sparse_struct(
356    struct_fields: &StructFields,
357    fill_struct: StructScalar,
358    dtype: &DType,
359    // Resolution is unnecessary b/c we're just pushing the patches into the fields.
360    unresolved_patches: &Patches,
361    len: usize,
362) -> Canonical {
363    let (fill_values, top_level_fill_validity) = match fill_struct.fields() {
364        Some(fill_values) => (fill_values.collect::<Vec<_>>(), Validity::AllValid),
365        None => (
366            struct_fields
367                .fields()
368                .map(Scalar::default_value)
369                .collect::<Vec<_>>(),
370            Validity::AllInvalid,
371        ),
372    };
373    let patch_values_as_struct = unresolved_patches.values().to_struct();
374    let columns_patch_values = patch_values_as_struct.fields();
375    let names = patch_values_as_struct.names();
376    let validity = if dtype.is_nullable() {
377        top_level_fill_validity.patch(
378            len,
379            unresolved_patches.offset(),
380            unresolved_patches.indices(),
381            &Validity::from_mask(
382                unresolved_patches.values().validity_mask(),
383                Nullability::Nullable,
384            ),
385        )
386    } else {
387        top_level_fill_validity
388            .into_non_nullable(len)
389            .unwrap_or_else(|| vortex_panic!("fill validity should match sparse array nullability"))
390    };
391
392    StructArray::try_from_iter_with_validity(
393        names.iter().zip_eq(
394            columns_patch_values
395                .iter()
396                .cloned()
397                .zip_eq(fill_values)
398                .map(|(patch_values, fill_value)| unsafe {
399                    SparseArray::new_unchecked(
400                        unresolved_patches
401                            .clone()
402                            .map_values(|_| Ok(patch_values))
403                            .vortex_expect("Replacing patch values"),
404                        fill_value,
405                    )
406                }),
407        ),
408        validity,
409    )
410    .map(Canonical::Struct)
411    .vortex_expect("Creating struct array")
412}
413
414fn canonicalize_sparse_decimal<D: NativeDecimalType>(
415    decimal_dtype: DecimalDType,
416    nullability: Nullability,
417    fill_value: DecimalScalar,
418    patches: &Patches,
419    len: usize,
420) -> Canonical {
421    let mut builder = DecimalBuilder::with_capacity::<D>(len, decimal_dtype, nullability);
422    match fill_value.decimal_value() {
423        Some(fill_value) => {
424            let fill_value = fill_value
425                .cast::<D>()
426                .vortex_expect("unexpected value type");
427            for _ in 0..len {
428                builder.append_value(fill_value)
429            }
430        }
431        None => {
432            builder.append_nulls(len);
433        }
434    }
435    let filled_array = builder.finish_into_decimal();
436    let array = filled_array.patch(patches);
437    Canonical::Decimal(array)
438}
439
440fn canonicalize_varbin(
441    array: &SparseArray,
442    dtype: DType,
443    fill_value: Option<ByteBuffer>,
444) -> Canonical {
445    let patches = array.resolved_patches();
446    let indices = patches.indices().to_primitive();
447    let values = patches.values().to_varbinview();
448    let validity = Validity::from_mask(array.validity_mask(), dtype.nullability());
449    let len = array.len();
450
451    match_each_integer_ptype!(indices.ptype(), |I| {
452        let indices = indices.buffer::<I>();
453        canonicalize_varbin_inner::<I>(fill_value, indices, values, dtype, validity, len)
454    })
455}
456
457fn canonicalize_varbin_inner<I: IntegerPType>(
458    fill_value: Option<ByteBuffer>,
459    indices: Buffer<I>,
460    values: VarBinViewArray,
461    dtype: DType,
462    validity: Validity,
463    len: usize,
464) -> Canonical {
465    assert_eq!(dtype.nullability(), validity.nullability());
466
467    let n_patch_buffers = values.buffers().len();
468    let mut buffers = values.buffers().to_vec();
469
470    let fill = if let Some(buffer) = &fill_value {
471        buffers.push(buffer.clone());
472        BinaryView::make_view(
473            buffer.as_ref(),
474            u32::try_from(n_patch_buffers).vortex_expect("too many buffers"),
475            0,
476        )
477    } else {
478        // any <=12 character value will do
479        BinaryView::make_view(&[], 0, 0)
480    };
481
482    let mut views = buffer_mut![fill; len];
483    for (patch_index, &patch) in indices.into_iter().zip_eq(values.views().iter()) {
484        let patch_index_usize = <usize as NumCast>::from(patch_index)
485            .vortex_expect("var bin view indices must fit in usize");
486        views[patch_index_usize] = patch;
487    }
488
489    // SAFETY: views are constructed to maintain the invariants
490    let array = unsafe {
491        VarBinViewArray::new_unchecked(views.freeze(), Arc::from(buffers), dtype, validity)
492    };
493
494    Canonical::VarBinView(array)
495}
496
497#[cfg(test)]
498mod test {
499    use std::sync::Arc;
500
501    use rstest::rstest;
502    use vortex_array::IntoArray;
503    use vortex_array::ToCanonical;
504    use vortex_array::arrays::BoolArray;
505    use vortex_array::arrays::DecimalArray;
506    use vortex_array::arrays::FixedSizeListArray;
507    use vortex_array::arrays::ListArray;
508    use vortex_array::arrays::ListViewArray;
509    use vortex_array::arrays::PrimitiveArray;
510    use vortex_array::arrays::StructArray;
511    use vortex_array::arrays::VarBinArray;
512    use vortex_array::arrays::VarBinViewArray;
513    use vortex_array::arrow::IntoArrowArray as _;
514    use vortex_array::assert_arrays_eq;
515    use vortex_array::validity::Validity;
516    use vortex_buffer::ByteBuffer;
517    use vortex_buffer::buffer;
518    use vortex_buffer::buffer_mut;
519    use vortex_dtype::DType;
520    use vortex_dtype::DecimalDType;
521    use vortex_dtype::FieldNames;
522    use vortex_dtype::Nullability::NonNullable;
523    use vortex_dtype::Nullability::Nullable;
524    use vortex_dtype::PType;
525    use vortex_dtype::StructFields;
526    use vortex_mask::Mask;
527    use vortex_scalar::DecimalValue;
528    use vortex_scalar::Scalar;
529
530    use crate::SparseArray;
531
532    #[rstest]
533    #[case(Some(true))]
534    #[case(Some(false))]
535    #[case(None)]
536    fn test_sparse_bool(#[case] fill_value: Option<bool>) {
537        let indices = buffer![0u64, 1, 7].into_array();
538        let values = BoolArray::from_iter([Some(true), None, Some(false)]).into_array();
539        let sparse_bools =
540            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
541        let actual = sparse_bools.to_bool();
542
543        let expected = BoolArray::from_iter([
544            Some(true),
545            None,
546            fill_value,
547            fill_value,
548            fill_value,
549            fill_value,
550            fill_value,
551            Some(false),
552            fill_value,
553            fill_value,
554        ]);
555
556        assert_arrays_eq!(actual, expected);
557    }
558
559    #[rstest]
560    #[case(Some(0i32))]
561    #[case(Some(-1i32))]
562    #[case(None)]
563    fn test_sparse_primitive(#[case] fill_value: Option<i32>) {
564        let indices = buffer![0u64, 1, 7].into_array();
565        let values = PrimitiveArray::from_option_iter([Some(0i32), None, Some(1)]).into_array();
566        let sparse_ints =
567            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
568        assert_eq!(*sparse_ints.dtype(), DType::Primitive(PType::I32, Nullable));
569
570        let flat_ints = sparse_ints.to_primitive();
571        let expected = PrimitiveArray::from_option_iter([
572            Some(0i32),
573            None,
574            fill_value,
575            fill_value,
576            fill_value,
577            fill_value,
578            fill_value,
579            Some(1),
580            fill_value,
581            fill_value,
582        ]);
583
584        assert_arrays_eq!(&flat_ints, &expected);
585    }
586
587    #[test]
588    fn test_sparse_struct_valid_fill() {
589        let field_names = FieldNames::from_iter(["a", "b"]);
590        let field_types = vec![
591            DType::Primitive(PType::I32, Nullable),
592            DType::Primitive(PType::I32, Nullable),
593        ];
594        let struct_fields = StructFields::new(field_names, field_types);
595        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
596
597        let indices = buffer![0u64, 1, 7, 8].into_array();
598        let patch_values_a =
599            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
600        let patch_values_b =
601            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
602        let patch_values = StructArray::try_new_with_dtype(
603            vec![patch_values_a, patch_values_b],
604            struct_fields.clone(),
605            4,
606            Validity::Array(
607                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
608            ),
609        )
610        .unwrap()
611        .into_array();
612
613        let fill_scalar = Scalar::struct_(
614            struct_dtype,
615            vec![Scalar::from(Some(-10i32)), Scalar::from(Some(-1i32))],
616        );
617        let len = 10;
618        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
619
620        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
621            if i == 0 {
622                Some(10)
623            } else if i == 1 {
624                None
625            } else if i == 7 {
626                Some(20)
627            } else {
628                Some(-10)
629            }
630        }));
631        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
632            if i == 0 {
633                Some(1i32)
634            } else if i == 1 {
635                Some(2)
636            } else if i == 7 {
637                None
638            } else {
639                Some(-1)
640            }
641        }));
642
643        let expected = StructArray::try_new_with_dtype(
644            vec![expected_a.into_array(), expected_b.into_array()],
645            struct_fields,
646            len,
647            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 8 is Invalid.
648            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
649        )
650        .unwrap()
651        .to_array();
652
653        let actual = sparse_struct.to_struct();
654        assert_arrays_eq!(actual, expected);
655    }
656
657    #[test]
658    fn test_sparse_struct_invalid_fill() {
659        let field_names = FieldNames::from_iter(["a", "b"]);
660        let field_types = vec![
661            DType::Primitive(PType::I32, Nullable),
662            DType::Primitive(PType::I32, Nullable),
663        ];
664        let struct_fields = StructFields::new(field_names, field_types);
665        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
666
667        let indices = buffer![0u64, 1, 7, 8].into_array();
668        let patch_values_a =
669            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
670        let patch_values_b =
671            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
672        let patch_values = StructArray::try_new_with_dtype(
673            vec![patch_values_a, patch_values_b],
674            struct_fields.clone(),
675            4,
676            Validity::Array(
677                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
678            ),
679        )
680        .unwrap()
681        .into_array();
682
683        let fill_scalar = Scalar::null(struct_dtype);
684        let len = 10;
685        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
686
687        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
688            if i == 0 {
689                Some(10)
690            } else if i == 1 {
691                None
692            } else if i == 7 {
693                Some(20)
694            } else {
695                Some(-10)
696            }
697        }));
698        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
699            if i == 0 {
700                Some(1i32)
701            } else if i == 1 {
702                Some(2)
703            } else if i == 7 {
704                None
705            } else {
706                Some(-1)
707            }
708        }));
709
710        let expected = StructArray::try_new_with_dtype(
711            vec![expected_a.into_array(), expected_b.into_array()],
712            struct_fields,
713            len,
714            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
715            Validity::from_mask(Mask::from_indices(10, vec![0, 1, 7]), Nullable),
716        )
717        .unwrap()
718        .to_array();
719
720        let actual = sparse_struct.to_struct();
721        assert_arrays_eq!(actual, expected);
722    }
723
724    #[test]
725    fn test_sparse_decimal() {
726        let indices = buffer![0u32, 1u32, 7u32, 8u32].into_array();
727        let decimal_dtype = DecimalDType::new(3, 2);
728        let patch_values = DecimalArray::new(
729            buffer![100i128, 200i128, 300i128, 4000i128],
730            decimal_dtype,
731            Validity::from_iter([true, true, true, false]),
732        )
733        .to_array();
734        let len = 10;
735        let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
736        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
737
738        let expected = DecimalArray::new(
739            buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
740            decimal_dtype,
741            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
742            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
743        )
744        .to_array()
745        .into_arrow_preferred()
746        .unwrap();
747
748        let actual = sparse_struct
749            .to_decimal()
750            .to_array()
751            .into_arrow_preferred()
752            .unwrap();
753
754        assert_eq!(expected.data_type(), actual.data_type());
755        assert_eq!(&expected, &actual);
756    }
757
758    #[test]
759    fn test_sparse_utf8_varbinview_non_null_fill() {
760        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
761            Some("hello"),
762            Some("goodbye"),
763            Some("hello"),
764            None,
765            Some("bonjour"),
766            Some("你好"),
767            None,
768        ])
769        .into_array();
770
771        let array = SparseArray::try_new(
772            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
773            strings,
774            12,
775            Scalar::from(Some("123".to_owned())),
776        )
777        .unwrap();
778
779        let actual = array.to_varbinview().into_array();
780        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
781            Some("hello"),
782            Some("123"),
783            Some("123"),
784            Some("goodbye"),
785            Some("hello"),
786            None,
787            Some("123"),
788            Some("bonjour"),
789            Some("123"),
790            Some("你好"),
791            None,
792            Some("123"),
793        ])
794        .into_array();
795
796        assert_arrays_eq!(actual, expected);
797    }
798
799    #[test]
800    fn test_sparse_utf8_varbinview_null_fill() {
801        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
802            Some("hello"),
803            Some("goodbye"),
804            Some("hello"),
805            None,
806            Some("bonjour"),
807            Some("你好"),
808            None,
809        ])
810        .into_array();
811
812        let array = SparseArray::try_new(
813            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
814            strings,
815            12,
816            Scalar::null(DType::Utf8(Nullable)),
817        )
818        .unwrap();
819
820        let actual = array.to_varbinview().into_array();
821        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
822            Some("hello"),
823            None,
824            None,
825            Some("goodbye"),
826            Some("hello"),
827            None,
828            None,
829            Some("bonjour"),
830            None,
831            Some("你好"),
832            None,
833            None,
834        ])
835        .into_array();
836
837        assert_arrays_eq!(actual, expected);
838    }
839
840    #[test]
841    fn test_sparse_utf8_varbinview_non_nullable() {
842        let strings =
843            VarBinViewArray::from_iter_str(["hello", "goodbye", "hello", "bonjour", "你好"])
844                .into_array();
845
846        let array = SparseArray::try_new(
847            buffer![0u16, 3, 4, 5, 8].into_array(),
848            strings,
849            9,
850            Scalar::from("123".to_owned()),
851        )
852        .unwrap();
853
854        let actual = array.to_varbinview().into_array();
855        let expected = VarBinViewArray::from_iter_str([
856            "hello", "123", "123", "goodbye", "hello", "bonjour", "123", "123", "你好",
857        ])
858        .into_array();
859
860        assert_arrays_eq!(actual, expected);
861    }
862
863    #[test]
864    fn test_sparse_utf8_varbin_null_fill() {
865        let strings = <VarBinArray as FromIterator<_>>::from_iter([
866            Some("hello"),
867            Some("goodbye"),
868            Some("hello"),
869            None,
870            Some("bonjour"),
871            Some("你好"),
872            None,
873        ])
874        .into_array();
875
876        let array = SparseArray::try_new(
877            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
878            strings,
879            12,
880            Scalar::null(DType::Utf8(Nullable)),
881        )
882        .unwrap();
883
884        let actual = array.to_varbinview().into_array();
885        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
886            Some("hello"),
887            None,
888            None,
889            Some("goodbye"),
890            Some("hello"),
891            None,
892            None,
893            Some("bonjour"),
894            None,
895            Some("你好"),
896            None,
897            None,
898        ])
899        .into_array();
900
901        assert_arrays_eq!(actual, expected);
902    }
903
904    #[test]
905    fn test_sparse_binary_varbinview_non_null_fill() {
906        let binaries = VarBinViewArray::from_iter_nullable_bin([
907            Some(b"hello" as &[u8]),
908            Some(b"goodbye"),
909            Some(b"hello"),
910            None,
911            Some(b"\x00"),
912            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
913            None,
914        ])
915        .into_array();
916
917        let array = SparseArray::try_new(
918            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
919            binaries,
920            12,
921            Scalar::from(Some(ByteBuffer::from(b"123".to_vec()))),
922        )
923        .unwrap();
924
925        let actual = array.to_varbinview().into_array();
926        let expected = VarBinViewArray::from_iter_nullable_bin([
927            Some(b"hello" as &[u8]),
928            Some(b"123"),
929            Some(b"123"),
930            Some(b"goodbye"),
931            Some(b"hello"),
932            None,
933            Some(b"123"),
934            Some(b"\x00"),
935            Some(b"123"),
936            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
937            None,
938            Some(b"123"),
939        ])
940        .into_array();
941
942        assert_arrays_eq!(actual, expected);
943    }
944
945    #[test]
946    fn test_sparse_list_null_fill() {
947        // Use ListViewArray consistently
948        let elements = buffer![1i32, 2, 1, 2].into_array();
949        // Create ListView with offsets and sizes
950        // List 0: [1] at offset 0, size 1
951        // List 1: [2] at offset 1, size 1
952        // List 2: [1] at offset 2, size 1
953        // List 3: [2] at offset 3, size 1
954        let offsets = buffer![0u32, 1, 2, 3].into_array();
955        let sizes = buffer![1u32, 1, 1, 1].into_array();
956        let lists = unsafe {
957            ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
958                .with_zero_copy_to_list(true)
959        }
960        .into_array();
961
962        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
963        let fill_value = Scalar::null(lists.dtype().clone());
964        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
965            .unwrap()
966            .into_array();
967
968        let actual = sparse.to_canonical().into_array();
969        let result_listview = actual.to_listview();
970
971        // Check the structure
972        assert_eq!(result_listview.len(), 6);
973
974        // Verify sizes: positions 0,3,4,5 have data, positions 1,2 are null
975        assert_eq!(result_listview.size_at(0), 1); // [1]
976        assert_eq!(result_listview.size_at(1), 0); // null
977        assert_eq!(result_listview.size_at(2), 0); // null
978        assert_eq!(result_listview.size_at(3), 1); // [2]
979        assert_eq!(result_listview.size_at(4), 1); // [1]
980        assert_eq!(result_listview.size_at(5), 1); // [2]
981
982        // Verify actual values
983        let elements_array = result_listview.elements().to_primitive();
984        let elements_slice = elements_array.as_slice::<i32>();
985
986        let list0_offset = result_listview.offset_at(0);
987        assert_eq!(elements_slice[list0_offset], 1);
988
989        let list3_offset = result_listview.offset_at(3);
990        assert_eq!(elements_slice[list3_offset], 2);
991
992        let list4_offset = result_listview.offset_at(4);
993        assert_eq!(elements_slice[list4_offset], 1);
994
995        let list5_offset = result_listview.offset_at(5);
996        assert_eq!(elements_slice[list5_offset], 2);
997    }
998
999    #[test]
1000    fn test_sparse_list_null_fill_sliced_sparse_values() {
1001        // Create ListViewArray with 8 elements forming 8 single-element lists
1002        let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
1003        let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7].into_array();
1004        let sizes = buffer![1u32, 1, 1, 1, 1, 1, 1, 1].into_array();
1005        let lists = unsafe {
1006            ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
1007                .with_zero_copy_to_list(true)
1008        }
1009        .into_array();
1010
1011        // Slice to get lists 2..6, which are: [1], [2], [1], [2]
1012        let lists = lists.slice(2..6);
1013
1014        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1015        let fill_value = Scalar::null(lists.dtype().clone());
1016        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1017            .unwrap()
1018            .into_array();
1019
1020        let actual = sparse.to_canonical().into_array();
1021        let result_listview = actual.to_listview();
1022
1023        // Check the structure
1024        assert_eq!(result_listview.len(), 6);
1025
1026        // Verify sizes: positions 0,3,4,5 have data (from the sliced lists), positions 1,2 are null
1027        assert_eq!(result_listview.size_at(0), 1); // [1] - from slice index 0 (original index 2)
1028        assert_eq!(result_listview.size_at(1), 0); // null
1029        assert_eq!(result_listview.size_at(2), 0); // null
1030        assert_eq!(result_listview.size_at(3), 1); // [2] - from slice index 3 (original index 5)
1031        assert_eq!(result_listview.size_at(4), 1); // [1] - extra element beyond original slice
1032        assert_eq!(result_listview.size_at(5), 1); // [2] - extra element beyond original slice
1033
1034        // Verify actual values
1035        let elements_array = result_listview.elements().to_primitive();
1036        let elements_slice = elements_array.as_slice::<i32>();
1037
1038        let list0_offset = result_listview.offset_at(0);
1039        assert_eq!(elements_slice[list0_offset], 1);
1040
1041        let list3_offset = result_listview.offset_at(3);
1042        assert_eq!(elements_slice[list3_offset], 2);
1043    }
1044
1045    #[test]
1046    fn test_sparse_list_non_null_fill() {
1047        // Create ListViewArray with 4 single-element lists
1048        let elements = buffer![1i32, 2, 1, 2].into_array();
1049        let offsets = buffer![0u32, 1, 2, 3].into_array();
1050        let sizes = buffer![1u32, 1, 1, 1].into_array();
1051        let lists = unsafe {
1052            ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
1053                .with_zero_copy_to_list(true)
1054        }
1055        .into_array();
1056
1057        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1058        let fill_value = Scalar::from(Some(vec![5i32, 6, 7, 8]));
1059        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1060            .unwrap()
1061            .into_array();
1062
1063        let actual = sparse.to_canonical().into_array();
1064        let result_listview = actual.to_listview();
1065
1066        // Check the structure
1067        assert_eq!(result_listview.len(), 6);
1068
1069        // Verify sizes: positions 0,3,4,5 have sparse data, positions 1,2 have fill values
1070        assert_eq!(result_listview.size_at(0), 1); // [1] from sparse
1071        assert_eq!(result_listview.size_at(1), 4); // [5,6,7,8] fill value
1072        assert_eq!(result_listview.size_at(2), 4); // [5,6,7,8] fill value
1073        assert_eq!(result_listview.size_at(3), 1); // [2] from sparse
1074        assert_eq!(result_listview.size_at(4), 1); // [1] from sparse
1075        assert_eq!(result_listview.size_at(5), 1); // [2] from sparse
1076
1077        // Verify actual values
1078        let elements_array = result_listview.elements().to_primitive();
1079        let elements_slice = elements_array.as_slice::<i32>();
1080
1081        // List 0: [1]
1082        let list0_offset = result_listview.offset_at(0) as usize;
1083        assert_eq!(elements_slice[list0_offset], 1);
1084
1085        // List 1: [5,6,7,8]
1086        let list1_offset = result_listview.offset_at(1) as usize;
1087        let list1_size = result_listview.size_at(1) as usize;
1088        assert_eq!(
1089            &elements_slice[list1_offset..list1_offset + list1_size],
1090            &[5, 6, 7, 8]
1091        );
1092
1093        // List 2: [5,6,7,8]
1094        let list2_offset = result_listview.offset_at(2) as usize;
1095        let list2_size = result_listview.size_at(2) as usize;
1096        assert_eq!(
1097            &elements_slice[list2_offset..list2_offset + list2_size],
1098            &[5, 6, 7, 8]
1099        );
1100
1101        // List 3: [2]
1102        let list3_offset = result_listview.offset_at(3) as usize;
1103        assert_eq!(elements_slice[list3_offset], 2);
1104
1105        // List 4: [1]
1106        let list4_offset = result_listview.offset_at(4) as usize;
1107        assert_eq!(elements_slice[list4_offset], 1);
1108
1109        // List 5: [2]
1110        let list5_offset = result_listview.offset_at(5) as usize;
1111        assert_eq!(elements_slice[list5_offset], 2);
1112    }
1113
1114    #[test]
1115    fn test_sparse_binary_varbin_null_fill() {
1116        let strings = <VarBinArray as FromIterator<_>>::from_iter([
1117            Some(b"hello" as &[u8]),
1118            Some(b"goodbye"),
1119            Some(b"hello"),
1120            None,
1121            Some(b"\x00"),
1122            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1123            None,
1124        ])
1125        .into_array();
1126
1127        let array = SparseArray::try_new(
1128            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
1129            strings,
1130            12,
1131            Scalar::null(DType::Binary(Nullable)),
1132        )
1133        .unwrap();
1134
1135        let actual = array.to_varbinview().into_array();
1136        let expected = VarBinViewArray::from_iter_nullable_bin([
1137            Some(b"hello" as &[u8]),
1138            None,
1139            None,
1140            Some(b"goodbye"),
1141            Some(b"hello"),
1142            None,
1143            None,
1144            Some(b"\x00"),
1145            None,
1146            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1147            None,
1148            None,
1149        ])
1150        .into_array();
1151
1152        assert_arrays_eq!(actual, expected);
1153    }
1154
1155    #[test]
1156    fn test_sparse_fixed_size_list_null_fill() {
1157        // Create a FixedSizeListArray with 3 lists of size 3.
1158        let elements = buffer![1i32, 2, 3, 4, 5, 6, 7, 8, 9].into_array();
1159        let fsl = FixedSizeListArray::try_new(elements, 3, Validity::AllValid, 3)
1160            .unwrap()
1161            .into_array();
1162
1163        let indices = buffer![0u8, 2u8, 3u8].into_array();
1164        let fill_value = Scalar::null(DType::FixedSizeList(
1165            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1166            3,
1167            Nullable,
1168        ));
1169        let sparse = SparseArray::try_new(indices, fsl, 5, fill_value)
1170            .unwrap()
1171            .into_array();
1172
1173        let actual = sparse.to_canonical().into_array();
1174
1175        // Expected: [1,2,3], null, [4,5,6], [7,8,9], null.
1176        let expected_elements =
1177            buffer![1i32, 2, 3, 0, 0, 0, 4, 5, 6, 7, 8, 9, 0, 0, 0].into_array();
1178        let expected = FixedSizeListArray::try_new(
1179            expected_elements,
1180            3,
1181            Validity::Array(BoolArray::from_iter([true, false, true, true, false]).into_array()),
1182            5,
1183        )
1184        .unwrap()
1185        .into_array();
1186
1187        assert_arrays_eq!(actual, expected);
1188    }
1189
1190    #[test]
1191    fn test_sparse_fixed_size_list_non_null_fill() {
1192        let elements = buffer![1i32, 2, 3, 4, 5, 6].into_array();
1193        let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 3)
1194            .unwrap()
1195            .into_array();
1196
1197        let indices = buffer![0u8, 2u8, 4u8].into_array();
1198        let fill_value = Scalar::fixed_size_list(
1199            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1200            vec![
1201                Scalar::primitive(99i32, NonNullable),
1202                Scalar::primitive(88i32, NonNullable),
1203            ],
1204            NonNullable,
1205        );
1206        let sparse = SparseArray::try_new(indices, fsl, 6, fill_value)
1207            .unwrap()
1208            .into_array();
1209
1210        let actual = sparse.to_canonical().into_array();
1211
1212        // Expected: [1,2], [99,88], [3,4], [99,88], [5,6], [99,88].
1213        let expected_elements = buffer![1i32, 2, 99, 88, 3, 4, 99, 88, 5, 6, 99, 88].into_array();
1214        let expected = FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 6)
1215            .unwrap()
1216            .into_array();
1217
1218        assert_arrays_eq!(actual, expected);
1219    }
1220
1221    #[test]
1222    fn test_sparse_fixed_size_list_with_validity() {
1223        // Create FSL values with some nulls.
1224        let elements = buffer![10i32, 20, 30, 40, 50, 60].into_array();
1225        let fsl = FixedSizeListArray::try_new(
1226            elements,
1227            2,
1228            Validity::Array(BoolArray::from_iter([true, false, true]).into_array()),
1229            3,
1230        )
1231        .unwrap()
1232        .into_array();
1233
1234        let indices = buffer![1u16, 3u16, 4u16].into_array();
1235        let fill_value = Scalar::fixed_size_list(
1236            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1237            vec![
1238                Scalar::primitive(7i32, NonNullable),
1239                Scalar::primitive(8i32, NonNullable),
1240            ],
1241            Nullable,
1242        );
1243        let sparse = SparseArray::try_new(indices, fsl, 6, fill_value)
1244            .unwrap()
1245            .into_array();
1246
1247        let actual = sparse.to_canonical().into_array();
1248
1249        // Expected validity: [true, true, true, false, true, true].
1250        // Expected elements: [7,8], [10,20], [7,8], [30,40], [50,60], [7,8].
1251        let expected_elements = buffer![7i32, 8, 10, 20, 7, 8, 30, 40, 50, 60, 7, 8].into_array();
1252        let expected = FixedSizeListArray::try_new(
1253            expected_elements,
1254            2,
1255            Validity::Array(
1256                BoolArray::from_iter([true, true, true, false, true, true]).into_array(),
1257            ),
1258            6,
1259        )
1260        .unwrap()
1261        .into_array();
1262
1263        assert_arrays_eq!(actual, expected);
1264    }
1265
1266    #[test]
1267    fn test_sparse_fixed_size_list_truly_sparse() {
1268        // Test with a truly sparse array where most values are the fill value.
1269        // This demonstrates the compression benefit of sparse encoding.
1270
1271        // Create patch values: only 3 distinct lists out of 100 total positions.
1272        let elements = buffer![10i32, 11, 20, 21, 30, 31].into_array();
1273        let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 3)
1274            .unwrap()
1275            .into_array();
1276
1277        // Patches at positions 5, 50, and 95 out of 100.
1278        let indices = buffer![5u32, 50, 95].into_array();
1279
1280        // Fill value [99, 99] will appear 97 times but stored only once.
1281        let fill_value = Scalar::fixed_size_list(
1282            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1283            vec![
1284                Scalar::primitive(99i32, NonNullable),
1285                Scalar::primitive(99i32, NonNullable),
1286            ],
1287            NonNullable,
1288        );
1289
1290        let sparse = SparseArray::try_new(indices, fsl, 100, fill_value)
1291            .unwrap()
1292            .into_array();
1293
1294        let actual = sparse.to_canonical().into_array();
1295
1296        // Build expected: 97 copies of [99,99] with patches at positions 5, 50, 95.
1297        let mut expected_elements_vec = Vec::with_capacity(200);
1298        // Positions 0-4: fill values
1299        for _ in 0..5 {
1300            expected_elements_vec.extend([99i32, 99]);
1301        }
1302        // Position 5: first patch [10, 11]
1303        expected_elements_vec.extend([10, 11]);
1304        // Positions 6-49: fill values
1305        for _ in 6..50 {
1306            expected_elements_vec.extend([99, 99]);
1307        }
1308        // Position 50: second patch [20, 21]
1309        expected_elements_vec.extend([20, 21]);
1310        // Positions 51-94: fill values
1311        for _ in 51..95 {
1312            expected_elements_vec.extend([99, 99]);
1313        }
1314        // Position 95: third patch [30, 31]
1315        expected_elements_vec.extend([30, 31]);
1316        // Positions 96-99: fill values
1317        for _ in 96..100 {
1318            expected_elements_vec.extend([99, 99]);
1319        }
1320        let expected_elements = PrimitiveArray::from_iter(expected_elements_vec).into_array();
1321        let expected =
1322            FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 100)
1323                .unwrap()
1324                .into_array();
1325
1326        assert_arrays_eq!(actual, expected);
1327    }
1328
1329    #[test]
1330    fn test_sparse_fixed_size_list_single_element() {
1331        // Test with a single element FSL array.
1332        let elements = buffer![42i32, 43].into_array();
1333        let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 1)
1334            .unwrap()
1335            .into_array();
1336
1337        let indices = buffer![0u32].into_array();
1338        let fill_value = Scalar::fixed_size_list(
1339            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1340            vec![
1341                Scalar::primitive(1i32, NonNullable),
1342                Scalar::primitive(2i32, NonNullable),
1343            ],
1344            NonNullable,
1345        );
1346        let sparse = SparseArray::try_new(indices, fsl, 1, fill_value)
1347            .unwrap()
1348            .into_array();
1349
1350        let actual = sparse.to_canonical().into_array();
1351
1352        // Expected: just [42, 43].
1353        let expected_elements = buffer![42i32, 43].into_array();
1354        let expected = FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 1)
1355            .unwrap()
1356            .into_array();
1357
1358        assert_arrays_eq!(actual, expected);
1359    }
1360
1361    #[test]
1362    fn test_sparse_list_grows_offset_type() {
1363        let elements = buffer![1i32, 2, 1, 2].into_array();
1364        let offsets = buffer![0u8, 1, 2, 3, 4].into_array();
1365        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1366            .unwrap()
1367            .into_array();
1368
1369        let indices = buffer![0u8, 1u8, 2u8, 3u8].into_array();
1370        let fill_value = Scalar::from(Some(vec![42i32; 252])); // 252 + 4 elements = 256 > u8::MAX
1371        let sparse = SparseArray::try_new(indices, lists, 5, fill_value)
1372            .unwrap()
1373            .into_array();
1374
1375        let actual = sparse.to_canonical().into_array();
1376        let mut expected_elements = buffer_mut![1, 2, 1, 2];
1377        expected_elements.extend(buffer![42i32; 252]);
1378        let expected = ListArray::try_new(
1379            expected_elements.freeze().into_array(),
1380            buffer![0u16, 1, 2, 3, 4, 256].into_array(),
1381            Validity::AllValid,
1382        )
1383        .unwrap()
1384        .into_array();
1385
1386        assert_eq!(
1387            actual.to_listview().offsets().dtype(),
1388            &DType::Primitive(PType::U16, NonNullable)
1389        );
1390        assert_arrays_eq!(&actual, &expected);
1391
1392        // Note that the preferred arrow list representation is `List` (not `ListView`).
1393        let arrow_dtype = expected.dtype().to_arrow_dtype().unwrap();
1394        let actual = actual.into_arrow(&arrow_dtype).unwrap();
1395        let expected = expected.into_arrow(&arrow_dtype).unwrap();
1396
1397        assert_eq!(actual.data_type(), expected.data_type());
1398    }
1399
1400    #[test]
1401    fn test_sparse_listview_null_fill_with_gaps() {
1402        // This test specifically catches the bug where the old implementation
1403        // incorrectly tracked `last_valid_offset` as the START of the last list
1404        // instead of properly handling ListView's offset/size pairs.
1405
1406        // Create a ListViewArray with non-trivial offsets and sizes.
1407        // Elements: [10, 11, 12, 20, 21, 30, 31, 32, 33]
1408        // List 0: elements[0..3] = [10, 11, 12]
1409        // List 1: elements[3..5] = [20, 21]
1410        // List 2: elements[5..9] = [30, 31, 32, 33]
1411        let elements = buffer![10i32, 11, 12, 20, 21, 30, 31, 32, 33].into_array();
1412        let offsets = buffer![0u32, 3, 5].into_array();
1413        let sizes = buffer![3u32, 2, 4].into_array();
1414
1415        let list_view = unsafe {
1416            ListViewArray::new_unchecked(elements.clone(), offsets, sizes, Validity::AllValid)
1417                .with_zero_copy_to_list(true)
1418        };
1419
1420        let list_dtype = list_view.dtype().clone();
1421
1422        // Create sparse array with indices [1, 4, 7] and length 10
1423        // This means we have:
1424        // - Index 0: null
1425        // - Index 1: List 0 [10, 11, 12]
1426        // - Index 2-3: null
1427        // - Index 4: List 1 [20, 21]
1428        // - Index 5-6: null
1429        // - Index 7: List 2 [30, 31, 32, 33]
1430        // - Index 8-9: null
1431        let indices = buffer![1u8, 4, 7].into_array();
1432        let sparse = SparseArray::try_new(
1433            indices,
1434            list_view.into_array(),
1435            10,
1436            Scalar::null(list_dtype),
1437        )
1438        .unwrap();
1439
1440        // Convert to canonical form - this triggers the function we're testing
1441        let canonical = sparse.to_canonical().into_array();
1442        let result_listview = canonical.to_listview();
1443
1444        // Verify the structure
1445        assert_eq!(result_listview.len(), 10);
1446
1447        // Helper to get list values at an index
1448        let get_list_values = |idx: usize| -> Vec<i32> {
1449            let offset = result_listview.offset_at(idx);
1450            let size = result_listview.size_at(idx);
1451            if size == 0 {
1452                vec![] // null/empty list
1453            } else {
1454                let elements = result_listview.elements().to_primitive();
1455                let slice = elements.as_slice::<i32>();
1456                slice[offset..offset + size].to_vec()
1457            }
1458        };
1459
1460        let empty: Vec<i32> = vec![];
1461
1462        // Verify all list values
1463        assert_eq!(get_list_values(0), empty); // null
1464        assert_eq!(get_list_values(1), vec![10, 11, 12]); // sparse index 0
1465        assert_eq!(get_list_values(2), empty); // null
1466        assert_eq!(get_list_values(3), empty); // null
1467        assert_eq!(get_list_values(4), vec![20, 21]); // sparse index 1
1468        assert_eq!(get_list_values(5), empty); // null
1469        assert_eq!(get_list_values(6), empty); // null
1470        assert_eq!(get_list_values(7), vec![30, 31, 32, 33]); // sparse index 2
1471        assert_eq!(get_list_values(8), empty); // null
1472        assert_eq!(get_list_values(9), empty); // null
1473    }
1474
1475    #[test]
1476    fn test_sparse_listview_sliced_values_null_fill() {
1477        // This test uses sliced ListView values to ensure proper handling
1478        // of non-zero starting offsets in the source data.
1479
1480        // Create a larger ListViewArray and then slice it
1481        // Original elements: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
1482        let elements = buffer![0i32, 1, 2, 3, 4, 5, 6, 7, 8, 9].into_array();
1483
1484        // Create 5 lists with different offsets and sizes
1485        // List 0: [0, 1] at offset 0
1486        // List 1: [2, 3, 4] at offset 2
1487        // List 2: [5] at offset 5
1488        // List 3: [6, 7] at offset 6
1489        // List 4: [8, 9] at offset 8
1490        let offsets = buffer![0u32, 2, 5, 6, 8].into_array();
1491        let sizes = buffer![2u32, 3, 1, 2, 2].into_array();
1492
1493        let full_listview = unsafe {
1494            ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
1495                .with_zero_copy_to_list(true)
1496        }
1497        .into_array();
1498
1499        // Slice to get lists 1, 2, 3 (indices 1..4)
1500        // This gives us lists with elements:
1501        // - Index 0: [2, 3, 4] (original list 1)
1502        // - Index 1: [5] (original list 2)
1503        // - Index 2: [6, 7] (original list 3)
1504        let sliced = full_listview.slice(1..4);
1505
1506        // Create sparse array with indices [0, 1] and length 5
1507        // Expected result:
1508        // - Index 0: [2, 3, 4] (from sliced[0])
1509        // - Index 1: [5] (from sliced[1])
1510        // - Index 2: null
1511        // - Index 3: null
1512        // - Index 4: null
1513        let indices = buffer![0u8, 1].into_array();
1514        // Extract only the values we need from the sliced array
1515        let values = sliced.slice(0..2);
1516        let sparse =
1517            SparseArray::try_new(indices, values, 5, Scalar::null(sliced.dtype().clone())).unwrap();
1518
1519        let canonical = sparse.to_canonical().into_array();
1520        let result_listview = canonical.to_listview();
1521
1522        assert_eq!(result_listview.len(), 5);
1523
1524        // Helper to get list values at an index
1525        let get_list_values = |idx: usize| -> Vec<i32> {
1526            let offset = result_listview.offset_at(idx);
1527            let size = result_listview.size_at(idx);
1528            if size == 0 {
1529                vec![] // null/empty list
1530            } else {
1531                let elements = result_listview.elements().to_primitive();
1532                let slice = elements.as_slice::<i32>();
1533                slice[offset..offset + size].to_vec()
1534            }
1535        };
1536
1537        let empty: Vec<i32> = vec![];
1538
1539        // Verify all list values
1540        // Original slice had lists at indices 1,2,3 which were: [2,3,4], [5], [6,7]
1541        // We take indices 0 and 1 from the slice
1542        assert_eq!(get_list_values(0), vec![2, 3, 4]); // From slice index 0 (original list 1)
1543        assert_eq!(get_list_values(1), vec![5]); // From slice index 1 (original list 2)
1544        assert_eq!(get_list_values(2), empty); // null
1545        assert_eq!(get_list_values(3), empty); // null
1546        assert_eq!(get_list_values(4), empty); // null
1547
1548        // The bug in the old implementation would have incorrectly used
1549        // offsets[sparse_index] for filling nulls, which would be wrong
1550        // when dealing with sliced arrays that have non-zero starting offsets.
1551    }
1552}