vortex_sparse/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use itertools::Itertools;
7use num_traits::NumCast;
8use vortex_array::Array;
9use vortex_array::Canonical;
10use vortex_array::ToCanonical;
11use vortex_array::arrays::BoolArray;
12use vortex_array::arrays::ConstantArray;
13use vortex_array::arrays::FixedSizeListArray;
14use vortex_array::arrays::ListViewArray;
15use vortex_array::arrays::NullArray;
16use vortex_array::arrays::PrimitiveArray;
17use vortex_array::arrays::StructArray;
18use vortex_array::arrays::VarBinViewArray;
19use vortex_array::builders::ArrayBuilder;
20use vortex_array::builders::DecimalBuilder;
21use vortex_array::builders::ListViewBuilder;
22use vortex_array::builders::builder_with_capacity;
23use vortex_array::patches::Patches;
24use vortex_array::validity::Validity;
25use vortex_array::vtable::CanonicalVTable;
26use vortex_array::vtable::ValidityHelper;
27use vortex_buffer::BitBuffer;
28use vortex_buffer::Buffer;
29use vortex_buffer::BufferString;
30use vortex_buffer::ByteBuffer;
31use vortex_buffer::buffer;
32use vortex_buffer::buffer_mut;
33use vortex_dtype::DType;
34use vortex_dtype::DecimalDType;
35use vortex_dtype::DecimalType;
36use vortex_dtype::IntegerPType;
37use vortex_dtype::NativeDecimalType;
38use vortex_dtype::NativePType;
39use vortex_dtype::Nullability;
40use vortex_dtype::StructFields;
41use vortex_dtype::match_each_decimal_value_type;
42use vortex_dtype::match_each_integer_ptype;
43use vortex_dtype::match_each_native_ptype;
44use vortex_error::VortexError;
45use vortex_error::VortexExpect;
46use vortex_error::vortex_panic;
47use vortex_scalar::DecimalScalar;
48use vortex_scalar::ListScalar;
49use vortex_scalar::Scalar;
50use vortex_scalar::StructScalar;
51use vortex_vector::binaryview::BinaryView;
52
53use crate::SparseArray;
54use crate::SparseVTable;
55
56impl CanonicalVTable<SparseVTable> for SparseVTable {
57    fn canonicalize(array: &SparseArray) -> Canonical {
58        if array.patches().num_patches() == 0 {
59            return ConstantArray::new(array.fill_scalar().clone(), array.len()).to_canonical();
60        }
61
62        match array.dtype() {
63            DType::Null => {
64                assert!(array.fill_scalar().is_null());
65                Canonical::Null(NullArray::new(array.len()))
66            }
67            DType::Bool(..) => {
68                let resolved_patches = array.resolved_patches();
69                canonicalize_sparse_bools(&resolved_patches, array.fill_scalar())
70            }
71            DType::Primitive(ptype, ..) => {
72                let resolved_patches = array.resolved_patches();
73                match_each_native_ptype!(ptype, |P| {
74                    canonicalize_sparse_primitives::<P>(&resolved_patches, array.fill_scalar())
75                })
76            }
77            DType::Struct(struct_fields, ..) => canonicalize_sparse_struct(
78                struct_fields,
79                array.fill_scalar().as_struct(),
80                array.dtype(),
81                array.patches(),
82                array.len(),
83            ),
84            DType::Decimal(decimal_dtype, nullability) => {
85                let canonical_decimal_value_type =
86                    DecimalType::smallest_decimal_value_type(decimal_dtype);
87                let fill_value = array.fill_scalar().as_decimal();
88                match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
89                    canonicalize_sparse_decimal::<D>(
90                        *decimal_dtype,
91                        *nullability,
92                        fill_value,
93                        array.patches(),
94                        array.len(),
95                    )
96                })
97            }
98            dtype @ DType::Utf8(..) => {
99                let fill_value = array.fill_scalar().as_utf8().value();
100                let fill_value = fill_value.map(BufferString::into_inner);
101                canonicalize_varbin(array, dtype.clone(), fill_value)
102            }
103            dtype @ DType::Binary(..) => {
104                let fill_value = array.fill_scalar().as_binary().value();
105                canonicalize_varbin(array, dtype.clone(), fill_value)
106            }
107            DType::List(values_dtype, nullability) => {
108                canonicalize_sparse_lists(array, values_dtype.clone(), *nullability)
109            }
110            DType::FixedSizeList(.., nullability) => {
111                canonicalize_sparse_fixed_size_list(array, *nullability)
112            }
113            DType::Extension(_ext_dtype) => todo!(),
114        }
115    }
116}
117
118#[expect(
119    clippy::cognitive_complexity,
120    reason = "complexity is from nested match_smallest_offset_type macro"
121)]
122fn canonicalize_sparse_lists(
123    array: &SparseArray,
124    values_dtype: Arc<DType>,
125    nullability: Nullability,
126) -> Canonical {
127    // TODO(connor): We should move this to `vortex-dtype` so that we can use this elsewhere.
128    macro_rules! match_smallest_offset_type {
129        ($n_elements:expr, | $offset_type:ident | $body:block) => {{
130            let n_elements = $n_elements;
131            if n_elements <= u8::MAX as usize {
132                type $offset_type = u8;
133                $body
134            } else if n_elements <= u16::MAX as usize {
135                type $offset_type = u16;
136                $body
137            } else if n_elements <= u32::MAX as usize {
138                type $offset_type = u32;
139                $body
140            } else {
141                assert!(u64::try_from(n_elements).is_ok());
142                type $offset_type = u64;
143                $body
144            }
145        }};
146    }
147
148    let resolved_patches = array.resolved_patches();
149
150    let indices = resolved_patches.indices().to_primitive();
151    let values = resolved_patches.values().to_listview();
152    let fill_value = array.fill_scalar().as_list();
153
154    let n_filled = array.len() - resolved_patches.num_patches();
155    let total_canonical_values = values.elements().len() + fill_value.len() * n_filled;
156
157    let validity = Validity::from_mask(array.validity_mask(), nullability);
158
159    match_each_integer_ptype!(indices.ptype(), |I| {
160        match_smallest_offset_type!(total_canonical_values, |O| {
161            canonicalize_sparse_lists_inner::<I, O>(
162                indices.as_slice(),
163                values,
164                fill_value,
165                values_dtype,
166                array.len(),
167                total_canonical_values,
168                validity,
169            )
170        })
171    })
172}
173
174fn canonicalize_sparse_lists_inner<I: IntegerPType, O: IntegerPType>(
175    patch_indices: &[I],
176    patch_values: ListViewArray,
177    fill_value: ListScalar,
178    values_dtype: Arc<DType>,
179    len: usize,
180    total_canonical_values: usize,
181    validity: Validity,
182) -> Canonical {
183    // Create the builder with appropriate types. It is easy to just use the same type for both
184    // `offsets` and `sizes` since we have no other constraints.
185    let mut builder = ListViewBuilder::<O, O>::with_capacity(
186        values_dtype,
187        validity.nullability(),
188        total_canonical_values,
189        len,
190    );
191
192    let mut patch_idx = 0;
193
194    // Loop over the patch indices and set them to the corresponding scalar values. For positions
195    // that are not patched, use the fill value.
196    for position in 0..len {
197        let position_is_patched = patch_idx < patch_indices.len()
198            && patch_indices[patch_idx]
199                .to_usize()
200                .vortex_expect("patch index must fit in usize")
201                == position;
202
203        if position_is_patched {
204            // Set with the patch value.
205            builder
206                .append_value(patch_values.scalar_at(patch_idx).as_list())
207                .vortex_expect("Failed to append sparse value");
208            patch_idx += 1;
209        } else {
210            // Set with the fill value.
211            builder
212                .append_value(fill_value.clone())
213                .vortex_expect("Failed to append fill value");
214        }
215    }
216
217    builder.finish_into_canonical()
218}
219
220/// Canonicalize a sparse [`FixedSizeListArray`] by expanding it into a dense representation.
221fn canonicalize_sparse_fixed_size_list(array: &SparseArray, nullability: Nullability) -> Canonical {
222    let resolved_patches = array.resolved_patches();
223    let indices = resolved_patches.indices().to_primitive();
224    let values = resolved_patches.values().to_fixed_size_list();
225    let fill_value = array.fill_scalar().as_list();
226
227    let validity = Validity::from_mask(array.validity_mask(), nullability);
228
229    match_each_integer_ptype!(indices.ptype(), |I| {
230        canonicalize_sparse_fixed_size_list_inner::<I>(
231            indices.as_slice(),
232            values,
233            fill_value,
234            array.len(),
235            validity,
236        )
237    })
238}
239
240/// Build a canonical [`FixedSizeListArray`] from sparse patches by interleaving patch values with
241/// fill values.
242///
243/// This algorithm walks through the sparse indices sequentially, filling gaps with the fill value's
244/// elements (or defaults if null). Since all lists have the same size, we can directly append
245/// elements without tracking offsets.
246fn canonicalize_sparse_fixed_size_list_inner<I: IntegerPType>(
247    indices: &[I],
248    values: FixedSizeListArray,
249    fill_value: ListScalar,
250    array_len: usize,
251    validity: Validity,
252) -> Canonical {
253    let list_size = values.list_size();
254    let element_dtype = values.elements().dtype();
255    let total_elements = array_len * list_size as usize;
256    let mut builder = builder_with_capacity(element_dtype, total_elements);
257    let fill_elements = fill_value.elements();
258
259    let mut next_index = 0;
260    let indices = indices
261        .iter()
262        .map(|x| (*x).to_usize().vortex_expect("index must fit in usize"));
263
264    for (patch_idx, sparse_idx) in indices.enumerate() {
265        // Fill gap before this patch with fill values.
266        append_n_lists(
267            &mut *builder,
268            fill_elements.as_deref(),
269            list_size,
270            sparse_idx - next_index,
271        );
272
273        // Append the patch value, handling null patches by appending defaults.
274        if values.validity().is_valid(patch_idx) {
275            let patch_list = values.fixed_size_list_elements_at(patch_idx);
276            for i in 0..list_size as usize {
277                builder
278                    .append_scalar(&patch_list.scalar_at(i))
279                    .vortex_expect("element dtype must match");
280            }
281        } else {
282            builder.append_defaults(list_size as usize);
283        }
284
285        next_index = sparse_idx + 1;
286    }
287
288    // Fill remaining positions after last patch.
289    append_n_lists(
290        &mut *builder,
291        fill_elements.as_deref(),
292        list_size,
293        array_len - next_index,
294    );
295
296    let elements = builder.finish();
297
298    // SAFETY: elements.len() == array_len * list_size, validity length matches array_len.
299    Canonical::FixedSizeList(unsafe {
300        FixedSizeListArray::new_unchecked(elements, list_size, validity, array_len)
301    })
302}
303
304/// Append `count` copies of a fixed-size list to the builder.
305///
306/// If `fill_elements` is `Some`, appends those elements `count` times.
307/// If `fill_elements` is `None` (null fill), appends `list_size` default elements `count` times.
308fn append_n_lists(
309    builder: &mut dyn ArrayBuilder,
310    fill_elements: Option<&[Scalar]>,
311    list_size: u32,
312    count: usize,
313) {
314    for _ in 0..count {
315        if let Some(fill_elems) = fill_elements {
316            for elem in fill_elems {
317                builder
318                    .append_scalar(elem)
319                    .vortex_expect("element dtype must match");
320            }
321        } else {
322            builder.append_defaults(list_size as usize);
323        }
324    }
325}
326
327fn canonicalize_sparse_bools(patches: &Patches, fill_value: &Scalar) -> Canonical {
328    let (fill_bool, validity) = if fill_value.is_null() {
329        (false, Validity::AllInvalid)
330    } else {
331        (
332            fill_value
333                .try_into()
334                .vortex_expect("Fill value must convert to bool"),
335            if patches.dtype().nullability() == Nullability::NonNullable {
336                Validity::NonNullable
337            } else {
338                Validity::AllValid
339            },
340        )
341    };
342
343    let bools =
344        BoolArray::from_bit_buffer(BitBuffer::full(fill_bool, patches.array_len()), validity);
345
346    Canonical::Bool(bools.patch(patches))
347}
348
349fn canonicalize_sparse_primitives<
350    T: NativePType + for<'a> TryFrom<&'a Scalar, Error = VortexError>,
351>(
352    patches: &Patches,
353    fill_value: &Scalar,
354) -> Canonical {
355    let (primitive_fill, validity) = if fill_value.is_null() {
356        (T::default(), Validity::AllInvalid)
357    } else {
358        (
359            fill_value
360                .try_into()
361                .vortex_expect("Fill value must convert to target T"),
362            if patches.dtype().nullability() == Nullability::NonNullable {
363                Validity::NonNullable
364            } else {
365                Validity::AllValid
366            },
367        )
368    };
369
370    let parray = PrimitiveArray::new(buffer![primitive_fill; patches.array_len()], validity);
371
372    Canonical::Primitive(parray.patch(patches))
373}
374
375fn canonicalize_sparse_struct(
376    struct_fields: &StructFields,
377    fill_struct: StructScalar,
378    dtype: &DType,
379    // Resolution is unnecessary b/c we're just pushing the patches into the fields.
380    unresolved_patches: &Patches,
381    len: usize,
382) -> Canonical {
383    let (fill_values, top_level_fill_validity) = match fill_struct.fields() {
384        Some(fill_values) => (fill_values.collect::<Vec<_>>(), Validity::AllValid),
385        None => (
386            struct_fields
387                .fields()
388                .map(Scalar::default_value)
389                .collect::<Vec<_>>(),
390            Validity::AllInvalid,
391        ),
392    };
393    let patch_values_as_struct = unresolved_patches.values().to_struct();
394    let columns_patch_values = patch_values_as_struct.fields();
395    let names = patch_values_as_struct.names();
396    let validity = if dtype.is_nullable() {
397        top_level_fill_validity.patch(
398            len,
399            unresolved_patches.offset(),
400            unresolved_patches.indices(),
401            &Validity::from_mask(
402                unresolved_patches.values().validity_mask(),
403                Nullability::Nullable,
404            ),
405        )
406    } else {
407        top_level_fill_validity
408            .into_non_nullable(len)
409            .unwrap_or_else(|| vortex_panic!("fill validity should match sparse array nullability"))
410    };
411
412    StructArray::try_from_iter_with_validity(
413        names.iter().zip_eq(
414            columns_patch_values
415                .iter()
416                .cloned()
417                .zip_eq(fill_values)
418                .map(|(patch_values, fill_value)| unsafe {
419                    SparseArray::new_unchecked(
420                        unresolved_patches
421                            .clone()
422                            .map_values(|_| Ok(patch_values))
423                            .vortex_expect("Replacing patch values"),
424                        fill_value,
425                    )
426                }),
427        ),
428        validity,
429    )
430    .map(Canonical::Struct)
431    .vortex_expect("Creating struct array")
432}
433
434fn canonicalize_sparse_decimal<D: NativeDecimalType>(
435    decimal_dtype: DecimalDType,
436    nullability: Nullability,
437    fill_value: DecimalScalar,
438    patches: &Patches,
439    len: usize,
440) -> Canonical {
441    let mut builder = DecimalBuilder::with_capacity::<D>(len, decimal_dtype, nullability);
442    match fill_value.decimal_value() {
443        Some(fill_value) => {
444            let fill_value = fill_value
445                .cast::<D>()
446                .vortex_expect("unexpected value type");
447            for _ in 0..len {
448                builder.append_value(fill_value)
449            }
450        }
451        None => {
452            builder.append_nulls(len);
453        }
454    }
455    let filled_array = builder.finish_into_decimal();
456    let array = filled_array.patch(patches);
457    Canonical::Decimal(array)
458}
459
460fn canonicalize_varbin(
461    array: &SparseArray,
462    dtype: DType,
463    fill_value: Option<ByteBuffer>,
464) -> Canonical {
465    let patches = array.resolved_patches();
466    let indices = patches.indices().to_primitive();
467    let values = patches.values().to_varbinview();
468    let validity = Validity::from_mask(array.validity_mask(), dtype.nullability());
469    let len = array.len();
470
471    match_each_integer_ptype!(indices.ptype(), |I| {
472        let indices = indices.buffer::<I>();
473        canonicalize_varbin_inner::<I>(fill_value, indices, values, dtype, validity, len)
474    })
475}
476
477fn canonicalize_varbin_inner<I: IntegerPType>(
478    fill_value: Option<ByteBuffer>,
479    indices: Buffer<I>,
480    values: VarBinViewArray,
481    dtype: DType,
482    validity: Validity,
483    len: usize,
484) -> Canonical {
485    assert_eq!(dtype.nullability(), validity.nullability());
486
487    let n_patch_buffers = values.buffers().len();
488    let mut buffers = values.buffers().to_vec();
489
490    let fill = if let Some(buffer) = &fill_value {
491        buffers.push(buffer.clone());
492        BinaryView::make_view(
493            buffer.as_ref(),
494            u32::try_from(n_patch_buffers).vortex_expect("too many buffers"),
495            0,
496        )
497    } else {
498        // any <=12 character value will do
499        BinaryView::make_view(&[], 0, 0)
500    };
501
502    let mut views = buffer_mut![fill; len];
503    for (patch_index, &patch) in indices.into_iter().zip_eq(values.views().iter()) {
504        let patch_index_usize = <usize as NumCast>::from(patch_index)
505            .vortex_expect("var bin view indices must fit in usize");
506        views[patch_index_usize] = patch;
507    }
508
509    // SAFETY: views are constructed to maintain the invariants
510    let array = unsafe {
511        VarBinViewArray::new_unchecked(views.freeze(), Arc::from(buffers), dtype, validity)
512    };
513
514    Canonical::VarBinView(array)
515}
516
517#[cfg(test)]
518mod test {
519    use std::sync::Arc;
520
521    use rstest::rstest;
522    use vortex_array::IntoArray;
523    use vortex_array::ToCanonical;
524    use vortex_array::arrays::BoolArray;
525    use vortex_array::arrays::DecimalArray;
526    use vortex_array::arrays::FixedSizeListArray;
527    use vortex_array::arrays::ListArray;
528    use vortex_array::arrays::ListViewArray;
529    use vortex_array::arrays::PrimitiveArray;
530    use vortex_array::arrays::StructArray;
531    use vortex_array::arrays::VarBinArray;
532    use vortex_array::arrays::VarBinViewArray;
533    use vortex_array::arrow::IntoArrowArray as _;
534    use vortex_array::assert_arrays_eq;
535    use vortex_array::validity::Validity;
536    use vortex_buffer::ByteBuffer;
537    use vortex_buffer::buffer;
538    use vortex_buffer::buffer_mut;
539    use vortex_dtype::DType;
540    use vortex_dtype::DecimalDType;
541    use vortex_dtype::FieldNames;
542    use vortex_dtype::Nullability::NonNullable;
543    use vortex_dtype::Nullability::Nullable;
544    use vortex_dtype::PType;
545    use vortex_dtype::StructFields;
546    use vortex_mask::Mask;
547    use vortex_scalar::DecimalValue;
548    use vortex_scalar::Scalar;
549
550    use crate::SparseArray;
551
552    #[rstest]
553    #[case(Some(true))]
554    #[case(Some(false))]
555    #[case(None)]
556    fn test_sparse_bool(#[case] fill_value: Option<bool>) {
557        let indices = buffer![0u64, 1, 7].into_array();
558        let values = BoolArray::from_iter([Some(true), None, Some(false)]).into_array();
559        let sparse_bools =
560            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
561        let actual = sparse_bools.to_bool();
562
563        let expected = BoolArray::from_iter([
564            Some(true),
565            None,
566            fill_value,
567            fill_value,
568            fill_value,
569            fill_value,
570            fill_value,
571            Some(false),
572            fill_value,
573            fill_value,
574        ]);
575
576        assert_arrays_eq!(actual, expected);
577    }
578
579    #[rstest]
580    #[case(Some(0i32))]
581    #[case(Some(-1i32))]
582    #[case(None)]
583    fn test_sparse_primitive(#[case] fill_value: Option<i32>) {
584        let indices = buffer![0u64, 1, 7].into_array();
585        let values = PrimitiveArray::from_option_iter([Some(0i32), None, Some(1)]).into_array();
586        let sparse_ints =
587            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
588        assert_eq!(*sparse_ints.dtype(), DType::Primitive(PType::I32, Nullable));
589
590        let flat_ints = sparse_ints.to_primitive();
591        let expected = PrimitiveArray::from_option_iter([
592            Some(0i32),
593            None,
594            fill_value,
595            fill_value,
596            fill_value,
597            fill_value,
598            fill_value,
599            Some(1),
600            fill_value,
601            fill_value,
602        ]);
603
604        assert_arrays_eq!(&flat_ints, &expected);
605    }
606
607    #[test]
608    fn test_sparse_struct_valid_fill() {
609        let field_names = FieldNames::from_iter(["a", "b"]);
610        let field_types = vec![
611            DType::Primitive(PType::I32, Nullable),
612            DType::Primitive(PType::I32, Nullable),
613        ];
614        let struct_fields = StructFields::new(field_names, field_types);
615        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
616
617        let indices = buffer![0u64, 1, 7, 8].into_array();
618        let patch_values_a =
619            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
620        let patch_values_b =
621            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
622        let patch_values = StructArray::try_new_with_dtype(
623            vec![patch_values_a, patch_values_b],
624            struct_fields.clone(),
625            4,
626            Validity::Array(
627                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
628            ),
629        )
630        .unwrap()
631        .into_array();
632
633        let fill_scalar = Scalar::struct_(
634            struct_dtype,
635            vec![Scalar::from(Some(-10i32)), Scalar::from(Some(-1i32))],
636        );
637        let len = 10;
638        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
639
640        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
641            if i == 0 {
642                Some(10)
643            } else if i == 1 {
644                None
645            } else if i == 7 {
646                Some(20)
647            } else {
648                Some(-10)
649            }
650        }));
651        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
652            if i == 0 {
653                Some(1i32)
654            } else if i == 1 {
655                Some(2)
656            } else if i == 7 {
657                None
658            } else {
659                Some(-1)
660            }
661        }));
662
663        let expected = StructArray::try_new_with_dtype(
664            vec![expected_a.into_array(), expected_b.into_array()],
665            struct_fields,
666            len,
667            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 8 is Invalid.
668            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
669        )
670        .unwrap()
671        .to_array();
672
673        let actual = sparse_struct.to_struct();
674        assert_arrays_eq!(actual, expected);
675    }
676
677    #[test]
678    fn test_sparse_struct_invalid_fill() {
679        let field_names = FieldNames::from_iter(["a", "b"]);
680        let field_types = vec![
681            DType::Primitive(PType::I32, Nullable),
682            DType::Primitive(PType::I32, Nullable),
683        ];
684        let struct_fields = StructFields::new(field_names, field_types);
685        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
686
687        let indices = buffer![0u64, 1, 7, 8].into_array();
688        let patch_values_a =
689            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
690        let patch_values_b =
691            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
692        let patch_values = StructArray::try_new_with_dtype(
693            vec![patch_values_a, patch_values_b],
694            struct_fields.clone(),
695            4,
696            Validity::Array(
697                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
698            ),
699        )
700        .unwrap()
701        .into_array();
702
703        let fill_scalar = Scalar::null(struct_dtype);
704        let len = 10;
705        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
706
707        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
708            if i == 0 {
709                Some(10)
710            } else if i == 1 {
711                None
712            } else if i == 7 {
713                Some(20)
714            } else {
715                Some(-10)
716            }
717        }));
718        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
719            if i == 0 {
720                Some(1i32)
721            } else if i == 1 {
722                Some(2)
723            } else if i == 7 {
724                None
725            } else {
726                Some(-1)
727            }
728        }));
729
730        let expected = StructArray::try_new_with_dtype(
731            vec![expected_a.into_array(), expected_b.into_array()],
732            struct_fields,
733            len,
734            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
735            Validity::from_mask(Mask::from_indices(10, vec![0, 1, 7]), Nullable),
736        )
737        .unwrap()
738        .to_array();
739
740        let actual = sparse_struct.to_struct();
741        assert_arrays_eq!(actual, expected);
742    }
743
744    #[test]
745    fn test_sparse_decimal() {
746        let indices = buffer![0u32, 1u32, 7u32, 8u32].into_array();
747        let decimal_dtype = DecimalDType::new(3, 2);
748        let patch_values = DecimalArray::new(
749            buffer![100i128, 200i128, 300i128, 4000i128],
750            decimal_dtype,
751            Validity::from_iter([true, true, true, false]),
752        )
753        .to_array();
754        let len = 10;
755        let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
756        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
757
758        let expected = DecimalArray::new(
759            buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
760            decimal_dtype,
761            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
762            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
763        )
764        .to_array()
765        .into_arrow_preferred()
766        .unwrap();
767
768        let actual = sparse_struct
769            .to_decimal()
770            .to_array()
771            .into_arrow_preferred()
772            .unwrap();
773
774        assert_eq!(expected.data_type(), actual.data_type());
775        assert_eq!(&expected, &actual);
776    }
777
778    #[test]
779    fn test_sparse_utf8_varbinview_non_null_fill() {
780        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
781            Some("hello"),
782            Some("goodbye"),
783            Some("hello"),
784            None,
785            Some("bonjour"),
786            Some("你好"),
787            None,
788        ])
789        .into_array();
790
791        let array = SparseArray::try_new(
792            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
793            strings,
794            12,
795            Scalar::from(Some("123".to_owned())),
796        )
797        .unwrap();
798
799        let actual = array.to_varbinview().into_array();
800        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
801            Some("hello"),
802            Some("123"),
803            Some("123"),
804            Some("goodbye"),
805            Some("hello"),
806            None,
807            Some("123"),
808            Some("bonjour"),
809            Some("123"),
810            Some("你好"),
811            None,
812            Some("123"),
813        ])
814        .into_array();
815
816        assert_arrays_eq!(actual, expected);
817    }
818
819    #[test]
820    fn test_sparse_utf8_varbinview_null_fill() {
821        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
822            Some("hello"),
823            Some("goodbye"),
824            Some("hello"),
825            None,
826            Some("bonjour"),
827            Some("你好"),
828            None,
829        ])
830        .into_array();
831
832        let array = SparseArray::try_new(
833            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
834            strings,
835            12,
836            Scalar::null(DType::Utf8(Nullable)),
837        )
838        .unwrap();
839
840        let actual = array.to_varbinview().into_array();
841        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
842            Some("hello"),
843            None,
844            None,
845            Some("goodbye"),
846            Some("hello"),
847            None,
848            None,
849            Some("bonjour"),
850            None,
851            Some("你好"),
852            None,
853            None,
854        ])
855        .into_array();
856
857        assert_arrays_eq!(actual, expected);
858    }
859
860    #[test]
861    fn test_sparse_utf8_varbinview_non_nullable() {
862        let strings =
863            VarBinViewArray::from_iter_str(["hello", "goodbye", "hello", "bonjour", "你好"])
864                .into_array();
865
866        let array = SparseArray::try_new(
867            buffer![0u16, 3, 4, 5, 8].into_array(),
868            strings,
869            9,
870            Scalar::from("123".to_owned()),
871        )
872        .unwrap();
873
874        let actual = array.to_varbinview().into_array();
875        let expected = VarBinViewArray::from_iter_str([
876            "hello", "123", "123", "goodbye", "hello", "bonjour", "123", "123", "你好",
877        ])
878        .into_array();
879
880        assert_arrays_eq!(actual, expected);
881    }
882
883    #[test]
884    fn test_sparse_utf8_varbin_null_fill() {
885        let strings = <VarBinArray as FromIterator<_>>::from_iter([
886            Some("hello"),
887            Some("goodbye"),
888            Some("hello"),
889            None,
890            Some("bonjour"),
891            Some("你好"),
892            None,
893        ])
894        .into_array();
895
896        let array = SparseArray::try_new(
897            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
898            strings,
899            12,
900            Scalar::null(DType::Utf8(Nullable)),
901        )
902        .unwrap();
903
904        let actual = array.to_varbinview().into_array();
905        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
906            Some("hello"),
907            None,
908            None,
909            Some("goodbye"),
910            Some("hello"),
911            None,
912            None,
913            Some("bonjour"),
914            None,
915            Some("你好"),
916            None,
917            None,
918        ])
919        .into_array();
920
921        assert_arrays_eq!(actual, expected);
922    }
923
924    #[test]
925    fn test_sparse_binary_varbinview_non_null_fill() {
926        let binaries = VarBinViewArray::from_iter_nullable_bin([
927            Some(b"hello" as &[u8]),
928            Some(b"goodbye"),
929            Some(b"hello"),
930            None,
931            Some(b"\x00"),
932            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
933            None,
934        ])
935        .into_array();
936
937        let array = SparseArray::try_new(
938            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
939            binaries,
940            12,
941            Scalar::from(Some(ByteBuffer::from(b"123".to_vec()))),
942        )
943        .unwrap();
944
945        let actual = array.to_varbinview().into_array();
946        let expected = VarBinViewArray::from_iter_nullable_bin([
947            Some(b"hello" as &[u8]),
948            Some(b"123"),
949            Some(b"123"),
950            Some(b"goodbye"),
951            Some(b"hello"),
952            None,
953            Some(b"123"),
954            Some(b"\x00"),
955            Some(b"123"),
956            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
957            None,
958            Some(b"123"),
959        ])
960        .into_array();
961
962        assert_arrays_eq!(actual, expected);
963    }
964
965    #[test]
966    fn test_sparse_list_null_fill() {
967        // Use ListViewArray consistently
968        let elements = buffer![1i32, 2, 1, 2].into_array();
969        // Create ListView with offsets and sizes
970        // List 0: [1] at offset 0, size 1
971        // List 1: [2] at offset 1, size 1
972        // List 2: [1] at offset 2, size 1
973        // List 3: [2] at offset 3, size 1
974        let offsets = buffer![0u32, 1, 2, 3].into_array();
975        let sizes = buffer![1u32, 1, 1, 1].into_array();
976        let lists = unsafe {
977            ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
978                .with_zero_copy_to_list(true)
979        }
980        .into_array();
981
982        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
983        let fill_value = Scalar::null(lists.dtype().clone());
984        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
985            .unwrap()
986            .into_array();
987
988        let actual = sparse.to_canonical().into_array();
989        let result_listview = actual.to_listview();
990
991        // Check the structure
992        assert_eq!(result_listview.len(), 6);
993
994        // Verify sizes: positions 0,3,4,5 have data, positions 1,2 are null
995        assert_eq!(result_listview.size_at(0), 1); // [1]
996        assert_eq!(result_listview.size_at(1), 0); // null
997        assert_eq!(result_listview.size_at(2), 0); // null
998        assert_eq!(result_listview.size_at(3), 1); // [2]
999        assert_eq!(result_listview.size_at(4), 1); // [1]
1000        assert_eq!(result_listview.size_at(5), 1); // [2]
1001
1002        // Verify actual values
1003        let elements_array = result_listview.elements().to_primitive();
1004        let elements_slice = elements_array.as_slice::<i32>();
1005
1006        let list0_offset = result_listview.offset_at(0);
1007        assert_eq!(elements_slice[list0_offset], 1);
1008
1009        let list3_offset = result_listview.offset_at(3);
1010        assert_eq!(elements_slice[list3_offset], 2);
1011
1012        let list4_offset = result_listview.offset_at(4);
1013        assert_eq!(elements_slice[list4_offset], 1);
1014
1015        let list5_offset = result_listview.offset_at(5);
1016        assert_eq!(elements_slice[list5_offset], 2);
1017    }
1018
1019    #[test]
1020    fn test_sparse_list_null_fill_sliced_sparse_values() {
1021        // Create ListViewArray with 8 elements forming 8 single-element lists
1022        let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
1023        let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7].into_array();
1024        let sizes = buffer![1u32, 1, 1, 1, 1, 1, 1, 1].into_array();
1025        let lists = unsafe {
1026            ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
1027                .with_zero_copy_to_list(true)
1028        }
1029        .into_array();
1030
1031        // Slice to get lists 2..6, which are: [1], [2], [1], [2]
1032        let lists = lists.slice(2..6);
1033
1034        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1035        let fill_value = Scalar::null(lists.dtype().clone());
1036        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1037            .unwrap()
1038            .into_array();
1039
1040        let actual = sparse.to_canonical().into_array();
1041        let result_listview = actual.to_listview();
1042
1043        // Check the structure
1044        assert_eq!(result_listview.len(), 6);
1045
1046        // Verify sizes: positions 0,3,4,5 have data (from the sliced lists), positions 1,2 are null
1047        assert_eq!(result_listview.size_at(0), 1); // [1] - from slice index 0 (original index 2)
1048        assert_eq!(result_listview.size_at(1), 0); // null
1049        assert_eq!(result_listview.size_at(2), 0); // null
1050        assert_eq!(result_listview.size_at(3), 1); // [2] - from slice index 3 (original index 5)
1051        assert_eq!(result_listview.size_at(4), 1); // [1] - extra element beyond original slice
1052        assert_eq!(result_listview.size_at(5), 1); // [2] - extra element beyond original slice
1053
1054        // Verify actual values
1055        let elements_array = result_listview.elements().to_primitive();
1056        let elements_slice = elements_array.as_slice::<i32>();
1057
1058        let list0_offset = result_listview.offset_at(0);
1059        assert_eq!(elements_slice[list0_offset], 1);
1060
1061        let list3_offset = result_listview.offset_at(3);
1062        assert_eq!(elements_slice[list3_offset], 2);
1063    }
1064
1065    #[test]
1066    fn test_sparse_list_non_null_fill() {
1067        // Create ListViewArray with 4 single-element lists
1068        let elements = buffer![1i32, 2, 1, 2].into_array();
1069        let offsets = buffer![0u32, 1, 2, 3].into_array();
1070        let sizes = buffer![1u32, 1, 1, 1].into_array();
1071        let lists = unsafe {
1072            ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
1073                .with_zero_copy_to_list(true)
1074        }
1075        .into_array();
1076
1077        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1078        let fill_value = Scalar::from(Some(vec![5i32, 6, 7, 8]));
1079        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1080            .unwrap()
1081            .into_array();
1082
1083        let actual = sparse.to_canonical().into_array();
1084        let result_listview = actual.to_listview();
1085
1086        // Check the structure
1087        assert_eq!(result_listview.len(), 6);
1088
1089        // Verify sizes: positions 0,3,4,5 have sparse data, positions 1,2 have fill values
1090        assert_eq!(result_listview.size_at(0), 1); // [1] from sparse
1091        assert_eq!(result_listview.size_at(1), 4); // [5,6,7,8] fill value
1092        assert_eq!(result_listview.size_at(2), 4); // [5,6,7,8] fill value
1093        assert_eq!(result_listview.size_at(3), 1); // [2] from sparse
1094        assert_eq!(result_listview.size_at(4), 1); // [1] from sparse
1095        assert_eq!(result_listview.size_at(5), 1); // [2] from sparse
1096
1097        // Verify actual values
1098        let elements_array = result_listview.elements().to_primitive();
1099        let elements_slice = elements_array.as_slice::<i32>();
1100
1101        // List 0: [1]
1102        let list0_offset = result_listview.offset_at(0) as usize;
1103        assert_eq!(elements_slice[list0_offset], 1);
1104
1105        // List 1: [5,6,7,8]
1106        let list1_offset = result_listview.offset_at(1) as usize;
1107        let list1_size = result_listview.size_at(1) as usize;
1108        assert_eq!(
1109            &elements_slice[list1_offset..list1_offset + list1_size],
1110            &[5, 6, 7, 8]
1111        );
1112
1113        // List 2: [5,6,7,8]
1114        let list2_offset = result_listview.offset_at(2) as usize;
1115        let list2_size = result_listview.size_at(2) as usize;
1116        assert_eq!(
1117            &elements_slice[list2_offset..list2_offset + list2_size],
1118            &[5, 6, 7, 8]
1119        );
1120
1121        // List 3: [2]
1122        let list3_offset = result_listview.offset_at(3) as usize;
1123        assert_eq!(elements_slice[list3_offset], 2);
1124
1125        // List 4: [1]
1126        let list4_offset = result_listview.offset_at(4) as usize;
1127        assert_eq!(elements_slice[list4_offset], 1);
1128
1129        // List 5: [2]
1130        let list5_offset = result_listview.offset_at(5) as usize;
1131        assert_eq!(elements_slice[list5_offset], 2);
1132    }
1133
1134    #[test]
1135    fn test_sparse_binary_varbin_null_fill() {
1136        let strings = <VarBinArray as FromIterator<_>>::from_iter([
1137            Some(b"hello" as &[u8]),
1138            Some(b"goodbye"),
1139            Some(b"hello"),
1140            None,
1141            Some(b"\x00"),
1142            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1143            None,
1144        ])
1145        .into_array();
1146
1147        let array = SparseArray::try_new(
1148            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
1149            strings,
1150            12,
1151            Scalar::null(DType::Binary(Nullable)),
1152        )
1153        .unwrap();
1154
1155        let actual = array.to_varbinview().into_array();
1156        let expected = VarBinViewArray::from_iter_nullable_bin([
1157            Some(b"hello" as &[u8]),
1158            None,
1159            None,
1160            Some(b"goodbye"),
1161            Some(b"hello"),
1162            None,
1163            None,
1164            Some(b"\x00"),
1165            None,
1166            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1167            None,
1168            None,
1169        ])
1170        .into_array();
1171
1172        assert_arrays_eq!(actual, expected);
1173    }
1174
1175    #[test]
1176    fn test_sparse_fixed_size_list_null_fill() {
1177        // Create a FixedSizeListArray with 3 lists of size 3.
1178        let elements = buffer![1i32, 2, 3, 4, 5, 6, 7, 8, 9].into_array();
1179        let fsl = FixedSizeListArray::try_new(elements, 3, Validity::AllValid, 3)
1180            .unwrap()
1181            .into_array();
1182
1183        let indices = buffer![0u8, 2u8, 3u8].into_array();
1184        let fill_value = Scalar::null(DType::FixedSizeList(
1185            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1186            3,
1187            Nullable,
1188        ));
1189        let sparse = SparseArray::try_new(indices, fsl, 5, fill_value)
1190            .unwrap()
1191            .into_array();
1192
1193        let actual = sparse.to_canonical().into_array();
1194
1195        // Expected: [1,2,3], null, [4,5,6], [7,8,9], null.
1196        let expected_elements =
1197            buffer![1i32, 2, 3, 0, 0, 0, 4, 5, 6, 7, 8, 9, 0, 0, 0].into_array();
1198        let expected = FixedSizeListArray::try_new(
1199            expected_elements,
1200            3,
1201            Validity::Array(BoolArray::from_iter([true, false, true, true, false]).into_array()),
1202            5,
1203        )
1204        .unwrap()
1205        .into_array();
1206
1207        assert_arrays_eq!(actual, expected);
1208    }
1209
1210    #[test]
1211    fn test_sparse_fixed_size_list_non_null_fill() {
1212        let elements = buffer![1i32, 2, 3, 4, 5, 6].into_array();
1213        let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 3)
1214            .unwrap()
1215            .into_array();
1216
1217        let indices = buffer![0u8, 2u8, 4u8].into_array();
1218        let fill_value = Scalar::fixed_size_list(
1219            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1220            vec![
1221                Scalar::primitive(99i32, NonNullable),
1222                Scalar::primitive(88i32, NonNullable),
1223            ],
1224            NonNullable,
1225        );
1226        let sparse = SparseArray::try_new(indices, fsl, 6, fill_value)
1227            .unwrap()
1228            .into_array();
1229
1230        let actual = sparse.to_canonical().into_array();
1231
1232        // Expected: [1,2], [99,88], [3,4], [99,88], [5,6], [99,88].
1233        let expected_elements = buffer![1i32, 2, 99, 88, 3, 4, 99, 88, 5, 6, 99, 88].into_array();
1234        let expected = FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 6)
1235            .unwrap()
1236            .into_array();
1237
1238        assert_arrays_eq!(actual, expected);
1239    }
1240
1241    #[test]
1242    fn test_sparse_fixed_size_list_with_validity() {
1243        // Create FSL values with some nulls.
1244        let elements = buffer![10i32, 20, 30, 40, 50, 60].into_array();
1245        let fsl = FixedSizeListArray::try_new(
1246            elements,
1247            2,
1248            Validity::Array(BoolArray::from_iter([true, false, true]).into_array()),
1249            3,
1250        )
1251        .unwrap()
1252        .into_array();
1253
1254        let indices = buffer![1u16, 3u16, 4u16].into_array();
1255        let fill_value = Scalar::fixed_size_list(
1256            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1257            vec![
1258                Scalar::primitive(7i32, NonNullable),
1259                Scalar::primitive(8i32, NonNullable),
1260            ],
1261            Nullable,
1262        );
1263        let sparse = SparseArray::try_new(indices, fsl, 6, fill_value)
1264            .unwrap()
1265            .into_array();
1266
1267        let actual = sparse.to_canonical().into_array();
1268
1269        // Expected validity: [true, true, true, false, true, true].
1270        // Expected elements: [7,8], [10,20], [7,8], [30,40], [50,60], [7,8].
1271        let expected_elements = buffer![7i32, 8, 10, 20, 7, 8, 30, 40, 50, 60, 7, 8].into_array();
1272        let expected = FixedSizeListArray::try_new(
1273            expected_elements,
1274            2,
1275            Validity::Array(
1276                BoolArray::from_iter([true, true, true, false, true, true]).into_array(),
1277            ),
1278            6,
1279        )
1280        .unwrap()
1281        .into_array();
1282
1283        assert_arrays_eq!(actual, expected);
1284    }
1285
1286    #[test]
1287    fn test_sparse_fixed_size_list_truly_sparse() {
1288        // Test with a truly sparse array where most values are the fill value.
1289        // This demonstrates the compression benefit of sparse encoding.
1290
1291        // Create patch values: only 3 distinct lists out of 100 total positions.
1292        let elements = buffer![10i32, 11, 20, 21, 30, 31].into_array();
1293        let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 3)
1294            .unwrap()
1295            .into_array();
1296
1297        // Patches at positions 5, 50, and 95 out of 100.
1298        let indices = buffer![5u32, 50, 95].into_array();
1299
1300        // Fill value [99, 99] will appear 97 times but stored only once.
1301        let fill_value = Scalar::fixed_size_list(
1302            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1303            vec![
1304                Scalar::primitive(99i32, NonNullable),
1305                Scalar::primitive(99i32, NonNullable),
1306            ],
1307            NonNullable,
1308        );
1309
1310        let sparse = SparseArray::try_new(indices, fsl, 100, fill_value)
1311            .unwrap()
1312            .into_array();
1313
1314        let actual = sparse.to_canonical().into_array();
1315
1316        // Build expected: 97 copies of [99,99] with patches at positions 5, 50, 95.
1317        let mut expected_elements_vec = Vec::with_capacity(200);
1318        // Positions 0-4: fill values
1319        for _ in 0..5 {
1320            expected_elements_vec.extend([99i32, 99]);
1321        }
1322        // Position 5: first patch [10, 11]
1323        expected_elements_vec.extend([10, 11]);
1324        // Positions 6-49: fill values
1325        for _ in 6..50 {
1326            expected_elements_vec.extend([99, 99]);
1327        }
1328        // Position 50: second patch [20, 21]
1329        expected_elements_vec.extend([20, 21]);
1330        // Positions 51-94: fill values
1331        for _ in 51..95 {
1332            expected_elements_vec.extend([99, 99]);
1333        }
1334        // Position 95: third patch [30, 31]
1335        expected_elements_vec.extend([30, 31]);
1336        // Positions 96-99: fill values
1337        for _ in 96..100 {
1338            expected_elements_vec.extend([99, 99]);
1339        }
1340        let expected_elements = PrimitiveArray::from_iter(expected_elements_vec).into_array();
1341        let expected =
1342            FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 100)
1343                .unwrap()
1344                .into_array();
1345
1346        assert_arrays_eq!(actual, expected);
1347    }
1348
1349    #[test]
1350    fn test_sparse_fixed_size_list_single_element() {
1351        // Test with a single element FSL array.
1352        let elements = buffer![42i32, 43].into_array();
1353        let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 1)
1354            .unwrap()
1355            .into_array();
1356
1357        let indices = buffer![0u32].into_array();
1358        let fill_value = Scalar::fixed_size_list(
1359            Arc::new(DType::Primitive(PType::I32, NonNullable)),
1360            vec![
1361                Scalar::primitive(1i32, NonNullable),
1362                Scalar::primitive(2i32, NonNullable),
1363            ],
1364            NonNullable,
1365        );
1366        let sparse = SparseArray::try_new(indices, fsl, 1, fill_value)
1367            .unwrap()
1368            .into_array();
1369
1370        let actual = sparse.to_canonical().into_array();
1371
1372        // Expected: just [42, 43].
1373        let expected_elements = buffer![42i32, 43].into_array();
1374        let expected = FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 1)
1375            .unwrap()
1376            .into_array();
1377
1378        assert_arrays_eq!(actual, expected);
1379    }
1380
1381    #[test]
1382    fn test_sparse_list_grows_offset_type() {
1383        let elements = buffer![1i32, 2, 1, 2].into_array();
1384        let offsets = buffer![0u8, 1, 2, 3, 4].into_array();
1385        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1386            .unwrap()
1387            .into_array();
1388
1389        let indices = buffer![0u8, 1u8, 2u8, 3u8].into_array();
1390        let fill_value = Scalar::from(Some(vec![42i32; 252])); // 252 + 4 elements = 256 > u8::MAX
1391        let sparse = SparseArray::try_new(indices, lists, 5, fill_value)
1392            .unwrap()
1393            .into_array();
1394
1395        let actual = sparse.to_canonical().into_array();
1396        let mut expected_elements = buffer_mut![1, 2, 1, 2];
1397        expected_elements.extend(buffer![42i32; 252]);
1398        let expected = ListArray::try_new(
1399            expected_elements.freeze().into_array(),
1400            buffer![0u16, 1, 2, 3, 4, 256].into_array(),
1401            Validity::AllValid,
1402        )
1403        .unwrap()
1404        .into_array();
1405
1406        assert_eq!(
1407            actual.to_listview().offsets().dtype(),
1408            &DType::Primitive(PType::U16, NonNullable)
1409        );
1410        assert_arrays_eq!(&actual, &expected);
1411
1412        // Note that the preferred arrow list representation is `List` (not `ListView`).
1413        let arrow_dtype = expected.dtype().to_arrow_dtype().unwrap();
1414        let actual = actual.into_arrow(&arrow_dtype).unwrap();
1415        let expected = expected.into_arrow(&arrow_dtype).unwrap();
1416
1417        assert_eq!(actual.data_type(), expected.data_type());
1418    }
1419
1420    #[test]
1421    fn test_sparse_listview_null_fill_with_gaps() {
1422        // This test specifically catches the bug where the old implementation
1423        // incorrectly tracked `last_valid_offset` as the START of the last list
1424        // instead of properly handling ListView's offset/size pairs.
1425
1426        // Create a ListViewArray with non-trivial offsets and sizes.
1427        // Elements: [10, 11, 12, 20, 21, 30, 31, 32, 33]
1428        // List 0: elements[0..3] = [10, 11, 12]
1429        // List 1: elements[3..5] = [20, 21]
1430        // List 2: elements[5..9] = [30, 31, 32, 33]
1431        let elements = buffer![10i32, 11, 12, 20, 21, 30, 31, 32, 33].into_array();
1432        let offsets = buffer![0u32, 3, 5].into_array();
1433        let sizes = buffer![3u32, 2, 4].into_array();
1434
1435        let list_view = unsafe {
1436            ListViewArray::new_unchecked(elements.clone(), offsets, sizes, Validity::AllValid)
1437                .with_zero_copy_to_list(true)
1438        };
1439
1440        let list_dtype = list_view.dtype().clone();
1441
1442        // Create sparse array with indices [1, 4, 7] and length 10
1443        // This means we have:
1444        // - Index 0: null
1445        // - Index 1: List 0 [10, 11, 12]
1446        // - Index 2-3: null
1447        // - Index 4: List 1 [20, 21]
1448        // - Index 5-6: null
1449        // - Index 7: List 2 [30, 31, 32, 33]
1450        // - Index 8-9: null
1451        let indices = buffer![1u8, 4, 7].into_array();
1452        let sparse = SparseArray::try_new(
1453            indices,
1454            list_view.into_array(),
1455            10,
1456            Scalar::null(list_dtype),
1457        )
1458        .unwrap();
1459
1460        // Convert to canonical form - this triggers the function we're testing
1461        let canonical = sparse.to_canonical().into_array();
1462        let result_listview = canonical.to_listview();
1463
1464        // Verify the structure
1465        assert_eq!(result_listview.len(), 10);
1466
1467        // Helper to get list values at an index
1468        let get_list_values = |idx: usize| -> Vec<i32> {
1469            let offset = result_listview.offset_at(idx);
1470            let size = result_listview.size_at(idx);
1471            if size == 0 {
1472                vec![] // null/empty list
1473            } else {
1474                let elements = result_listview.elements().to_primitive();
1475                let slice = elements.as_slice::<i32>();
1476                slice[offset..offset + size].to_vec()
1477            }
1478        };
1479
1480        let empty: Vec<i32> = vec![];
1481
1482        // Verify all list values
1483        assert_eq!(get_list_values(0), empty); // null
1484        assert_eq!(get_list_values(1), vec![10, 11, 12]); // sparse index 0
1485        assert_eq!(get_list_values(2), empty); // null
1486        assert_eq!(get_list_values(3), empty); // null
1487        assert_eq!(get_list_values(4), vec![20, 21]); // sparse index 1
1488        assert_eq!(get_list_values(5), empty); // null
1489        assert_eq!(get_list_values(6), empty); // null
1490        assert_eq!(get_list_values(7), vec![30, 31, 32, 33]); // sparse index 2
1491        assert_eq!(get_list_values(8), empty); // null
1492        assert_eq!(get_list_values(9), empty); // null
1493    }
1494
1495    #[test]
1496    fn test_sparse_listview_sliced_values_null_fill() {
1497        // This test uses sliced ListView values to ensure proper handling
1498        // of non-zero starting offsets in the source data.
1499
1500        // Create a larger ListViewArray and then slice it
1501        // Original elements: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
1502        let elements = buffer![0i32, 1, 2, 3, 4, 5, 6, 7, 8, 9].into_array();
1503
1504        // Create 5 lists with different offsets and sizes
1505        // List 0: [0, 1] at offset 0
1506        // List 1: [2, 3, 4] at offset 2
1507        // List 2: [5] at offset 5
1508        // List 3: [6, 7] at offset 6
1509        // List 4: [8, 9] at offset 8
1510        let offsets = buffer![0u32, 2, 5, 6, 8].into_array();
1511        let sizes = buffer![2u32, 3, 1, 2, 2].into_array();
1512
1513        let full_listview = unsafe {
1514            ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
1515                .with_zero_copy_to_list(true)
1516        }
1517        .into_array();
1518
1519        // Slice to get lists 1, 2, 3 (indices 1..4)
1520        // This gives us lists with elements:
1521        // - Index 0: [2, 3, 4] (original list 1)
1522        // - Index 1: [5] (original list 2)
1523        // - Index 2: [6, 7] (original list 3)
1524        let sliced = full_listview.slice(1..4);
1525
1526        // Create sparse array with indices [0, 1] and length 5
1527        // Expected result:
1528        // - Index 0: [2, 3, 4] (from sliced[0])
1529        // - Index 1: [5] (from sliced[1])
1530        // - Index 2: null
1531        // - Index 3: null
1532        // - Index 4: null
1533        let indices = buffer![0u8, 1].into_array();
1534        // Extract only the values we need from the sliced array
1535        let values = sliced.slice(0..2);
1536        let sparse =
1537            SparseArray::try_new(indices, values, 5, Scalar::null(sliced.dtype().clone())).unwrap();
1538
1539        let canonical = sparse.to_canonical().into_array();
1540        let result_listview = canonical.to_listview();
1541
1542        assert_eq!(result_listview.len(), 5);
1543
1544        // Helper to get list values at an index
1545        let get_list_values = |idx: usize| -> Vec<i32> {
1546            let offset = result_listview.offset_at(idx);
1547            let size = result_listview.size_at(idx);
1548            if size == 0 {
1549                vec![] // null/empty list
1550            } else {
1551                let elements = result_listview.elements().to_primitive();
1552                let slice = elements.as_slice::<i32>();
1553                slice[offset..offset + size].to_vec()
1554            }
1555        };
1556
1557        let empty: Vec<i32> = vec![];
1558
1559        // Verify all list values
1560        // Original slice had lists at indices 1,2,3 which were: [2,3,4], [5], [6,7]
1561        // We take indices 0 and 1 from the slice
1562        assert_eq!(get_list_values(0), vec![2, 3, 4]); // From slice index 0 (original list 1)
1563        assert_eq!(get_list_values(1), vec![5]); // From slice index 1 (original list 2)
1564        assert_eq!(get_list_values(2), empty); // null
1565        assert_eq!(get_list_values(3), empty); // null
1566        assert_eq!(get_list_values(4), empty); // null
1567
1568        // The bug in the old implementation would have incorrectly used
1569        // offsets[sparse_index] for filling nulls, which would be wrong
1570        // when dealing with sliced arrays that have non-zero starting offsets.
1571    }
1572}