vortex_sparse/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::sync::Arc;
5
6use itertools::Itertools;
7use num_traits::NumCast;
8use vortex_array::arrays::{
9    BinaryView, BoolArray, BooleanBuffer, ConstantArray, ListArray, NullArray, OffsetPType,
10    PrimitiveArray, StructArray, VarBinViewArray, smallest_storage_type,
11};
12use vortex_array::builders::{
13    ArrayBuilder as _, ArrayBuilderExt, DecimalBuilder, ListBuilder, builder_with_capacity,
14};
15use vortex_array::patches::Patches;
16use vortex_array::validity::Validity;
17use vortex_array::vtable::CanonicalVTable;
18use vortex_array::{Array, ArrayRef, Canonical, IntoArray as _, ToCanonical as _};
19use vortex_buffer::{Buffer, BufferMut, BufferString, ByteBuffer, buffer, buffer_mut};
20use vortex_dtype::{
21    DType, DecimalDType, NativePType, Nullability, StructFields, match_each_integer_ptype,
22    match_each_native_ptype,
23};
24use vortex_error::{VortexError, VortexExpect as _, VortexResult, vortex_err};
25use vortex_scalar::{
26    DecimalScalar, ListScalar, NativeDecimalType, Scalar, StructScalar,
27    match_each_decimal_value_type,
28};
29
30use crate::{SparseArray, SparseVTable};
31
32impl CanonicalVTable<SparseVTable> for SparseVTable {
33    fn canonicalize(array: &SparseArray) -> VortexResult<Canonical> {
34        if array.patches().num_patches() == 0 {
35            return ConstantArray::new(array.fill_scalar().clone(), array.len()).to_canonical();
36        }
37
38        match array.dtype() {
39            DType::Null => {
40                assert!(array.fill_scalar().is_null());
41                Ok(Canonical::Null(NullArray::new(array.len())))
42            }
43            DType::Bool(..) => {
44                let resolved_patches = array.resolved_patches()?;
45                canonicalize_sparse_bools(&resolved_patches, array.fill_scalar())
46            }
47            DType::Primitive(ptype, ..) => {
48                let resolved_patches = array.resolved_patches()?;
49                match_each_native_ptype!(ptype, |P| {
50                    canonicalize_sparse_primitives::<P>(&resolved_patches, array.fill_scalar())
51                })
52            }
53            DType::Struct(struct_fields, ..) => canonicalize_sparse_struct(
54                struct_fields,
55                array.fill_scalar().as_struct(),
56                array.dtype(),
57                array.patches(),
58                array.len(),
59            ),
60            DType::Decimal(decimal_dtype, nullability) => {
61                let canonical_decimal_value_type = smallest_storage_type(decimal_dtype);
62                let fill_value = array.fill_scalar().as_decimal();
63                match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
64                    canonicalize_sparse_decimal::<D>(
65                        *decimal_dtype,
66                        *nullability,
67                        fill_value,
68                        array.patches(),
69                        array.len(),
70                    )
71                })
72            }
73            dtype @ DType::Utf8(..) => {
74                let fill_value = array.fill_scalar().as_utf8().value();
75                let fill_value = fill_value.map(BufferString::into_inner);
76                canonicalize_varbin(array, dtype.clone(), fill_value)
77            }
78            dtype @ DType::Binary(..) => {
79                let fill_value = array.fill_scalar().as_binary().value();
80                canonicalize_varbin(array, dtype.clone(), fill_value)
81            }
82            DType::List(values_dtype, nullability) => {
83                let resolved_patches = array.resolved_patches()?;
84                canonicalize_sparse_lists(
85                    array,
86                    resolved_patches,
87                    values_dtype.clone(),
88                    *nullability,
89                )
90            }
91            DType::Extension(_ext_dtype) => todo!(),
92        }
93    }
94}
95
96/// The elements of this [ListScalar] as an array or `None` if scalar is null.
97fn list_scalar_to_elements_array(scalar: ListScalar) -> VortexResult<Option<ArrayRef>> {
98    let Some(elements) = scalar.elements() else {
99        return Ok(None);
100    };
101
102    let mut builder = builder_with_capacity(scalar.element_dtype(), scalar.len());
103    for s in elements {
104        builder.append_scalar(&s)?;
105    }
106    Ok(Some(builder.finish()))
107}
108
109/// Create a list-typed array containing one element, scalar, or `None` if scalar is null.
110fn list_scalar_to_singleton_list_array(scalar: ListScalar) -> VortexResult<Option<ArrayRef>> {
111    let nullability = scalar.dtype().nullability();
112    let Some(elements) = list_scalar_to_elements_array(scalar)? else {
113        return Ok(None);
114    };
115
116    let validity = match nullability {
117        Nullability::NonNullable => Validity::NonNullable,
118        Nullability::Nullable => Validity::AllValid,
119    };
120
121    let n = elements.len();
122    ListArray::try_new(elements, buffer![0_u64, n as u64].into_array(), validity)
123        .map(|x| Some(x.into_array()))
124}
125
126#[allow(clippy::cognitive_complexity)]
127fn canonicalize_sparse_lists(
128    array: &SparseArray,
129    resolved_patches: Patches,
130    values_dtype: Arc<DType>,
131    nullability: Nullability,
132) -> VortexResult<Canonical> {
133    macro_rules! match_smallest_offset_type {
134        ($n_elements:expr, | $offset_type:ident | $body:block) => {{
135            let n_elements = $n_elements;
136            if n_elements <= u8::MAX as usize {
137                type $offset_type = u8;
138                $body
139            } else if n_elements <= u16::MAX as usize {
140                type $offset_type = u16;
141                $body
142            } else if n_elements <= u32::MAX as usize {
143                type $offset_type = u32;
144                $body
145            } else {
146                assert!(u64::try_from(n_elements).is_ok());
147                type $offset_type = u64;
148                $body
149            }
150        }};
151    }
152
153    let indices = resolved_patches.indices().to_primitive()?;
154    let values = resolved_patches.values().to_list()?;
155    let fill_value = array.fill_scalar().as_list();
156
157    let n_filled = array.len() - resolved_patches.num_patches();
158    let total_canonical_values = values.elements().len() + fill_value.len() * n_filled;
159
160    let validity = array
161        .validity_mask()
162        .map(|x| Validity::from_mask(x, nullability))?;
163
164    match_each_integer_ptype!(indices.ptype(), |I| {
165        match_smallest_offset_type!(total_canonical_values, |O| {
166            canonicalize_sparse_lists_inner::<I, O>(
167                indices.as_slice(),
168                values,
169                fill_value,
170                values_dtype,
171                array.len(),
172                total_canonical_values,
173                validity,
174            )
175        })
176    })
177}
178
179fn canonicalize_sparse_lists_inner<I: NativePType, SmallestViableOffsetType: OffsetPType>(
180    indices: &[I],
181    values: ListArray,
182    fill_value: ListScalar,
183    values_dtype: Arc<DType>,
184    len: usize,
185    total_canonical_values: usize,
186    validity: Validity,
187) -> VortexResult<Canonical> {
188    let Some(fill_value_array) = list_scalar_to_singleton_list_array(fill_value)? else {
189        let sparse_list_elements = values.elements().clone();
190        let sparse_list_offsets = values.offsets().to_primitive()?;
191        match_each_integer_ptype!(sparse_list_offsets.ptype(), |SparseValuesOffsetType| {
192            let sparse_list_offsets = sparse_list_offsets.as_slice::<SparseValuesOffsetType>();
193            // If the values are a small slice of a large array, their offsets may not fit in
194            // SmallestViableOffsetType. We avoid a copy by reusing values.elements(), but we
195            // therefore must use the offset type of values.offsets().
196            return canonicalize_sparse_lists_inner_with_null_fill_value(
197                indices,
198                sparse_list_elements,
199                sparse_list_offsets,
200                len,
201                validity,
202            );
203        });
204    };
205
206    let mut builder = ListBuilder::<SmallestViableOffsetType>::with_values_and_index_capacity(
207        values_dtype,
208        validity.nullability(),
209        total_canonical_values,
210        len + 1,
211    );
212    let mut next_index = 0_usize;
213    let enumerated_indices_usize = indices
214        .iter()
215        .map(|x| (*x).to_usize().vortex_expect("index must fit in usize"))
216        .enumerate();
217    for (patch_values_index, next_patched_index) in enumerated_indices_usize {
218        for _ in next_index..next_patched_index {
219            builder.extend_from_array(&fill_value_array)?;
220        }
221        builder.extend_from_array(&values.slice(patch_values_index, patch_values_index + 1))?;
222        next_index = next_patched_index + 1;
223    }
224
225    for _ in next_index..len {
226        builder.extend_from_array(&fill_value_array)?;
227    }
228
229    builder.finish().to_canonical()
230}
231
232fn canonicalize_sparse_lists_inner_with_null_fill_value<I: NativePType, O: OffsetPType>(
233    indices: &[I],
234    elements: ArrayRef,
235    offsets: &[O],
236    len: usize,
237    validity: Validity,
238) -> VortexResult<Canonical> {
239    assert!(indices.len() < len + 1);
240    let mut dense_offsets = BufferMut::with_capacity(len + 1);
241
242    // We cannot use zero because elements may have leading junk values (e.g. the result of a slice).
243    dense_offsets.push(offsets[0]);
244    let mut dense_last_set_index = 0_usize;
245    for (sparse_start_index, dense_start_index) in indices.iter().enumerate() {
246        let sparse_end_index = sparse_start_index + 1;
247        let dense_start_index = (*dense_start_index)
248            .to_usize()
249            .vortex_expect("index must fit in usize");
250        let dense_end_index = dense_start_index + 1;
251
252        for _ in (dense_last_set_index + 1)..dense_end_index {
253            // For each null list, copy-forward the old index. These empty lists are masked by the validity.
254            dense_offsets.push(dense_offsets[dense_last_set_index]);
255        }
256        dense_offsets.push(offsets[sparse_end_index]);
257        dense_last_set_index = dense_end_index;
258    }
259    for _ in (dense_last_set_index + 1)..len {
260        // For each null list, copy-forward the old index. These empty lists are masked by the validity.
261        dense_offsets.push(dense_offsets[dense_last_set_index]);
262    }
263    let array = ListArray::try_new(elements, dense_offsets.into_array(), validity)?;
264    Ok(Canonical::List(array))
265}
266
267fn canonicalize_sparse_bools(patches: &Patches, fill_value: &Scalar) -> VortexResult<Canonical> {
268    let (fill_bool, validity) = if fill_value.is_null() {
269        (false, Validity::AllInvalid)
270    } else {
271        (
272            fill_value.try_into()?,
273            if patches.dtype().nullability() == Nullability::NonNullable {
274                Validity::NonNullable
275            } else {
276                Validity::AllValid
277            },
278        )
279    };
280
281    let bools = BoolArray::new(
282        if fill_bool {
283            BooleanBuffer::new_set(patches.array_len())
284        } else {
285            BooleanBuffer::new_unset(patches.array_len())
286        },
287        validity,
288    );
289
290    bools.patch(patches).map(Canonical::Bool)
291}
292
293fn canonicalize_sparse_primitives<
294    T: NativePType + for<'a> TryFrom<&'a Scalar, Error = VortexError>,
295>(
296    patches: &Patches,
297    fill_value: &Scalar,
298) -> VortexResult<Canonical> {
299    let (primitive_fill, validity) = if fill_value.is_null() {
300        (T::default(), Validity::AllInvalid)
301    } else {
302        (
303            fill_value.try_into()?,
304            if patches.dtype().nullability() == Nullability::NonNullable {
305                Validity::NonNullable
306            } else {
307                Validity::AllValid
308            },
309        )
310    };
311
312    let parray = PrimitiveArray::new(buffer![primitive_fill; patches.array_len()], validity);
313
314    parray.patch(patches).map(Canonical::Primitive)
315}
316
317fn canonicalize_sparse_struct(
318    struct_fields: &StructFields,
319    fill_struct: StructScalar,
320    dtype: &DType,
321    // Resolution is unnecessary b/c we're just pushing the patches into the fields.
322    unresolved_patches: &Patches,
323    len: usize,
324) -> VortexResult<Canonical> {
325    let (fill_values, top_level_fill_validity) = match fill_struct.fields() {
326        Some(fill_values) => (fill_values, Validity::AllValid),
327        None => (
328            struct_fields.fields().map(Scalar::default_value).collect(),
329            Validity::AllInvalid,
330        ),
331    };
332    let patch_values_as_struct = unresolved_patches.values().to_canonical()?.into_struct()?;
333    let columns_patch_values = patch_values_as_struct.fields();
334    let names = patch_values_as_struct.names();
335    let validity = if dtype.is_nullable() {
336        top_level_fill_validity.patch(
337            len,
338            unresolved_patches.offset(),
339            unresolved_patches.indices(),
340            &Validity::from_mask(
341                unresolved_patches.values().validity_mask()?,
342                Nullability::Nullable,
343            ),
344        )?
345    } else {
346        top_level_fill_validity
347            .into_non_nullable()
348            .ok_or_else(|| vortex_err!("fill validity should match sparse array nullability"))?
349    };
350
351    columns_patch_values
352        .iter()
353        .cloned()
354        .zip_eq(fill_values.into_iter())
355        .map(|(patch_values, fill_value)| -> VortexResult<_> {
356            SparseArray::try_new_from_patches(
357                unresolved_patches
358                    .clone()
359                    .map_values(|_| Ok(patch_values))?,
360                fill_value,
361            )
362        })
363        .process_results(|sparse_columns| {
364            StructArray::try_from_iter_with_validity(names.iter().zip_eq(sparse_columns), validity)
365                .map(Canonical::Struct)
366        })?
367}
368
369fn canonicalize_sparse_decimal<D: NativeDecimalType>(
370    decimal_dtype: DecimalDType,
371    nullability: Nullability,
372    fill_value: DecimalScalar,
373    patches: &Patches,
374    len: usize,
375) -> VortexResult<Canonical> {
376    let mut builder = DecimalBuilder::with_capacity::<D>(len, decimal_dtype, nullability);
377    match fill_value.decimal_value() {
378        Some(fill_value) => {
379            let fill_value = fill_value
380                .cast::<D>()
381                .vortex_expect("unexpected value type");
382            for _ in 0..len {
383                builder.append_value(fill_value)
384            }
385        }
386        None => {
387            builder.append_nulls(len);
388        }
389    }
390    let filled_array = builder.finish_into_decimal();
391    let array = filled_array.patch(patches)?;
392    Ok(Canonical::Decimal(array))
393}
394
395fn canonicalize_varbin(
396    array: &SparseArray,
397    dtype: DType,
398    fill_value: Option<ByteBuffer>,
399) -> VortexResult<Canonical> {
400    let patches = array.resolved_patches()?;
401    let indices = patches.indices().to_primitive()?;
402    let values = patches.values().to_varbinview()?;
403    let validity = array
404        .validity_mask()
405        .map(|x| Validity::from_mask(x, dtype.nullability()))?;
406    let len = array.len();
407
408    match_each_integer_ptype!(indices.ptype(), |I| {
409        let indices = indices.buffer::<I>();
410        canonicalize_varbin_inner::<I>(fill_value, indices, values, dtype, validity, len)
411    })
412}
413
414fn canonicalize_varbin_inner<I: NativePType>(
415    fill_value: Option<ByteBuffer>,
416    indices: Buffer<I>,
417    values: VarBinViewArray,
418    dtype: DType,
419    validity: Validity,
420    len: usize,
421) -> VortexResult<Canonical> {
422    assert_eq!(dtype.nullability(), validity.nullability());
423
424    let n_patch_buffers = values.buffers().len();
425    let mut buffers = values.buffers().to_vec();
426
427    let fill = if let Some(buffer) = &fill_value {
428        buffers.push(buffer.clone());
429        BinaryView::make_view(
430            buffer.as_ref(),
431            u32::try_from(n_patch_buffers).vortex_expect("too many buffers"),
432            0,
433        )
434    } else {
435        // any <=12 character value will do
436        BinaryView::make_view(&[], 0, 0)
437    };
438
439    let mut views = buffer_mut![fill; len];
440    for (patch_index, &patch) in indices.into_iter().zip_eq(values.views().iter()) {
441        let patch_index_usize = <usize as NumCast>::from(patch_index)
442            .vortex_expect("var bin view indices must fit in usize");
443        views[patch_index_usize] = patch;
444    }
445
446    // SAFETY: views are constructed to maintain the invariants
447    let array = unsafe {
448        VarBinViewArray::new_unchecked(views.freeze(), Arc::from(buffers), dtype, validity)
449    };
450
451    Ok(Canonical::VarBinView(array))
452}
453
454#[cfg(test)]
455mod test {
456    use rstest::rstest;
457    use vortex_array::arrays::{
458        BoolArray, BooleanBufferBuilder, DecimalArray, ListArray, PrimitiveArray, StructArray,
459        VarBinArray, VarBinViewArray,
460    };
461    use vortex_array::arrow::IntoArrowArray as _;
462    use vortex_array::validity::Validity;
463    use vortex_array::vtable::ValidityHelper;
464    use vortex_array::{IntoArray, ToCanonical};
465    use vortex_buffer::{ByteBuffer, buffer, buffer_mut};
466    use vortex_dtype::Nullability::{NonNullable, Nullable};
467    use vortex_dtype::{DType, DecimalDType, FieldNames, PType, StructFields};
468    use vortex_mask::Mask;
469    use vortex_scalar::{DecimalValue, Scalar};
470
471    use crate::SparseArray;
472    use crate::canonical::{list_scalar_to_elements_array, list_scalar_to_singleton_list_array};
473
474    #[rstest]
475    #[case(Some(true))]
476    #[case(Some(false))]
477    #[case(None)]
478    fn test_sparse_bool(#[case] fill_value: Option<bool>) {
479        let indices = buffer![0u64, 1, 7].into_array();
480        let values = bool_array_from_nullable_vec(vec![Some(true), None, Some(false)], fill_value)
481            .into_array();
482        let sparse_bools =
483            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
484        assert_eq!(sparse_bools.dtype(), &DType::Bool(Nullable));
485
486        let flat_bools = sparse_bools.to_bool().unwrap();
487        let expected = bool_array_from_nullable_vec(
488            vec![
489                Some(true),
490                None,
491                fill_value,
492                fill_value,
493                fill_value,
494                fill_value,
495                fill_value,
496                Some(false),
497                fill_value,
498                fill_value,
499            ],
500            fill_value,
501        );
502
503        assert_eq!(flat_bools.boolean_buffer(), expected.boolean_buffer());
504        assert_eq!(flat_bools.validity(), expected.validity());
505
506        assert!(flat_bools.boolean_buffer().value(0));
507        assert!(flat_bools.validity().is_valid(0).unwrap());
508        assert_eq!(
509            flat_bools.boolean_buffer().value(1),
510            fill_value.unwrap_or_default()
511        );
512        assert!(!flat_bools.validity().is_valid(1).unwrap());
513        assert_eq!(
514            flat_bools.validity().is_valid(2).unwrap(),
515            fill_value.is_some()
516        );
517        assert!(!flat_bools.boolean_buffer().value(7));
518        assert!(flat_bools.validity().is_valid(7).unwrap());
519    }
520
521    fn bool_array_from_nullable_vec(
522        bools: Vec<Option<bool>>,
523        fill_value: Option<bool>,
524    ) -> BoolArray {
525        let mut buffer = BooleanBufferBuilder::new(bools.len());
526        let mut validity = BooleanBufferBuilder::new(bools.len());
527        for maybe_bool in bools {
528            buffer.append(maybe_bool.unwrap_or_else(|| fill_value.unwrap_or_default()));
529            validity.append(maybe_bool.is_some());
530        }
531        BoolArray::new(buffer.finish(), Validity::from(validity.finish()))
532    }
533
534    #[rstest]
535    #[case(Some(0i32))]
536    #[case(Some(-1i32))]
537    #[case(None)]
538    fn test_sparse_primitive(#[case] fill_value: Option<i32>) {
539        let indices = buffer![0u64, 1, 7].into_array();
540        let values = PrimitiveArray::from_option_iter([Some(0i32), None, Some(1)]).into_array();
541        let sparse_ints =
542            SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
543        assert_eq!(*sparse_ints.dtype(), DType::Primitive(PType::I32, Nullable));
544
545        let flat_ints = sparse_ints.to_primitive().unwrap();
546        let expected = PrimitiveArray::from_option_iter([
547            Some(0i32),
548            None,
549            fill_value,
550            fill_value,
551            fill_value,
552            fill_value,
553            fill_value,
554            Some(1),
555            fill_value,
556            fill_value,
557        ]);
558
559        assert_eq!(flat_ints.byte_buffer(), expected.byte_buffer());
560        assert_eq!(flat_ints.validity(), expected.validity());
561
562        assert_eq!(flat_ints.as_slice::<i32>()[0], 0);
563        assert!(flat_ints.validity().is_valid(0).unwrap());
564        assert_eq!(flat_ints.as_slice::<i32>()[1], 0);
565        assert!(!flat_ints.validity().is_valid(1).unwrap());
566        assert_eq!(
567            flat_ints.as_slice::<i32>()[2],
568            fill_value.unwrap_or_default()
569        );
570        assert_eq!(
571            flat_ints.validity().is_valid(2).unwrap(),
572            fill_value.is_some()
573        );
574        assert_eq!(flat_ints.as_slice::<i32>()[7], 1);
575        assert!(flat_ints.validity().is_valid(7).unwrap());
576    }
577
578    #[test]
579    fn test_sparse_struct_valid_fill() {
580        let field_names = FieldNames::from_iter(["a", "b"]);
581        let field_types = vec![
582            DType::Primitive(PType::I32, Nullable),
583            DType::Primitive(PType::I32, Nullable),
584        ];
585        let struct_fields = StructFields::new(field_names, field_types);
586        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
587
588        let indices = buffer![0u64, 1, 7, 8].into_array();
589        let patch_values_a =
590            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
591        let patch_values_b =
592            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
593        let patch_values = StructArray::try_new_with_dtype(
594            vec![patch_values_a, patch_values_b],
595            struct_fields.clone(),
596            4,
597            Validity::Array(
598                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
599            ),
600        )
601        .unwrap()
602        .into_array();
603
604        let fill_scalar = Scalar::struct_(
605            struct_dtype,
606            vec![Scalar::from(Some(-10i32)), Scalar::from(Some(-1i32))],
607        );
608        let len = 10;
609        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
610
611        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
612            if i == 0 {
613                Some(10)
614            } else if i == 1 {
615                None
616            } else if i == 7 {
617                Some(20)
618            } else {
619                Some(-10)
620            }
621        }));
622        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
623            if i == 0 {
624                Some(1i32)
625            } else if i == 1 {
626                Some(2)
627            } else if i == 7 {
628                None
629            } else {
630                Some(-1)
631            }
632        }));
633
634        let expected = StructArray::try_new_with_dtype(
635            vec![expected_a.into_array(), expected_b.into_array()],
636            struct_fields,
637            len,
638            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 8 is Invalid.
639            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
640        )
641        .unwrap()
642        .to_array()
643        .into_arrow_preferred()
644        .unwrap();
645
646        let actual = sparse_struct
647            .to_struct()
648            .unwrap()
649            .to_array()
650            .into_arrow_preferred()
651            .unwrap();
652
653        assert_eq!(expected.data_type(), actual.data_type());
654        assert_eq!(&expected, &actual);
655    }
656
657    #[test]
658    fn test_sparse_struct_invalid_fill() {
659        let field_names = FieldNames::from_iter(["a", "b"]);
660        let field_types = vec![
661            DType::Primitive(PType::I32, Nullable),
662            DType::Primitive(PType::I32, Nullable),
663        ];
664        let struct_fields = StructFields::new(field_names, field_types);
665        let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
666
667        let indices = buffer![0u64, 1, 7, 8].into_array();
668        let patch_values_a =
669            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
670        let patch_values_b =
671            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
672        let patch_values = StructArray::try_new_with_dtype(
673            vec![patch_values_a, patch_values_b],
674            struct_fields.clone(),
675            4,
676            Validity::Array(
677                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
678            ),
679        )
680        .unwrap()
681        .into_array();
682
683        let fill_scalar = Scalar::null(struct_dtype);
684        let len = 10;
685        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
686
687        let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
688            if i == 0 {
689                Some(10)
690            } else if i == 1 {
691                None
692            } else if i == 7 {
693                Some(20)
694            } else {
695                Some(-10)
696            }
697        }));
698        let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
699            if i == 0 {
700                Some(1i32)
701            } else if i == 1 {
702                Some(2)
703            } else if i == 7 {
704                None
705            } else {
706                Some(-1)
707            }
708        }));
709
710        let expected = StructArray::try_new_with_dtype(
711            vec![expected_a.into_array(), expected_b.into_array()],
712            struct_fields,
713            len,
714            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
715            Validity::from_mask(Mask::from_indices(10, vec![0, 1, 7]), Nullable),
716        )
717        .unwrap()
718        .to_array()
719        .into_arrow_preferred()
720        .unwrap();
721
722        let actual = sparse_struct
723            .to_struct()
724            .unwrap()
725            .to_array()
726            .into_arrow_preferred()
727            .unwrap();
728
729        assert_eq!(expected.data_type(), actual.data_type());
730        assert_eq!(&expected, &actual);
731    }
732
733    #[test]
734    fn test_sparse_decimal() {
735        let indices = buffer![0u32, 1u32, 7u32, 8u32].into_array();
736        let decimal_dtype = DecimalDType::new(3, 2);
737        let patch_values = DecimalArray::new(
738            buffer![100i128, 200i128, 300i128, 4000i128],
739            decimal_dtype,
740            Validity::from_iter([true, true, true, false]),
741        )
742        .to_array();
743        let len = 10;
744        let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
745        let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
746
747        let expected = DecimalArray::new(
748            buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
749            decimal_dtype,
750            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid]; ergo 0, 1, 7 are valid.
751            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
752        )
753        .to_array()
754        .into_arrow_preferred()
755        .unwrap();
756
757        let actual = sparse_struct
758            .to_decimal()
759            .unwrap()
760            .to_array()
761            .into_arrow_preferred()
762            .unwrap();
763
764        assert_eq!(expected.data_type(), actual.data_type());
765        assert_eq!(&expected, &actual);
766    }
767
768    #[test]
769    fn test_sparse_utf8_varbinview_non_null_fill() {
770        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
771            Some("hello"),
772            Some("goodbye"),
773            Some("hello"),
774            None,
775            Some("bonjour"),
776            Some("你好"),
777            None,
778        ])
779        .into_array();
780
781        let array = SparseArray::try_new(
782            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
783            strings,
784            12,
785            Scalar::from(Some("123".to_owned())),
786        )
787        .unwrap();
788
789        let actual = array.to_varbinview().unwrap().into_array();
790        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
791            Some("hello"),
792            Some("123"),
793            Some("123"),
794            Some("goodbye"),
795            Some("hello"),
796            None,
797            Some("123"),
798            Some("bonjour"),
799            Some("123"),
800            Some("你好"),
801            None,
802            Some("123"),
803        ])
804        .into_array();
805
806        let actual = actual.into_arrow_preferred().unwrap();
807        let expected = expected.into_arrow_preferred().unwrap();
808
809        assert_eq!(actual.data_type(), expected.data_type());
810        assert_eq!(&actual, &expected);
811    }
812
813    #[test]
814    fn test_sparse_utf8_varbinview_null_fill() {
815        let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
816            Some("hello"),
817            Some("goodbye"),
818            Some("hello"),
819            None,
820            Some("bonjour"),
821            Some("你好"),
822            None,
823        ])
824        .into_array();
825
826        let array = SparseArray::try_new(
827            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
828            strings,
829            12,
830            Scalar::null(DType::Utf8(Nullable)),
831        )
832        .unwrap();
833
834        let actual = array.to_varbinview().unwrap().into_array();
835        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
836            Some("hello"),
837            None,
838            None,
839            Some("goodbye"),
840            Some("hello"),
841            None,
842            None,
843            Some("bonjour"),
844            None,
845            Some("你好"),
846            None,
847            None,
848        ])
849        .into_array();
850
851        let actual = actual.into_arrow_preferred().unwrap();
852        let expected = expected.into_arrow_preferred().unwrap();
853
854        assert_eq!(actual.data_type(), expected.data_type());
855        assert_eq!(&actual, &expected);
856    }
857
858    #[test]
859    fn test_sparse_utf8_varbinview_non_nullable() {
860        let strings =
861            VarBinViewArray::from_iter_str(["hello", "goodbye", "hello", "bonjour", "你好"])
862                .into_array();
863
864        let array = SparseArray::try_new(
865            buffer![0u16, 3, 4, 5, 8].into_array(),
866            strings,
867            9,
868            Scalar::from("123".to_owned()),
869        )
870        .unwrap();
871
872        let actual = array.to_varbinview().unwrap().into_array();
873        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
874            Some("hello"),
875            Some("123"),
876            Some("123"),
877            Some("goodbye"),
878            Some("hello"),
879            Some("bonjour"),
880            Some("123"),
881            Some("123"),
882            Some("你好"),
883        ])
884        .into_array();
885
886        let actual = actual.into_arrow_preferred().unwrap();
887        let expected = expected.into_arrow_preferred().unwrap();
888
889        assert_eq!(actual.data_type(), expected.data_type());
890        assert_eq!(&actual, &expected);
891    }
892
893    #[test]
894    fn test_sparse_utf8_varbin_null_fill() {
895        let strings = <VarBinArray as FromIterator<_>>::from_iter([
896            Some("hello"),
897            Some("goodbye"),
898            Some("hello"),
899            None,
900            Some("bonjour"),
901            Some("你好"),
902            None,
903        ])
904        .into_array();
905
906        let array = SparseArray::try_new(
907            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
908            strings,
909            12,
910            Scalar::null(DType::Utf8(Nullable)),
911        )
912        .unwrap();
913
914        let actual = array.to_varbinview().unwrap().into_array();
915        let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
916            Some("hello"),
917            None,
918            None,
919            Some("goodbye"),
920            Some("hello"),
921            None,
922            None,
923            Some("bonjour"),
924            None,
925            Some("你好"),
926            None,
927            None,
928        ])
929        .into_array();
930
931        let actual = actual.into_arrow_preferred().unwrap();
932        let expected = expected.into_arrow_preferred().unwrap();
933
934        assert_eq!(actual.data_type(), expected.data_type());
935        assert_eq!(&actual, &expected);
936    }
937
938    #[test]
939    fn test_sparse_binary_varbinview_non_null_fill() {
940        let binaries = VarBinViewArray::from_iter_nullable_bin([
941            Some(b"hello" as &[u8]),
942            Some(b"goodbye"),
943            Some(b"hello"),
944            None,
945            Some(b"\x00"),
946            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
947            None,
948        ])
949        .into_array();
950
951        let array = SparseArray::try_new(
952            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
953            binaries,
954            12,
955            Scalar::from(Some(ByteBuffer::from(b"123".to_vec()))),
956        )
957        .unwrap();
958
959        let actual = array.to_varbinview().unwrap().into_array();
960        let expected = VarBinViewArray::from_iter_nullable_bin([
961            Some(b"hello" as &[u8]),
962            Some(b"123"),
963            Some(b"123"),
964            Some(b"goodbye"),
965            Some(b"hello"),
966            None,
967            Some(b"123"),
968            Some(b"\x00"),
969            Some(b"123"),
970            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
971            None,
972            Some(b"123"),
973        ])
974        .into_array();
975
976        let actual = actual.into_arrow_preferred().unwrap();
977        let expected = expected.into_arrow_preferred().unwrap();
978
979        assert_eq!(actual.data_type(), expected.data_type());
980        assert_eq!(&actual, &expected);
981    }
982
983    #[test]
984    fn test_list_scalar_to_elements_array() {
985        let scalar = Scalar::from(Some(vec![1, 2, 3]));
986        let array = list_scalar_to_elements_array(scalar.as_list()).unwrap();
987        assert_eq!(
988            array.unwrap().display_values().to_string(),
989            "[1i32, 2i32, 3i32]"
990        );
991
992        let scalar = Scalar::null_typed::<Vec<i32>>();
993        let array = list_scalar_to_elements_array(scalar.as_list()).unwrap();
994        assert!(array.is_none());
995    }
996
997    #[test]
998    fn test_list_scalar_to_singleton_list_array() {
999        let scalar = Scalar::from(Some(vec![1, 2, 3]));
1000        let array = list_scalar_to_singleton_list_array(scalar.as_list()).unwrap();
1001        assert!(array.is_some());
1002        let array = array.unwrap();
1003        assert_eq!(array.scalar_at(0), scalar);
1004        assert_eq!(array.len(), 1);
1005
1006        let scalar = Scalar::null_typed::<Vec<i32>>();
1007        let array = list_scalar_to_singleton_list_array(scalar.as_list()).unwrap();
1008        assert!(array.is_none());
1009    }
1010
1011    #[test]
1012    fn test_sparse_list_null_fill() {
1013        let elements = buffer![1i32, 2, 1, 2].into_array();
1014        let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1015        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1016            .unwrap()
1017            .into_array();
1018
1019        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1020        let fill_value = Scalar::null(lists.dtype().clone());
1021        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1022            .unwrap()
1023            .into_array();
1024
1025        let actual = sparse.to_canonical().unwrap().into_array();
1026        let expected = ListArray::try_new(
1027            buffer![1i32, 2, 1, 2].into_array(),
1028            buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1029            Validity::Array(
1030                BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1031            ),
1032        )
1033        .unwrap()
1034        .into_array();
1035
1036        let actual = actual.into_arrow_preferred().unwrap();
1037        let expected = expected.into_arrow_preferred().unwrap();
1038
1039        assert_eq!(actual.data_type(), expected.data_type());
1040        assert_eq!(&actual, &expected);
1041    }
1042
1043    #[test]
1044    fn test_sparse_list_null_fill_sliced_sparse_values() {
1045        let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
1046        let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7, 8].into_array();
1047        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1048            .unwrap()
1049            .into_array();
1050        let lists = lists.slice(2, 6);
1051
1052        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1053        let fill_value = Scalar::null(lists.dtype().clone());
1054        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1055            .unwrap()
1056            .into_array();
1057
1058        let actual = sparse.to_canonical().unwrap().into_array();
1059        let expected = ListArray::try_new(
1060            buffer![1i32, 2, 1, 2].into_array(),
1061            buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1062            Validity::Array(
1063                BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1064            ),
1065        )
1066        .unwrap()
1067        .into_array();
1068
1069        let actual = actual.into_arrow_preferred().unwrap();
1070        let expected = expected.into_arrow_preferred().unwrap();
1071
1072        assert_eq!(actual.data_type(), expected.data_type());
1073        assert_eq!(&actual, &expected);
1074    }
1075
1076    #[test]
1077    fn test_sparse_list_non_null_fill() {
1078        let elements = buffer![1i32, 2, 1, 2].into_array();
1079        let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1080        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1081            .unwrap()
1082            .into_array();
1083
1084        let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1085        let fill_value = Scalar::from(Some(vec![5i32, 6, 7, 8]));
1086        let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1087            .unwrap()
1088            .into_array();
1089
1090        let actual = sparse.to_canonical().unwrap().into_array();
1091        let expected = ListArray::try_new(
1092            buffer![1i32, 5, 6, 7, 8, 5, 6, 7, 8, 2, 1, 2].into_array(),
1093            buffer![0u32, 1, 5, 9, 10, 11, 12].into_array(),
1094            Validity::AllValid,
1095        )
1096        .unwrap()
1097        .into_array();
1098
1099        let actual = actual.into_arrow_preferred().unwrap();
1100        let expected = expected.into_arrow_preferred().unwrap();
1101
1102        assert_eq!(actual.data_type(), expected.data_type());
1103        assert_eq!(&actual, &expected);
1104    }
1105
1106    #[test]
1107    fn test_sparse_binary_varbin_null_fill() {
1108        let strings = <VarBinArray as FromIterator<_>>::from_iter([
1109            Some(b"hello" as &[u8]),
1110            Some(b"goodbye"),
1111            Some(b"hello"),
1112            None,
1113            Some(b"\x00"),
1114            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1115            None,
1116        ])
1117        .into_array();
1118
1119        let array = SparseArray::try_new(
1120            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
1121            strings,
1122            12,
1123            Scalar::null(DType::Binary(Nullable)),
1124        )
1125        .unwrap();
1126
1127        let actual = array.to_varbinview().unwrap().into_array();
1128        let expected = VarBinViewArray::from_iter_nullable_bin([
1129            Some(b"hello" as &[u8]),
1130            None,
1131            None,
1132            Some(b"goodbye"),
1133            Some(b"hello"),
1134            None,
1135            None,
1136            Some(b"\x00"),
1137            None,
1138            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1139            None,
1140            None,
1141        ])
1142        .into_array();
1143        let actual = actual.into_arrow_preferred().unwrap();
1144        let expected = expected.into_arrow_preferred().unwrap();
1145
1146        assert_eq!(actual.data_type(), expected.data_type());
1147        assert_eq!(&actual, &expected);
1148    }
1149
1150    #[test]
1151    fn test_sparse_list_grows_offset_type() {
1152        let elements = buffer![1i32, 2, 1, 2].into_array();
1153        let offsets = buffer![0u8, 1, 2, 3, 4].into_array();
1154        let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1155            .unwrap()
1156            .into_array();
1157
1158        let indices = buffer![0u8, 1u8, 2u8, 3u8].into_array();
1159        let fill_value = Scalar::from(Some(vec![42i32; 252])); // 252 + 4 elements = 256 > u8::MAX
1160        let sparse = SparseArray::try_new(indices, lists, 5, fill_value)
1161            .unwrap()
1162            .into_array();
1163
1164        let actual = sparse.to_canonical().unwrap().into_array();
1165        let mut expected_elements = buffer_mut![1, 2, 1, 2];
1166        expected_elements.extend(buffer![42i32; 252]);
1167        let expected = ListArray::try_new(
1168            expected_elements.freeze().into_array(),
1169            buffer![0u16, 1, 2, 3, 4, 256].into_array(),
1170            Validity::AllValid,
1171        )
1172        .unwrap()
1173        .into_array();
1174
1175        assert_eq!(
1176            actual.to_list().unwrap().offsets().dtype(),
1177            &DType::Primitive(PType::U16, NonNullable)
1178        );
1179
1180        let actual = actual.into_arrow_preferred().unwrap();
1181        let expected = expected.into_arrow_preferred().unwrap();
1182
1183        assert_eq!(actual.data_type(), expected.data_type());
1184        assert_eq!(&actual, &expected);
1185    }
1186}