vortex_sparse/
canonical.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use itertools::Itertools;
5use num_traits::NumCast;
6use vortex_array::arrays::{
7    BinaryView, BoolArray, BooleanBuffer, ConstantArray, NullArray, PrimitiveArray, StructArray,
8    VarBinViewArray, smallest_storage_type,
9};
10use vortex_array::builders::{ArrayBuilder as _, DecimalBuilder};
11use vortex_array::patches::Patches;
12use vortex_array::validity::Validity;
13use vortex_array::vtable::CanonicalVTable;
14use vortex_array::{Array, Canonical, ToCanonical as _};
15use vortex_buffer::{Buffer, BufferString, ByteBuffer, buffer, buffer_mut};
16use vortex_dtype::{
17    DType, DecimalDType, NativePType, Nullability, StructFields, match_each_integer_ptype,
18    match_each_native_ptype,
19};
20use vortex_error::{VortexError, VortexExpect as _, VortexResult, vortex_err};
21use vortex_scalar::{
22    DecimalScalar, NativeDecimalType, Scalar, StructScalar, match_each_decimal_value_type,
23};
24
25use crate::{SparseArray, SparseVTable};
26
27impl CanonicalVTable<SparseVTable> for SparseVTable {
28    fn canonicalize(array: &SparseArray) -> VortexResult<Canonical> {
29        if array.patches().num_patches() == 0 {
30            return ConstantArray::new(array.fill_scalar().clone(), array.len()).to_canonical();
31        }
32
33        match array.dtype() {
34            DType::Null => {
35                assert!(array.fill_scalar().is_null());
36                Ok(Canonical::Null(NullArray::new(array.len())))
37            }
38            DType::Bool(..) => {
39                let resolved_patches = array.resolved_patches()?;
40                canonicalize_sparse_bools(&resolved_patches, array.fill_scalar())
41            }
42            DType::Primitive(ptype, ..) => {
43                let resolved_patches = array.resolved_patches()?;
44                match_each_native_ptype!(ptype, |P| {
45                    canonicalize_sparse_primitives::<P>(&resolved_patches, array.fill_scalar())
46                })
47            }
48            DType::Struct(struct_fields, ..) => canonicalize_sparse_struct(
49                struct_fields,
50                array.fill_scalar().as_struct(),
51                array.dtype(),
52                array.patches(),
53                array.len(),
54            ),
55            DType::Decimal(decimal_dtype, nullability) => {
56                let canonical_decimal_value_type = smallest_storage_type(decimal_dtype);
57                let fill_value = array.fill_scalar().as_decimal();
58                match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
59                    canonicalize_sparse_decimal::<D>(
60                        *decimal_dtype,
61                        *nullability,
62                        fill_value,
63                        array.patches(),
64                        array.len(),
65                    )
66                })
67            }
68            dtype @ DType::Utf8(..) => {
69                let fill_value = array.fill_scalar().as_utf8().value();
70                let fill_value = fill_value.map(BufferString::into_inner);
71                canonicalize_varbin(array, dtype.clone(), fill_value)
72            }
73            dtype @ DType::Binary(..) => {
74                let fill_value = array.fill_scalar().as_binary().value();
75                canonicalize_varbin(array, dtype.clone(), fill_value)
76            }
77            DType::List(_dtype, _nullability) => todo!(),
78            DType::Extension(_ext_dtype) => todo!(),
79        }
80    }
81}
82
83fn canonicalize_sparse_bools(patches: &Patches, fill_value: &Scalar) -> VortexResult<Canonical> {
84    let (fill_bool, validity) = if fill_value.is_null() {
85        (false, Validity::AllInvalid)
86    } else {
87        (
88            fill_value.try_into()?,
89            if patches.dtype().nullability() == Nullability::NonNullable {
90                Validity::NonNullable
91            } else {
92                Validity::AllValid
93            },
94        )
95    };
96
97    let bools = BoolArray::new(
98        if fill_bool {
99            BooleanBuffer::new_set(patches.array_len())
100        } else {
101            BooleanBuffer::new_unset(patches.array_len())
102        },
103        validity,
104    );
105
106    bools.patch(patches).map(Canonical::Bool)
107}
108
109fn canonicalize_sparse_primitives<
110    T: NativePType + for<'a> TryFrom<&'a Scalar, Error = VortexError>,
111>(
112    patches: &Patches,
113    fill_value: &Scalar,
114) -> VortexResult<Canonical> {
115    let (primitive_fill, validity) = if fill_value.is_null() {
116        (T::default(), Validity::AllInvalid)
117    } else {
118        (
119            fill_value.try_into()?,
120            if patches.dtype().nullability() == Nullability::NonNullable {
121                Validity::NonNullable
122            } else {
123                Validity::AllValid
124            },
125        )
126    };
127
128    let parray = PrimitiveArray::new(buffer![primitive_fill; patches.array_len()], validity);
129
130    parray.patch(patches).map(Canonical::Primitive)
131}
132
133fn canonicalize_sparse_struct(
134    struct_fields: &StructFields,
135    fill_struct: StructScalar,
136    dtype: &DType,
137    // Resolution is unnecessary b/c we're just pushing the patches into the fields.
138    unresolved_patches: &Patches,
139    len: usize,
140) -> VortexResult<Canonical> {
141    let (fill_values, top_level_fill_validity) = match fill_struct.fields() {
142        Some(fill_values) => (fill_values, Validity::AllValid),
143        None => (
144            struct_fields.fields().map(Scalar::default_value).collect(),
145            Validity::AllInvalid,
146        ),
147    };
148    let patch_values_as_struct = unresolved_patches.values().to_canonical()?.into_struct()?;
149    let columns_patch_values = patch_values_as_struct.fields();
150    let names = patch_values_as_struct.names();
151    let validity = if dtype.is_nullable() {
152        top_level_fill_validity.patch(
153            len,
154            unresolved_patches.offset(),
155            unresolved_patches.indices(),
156            &Validity::from_mask(
157                unresolved_patches.values().validity_mask()?,
158                Nullability::Nullable,
159            ),
160        )?
161    } else {
162        top_level_fill_validity
163            .into_non_nullable()
164            .ok_or_else(|| vortex_err!("fill validity should match sparse array nullability"))?
165    };
166
167    columns_patch_values
168        .iter()
169        .cloned()
170        .zip_eq(fill_values.into_iter())
171        .map(|(patch_values, fill_value)| -> VortexResult<_> {
172            SparseArray::try_new_from_patches(
173                unresolved_patches
174                    .clone()
175                    .map_values(|_| Ok(patch_values))?,
176                fill_value,
177            )
178        })
179        .process_results(|sparse_columns| {
180            StructArray::try_from_iter_with_validity(names.iter().zip_eq(sparse_columns), validity)
181                .map(Canonical::Struct)
182        })?
183}
184
185fn canonicalize_sparse_decimal<D: NativeDecimalType>(
186    decimal_dtype: DecimalDType,
187    nullability: Nullability,
188    fill_value: DecimalScalar,
189    patches: &Patches,
190    len: usize,
191) -> VortexResult<Canonical> {
192    let mut builder = DecimalBuilder::with_capacity::<D>(len, decimal_dtype, nullability);
193    match fill_value.decimal_value() {
194        Some(fill_value) => {
195            let fill_value = fill_value
196                .cast::<D>()
197                .vortex_expect("unexpected value type");
198            for _ in 0..len {
199                builder.append_value(fill_value)
200            }
201        }
202        None => {
203            builder.append_nulls(len);
204        }
205    }
206    let filled_array = builder.finish_into_decimal();
207    let array = filled_array.patch(patches)?;
208    Ok(Canonical::Decimal(array))
209}
210
211fn canonicalize_varbin(
212    array: &SparseArray,
213    dtype: DType,
214    fill_value: Option<ByteBuffer>,
215) -> VortexResult<Canonical> {
216    let patches = array.resolved_patches()?;
217    let indices = patches.indices().to_primitive()?;
218    let values = patches.values().to_varbinview()?;
219    let validity = array
220        .validity_mask()
221        .map(|x| Validity::from_mask(x, dtype.nullability()))?;
222    let len = array.len();
223
224    match_each_integer_ptype!(indices.ptype(), |I| {
225        let indices = indices.buffer::<I>();
226        canonicalize_varbin_inner::<I>(fill_value, indices, values, dtype, validity, len)
227    })
228}
229
230fn canonicalize_varbin_inner<I: NativePType>(
231    fill_value: Option<ByteBuffer>,
232    indices: Buffer<I>,
233    values: VarBinViewArray,
234    dtype: DType,
235    validity: Validity,
236    len: usize,
237) -> VortexResult<Canonical> {
238    assert_eq!(dtype.nullability(), validity.nullability());
239
240    let n_patch_buffers = values.buffers().len();
241    let mut buffers = values.buffers().to_vec();
242
243    let fill = if let Some(buffer) = &fill_value {
244        buffers.push(buffer.clone());
245        BinaryView::make_view(
246            buffer.as_ref(),
247            u32::try_from(n_patch_buffers).vortex_expect("too many buffers"),
248            0,
249        )
250    } else {
251        // any <=12 character value will do
252        BinaryView::make_view(&[], 0, 0)
253    };
254
255    let mut views = buffer_mut![fill; len];
256    for (patch_index, &patch) in indices.into_iter().zip_eq(values.views().iter()) {
257        let patch_index_usize = <usize as NumCast>::from(patch_index)
258            .vortex_expect("var bin view indices must fit in usize");
259        views[patch_index_usize] = patch;
260    }
261
262    let array = VarBinViewArray::try_new(views.freeze(), buffers, dtype, validity)?;
263
264    Ok(Canonical::VarBinView(array))
265}
266
#[cfg(test)]
mod test {
    use rstest::rstest;
    use vortex_array::arrays::{
        BoolArray, BooleanBufferBuilder, DecimalArray, PrimitiveArray, StructArray, VarBinArray,
        VarBinViewArray,
    };
    use vortex_array::arrow::IntoArrowArray as _;
    use vortex_array::validity::Validity;
    use vortex_array::vtable::ValidityHelper;
    use vortex_array::{IntoArray, ToCanonical};
    use vortex_buffer::{ByteBuffer, buffer};
    use vortex_dtype::Nullability::Nullable;
    use vortex_dtype::{DType, DecimalDType, FieldNames, PType, StructFields};
    use vortex_mask::Mask;
    use vortex_scalar::{DecimalValue, Scalar};

    use crate::SparseArray;

    /// Nullable string patch values shared by the UTF-8 tests.
    const UTF8_PATCHES: [Option<&str>; 7] = [
        Some("hello"),
        Some("goodbye"),
        Some("hello"),
        None,
        Some("bonjour"),
        Some("你好"),
        None,
    ];

    /// Expected contents when `UTF8_PATCHES` are placed at indices
    /// `[0, 3, 4, 5, 7, 9, 10]` of a 12-element array with a null fill.
    const UTF8_NULL_FILL_EXPECTED: [Option<&str>; 12] = [
        Some("hello"),
        None,
        None,
        Some("goodbye"),
        Some("hello"),
        None,
        None,
        Some("bonjour"),
        None,
        Some("你好"),
        None,
        None,
    ];

    /// Nullable binary patch values shared by the binary tests.
    const BINARY_PATCHES: [Option<&[u8]>; 7] = [
        Some(b"hello" as &[u8]),
        Some(b"goodbye"),
        Some(b"hello"),
        None,
        Some(b"\x00"),
        Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
        None,
    ];

    /// Build a `BoolArray` from optional values; null slots store the fill value
    /// (or `false` when the fill itself is null) in the underlying bit buffer.
    fn bool_array_from_nullable_vec(
        bools: Vec<Option<bool>>,
        fill_value: Option<bool>,
    ) -> BoolArray {
        let placeholder = fill_value.unwrap_or_default();
        let mut bits = BooleanBufferBuilder::new(bools.len());
        let mut valid = BooleanBufferBuilder::new(bools.len());
        for entry in &bools {
            bits.append(entry.unwrap_or(placeholder));
            valid.append(entry.is_some());
        }
        BoolArray::new(bits.finish(), Validity::from(valid.finish()))
    }

    /// Struct layout used by the struct tests: two nullable `i32` fields, "a" and "b".
    fn two_nullable_i32_fields() -> StructFields {
        StructFields::new(
            FieldNames::from_iter(["a", "b"]),
            vec![
                DType::Primitive(PType::I32, Nullable),
                DType::Primitive(PType::I32, Nullable),
            ],
        )
    }

    /// Four struct patch rows; the first three rows are valid and the last is null.
    fn struct_patch_values(struct_fields: &StructFields) -> StructArray {
        let column_a =
            PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
        let column_b =
            PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
        StructArray::try_new_with_dtype(
            vec![column_a, column_b],
            struct_fields.clone(),
            4,
            Validity::Array(
                BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
            ),
        )
        .unwrap()
    }

    /// Expected field columns for the struct tests, as written in the expectations.
    fn expected_struct_columns(len: usize) -> (PrimitiveArray, PrimitiveArray) {
        let column_a = PrimitiveArray::from_option_iter((0..len).map(|row| match row {
            0 => Some(10i32),
            1 => None,
            7 => Some(20),
            _ => Some(-10),
        }));
        let column_b = PrimitiveArray::from_option_iter((0..len).map(|row| match row {
            0 => Some(1i32),
            1 => Some(2),
            7 => None,
            _ => Some(-1),
        }));
        (column_a, column_b)
    }

    #[rstest]
    #[case(Some(true))]
    #[case(Some(false))]
    #[case(None)]
    fn test_sparse_bool(#[case] fill_value: Option<bool>) {
        let patch_indices = buffer![0u64, 1, 7].into_array();
        let patch_values =
            bool_array_from_nullable_vec(vec![Some(true), None, Some(false)], fill_value)
                .into_array();
        let sparse_bools =
            SparseArray::try_new(patch_indices, patch_values, 10, Scalar::from(fill_value))
                .unwrap();
        assert_eq!(sparse_bools.dtype(), &DType::Bool(Nullable));

        let flat_bools = sparse_bools.to_bool().unwrap();

        // Start from ten fill values and overwrite the three patched positions.
        let mut expected_values = vec![fill_value; 10];
        expected_values[0] = Some(true);
        expected_values[1] = None;
        expected_values[7] = Some(false);
        let expected = bool_array_from_nullable_vec(expected_values, fill_value);

        assert_eq!(flat_bools.boolean_buffer(), expected.boolean_buffer());
        assert_eq!(flat_bools.validity(), expected.validity());

        let bits = flat_bools.boolean_buffer();
        let validity = flat_bools.validity();

        assert!(bits.value(0));
        assert!(validity.is_valid(0).unwrap());
        assert_eq!(bits.value(1), fill_value.unwrap_or_default());
        assert!(!validity.is_valid(1).unwrap());
        assert_eq!(validity.is_valid(2).unwrap(), fill_value.is_some());
        assert!(!bits.value(7));
        assert!(validity.is_valid(7).unwrap());
    }

    #[rstest]
    #[case(Some(0i32))]
    #[case(Some(-1i32))]
    #[case(None)]
    fn test_sparse_primitive(#[case] fill_value: Option<i32>) {
        let patch_indices = buffer![0u64, 1, 7].into_array();
        let patch_values =
            PrimitiveArray::from_option_iter([Some(0i32), None, Some(1)]).into_array();
        let sparse_ints =
            SparseArray::try_new(patch_indices, patch_values, 10, Scalar::from(fill_value))
                .unwrap();
        assert_eq!(*sparse_ints.dtype(), DType::Primitive(PType::I32, Nullable));

        let flat_ints = sparse_ints.to_primitive().unwrap();

        // Start from ten fill values and overwrite the three patched positions.
        let mut expected_values = [fill_value; 10];
        expected_values[0] = Some(0i32);
        expected_values[1] = None;
        expected_values[7] = Some(1);
        let expected = PrimitiveArray::from_option_iter(expected_values);

        assert_eq!(flat_ints.byte_buffer(), expected.byte_buffer());
        assert_eq!(flat_ints.validity(), expected.validity());

        let ints = flat_ints.as_slice::<i32>();
        let validity = flat_ints.validity();

        assert_eq!(ints[0], 0);
        assert!(validity.is_valid(0).unwrap());
        assert_eq!(ints[1], 0);
        assert!(!validity.is_valid(1).unwrap());
        assert_eq!(ints[2], fill_value.unwrap_or_default());
        assert_eq!(validity.is_valid(2).unwrap(), fill_value.is_some());
        assert_eq!(ints[7], 1);
        assert!(validity.is_valid(7).unwrap());
    }

    #[test]
    fn test_sparse_struct_valid_fill() {
        let struct_fields = two_nullable_i32_fields();
        let fill_scalar = Scalar::struct_(
            DType::Struct(struct_fields.clone(), Nullable),
            vec![Scalar::from(Some(-10i32)), Scalar::from(Some(-1i32))],
        );
        let len = 10;
        let sparse_struct = SparseArray::try_new(
            buffer![0u64, 1, 7, 8].into_array(),
            struct_patch_values(&struct_fields).into_array(),
            len,
            fill_scalar,
        )
        .unwrap();

        let (expected_a, expected_b) = expected_struct_columns(len);
        let expected = StructArray::try_new_with_dtype(
            vec![expected_a.into_array(), expected_b.into_array()],
            struct_fields,
            len,
            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid];
            // with a valid fill, only 8 is Invalid.
            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
        )
        .unwrap()
        .to_array()
        .into_arrow_preferred()
        .unwrap();

        let actual = sparse_struct
            .to_struct()
            .unwrap()
            .to_array()
            .into_arrow_preferred()
            .unwrap();

        assert_eq!(expected.data_type(), actual.data_type());
        assert_eq!(&expected, &actual);
    }

    #[test]
    fn test_sparse_struct_invalid_fill() {
        let struct_fields = two_nullable_i32_fields();
        let fill_scalar = Scalar::null(DType::Struct(struct_fields.clone(), Nullable));
        let len = 10;
        let sparse_struct = SparseArray::try_new(
            buffer![0u64, 1, 7, 8].into_array(),
            struct_patch_values(&struct_fields).into_array(),
            len,
            fill_scalar,
        )
        .unwrap();

        let (expected_a, expected_b) = expected_struct_columns(len);
        let expected = StructArray::try_new_with_dtype(
            vec![expected_a.into_array(), expected_b.into_array()],
            struct_fields,
            len,
            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid];
            // with a null fill, only 0, 1, 7 are valid.
            Validity::from_mask(Mask::from_indices(10, vec![0, 1, 7]), Nullable),
        )
        .unwrap()
        .to_array()
        .into_arrow_preferred()
        .unwrap();

        let actual = sparse_struct
            .to_struct()
            .unwrap()
            .to_array()
            .into_arrow_preferred()
            .unwrap();

        assert_eq!(expected.data_type(), actual.data_type());
        assert_eq!(&expected, &actual);
    }

    #[test]
    fn test_sparse_decimal() {
        let decimal_dtype = DecimalDType::new(3, 2);
        let patch_indices = buffer![0u32, 1u32, 7u32, 8u32].into_array();
        let patch_values = DecimalArray::new(
            buffer![100i128, 200i128, 300i128, 4000i128],
            decimal_dtype,
            Validity::from_iter([true, true, true, false]),
        )
        .to_array();
        let len = 10;
        let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
        let sparse_decimal =
            SparseArray::try_new(patch_indices, patch_values, len, fill_scalar).unwrap();

        let expected = DecimalArray::new(
            buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
            decimal_dtype,
            // NB: patch indices: [0, 1, 7, 8]; patch validity: [Valid, Valid, Valid, Invalid];
            // the fill is valid, ergo only 8 is Invalid.
            Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
        )
        .to_array()
        .into_arrow_preferred()
        .unwrap();

        let actual = sparse_decimal
            .to_decimal()
            .unwrap()
            .to_array()
            .into_arrow_preferred()
            .unwrap();

        assert_eq!(expected.data_type(), actual.data_type());
        assert_eq!(&expected, &actual);
    }

    #[test]
    fn test_sparse_utf8_varbinview_non_null_fill() {
        let patch_values = <VarBinViewArray as FromIterator<_>>::from_iter(UTF8_PATCHES).into_array();
        let sparse = SparseArray::try_new(
            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
            patch_values,
            12,
            Scalar::from(Some("123".to_owned())),
        )
        .unwrap();

        let dense = sparse.to_varbinview().unwrap().into_array();
        let wanted = <VarBinViewArray as FromIterator<_>>::from_iter([
            Some("hello"),
            Some("123"),
            Some("123"),
            Some("goodbye"),
            Some("hello"),
            None,
            Some("123"),
            Some("bonjour"),
            Some("123"),
            Some("你好"),
            None,
            Some("123"),
        ])
        .into_array();

        let actual = dense.into_arrow_preferred().unwrap();
        let expected = wanted.into_arrow_preferred().unwrap();

        assert_eq!(actual.data_type(), expected.data_type());
        assert_eq!(&actual, &expected);
    }

    #[test]
    fn test_sparse_utf8_varbinview_null_fill() {
        let patch_values = <VarBinViewArray as FromIterator<_>>::from_iter(UTF8_PATCHES).into_array();
        let sparse = SparseArray::try_new(
            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
            patch_values,
            12,
            Scalar::null(DType::Utf8(Nullable)),
        )
        .unwrap();

        let dense = sparse.to_varbinview().unwrap().into_array();
        let wanted =
            <VarBinViewArray as FromIterator<_>>::from_iter(UTF8_NULL_FILL_EXPECTED).into_array();

        let actual = dense.into_arrow_preferred().unwrap();
        let expected = wanted.into_arrow_preferred().unwrap();

        assert_eq!(actual.data_type(), expected.data_type());
        assert_eq!(&actual, &expected);
    }

    #[test]
    fn test_sparse_utf8_varbinview_non_nullable() {
        let patch_values =
            VarBinViewArray::from_iter_str(["hello", "goodbye", "hello", "bonjour", "你好"])
                .into_array();
        let sparse = SparseArray::try_new(
            buffer![0u16, 3, 4, 5, 8].into_array(),
            patch_values,
            9,
            Scalar::from("123".to_owned()),
        )
        .unwrap();

        let dense = sparse.to_varbinview().unwrap().into_array();
        let wanted = <VarBinViewArray as FromIterator<_>>::from_iter([
            Some("hello"),
            Some("123"),
            Some("123"),
            Some("goodbye"),
            Some("hello"),
            Some("bonjour"),
            Some("123"),
            Some("123"),
            Some("你好"),
        ])
        .into_array();

        let actual = dense.into_arrow_preferred().unwrap();
        let expected = wanted.into_arrow_preferred().unwrap();

        assert_eq!(actual.data_type(), expected.data_type());
        assert_eq!(&actual, &expected);
    }

    #[test]
    fn test_sparse_utf8_varbin_null_fill() {
        // Same data as the var-bin-view variant, but the patches start out as a VarBinArray.
        let patch_values = <VarBinArray as FromIterator<_>>::from_iter(UTF8_PATCHES).into_array();
        let sparse = SparseArray::try_new(
            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
            patch_values,
            12,
            Scalar::null(DType::Utf8(Nullable)),
        )
        .unwrap();

        let dense = sparse.to_varbinview().unwrap().into_array();
        let wanted =
            <VarBinViewArray as FromIterator<_>>::from_iter(UTF8_NULL_FILL_EXPECTED).into_array();

        let actual = dense.into_arrow_preferred().unwrap();
        let expected = wanted.into_arrow_preferred().unwrap();

        assert_eq!(actual.data_type(), expected.data_type());
        assert_eq!(&actual, &expected);
    }

    #[test]
    fn test_sparse_binary_varbinview_non_null_fill() {
        let patch_values = VarBinViewArray::from_iter_nullable_bin(BINARY_PATCHES).into_array();
        let sparse = SparseArray::try_new(
            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
            patch_values,
            12,
            Scalar::from(Some(ByteBuffer::from(b"123".to_vec()))),
        )
        .unwrap();

        let dense = sparse.to_varbinview().unwrap().into_array();
        let wanted = VarBinViewArray::from_iter_nullable_bin([
            Some(b"hello" as &[u8]),
            Some(b"123"),
            Some(b"123"),
            Some(b"goodbye"),
            Some(b"hello"),
            None,
            Some(b"123"),
            Some(b"\x00"),
            Some(b"123"),
            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
            None,
            Some(b"123"),
        ])
        .into_array();

        let actual = dense.into_arrow_preferred().unwrap();
        let expected = wanted.into_arrow_preferred().unwrap();

        assert_eq!(actual.data_type(), expected.data_type());
        assert_eq!(&actual, &expected);
    }

    #[test]
    fn test_sparse_binary_varbin_null_fill() {
        let patch_values = <VarBinArray as FromIterator<_>>::from_iter(BINARY_PATCHES).into_array();
        let sparse = SparseArray::try_new(
            buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
            patch_values,
            12,
            Scalar::null(DType::Binary(Nullable)),
        )
        .unwrap();

        let dense = sparse.to_varbinview().unwrap().into_array();
        let wanted = VarBinViewArray::from_iter_nullable_bin([
            Some(b"hello" as &[u8]),
            None,
            None,
            Some(b"goodbye"),
            Some(b"hello"),
            None,
            None,
            Some(b"\x00"),
            None,
            Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
            None,
            None,
        ])
        .into_array();

        let actual = dense.into_arrow_preferred().unwrap();
        let expected = wanted.into_arrow_preferred().unwrap();

        assert_eq!(actual.data_type(), expected.data_type());
        assert_eq!(&actual, &expected);
    }
}