1use std::sync::Arc;
5
6use itertools::Itertools;
7use num_traits::NumCast;
8use vortex_array::arrays::{
9 BinaryView, BoolArray, BooleanBuffer, ConstantArray, ListArray, NullArray, OffsetPType,
10 PrimitiveArray, StructArray, VarBinViewArray, smallest_storage_type,
11};
12use vortex_array::builders::{
13 ArrayBuilder as _, DecimalBuilder, ListBuilder, builder_with_capacity,
14};
15use vortex_array::patches::Patches;
16use vortex_array::validity::Validity;
17use vortex_array::vtable::CanonicalVTable;
18use vortex_array::{Array, ArrayRef, Canonical, IntoArray as _, ToCanonical as _};
19use vortex_buffer::{Buffer, BufferMut, BufferString, ByteBuffer, buffer, buffer_mut};
20use vortex_dtype::{
21 DType, DecimalDType, NativePType, Nullability, StructFields, match_each_integer_ptype,
22 match_each_native_ptype,
23};
24use vortex_error::{VortexError, VortexExpect as _, vortex_panic};
25use vortex_scalar::{
26 DecimalScalar, ListScalar, NativeDecimalType, Scalar, StructScalar,
27 match_each_decimal_value_type,
28};
29
30use crate::{SparseArray, SparseVTable};
31
32impl CanonicalVTable<SparseVTable> for SparseVTable {
33 fn canonicalize(array: &SparseArray) -> Canonical {
34 if array.patches().num_patches() == 0 {
35 return ConstantArray::new(array.fill_scalar().clone(), array.len()).to_canonical();
36 }
37
38 match array.dtype() {
39 DType::Null => {
40 assert!(array.fill_scalar().is_null());
41 Canonical::Null(NullArray::new(array.len()))
42 }
43 DType::Bool(..) => {
44 let resolved_patches = array.resolved_patches();
45 canonicalize_sparse_bools(&resolved_patches, array.fill_scalar())
46 }
47 DType::Primitive(ptype, ..) => {
48 let resolved_patches = array.resolved_patches();
49 match_each_native_ptype!(ptype, |P| {
50 canonicalize_sparse_primitives::<P>(&resolved_patches, array.fill_scalar())
51 })
52 }
53 DType::Struct(struct_fields, ..) => canonicalize_sparse_struct(
54 struct_fields,
55 array.fill_scalar().as_struct(),
56 array.dtype(),
57 array.patches(),
58 array.len(),
59 ),
60 DType::Decimal(decimal_dtype, nullability) => {
61 let canonical_decimal_value_type = smallest_storage_type(decimal_dtype);
62 let fill_value = array.fill_scalar().as_decimal();
63 match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
64 canonicalize_sparse_decimal::<D>(
65 *decimal_dtype,
66 *nullability,
67 fill_value,
68 array.patches(),
69 array.len(),
70 )
71 })
72 }
73 dtype @ DType::Utf8(..) => {
74 let fill_value = array.fill_scalar().as_utf8().value();
75 let fill_value = fill_value.map(BufferString::into_inner);
76 canonicalize_varbin(array, dtype.clone(), fill_value)
77 }
78 dtype @ DType::Binary(..) => {
79 let fill_value = array.fill_scalar().as_binary().value();
80 canonicalize_varbin(array, dtype.clone(), fill_value)
81 }
82 DType::List(values_dtype, nullability) => {
83 let resolved_patches = array.resolved_patches();
84 canonicalize_sparse_lists(
85 array,
86 resolved_patches,
87 values_dtype.clone(),
88 *nullability,
89 )
90 }
91 DType::FixedSizeList(..) => {
92 unimplemented!("TODO(connor)[FixedSizeList]")
93 }
94 DType::Extension(_ext_dtype) => todo!(),
95 }
96 }
97}
98
99fn list_scalar_to_elements_array(scalar: ListScalar) -> Option<ArrayRef> {
101 let elements = scalar.elements()?;
102
103 let mut builder = builder_with_capacity(scalar.element_dtype(), scalar.len());
104 for s in elements {
105 builder
106 .append_scalar(&s)
107 .vortex_expect("Scalar dtype must match");
108 }
109 Some(builder.finish())
110}
111
112fn list_scalar_to_singleton_list_array(scalar: ListScalar) -> Option<ArrayRef> {
114 let nullability = scalar.dtype().nullability();
115 let elements = list_scalar_to_elements_array(scalar)?;
116
117 let validity = match nullability {
118 Nullability::NonNullable => Validity::NonNullable,
119 Nullability::Nullable => Validity::AllValid,
120 };
121
122 let n = elements.len();
123 Some(
124 unsafe {
125 ListArray::new_unchecked(elements, buffer![0_u64, n as u64].into_array(), validity)
126 }
127 .into_array(),
128 )
129}
130
131#[allow(clippy::cognitive_complexity)]
132fn canonicalize_sparse_lists(
133 array: &SparseArray,
134 resolved_patches: Patches,
135 values_dtype: Arc<DType>,
136 nullability: Nullability,
137) -> Canonical {
138 macro_rules! match_smallest_offset_type {
139 ($n_elements:expr, | $offset_type:ident | $body:block) => {{
140 let n_elements = $n_elements;
141 if n_elements <= u8::MAX as usize {
142 type $offset_type = u8;
143 $body
144 } else if n_elements <= u16::MAX as usize {
145 type $offset_type = u16;
146 $body
147 } else if n_elements <= u32::MAX as usize {
148 type $offset_type = u32;
149 $body
150 } else {
151 assert!(u64::try_from(n_elements).is_ok());
152 type $offset_type = u64;
153 $body
154 }
155 }};
156 }
157
158 let indices = resolved_patches.indices().to_primitive();
159 let values = resolved_patches.values().to_list();
160 let fill_value = array.fill_scalar().as_list();
161
162 let n_filled = array.len() - resolved_patches.num_patches();
163 let total_canonical_values = values.elements().len() + fill_value.len() * n_filled;
164
165 let validity = Validity::from_mask(array.validity_mask(), nullability);
166
167 match_each_integer_ptype!(indices.ptype(), |I| {
168 match_smallest_offset_type!(total_canonical_values, |O| {
169 canonicalize_sparse_lists_inner::<I, O>(
170 indices.as_slice(),
171 values,
172 fill_value,
173 values_dtype,
174 array.len(),
175 total_canonical_values,
176 validity,
177 )
178 })
179 })
180}
181
182fn canonicalize_sparse_lists_inner<I: NativePType, SmallestViableOffsetType: OffsetPType>(
183 indices: &[I],
184 values: ListArray,
185 fill_value: ListScalar,
186 values_dtype: Arc<DType>,
187 len: usize,
188 total_canonical_values: usize,
189 validity: Validity,
190) -> Canonical {
191 let Some(fill_value_array) = list_scalar_to_singleton_list_array(fill_value) else {
192 let sparse_list_elements = values.elements().clone();
193 let sparse_list_offsets = values.offsets().to_primitive();
194 match_each_integer_ptype!(sparse_list_offsets.ptype(), |SparseValuesOffsetType| {
195 let sparse_list_offsets = sparse_list_offsets.as_slice::<SparseValuesOffsetType>();
196 return canonicalize_sparse_lists_inner_with_null_fill_value(
200 indices,
201 sparse_list_elements,
202 sparse_list_offsets,
203 len,
204 validity,
205 );
206 });
207 };
208
209 let mut builder = ListBuilder::<SmallestViableOffsetType>::with_values_and_index_capacity(
210 values_dtype,
211 validity.nullability(),
212 total_canonical_values,
213 len + 1,
214 );
215 let mut next_index = 0_usize;
216 let enumerated_indices_usize = indices
217 .iter()
218 .map(|x| (*x).to_usize().vortex_expect("index must fit in usize"))
219 .enumerate();
220 for (patch_values_index, next_patched_index) in enumerated_indices_usize {
221 for _ in next_index..next_patched_index {
222 builder.extend_from_array(&fill_value_array);
223 }
224 builder.extend_from_array(&values.slice(patch_values_index..patch_values_index + 1));
225 next_index = next_patched_index + 1;
226 }
227
228 for _ in next_index..len {
229 builder.extend_from_array(&fill_value_array);
230 }
231
232 builder.finish_into_canonical()
233}
234
235fn canonicalize_sparse_lists_inner_with_null_fill_value<I: NativePType, O: OffsetPType>(
236 indices: &[I],
237 elements: ArrayRef,
238 offsets: &[O],
239 len: usize,
240 validity: Validity,
241) -> Canonical {
242 assert!(indices.len() < len + 1);
243 let mut dense_offsets = BufferMut::with_capacity(len + 1);
244
245 dense_offsets.push(offsets[0]);
247 let mut dense_last_set_index = 0_usize;
248 for (sparse_start_index, dense_start_index) in indices.iter().enumerate() {
249 let sparse_end_index = sparse_start_index + 1;
250 let dense_start_index = (*dense_start_index)
251 .to_usize()
252 .vortex_expect("index must fit in usize");
253 let dense_end_index = dense_start_index + 1;
254
255 for _ in (dense_last_set_index + 1)..dense_end_index {
256 dense_offsets.push(dense_offsets[dense_last_set_index]);
258 }
259 dense_offsets.push(offsets[sparse_end_index]);
260 dense_last_set_index = dense_end_index;
261 }
262 for _ in (dense_last_set_index + 1)..len {
263 dense_offsets.push(dense_offsets[dense_last_set_index]);
265 }
266 Canonical::List(unsafe {
267 ListArray::new_unchecked(elements, dense_offsets.into_array(), validity)
268 })
269}
270
271fn canonicalize_sparse_bools(patches: &Patches, fill_value: &Scalar) -> Canonical {
272 let (fill_bool, validity) = if fill_value.is_null() {
273 (false, Validity::AllInvalid)
274 } else {
275 (
276 fill_value
277 .try_into()
278 .vortex_expect("Fill value must convert to bool"),
279 if patches.dtype().nullability() == Nullability::NonNullable {
280 Validity::NonNullable
281 } else {
282 Validity::AllValid
283 },
284 )
285 };
286
287 let bools = BoolArray::new(
288 if fill_bool {
289 BooleanBuffer::new_set(patches.array_len())
290 } else {
291 BooleanBuffer::new_unset(patches.array_len())
292 },
293 validity,
294 );
295
296 Canonical::Bool(bools.patch(patches))
297}
298
299fn canonicalize_sparse_primitives<
300 T: NativePType + for<'a> TryFrom<&'a Scalar, Error = VortexError>,
301>(
302 patches: &Patches,
303 fill_value: &Scalar,
304) -> Canonical {
305 let (primitive_fill, validity) = if fill_value.is_null() {
306 (T::default(), Validity::AllInvalid)
307 } else {
308 (
309 fill_value
310 .try_into()
311 .vortex_expect("Fill value must convert to target T"),
312 if patches.dtype().nullability() == Nullability::NonNullable {
313 Validity::NonNullable
314 } else {
315 Validity::AllValid
316 },
317 )
318 };
319
320 let parray = PrimitiveArray::new(buffer![primitive_fill; patches.array_len()], validity);
321
322 Canonical::Primitive(parray.patch(patches))
323}
324
325fn canonicalize_sparse_struct(
326 struct_fields: &StructFields,
327 fill_struct: StructScalar,
328 dtype: &DType,
329 unresolved_patches: &Patches,
331 len: usize,
332) -> Canonical {
333 let (fill_values, top_level_fill_validity) = match fill_struct.fields() {
334 Some(fill_values) => (fill_values, Validity::AllValid),
335 None => (
336 struct_fields.fields().map(Scalar::default_value).collect(),
337 Validity::AllInvalid,
338 ),
339 };
340 let patch_values_as_struct = unresolved_patches.values().to_struct();
341 let columns_patch_values = patch_values_as_struct.fields();
342 let names = patch_values_as_struct.names();
343 let validity = if dtype.is_nullable() {
344 top_level_fill_validity.patch(
345 len,
346 unresolved_patches.offset(),
347 unresolved_patches.indices(),
348 &Validity::from_mask(
349 unresolved_patches.values().validity_mask(),
350 Nullability::Nullable,
351 ),
352 )
353 } else {
354 top_level_fill_validity
355 .into_non_nullable()
356 .unwrap_or_else(|| vortex_panic!("fill validity should match sparse array nullability"))
357 };
358
359 StructArray::try_from_iter_with_validity(
360 names.iter().zip_eq(
361 columns_patch_values
362 .iter()
363 .cloned()
364 .zip_eq(fill_values)
365 .map(|(patch_values, fill_value)| unsafe {
366 SparseArray::new_unchecked(
367 unresolved_patches
368 .clone()
369 .map_values(|_| Ok(patch_values))
370 .vortex_expect("Replacing patch values"),
371 fill_value,
372 )
373 }),
374 ),
375 validity,
376 )
377 .map(Canonical::Struct)
378 .vortex_expect("Creating struct array")
379}
380
381fn canonicalize_sparse_decimal<D: NativeDecimalType>(
382 decimal_dtype: DecimalDType,
383 nullability: Nullability,
384 fill_value: DecimalScalar,
385 patches: &Patches,
386 len: usize,
387) -> Canonical {
388 let mut builder = DecimalBuilder::with_capacity::<D>(len, decimal_dtype, nullability);
389 match fill_value.decimal_value() {
390 Some(fill_value) => {
391 let fill_value = fill_value
392 .cast::<D>()
393 .vortex_expect("unexpected value type");
394 for _ in 0..len {
395 builder.append_value(fill_value)
396 }
397 }
398 None => {
399 builder.append_nulls(len);
400 }
401 }
402 let filled_array = builder.finish_into_decimal();
403 let array = filled_array.patch(patches);
404 Canonical::Decimal(array)
405}
406
407fn canonicalize_varbin(
408 array: &SparseArray,
409 dtype: DType,
410 fill_value: Option<ByteBuffer>,
411) -> Canonical {
412 let patches = array.resolved_patches();
413 let indices = patches.indices().to_primitive();
414 let values = patches.values().to_varbinview();
415 let validity = Validity::from_mask(array.validity_mask(), dtype.nullability());
416 let len = array.len();
417
418 match_each_integer_ptype!(indices.ptype(), |I| {
419 let indices = indices.buffer::<I>();
420 canonicalize_varbin_inner::<I>(fill_value, indices, values, dtype, validity, len)
421 })
422}
423
424fn canonicalize_varbin_inner<I: NativePType>(
425 fill_value: Option<ByteBuffer>,
426 indices: Buffer<I>,
427 values: VarBinViewArray,
428 dtype: DType,
429 validity: Validity,
430 len: usize,
431) -> Canonical {
432 assert_eq!(dtype.nullability(), validity.nullability());
433
434 let n_patch_buffers = values.buffers().len();
435 let mut buffers = values.buffers().to_vec();
436
437 let fill = if let Some(buffer) = &fill_value {
438 buffers.push(buffer.clone());
439 BinaryView::make_view(
440 buffer.as_ref(),
441 u32::try_from(n_patch_buffers).vortex_expect("too many buffers"),
442 0,
443 )
444 } else {
445 BinaryView::make_view(&[], 0, 0)
447 };
448
449 let mut views = buffer_mut![fill; len];
450 for (patch_index, &patch) in indices.into_iter().zip_eq(values.views().iter()) {
451 let patch_index_usize = <usize as NumCast>::from(patch_index)
452 .vortex_expect("var bin view indices must fit in usize");
453 views[patch_index_usize] = patch;
454 }
455
456 let array = unsafe {
458 VarBinViewArray::new_unchecked(views.freeze(), Arc::from(buffers), dtype, validity)
459 };
460
461 Canonical::VarBinView(array)
462}
463
464#[cfg(test)]
465mod test {
466 use rstest::rstest;
467 use vortex_array::arrays::{
468 BoolArray, BooleanBufferBuilder, DecimalArray, ListArray, PrimitiveArray, StructArray,
469 VarBinArray, VarBinViewArray,
470 };
471 use vortex_array::arrow::IntoArrowArray as _;
472 use vortex_array::validity::Validity;
473 use vortex_array::vtable::ValidityHelper;
474 use vortex_array::{IntoArray, ToCanonical};
475 use vortex_buffer::{ByteBuffer, buffer, buffer_mut};
476 use vortex_dtype::Nullability::{NonNullable, Nullable};
477 use vortex_dtype::{DType, DecimalDType, FieldNames, PType, StructFields};
478 use vortex_mask::Mask;
479 use vortex_scalar::{DecimalValue, Scalar};
480
481 use crate::SparseArray;
482 use crate::canonical::{list_scalar_to_elements_array, list_scalar_to_singleton_list_array};
483
484 #[rstest]
485 #[case(Some(true))]
486 #[case(Some(false))]
487 #[case(None)]
488 fn test_sparse_bool(#[case] fill_value: Option<bool>) {
489 let indices = buffer![0u64, 1, 7].into_array();
490 let values = bool_array_from_nullable_vec(vec![Some(true), None, Some(false)], fill_value)
491 .into_array();
492 let sparse_bools =
493 SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
494 assert_eq!(sparse_bools.dtype(), &DType::Bool(Nullable));
495
496 let flat_bools = sparse_bools.to_bool();
497 let expected = bool_array_from_nullable_vec(
498 vec![
499 Some(true),
500 None,
501 fill_value,
502 fill_value,
503 fill_value,
504 fill_value,
505 fill_value,
506 Some(false),
507 fill_value,
508 fill_value,
509 ],
510 fill_value,
511 );
512
513 assert_eq!(flat_bools.boolean_buffer(), expected.boolean_buffer());
514 assert_eq!(flat_bools.validity(), expected.validity());
515
516 assert!(flat_bools.boolean_buffer().value(0));
517 assert!(flat_bools.validity().is_valid(0));
518 assert_eq!(
519 flat_bools.boolean_buffer().value(1),
520 fill_value.unwrap_or_default()
521 );
522 assert!(!flat_bools.validity().is_valid(1));
523 assert_eq!(flat_bools.validity().is_valid(2), fill_value.is_some());
524 assert!(!flat_bools.boolean_buffer().value(7));
525 assert!(flat_bools.validity().is_valid(7));
526 }
527
528 fn bool_array_from_nullable_vec(
529 bools: Vec<Option<bool>>,
530 fill_value: Option<bool>,
531 ) -> BoolArray {
532 let mut buffer = BooleanBufferBuilder::new(bools.len());
533 let mut validity = BooleanBufferBuilder::new(bools.len());
534 for maybe_bool in bools {
535 buffer.append(maybe_bool.unwrap_or_else(|| fill_value.unwrap_or_default()));
536 validity.append(maybe_bool.is_some());
537 }
538 BoolArray::new(buffer.finish(), Validity::from(validity.finish()))
539 }
540
541 #[rstest]
542 #[case(Some(0i32))]
543 #[case(Some(-1i32))]
544 #[case(None)]
545 fn test_sparse_primitive(#[case] fill_value: Option<i32>) {
546 let indices = buffer![0u64, 1, 7].into_array();
547 let values = PrimitiveArray::from_option_iter([Some(0i32), None, Some(1)]).into_array();
548 let sparse_ints =
549 SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
550 assert_eq!(*sparse_ints.dtype(), DType::Primitive(PType::I32, Nullable));
551
552 let flat_ints = sparse_ints.to_primitive();
553 let expected = PrimitiveArray::from_option_iter([
554 Some(0i32),
555 None,
556 fill_value,
557 fill_value,
558 fill_value,
559 fill_value,
560 fill_value,
561 Some(1),
562 fill_value,
563 fill_value,
564 ]);
565
566 assert_eq!(flat_ints.byte_buffer(), expected.byte_buffer());
567 assert_eq!(flat_ints.validity(), expected.validity());
568
569 assert_eq!(flat_ints.as_slice::<i32>()[0], 0);
570 assert!(flat_ints.validity().is_valid(0));
571 assert_eq!(flat_ints.as_slice::<i32>()[1], 0);
572 assert!(!flat_ints.validity().is_valid(1));
573 assert_eq!(
574 flat_ints.as_slice::<i32>()[2],
575 fill_value.unwrap_or_default()
576 );
577 assert_eq!(flat_ints.validity().is_valid(2), fill_value.is_some());
578 assert_eq!(flat_ints.as_slice::<i32>()[7], 1);
579 assert!(flat_ints.validity().is_valid(7));
580 }
581
582 #[test]
583 fn test_sparse_struct_valid_fill() {
584 let field_names = FieldNames::from_iter(["a", "b"]);
585 let field_types = vec![
586 DType::Primitive(PType::I32, Nullable),
587 DType::Primitive(PType::I32, Nullable),
588 ];
589 let struct_fields = StructFields::new(field_names, field_types);
590 let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
591
592 let indices = buffer![0u64, 1, 7, 8].into_array();
593 let patch_values_a =
594 PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
595 let patch_values_b =
596 PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
597 let patch_values = StructArray::try_new_with_dtype(
598 vec![patch_values_a, patch_values_b],
599 struct_fields.clone(),
600 4,
601 Validity::Array(
602 BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
603 ),
604 )
605 .unwrap()
606 .into_array();
607
608 let fill_scalar = Scalar::struct_(
609 struct_dtype,
610 vec![Scalar::from(Some(-10i32)), Scalar::from(Some(-1i32))],
611 );
612 let len = 10;
613 let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
614
615 let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
616 if i == 0 {
617 Some(10)
618 } else if i == 1 {
619 None
620 } else if i == 7 {
621 Some(20)
622 } else {
623 Some(-10)
624 }
625 }));
626 let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
627 if i == 0 {
628 Some(1i32)
629 } else if i == 1 {
630 Some(2)
631 } else if i == 7 {
632 None
633 } else {
634 Some(-1)
635 }
636 }));
637
638 let expected = StructArray::try_new_with_dtype(
639 vec![expected_a.into_array(), expected_b.into_array()],
640 struct_fields,
641 len,
642 Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
644 )
645 .unwrap()
646 .to_array()
647 .into_arrow_preferred()
648 .unwrap();
649
650 let actual = sparse_struct
651 .to_struct()
652 .to_array()
653 .into_arrow_preferred()
654 .unwrap();
655
656 assert_eq!(expected.data_type(), actual.data_type());
657 assert_eq!(&expected, &actual);
658 }
659
660 #[test]
661 fn test_sparse_struct_invalid_fill() {
662 let field_names = FieldNames::from_iter(["a", "b"]);
663 let field_types = vec![
664 DType::Primitive(PType::I32, Nullable),
665 DType::Primitive(PType::I32, Nullable),
666 ];
667 let struct_fields = StructFields::new(field_names, field_types);
668 let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
669
670 let indices = buffer![0u64, 1, 7, 8].into_array();
671 let patch_values_a =
672 PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
673 let patch_values_b =
674 PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
675 let patch_values = StructArray::try_new_with_dtype(
676 vec![patch_values_a, patch_values_b],
677 struct_fields.clone(),
678 4,
679 Validity::Array(
680 BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
681 ),
682 )
683 .unwrap()
684 .into_array();
685
686 let fill_scalar = Scalar::null(struct_dtype);
687 let len = 10;
688 let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
689
690 let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
691 if i == 0 {
692 Some(10)
693 } else if i == 1 {
694 None
695 } else if i == 7 {
696 Some(20)
697 } else {
698 Some(-10)
699 }
700 }));
701 let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
702 if i == 0 {
703 Some(1i32)
704 } else if i == 1 {
705 Some(2)
706 } else if i == 7 {
707 None
708 } else {
709 Some(-1)
710 }
711 }));
712
713 let expected = StructArray::try_new_with_dtype(
714 vec![expected_a.into_array(), expected_b.into_array()],
715 struct_fields,
716 len,
717 Validity::from_mask(Mask::from_indices(10, vec![0, 1, 7]), Nullable),
719 )
720 .unwrap()
721 .to_array()
722 .into_arrow_preferred()
723 .unwrap();
724
725 let actual = sparse_struct
726 .to_struct()
727 .to_array()
728 .into_arrow_preferred()
729 .unwrap();
730
731 assert_eq!(expected.data_type(), actual.data_type());
732 assert_eq!(&expected, &actual);
733 }
734
735 #[test]
736 fn test_sparse_decimal() {
737 let indices = buffer![0u32, 1u32, 7u32, 8u32].into_array();
738 let decimal_dtype = DecimalDType::new(3, 2);
739 let patch_values = DecimalArray::new(
740 buffer![100i128, 200i128, 300i128, 4000i128],
741 decimal_dtype,
742 Validity::from_iter([true, true, true, false]),
743 )
744 .to_array();
745 let len = 10;
746 let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
747 let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
748
749 let expected = DecimalArray::new(
750 buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
751 decimal_dtype,
752 Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
754 )
755 .to_array()
756 .into_arrow_preferred()
757 .unwrap();
758
759 let actual = sparse_struct
760 .to_decimal()
761 .to_array()
762 .into_arrow_preferred()
763 .unwrap();
764
765 assert_eq!(expected.data_type(), actual.data_type());
766 assert_eq!(&expected, &actual);
767 }
768
769 #[test]
770 fn test_sparse_utf8_varbinview_non_null_fill() {
771 let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
772 Some("hello"),
773 Some("goodbye"),
774 Some("hello"),
775 None,
776 Some("bonjour"),
777 Some("你好"),
778 None,
779 ])
780 .into_array();
781
782 let array = SparseArray::try_new(
783 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
784 strings,
785 12,
786 Scalar::from(Some("123".to_owned())),
787 )
788 .unwrap();
789
790 let actual = array.to_varbinview().into_array();
791 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
792 Some("hello"),
793 Some("123"),
794 Some("123"),
795 Some("goodbye"),
796 Some("hello"),
797 None,
798 Some("123"),
799 Some("bonjour"),
800 Some("123"),
801 Some("你好"),
802 None,
803 Some("123"),
804 ])
805 .into_array();
806
807 let actual = actual.into_arrow_preferred().unwrap();
808 let expected = expected.into_arrow_preferred().unwrap();
809
810 assert_eq!(actual.data_type(), expected.data_type());
811 assert_eq!(&actual, &expected);
812 }
813
814 #[test]
815 fn test_sparse_utf8_varbinview_null_fill() {
816 let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
817 Some("hello"),
818 Some("goodbye"),
819 Some("hello"),
820 None,
821 Some("bonjour"),
822 Some("你好"),
823 None,
824 ])
825 .into_array();
826
827 let array = SparseArray::try_new(
828 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
829 strings,
830 12,
831 Scalar::null(DType::Utf8(Nullable)),
832 )
833 .unwrap();
834
835 let actual = array.to_varbinview().into_array();
836 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
837 Some("hello"),
838 None,
839 None,
840 Some("goodbye"),
841 Some("hello"),
842 None,
843 None,
844 Some("bonjour"),
845 None,
846 Some("你好"),
847 None,
848 None,
849 ])
850 .into_array();
851
852 let actual = actual.into_arrow_preferred().unwrap();
853 let expected = expected.into_arrow_preferred().unwrap();
854
855 assert_eq!(actual.data_type(), expected.data_type());
856 assert_eq!(&actual, &expected);
857 }
858
859 #[test]
860 fn test_sparse_utf8_varbinview_non_nullable() {
861 let strings =
862 VarBinViewArray::from_iter_str(["hello", "goodbye", "hello", "bonjour", "你好"])
863 .into_array();
864
865 let array = SparseArray::try_new(
866 buffer![0u16, 3, 4, 5, 8].into_array(),
867 strings,
868 9,
869 Scalar::from("123".to_owned()),
870 )
871 .unwrap();
872
873 let actual = array.to_varbinview().into_array();
874 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
875 Some("hello"),
876 Some("123"),
877 Some("123"),
878 Some("goodbye"),
879 Some("hello"),
880 Some("bonjour"),
881 Some("123"),
882 Some("123"),
883 Some("你好"),
884 ])
885 .into_array();
886
887 let actual = actual.into_arrow_preferred().unwrap();
888 let expected = expected.into_arrow_preferred().unwrap();
889
890 assert_eq!(actual.data_type(), expected.data_type());
891 assert_eq!(&actual, &expected);
892 }
893
894 #[test]
895 fn test_sparse_utf8_varbin_null_fill() {
896 let strings = <VarBinArray as FromIterator<_>>::from_iter([
897 Some("hello"),
898 Some("goodbye"),
899 Some("hello"),
900 None,
901 Some("bonjour"),
902 Some("你好"),
903 None,
904 ])
905 .into_array();
906
907 let array = SparseArray::try_new(
908 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
909 strings,
910 12,
911 Scalar::null(DType::Utf8(Nullable)),
912 )
913 .unwrap();
914
915 let actual = array.to_varbinview().into_array();
916 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
917 Some("hello"),
918 None,
919 None,
920 Some("goodbye"),
921 Some("hello"),
922 None,
923 None,
924 Some("bonjour"),
925 None,
926 Some("你好"),
927 None,
928 None,
929 ])
930 .into_array();
931
932 let actual = actual.into_arrow_preferred().unwrap();
933 let expected = expected.into_arrow_preferred().unwrap();
934
935 assert_eq!(actual.data_type(), expected.data_type());
936 assert_eq!(&actual, &expected);
937 }
938
939 #[test]
940 fn test_sparse_binary_varbinview_non_null_fill() {
941 let binaries = VarBinViewArray::from_iter_nullable_bin([
942 Some(b"hello" as &[u8]),
943 Some(b"goodbye"),
944 Some(b"hello"),
945 None,
946 Some(b"\x00"),
947 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
948 None,
949 ])
950 .into_array();
951
952 let array = SparseArray::try_new(
953 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
954 binaries,
955 12,
956 Scalar::from(Some(ByteBuffer::from(b"123".to_vec()))),
957 )
958 .unwrap();
959
960 let actual = array.to_varbinview().into_array();
961 let expected = VarBinViewArray::from_iter_nullable_bin([
962 Some(b"hello" as &[u8]),
963 Some(b"123"),
964 Some(b"123"),
965 Some(b"goodbye"),
966 Some(b"hello"),
967 None,
968 Some(b"123"),
969 Some(b"\x00"),
970 Some(b"123"),
971 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
972 None,
973 Some(b"123"),
974 ])
975 .into_array();
976
977 let actual = actual.into_arrow_preferred().unwrap();
978 let expected = expected.into_arrow_preferred().unwrap();
979
980 assert_eq!(actual.data_type(), expected.data_type());
981 assert_eq!(&actual, &expected);
982 }
983
984 #[test]
985 fn test_list_scalar_to_elements_array() {
986 let scalar = Scalar::from(Some(vec![1, 2, 3]));
987 let array = list_scalar_to_elements_array(scalar.as_list());
988 assert_eq!(
989 array.unwrap().display_values().to_string(),
990 "[1i32, 2i32, 3i32]"
991 );
992
993 let scalar = Scalar::null_typed::<Vec<i32>>();
994 let array = list_scalar_to_elements_array(scalar.as_list());
995 assert!(array.is_none());
996 }
997
998 #[test]
999 fn test_list_scalar_to_singleton_list_array() {
1000 let scalar = Scalar::from(Some(vec![1, 2, 3]));
1001 let array = list_scalar_to_singleton_list_array(scalar.as_list());
1002 assert!(array.is_some());
1003 let array = array.unwrap();
1004 assert_eq!(array.scalar_at(0), scalar);
1005 assert_eq!(array.len(), 1);
1006
1007 let scalar = Scalar::null_typed::<Vec<i32>>();
1008 let array = list_scalar_to_singleton_list_array(scalar.as_list());
1009 assert!(array.is_none());
1010 }
1011
1012 #[test]
1013 fn test_sparse_list_null_fill() {
1014 let elements = buffer![1i32, 2, 1, 2].into_array();
1015 let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1016 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1017 .unwrap()
1018 .into_array();
1019
1020 let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1021 let fill_value = Scalar::null(lists.dtype().clone());
1022 let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1023 .unwrap()
1024 .into_array();
1025
1026 let actual = sparse.to_canonical().into_array();
1027 let expected = ListArray::try_new(
1028 buffer![1i32, 2, 1, 2].into_array(),
1029 buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1030 Validity::Array(
1031 BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1032 ),
1033 )
1034 .unwrap()
1035 .into_array();
1036
1037 let actual = actual.into_arrow_preferred().unwrap();
1038 let expected = expected.into_arrow_preferred().unwrap();
1039
1040 assert_eq!(actual.data_type(), expected.data_type());
1041 assert_eq!(&actual, &expected);
1042 }
1043
1044 #[test]
1045 fn test_sparse_list_null_fill_sliced_sparse_values() {
1046 let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
1047 let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7, 8].into_array();
1048 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1049 .unwrap()
1050 .into_array();
1051 let lists = lists.slice(2..6);
1052
1053 let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1054 let fill_value = Scalar::null(lists.dtype().clone());
1055 let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1056 .unwrap()
1057 .into_array();
1058
1059 let actual = sparse.to_canonical().into_array();
1060 let expected = ListArray::try_new(
1061 buffer![1i32, 2, 1, 2].into_array(),
1062 buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1063 Validity::Array(
1064 BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1065 ),
1066 )
1067 .unwrap()
1068 .into_array();
1069
1070 let actual = actual.into_arrow_preferred().unwrap();
1071 let expected = expected.into_arrow_preferred().unwrap();
1072
1073 assert_eq!(actual.data_type(), expected.data_type());
1074 assert_eq!(&actual, &expected);
1075 }
1076
1077 #[test]
1078 fn test_sparse_list_non_null_fill() {
1079 let elements = buffer![1i32, 2, 1, 2].into_array();
1080 let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1081 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1082 .unwrap()
1083 .into_array();
1084
1085 let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1086 let fill_value = Scalar::from(Some(vec![5i32, 6, 7, 8]));
1087 let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1088 .unwrap()
1089 .into_array();
1090
1091 let actual = sparse.to_canonical().into_array();
1092 let expected = ListArray::try_new(
1093 buffer![1i32, 5, 6, 7, 8, 5, 6, 7, 8, 2, 1, 2].into_array(),
1094 buffer![0u32, 1, 5, 9, 10, 11, 12].into_array(),
1095 Validity::AllValid,
1096 )
1097 .unwrap()
1098 .into_array();
1099
1100 let actual = actual.into_arrow_preferred().unwrap();
1101 let expected = expected.into_arrow_preferred().unwrap();
1102
1103 assert_eq!(actual.data_type(), expected.data_type());
1104 assert_eq!(&actual, &expected);
1105 }
1106
1107 #[test]
1108 fn test_sparse_binary_varbin_null_fill() {
1109 let strings = <VarBinArray as FromIterator<_>>::from_iter([
1110 Some(b"hello" as &[u8]),
1111 Some(b"goodbye"),
1112 Some(b"hello"),
1113 None,
1114 Some(b"\x00"),
1115 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1116 None,
1117 ])
1118 .into_array();
1119
1120 let array = SparseArray::try_new(
1121 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
1122 strings,
1123 12,
1124 Scalar::null(DType::Binary(Nullable)),
1125 )
1126 .unwrap();
1127
1128 let actual = array.to_varbinview().into_array();
1129 let expected = VarBinViewArray::from_iter_nullable_bin([
1130 Some(b"hello" as &[u8]),
1131 None,
1132 None,
1133 Some(b"goodbye"),
1134 Some(b"hello"),
1135 None,
1136 None,
1137 Some(b"\x00"),
1138 None,
1139 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1140 None,
1141 None,
1142 ])
1143 .into_array();
1144 let actual = actual.into_arrow_preferred().unwrap();
1145 let expected = expected.into_arrow_preferred().unwrap();
1146
1147 assert_eq!(actual.data_type(), expected.data_type());
1148 assert_eq!(&actual, &expected);
1149 }
1150
1151 #[test]
1152 fn test_sparse_list_grows_offset_type() {
1153 let elements = buffer![1i32, 2, 1, 2].into_array();
1154 let offsets = buffer![0u8, 1, 2, 3, 4].into_array();
1155 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1156 .unwrap()
1157 .into_array();
1158
1159 let indices = buffer![0u8, 1u8, 2u8, 3u8].into_array();
1160 let fill_value = Scalar::from(Some(vec![42i32; 252])); let sparse = SparseArray::try_new(indices, lists, 5, fill_value)
1162 .unwrap()
1163 .into_array();
1164
1165 let actual = sparse.to_canonical().into_array();
1166 let mut expected_elements = buffer_mut![1, 2, 1, 2];
1167 expected_elements.extend(buffer![42i32; 252]);
1168 let expected = ListArray::try_new(
1169 expected_elements.freeze().into_array(),
1170 buffer![0u16, 1, 2, 3, 4, 256].into_array(),
1171 Validity::AllValid,
1172 )
1173 .unwrap()
1174 .into_array();
1175
1176 assert_eq!(
1177 actual.to_list().offsets().dtype(),
1178 &DType::Primitive(PType::U16, NonNullable)
1179 );
1180
1181 let actual = actual.into_arrow_preferred().unwrap();
1182 let expected = expected.into_arrow_preferred().unwrap();
1183
1184 assert_eq!(actual.data_type(), expected.data_type());
1185 assert_eq!(&actual, &expected);
1186 }
1187}