1use std::sync::Arc;
5
6use itertools::Itertools;
7use num_traits::NumCast;
8use vortex_array::arrays::{
9 BoolArray, ConstantArray, FixedSizeListArray, ListViewArray, NullArray, PrimitiveArray,
10 StructArray, VarBinViewArray,
11};
12use vortex_array::builders::{
13 ArrayBuilder, DecimalBuilder, ListViewBuilder, builder_with_capacity,
14};
15use vortex_array::patches::Patches;
16use vortex_array::validity::Validity;
17use vortex_array::vtable::{CanonicalVTable, ValidityHelper};
18use vortex_array::{Array, Canonical, ToCanonical};
19use vortex_buffer::{BitBuffer, Buffer, BufferString, ByteBuffer, buffer, buffer_mut};
20use vortex_dtype::{
21 DType, DecimalDType, DecimalType, IntegerPType, NativeDecimalType, NativePType, Nullability,
22 StructFields, match_each_decimal_value_type, match_each_integer_ptype, match_each_native_ptype,
23};
24use vortex_error::{VortexError, VortexExpect, vortex_panic};
25use vortex_scalar::{DecimalScalar, ListScalar, Scalar, StructScalar};
26use vortex_vector::binaryview::BinaryView;
27
28use crate::{SparseArray, SparseVTable};
29
30impl CanonicalVTable<SparseVTable> for SparseVTable {
31 fn canonicalize(array: &SparseArray) -> Canonical {
32 if array.patches().num_patches() == 0 {
33 return ConstantArray::new(array.fill_scalar().clone(), array.len()).to_canonical();
34 }
35
36 match array.dtype() {
37 DType::Null => {
38 assert!(array.fill_scalar().is_null());
39 Canonical::Null(NullArray::new(array.len()))
40 }
41 DType::Bool(..) => {
42 let resolved_patches = array.resolved_patches();
43 canonicalize_sparse_bools(&resolved_patches, array.fill_scalar())
44 }
45 DType::Primitive(ptype, ..) => {
46 let resolved_patches = array.resolved_patches();
47 match_each_native_ptype!(ptype, |P| {
48 canonicalize_sparse_primitives::<P>(&resolved_patches, array.fill_scalar())
49 })
50 }
51 DType::Struct(struct_fields, ..) => canonicalize_sparse_struct(
52 struct_fields,
53 array.fill_scalar().as_struct(),
54 array.dtype(),
55 array.patches(),
56 array.len(),
57 ),
58 DType::Decimal(decimal_dtype, nullability) => {
59 let canonical_decimal_value_type =
60 DecimalType::smallest_decimal_value_type(decimal_dtype);
61 let fill_value = array.fill_scalar().as_decimal();
62 match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
63 canonicalize_sparse_decimal::<D>(
64 *decimal_dtype,
65 *nullability,
66 fill_value,
67 array.patches(),
68 array.len(),
69 )
70 })
71 }
72 dtype @ DType::Utf8(..) => {
73 let fill_value = array.fill_scalar().as_utf8().value();
74 let fill_value = fill_value.map(BufferString::into_inner);
75 canonicalize_varbin(array, dtype.clone(), fill_value)
76 }
77 dtype @ DType::Binary(..) => {
78 let fill_value = array.fill_scalar().as_binary().value();
79 canonicalize_varbin(array, dtype.clone(), fill_value)
80 }
81 DType::List(values_dtype, nullability) => {
82 canonicalize_sparse_lists(array, values_dtype.clone(), *nullability)
83 }
84 DType::FixedSizeList(.., nullability) => {
85 canonicalize_sparse_fixed_size_list(array, *nullability)
86 }
87 DType::Extension(_ext_dtype) => todo!(),
88 }
89 }
90}
91
92#[allow(clippy::cognitive_complexity)]
93fn canonicalize_sparse_lists(
94 array: &SparseArray,
95 values_dtype: Arc<DType>,
96 nullability: Nullability,
97) -> Canonical {
98 macro_rules! match_smallest_offset_type {
100 ($n_elements:expr, | $offset_type:ident | $body:block) => {{
101 let n_elements = $n_elements;
102 if n_elements <= u8::MAX as usize {
103 type $offset_type = u8;
104 $body
105 } else if n_elements <= u16::MAX as usize {
106 type $offset_type = u16;
107 $body
108 } else if n_elements <= u32::MAX as usize {
109 type $offset_type = u32;
110 $body
111 } else {
112 assert!(u64::try_from(n_elements).is_ok());
113 type $offset_type = u64;
114 $body
115 }
116 }};
117 }
118
119 let resolved_patches = array.resolved_patches();
120
121 let indices = resolved_patches.indices().to_primitive();
122 let values = resolved_patches.values().to_listview();
123 let fill_value = array.fill_scalar().as_list();
124
125 let n_filled = array.len() - resolved_patches.num_patches();
126 let total_canonical_values = values.elements().len() + fill_value.len() * n_filled;
127
128 let validity = Validity::from_mask(array.validity_mask(), nullability);
129
130 match_each_integer_ptype!(indices.ptype(), |I| {
131 match_smallest_offset_type!(total_canonical_values, |O| {
132 canonicalize_sparse_lists_inner::<I, O>(
133 indices.as_slice(),
134 values,
135 fill_value,
136 values_dtype,
137 array.len(),
138 total_canonical_values,
139 validity,
140 )
141 })
142 })
143}
144
145fn canonicalize_sparse_lists_inner<I: IntegerPType, O: IntegerPType>(
146 patch_indices: &[I],
147 patch_values: ListViewArray,
148 fill_value: ListScalar,
149 values_dtype: Arc<DType>,
150 len: usize,
151 total_canonical_values: usize,
152 validity: Validity,
153) -> Canonical {
154 let mut builder = ListViewBuilder::<O, O>::with_capacity(
157 values_dtype,
158 validity.nullability(),
159 total_canonical_values,
160 len,
161 );
162
163 let mut patch_idx = 0;
164
165 for position in 0..len {
168 let position_is_patched = patch_idx < patch_indices.len()
169 && patch_indices[patch_idx]
170 .to_usize()
171 .vortex_expect("patch index must fit in usize")
172 == position;
173
174 if position_is_patched {
175 builder
177 .append_value(patch_values.scalar_at(patch_idx).as_list())
178 .vortex_expect("Failed to append sparse value");
179 patch_idx += 1;
180 } else {
181 builder
183 .append_value(fill_value.clone())
184 .vortex_expect("Failed to append fill value");
185 }
186 }
187
188 builder.finish_into_canonical()
189}
190
191fn canonicalize_sparse_fixed_size_list(array: &SparseArray, nullability: Nullability) -> Canonical {
193 let resolved_patches = array.resolved_patches();
194 let indices = resolved_patches.indices().to_primitive();
195 let values = resolved_patches.values().to_fixed_size_list();
196 let fill_value = array.fill_scalar().as_list();
197
198 let validity = Validity::from_mask(array.validity_mask(), nullability);
199
200 match_each_integer_ptype!(indices.ptype(), |I| {
201 canonicalize_sparse_fixed_size_list_inner::<I>(
202 indices.as_slice(),
203 values,
204 fill_value,
205 array.len(),
206 validity,
207 )
208 })
209}
210
211fn canonicalize_sparse_fixed_size_list_inner<I: IntegerPType>(
218 indices: &[I],
219 values: FixedSizeListArray,
220 fill_value: ListScalar,
221 array_len: usize,
222 validity: Validity,
223) -> Canonical {
224 let list_size = values.list_size();
225 let element_dtype = values.elements().dtype();
226 let total_elements = array_len * list_size as usize;
227 let mut builder = builder_with_capacity(element_dtype, total_elements);
228 let fill_elements = fill_value.elements();
229
230 let mut next_index = 0;
231 let indices = indices
232 .iter()
233 .map(|x| (*x).to_usize().vortex_expect("index must fit in usize"));
234
235 for (patch_idx, sparse_idx) in indices.enumerate() {
236 append_n_lists(
238 &mut *builder,
239 fill_elements.as_deref(),
240 list_size,
241 sparse_idx - next_index,
242 );
243
244 if values.validity().is_valid(patch_idx) {
246 let patch_list = values.fixed_size_list_elements_at(patch_idx);
247 for i in 0..list_size as usize {
248 builder
249 .append_scalar(&patch_list.scalar_at(i))
250 .vortex_expect("element dtype must match");
251 }
252 } else {
253 builder.append_defaults(list_size as usize);
254 }
255
256 next_index = sparse_idx + 1;
257 }
258
259 append_n_lists(
261 &mut *builder,
262 fill_elements.as_deref(),
263 list_size,
264 array_len - next_index,
265 );
266
267 let elements = builder.finish();
268
269 Canonical::FixedSizeList(unsafe {
271 FixedSizeListArray::new_unchecked(elements, list_size, validity, array_len)
272 })
273}
274
275fn append_n_lists(
280 builder: &mut dyn ArrayBuilder,
281 fill_elements: Option<&[Scalar]>,
282 list_size: u32,
283 count: usize,
284) {
285 for _ in 0..count {
286 if let Some(fill_elems) = fill_elements {
287 for elem in fill_elems {
288 builder
289 .append_scalar(elem)
290 .vortex_expect("element dtype must match");
291 }
292 } else {
293 builder.append_defaults(list_size as usize);
294 }
295 }
296}
297
298fn canonicalize_sparse_bools(patches: &Patches, fill_value: &Scalar) -> Canonical {
299 let (fill_bool, validity) = if fill_value.is_null() {
300 (false, Validity::AllInvalid)
301 } else {
302 (
303 fill_value
304 .try_into()
305 .vortex_expect("Fill value must convert to bool"),
306 if patches.dtype().nullability() == Nullability::NonNullable {
307 Validity::NonNullable
308 } else {
309 Validity::AllValid
310 },
311 )
312 };
313
314 let bools =
315 BoolArray::from_bit_buffer(BitBuffer::full(fill_bool, patches.array_len()), validity);
316
317 Canonical::Bool(bools.patch(patches))
318}
319
320fn canonicalize_sparse_primitives<
321 T: NativePType + for<'a> TryFrom<&'a Scalar, Error = VortexError>,
322>(
323 patches: &Patches,
324 fill_value: &Scalar,
325) -> Canonical {
326 let (primitive_fill, validity) = if fill_value.is_null() {
327 (T::default(), Validity::AllInvalid)
328 } else {
329 (
330 fill_value
331 .try_into()
332 .vortex_expect("Fill value must convert to target T"),
333 if patches.dtype().nullability() == Nullability::NonNullable {
334 Validity::NonNullable
335 } else {
336 Validity::AllValid
337 },
338 )
339 };
340
341 let parray = PrimitiveArray::new(buffer![primitive_fill; patches.array_len()], validity);
342
343 Canonical::Primitive(parray.patch(patches))
344}
345
346fn canonicalize_sparse_struct(
347 struct_fields: &StructFields,
348 fill_struct: StructScalar,
349 dtype: &DType,
350 unresolved_patches: &Patches,
352 len: usize,
353) -> Canonical {
354 let (fill_values, top_level_fill_validity) = match fill_struct.fields() {
355 Some(fill_values) => (fill_values.collect::<Vec<_>>(), Validity::AllValid),
356 None => (
357 struct_fields
358 .fields()
359 .map(Scalar::default_value)
360 .collect::<Vec<_>>(),
361 Validity::AllInvalid,
362 ),
363 };
364 let patch_values_as_struct = unresolved_patches.values().to_struct();
365 let columns_patch_values = patch_values_as_struct.fields();
366 let names = patch_values_as_struct.names();
367 let validity = if dtype.is_nullable() {
368 top_level_fill_validity.patch(
369 len,
370 unresolved_patches.offset(),
371 unresolved_patches.indices(),
372 &Validity::from_mask(
373 unresolved_patches.values().validity_mask(),
374 Nullability::Nullable,
375 ),
376 )
377 } else {
378 top_level_fill_validity
379 .into_non_nullable(len)
380 .unwrap_or_else(|| vortex_panic!("fill validity should match sparse array nullability"))
381 };
382
383 StructArray::try_from_iter_with_validity(
384 names.iter().zip_eq(
385 columns_patch_values
386 .iter()
387 .cloned()
388 .zip_eq(fill_values)
389 .map(|(patch_values, fill_value)| unsafe {
390 SparseArray::new_unchecked(
391 unresolved_patches
392 .clone()
393 .map_values(|_| Ok(patch_values))
394 .vortex_expect("Replacing patch values"),
395 fill_value,
396 )
397 }),
398 ),
399 validity,
400 )
401 .map(Canonical::Struct)
402 .vortex_expect("Creating struct array")
403}
404
405fn canonicalize_sparse_decimal<D: NativeDecimalType>(
406 decimal_dtype: DecimalDType,
407 nullability: Nullability,
408 fill_value: DecimalScalar,
409 patches: &Patches,
410 len: usize,
411) -> Canonical {
412 let mut builder = DecimalBuilder::with_capacity::<D>(len, decimal_dtype, nullability);
413 match fill_value.decimal_value() {
414 Some(fill_value) => {
415 let fill_value = fill_value
416 .cast::<D>()
417 .vortex_expect("unexpected value type");
418 for _ in 0..len {
419 builder.append_value(fill_value)
420 }
421 }
422 None => {
423 builder.append_nulls(len);
424 }
425 }
426 let filled_array = builder.finish_into_decimal();
427 let array = filled_array.patch(patches);
428 Canonical::Decimal(array)
429}
430
431fn canonicalize_varbin(
432 array: &SparseArray,
433 dtype: DType,
434 fill_value: Option<ByteBuffer>,
435) -> Canonical {
436 let patches = array.resolved_patches();
437 let indices = patches.indices().to_primitive();
438 let values = patches.values().to_varbinview();
439 let validity = Validity::from_mask(array.validity_mask(), dtype.nullability());
440 let len = array.len();
441
442 match_each_integer_ptype!(indices.ptype(), |I| {
443 let indices = indices.buffer::<I>();
444 canonicalize_varbin_inner::<I>(fill_value, indices, values, dtype, validity, len)
445 })
446}
447
448fn canonicalize_varbin_inner<I: IntegerPType>(
449 fill_value: Option<ByteBuffer>,
450 indices: Buffer<I>,
451 values: VarBinViewArray,
452 dtype: DType,
453 validity: Validity,
454 len: usize,
455) -> Canonical {
456 assert_eq!(dtype.nullability(), validity.nullability());
457
458 let n_patch_buffers = values.buffers().len();
459 let mut buffers = values.buffers().to_vec();
460
461 let fill = if let Some(buffer) = &fill_value {
462 buffers.push(buffer.clone());
463 BinaryView::make_view(
464 buffer.as_ref(),
465 u32::try_from(n_patch_buffers).vortex_expect("too many buffers"),
466 0,
467 )
468 } else {
469 BinaryView::make_view(&[], 0, 0)
471 };
472
473 let mut views = buffer_mut![fill; len];
474 for (patch_index, &patch) in indices.into_iter().zip_eq(values.views().iter()) {
475 let patch_index_usize = <usize as NumCast>::from(patch_index)
476 .vortex_expect("var bin view indices must fit in usize");
477 views[patch_index_usize] = patch;
478 }
479
480 let array = unsafe {
482 VarBinViewArray::new_unchecked(views.freeze(), Arc::from(buffers), dtype, validity)
483 };
484
485 Canonical::VarBinView(array)
486}
487
488#[cfg(test)]
489mod test {
490 use std::sync::Arc;
491
492 use rstest::rstest;
493 use vortex_array::arrays::{
494 BoolArray, DecimalArray, FixedSizeListArray, ListArray, ListViewArray, PrimitiveArray,
495 StructArray, VarBinArray, VarBinViewArray,
496 };
497 use vortex_array::arrow::IntoArrowArray as _;
498 use vortex_array::validity::Validity;
499 use vortex_array::{IntoArray, ToCanonical, assert_arrays_eq};
500 use vortex_buffer::{ByteBuffer, buffer, buffer_mut};
501 use vortex_dtype::Nullability::{NonNullable, Nullable};
502 use vortex_dtype::{DType, DecimalDType, FieldNames, PType, StructFields};
503 use vortex_mask::Mask;
504 use vortex_scalar::{DecimalValue, Scalar};
505
506 use crate::SparseArray;
507
508 #[rstest]
509 #[case(Some(true))]
510 #[case(Some(false))]
511 #[case(None)]
512 fn test_sparse_bool(#[case] fill_value: Option<bool>) {
513 let indices = buffer![0u64, 1, 7].into_array();
514 let values = BoolArray::from_iter([Some(true), None, Some(false)]).into_array();
515 let sparse_bools =
516 SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
517 let actual = sparse_bools.to_bool();
518
519 let expected = BoolArray::from_iter([
520 Some(true),
521 None,
522 fill_value,
523 fill_value,
524 fill_value,
525 fill_value,
526 fill_value,
527 Some(false),
528 fill_value,
529 fill_value,
530 ]);
531
532 assert_arrays_eq!(actual, expected);
533 }
534
535 #[rstest]
536 #[case(Some(0i32))]
537 #[case(Some(-1i32))]
538 #[case(None)]
539 fn test_sparse_primitive(#[case] fill_value: Option<i32>) {
540 let indices = buffer![0u64, 1, 7].into_array();
541 let values = PrimitiveArray::from_option_iter([Some(0i32), None, Some(1)]).into_array();
542 let sparse_ints =
543 SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
544 assert_eq!(*sparse_ints.dtype(), DType::Primitive(PType::I32, Nullable));
545
546 let flat_ints = sparse_ints.to_primitive();
547 let expected = PrimitiveArray::from_option_iter([
548 Some(0i32),
549 None,
550 fill_value,
551 fill_value,
552 fill_value,
553 fill_value,
554 fill_value,
555 Some(1),
556 fill_value,
557 fill_value,
558 ]);
559
560 assert_arrays_eq!(&flat_ints, &expected);
561 }
562
563 #[test]
564 fn test_sparse_struct_valid_fill() {
565 let field_names = FieldNames::from_iter(["a", "b"]);
566 let field_types = vec![
567 DType::Primitive(PType::I32, Nullable),
568 DType::Primitive(PType::I32, Nullable),
569 ];
570 let struct_fields = StructFields::new(field_names, field_types);
571 let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
572
573 let indices = buffer![0u64, 1, 7, 8].into_array();
574 let patch_values_a =
575 PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
576 let patch_values_b =
577 PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
578 let patch_values = StructArray::try_new_with_dtype(
579 vec![patch_values_a, patch_values_b],
580 struct_fields.clone(),
581 4,
582 Validity::Array(
583 BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
584 ),
585 )
586 .unwrap()
587 .into_array();
588
589 let fill_scalar = Scalar::struct_(
590 struct_dtype,
591 vec![Scalar::from(Some(-10i32)), Scalar::from(Some(-1i32))],
592 );
593 let len = 10;
594 let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
595
596 let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
597 if i == 0 {
598 Some(10)
599 } else if i == 1 {
600 None
601 } else if i == 7 {
602 Some(20)
603 } else {
604 Some(-10)
605 }
606 }));
607 let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
608 if i == 0 {
609 Some(1i32)
610 } else if i == 1 {
611 Some(2)
612 } else if i == 7 {
613 None
614 } else {
615 Some(-1)
616 }
617 }));
618
619 let expected = StructArray::try_new_with_dtype(
620 vec![expected_a.into_array(), expected_b.into_array()],
621 struct_fields,
622 len,
623 Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
625 )
626 .unwrap()
627 .to_array();
628
629 let actual = sparse_struct.to_struct();
630 assert_arrays_eq!(actual, expected);
631 }
632
633 #[test]
634 fn test_sparse_struct_invalid_fill() {
635 let field_names = FieldNames::from_iter(["a", "b"]);
636 let field_types = vec![
637 DType::Primitive(PType::I32, Nullable),
638 DType::Primitive(PType::I32, Nullable),
639 ];
640 let struct_fields = StructFields::new(field_names, field_types);
641 let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
642
643 let indices = buffer![0u64, 1, 7, 8].into_array();
644 let patch_values_a =
645 PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
646 let patch_values_b =
647 PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
648 let patch_values = StructArray::try_new_with_dtype(
649 vec![patch_values_a, patch_values_b],
650 struct_fields.clone(),
651 4,
652 Validity::Array(
653 BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
654 ),
655 )
656 .unwrap()
657 .into_array();
658
659 let fill_scalar = Scalar::null(struct_dtype);
660 let len = 10;
661 let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
662
663 let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
664 if i == 0 {
665 Some(10)
666 } else if i == 1 {
667 None
668 } else if i == 7 {
669 Some(20)
670 } else {
671 Some(-10)
672 }
673 }));
674 let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
675 if i == 0 {
676 Some(1i32)
677 } else if i == 1 {
678 Some(2)
679 } else if i == 7 {
680 None
681 } else {
682 Some(-1)
683 }
684 }));
685
686 let expected = StructArray::try_new_with_dtype(
687 vec![expected_a.into_array(), expected_b.into_array()],
688 struct_fields,
689 len,
690 Validity::from_mask(Mask::from_indices(10, vec![0, 1, 7]), Nullable),
692 )
693 .unwrap()
694 .to_array();
695
696 let actual = sparse_struct.to_struct();
697 assert_arrays_eq!(actual, expected);
698 }
699
700 #[test]
701 fn test_sparse_decimal() {
702 let indices = buffer![0u32, 1u32, 7u32, 8u32].into_array();
703 let decimal_dtype = DecimalDType::new(3, 2);
704 let patch_values = DecimalArray::new(
705 buffer![100i128, 200i128, 300i128, 4000i128],
706 decimal_dtype,
707 Validity::from_iter([true, true, true, false]),
708 )
709 .to_array();
710 let len = 10;
711 let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
712 let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
713
714 let expected = DecimalArray::new(
715 buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
716 decimal_dtype,
717 Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
719 )
720 .to_array()
721 .into_arrow_preferred()
722 .unwrap();
723
724 let actual = sparse_struct
725 .to_decimal()
726 .to_array()
727 .into_arrow_preferred()
728 .unwrap();
729
730 assert_eq!(expected.data_type(), actual.data_type());
731 assert_eq!(&expected, &actual);
732 }
733
734 #[test]
735 fn test_sparse_utf8_varbinview_non_null_fill() {
736 let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
737 Some("hello"),
738 Some("goodbye"),
739 Some("hello"),
740 None,
741 Some("bonjour"),
742 Some("ä½ å¥½"),
743 None,
744 ])
745 .into_array();
746
747 let array = SparseArray::try_new(
748 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
749 strings,
750 12,
751 Scalar::from(Some("123".to_owned())),
752 )
753 .unwrap();
754
755 let actual = array.to_varbinview().into_array();
756 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
757 Some("hello"),
758 Some("123"),
759 Some("123"),
760 Some("goodbye"),
761 Some("hello"),
762 None,
763 Some("123"),
764 Some("bonjour"),
765 Some("123"),
766 Some("ä½ å¥½"),
767 None,
768 Some("123"),
769 ])
770 .into_array();
771
772 assert_arrays_eq!(actual, expected);
773 }
774
775 #[test]
776 fn test_sparse_utf8_varbinview_null_fill() {
777 let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
778 Some("hello"),
779 Some("goodbye"),
780 Some("hello"),
781 None,
782 Some("bonjour"),
783 Some("ä½ å¥½"),
784 None,
785 ])
786 .into_array();
787
788 let array = SparseArray::try_new(
789 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
790 strings,
791 12,
792 Scalar::null(DType::Utf8(Nullable)),
793 )
794 .unwrap();
795
796 let actual = array.to_varbinview().into_array();
797 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
798 Some("hello"),
799 None,
800 None,
801 Some("goodbye"),
802 Some("hello"),
803 None,
804 None,
805 Some("bonjour"),
806 None,
807 Some("ä½ å¥½"),
808 None,
809 None,
810 ])
811 .into_array();
812
813 assert_arrays_eq!(actual, expected);
814 }
815
816 #[test]
817 fn test_sparse_utf8_varbinview_non_nullable() {
818 let strings =
819 VarBinViewArray::from_iter_str(["hello", "goodbye", "hello", "bonjour", "ä½ å¥½"])
820 .into_array();
821
822 let array = SparseArray::try_new(
823 buffer![0u16, 3, 4, 5, 8].into_array(),
824 strings,
825 9,
826 Scalar::from("123".to_owned()),
827 )
828 .unwrap();
829
830 let actual = array.to_varbinview().into_array();
831 let expected = VarBinViewArray::from_iter_str([
832 "hello", "123", "123", "goodbye", "hello", "bonjour", "123", "123", "ä½ å¥½",
833 ])
834 .into_array();
835
836 assert_arrays_eq!(actual, expected);
837 }
838
839 #[test]
840 fn test_sparse_utf8_varbin_null_fill() {
841 let strings = <VarBinArray as FromIterator<_>>::from_iter([
842 Some("hello"),
843 Some("goodbye"),
844 Some("hello"),
845 None,
846 Some("bonjour"),
847 Some("ä½ å¥½"),
848 None,
849 ])
850 .into_array();
851
852 let array = SparseArray::try_new(
853 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
854 strings,
855 12,
856 Scalar::null(DType::Utf8(Nullable)),
857 )
858 .unwrap();
859
860 let actual = array.to_varbinview().into_array();
861 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
862 Some("hello"),
863 None,
864 None,
865 Some("goodbye"),
866 Some("hello"),
867 None,
868 None,
869 Some("bonjour"),
870 None,
871 Some("ä½ å¥½"),
872 None,
873 None,
874 ])
875 .into_array();
876
877 assert_arrays_eq!(actual, expected);
878 }
879
880 #[test]
881 fn test_sparse_binary_varbinview_non_null_fill() {
882 let binaries = VarBinViewArray::from_iter_nullable_bin([
883 Some(b"hello" as &[u8]),
884 Some(b"goodbye"),
885 Some(b"hello"),
886 None,
887 Some(b"\x00"),
888 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
889 None,
890 ])
891 .into_array();
892
893 let array = SparseArray::try_new(
894 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
895 binaries,
896 12,
897 Scalar::from(Some(ByteBuffer::from(b"123".to_vec()))),
898 )
899 .unwrap();
900
901 let actual = array.to_varbinview().into_array();
902 let expected = VarBinViewArray::from_iter_nullable_bin([
903 Some(b"hello" as &[u8]),
904 Some(b"123"),
905 Some(b"123"),
906 Some(b"goodbye"),
907 Some(b"hello"),
908 None,
909 Some(b"123"),
910 Some(b"\x00"),
911 Some(b"123"),
912 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
913 None,
914 Some(b"123"),
915 ])
916 .into_array();
917
918 assert_arrays_eq!(actual, expected);
919 }
920
921 #[test]
922 fn test_sparse_list_null_fill() {
923 let elements = buffer![1i32, 2, 1, 2].into_array();
925 let offsets = buffer![0u32, 1, 2, 3].into_array();
931 let sizes = buffer![1u32, 1, 1, 1].into_array();
932 let lists = unsafe {
933 ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
934 .with_zero_copy_to_list(true)
935 }
936 .into_array();
937
938 let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
939 let fill_value = Scalar::null(lists.dtype().clone());
940 let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
941 .unwrap()
942 .into_array();
943
944 let actual = sparse.to_canonical().into_array();
945 let result_listview = actual.to_listview();
946
947 assert_eq!(result_listview.len(), 6);
949
950 assert_eq!(result_listview.size_at(0), 1); assert_eq!(result_listview.size_at(1), 0); assert_eq!(result_listview.size_at(2), 0); assert_eq!(result_listview.size_at(3), 1); assert_eq!(result_listview.size_at(4), 1); assert_eq!(result_listview.size_at(5), 1); let elements_array = result_listview.elements().to_primitive();
960 let elements_slice = elements_array.as_slice::<i32>();
961
962 let list0_offset = result_listview.offset_at(0);
963 assert_eq!(elements_slice[list0_offset], 1);
964
965 let list3_offset = result_listview.offset_at(3);
966 assert_eq!(elements_slice[list3_offset], 2);
967
968 let list4_offset = result_listview.offset_at(4);
969 assert_eq!(elements_slice[list4_offset], 1);
970
971 let list5_offset = result_listview.offset_at(5);
972 assert_eq!(elements_slice[list5_offset], 2);
973 }
974
975 #[test]
976 fn test_sparse_list_null_fill_sliced_sparse_values() {
977 let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
979 let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7].into_array();
980 let sizes = buffer![1u32, 1, 1, 1, 1, 1, 1, 1].into_array();
981 let lists = unsafe {
982 ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
983 .with_zero_copy_to_list(true)
984 }
985 .into_array();
986
987 let lists = lists.slice(2..6);
989
990 let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
991 let fill_value = Scalar::null(lists.dtype().clone());
992 let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
993 .unwrap()
994 .into_array();
995
996 let actual = sparse.to_canonical().into_array();
997 let result_listview = actual.to_listview();
998
999 assert_eq!(result_listview.len(), 6);
1001
1002 assert_eq!(result_listview.size_at(0), 1); assert_eq!(result_listview.size_at(1), 0); assert_eq!(result_listview.size_at(2), 0); assert_eq!(result_listview.size_at(3), 1); assert_eq!(result_listview.size_at(4), 1); assert_eq!(result_listview.size_at(5), 1); let elements_array = result_listview.elements().to_primitive();
1012 let elements_slice = elements_array.as_slice::<i32>();
1013
1014 let list0_offset = result_listview.offset_at(0);
1015 assert_eq!(elements_slice[list0_offset], 1);
1016
1017 let list3_offset = result_listview.offset_at(3);
1018 assert_eq!(elements_slice[list3_offset], 2);
1019 }
1020
1021 #[test]
1022 fn test_sparse_list_non_null_fill() {
1023 let elements = buffer![1i32, 2, 1, 2].into_array();
1025 let offsets = buffer![0u32, 1, 2, 3].into_array();
1026 let sizes = buffer![1u32, 1, 1, 1].into_array();
1027 let lists = unsafe {
1028 ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
1029 .with_zero_copy_to_list(true)
1030 }
1031 .into_array();
1032
1033 let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1034 let fill_value = Scalar::from(Some(vec![5i32, 6, 7, 8]));
1035 let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1036 .unwrap()
1037 .into_array();
1038
1039 let actual = sparse.to_canonical().into_array();
1040 let result_listview = actual.to_listview();
1041
1042 assert_eq!(result_listview.len(), 6);
1044
1045 assert_eq!(result_listview.size_at(0), 1); assert_eq!(result_listview.size_at(1), 4); assert_eq!(result_listview.size_at(2), 4); assert_eq!(result_listview.size_at(3), 1); assert_eq!(result_listview.size_at(4), 1); assert_eq!(result_listview.size_at(5), 1); let elements_array = result_listview.elements().to_primitive();
1055 let elements_slice = elements_array.as_slice::<i32>();
1056
1057 let list0_offset = result_listview.offset_at(0) as usize;
1059 assert_eq!(elements_slice[list0_offset], 1);
1060
1061 let list1_offset = result_listview.offset_at(1) as usize;
1063 let list1_size = result_listview.size_at(1) as usize;
1064 assert_eq!(
1065 &elements_slice[list1_offset..list1_offset + list1_size],
1066 &[5, 6, 7, 8]
1067 );
1068
1069 let list2_offset = result_listview.offset_at(2) as usize;
1071 let list2_size = result_listview.size_at(2) as usize;
1072 assert_eq!(
1073 &elements_slice[list2_offset..list2_offset + list2_size],
1074 &[5, 6, 7, 8]
1075 );
1076
1077 let list3_offset = result_listview.offset_at(3) as usize;
1079 assert_eq!(elements_slice[list3_offset], 2);
1080
1081 let list4_offset = result_listview.offset_at(4) as usize;
1083 assert_eq!(elements_slice[list4_offset], 1);
1084
1085 let list5_offset = result_listview.offset_at(5) as usize;
1087 assert_eq!(elements_slice[list5_offset], 2);
1088 }
1089
1090 #[test]
1091 fn test_sparse_binary_varbin_null_fill() {
1092 let strings = <VarBinArray as FromIterator<_>>::from_iter([
1093 Some(b"hello" as &[u8]),
1094 Some(b"goodbye"),
1095 Some(b"hello"),
1096 None,
1097 Some(b"\x00"),
1098 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1099 None,
1100 ])
1101 .into_array();
1102
1103 let array = SparseArray::try_new(
1104 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
1105 strings,
1106 12,
1107 Scalar::null(DType::Binary(Nullable)),
1108 )
1109 .unwrap();
1110
1111 let actual = array.to_varbinview().into_array();
1112 let expected = VarBinViewArray::from_iter_nullable_bin([
1113 Some(b"hello" as &[u8]),
1114 None,
1115 None,
1116 Some(b"goodbye"),
1117 Some(b"hello"),
1118 None,
1119 None,
1120 Some(b"\x00"),
1121 None,
1122 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1123 None,
1124 None,
1125 ])
1126 .into_array();
1127
1128 assert_arrays_eq!(actual, expected);
1129 }
1130
1131 #[test]
1132 fn test_sparse_fixed_size_list_null_fill() {
1133 let elements = buffer![1i32, 2, 3, 4, 5, 6, 7, 8, 9].into_array();
1135 let fsl = FixedSizeListArray::try_new(elements, 3, Validity::AllValid, 3)
1136 .unwrap()
1137 .into_array();
1138
1139 let indices = buffer![0u8, 2u8, 3u8].into_array();
1140 let fill_value = Scalar::null(DType::FixedSizeList(
1141 Arc::new(DType::Primitive(PType::I32, NonNullable)),
1142 3,
1143 Nullable,
1144 ));
1145 let sparse = SparseArray::try_new(indices, fsl, 5, fill_value)
1146 .unwrap()
1147 .into_array();
1148
1149 let actual = sparse.to_canonical().into_array();
1150
1151 let expected_elements =
1153 buffer![1i32, 2, 3, 0, 0, 0, 4, 5, 6, 7, 8, 9, 0, 0, 0].into_array();
1154 let expected = FixedSizeListArray::try_new(
1155 expected_elements,
1156 3,
1157 Validity::Array(BoolArray::from_iter([true, false, true, true, false]).into_array()),
1158 5,
1159 )
1160 .unwrap()
1161 .into_array();
1162
1163 assert_arrays_eq!(actual, expected);
1164 }
1165
1166 #[test]
1167 fn test_sparse_fixed_size_list_non_null_fill() {
1168 let elements = buffer![1i32, 2, 3, 4, 5, 6].into_array();
1169 let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 3)
1170 .unwrap()
1171 .into_array();
1172
1173 let indices = buffer![0u8, 2u8, 4u8].into_array();
1174 let fill_value = Scalar::fixed_size_list(
1175 Arc::new(DType::Primitive(PType::I32, NonNullable)),
1176 vec![
1177 Scalar::primitive(99i32, NonNullable),
1178 Scalar::primitive(88i32, NonNullable),
1179 ],
1180 NonNullable,
1181 );
1182 let sparse = SparseArray::try_new(indices, fsl, 6, fill_value)
1183 .unwrap()
1184 .into_array();
1185
1186 let actual = sparse.to_canonical().into_array();
1187
1188 let expected_elements = buffer![1i32, 2, 99, 88, 3, 4, 99, 88, 5, 6, 99, 88].into_array();
1190 let expected = FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 6)
1191 .unwrap()
1192 .into_array();
1193
1194 assert_arrays_eq!(actual, expected);
1195 }
1196
1197 #[test]
1198 fn test_sparse_fixed_size_list_with_validity() {
1199 let elements = buffer![10i32, 20, 30, 40, 50, 60].into_array();
1201 let fsl = FixedSizeListArray::try_new(
1202 elements,
1203 2,
1204 Validity::Array(BoolArray::from_iter([true, false, true]).into_array()),
1205 3,
1206 )
1207 .unwrap()
1208 .into_array();
1209
1210 let indices = buffer![1u16, 3u16, 4u16].into_array();
1211 let fill_value = Scalar::fixed_size_list(
1212 Arc::new(DType::Primitive(PType::I32, NonNullable)),
1213 vec![
1214 Scalar::primitive(7i32, NonNullable),
1215 Scalar::primitive(8i32, NonNullable),
1216 ],
1217 Nullable,
1218 );
1219 let sparse = SparseArray::try_new(indices, fsl, 6, fill_value)
1220 .unwrap()
1221 .into_array();
1222
1223 let actual = sparse.to_canonical().into_array();
1224
1225 let expected_elements = buffer![7i32, 8, 10, 20, 7, 8, 30, 40, 50, 60, 7, 8].into_array();
1228 let expected = FixedSizeListArray::try_new(
1229 expected_elements,
1230 2,
1231 Validity::Array(
1232 BoolArray::from_iter([true, true, true, false, true, true]).into_array(),
1233 ),
1234 6,
1235 )
1236 .unwrap()
1237 .into_array();
1238
1239 assert_arrays_eq!(actual, expected);
1240 }
1241
1242 #[test]
1243 fn test_sparse_fixed_size_list_truly_sparse() {
1244 let elements = buffer![10i32, 11, 20, 21, 30, 31].into_array();
1249 let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 3)
1250 .unwrap()
1251 .into_array();
1252
1253 let indices = buffer![5u32, 50, 95].into_array();
1255
1256 let fill_value = Scalar::fixed_size_list(
1258 Arc::new(DType::Primitive(PType::I32, NonNullable)),
1259 vec![
1260 Scalar::primitive(99i32, NonNullable),
1261 Scalar::primitive(99i32, NonNullable),
1262 ],
1263 NonNullable,
1264 );
1265
1266 let sparse = SparseArray::try_new(indices, fsl, 100, fill_value)
1267 .unwrap()
1268 .into_array();
1269
1270 let actual = sparse.to_canonical().into_array();
1271
1272 let mut expected_elements_vec = Vec::with_capacity(200);
1274 for _ in 0..5 {
1276 expected_elements_vec.extend([99i32, 99]);
1277 }
1278 expected_elements_vec.extend([10, 11]);
1280 for _ in 6..50 {
1282 expected_elements_vec.extend([99, 99]);
1283 }
1284 expected_elements_vec.extend([20, 21]);
1286 for _ in 51..95 {
1288 expected_elements_vec.extend([99, 99]);
1289 }
1290 expected_elements_vec.extend([30, 31]);
1292 for _ in 96..100 {
1294 expected_elements_vec.extend([99, 99]);
1295 }
1296 let expected_elements = PrimitiveArray::from_iter(expected_elements_vec).into_array();
1297 let expected =
1298 FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 100)
1299 .unwrap()
1300 .into_array();
1301
1302 assert_arrays_eq!(actual, expected);
1303 }
1304
1305 #[test]
1306 fn test_sparse_fixed_size_list_single_element() {
1307 let elements = buffer![42i32, 43].into_array();
1309 let fsl = FixedSizeListArray::try_new(elements, 2, Validity::AllValid, 1)
1310 .unwrap()
1311 .into_array();
1312
1313 let indices = buffer![0u32].into_array();
1314 let fill_value = Scalar::fixed_size_list(
1315 Arc::new(DType::Primitive(PType::I32, NonNullable)),
1316 vec![
1317 Scalar::primitive(1i32, NonNullable),
1318 Scalar::primitive(2i32, NonNullable),
1319 ],
1320 NonNullable,
1321 );
1322 let sparse = SparseArray::try_new(indices, fsl, 1, fill_value)
1323 .unwrap()
1324 .into_array();
1325
1326 let actual = sparse.to_canonical().into_array();
1327
1328 let expected_elements = buffer![42i32, 43].into_array();
1330 let expected = FixedSizeListArray::try_new(expected_elements, 2, Validity::NonNullable, 1)
1331 .unwrap()
1332 .into_array();
1333
1334 assert_arrays_eq!(actual, expected);
1335 }
1336
1337 #[test]
1338 fn test_sparse_list_grows_offset_type() {
1339 let elements = buffer![1i32, 2, 1, 2].into_array();
1340 let offsets = buffer![0u8, 1, 2, 3, 4].into_array();
1341 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1342 .unwrap()
1343 .into_array();
1344
1345 let indices = buffer![0u8, 1u8, 2u8, 3u8].into_array();
1346 let fill_value = Scalar::from(Some(vec![42i32; 252])); let sparse = SparseArray::try_new(indices, lists, 5, fill_value)
1348 .unwrap()
1349 .into_array();
1350
1351 let actual = sparse.to_canonical().into_array();
1352 let mut expected_elements = buffer_mut![1, 2, 1, 2];
1353 expected_elements.extend(buffer![42i32; 252]);
1354 let expected = ListArray::try_new(
1355 expected_elements.freeze().into_array(),
1356 buffer![0u16, 1, 2, 3, 4, 256].into_array(),
1357 Validity::AllValid,
1358 )
1359 .unwrap()
1360 .into_array();
1361
1362 assert_eq!(
1363 actual.to_listview().offsets().dtype(),
1364 &DType::Primitive(PType::U16, NonNullable)
1365 );
1366 assert_arrays_eq!(&actual, &expected);
1367
1368 let arrow_dtype = expected.dtype().to_arrow_dtype().unwrap();
1370 let actual = actual.into_arrow(&arrow_dtype).unwrap();
1371 let expected = expected.into_arrow(&arrow_dtype).unwrap();
1372
1373 assert_eq!(actual.data_type(), expected.data_type());
1374 }
1375
1376 #[test]
1377 fn test_sparse_listview_null_fill_with_gaps() {
1378 let elements = buffer![10i32, 11, 12, 20, 21, 30, 31, 32, 33].into_array();
1388 let offsets = buffer![0u32, 3, 5].into_array();
1389 let sizes = buffer![3u32, 2, 4].into_array();
1390
1391 let list_view = unsafe {
1392 ListViewArray::new_unchecked(elements.clone(), offsets, sizes, Validity::AllValid)
1393 .with_zero_copy_to_list(true)
1394 };
1395
1396 let list_dtype = list_view.dtype().clone();
1397
1398 let indices = buffer![1u8, 4, 7].into_array();
1408 let sparse = SparseArray::try_new(
1409 indices,
1410 list_view.into_array(),
1411 10,
1412 Scalar::null(list_dtype),
1413 )
1414 .unwrap();
1415
1416 let canonical = sparse.to_canonical().into_array();
1418 let result_listview = canonical.to_listview();
1419
1420 assert_eq!(result_listview.len(), 10);
1422
1423 let get_list_values = |idx: usize| -> Vec<i32> {
1425 let offset = result_listview.offset_at(idx);
1426 let size = result_listview.size_at(idx);
1427 if size == 0 {
1428 vec![] } else {
1430 let elements = result_listview.elements().to_primitive();
1431 let slice = elements.as_slice::<i32>();
1432 slice[offset..offset + size].to_vec()
1433 }
1434 };
1435
1436 let empty: Vec<i32> = vec![];
1437
1438 assert_eq!(get_list_values(0), empty); assert_eq!(get_list_values(1), vec![10, 11, 12]); assert_eq!(get_list_values(2), empty); assert_eq!(get_list_values(3), empty); assert_eq!(get_list_values(4), vec![20, 21]); assert_eq!(get_list_values(5), empty); assert_eq!(get_list_values(6), empty); assert_eq!(get_list_values(7), vec![30, 31, 32, 33]); assert_eq!(get_list_values(8), empty); assert_eq!(get_list_values(9), empty); }
1450
1451 #[test]
1452 fn test_sparse_listview_sliced_values_null_fill() {
1453 let elements = buffer![0i32, 1, 2, 3, 4, 5, 6, 7, 8, 9].into_array();
1459
1460 let offsets = buffer![0u32, 2, 5, 6, 8].into_array();
1467 let sizes = buffer![2u32, 3, 1, 2, 2].into_array();
1468
1469 let full_listview = unsafe {
1470 ListViewArray::new_unchecked(elements, offsets, sizes, Validity::AllValid)
1471 .with_zero_copy_to_list(true)
1472 }
1473 .into_array();
1474
1475 let sliced = full_listview.slice(1..4);
1481
1482 let indices = buffer![0u8, 1].into_array();
1490 let values = sliced.slice(0..2);
1492 let sparse =
1493 SparseArray::try_new(indices, values, 5, Scalar::null(sliced.dtype().clone())).unwrap();
1494
1495 let canonical = sparse.to_canonical().into_array();
1496 let result_listview = canonical.to_listview();
1497
1498 assert_eq!(result_listview.len(), 5);
1499
1500 let get_list_values = |idx: usize| -> Vec<i32> {
1502 let offset = result_listview.offset_at(idx);
1503 let size = result_listview.size_at(idx);
1504 if size == 0 {
1505 vec![] } else {
1507 let elements = result_listview.elements().to_primitive();
1508 let slice = elements.as_slice::<i32>();
1509 slice[offset..offset + size].to_vec()
1510 }
1511 };
1512
1513 let empty: Vec<i32> = vec![];
1514
1515 assert_eq!(get_list_values(0), vec![2, 3, 4]); assert_eq!(get_list_values(1), vec![5]); assert_eq!(get_list_values(2), empty); assert_eq!(get_list_values(3), empty); assert_eq!(get_list_values(4), empty); }
1528}