1use std::sync::Arc;
5
6use itertools::Itertools;
7use num_traits::NumCast;
8use vortex_array::arrays::{
9 BinaryView, BoolArray, BooleanBuffer, ConstantArray, ListArray, NullArray, OffsetPType,
10 PrimitiveArray, StructArray, VarBinViewArray, smallest_storage_type,
11};
12use vortex_array::builders::{
13 ArrayBuilder as _, ArrayBuilderExt, DecimalBuilder, ListBuilder, builder_with_capacity,
14};
15use vortex_array::patches::Patches;
16use vortex_array::validity::Validity;
17use vortex_array::vtable::CanonicalVTable;
18use vortex_array::{Array, ArrayRef, Canonical, IntoArray as _, ToCanonical as _};
19use vortex_buffer::{Buffer, BufferMut, BufferString, ByteBuffer, buffer, buffer_mut};
20use vortex_dtype::{
21 DType, DecimalDType, NativePType, Nullability, StructFields, match_each_integer_ptype,
22 match_each_native_ptype,
23};
24use vortex_error::{VortexError, VortexExpect as _, VortexResult, vortex_err};
25use vortex_scalar::{
26 DecimalScalar, ListScalar, NativeDecimalType, Scalar, StructScalar,
27 match_each_decimal_value_type,
28};
29
30use crate::{SparseArray, SparseVTable};
31
32impl CanonicalVTable<SparseVTable> for SparseVTable {
33 fn canonicalize(array: &SparseArray) -> VortexResult<Canonical> {
34 if array.patches().num_patches() == 0 {
35 return ConstantArray::new(array.fill_scalar().clone(), array.len()).to_canonical();
36 }
37
38 match array.dtype() {
39 DType::Null => {
40 assert!(array.fill_scalar().is_null());
41 Ok(Canonical::Null(NullArray::new(array.len())))
42 }
43 DType::Bool(..) => {
44 let resolved_patches = array.resolved_patches()?;
45 canonicalize_sparse_bools(&resolved_patches, array.fill_scalar())
46 }
47 DType::Primitive(ptype, ..) => {
48 let resolved_patches = array.resolved_patches()?;
49 match_each_native_ptype!(ptype, |P| {
50 canonicalize_sparse_primitives::<P>(&resolved_patches, array.fill_scalar())
51 })
52 }
53 DType::Struct(struct_fields, ..) => canonicalize_sparse_struct(
54 struct_fields,
55 array.fill_scalar().as_struct(),
56 array.dtype(),
57 array.patches(),
58 array.len(),
59 ),
60 DType::Decimal(decimal_dtype, nullability) => {
61 let canonical_decimal_value_type = smallest_storage_type(decimal_dtype);
62 let fill_value = array.fill_scalar().as_decimal();
63 match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
64 canonicalize_sparse_decimal::<D>(
65 *decimal_dtype,
66 *nullability,
67 fill_value,
68 array.patches(),
69 array.len(),
70 )
71 })
72 }
73 dtype @ DType::Utf8(..) => {
74 let fill_value = array.fill_scalar().as_utf8().value();
75 let fill_value = fill_value.map(BufferString::into_inner);
76 canonicalize_varbin(array, dtype.clone(), fill_value)
77 }
78 dtype @ DType::Binary(..) => {
79 let fill_value = array.fill_scalar().as_binary().value();
80 canonicalize_varbin(array, dtype.clone(), fill_value)
81 }
82 DType::List(values_dtype, nullability) => {
83 let resolved_patches = array.resolved_patches()?;
84 canonicalize_sparse_lists(
85 array,
86 resolved_patches,
87 values_dtype.clone(),
88 *nullability,
89 )
90 }
91 DType::Extension(_ext_dtype) => todo!(),
92 }
93 }
94}
95
96fn list_scalar_to_elements_array(scalar: ListScalar) -> VortexResult<Option<ArrayRef>> {
98 let Some(elements) = scalar.elements() else {
99 return Ok(None);
100 };
101
102 let mut builder = builder_with_capacity(scalar.element_dtype(), scalar.len());
103 for s in elements {
104 builder.append_scalar(&s)?;
105 }
106 Ok(Some(builder.finish()))
107}
108
109fn list_scalar_to_singleton_list_array(scalar: ListScalar) -> VortexResult<Option<ArrayRef>> {
111 let nullability = scalar.dtype().nullability();
112 let Some(elements) = list_scalar_to_elements_array(scalar)? else {
113 return Ok(None);
114 };
115
116 let validity = match nullability {
117 Nullability::NonNullable => Validity::NonNullable,
118 Nullability::Nullable => Validity::AllValid,
119 };
120
121 let n = elements.len();
122 ListArray::try_new(elements, buffer![0_u64, n as u64].into_array(), validity)
123 .map(|x| Some(x.into_array()))
124}
125
126#[allow(clippy::cognitive_complexity)]
127fn canonicalize_sparse_lists(
128 array: &SparseArray,
129 resolved_patches: Patches,
130 values_dtype: Arc<DType>,
131 nullability: Nullability,
132) -> VortexResult<Canonical> {
133 macro_rules! match_smallest_offset_type {
134 ($n_elements:expr, | $offset_type:ident | $body:block) => {{
135 let n_elements = $n_elements;
136 if n_elements <= u8::MAX as usize {
137 type $offset_type = u8;
138 $body
139 } else if n_elements <= u16::MAX as usize {
140 type $offset_type = u16;
141 $body
142 } else if n_elements <= u32::MAX as usize {
143 type $offset_type = u32;
144 $body
145 } else {
146 assert!(u64::try_from(n_elements).is_ok());
147 type $offset_type = u64;
148 $body
149 }
150 }};
151 }
152
153 let indices = resolved_patches.indices().to_primitive()?;
154 let values = resolved_patches.values().to_list()?;
155 let fill_value = array.fill_scalar().as_list();
156
157 let n_filled = array.len() - resolved_patches.num_patches();
158 let total_canonical_values = values.elements().len() + fill_value.len() * n_filled;
159
160 let validity = array
161 .validity_mask()
162 .map(|x| Validity::from_mask(x, nullability))?;
163
164 match_each_integer_ptype!(indices.ptype(), |I| {
165 match_smallest_offset_type!(total_canonical_values, |O| {
166 canonicalize_sparse_lists_inner::<I, O>(
167 indices.as_slice(),
168 values,
169 fill_value,
170 values_dtype,
171 array.len(),
172 total_canonical_values,
173 validity,
174 )
175 })
176 })
177}
178
179fn canonicalize_sparse_lists_inner<I: NativePType, SmallestViableOffsetType: OffsetPType>(
180 indices: &[I],
181 values: ListArray,
182 fill_value: ListScalar,
183 values_dtype: Arc<DType>,
184 len: usize,
185 total_canonical_values: usize,
186 validity: Validity,
187) -> VortexResult<Canonical> {
188 let Some(fill_value_array) = list_scalar_to_singleton_list_array(fill_value)? else {
189 let sparse_list_elements = values.elements().clone();
190 let sparse_list_offsets = values.offsets().to_primitive()?;
191 match_each_integer_ptype!(sparse_list_offsets.ptype(), |SparseValuesOffsetType| {
192 let sparse_list_offsets = sparse_list_offsets.as_slice::<SparseValuesOffsetType>();
193 return canonicalize_sparse_lists_inner_with_null_fill_value(
197 indices,
198 sparse_list_elements,
199 sparse_list_offsets,
200 len,
201 validity,
202 );
203 });
204 };
205
206 let mut builder = ListBuilder::<SmallestViableOffsetType>::with_values_and_index_capacity(
207 values_dtype,
208 validity.nullability(),
209 total_canonical_values,
210 len + 1,
211 );
212 let mut next_index = 0_usize;
213 let enumerated_indices_usize = indices
214 .iter()
215 .map(|x| (*x).to_usize().vortex_expect("index must fit in usize"))
216 .enumerate();
217 for (patch_values_index, next_patched_index) in enumerated_indices_usize {
218 for _ in next_index..next_patched_index {
219 builder.extend_from_array(&fill_value_array)?;
220 }
221 builder.extend_from_array(&values.slice(patch_values_index, patch_values_index + 1)?)?;
222 next_index = next_patched_index + 1;
223 }
224
225 for _ in next_index..len {
226 builder.extend_from_array(&fill_value_array)?;
227 }
228
229 builder.finish().to_canonical()
230}
231
232fn canonicalize_sparse_lists_inner_with_null_fill_value<I: NativePType, O: OffsetPType>(
233 indices: &[I],
234 elements: ArrayRef,
235 offsets: &[O],
236 len: usize,
237 validity: Validity,
238) -> VortexResult<Canonical> {
239 assert!(indices.len() < len + 1);
240 let mut dense_offsets = BufferMut::with_capacity(len + 1);
241
242 dense_offsets.push(offsets[0]);
244 let mut dense_last_set_index = 0_usize;
245 for (sparse_start_index, dense_start_index) in indices.iter().enumerate() {
246 let sparse_end_index = sparse_start_index + 1;
247 let dense_start_index = (*dense_start_index)
248 .to_usize()
249 .vortex_expect("index must fit in usize");
250 let dense_end_index = dense_start_index + 1;
251
252 for _ in (dense_last_set_index + 1)..dense_end_index {
253 dense_offsets.push(dense_offsets[dense_last_set_index]);
255 }
256 dense_offsets.push(offsets[sparse_end_index]);
257 dense_last_set_index = dense_end_index;
258 }
259 for _ in (dense_last_set_index + 1)..len {
260 dense_offsets.push(dense_offsets[dense_last_set_index]);
262 }
263 let array = ListArray::try_new(elements, dense_offsets.into_array(), validity)?;
264 Ok(Canonical::List(array))
265}
266
267fn canonicalize_sparse_bools(patches: &Patches, fill_value: &Scalar) -> VortexResult<Canonical> {
268 let (fill_bool, validity) = if fill_value.is_null() {
269 (false, Validity::AllInvalid)
270 } else {
271 (
272 fill_value.try_into()?,
273 if patches.dtype().nullability() == Nullability::NonNullable {
274 Validity::NonNullable
275 } else {
276 Validity::AllValid
277 },
278 )
279 };
280
281 let bools = BoolArray::new(
282 if fill_bool {
283 BooleanBuffer::new_set(patches.array_len())
284 } else {
285 BooleanBuffer::new_unset(patches.array_len())
286 },
287 validity,
288 );
289
290 bools.patch(patches).map(Canonical::Bool)
291}
292
293fn canonicalize_sparse_primitives<
294 T: NativePType + for<'a> TryFrom<&'a Scalar, Error = VortexError>,
295>(
296 patches: &Patches,
297 fill_value: &Scalar,
298) -> VortexResult<Canonical> {
299 let (primitive_fill, validity) = if fill_value.is_null() {
300 (T::default(), Validity::AllInvalid)
301 } else {
302 (
303 fill_value.try_into()?,
304 if patches.dtype().nullability() == Nullability::NonNullable {
305 Validity::NonNullable
306 } else {
307 Validity::AllValid
308 },
309 )
310 };
311
312 let parray = PrimitiveArray::new(buffer![primitive_fill; patches.array_len()], validity);
313
314 parray.patch(patches).map(Canonical::Primitive)
315}
316
317fn canonicalize_sparse_struct(
318 struct_fields: &StructFields,
319 fill_struct: StructScalar,
320 dtype: &DType,
321 unresolved_patches: &Patches,
323 len: usize,
324) -> VortexResult<Canonical> {
325 let (fill_values, top_level_fill_validity) = match fill_struct.fields() {
326 Some(fill_values) => (fill_values, Validity::AllValid),
327 None => (
328 struct_fields.fields().map(Scalar::default_value).collect(),
329 Validity::AllInvalid,
330 ),
331 };
332 let patch_values_as_struct = unresolved_patches.values().to_canonical()?.into_struct()?;
333 let columns_patch_values = patch_values_as_struct.fields();
334 let names = patch_values_as_struct.names();
335 let validity = if dtype.is_nullable() {
336 top_level_fill_validity.patch(
337 len,
338 unresolved_patches.offset(),
339 unresolved_patches.indices(),
340 &Validity::from_mask(
341 unresolved_patches.values().validity_mask()?,
342 Nullability::Nullable,
343 ),
344 )?
345 } else {
346 top_level_fill_validity
347 .into_non_nullable()
348 .ok_or_else(|| vortex_err!("fill validity should match sparse array nullability"))?
349 };
350
351 columns_patch_values
352 .iter()
353 .cloned()
354 .zip_eq(fill_values.into_iter())
355 .map(|(patch_values, fill_value)| -> VortexResult<_> {
356 SparseArray::try_new_from_patches(
357 unresolved_patches
358 .clone()
359 .map_values(|_| Ok(patch_values))?,
360 fill_value,
361 )
362 })
363 .process_results(|sparse_columns| {
364 StructArray::try_from_iter_with_validity(names.iter().zip_eq(sparse_columns), validity)
365 .map(Canonical::Struct)
366 })?
367}
368
369fn canonicalize_sparse_decimal<D: NativeDecimalType>(
370 decimal_dtype: DecimalDType,
371 nullability: Nullability,
372 fill_value: DecimalScalar,
373 patches: &Patches,
374 len: usize,
375) -> VortexResult<Canonical> {
376 let mut builder = DecimalBuilder::with_capacity::<D>(len, decimal_dtype, nullability);
377 match fill_value.decimal_value() {
378 Some(fill_value) => {
379 let fill_value = fill_value
380 .cast::<D>()
381 .vortex_expect("unexpected value type");
382 for _ in 0..len {
383 builder.append_value(fill_value)
384 }
385 }
386 None => {
387 builder.append_nulls(len);
388 }
389 }
390 let filled_array = builder.finish_into_decimal();
391 let array = filled_array.patch(patches)?;
392 Ok(Canonical::Decimal(array))
393}
394
395fn canonicalize_varbin(
396 array: &SparseArray,
397 dtype: DType,
398 fill_value: Option<ByteBuffer>,
399) -> VortexResult<Canonical> {
400 let patches = array.resolved_patches()?;
401 let indices = patches.indices().to_primitive()?;
402 let values = patches.values().to_varbinview()?;
403 let validity = array
404 .validity_mask()
405 .map(|x| Validity::from_mask(x, dtype.nullability()))?;
406 let len = array.len();
407
408 match_each_integer_ptype!(indices.ptype(), |I| {
409 let indices = indices.buffer::<I>();
410 canonicalize_varbin_inner::<I>(fill_value, indices, values, dtype, validity, len)
411 })
412}
413
414fn canonicalize_varbin_inner<I: NativePType>(
415 fill_value: Option<ByteBuffer>,
416 indices: Buffer<I>,
417 values: VarBinViewArray,
418 dtype: DType,
419 validity: Validity,
420 len: usize,
421) -> VortexResult<Canonical> {
422 assert_eq!(dtype.nullability(), validity.nullability());
423
424 let n_patch_buffers = values.buffers().len();
425 let mut buffers = values.buffers().to_vec();
426
427 let fill = if let Some(buffer) = &fill_value {
428 buffers.push(buffer.clone());
429 BinaryView::make_view(
430 buffer.as_ref(),
431 u32::try_from(n_patch_buffers).vortex_expect("too many buffers"),
432 0,
433 )
434 } else {
435 BinaryView::make_view(&[], 0, 0)
437 };
438
439 let mut views = buffer_mut![fill; len];
440 for (patch_index, &patch) in indices.into_iter().zip_eq(values.views().iter()) {
441 let patch_index_usize = <usize as NumCast>::from(patch_index)
442 .vortex_expect("var bin view indices must fit in usize");
443 views[patch_index_usize] = patch;
444 }
445
446 let array = VarBinViewArray::try_new(views.freeze(), Arc::from(buffers), dtype, validity)?;
447
448 Ok(Canonical::VarBinView(array))
449}
450
451#[cfg(test)]
452mod test {
453 use rstest::rstest;
454 use vortex_array::arrays::{
455 BoolArray, BooleanBufferBuilder, DecimalArray, ListArray, PrimitiveArray, StructArray,
456 VarBinArray, VarBinViewArray,
457 };
458 use vortex_array::arrow::IntoArrowArray as _;
459 use vortex_array::validity::Validity;
460 use vortex_array::vtable::ValidityHelper;
461 use vortex_array::{IntoArray, ToCanonical};
462 use vortex_buffer::{ByteBuffer, buffer, buffer_mut};
463 use vortex_dtype::Nullability::{NonNullable, Nullable};
464 use vortex_dtype::{DType, DecimalDType, FieldNames, PType, StructFields};
465 use vortex_mask::Mask;
466 use vortex_scalar::{DecimalValue, Scalar};
467
468 use crate::SparseArray;
469 use crate::canonical::{list_scalar_to_elements_array, list_scalar_to_singleton_list_array};
470
471 #[rstest]
472 #[case(Some(true))]
473 #[case(Some(false))]
474 #[case(None)]
475 fn test_sparse_bool(#[case] fill_value: Option<bool>) {
476 let indices = buffer![0u64, 1, 7].into_array();
477 let values = bool_array_from_nullable_vec(vec![Some(true), None, Some(false)], fill_value)
478 .into_array();
479 let sparse_bools =
480 SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
481 assert_eq!(sparse_bools.dtype(), &DType::Bool(Nullable));
482
483 let flat_bools = sparse_bools.to_bool().unwrap();
484 let expected = bool_array_from_nullable_vec(
485 vec![
486 Some(true),
487 None,
488 fill_value,
489 fill_value,
490 fill_value,
491 fill_value,
492 fill_value,
493 Some(false),
494 fill_value,
495 fill_value,
496 ],
497 fill_value,
498 );
499
500 assert_eq!(flat_bools.boolean_buffer(), expected.boolean_buffer());
501 assert_eq!(flat_bools.validity(), expected.validity());
502
503 assert!(flat_bools.boolean_buffer().value(0));
504 assert!(flat_bools.validity().is_valid(0).unwrap());
505 assert_eq!(
506 flat_bools.boolean_buffer().value(1),
507 fill_value.unwrap_or_default()
508 );
509 assert!(!flat_bools.validity().is_valid(1).unwrap());
510 assert_eq!(
511 flat_bools.validity().is_valid(2).unwrap(),
512 fill_value.is_some()
513 );
514 assert!(!flat_bools.boolean_buffer().value(7));
515 assert!(flat_bools.validity().is_valid(7).unwrap());
516 }
517
518 fn bool_array_from_nullable_vec(
519 bools: Vec<Option<bool>>,
520 fill_value: Option<bool>,
521 ) -> BoolArray {
522 let mut buffer = BooleanBufferBuilder::new(bools.len());
523 let mut validity = BooleanBufferBuilder::new(bools.len());
524 for maybe_bool in bools {
525 buffer.append(maybe_bool.unwrap_or_else(|| fill_value.unwrap_or_default()));
526 validity.append(maybe_bool.is_some());
527 }
528 BoolArray::new(buffer.finish(), Validity::from(validity.finish()))
529 }
530
531 #[rstest]
532 #[case(Some(0i32))]
533 #[case(Some(-1i32))]
534 #[case(None)]
535 fn test_sparse_primitive(#[case] fill_value: Option<i32>) {
536 let indices = buffer![0u64, 1, 7].into_array();
537 let values = PrimitiveArray::from_option_iter([Some(0i32), None, Some(1)]).into_array();
538 let sparse_ints =
539 SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
540 assert_eq!(*sparse_ints.dtype(), DType::Primitive(PType::I32, Nullable));
541
542 let flat_ints = sparse_ints.to_primitive().unwrap();
543 let expected = PrimitiveArray::from_option_iter([
544 Some(0i32),
545 None,
546 fill_value,
547 fill_value,
548 fill_value,
549 fill_value,
550 fill_value,
551 Some(1),
552 fill_value,
553 fill_value,
554 ]);
555
556 assert_eq!(flat_ints.byte_buffer(), expected.byte_buffer());
557 assert_eq!(flat_ints.validity(), expected.validity());
558
559 assert_eq!(flat_ints.as_slice::<i32>()[0], 0);
560 assert!(flat_ints.validity().is_valid(0).unwrap());
561 assert_eq!(flat_ints.as_slice::<i32>()[1], 0);
562 assert!(!flat_ints.validity().is_valid(1).unwrap());
563 assert_eq!(
564 flat_ints.as_slice::<i32>()[2],
565 fill_value.unwrap_or_default()
566 );
567 assert_eq!(
568 flat_ints.validity().is_valid(2).unwrap(),
569 fill_value.is_some()
570 );
571 assert_eq!(flat_ints.as_slice::<i32>()[7], 1);
572 assert!(flat_ints.validity().is_valid(7).unwrap());
573 }
574
575 #[test]
576 fn test_sparse_struct_valid_fill() {
577 let field_names = FieldNames::from_iter(["a", "b"]);
578 let field_types = vec![
579 DType::Primitive(PType::I32, Nullable),
580 DType::Primitive(PType::I32, Nullable),
581 ];
582 let struct_fields = StructFields::new(field_names, field_types);
583 let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
584
585 let indices = buffer![0u64, 1, 7, 8].into_array();
586 let patch_values_a =
587 PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
588 let patch_values_b =
589 PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
590 let patch_values = StructArray::try_new_with_dtype(
591 vec![patch_values_a, patch_values_b],
592 struct_fields.clone(),
593 4,
594 Validity::Array(
595 BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
596 ),
597 )
598 .unwrap()
599 .into_array();
600
601 let fill_scalar = Scalar::struct_(
602 struct_dtype,
603 vec![Scalar::from(Some(-10i32)), Scalar::from(Some(-1i32))],
604 );
605 let len = 10;
606 let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
607
608 let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
609 if i == 0 {
610 Some(10)
611 } else if i == 1 {
612 None
613 } else if i == 7 {
614 Some(20)
615 } else {
616 Some(-10)
617 }
618 }));
619 let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
620 if i == 0 {
621 Some(1i32)
622 } else if i == 1 {
623 Some(2)
624 } else if i == 7 {
625 None
626 } else {
627 Some(-1)
628 }
629 }));
630
631 let expected = StructArray::try_new_with_dtype(
632 vec![expected_a.into_array(), expected_b.into_array()],
633 struct_fields,
634 len,
635 Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
637 )
638 .unwrap()
639 .to_array()
640 .into_arrow_preferred()
641 .unwrap();
642
643 let actual = sparse_struct
644 .to_struct()
645 .unwrap()
646 .to_array()
647 .into_arrow_preferred()
648 .unwrap();
649
650 assert_eq!(expected.data_type(), actual.data_type());
651 assert_eq!(&expected, &actual);
652 }
653
654 #[test]
655 fn test_sparse_struct_invalid_fill() {
656 let field_names = FieldNames::from_iter(["a", "b"]);
657 let field_types = vec![
658 DType::Primitive(PType::I32, Nullable),
659 DType::Primitive(PType::I32, Nullable),
660 ];
661 let struct_fields = StructFields::new(field_names, field_types);
662 let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
663
664 let indices = buffer![0u64, 1, 7, 8].into_array();
665 let patch_values_a =
666 PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
667 let patch_values_b =
668 PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
669 let patch_values = StructArray::try_new_with_dtype(
670 vec![patch_values_a, patch_values_b],
671 struct_fields.clone(),
672 4,
673 Validity::Array(
674 BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
675 ),
676 )
677 .unwrap()
678 .into_array();
679
680 let fill_scalar = Scalar::null(struct_dtype);
681 let len = 10;
682 let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
683
684 let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
685 if i == 0 {
686 Some(10)
687 } else if i == 1 {
688 None
689 } else if i == 7 {
690 Some(20)
691 } else {
692 Some(-10)
693 }
694 }));
695 let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
696 if i == 0 {
697 Some(1i32)
698 } else if i == 1 {
699 Some(2)
700 } else if i == 7 {
701 None
702 } else {
703 Some(-1)
704 }
705 }));
706
707 let expected = StructArray::try_new_with_dtype(
708 vec![expected_a.into_array(), expected_b.into_array()],
709 struct_fields,
710 len,
711 Validity::from_mask(Mask::from_indices(10, vec![0, 1, 7]), Nullable),
713 )
714 .unwrap()
715 .to_array()
716 .into_arrow_preferred()
717 .unwrap();
718
719 let actual = sparse_struct
720 .to_struct()
721 .unwrap()
722 .to_array()
723 .into_arrow_preferred()
724 .unwrap();
725
726 assert_eq!(expected.data_type(), actual.data_type());
727 assert_eq!(&expected, &actual);
728 }
729
730 #[test]
731 fn test_sparse_decimal() {
732 let indices = buffer![0u32, 1u32, 7u32, 8u32].into_array();
733 let decimal_dtype = DecimalDType::new(3, 2);
734 let patch_values = DecimalArray::new(
735 buffer![100i128, 200i128, 300i128, 4000i128],
736 decimal_dtype,
737 Validity::from_iter([true, true, true, false]),
738 )
739 .to_array();
740 let len = 10;
741 let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
742 let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
743
744 let expected = DecimalArray::new(
745 buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
746 decimal_dtype,
747 Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
749 )
750 .to_array()
751 .into_arrow_preferred()
752 .unwrap();
753
754 let actual = sparse_struct
755 .to_decimal()
756 .unwrap()
757 .to_array()
758 .into_arrow_preferred()
759 .unwrap();
760
761 assert_eq!(expected.data_type(), actual.data_type());
762 assert_eq!(&expected, &actual);
763 }
764
765 #[test]
766 fn test_sparse_utf8_varbinview_non_null_fill() {
767 let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
768 Some("hello"),
769 Some("goodbye"),
770 Some("hello"),
771 None,
772 Some("bonjour"),
773 Some("你好"),
774 None,
775 ])
776 .into_array();
777
778 let array = SparseArray::try_new(
779 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
780 strings,
781 12,
782 Scalar::from(Some("123".to_owned())),
783 )
784 .unwrap();
785
786 let actual = array.to_varbinview().unwrap().into_array();
787 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
788 Some("hello"),
789 Some("123"),
790 Some("123"),
791 Some("goodbye"),
792 Some("hello"),
793 None,
794 Some("123"),
795 Some("bonjour"),
796 Some("123"),
797 Some("你好"),
798 None,
799 Some("123"),
800 ])
801 .into_array();
802
803 let actual = actual.into_arrow_preferred().unwrap();
804 let expected = expected.into_arrow_preferred().unwrap();
805
806 assert_eq!(actual.data_type(), expected.data_type());
807 assert_eq!(&actual, &expected);
808 }
809
810 #[test]
811 fn test_sparse_utf8_varbinview_null_fill() {
812 let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
813 Some("hello"),
814 Some("goodbye"),
815 Some("hello"),
816 None,
817 Some("bonjour"),
818 Some("你好"),
819 None,
820 ])
821 .into_array();
822
823 let array = SparseArray::try_new(
824 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
825 strings,
826 12,
827 Scalar::null(DType::Utf8(Nullable)),
828 )
829 .unwrap();
830
831 let actual = array.to_varbinview().unwrap().into_array();
832 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
833 Some("hello"),
834 None,
835 None,
836 Some("goodbye"),
837 Some("hello"),
838 None,
839 None,
840 Some("bonjour"),
841 None,
842 Some("你好"),
843 None,
844 None,
845 ])
846 .into_array();
847
848 let actual = actual.into_arrow_preferred().unwrap();
849 let expected = expected.into_arrow_preferred().unwrap();
850
851 assert_eq!(actual.data_type(), expected.data_type());
852 assert_eq!(&actual, &expected);
853 }
854
855 #[test]
856 fn test_sparse_utf8_varbinview_non_nullable() {
857 let strings =
858 VarBinViewArray::from_iter_str(["hello", "goodbye", "hello", "bonjour", "你好"])
859 .into_array();
860
861 let array = SparseArray::try_new(
862 buffer![0u16, 3, 4, 5, 8].into_array(),
863 strings,
864 9,
865 Scalar::from("123".to_owned()),
866 )
867 .unwrap();
868
869 let actual = array.to_varbinview().unwrap().into_array();
870 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
871 Some("hello"),
872 Some("123"),
873 Some("123"),
874 Some("goodbye"),
875 Some("hello"),
876 Some("bonjour"),
877 Some("123"),
878 Some("123"),
879 Some("你好"),
880 ])
881 .into_array();
882
883 let actual = actual.into_arrow_preferred().unwrap();
884 let expected = expected.into_arrow_preferred().unwrap();
885
886 assert_eq!(actual.data_type(), expected.data_type());
887 assert_eq!(&actual, &expected);
888 }
889
890 #[test]
891 fn test_sparse_utf8_varbin_null_fill() {
892 let strings = <VarBinArray as FromIterator<_>>::from_iter([
893 Some("hello"),
894 Some("goodbye"),
895 Some("hello"),
896 None,
897 Some("bonjour"),
898 Some("你好"),
899 None,
900 ])
901 .into_array();
902
903 let array = SparseArray::try_new(
904 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
905 strings,
906 12,
907 Scalar::null(DType::Utf8(Nullable)),
908 )
909 .unwrap();
910
911 let actual = array.to_varbinview().unwrap().into_array();
912 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
913 Some("hello"),
914 None,
915 None,
916 Some("goodbye"),
917 Some("hello"),
918 None,
919 None,
920 Some("bonjour"),
921 None,
922 Some("你好"),
923 None,
924 None,
925 ])
926 .into_array();
927
928 let actual = actual.into_arrow_preferred().unwrap();
929 let expected = expected.into_arrow_preferred().unwrap();
930
931 assert_eq!(actual.data_type(), expected.data_type());
932 assert_eq!(&actual, &expected);
933 }
934
935 #[test]
936 fn test_sparse_binary_varbinview_non_null_fill() {
937 let binaries = VarBinViewArray::from_iter_nullable_bin([
938 Some(b"hello" as &[u8]),
939 Some(b"goodbye"),
940 Some(b"hello"),
941 None,
942 Some(b"\x00"),
943 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
944 None,
945 ])
946 .into_array();
947
948 let array = SparseArray::try_new(
949 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
950 binaries,
951 12,
952 Scalar::from(Some(ByteBuffer::from(b"123".to_vec()))),
953 )
954 .unwrap();
955
956 let actual = array.to_varbinview().unwrap().into_array();
957 let expected = VarBinViewArray::from_iter_nullable_bin([
958 Some(b"hello" as &[u8]),
959 Some(b"123"),
960 Some(b"123"),
961 Some(b"goodbye"),
962 Some(b"hello"),
963 None,
964 Some(b"123"),
965 Some(b"\x00"),
966 Some(b"123"),
967 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
968 None,
969 Some(b"123"),
970 ])
971 .into_array();
972
973 let actual = actual.into_arrow_preferred().unwrap();
974 let expected = expected.into_arrow_preferred().unwrap();
975
976 assert_eq!(actual.data_type(), expected.data_type());
977 assert_eq!(&actual, &expected);
978 }
979
980 #[test]
981 fn test_list_scalar_to_elements_array() {
982 let scalar = Scalar::from(Some(vec![1, 2, 3]));
983 let array = list_scalar_to_elements_array(scalar.as_list()).unwrap();
984 assert_eq!(
985 array.unwrap().display_values().to_string(),
986 "[1i32, 2i32, 3i32]"
987 );
988
989 let scalar = Scalar::null_typed::<Vec<i32>>();
990 let array = list_scalar_to_elements_array(scalar.as_list()).unwrap();
991 assert!(array.is_none());
992 }
993
994 #[test]
995 fn test_list_scalar_to_singleton_list_array() {
996 let scalar = Scalar::from(Some(vec![1, 2, 3]));
997 let array = list_scalar_to_singleton_list_array(scalar.as_list()).unwrap();
998 assert!(array.is_some());
999 let array = array.unwrap();
1000 assert_eq!(array.scalar_at(0).unwrap(), scalar);
1001 assert_eq!(array.len(), 1);
1002
1003 let scalar = Scalar::null_typed::<Vec<i32>>();
1004 let array = list_scalar_to_singleton_list_array(scalar.as_list()).unwrap();
1005 assert!(array.is_none());
1006 }
1007
1008 #[test]
1009 fn test_sparse_list_null_fill() {
1010 let elements = buffer![1i32, 2, 1, 2].into_array();
1011 let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1012 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1013 .unwrap()
1014 .into_array();
1015
1016 let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1017 let fill_value = Scalar::null(lists.dtype().clone());
1018 let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1019 .unwrap()
1020 .into_array();
1021
1022 let actual = sparse.to_canonical().unwrap().into_array();
1023 let expected = ListArray::try_new(
1024 buffer![1i32, 2, 1, 2].into_array(),
1025 buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1026 Validity::Array(
1027 BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1028 ),
1029 )
1030 .unwrap()
1031 .into_array();
1032
1033 let actual = actual.into_arrow_preferred().unwrap();
1034 let expected = expected.into_arrow_preferred().unwrap();
1035
1036 assert_eq!(actual.data_type(), expected.data_type());
1037 assert_eq!(&actual, &expected);
1038 }
1039
1040 #[test]
1041 fn test_sparse_list_null_fill_sliced_sparse_values() {
1042 let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
1043 let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7, 8].into_array();
1044 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1045 .unwrap()
1046 .into_array();
1047 let lists = lists.slice(2, 6).unwrap();
1048
1049 let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1050 let fill_value = Scalar::null(lists.dtype().clone());
1051 let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1052 .unwrap()
1053 .into_array();
1054
1055 let actual = sparse.to_canonical().unwrap().into_array();
1056 let expected = ListArray::try_new(
1057 buffer![1i32, 2, 1, 2].into_array(),
1058 buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1059 Validity::Array(
1060 BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1061 ),
1062 )
1063 .unwrap()
1064 .into_array();
1065
1066 let actual = actual.into_arrow_preferred().unwrap();
1067 let expected = expected.into_arrow_preferred().unwrap();
1068
1069 assert_eq!(actual.data_type(), expected.data_type());
1070 assert_eq!(&actual, &expected);
1071 }
1072
1073 #[test]
1074 fn test_sparse_list_non_null_fill() {
1075 let elements = buffer![1i32, 2, 1, 2].into_array();
1076 let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1077 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1078 .unwrap()
1079 .into_array();
1080
1081 let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1082 let fill_value = Scalar::from(Some(vec![5i32, 6, 7, 8]));
1083 let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1084 .unwrap()
1085 .into_array();
1086
1087 let actual = sparse.to_canonical().unwrap().into_array();
1088 let expected = ListArray::try_new(
1089 buffer![1i32, 5, 6, 7, 8, 5, 6, 7, 8, 2, 1, 2].into_array(),
1090 buffer![0u32, 1, 5, 9, 10, 11, 12].into_array(),
1091 Validity::AllValid,
1092 )
1093 .unwrap()
1094 .into_array();
1095
1096 let actual = actual.into_arrow_preferred().unwrap();
1097 let expected = expected.into_arrow_preferred().unwrap();
1098
1099 assert_eq!(actual.data_type(), expected.data_type());
1100 assert_eq!(&actual, &expected);
1101 }
1102
1103 #[test]
1104 fn test_sparse_binary_varbin_null_fill() {
1105 let strings = <VarBinArray as FromIterator<_>>::from_iter([
1106 Some(b"hello" as &[u8]),
1107 Some(b"goodbye"),
1108 Some(b"hello"),
1109 None,
1110 Some(b"\x00"),
1111 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1112 None,
1113 ])
1114 .into_array();
1115
1116 let array = SparseArray::try_new(
1117 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
1118 strings,
1119 12,
1120 Scalar::null(DType::Binary(Nullable)),
1121 )
1122 .unwrap();
1123
1124 let actual = array.to_varbinview().unwrap().into_array();
1125 let expected = VarBinViewArray::from_iter_nullable_bin([
1126 Some(b"hello" as &[u8]),
1127 None,
1128 None,
1129 Some(b"goodbye"),
1130 Some(b"hello"),
1131 None,
1132 None,
1133 Some(b"\x00"),
1134 None,
1135 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1136 None,
1137 None,
1138 ])
1139 .into_array();
1140 let actual = actual.into_arrow_preferred().unwrap();
1141 let expected = expected.into_arrow_preferred().unwrap();
1142
1143 assert_eq!(actual.data_type(), expected.data_type());
1144 assert_eq!(&actual, &expected);
1145 }
1146
1147 #[test]
1148 fn test_sparse_list_grows_offset_type() {
1149 let elements = buffer![1i32, 2, 1, 2].into_array();
1150 let offsets = buffer![0u8, 1, 2, 3, 4].into_array();
1151 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1152 .unwrap()
1153 .into_array();
1154
1155 let indices = buffer![0u8, 1u8, 2u8, 3u8].into_array();
1156 let fill_value = Scalar::from(Some(vec![42i32; 252])); let sparse = SparseArray::try_new(indices, lists, 5, fill_value)
1158 .unwrap()
1159 .into_array();
1160
1161 let actual = sparse.to_canonical().unwrap().into_array();
1162 let mut expected_elements = buffer_mut![1, 2, 1, 2];
1163 expected_elements.extend(buffer![42i32; 252]);
1164 let expected = ListArray::try_new(
1165 expected_elements.freeze().into_array(),
1166 buffer![0u16, 1, 2, 3, 4, 256].into_array(),
1167 Validity::AllValid,
1168 )
1169 .unwrap()
1170 .into_array();
1171
1172 assert_eq!(
1173 actual.to_list().unwrap().offsets().dtype(),
1174 &DType::Primitive(PType::U16, NonNullable)
1175 );
1176
1177 let actual = actual.into_arrow_preferred().unwrap();
1178 let expected = expected.into_arrow_preferred().unwrap();
1179
1180 assert_eq!(actual.data_type(), expected.data_type());
1181 assert_eq!(&actual, &expected);
1182 }
1183}