1use std::sync::Arc;
5
6use itertools::Itertools;
7use num_traits::NumCast;
8use vortex_array::arrays::{
9 BinaryView, BoolArray, BooleanBuffer, ConstantArray, ListArray, NullArray, OffsetPType,
10 PrimitiveArray, StructArray, VarBinViewArray, smallest_storage_type,
11};
12use vortex_array::builders::{
13 ArrayBuilder as _, ArrayBuilderExt, DecimalBuilder, ListBuilder, builder_with_capacity,
14};
15use vortex_array::patches::Patches;
16use vortex_array::validity::Validity;
17use vortex_array::vtable::CanonicalVTable;
18use vortex_array::{Array, ArrayRef, Canonical, IntoArray as _, ToCanonical as _};
19use vortex_buffer::{Buffer, BufferMut, BufferString, ByteBuffer, buffer, buffer_mut};
20use vortex_dtype::{
21 DType, DecimalDType, NativePType, Nullability, StructFields, match_each_integer_ptype,
22 match_each_native_ptype,
23};
24use vortex_error::{VortexError, VortexExpect as _, VortexResult, vortex_err};
25use vortex_scalar::{
26 DecimalScalar, ListScalar, NativeDecimalType, Scalar, StructScalar,
27 match_each_decimal_value_type,
28};
29
30use crate::{SparseArray, SparseVTable};
31
32impl CanonicalVTable<SparseVTable> for SparseVTable {
33 fn canonicalize(array: &SparseArray) -> VortexResult<Canonical> {
34 if array.patches().num_patches() == 0 {
35 return ConstantArray::new(array.fill_scalar().clone(), array.len()).to_canonical();
36 }
37
38 match array.dtype() {
39 DType::Null => {
40 assert!(array.fill_scalar().is_null());
41 Ok(Canonical::Null(NullArray::new(array.len())))
42 }
43 DType::Bool(..) => {
44 let resolved_patches = array.resolved_patches()?;
45 canonicalize_sparse_bools(&resolved_patches, array.fill_scalar())
46 }
47 DType::Primitive(ptype, ..) => {
48 let resolved_patches = array.resolved_patches()?;
49 match_each_native_ptype!(ptype, |P| {
50 canonicalize_sparse_primitives::<P>(&resolved_patches, array.fill_scalar())
51 })
52 }
53 DType::Struct(struct_fields, ..) => canonicalize_sparse_struct(
54 struct_fields,
55 array.fill_scalar().as_struct(),
56 array.dtype(),
57 array.patches(),
58 array.len(),
59 ),
60 DType::Decimal(decimal_dtype, nullability) => {
61 let canonical_decimal_value_type = smallest_storage_type(decimal_dtype);
62 let fill_value = array.fill_scalar().as_decimal();
63 match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
64 canonicalize_sparse_decimal::<D>(
65 *decimal_dtype,
66 *nullability,
67 fill_value,
68 array.patches(),
69 array.len(),
70 )
71 })
72 }
73 dtype @ DType::Utf8(..) => {
74 let fill_value = array.fill_scalar().as_utf8().value();
75 let fill_value = fill_value.map(BufferString::into_inner);
76 canonicalize_varbin(array, dtype.clone(), fill_value)
77 }
78 dtype @ DType::Binary(..) => {
79 let fill_value = array.fill_scalar().as_binary().value();
80 canonicalize_varbin(array, dtype.clone(), fill_value)
81 }
82 DType::List(values_dtype, nullability) => {
83 let resolved_patches = array.resolved_patches()?;
84 canonicalize_sparse_lists(
85 array,
86 resolved_patches,
87 values_dtype.clone(),
88 *nullability,
89 )
90 }
91 DType::Extension(_ext_dtype) => todo!(),
92 }
93 }
94}
95
96fn list_scalar_to_elements_array(scalar: ListScalar) -> VortexResult<Option<ArrayRef>> {
98 let Some(elements) = scalar.elements() else {
99 return Ok(None);
100 };
101
102 let mut builder = builder_with_capacity(scalar.element_dtype(), scalar.len());
103 for s in elements {
104 builder.append_scalar(&s)?;
105 }
106 Ok(Some(builder.finish()))
107}
108
109fn list_scalar_to_singleton_list_array(scalar: ListScalar) -> VortexResult<Option<ArrayRef>> {
111 let nullability = scalar.dtype().nullability();
112 let Some(elements) = list_scalar_to_elements_array(scalar)? else {
113 return Ok(None);
114 };
115
116 let validity = match nullability {
117 Nullability::NonNullable => Validity::NonNullable,
118 Nullability::Nullable => Validity::AllValid,
119 };
120
121 let n = elements.len();
122 ListArray::try_new(elements, buffer![0_u64, n as u64].into_array(), validity)
123 .map(|x| Some(x.into_array()))
124}
125
126#[allow(clippy::cognitive_complexity)]
127fn canonicalize_sparse_lists(
128 array: &SparseArray,
129 resolved_patches: Patches,
130 values_dtype: Arc<DType>,
131 nullability: Nullability,
132) -> VortexResult<Canonical> {
133 macro_rules! match_smallest_offset_type {
134 ($n_elements:expr, | $offset_type:ident | $body:block) => {{
135 let n_elements = $n_elements;
136 if n_elements <= u8::MAX as usize {
137 type $offset_type = u8;
138 $body
139 } else if n_elements <= u16::MAX as usize {
140 type $offset_type = u16;
141 $body
142 } else if n_elements <= u32::MAX as usize {
143 type $offset_type = u32;
144 $body
145 } else {
146 assert!(u64::try_from(n_elements).is_ok());
147 type $offset_type = u64;
148 $body
149 }
150 }};
151 }
152
153 let indices = resolved_patches.indices().to_primitive()?;
154 let values = resolved_patches.values().to_list()?;
155 let fill_value = array.fill_scalar().as_list();
156
157 let n_filled = array.len() - resolved_patches.num_patches();
158 let total_canonical_values = values.elements().len() + fill_value.len() * n_filled;
159
160 let validity = array
161 .validity_mask()
162 .map(|x| Validity::from_mask(x, nullability))?;
163
164 match_each_integer_ptype!(indices.ptype(), |I| {
165 match_smallest_offset_type!(total_canonical_values, |O| {
166 canonicalize_sparse_lists_inner::<I, O>(
167 indices.as_slice(),
168 values,
169 fill_value,
170 values_dtype,
171 array.len(),
172 total_canonical_values,
173 validity,
174 )
175 })
176 })
177}
178
179fn canonicalize_sparse_lists_inner<I: NativePType, SmallestViableOffsetType: OffsetPType>(
180 indices: &[I],
181 values: ListArray,
182 fill_value: ListScalar,
183 values_dtype: Arc<DType>,
184 len: usize,
185 total_canonical_values: usize,
186 validity: Validity,
187) -> VortexResult<Canonical> {
188 let Some(fill_value_array) = list_scalar_to_singleton_list_array(fill_value)? else {
189 let sparse_list_elements = values.elements().clone();
190 let sparse_list_offsets = values.offsets().to_primitive()?;
191 match_each_integer_ptype!(sparse_list_offsets.ptype(), |SparseValuesOffsetType| {
192 let sparse_list_offsets = sparse_list_offsets.as_slice::<SparseValuesOffsetType>();
193 return canonicalize_sparse_lists_inner_with_null_fill_value(
197 indices,
198 sparse_list_elements,
199 sparse_list_offsets,
200 len,
201 validity,
202 );
203 });
204 };
205
206 let mut builder = ListBuilder::<SmallestViableOffsetType>::with_values_and_index_capacity(
207 values_dtype,
208 validity.nullability(),
209 total_canonical_values,
210 len + 1,
211 );
212 let mut next_index = 0_usize;
213 let enumerated_indices_usize = indices
214 .iter()
215 .map(|x| (*x).to_usize().vortex_expect("index must fit in usize"))
216 .enumerate();
217 for (patch_values_index, next_patched_index) in enumerated_indices_usize {
218 for _ in next_index..next_patched_index {
219 builder.extend_from_array(&fill_value_array)?;
220 }
221 builder.extend_from_array(&values.slice(patch_values_index, patch_values_index + 1))?;
222 next_index = next_patched_index + 1;
223 }
224
225 for _ in next_index..len {
226 builder.extend_from_array(&fill_value_array)?;
227 }
228
229 builder.finish().to_canonical()
230}
231
232fn canonicalize_sparse_lists_inner_with_null_fill_value<I: NativePType, O: OffsetPType>(
233 indices: &[I],
234 elements: ArrayRef,
235 offsets: &[O],
236 len: usize,
237 validity: Validity,
238) -> VortexResult<Canonical> {
239 assert!(indices.len() < len + 1);
240 let mut dense_offsets = BufferMut::with_capacity(len + 1);
241
242 dense_offsets.push(offsets[0]);
244 let mut dense_last_set_index = 0_usize;
245 for (sparse_start_index, dense_start_index) in indices.iter().enumerate() {
246 let sparse_end_index = sparse_start_index + 1;
247 let dense_start_index = (*dense_start_index)
248 .to_usize()
249 .vortex_expect("index must fit in usize");
250 let dense_end_index = dense_start_index + 1;
251
252 for _ in (dense_last_set_index + 1)..dense_end_index {
253 dense_offsets.push(dense_offsets[dense_last_set_index]);
255 }
256 dense_offsets.push(offsets[sparse_end_index]);
257 dense_last_set_index = dense_end_index;
258 }
259 for _ in (dense_last_set_index + 1)..len {
260 dense_offsets.push(dense_offsets[dense_last_set_index]);
262 }
263 let array = ListArray::try_new(elements, dense_offsets.into_array(), validity)?;
264 Ok(Canonical::List(array))
265}
266
267fn canonicalize_sparse_bools(patches: &Patches, fill_value: &Scalar) -> VortexResult<Canonical> {
268 let (fill_bool, validity) = if fill_value.is_null() {
269 (false, Validity::AllInvalid)
270 } else {
271 (
272 fill_value.try_into()?,
273 if patches.dtype().nullability() == Nullability::NonNullable {
274 Validity::NonNullable
275 } else {
276 Validity::AllValid
277 },
278 )
279 };
280
281 let bools = BoolArray::new(
282 if fill_bool {
283 BooleanBuffer::new_set(patches.array_len())
284 } else {
285 BooleanBuffer::new_unset(patches.array_len())
286 },
287 validity,
288 );
289
290 bools.patch(patches).map(Canonical::Bool)
291}
292
293fn canonicalize_sparse_primitives<
294 T: NativePType + for<'a> TryFrom<&'a Scalar, Error = VortexError>,
295>(
296 patches: &Patches,
297 fill_value: &Scalar,
298) -> VortexResult<Canonical> {
299 let (primitive_fill, validity) = if fill_value.is_null() {
300 (T::default(), Validity::AllInvalid)
301 } else {
302 (
303 fill_value.try_into()?,
304 if patches.dtype().nullability() == Nullability::NonNullable {
305 Validity::NonNullable
306 } else {
307 Validity::AllValid
308 },
309 )
310 };
311
312 let parray = PrimitiveArray::new(buffer![primitive_fill; patches.array_len()], validity);
313
314 parray.patch(patches).map(Canonical::Primitive)
315}
316
317fn canonicalize_sparse_struct(
318 struct_fields: &StructFields,
319 fill_struct: StructScalar,
320 dtype: &DType,
321 unresolved_patches: &Patches,
323 len: usize,
324) -> VortexResult<Canonical> {
325 let (fill_values, top_level_fill_validity) = match fill_struct.fields() {
326 Some(fill_values) => (fill_values, Validity::AllValid),
327 None => (
328 struct_fields.fields().map(Scalar::default_value).collect(),
329 Validity::AllInvalid,
330 ),
331 };
332 let patch_values_as_struct = unresolved_patches.values().to_canonical()?.into_struct()?;
333 let columns_patch_values = patch_values_as_struct.fields();
334 let names = patch_values_as_struct.names();
335 let validity = if dtype.is_nullable() {
336 top_level_fill_validity.patch(
337 len,
338 unresolved_patches.offset(),
339 unresolved_patches.indices(),
340 &Validity::from_mask(
341 unresolved_patches.values().validity_mask()?,
342 Nullability::Nullable,
343 ),
344 )?
345 } else {
346 top_level_fill_validity
347 .into_non_nullable()
348 .ok_or_else(|| vortex_err!("fill validity should match sparse array nullability"))?
349 };
350
351 columns_patch_values
352 .iter()
353 .cloned()
354 .zip_eq(fill_values.into_iter())
355 .map(|(patch_values, fill_value)| -> VortexResult<_> {
356 SparseArray::try_new_from_patches(
357 unresolved_patches
358 .clone()
359 .map_values(|_| Ok(patch_values))?,
360 fill_value,
361 )
362 })
363 .process_results(|sparse_columns| {
364 StructArray::try_from_iter_with_validity(names.iter().zip_eq(sparse_columns), validity)
365 .map(Canonical::Struct)
366 })?
367}
368
369fn canonicalize_sparse_decimal<D: NativeDecimalType>(
370 decimal_dtype: DecimalDType,
371 nullability: Nullability,
372 fill_value: DecimalScalar,
373 patches: &Patches,
374 len: usize,
375) -> VortexResult<Canonical> {
376 let mut builder = DecimalBuilder::with_capacity::<D>(len, decimal_dtype, nullability);
377 match fill_value.decimal_value() {
378 Some(fill_value) => {
379 let fill_value = fill_value
380 .cast::<D>()
381 .vortex_expect("unexpected value type");
382 for _ in 0..len {
383 builder.append_value(fill_value)
384 }
385 }
386 None => {
387 builder.append_nulls(len);
388 }
389 }
390 let filled_array = builder.finish_into_decimal();
391 let array = filled_array.patch(patches)?;
392 Ok(Canonical::Decimal(array))
393}
394
395fn canonicalize_varbin(
396 array: &SparseArray,
397 dtype: DType,
398 fill_value: Option<ByteBuffer>,
399) -> VortexResult<Canonical> {
400 let patches = array.resolved_patches()?;
401 let indices = patches.indices().to_primitive()?;
402 let values = patches.values().to_varbinview()?;
403 let validity = array
404 .validity_mask()
405 .map(|x| Validity::from_mask(x, dtype.nullability()))?;
406 let len = array.len();
407
408 match_each_integer_ptype!(indices.ptype(), |I| {
409 let indices = indices.buffer::<I>();
410 canonicalize_varbin_inner::<I>(fill_value, indices, values, dtype, validity, len)
411 })
412}
413
414fn canonicalize_varbin_inner<I: NativePType>(
415 fill_value: Option<ByteBuffer>,
416 indices: Buffer<I>,
417 values: VarBinViewArray,
418 dtype: DType,
419 validity: Validity,
420 len: usize,
421) -> VortexResult<Canonical> {
422 assert_eq!(dtype.nullability(), validity.nullability());
423
424 let n_patch_buffers = values.buffers().len();
425 let mut buffers = values.buffers().to_vec();
426
427 let fill = if let Some(buffer) = &fill_value {
428 buffers.push(buffer.clone());
429 BinaryView::make_view(
430 buffer.as_ref(),
431 u32::try_from(n_patch_buffers).vortex_expect("too many buffers"),
432 0,
433 )
434 } else {
435 BinaryView::make_view(&[], 0, 0)
437 };
438
439 let mut views = buffer_mut![fill; len];
440 for (patch_index, &patch) in indices.into_iter().zip_eq(values.views().iter()) {
441 let patch_index_usize = <usize as NumCast>::from(patch_index)
442 .vortex_expect("var bin view indices must fit in usize");
443 views[patch_index_usize] = patch;
444 }
445
446 let array = unsafe {
448 VarBinViewArray::new_unchecked(views.freeze(), Arc::from(buffers), dtype, validity)
449 };
450
451 Ok(Canonical::VarBinView(array))
452}
453
454#[cfg(test)]
455mod test {
456 use rstest::rstest;
457 use vortex_array::arrays::{
458 BoolArray, BooleanBufferBuilder, DecimalArray, ListArray, PrimitiveArray, StructArray,
459 VarBinArray, VarBinViewArray,
460 };
461 use vortex_array::arrow::IntoArrowArray as _;
462 use vortex_array::validity::Validity;
463 use vortex_array::vtable::ValidityHelper;
464 use vortex_array::{IntoArray, ToCanonical};
465 use vortex_buffer::{ByteBuffer, buffer, buffer_mut};
466 use vortex_dtype::Nullability::{NonNullable, Nullable};
467 use vortex_dtype::{DType, DecimalDType, FieldNames, PType, StructFields};
468 use vortex_mask::Mask;
469 use vortex_scalar::{DecimalValue, Scalar};
470
471 use crate::SparseArray;
472 use crate::canonical::{list_scalar_to_elements_array, list_scalar_to_singleton_list_array};
473
474 #[rstest]
475 #[case(Some(true))]
476 #[case(Some(false))]
477 #[case(None)]
478 fn test_sparse_bool(#[case] fill_value: Option<bool>) {
479 let indices = buffer![0u64, 1, 7].into_array();
480 let values = bool_array_from_nullable_vec(vec![Some(true), None, Some(false)], fill_value)
481 .into_array();
482 let sparse_bools =
483 SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
484 assert_eq!(sparse_bools.dtype(), &DType::Bool(Nullable));
485
486 let flat_bools = sparse_bools.to_bool().unwrap();
487 let expected = bool_array_from_nullable_vec(
488 vec![
489 Some(true),
490 None,
491 fill_value,
492 fill_value,
493 fill_value,
494 fill_value,
495 fill_value,
496 Some(false),
497 fill_value,
498 fill_value,
499 ],
500 fill_value,
501 );
502
503 assert_eq!(flat_bools.boolean_buffer(), expected.boolean_buffer());
504 assert_eq!(flat_bools.validity(), expected.validity());
505
506 assert!(flat_bools.boolean_buffer().value(0));
507 assert!(flat_bools.validity().is_valid(0).unwrap());
508 assert_eq!(
509 flat_bools.boolean_buffer().value(1),
510 fill_value.unwrap_or_default()
511 );
512 assert!(!flat_bools.validity().is_valid(1).unwrap());
513 assert_eq!(
514 flat_bools.validity().is_valid(2).unwrap(),
515 fill_value.is_some()
516 );
517 assert!(!flat_bools.boolean_buffer().value(7));
518 assert!(flat_bools.validity().is_valid(7).unwrap());
519 }
520
521 fn bool_array_from_nullable_vec(
522 bools: Vec<Option<bool>>,
523 fill_value: Option<bool>,
524 ) -> BoolArray {
525 let mut buffer = BooleanBufferBuilder::new(bools.len());
526 let mut validity = BooleanBufferBuilder::new(bools.len());
527 for maybe_bool in bools {
528 buffer.append(maybe_bool.unwrap_or_else(|| fill_value.unwrap_or_default()));
529 validity.append(maybe_bool.is_some());
530 }
531 BoolArray::new(buffer.finish(), Validity::from(validity.finish()))
532 }
533
534 #[rstest]
535 #[case(Some(0i32))]
536 #[case(Some(-1i32))]
537 #[case(None)]
538 fn test_sparse_primitive(#[case] fill_value: Option<i32>) {
539 let indices = buffer![0u64, 1, 7].into_array();
540 let values = PrimitiveArray::from_option_iter([Some(0i32), None, Some(1)]).into_array();
541 let sparse_ints =
542 SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
543 assert_eq!(*sparse_ints.dtype(), DType::Primitive(PType::I32, Nullable));
544
545 let flat_ints = sparse_ints.to_primitive().unwrap();
546 let expected = PrimitiveArray::from_option_iter([
547 Some(0i32),
548 None,
549 fill_value,
550 fill_value,
551 fill_value,
552 fill_value,
553 fill_value,
554 Some(1),
555 fill_value,
556 fill_value,
557 ]);
558
559 assert_eq!(flat_ints.byte_buffer(), expected.byte_buffer());
560 assert_eq!(flat_ints.validity(), expected.validity());
561
562 assert_eq!(flat_ints.as_slice::<i32>()[0], 0);
563 assert!(flat_ints.validity().is_valid(0).unwrap());
564 assert_eq!(flat_ints.as_slice::<i32>()[1], 0);
565 assert!(!flat_ints.validity().is_valid(1).unwrap());
566 assert_eq!(
567 flat_ints.as_slice::<i32>()[2],
568 fill_value.unwrap_or_default()
569 );
570 assert_eq!(
571 flat_ints.validity().is_valid(2).unwrap(),
572 fill_value.is_some()
573 );
574 assert_eq!(flat_ints.as_slice::<i32>()[7], 1);
575 assert!(flat_ints.validity().is_valid(7).unwrap());
576 }
577
578 #[test]
579 fn test_sparse_struct_valid_fill() {
580 let field_names = FieldNames::from_iter(["a", "b"]);
581 let field_types = vec![
582 DType::Primitive(PType::I32, Nullable),
583 DType::Primitive(PType::I32, Nullable),
584 ];
585 let struct_fields = StructFields::new(field_names, field_types);
586 let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
587
588 let indices = buffer![0u64, 1, 7, 8].into_array();
589 let patch_values_a =
590 PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
591 let patch_values_b =
592 PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
593 let patch_values = StructArray::try_new_with_dtype(
594 vec![patch_values_a, patch_values_b],
595 struct_fields.clone(),
596 4,
597 Validity::Array(
598 BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
599 ),
600 )
601 .unwrap()
602 .into_array();
603
604 let fill_scalar = Scalar::struct_(
605 struct_dtype,
606 vec![Scalar::from(Some(-10i32)), Scalar::from(Some(-1i32))],
607 );
608 let len = 10;
609 let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
610
611 let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
612 if i == 0 {
613 Some(10)
614 } else if i == 1 {
615 None
616 } else if i == 7 {
617 Some(20)
618 } else {
619 Some(-10)
620 }
621 }));
622 let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
623 if i == 0 {
624 Some(1i32)
625 } else if i == 1 {
626 Some(2)
627 } else if i == 7 {
628 None
629 } else {
630 Some(-1)
631 }
632 }));
633
634 let expected = StructArray::try_new_with_dtype(
635 vec![expected_a.into_array(), expected_b.into_array()],
636 struct_fields,
637 len,
638 Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
640 )
641 .unwrap()
642 .to_array()
643 .into_arrow_preferred()
644 .unwrap();
645
646 let actual = sparse_struct
647 .to_struct()
648 .unwrap()
649 .to_array()
650 .into_arrow_preferred()
651 .unwrap();
652
653 assert_eq!(expected.data_type(), actual.data_type());
654 assert_eq!(&expected, &actual);
655 }
656
657 #[test]
658 fn test_sparse_struct_invalid_fill() {
659 let field_names = FieldNames::from_iter(["a", "b"]);
660 let field_types = vec![
661 DType::Primitive(PType::I32, Nullable),
662 DType::Primitive(PType::I32, Nullable),
663 ];
664 let struct_fields = StructFields::new(field_names, field_types);
665 let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
666
667 let indices = buffer![0u64, 1, 7, 8].into_array();
668 let patch_values_a =
669 PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
670 let patch_values_b =
671 PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
672 let patch_values = StructArray::try_new_with_dtype(
673 vec![patch_values_a, patch_values_b],
674 struct_fields.clone(),
675 4,
676 Validity::Array(
677 BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
678 ),
679 )
680 .unwrap()
681 .into_array();
682
683 let fill_scalar = Scalar::null(struct_dtype);
684 let len = 10;
685 let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
686
687 let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
688 if i == 0 {
689 Some(10)
690 } else if i == 1 {
691 None
692 } else if i == 7 {
693 Some(20)
694 } else {
695 Some(-10)
696 }
697 }));
698 let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
699 if i == 0 {
700 Some(1i32)
701 } else if i == 1 {
702 Some(2)
703 } else if i == 7 {
704 None
705 } else {
706 Some(-1)
707 }
708 }));
709
710 let expected = StructArray::try_new_with_dtype(
711 vec![expected_a.into_array(), expected_b.into_array()],
712 struct_fields,
713 len,
714 Validity::from_mask(Mask::from_indices(10, vec![0, 1, 7]), Nullable),
716 )
717 .unwrap()
718 .to_array()
719 .into_arrow_preferred()
720 .unwrap();
721
722 let actual = sparse_struct
723 .to_struct()
724 .unwrap()
725 .to_array()
726 .into_arrow_preferred()
727 .unwrap();
728
729 assert_eq!(expected.data_type(), actual.data_type());
730 assert_eq!(&expected, &actual);
731 }
732
733 #[test]
734 fn test_sparse_decimal() {
735 let indices = buffer![0u32, 1u32, 7u32, 8u32].into_array();
736 let decimal_dtype = DecimalDType::new(3, 2);
737 let patch_values = DecimalArray::new(
738 buffer![100i128, 200i128, 300i128, 4000i128],
739 decimal_dtype,
740 Validity::from_iter([true, true, true, false]),
741 )
742 .to_array();
743 let len = 10;
744 let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
745 let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
746
747 let expected = DecimalArray::new(
748 buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
749 decimal_dtype,
750 Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
752 )
753 .to_array()
754 .into_arrow_preferred()
755 .unwrap();
756
757 let actual = sparse_struct
758 .to_decimal()
759 .unwrap()
760 .to_array()
761 .into_arrow_preferred()
762 .unwrap();
763
764 assert_eq!(expected.data_type(), actual.data_type());
765 assert_eq!(&expected, &actual);
766 }
767
768 #[test]
769 fn test_sparse_utf8_varbinview_non_null_fill() {
770 let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
771 Some("hello"),
772 Some("goodbye"),
773 Some("hello"),
774 None,
775 Some("bonjour"),
776 Some("你好"),
777 None,
778 ])
779 .into_array();
780
781 let array = SparseArray::try_new(
782 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
783 strings,
784 12,
785 Scalar::from(Some("123".to_owned())),
786 )
787 .unwrap();
788
789 let actual = array.to_varbinview().unwrap().into_array();
790 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
791 Some("hello"),
792 Some("123"),
793 Some("123"),
794 Some("goodbye"),
795 Some("hello"),
796 None,
797 Some("123"),
798 Some("bonjour"),
799 Some("123"),
800 Some("你好"),
801 None,
802 Some("123"),
803 ])
804 .into_array();
805
806 let actual = actual.into_arrow_preferred().unwrap();
807 let expected = expected.into_arrow_preferred().unwrap();
808
809 assert_eq!(actual.data_type(), expected.data_type());
810 assert_eq!(&actual, &expected);
811 }
812
813 #[test]
814 fn test_sparse_utf8_varbinview_null_fill() {
815 let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
816 Some("hello"),
817 Some("goodbye"),
818 Some("hello"),
819 None,
820 Some("bonjour"),
821 Some("你好"),
822 None,
823 ])
824 .into_array();
825
826 let array = SparseArray::try_new(
827 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
828 strings,
829 12,
830 Scalar::null(DType::Utf8(Nullable)),
831 )
832 .unwrap();
833
834 let actual = array.to_varbinview().unwrap().into_array();
835 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
836 Some("hello"),
837 None,
838 None,
839 Some("goodbye"),
840 Some("hello"),
841 None,
842 None,
843 Some("bonjour"),
844 None,
845 Some("你好"),
846 None,
847 None,
848 ])
849 .into_array();
850
851 let actual = actual.into_arrow_preferred().unwrap();
852 let expected = expected.into_arrow_preferred().unwrap();
853
854 assert_eq!(actual.data_type(), expected.data_type());
855 assert_eq!(&actual, &expected);
856 }
857
858 #[test]
859 fn test_sparse_utf8_varbinview_non_nullable() {
860 let strings =
861 VarBinViewArray::from_iter_str(["hello", "goodbye", "hello", "bonjour", "你好"])
862 .into_array();
863
864 let array = SparseArray::try_new(
865 buffer![0u16, 3, 4, 5, 8].into_array(),
866 strings,
867 9,
868 Scalar::from("123".to_owned()),
869 )
870 .unwrap();
871
872 let actual = array.to_varbinview().unwrap().into_array();
873 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
874 Some("hello"),
875 Some("123"),
876 Some("123"),
877 Some("goodbye"),
878 Some("hello"),
879 Some("bonjour"),
880 Some("123"),
881 Some("123"),
882 Some("你好"),
883 ])
884 .into_array();
885
886 let actual = actual.into_arrow_preferred().unwrap();
887 let expected = expected.into_arrow_preferred().unwrap();
888
889 assert_eq!(actual.data_type(), expected.data_type());
890 assert_eq!(&actual, &expected);
891 }
892
893 #[test]
894 fn test_sparse_utf8_varbin_null_fill() {
895 let strings = <VarBinArray as FromIterator<_>>::from_iter([
896 Some("hello"),
897 Some("goodbye"),
898 Some("hello"),
899 None,
900 Some("bonjour"),
901 Some("你好"),
902 None,
903 ])
904 .into_array();
905
906 let array = SparseArray::try_new(
907 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
908 strings,
909 12,
910 Scalar::null(DType::Utf8(Nullable)),
911 )
912 .unwrap();
913
914 let actual = array.to_varbinview().unwrap().into_array();
915 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
916 Some("hello"),
917 None,
918 None,
919 Some("goodbye"),
920 Some("hello"),
921 None,
922 None,
923 Some("bonjour"),
924 None,
925 Some("你好"),
926 None,
927 None,
928 ])
929 .into_array();
930
931 let actual = actual.into_arrow_preferred().unwrap();
932 let expected = expected.into_arrow_preferred().unwrap();
933
934 assert_eq!(actual.data_type(), expected.data_type());
935 assert_eq!(&actual, &expected);
936 }
937
938 #[test]
939 fn test_sparse_binary_varbinview_non_null_fill() {
940 let binaries = VarBinViewArray::from_iter_nullable_bin([
941 Some(b"hello" as &[u8]),
942 Some(b"goodbye"),
943 Some(b"hello"),
944 None,
945 Some(b"\x00"),
946 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
947 None,
948 ])
949 .into_array();
950
951 let array = SparseArray::try_new(
952 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
953 binaries,
954 12,
955 Scalar::from(Some(ByteBuffer::from(b"123".to_vec()))),
956 )
957 .unwrap();
958
959 let actual = array.to_varbinview().unwrap().into_array();
960 let expected = VarBinViewArray::from_iter_nullable_bin([
961 Some(b"hello" as &[u8]),
962 Some(b"123"),
963 Some(b"123"),
964 Some(b"goodbye"),
965 Some(b"hello"),
966 None,
967 Some(b"123"),
968 Some(b"\x00"),
969 Some(b"123"),
970 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
971 None,
972 Some(b"123"),
973 ])
974 .into_array();
975
976 let actual = actual.into_arrow_preferred().unwrap();
977 let expected = expected.into_arrow_preferred().unwrap();
978
979 assert_eq!(actual.data_type(), expected.data_type());
980 assert_eq!(&actual, &expected);
981 }
982
983 #[test]
984 fn test_list_scalar_to_elements_array() {
985 let scalar = Scalar::from(Some(vec![1, 2, 3]));
986 let array = list_scalar_to_elements_array(scalar.as_list()).unwrap();
987 assert_eq!(
988 array.unwrap().display_values().to_string(),
989 "[1i32, 2i32, 3i32]"
990 );
991
992 let scalar = Scalar::null_typed::<Vec<i32>>();
993 let array = list_scalar_to_elements_array(scalar.as_list()).unwrap();
994 assert!(array.is_none());
995 }
996
997 #[test]
998 fn test_list_scalar_to_singleton_list_array() {
999 let scalar = Scalar::from(Some(vec![1, 2, 3]));
1000 let array = list_scalar_to_singleton_list_array(scalar.as_list()).unwrap();
1001 assert!(array.is_some());
1002 let array = array.unwrap();
1003 assert_eq!(array.scalar_at(0), scalar);
1004 assert_eq!(array.len(), 1);
1005
1006 let scalar = Scalar::null_typed::<Vec<i32>>();
1007 let array = list_scalar_to_singleton_list_array(scalar.as_list()).unwrap();
1008 assert!(array.is_none());
1009 }
1010
1011 #[test]
1012 fn test_sparse_list_null_fill() {
1013 let elements = buffer![1i32, 2, 1, 2].into_array();
1014 let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1015 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1016 .unwrap()
1017 .into_array();
1018
1019 let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1020 let fill_value = Scalar::null(lists.dtype().clone());
1021 let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1022 .unwrap()
1023 .into_array();
1024
1025 let actual = sparse.to_canonical().unwrap().into_array();
1026 let expected = ListArray::try_new(
1027 buffer![1i32, 2, 1, 2].into_array(),
1028 buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1029 Validity::Array(
1030 BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1031 ),
1032 )
1033 .unwrap()
1034 .into_array();
1035
1036 let actual = actual.into_arrow_preferred().unwrap();
1037 let expected = expected.into_arrow_preferred().unwrap();
1038
1039 assert_eq!(actual.data_type(), expected.data_type());
1040 assert_eq!(&actual, &expected);
1041 }
1042
1043 #[test]
1044 fn test_sparse_list_null_fill_sliced_sparse_values() {
1045 let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
1046 let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7, 8].into_array();
1047 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1048 .unwrap()
1049 .into_array();
1050 let lists = lists.slice(2, 6);
1051
1052 let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1053 let fill_value = Scalar::null(lists.dtype().clone());
1054 let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1055 .unwrap()
1056 .into_array();
1057
1058 let actual = sparse.to_canonical().unwrap().into_array();
1059 let expected = ListArray::try_new(
1060 buffer![1i32, 2, 1, 2].into_array(),
1061 buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1062 Validity::Array(
1063 BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1064 ),
1065 )
1066 .unwrap()
1067 .into_array();
1068
1069 let actual = actual.into_arrow_preferred().unwrap();
1070 let expected = expected.into_arrow_preferred().unwrap();
1071
1072 assert_eq!(actual.data_type(), expected.data_type());
1073 assert_eq!(&actual, &expected);
1074 }
1075
1076 #[test]
1077 fn test_sparse_list_non_null_fill() {
1078 let elements = buffer![1i32, 2, 1, 2].into_array();
1079 let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1080 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1081 .unwrap()
1082 .into_array();
1083
1084 let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1085 let fill_value = Scalar::from(Some(vec![5i32, 6, 7, 8]));
1086 let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1087 .unwrap()
1088 .into_array();
1089
1090 let actual = sparse.to_canonical().unwrap().into_array();
1091 let expected = ListArray::try_new(
1092 buffer![1i32, 5, 6, 7, 8, 5, 6, 7, 8, 2, 1, 2].into_array(),
1093 buffer![0u32, 1, 5, 9, 10, 11, 12].into_array(),
1094 Validity::AllValid,
1095 )
1096 .unwrap()
1097 .into_array();
1098
1099 let actual = actual.into_arrow_preferred().unwrap();
1100 let expected = expected.into_arrow_preferred().unwrap();
1101
1102 assert_eq!(actual.data_type(), expected.data_type());
1103 assert_eq!(&actual, &expected);
1104 }
1105
1106 #[test]
1107 fn test_sparse_binary_varbin_null_fill() {
1108 let strings = <VarBinArray as FromIterator<_>>::from_iter([
1109 Some(b"hello" as &[u8]),
1110 Some(b"goodbye"),
1111 Some(b"hello"),
1112 None,
1113 Some(b"\x00"),
1114 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1115 None,
1116 ])
1117 .into_array();
1118
1119 let array = SparseArray::try_new(
1120 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
1121 strings,
1122 12,
1123 Scalar::null(DType::Binary(Nullable)),
1124 )
1125 .unwrap();
1126
1127 let actual = array.to_varbinview().unwrap().into_array();
1128 let expected = VarBinViewArray::from_iter_nullable_bin([
1129 Some(b"hello" as &[u8]),
1130 None,
1131 None,
1132 Some(b"goodbye"),
1133 Some(b"hello"),
1134 None,
1135 None,
1136 Some(b"\x00"),
1137 None,
1138 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1139 None,
1140 None,
1141 ])
1142 .into_array();
1143 let actual = actual.into_arrow_preferred().unwrap();
1144 let expected = expected.into_arrow_preferred().unwrap();
1145
1146 assert_eq!(actual.data_type(), expected.data_type());
1147 assert_eq!(&actual, &expected);
1148 }
1149
1150 #[test]
1151 fn test_sparse_list_grows_offset_type() {
1152 let elements = buffer![1i32, 2, 1, 2].into_array();
1153 let offsets = buffer![0u8, 1, 2, 3, 4].into_array();
1154 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1155 .unwrap()
1156 .into_array();
1157
1158 let indices = buffer![0u8, 1u8, 2u8, 3u8].into_array();
1159 let fill_value = Scalar::from(Some(vec![42i32; 252])); let sparse = SparseArray::try_new(indices, lists, 5, fill_value)
1161 .unwrap()
1162 .into_array();
1163
1164 let actual = sparse.to_canonical().unwrap().into_array();
1165 let mut expected_elements = buffer_mut![1, 2, 1, 2];
1166 expected_elements.extend(buffer![42i32; 252]);
1167 let expected = ListArray::try_new(
1168 expected_elements.freeze().into_array(),
1169 buffer![0u16, 1, 2, 3, 4, 256].into_array(),
1170 Validity::AllValid,
1171 )
1172 .unwrap()
1173 .into_array();
1174
1175 assert_eq!(
1176 actual.to_list().unwrap().offsets().dtype(),
1177 &DType::Primitive(PType::U16, NonNullable)
1178 );
1179
1180 let actual = actual.into_arrow_preferred().unwrap();
1181 let expected = expected.into_arrow_preferred().unwrap();
1182
1183 assert_eq!(actual.data_type(), expected.data_type());
1184 assert_eq!(&actual, &expected);
1185 }
1186}