1use std::sync::Arc;
5
6use itertools::Itertools;
7use num_traits::NumCast;
8use vortex_array::arrays::{
9 BinaryView, BoolArray, BooleanBuffer, ConstantArray, ListArray, NullArray, OffsetPType,
10 PrimitiveArray, StructArray, VarBinViewArray, smallest_storage_type,
11};
12use vortex_array::builders::{
13 ArrayBuilder as _, ArrayBuilderExt, DecimalBuilder, ListBuilder, builder_with_capacity,
14};
15use vortex_array::patches::Patches;
16use vortex_array::validity::Validity;
17use vortex_array::vtable::CanonicalVTable;
18use vortex_array::{Array, ArrayRef, Canonical, IntoArray as _, ToCanonical as _};
19use vortex_buffer::{Buffer, BufferMut, BufferString, ByteBuffer, buffer, buffer_mut};
20use vortex_dtype::{
21 DType, DecimalDType, NativePType, Nullability, StructFields, match_each_integer_ptype,
22 match_each_native_ptype,
23};
24use vortex_error::{VortexError, VortexExpect as _, VortexResult, vortex_err};
25use vortex_scalar::{
26 DecimalScalar, ListScalar, NativeDecimalType, Scalar, StructScalar,
27 match_each_decimal_value_type,
28};
29
30use crate::{SparseArray, SparseVTable};
31
32impl CanonicalVTable<SparseVTable> for SparseVTable {
33 fn canonicalize(array: &SparseArray) -> VortexResult<Canonical> {
34 if array.patches().num_patches() == 0 {
35 return ConstantArray::new(array.fill_scalar().clone(), array.len()).to_canonical();
36 }
37
38 match array.dtype() {
39 DType::Null => {
40 assert!(array.fill_scalar().is_null());
41 Ok(Canonical::Null(NullArray::new(array.len())))
42 }
43 DType::Bool(..) => {
44 let resolved_patches = array.resolved_patches()?;
45 canonicalize_sparse_bools(&resolved_patches, array.fill_scalar())
46 }
47 DType::Primitive(ptype, ..) => {
48 let resolved_patches = array.resolved_patches()?;
49 match_each_native_ptype!(ptype, |P| {
50 canonicalize_sparse_primitives::<P>(&resolved_patches, array.fill_scalar())
51 })
52 }
53 DType::Struct(struct_fields, ..) => canonicalize_sparse_struct(
54 struct_fields,
55 array.fill_scalar().as_struct(),
56 array.dtype(),
57 array.patches(),
58 array.len(),
59 ),
60 DType::Decimal(decimal_dtype, nullability) => {
61 let canonical_decimal_value_type = smallest_storage_type(decimal_dtype);
62 let fill_value = array.fill_scalar().as_decimal();
63 match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
64 canonicalize_sparse_decimal::<D>(
65 *decimal_dtype,
66 *nullability,
67 fill_value,
68 array.patches(),
69 array.len(),
70 )
71 })
72 }
73 dtype @ DType::Utf8(..) => {
74 let fill_value = array.fill_scalar().as_utf8().value();
75 let fill_value = fill_value.map(BufferString::into_inner);
76 canonicalize_varbin(array, dtype.clone(), fill_value)
77 }
78 dtype @ DType::Binary(..) => {
79 let fill_value = array.fill_scalar().as_binary().value();
80 canonicalize_varbin(array, dtype.clone(), fill_value)
81 }
82 DType::List(values_dtype, nullability) => {
83 let resolved_patches = array.resolved_patches()?;
84 canonicalize_sparse_lists(
85 array,
86 resolved_patches,
87 values_dtype.clone(),
88 *nullability,
89 )
90 }
91 DType::FixedSizeList(..) => {
92 unimplemented!("TODO(connor)[FixedSizeList]")
93 }
94 DType::Extension(_ext_dtype) => todo!(),
95 }
96 }
97}
98
99fn list_scalar_to_elements_array(scalar: ListScalar) -> VortexResult<Option<ArrayRef>> {
101 let Some(elements) = scalar.elements() else {
102 return Ok(None);
103 };
104
105 let mut builder = builder_with_capacity(scalar.element_dtype(), scalar.len());
106 for s in elements {
107 builder.append_scalar(&s)?;
108 }
109 Ok(Some(builder.finish()))
110}
111
112fn list_scalar_to_singleton_list_array(scalar: ListScalar) -> VortexResult<Option<ArrayRef>> {
114 let nullability = scalar.dtype().nullability();
115 let Some(elements) = list_scalar_to_elements_array(scalar)? else {
116 return Ok(None);
117 };
118
119 let validity = match nullability {
120 Nullability::NonNullable => Validity::NonNullable,
121 Nullability::Nullable => Validity::AllValid,
122 };
123
124 let n = elements.len();
125 ListArray::try_new(elements, buffer![0_u64, n as u64].into_array(), validity)
126 .map(|x| Some(x.into_array()))
127}
128
129#[allow(clippy::cognitive_complexity)]
130fn canonicalize_sparse_lists(
131 array: &SparseArray,
132 resolved_patches: Patches,
133 values_dtype: Arc<DType>,
134 nullability: Nullability,
135) -> VortexResult<Canonical> {
136 macro_rules! match_smallest_offset_type {
137 ($n_elements:expr, | $offset_type:ident | $body:block) => {{
138 let n_elements = $n_elements;
139 if n_elements <= u8::MAX as usize {
140 type $offset_type = u8;
141 $body
142 } else if n_elements <= u16::MAX as usize {
143 type $offset_type = u16;
144 $body
145 } else if n_elements <= u32::MAX as usize {
146 type $offset_type = u32;
147 $body
148 } else {
149 assert!(u64::try_from(n_elements).is_ok());
150 type $offset_type = u64;
151 $body
152 }
153 }};
154 }
155
156 let indices = resolved_patches.indices().to_primitive()?;
157 let values = resolved_patches.values().to_list()?;
158 let fill_value = array.fill_scalar().as_list();
159
160 let n_filled = array.len() - resolved_patches.num_patches();
161 let total_canonical_values = values.elements().len() + fill_value.len() * n_filled;
162
163 let validity = array
164 .validity_mask()
165 .map(|x| Validity::from_mask(x, nullability))?;
166
167 match_each_integer_ptype!(indices.ptype(), |I| {
168 match_smallest_offset_type!(total_canonical_values, |O| {
169 canonicalize_sparse_lists_inner::<I, O>(
170 indices.as_slice(),
171 values,
172 fill_value,
173 values_dtype,
174 array.len(),
175 total_canonical_values,
176 validity,
177 )
178 })
179 })
180}
181
182fn canonicalize_sparse_lists_inner<I: NativePType, SmallestViableOffsetType: OffsetPType>(
183 indices: &[I],
184 values: ListArray,
185 fill_value: ListScalar,
186 values_dtype: Arc<DType>,
187 len: usize,
188 total_canonical_values: usize,
189 validity: Validity,
190) -> VortexResult<Canonical> {
191 let Some(fill_value_array) = list_scalar_to_singleton_list_array(fill_value)? else {
192 let sparse_list_elements = values.elements().clone();
193 let sparse_list_offsets = values.offsets().to_primitive()?;
194 match_each_integer_ptype!(sparse_list_offsets.ptype(), |SparseValuesOffsetType| {
195 let sparse_list_offsets = sparse_list_offsets.as_slice::<SparseValuesOffsetType>();
196 return canonicalize_sparse_lists_inner_with_null_fill_value(
200 indices,
201 sparse_list_elements,
202 sparse_list_offsets,
203 len,
204 validity,
205 );
206 });
207 };
208
209 let mut builder = ListBuilder::<SmallestViableOffsetType>::with_values_and_index_capacity(
210 values_dtype,
211 validity.nullability(),
212 total_canonical_values,
213 len + 1,
214 );
215 let mut next_index = 0_usize;
216 let enumerated_indices_usize = indices
217 .iter()
218 .map(|x| (*x).to_usize().vortex_expect("index must fit in usize"))
219 .enumerate();
220 for (patch_values_index, next_patched_index) in enumerated_indices_usize {
221 for _ in next_index..next_patched_index {
222 builder.extend_from_array(&fill_value_array)?;
223 }
224 builder.extend_from_array(&values.slice(patch_values_index, patch_values_index + 1))?;
225 next_index = next_patched_index + 1;
226 }
227
228 for _ in next_index..len {
229 builder.extend_from_array(&fill_value_array)?;
230 }
231
232 builder.finish().to_canonical()
233}
234
235fn canonicalize_sparse_lists_inner_with_null_fill_value<I: NativePType, O: OffsetPType>(
236 indices: &[I],
237 elements: ArrayRef,
238 offsets: &[O],
239 len: usize,
240 validity: Validity,
241) -> VortexResult<Canonical> {
242 assert!(indices.len() < len + 1);
243 let mut dense_offsets = BufferMut::with_capacity(len + 1);
244
245 dense_offsets.push(offsets[0]);
247 let mut dense_last_set_index = 0_usize;
248 for (sparse_start_index, dense_start_index) in indices.iter().enumerate() {
249 let sparse_end_index = sparse_start_index + 1;
250 let dense_start_index = (*dense_start_index)
251 .to_usize()
252 .vortex_expect("index must fit in usize");
253 let dense_end_index = dense_start_index + 1;
254
255 for _ in (dense_last_set_index + 1)..dense_end_index {
256 dense_offsets.push(dense_offsets[dense_last_set_index]);
258 }
259 dense_offsets.push(offsets[sparse_end_index]);
260 dense_last_set_index = dense_end_index;
261 }
262 for _ in (dense_last_set_index + 1)..len {
263 dense_offsets.push(dense_offsets[dense_last_set_index]);
265 }
266 let array = ListArray::try_new(elements, dense_offsets.into_array(), validity)?;
267 Ok(Canonical::List(array))
268}
269
270fn canonicalize_sparse_bools(patches: &Patches, fill_value: &Scalar) -> VortexResult<Canonical> {
271 let (fill_bool, validity) = if fill_value.is_null() {
272 (false, Validity::AllInvalid)
273 } else {
274 (
275 fill_value.try_into()?,
276 if patches.dtype().nullability() == Nullability::NonNullable {
277 Validity::NonNullable
278 } else {
279 Validity::AllValid
280 },
281 )
282 };
283
284 let bools = BoolArray::new(
285 if fill_bool {
286 BooleanBuffer::new_set(patches.array_len())
287 } else {
288 BooleanBuffer::new_unset(patches.array_len())
289 },
290 validity,
291 );
292
293 bools.patch(patches).map(Canonical::Bool)
294}
295
296fn canonicalize_sparse_primitives<
297 T: NativePType + for<'a> TryFrom<&'a Scalar, Error = VortexError>,
298>(
299 patches: &Patches,
300 fill_value: &Scalar,
301) -> VortexResult<Canonical> {
302 let (primitive_fill, validity) = if fill_value.is_null() {
303 (T::default(), Validity::AllInvalid)
304 } else {
305 (
306 fill_value.try_into()?,
307 if patches.dtype().nullability() == Nullability::NonNullable {
308 Validity::NonNullable
309 } else {
310 Validity::AllValid
311 },
312 )
313 };
314
315 let parray = PrimitiveArray::new(buffer![primitive_fill; patches.array_len()], validity);
316
317 parray.patch(patches).map(Canonical::Primitive)
318}
319
320fn canonicalize_sparse_struct(
321 struct_fields: &StructFields,
322 fill_struct: StructScalar,
323 dtype: &DType,
324 unresolved_patches: &Patches,
326 len: usize,
327) -> VortexResult<Canonical> {
328 let (fill_values, top_level_fill_validity) = match fill_struct.fields() {
329 Some(fill_values) => (fill_values, Validity::AllValid),
330 None => (
331 struct_fields.fields().map(Scalar::default_value).collect(),
332 Validity::AllInvalid,
333 ),
334 };
335 let patch_values_as_struct = unresolved_patches.values().to_canonical()?.into_struct()?;
336 let columns_patch_values = patch_values_as_struct.fields();
337 let names = patch_values_as_struct.names();
338 let validity = if dtype.is_nullable() {
339 top_level_fill_validity.patch(
340 len,
341 unresolved_patches.offset(),
342 unresolved_patches.indices(),
343 &Validity::from_mask(
344 unresolved_patches.values().validity_mask()?,
345 Nullability::Nullable,
346 ),
347 )?
348 } else {
349 top_level_fill_validity
350 .into_non_nullable()
351 .ok_or_else(|| vortex_err!("fill validity should match sparse array nullability"))?
352 };
353
354 columns_patch_values
355 .iter()
356 .cloned()
357 .zip_eq(fill_values.into_iter())
358 .map(|(patch_values, fill_value)| -> VortexResult<_> {
359 SparseArray::try_new_from_patches(
360 unresolved_patches
361 .clone()
362 .map_values(|_| Ok(patch_values))?,
363 fill_value,
364 )
365 })
366 .process_results(|sparse_columns| {
367 StructArray::try_from_iter_with_validity(names.iter().zip_eq(sparse_columns), validity)
368 .map(Canonical::Struct)
369 })?
370}
371
372fn canonicalize_sparse_decimal<D: NativeDecimalType>(
373 decimal_dtype: DecimalDType,
374 nullability: Nullability,
375 fill_value: DecimalScalar,
376 patches: &Patches,
377 len: usize,
378) -> VortexResult<Canonical> {
379 let mut builder = DecimalBuilder::with_capacity::<D>(len, decimal_dtype, nullability);
380 match fill_value.decimal_value() {
381 Some(fill_value) => {
382 let fill_value = fill_value
383 .cast::<D>()
384 .vortex_expect("unexpected value type");
385 for _ in 0..len {
386 builder.append_value(fill_value)
387 }
388 }
389 None => {
390 builder.append_nulls(len);
391 }
392 }
393 let filled_array = builder.finish_into_decimal();
394 let array = filled_array.patch(patches)?;
395 Ok(Canonical::Decimal(array))
396}
397
398fn canonicalize_varbin(
399 array: &SparseArray,
400 dtype: DType,
401 fill_value: Option<ByteBuffer>,
402) -> VortexResult<Canonical> {
403 let patches = array.resolved_patches()?;
404 let indices = patches.indices().to_primitive()?;
405 let values = patches.values().to_varbinview()?;
406 let validity = array
407 .validity_mask()
408 .map(|x| Validity::from_mask(x, dtype.nullability()))?;
409 let len = array.len();
410
411 match_each_integer_ptype!(indices.ptype(), |I| {
412 let indices = indices.buffer::<I>();
413 canonicalize_varbin_inner::<I>(fill_value, indices, values, dtype, validity, len)
414 })
415}
416
417fn canonicalize_varbin_inner<I: NativePType>(
418 fill_value: Option<ByteBuffer>,
419 indices: Buffer<I>,
420 values: VarBinViewArray,
421 dtype: DType,
422 validity: Validity,
423 len: usize,
424) -> VortexResult<Canonical> {
425 assert_eq!(dtype.nullability(), validity.nullability());
426
427 let n_patch_buffers = values.buffers().len();
428 let mut buffers = values.buffers().to_vec();
429
430 let fill = if let Some(buffer) = &fill_value {
431 buffers.push(buffer.clone());
432 BinaryView::make_view(
433 buffer.as_ref(),
434 u32::try_from(n_patch_buffers).vortex_expect("too many buffers"),
435 0,
436 )
437 } else {
438 BinaryView::make_view(&[], 0, 0)
440 };
441
442 let mut views = buffer_mut![fill; len];
443 for (patch_index, &patch) in indices.into_iter().zip_eq(values.views().iter()) {
444 let patch_index_usize = <usize as NumCast>::from(patch_index)
445 .vortex_expect("var bin view indices must fit in usize");
446 views[patch_index_usize] = patch;
447 }
448
449 let array = unsafe {
451 VarBinViewArray::new_unchecked(views.freeze(), Arc::from(buffers), dtype, validity)
452 };
453
454 Ok(Canonical::VarBinView(array))
455}
456
457#[cfg(test)]
458mod test {
459 use rstest::rstest;
460 use vortex_array::arrays::{
461 BoolArray, BooleanBufferBuilder, DecimalArray, ListArray, PrimitiveArray, StructArray,
462 VarBinArray, VarBinViewArray,
463 };
464 use vortex_array::arrow::IntoArrowArray as _;
465 use vortex_array::validity::Validity;
466 use vortex_array::vtable::ValidityHelper;
467 use vortex_array::{IntoArray, ToCanonical};
468 use vortex_buffer::{ByteBuffer, buffer, buffer_mut};
469 use vortex_dtype::Nullability::{NonNullable, Nullable};
470 use vortex_dtype::{DType, DecimalDType, FieldNames, PType, StructFields};
471 use vortex_mask::Mask;
472 use vortex_scalar::{DecimalValue, Scalar};
473
474 use crate::SparseArray;
475 use crate::canonical::{list_scalar_to_elements_array, list_scalar_to_singleton_list_array};
476
477 #[rstest]
478 #[case(Some(true))]
479 #[case(Some(false))]
480 #[case(None)]
481 fn test_sparse_bool(#[case] fill_value: Option<bool>) {
482 let indices = buffer![0u64, 1, 7].into_array();
483 let values = bool_array_from_nullable_vec(vec![Some(true), None, Some(false)], fill_value)
484 .into_array();
485 let sparse_bools =
486 SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
487 assert_eq!(sparse_bools.dtype(), &DType::Bool(Nullable));
488
489 let flat_bools = sparse_bools.to_bool().unwrap();
490 let expected = bool_array_from_nullable_vec(
491 vec![
492 Some(true),
493 None,
494 fill_value,
495 fill_value,
496 fill_value,
497 fill_value,
498 fill_value,
499 Some(false),
500 fill_value,
501 fill_value,
502 ],
503 fill_value,
504 );
505
506 assert_eq!(flat_bools.boolean_buffer(), expected.boolean_buffer());
507 assert_eq!(flat_bools.validity(), expected.validity());
508
509 assert!(flat_bools.boolean_buffer().value(0));
510 assert!(flat_bools.validity().is_valid(0).unwrap());
511 assert_eq!(
512 flat_bools.boolean_buffer().value(1),
513 fill_value.unwrap_or_default()
514 );
515 assert!(!flat_bools.validity().is_valid(1).unwrap());
516 assert_eq!(
517 flat_bools.validity().is_valid(2).unwrap(),
518 fill_value.is_some()
519 );
520 assert!(!flat_bools.boolean_buffer().value(7));
521 assert!(flat_bools.validity().is_valid(7).unwrap());
522 }
523
524 fn bool_array_from_nullable_vec(
525 bools: Vec<Option<bool>>,
526 fill_value: Option<bool>,
527 ) -> BoolArray {
528 let mut buffer = BooleanBufferBuilder::new(bools.len());
529 let mut validity = BooleanBufferBuilder::new(bools.len());
530 for maybe_bool in bools {
531 buffer.append(maybe_bool.unwrap_or_else(|| fill_value.unwrap_or_default()));
532 validity.append(maybe_bool.is_some());
533 }
534 BoolArray::new(buffer.finish(), Validity::from(validity.finish()))
535 }
536
537 #[rstest]
538 #[case(Some(0i32))]
539 #[case(Some(-1i32))]
540 #[case(None)]
541 fn test_sparse_primitive(#[case] fill_value: Option<i32>) {
542 let indices = buffer![0u64, 1, 7].into_array();
543 let values = PrimitiveArray::from_option_iter([Some(0i32), None, Some(1)]).into_array();
544 let sparse_ints =
545 SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
546 assert_eq!(*sparse_ints.dtype(), DType::Primitive(PType::I32, Nullable));
547
548 let flat_ints = sparse_ints.to_primitive().unwrap();
549 let expected = PrimitiveArray::from_option_iter([
550 Some(0i32),
551 None,
552 fill_value,
553 fill_value,
554 fill_value,
555 fill_value,
556 fill_value,
557 Some(1),
558 fill_value,
559 fill_value,
560 ]);
561
562 assert_eq!(flat_ints.byte_buffer(), expected.byte_buffer());
563 assert_eq!(flat_ints.validity(), expected.validity());
564
565 assert_eq!(flat_ints.as_slice::<i32>()[0], 0);
566 assert!(flat_ints.validity().is_valid(0).unwrap());
567 assert_eq!(flat_ints.as_slice::<i32>()[1], 0);
568 assert!(!flat_ints.validity().is_valid(1).unwrap());
569 assert_eq!(
570 flat_ints.as_slice::<i32>()[2],
571 fill_value.unwrap_or_default()
572 );
573 assert_eq!(
574 flat_ints.validity().is_valid(2).unwrap(),
575 fill_value.is_some()
576 );
577 assert_eq!(flat_ints.as_slice::<i32>()[7], 1);
578 assert!(flat_ints.validity().is_valid(7).unwrap());
579 }
580
581 #[test]
582 fn test_sparse_struct_valid_fill() {
583 let field_names = FieldNames::from_iter(["a", "b"]);
584 let field_types = vec![
585 DType::Primitive(PType::I32, Nullable),
586 DType::Primitive(PType::I32, Nullable),
587 ];
588 let struct_fields = StructFields::new(field_names, field_types);
589 let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
590
591 let indices = buffer![0u64, 1, 7, 8].into_array();
592 let patch_values_a =
593 PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
594 let patch_values_b =
595 PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
596 let patch_values = StructArray::try_new_with_dtype(
597 vec![patch_values_a, patch_values_b],
598 struct_fields.clone(),
599 4,
600 Validity::Array(
601 BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
602 ),
603 )
604 .unwrap()
605 .into_array();
606
607 let fill_scalar = Scalar::struct_(
608 struct_dtype,
609 vec![Scalar::from(Some(-10i32)), Scalar::from(Some(-1i32))],
610 );
611 let len = 10;
612 let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
613
614 let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
615 if i == 0 {
616 Some(10)
617 } else if i == 1 {
618 None
619 } else if i == 7 {
620 Some(20)
621 } else {
622 Some(-10)
623 }
624 }));
625 let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
626 if i == 0 {
627 Some(1i32)
628 } else if i == 1 {
629 Some(2)
630 } else if i == 7 {
631 None
632 } else {
633 Some(-1)
634 }
635 }));
636
637 let expected = StructArray::try_new_with_dtype(
638 vec![expected_a.into_array(), expected_b.into_array()],
639 struct_fields,
640 len,
641 Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
643 )
644 .unwrap()
645 .to_array()
646 .into_arrow_preferred()
647 .unwrap();
648
649 let actual = sparse_struct
650 .to_struct()
651 .unwrap()
652 .to_array()
653 .into_arrow_preferred()
654 .unwrap();
655
656 assert_eq!(expected.data_type(), actual.data_type());
657 assert_eq!(&expected, &actual);
658 }
659
660 #[test]
661 fn test_sparse_struct_invalid_fill() {
662 let field_names = FieldNames::from_iter(["a", "b"]);
663 let field_types = vec![
664 DType::Primitive(PType::I32, Nullable),
665 DType::Primitive(PType::I32, Nullable),
666 ];
667 let struct_fields = StructFields::new(field_names, field_types);
668 let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
669
670 let indices = buffer![0u64, 1, 7, 8].into_array();
671 let patch_values_a =
672 PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
673 let patch_values_b =
674 PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
675 let patch_values = StructArray::try_new_with_dtype(
676 vec![patch_values_a, patch_values_b],
677 struct_fields.clone(),
678 4,
679 Validity::Array(
680 BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
681 ),
682 )
683 .unwrap()
684 .into_array();
685
686 let fill_scalar = Scalar::null(struct_dtype);
687 let len = 10;
688 let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
689
690 let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
691 if i == 0 {
692 Some(10)
693 } else if i == 1 {
694 None
695 } else if i == 7 {
696 Some(20)
697 } else {
698 Some(-10)
699 }
700 }));
701 let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
702 if i == 0 {
703 Some(1i32)
704 } else if i == 1 {
705 Some(2)
706 } else if i == 7 {
707 None
708 } else {
709 Some(-1)
710 }
711 }));
712
713 let expected = StructArray::try_new_with_dtype(
714 vec![expected_a.into_array(), expected_b.into_array()],
715 struct_fields,
716 len,
717 Validity::from_mask(Mask::from_indices(10, vec![0, 1, 7]), Nullable),
719 )
720 .unwrap()
721 .to_array()
722 .into_arrow_preferred()
723 .unwrap();
724
725 let actual = sparse_struct
726 .to_struct()
727 .unwrap()
728 .to_array()
729 .into_arrow_preferred()
730 .unwrap();
731
732 assert_eq!(expected.data_type(), actual.data_type());
733 assert_eq!(&expected, &actual);
734 }
735
736 #[test]
737 fn test_sparse_decimal() {
738 let indices = buffer![0u32, 1u32, 7u32, 8u32].into_array();
739 let decimal_dtype = DecimalDType::new(3, 2);
740 let patch_values = DecimalArray::new(
741 buffer![100i128, 200i128, 300i128, 4000i128],
742 decimal_dtype,
743 Validity::from_iter([true, true, true, false]),
744 )
745 .to_array();
746 let len = 10;
747 let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
748 let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
749
750 let expected = DecimalArray::new(
751 buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
752 decimal_dtype,
753 Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
755 )
756 .to_array()
757 .into_arrow_preferred()
758 .unwrap();
759
760 let actual = sparse_struct
761 .to_decimal()
762 .unwrap()
763 .to_array()
764 .into_arrow_preferred()
765 .unwrap();
766
767 assert_eq!(expected.data_type(), actual.data_type());
768 assert_eq!(&expected, &actual);
769 }
770
771 #[test]
772 fn test_sparse_utf8_varbinview_non_null_fill() {
773 let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
774 Some("hello"),
775 Some("goodbye"),
776 Some("hello"),
777 None,
778 Some("bonjour"),
779 Some("你好"),
780 None,
781 ])
782 .into_array();
783
784 let array = SparseArray::try_new(
785 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
786 strings,
787 12,
788 Scalar::from(Some("123".to_owned())),
789 )
790 .unwrap();
791
792 let actual = array.to_varbinview().unwrap().into_array();
793 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
794 Some("hello"),
795 Some("123"),
796 Some("123"),
797 Some("goodbye"),
798 Some("hello"),
799 None,
800 Some("123"),
801 Some("bonjour"),
802 Some("123"),
803 Some("你好"),
804 None,
805 Some("123"),
806 ])
807 .into_array();
808
809 let actual = actual.into_arrow_preferred().unwrap();
810 let expected = expected.into_arrow_preferred().unwrap();
811
812 assert_eq!(actual.data_type(), expected.data_type());
813 assert_eq!(&actual, &expected);
814 }
815
816 #[test]
817 fn test_sparse_utf8_varbinview_null_fill() {
818 let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
819 Some("hello"),
820 Some("goodbye"),
821 Some("hello"),
822 None,
823 Some("bonjour"),
824 Some("你好"),
825 None,
826 ])
827 .into_array();
828
829 let array = SparseArray::try_new(
830 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
831 strings,
832 12,
833 Scalar::null(DType::Utf8(Nullable)),
834 )
835 .unwrap();
836
837 let actual = array.to_varbinview().unwrap().into_array();
838 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
839 Some("hello"),
840 None,
841 None,
842 Some("goodbye"),
843 Some("hello"),
844 None,
845 None,
846 Some("bonjour"),
847 None,
848 Some("你好"),
849 None,
850 None,
851 ])
852 .into_array();
853
854 let actual = actual.into_arrow_preferred().unwrap();
855 let expected = expected.into_arrow_preferred().unwrap();
856
857 assert_eq!(actual.data_type(), expected.data_type());
858 assert_eq!(&actual, &expected);
859 }
860
861 #[test]
862 fn test_sparse_utf8_varbinview_non_nullable() {
863 let strings =
864 VarBinViewArray::from_iter_str(["hello", "goodbye", "hello", "bonjour", "你好"])
865 .into_array();
866
867 let array = SparseArray::try_new(
868 buffer![0u16, 3, 4, 5, 8].into_array(),
869 strings,
870 9,
871 Scalar::from("123".to_owned()),
872 )
873 .unwrap();
874
875 let actual = array.to_varbinview().unwrap().into_array();
876 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
877 Some("hello"),
878 Some("123"),
879 Some("123"),
880 Some("goodbye"),
881 Some("hello"),
882 Some("bonjour"),
883 Some("123"),
884 Some("123"),
885 Some("你好"),
886 ])
887 .into_array();
888
889 let actual = actual.into_arrow_preferred().unwrap();
890 let expected = expected.into_arrow_preferred().unwrap();
891
892 assert_eq!(actual.data_type(), expected.data_type());
893 assert_eq!(&actual, &expected);
894 }
895
896 #[test]
897 fn test_sparse_utf8_varbin_null_fill() {
898 let strings = <VarBinArray as FromIterator<_>>::from_iter([
899 Some("hello"),
900 Some("goodbye"),
901 Some("hello"),
902 None,
903 Some("bonjour"),
904 Some("你好"),
905 None,
906 ])
907 .into_array();
908
909 let array = SparseArray::try_new(
910 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
911 strings,
912 12,
913 Scalar::null(DType::Utf8(Nullable)),
914 )
915 .unwrap();
916
917 let actual = array.to_varbinview().unwrap().into_array();
918 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
919 Some("hello"),
920 None,
921 None,
922 Some("goodbye"),
923 Some("hello"),
924 None,
925 None,
926 Some("bonjour"),
927 None,
928 Some("你好"),
929 None,
930 None,
931 ])
932 .into_array();
933
934 let actual = actual.into_arrow_preferred().unwrap();
935 let expected = expected.into_arrow_preferred().unwrap();
936
937 assert_eq!(actual.data_type(), expected.data_type());
938 assert_eq!(&actual, &expected);
939 }
940
941 #[test]
942 fn test_sparse_binary_varbinview_non_null_fill() {
943 let binaries = VarBinViewArray::from_iter_nullable_bin([
944 Some(b"hello" as &[u8]),
945 Some(b"goodbye"),
946 Some(b"hello"),
947 None,
948 Some(b"\x00"),
949 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
950 None,
951 ])
952 .into_array();
953
954 let array = SparseArray::try_new(
955 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
956 binaries,
957 12,
958 Scalar::from(Some(ByteBuffer::from(b"123".to_vec()))),
959 )
960 .unwrap();
961
962 let actual = array.to_varbinview().unwrap().into_array();
963 let expected = VarBinViewArray::from_iter_nullable_bin([
964 Some(b"hello" as &[u8]),
965 Some(b"123"),
966 Some(b"123"),
967 Some(b"goodbye"),
968 Some(b"hello"),
969 None,
970 Some(b"123"),
971 Some(b"\x00"),
972 Some(b"123"),
973 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
974 None,
975 Some(b"123"),
976 ])
977 .into_array();
978
979 let actual = actual.into_arrow_preferred().unwrap();
980 let expected = expected.into_arrow_preferred().unwrap();
981
982 assert_eq!(actual.data_type(), expected.data_type());
983 assert_eq!(&actual, &expected);
984 }
985
986 #[test]
987 fn test_list_scalar_to_elements_array() {
988 let scalar = Scalar::from(Some(vec![1, 2, 3]));
989 let array = list_scalar_to_elements_array(scalar.as_list()).unwrap();
990 assert_eq!(
991 array.unwrap().display_values().to_string(),
992 "[1i32, 2i32, 3i32]"
993 );
994
995 let scalar = Scalar::null_typed::<Vec<i32>>();
996 let array = list_scalar_to_elements_array(scalar.as_list()).unwrap();
997 assert!(array.is_none());
998 }
999
1000 #[test]
1001 fn test_list_scalar_to_singleton_list_array() {
1002 let scalar = Scalar::from(Some(vec![1, 2, 3]));
1003 let array = list_scalar_to_singleton_list_array(scalar.as_list()).unwrap();
1004 assert!(array.is_some());
1005 let array = array.unwrap();
1006 assert_eq!(array.scalar_at(0), scalar);
1007 assert_eq!(array.len(), 1);
1008
1009 let scalar = Scalar::null_typed::<Vec<i32>>();
1010 let array = list_scalar_to_singleton_list_array(scalar.as_list()).unwrap();
1011 assert!(array.is_none());
1012 }
1013
1014 #[test]
1015 fn test_sparse_list_null_fill() {
1016 let elements = buffer![1i32, 2, 1, 2].into_array();
1017 let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1018 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1019 .unwrap()
1020 .into_array();
1021
1022 let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1023 let fill_value = Scalar::null(lists.dtype().clone());
1024 let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1025 .unwrap()
1026 .into_array();
1027
1028 let actual = sparse.to_canonical().unwrap().into_array();
1029 let expected = ListArray::try_new(
1030 buffer![1i32, 2, 1, 2].into_array(),
1031 buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1032 Validity::Array(
1033 BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1034 ),
1035 )
1036 .unwrap()
1037 .into_array();
1038
1039 let actual = actual.into_arrow_preferred().unwrap();
1040 let expected = expected.into_arrow_preferred().unwrap();
1041
1042 assert_eq!(actual.data_type(), expected.data_type());
1043 assert_eq!(&actual, &expected);
1044 }
1045
1046 #[test]
1047 fn test_sparse_list_null_fill_sliced_sparse_values() {
1048 let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
1049 let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7, 8].into_array();
1050 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1051 .unwrap()
1052 .into_array();
1053 let lists = lists.slice(2, 6);
1054
1055 let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1056 let fill_value = Scalar::null(lists.dtype().clone());
1057 let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1058 .unwrap()
1059 .into_array();
1060
1061 let actual = sparse.to_canonical().unwrap().into_array();
1062 let expected = ListArray::try_new(
1063 buffer![1i32, 2, 1, 2].into_array(),
1064 buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1065 Validity::Array(
1066 BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1067 ),
1068 )
1069 .unwrap()
1070 .into_array();
1071
1072 let actual = actual.into_arrow_preferred().unwrap();
1073 let expected = expected.into_arrow_preferred().unwrap();
1074
1075 assert_eq!(actual.data_type(), expected.data_type());
1076 assert_eq!(&actual, &expected);
1077 }
1078
1079 #[test]
1080 fn test_sparse_list_non_null_fill() {
1081 let elements = buffer![1i32, 2, 1, 2].into_array();
1082 let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1083 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1084 .unwrap()
1085 .into_array();
1086
1087 let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1088 let fill_value = Scalar::from(Some(vec![5i32, 6, 7, 8]));
1089 let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1090 .unwrap()
1091 .into_array();
1092
1093 let actual = sparse.to_canonical().unwrap().into_array();
1094 let expected = ListArray::try_new(
1095 buffer![1i32, 5, 6, 7, 8, 5, 6, 7, 8, 2, 1, 2].into_array(),
1096 buffer![0u32, 1, 5, 9, 10, 11, 12].into_array(),
1097 Validity::AllValid,
1098 )
1099 .unwrap()
1100 .into_array();
1101
1102 let actual = actual.into_arrow_preferred().unwrap();
1103 let expected = expected.into_arrow_preferred().unwrap();
1104
1105 assert_eq!(actual.data_type(), expected.data_type());
1106 assert_eq!(&actual, &expected);
1107 }
1108
1109 #[test]
1110 fn test_sparse_binary_varbin_null_fill() {
1111 let strings = <VarBinArray as FromIterator<_>>::from_iter([
1112 Some(b"hello" as &[u8]),
1113 Some(b"goodbye"),
1114 Some(b"hello"),
1115 None,
1116 Some(b"\x00"),
1117 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1118 None,
1119 ])
1120 .into_array();
1121
1122 let array = SparseArray::try_new(
1123 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
1124 strings,
1125 12,
1126 Scalar::null(DType::Binary(Nullable)),
1127 )
1128 .unwrap();
1129
1130 let actual = array.to_varbinview().unwrap().into_array();
1131 let expected = VarBinViewArray::from_iter_nullable_bin([
1132 Some(b"hello" as &[u8]),
1133 None,
1134 None,
1135 Some(b"goodbye"),
1136 Some(b"hello"),
1137 None,
1138 None,
1139 Some(b"\x00"),
1140 None,
1141 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1142 None,
1143 None,
1144 ])
1145 .into_array();
1146 let actual = actual.into_arrow_preferred().unwrap();
1147 let expected = expected.into_arrow_preferred().unwrap();
1148
1149 assert_eq!(actual.data_type(), expected.data_type());
1150 assert_eq!(&actual, &expected);
1151 }
1152
1153 #[test]
1154 fn test_sparse_list_grows_offset_type() {
1155 let elements = buffer![1i32, 2, 1, 2].into_array();
1156 let offsets = buffer![0u8, 1, 2, 3, 4].into_array();
1157 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1158 .unwrap()
1159 .into_array();
1160
1161 let indices = buffer![0u8, 1u8, 2u8, 3u8].into_array();
1162 let fill_value = Scalar::from(Some(vec![42i32; 252])); let sparse = SparseArray::try_new(indices, lists, 5, fill_value)
1164 .unwrap()
1165 .into_array();
1166
1167 let actual = sparse.to_canonical().unwrap().into_array();
1168 let mut expected_elements = buffer_mut![1, 2, 1, 2];
1169 expected_elements.extend(buffer![42i32; 252]);
1170 let expected = ListArray::try_new(
1171 expected_elements.freeze().into_array(),
1172 buffer![0u16, 1, 2, 3, 4, 256].into_array(),
1173 Validity::AllValid,
1174 )
1175 .unwrap()
1176 .into_array();
1177
1178 assert_eq!(
1179 actual.to_list().unwrap().offsets().dtype(),
1180 &DType::Primitive(PType::U16, NonNullable)
1181 );
1182
1183 let actual = actual.into_arrow_preferred().unwrap();
1184 let expected = expected.into_arrow_preferred().unwrap();
1185
1186 assert_eq!(actual.data_type(), expected.data_type());
1187 assert_eq!(&actual, &expected);
1188 }
1189}