1use std::sync::Arc;
5
6use itertools::Itertools;
7use num_traits::NumCast;
8use vortex_array::arrays::{
9 BinaryView, BoolArray, BooleanBuffer, ConstantArray, ListArray, NullArray, OffsetPType,
10 PrimitiveArray, StructArray, VarBinViewArray, smallest_storage_type,
11};
12use vortex_array::builders::{
13 ArrayBuilder as _, ArrayBuilderExt, DecimalBuilder, ListBuilder, builder_with_capacity,
14};
15use vortex_array::patches::Patches;
16use vortex_array::validity::Validity;
17use vortex_array::vtable::CanonicalVTable;
18use vortex_array::{Array, ArrayRef, Canonical, IntoArray as _, ToCanonical as _};
19use vortex_buffer::{Buffer, BufferMut, BufferString, ByteBuffer, buffer, buffer_mut};
20use vortex_dtype::{
21 DType, DecimalDType, NativePType, Nullability, StructFields, match_each_integer_ptype,
22 match_each_native_ptype,
23};
24use vortex_error::{VortexError, VortexExpect as _, VortexResult, vortex_err};
25use vortex_scalar::{
26 DecimalScalar, ListScalar, NativeDecimalType, Scalar, StructScalar,
27 match_each_decimal_value_type,
28};
29
30use crate::{SparseArray, SparseVTable};
31
32impl CanonicalVTable<SparseVTable> for SparseVTable {
33 fn canonicalize(array: &SparseArray) -> VortexResult<Canonical> {
34 if array.patches().num_patches() == 0 {
35 return ConstantArray::new(array.fill_scalar().clone(), array.len()).to_canonical();
36 }
37
38 match array.dtype() {
39 DType::Null => {
40 assert!(array.fill_scalar().is_null());
41 Ok(Canonical::Null(NullArray::new(array.len())))
42 }
43 DType::Bool(..) => {
44 let resolved_patches = array.resolved_patches()?;
45 canonicalize_sparse_bools(&resolved_patches, array.fill_scalar())
46 }
47 DType::Primitive(ptype, ..) => {
48 let resolved_patches = array.resolved_patches()?;
49 match_each_native_ptype!(ptype, |P| {
50 canonicalize_sparse_primitives::<P>(&resolved_patches, array.fill_scalar())
51 })
52 }
53 DType::Struct(struct_fields, ..) => canonicalize_sparse_struct(
54 struct_fields,
55 array.fill_scalar().as_struct(),
56 array.dtype(),
57 array.patches(),
58 array.len(),
59 ),
60 DType::Decimal(decimal_dtype, nullability) => {
61 let canonical_decimal_value_type = smallest_storage_type(decimal_dtype);
62 let fill_value = array.fill_scalar().as_decimal();
63 match_each_decimal_value_type!(canonical_decimal_value_type, |D| {
64 canonicalize_sparse_decimal::<D>(
65 *decimal_dtype,
66 *nullability,
67 fill_value,
68 array.patches(),
69 array.len(),
70 )
71 })
72 }
73 dtype @ DType::Utf8(..) => {
74 let fill_value = array.fill_scalar().as_utf8().value();
75 let fill_value = fill_value.map(BufferString::into_inner);
76 canonicalize_varbin(array, dtype.clone(), fill_value)
77 }
78 dtype @ DType::Binary(..) => {
79 let fill_value = array.fill_scalar().as_binary().value();
80 canonicalize_varbin(array, dtype.clone(), fill_value)
81 }
82 DType::List(values_dtype, nullability) => {
83 let resolved_patches = array.resolved_patches()?;
84 canonicalize_sparse_lists(
85 array,
86 resolved_patches,
87 values_dtype.clone(),
88 *nullability,
89 )
90 }
91 DType::FixedSizeList(..) => {
92 unimplemented!("TODO(connor)[FixedSizeList]")
93 }
94 DType::Extension(_ext_dtype) => todo!(),
95 }
96 }
97}
98
99fn list_scalar_to_elements_array(scalar: ListScalar) -> VortexResult<Option<ArrayRef>> {
101 let Some(elements) = scalar.elements() else {
102 return Ok(None);
103 };
104
105 let mut builder = builder_with_capacity(scalar.element_dtype(), scalar.len());
106 for s in elements {
107 builder.append_scalar(&s)?;
108 }
109 Ok(Some(builder.finish()))
110}
111
112fn list_scalar_to_singleton_list_array(scalar: ListScalar) -> VortexResult<Option<ArrayRef>> {
114 let nullability = scalar.dtype().nullability();
115 let Some(elements) = list_scalar_to_elements_array(scalar)? else {
116 return Ok(None);
117 };
118
119 let validity = match nullability {
120 Nullability::NonNullable => Validity::NonNullable,
121 Nullability::Nullable => Validity::AllValid,
122 };
123
124 let n = elements.len();
125 ListArray::try_new(elements, buffer![0_u64, n as u64].into_array(), validity)
126 .map(|x| Some(x.into_array()))
127}
128
129#[allow(clippy::cognitive_complexity)]
130fn canonicalize_sparse_lists(
131 array: &SparseArray,
132 resolved_patches: Patches,
133 values_dtype: Arc<DType>,
134 nullability: Nullability,
135) -> VortexResult<Canonical> {
136 macro_rules! match_smallest_offset_type {
137 ($n_elements:expr, | $offset_type:ident | $body:block) => {{
138 let n_elements = $n_elements;
139 if n_elements <= u8::MAX as usize {
140 type $offset_type = u8;
141 $body
142 } else if n_elements <= u16::MAX as usize {
143 type $offset_type = u16;
144 $body
145 } else if n_elements <= u32::MAX as usize {
146 type $offset_type = u32;
147 $body
148 } else {
149 assert!(u64::try_from(n_elements).is_ok());
150 type $offset_type = u64;
151 $body
152 }
153 }};
154 }
155
156 let indices = resolved_patches.indices().to_primitive()?;
157 let values = resolved_patches.values().to_list()?;
158 let fill_value = array.fill_scalar().as_list();
159
160 let n_filled = array.len() - resolved_patches.num_patches();
161 let total_canonical_values = values.elements().len() + fill_value.len() * n_filled;
162
163 let validity = Validity::from_mask(array.validity_mask(), nullability);
164
165 match_each_integer_ptype!(indices.ptype(), |I| {
166 match_smallest_offset_type!(total_canonical_values, |O| {
167 canonicalize_sparse_lists_inner::<I, O>(
168 indices.as_slice(),
169 values,
170 fill_value,
171 values_dtype,
172 array.len(),
173 total_canonical_values,
174 validity,
175 )
176 })
177 })
178}
179
180fn canonicalize_sparse_lists_inner<I: NativePType, SmallestViableOffsetType: OffsetPType>(
181 indices: &[I],
182 values: ListArray,
183 fill_value: ListScalar,
184 values_dtype: Arc<DType>,
185 len: usize,
186 total_canonical_values: usize,
187 validity: Validity,
188) -> VortexResult<Canonical> {
189 let Some(fill_value_array) = list_scalar_to_singleton_list_array(fill_value)? else {
190 let sparse_list_elements = values.elements().clone();
191 let sparse_list_offsets = values.offsets().to_primitive()?;
192 match_each_integer_ptype!(sparse_list_offsets.ptype(), |SparseValuesOffsetType| {
193 let sparse_list_offsets = sparse_list_offsets.as_slice::<SparseValuesOffsetType>();
194 return canonicalize_sparse_lists_inner_with_null_fill_value(
198 indices,
199 sparse_list_elements,
200 sparse_list_offsets,
201 len,
202 validity,
203 );
204 });
205 };
206
207 let mut builder = ListBuilder::<SmallestViableOffsetType>::with_values_and_index_capacity(
208 values_dtype,
209 validity.nullability(),
210 total_canonical_values,
211 len + 1,
212 );
213 let mut next_index = 0_usize;
214 let enumerated_indices_usize = indices
215 .iter()
216 .map(|x| (*x).to_usize().vortex_expect("index must fit in usize"))
217 .enumerate();
218 for (patch_values_index, next_patched_index) in enumerated_indices_usize {
219 for _ in next_index..next_patched_index {
220 builder.extend_from_array(&fill_value_array)?;
221 }
222 builder.extend_from_array(&values.slice(patch_values_index..patch_values_index + 1))?;
223 next_index = next_patched_index + 1;
224 }
225
226 for _ in next_index..len {
227 builder.extend_from_array(&fill_value_array)?;
228 }
229
230 builder.finish().to_canonical()
231}
232
233fn canonicalize_sparse_lists_inner_with_null_fill_value<I: NativePType, O: OffsetPType>(
234 indices: &[I],
235 elements: ArrayRef,
236 offsets: &[O],
237 len: usize,
238 validity: Validity,
239) -> VortexResult<Canonical> {
240 assert!(indices.len() < len + 1);
241 let mut dense_offsets = BufferMut::with_capacity(len + 1);
242
243 dense_offsets.push(offsets[0]);
245 let mut dense_last_set_index = 0_usize;
246 for (sparse_start_index, dense_start_index) in indices.iter().enumerate() {
247 let sparse_end_index = sparse_start_index + 1;
248 let dense_start_index = (*dense_start_index)
249 .to_usize()
250 .vortex_expect("index must fit in usize");
251 let dense_end_index = dense_start_index + 1;
252
253 for _ in (dense_last_set_index + 1)..dense_end_index {
254 dense_offsets.push(dense_offsets[dense_last_set_index]);
256 }
257 dense_offsets.push(offsets[sparse_end_index]);
258 dense_last_set_index = dense_end_index;
259 }
260 for _ in (dense_last_set_index + 1)..len {
261 dense_offsets.push(dense_offsets[dense_last_set_index]);
263 }
264 let array = ListArray::try_new(elements, dense_offsets.into_array(), validity)?;
265 Ok(Canonical::List(array))
266}
267
268fn canonicalize_sparse_bools(patches: &Patches, fill_value: &Scalar) -> VortexResult<Canonical> {
269 let (fill_bool, validity) = if fill_value.is_null() {
270 (false, Validity::AllInvalid)
271 } else {
272 (
273 fill_value.try_into()?,
274 if patches.dtype().nullability() == Nullability::NonNullable {
275 Validity::NonNullable
276 } else {
277 Validity::AllValid
278 },
279 )
280 };
281
282 let bools = BoolArray::new(
283 if fill_bool {
284 BooleanBuffer::new_set(patches.array_len())
285 } else {
286 BooleanBuffer::new_unset(patches.array_len())
287 },
288 validity,
289 );
290
291 bools.patch(patches).map(Canonical::Bool)
292}
293
294fn canonicalize_sparse_primitives<
295 T: NativePType + for<'a> TryFrom<&'a Scalar, Error = VortexError>,
296>(
297 patches: &Patches,
298 fill_value: &Scalar,
299) -> VortexResult<Canonical> {
300 let (primitive_fill, validity) = if fill_value.is_null() {
301 (T::default(), Validity::AllInvalid)
302 } else {
303 (
304 fill_value.try_into()?,
305 if patches.dtype().nullability() == Nullability::NonNullable {
306 Validity::NonNullable
307 } else {
308 Validity::AllValid
309 },
310 )
311 };
312
313 let parray = PrimitiveArray::new(buffer![primitive_fill; patches.array_len()], validity);
314
315 parray.patch(patches).map(Canonical::Primitive)
316}
317
318fn canonicalize_sparse_struct(
319 struct_fields: &StructFields,
320 fill_struct: StructScalar,
321 dtype: &DType,
322 unresolved_patches: &Patches,
324 len: usize,
325) -> VortexResult<Canonical> {
326 let (fill_values, top_level_fill_validity) = match fill_struct.fields() {
327 Some(fill_values) => (fill_values, Validity::AllValid),
328 None => (
329 struct_fields.fields().map(Scalar::default_value).collect(),
330 Validity::AllInvalid,
331 ),
332 };
333 let patch_values_as_struct = unresolved_patches.values().to_canonical()?.into_struct()?;
334 let columns_patch_values = patch_values_as_struct.fields();
335 let names = patch_values_as_struct.names();
336 let validity = if dtype.is_nullable() {
337 top_level_fill_validity.patch(
338 len,
339 unresolved_patches.offset(),
340 unresolved_patches.indices(),
341 &Validity::from_mask(
342 unresolved_patches.values().validity_mask(),
343 Nullability::Nullable,
344 ),
345 )?
346 } else {
347 top_level_fill_validity
348 .into_non_nullable()
349 .ok_or_else(|| vortex_err!("fill validity should match sparse array nullability"))?
350 };
351
352 columns_patch_values
353 .iter()
354 .cloned()
355 .zip_eq(fill_values.into_iter())
356 .map(|(patch_values, fill_value)| -> VortexResult<_> {
357 SparseArray::try_new_from_patches(
358 unresolved_patches
359 .clone()
360 .map_values(|_| Ok(patch_values))?,
361 fill_value,
362 )
363 })
364 .process_results(|sparse_columns| {
365 StructArray::try_from_iter_with_validity(names.iter().zip_eq(sparse_columns), validity)
366 .map(Canonical::Struct)
367 })?
368}
369
370fn canonicalize_sparse_decimal<D: NativeDecimalType>(
371 decimal_dtype: DecimalDType,
372 nullability: Nullability,
373 fill_value: DecimalScalar,
374 patches: &Patches,
375 len: usize,
376) -> VortexResult<Canonical> {
377 let mut builder = DecimalBuilder::with_capacity::<D>(len, decimal_dtype, nullability);
378 match fill_value.decimal_value() {
379 Some(fill_value) => {
380 let fill_value = fill_value
381 .cast::<D>()
382 .vortex_expect("unexpected value type");
383 for _ in 0..len {
384 builder.append_value(fill_value)
385 }
386 }
387 None => {
388 builder.append_nulls(len);
389 }
390 }
391 let filled_array = builder.finish_into_decimal();
392 let array = filled_array.patch(patches)?;
393 Ok(Canonical::Decimal(array))
394}
395
396fn canonicalize_varbin(
397 array: &SparseArray,
398 dtype: DType,
399 fill_value: Option<ByteBuffer>,
400) -> VortexResult<Canonical> {
401 let patches = array.resolved_patches()?;
402 let indices = patches.indices().to_primitive()?;
403 let values = patches.values().to_varbinview()?;
404 let validity = Validity::from_mask(array.validity_mask(), dtype.nullability());
405 let len = array.len();
406
407 match_each_integer_ptype!(indices.ptype(), |I| {
408 let indices = indices.buffer::<I>();
409 canonicalize_varbin_inner::<I>(fill_value, indices, values, dtype, validity, len)
410 })
411}
412
413fn canonicalize_varbin_inner<I: NativePType>(
414 fill_value: Option<ByteBuffer>,
415 indices: Buffer<I>,
416 values: VarBinViewArray,
417 dtype: DType,
418 validity: Validity,
419 len: usize,
420) -> VortexResult<Canonical> {
421 assert_eq!(dtype.nullability(), validity.nullability());
422
423 let n_patch_buffers = values.buffers().len();
424 let mut buffers = values.buffers().to_vec();
425
426 let fill = if let Some(buffer) = &fill_value {
427 buffers.push(buffer.clone());
428 BinaryView::make_view(
429 buffer.as_ref(),
430 u32::try_from(n_patch_buffers).vortex_expect("too many buffers"),
431 0,
432 )
433 } else {
434 BinaryView::make_view(&[], 0, 0)
436 };
437
438 let mut views = buffer_mut![fill; len];
439 for (patch_index, &patch) in indices.into_iter().zip_eq(values.views().iter()) {
440 let patch_index_usize = <usize as NumCast>::from(patch_index)
441 .vortex_expect("var bin view indices must fit in usize");
442 views[patch_index_usize] = patch;
443 }
444
445 let array = unsafe {
447 VarBinViewArray::new_unchecked(views.freeze(), Arc::from(buffers), dtype, validity)
448 };
449
450 Ok(Canonical::VarBinView(array))
451}
452
453#[cfg(test)]
454mod test {
455 use rstest::rstest;
456 use vortex_array::arrays::{
457 BoolArray, BooleanBufferBuilder, DecimalArray, ListArray, PrimitiveArray, StructArray,
458 VarBinArray, VarBinViewArray,
459 };
460 use vortex_array::arrow::IntoArrowArray as _;
461 use vortex_array::validity::Validity;
462 use vortex_array::vtable::ValidityHelper;
463 use vortex_array::{IntoArray, ToCanonical};
464 use vortex_buffer::{ByteBuffer, buffer, buffer_mut};
465 use vortex_dtype::Nullability::{NonNullable, Nullable};
466 use vortex_dtype::{DType, DecimalDType, FieldNames, PType, StructFields};
467 use vortex_mask::Mask;
468 use vortex_scalar::{DecimalValue, Scalar};
469
470 use crate::SparseArray;
471 use crate::canonical::{list_scalar_to_elements_array, list_scalar_to_singleton_list_array};
472
473 #[rstest]
474 #[case(Some(true))]
475 #[case(Some(false))]
476 #[case(None)]
477 fn test_sparse_bool(#[case] fill_value: Option<bool>) {
478 let indices = buffer![0u64, 1, 7].into_array();
479 let values = bool_array_from_nullable_vec(vec![Some(true), None, Some(false)], fill_value)
480 .into_array();
481 let sparse_bools =
482 SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
483 assert_eq!(sparse_bools.dtype(), &DType::Bool(Nullable));
484
485 let flat_bools = sparse_bools.to_bool().unwrap();
486 let expected = bool_array_from_nullable_vec(
487 vec![
488 Some(true),
489 None,
490 fill_value,
491 fill_value,
492 fill_value,
493 fill_value,
494 fill_value,
495 Some(false),
496 fill_value,
497 fill_value,
498 ],
499 fill_value,
500 );
501
502 assert_eq!(flat_bools.boolean_buffer(), expected.boolean_buffer());
503 assert_eq!(flat_bools.validity(), expected.validity());
504
505 assert!(flat_bools.boolean_buffer().value(0));
506 assert!(flat_bools.validity().is_valid(0));
507 assert_eq!(
508 flat_bools.boolean_buffer().value(1),
509 fill_value.unwrap_or_default()
510 );
511 assert!(!flat_bools.validity().is_valid(1));
512 assert_eq!(flat_bools.validity().is_valid(2), fill_value.is_some());
513 assert!(!flat_bools.boolean_buffer().value(7));
514 assert!(flat_bools.validity().is_valid(7));
515 }
516
517 fn bool_array_from_nullable_vec(
518 bools: Vec<Option<bool>>,
519 fill_value: Option<bool>,
520 ) -> BoolArray {
521 let mut buffer = BooleanBufferBuilder::new(bools.len());
522 let mut validity = BooleanBufferBuilder::new(bools.len());
523 for maybe_bool in bools {
524 buffer.append(maybe_bool.unwrap_or_else(|| fill_value.unwrap_or_default()));
525 validity.append(maybe_bool.is_some());
526 }
527 BoolArray::new(buffer.finish(), Validity::from(validity.finish()))
528 }
529
530 #[rstest]
531 #[case(Some(0i32))]
532 #[case(Some(-1i32))]
533 #[case(None)]
534 fn test_sparse_primitive(#[case] fill_value: Option<i32>) {
535 let indices = buffer![0u64, 1, 7].into_array();
536 let values = PrimitiveArray::from_option_iter([Some(0i32), None, Some(1)]).into_array();
537 let sparse_ints =
538 SparseArray::try_new(indices, values, 10, Scalar::from(fill_value)).unwrap();
539 assert_eq!(*sparse_ints.dtype(), DType::Primitive(PType::I32, Nullable));
540
541 let flat_ints = sparse_ints.to_primitive().unwrap();
542 let expected = PrimitiveArray::from_option_iter([
543 Some(0i32),
544 None,
545 fill_value,
546 fill_value,
547 fill_value,
548 fill_value,
549 fill_value,
550 Some(1),
551 fill_value,
552 fill_value,
553 ]);
554
555 assert_eq!(flat_ints.byte_buffer(), expected.byte_buffer());
556 assert_eq!(flat_ints.validity(), expected.validity());
557
558 assert_eq!(flat_ints.as_slice::<i32>()[0], 0);
559 assert!(flat_ints.validity().is_valid(0));
560 assert_eq!(flat_ints.as_slice::<i32>()[1], 0);
561 assert!(!flat_ints.validity().is_valid(1));
562 assert_eq!(
563 flat_ints.as_slice::<i32>()[2],
564 fill_value.unwrap_or_default()
565 );
566 assert_eq!(flat_ints.validity().is_valid(2), fill_value.is_some());
567 assert_eq!(flat_ints.as_slice::<i32>()[7], 1);
568 assert!(flat_ints.validity().is_valid(7));
569 }
570
571 #[test]
572 fn test_sparse_struct_valid_fill() {
573 let field_names = FieldNames::from_iter(["a", "b"]);
574 let field_types = vec![
575 DType::Primitive(PType::I32, Nullable),
576 DType::Primitive(PType::I32, Nullable),
577 ];
578 let struct_fields = StructFields::new(field_names, field_types);
579 let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
580
581 let indices = buffer![0u64, 1, 7, 8].into_array();
582 let patch_values_a =
583 PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
584 let patch_values_b =
585 PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
586 let patch_values = StructArray::try_new_with_dtype(
587 vec![patch_values_a, patch_values_b],
588 struct_fields.clone(),
589 4,
590 Validity::Array(
591 BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
592 ),
593 )
594 .unwrap()
595 .into_array();
596
597 let fill_scalar = Scalar::struct_(
598 struct_dtype,
599 vec![Scalar::from(Some(-10i32)), Scalar::from(Some(-1i32))],
600 );
601 let len = 10;
602 let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
603
604 let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
605 if i == 0 {
606 Some(10)
607 } else if i == 1 {
608 None
609 } else if i == 7 {
610 Some(20)
611 } else {
612 Some(-10)
613 }
614 }));
615 let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
616 if i == 0 {
617 Some(1i32)
618 } else if i == 1 {
619 Some(2)
620 } else if i == 7 {
621 None
622 } else {
623 Some(-1)
624 }
625 }));
626
627 let expected = StructArray::try_new_with_dtype(
628 vec![expected_a.into_array(), expected_b.into_array()],
629 struct_fields,
630 len,
631 Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
633 )
634 .unwrap()
635 .to_array()
636 .into_arrow_preferred()
637 .unwrap();
638
639 let actual = sparse_struct
640 .to_struct()
641 .unwrap()
642 .to_array()
643 .into_arrow_preferred()
644 .unwrap();
645
646 assert_eq!(expected.data_type(), actual.data_type());
647 assert_eq!(&expected, &actual);
648 }
649
650 #[test]
651 fn test_sparse_struct_invalid_fill() {
652 let field_names = FieldNames::from_iter(["a", "b"]);
653 let field_types = vec![
654 DType::Primitive(PType::I32, Nullable),
655 DType::Primitive(PType::I32, Nullable),
656 ];
657 let struct_fields = StructFields::new(field_names, field_types);
658 let struct_dtype = DType::Struct(struct_fields.clone(), Nullable);
659
660 let indices = buffer![0u64, 1, 7, 8].into_array();
661 let patch_values_a =
662 PrimitiveArray::from_option_iter([Some(10i32), None, Some(20), Some(30)]).into_array();
663 let patch_values_b =
664 PrimitiveArray::from_option_iter([Some(1i32), Some(2), None, Some(3)]).into_array();
665 let patch_values = StructArray::try_new_with_dtype(
666 vec![patch_values_a, patch_values_b],
667 struct_fields.clone(),
668 4,
669 Validity::Array(
670 BoolArray::from_indices(4, vec![0, 1, 2], Validity::NonNullable).to_array(),
671 ),
672 )
673 .unwrap()
674 .into_array();
675
676 let fill_scalar = Scalar::null(struct_dtype);
677 let len = 10;
678 let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
679
680 let expected_a = PrimitiveArray::from_option_iter((0..len).map(|i| {
681 if i == 0 {
682 Some(10)
683 } else if i == 1 {
684 None
685 } else if i == 7 {
686 Some(20)
687 } else {
688 Some(-10)
689 }
690 }));
691 let expected_b = PrimitiveArray::from_option_iter((0..len).map(|i| {
692 if i == 0 {
693 Some(1i32)
694 } else if i == 1 {
695 Some(2)
696 } else if i == 7 {
697 None
698 } else {
699 Some(-1)
700 }
701 }));
702
703 let expected = StructArray::try_new_with_dtype(
704 vec![expected_a.into_array(), expected_b.into_array()],
705 struct_fields,
706 len,
707 Validity::from_mask(Mask::from_indices(10, vec![0, 1, 7]), Nullable),
709 )
710 .unwrap()
711 .to_array()
712 .into_arrow_preferred()
713 .unwrap();
714
715 let actual = sparse_struct
716 .to_struct()
717 .unwrap()
718 .to_array()
719 .into_arrow_preferred()
720 .unwrap();
721
722 assert_eq!(expected.data_type(), actual.data_type());
723 assert_eq!(&expected, &actual);
724 }
725
726 #[test]
727 fn test_sparse_decimal() {
728 let indices = buffer![0u32, 1u32, 7u32, 8u32].into_array();
729 let decimal_dtype = DecimalDType::new(3, 2);
730 let patch_values = DecimalArray::new(
731 buffer![100i128, 200i128, 300i128, 4000i128],
732 decimal_dtype,
733 Validity::from_iter([true, true, true, false]),
734 )
735 .to_array();
736 let len = 10;
737 let fill_scalar = Scalar::decimal(DecimalValue::I32(123), decimal_dtype, Nullable);
738 let sparse_struct = SparseArray::try_new(indices, patch_values, len, fill_scalar).unwrap();
739
740 let expected = DecimalArray::new(
741 buffer![100i128, 200, 123, 123, 123, 123, 123, 300, 4000, 123],
742 decimal_dtype,
743 Validity::from_mask(Mask::from_excluded_indices(10, vec![8]), Nullable),
745 )
746 .to_array()
747 .into_arrow_preferred()
748 .unwrap();
749
750 let actual = sparse_struct
751 .to_decimal()
752 .unwrap()
753 .to_array()
754 .into_arrow_preferred()
755 .unwrap();
756
757 assert_eq!(expected.data_type(), actual.data_type());
758 assert_eq!(&expected, &actual);
759 }
760
761 #[test]
762 fn test_sparse_utf8_varbinview_non_null_fill() {
763 let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
764 Some("hello"),
765 Some("goodbye"),
766 Some("hello"),
767 None,
768 Some("bonjour"),
769 Some("你好"),
770 None,
771 ])
772 .into_array();
773
774 let array = SparseArray::try_new(
775 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
776 strings,
777 12,
778 Scalar::from(Some("123".to_owned())),
779 )
780 .unwrap();
781
782 let actual = array.to_varbinview().unwrap().into_array();
783 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
784 Some("hello"),
785 Some("123"),
786 Some("123"),
787 Some("goodbye"),
788 Some("hello"),
789 None,
790 Some("123"),
791 Some("bonjour"),
792 Some("123"),
793 Some("你好"),
794 None,
795 Some("123"),
796 ])
797 .into_array();
798
799 let actual = actual.into_arrow_preferred().unwrap();
800 let expected = expected.into_arrow_preferred().unwrap();
801
802 assert_eq!(actual.data_type(), expected.data_type());
803 assert_eq!(&actual, &expected);
804 }
805
806 #[test]
807 fn test_sparse_utf8_varbinview_null_fill() {
808 let strings = <VarBinViewArray as FromIterator<_>>::from_iter([
809 Some("hello"),
810 Some("goodbye"),
811 Some("hello"),
812 None,
813 Some("bonjour"),
814 Some("你好"),
815 None,
816 ])
817 .into_array();
818
819 let array = SparseArray::try_new(
820 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
821 strings,
822 12,
823 Scalar::null(DType::Utf8(Nullable)),
824 )
825 .unwrap();
826
827 let actual = array.to_varbinview().unwrap().into_array();
828 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
829 Some("hello"),
830 None,
831 None,
832 Some("goodbye"),
833 Some("hello"),
834 None,
835 None,
836 Some("bonjour"),
837 None,
838 Some("你好"),
839 None,
840 None,
841 ])
842 .into_array();
843
844 let actual = actual.into_arrow_preferred().unwrap();
845 let expected = expected.into_arrow_preferred().unwrap();
846
847 assert_eq!(actual.data_type(), expected.data_type());
848 assert_eq!(&actual, &expected);
849 }
850
851 #[test]
852 fn test_sparse_utf8_varbinview_non_nullable() {
853 let strings =
854 VarBinViewArray::from_iter_str(["hello", "goodbye", "hello", "bonjour", "你好"])
855 .into_array();
856
857 let array = SparseArray::try_new(
858 buffer![0u16, 3, 4, 5, 8].into_array(),
859 strings,
860 9,
861 Scalar::from("123".to_owned()),
862 )
863 .unwrap();
864
865 let actual = array.to_varbinview().unwrap().into_array();
866 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
867 Some("hello"),
868 Some("123"),
869 Some("123"),
870 Some("goodbye"),
871 Some("hello"),
872 Some("bonjour"),
873 Some("123"),
874 Some("123"),
875 Some("你好"),
876 ])
877 .into_array();
878
879 let actual = actual.into_arrow_preferred().unwrap();
880 let expected = expected.into_arrow_preferred().unwrap();
881
882 assert_eq!(actual.data_type(), expected.data_type());
883 assert_eq!(&actual, &expected);
884 }
885
886 #[test]
887 fn test_sparse_utf8_varbin_null_fill() {
888 let strings = <VarBinArray as FromIterator<_>>::from_iter([
889 Some("hello"),
890 Some("goodbye"),
891 Some("hello"),
892 None,
893 Some("bonjour"),
894 Some("你好"),
895 None,
896 ])
897 .into_array();
898
899 let array = SparseArray::try_new(
900 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
901 strings,
902 12,
903 Scalar::null(DType::Utf8(Nullable)),
904 )
905 .unwrap();
906
907 let actual = array.to_varbinview().unwrap().into_array();
908 let expected = <VarBinViewArray as FromIterator<_>>::from_iter([
909 Some("hello"),
910 None,
911 None,
912 Some("goodbye"),
913 Some("hello"),
914 None,
915 None,
916 Some("bonjour"),
917 None,
918 Some("你好"),
919 None,
920 None,
921 ])
922 .into_array();
923
924 let actual = actual.into_arrow_preferred().unwrap();
925 let expected = expected.into_arrow_preferred().unwrap();
926
927 assert_eq!(actual.data_type(), expected.data_type());
928 assert_eq!(&actual, &expected);
929 }
930
931 #[test]
932 fn test_sparse_binary_varbinview_non_null_fill() {
933 let binaries = VarBinViewArray::from_iter_nullable_bin([
934 Some(b"hello" as &[u8]),
935 Some(b"goodbye"),
936 Some(b"hello"),
937 None,
938 Some(b"\x00"),
939 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
940 None,
941 ])
942 .into_array();
943
944 let array = SparseArray::try_new(
945 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
946 binaries,
947 12,
948 Scalar::from(Some(ByteBuffer::from(b"123".to_vec()))),
949 )
950 .unwrap();
951
952 let actual = array.to_varbinview().unwrap().into_array();
953 let expected = VarBinViewArray::from_iter_nullable_bin([
954 Some(b"hello" as &[u8]),
955 Some(b"123"),
956 Some(b"123"),
957 Some(b"goodbye"),
958 Some(b"hello"),
959 None,
960 Some(b"123"),
961 Some(b"\x00"),
962 Some(b"123"),
963 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
964 None,
965 Some(b"123"),
966 ])
967 .into_array();
968
969 let actual = actual.into_arrow_preferred().unwrap();
970 let expected = expected.into_arrow_preferred().unwrap();
971
972 assert_eq!(actual.data_type(), expected.data_type());
973 assert_eq!(&actual, &expected);
974 }
975
976 #[test]
977 fn test_list_scalar_to_elements_array() {
978 let scalar = Scalar::from(Some(vec![1, 2, 3]));
979 let array = list_scalar_to_elements_array(scalar.as_list()).unwrap();
980 assert_eq!(
981 array.unwrap().display_values().to_string(),
982 "[1i32, 2i32, 3i32]"
983 );
984
985 let scalar = Scalar::null_typed::<Vec<i32>>();
986 let array = list_scalar_to_elements_array(scalar.as_list()).unwrap();
987 assert!(array.is_none());
988 }
989
990 #[test]
991 fn test_list_scalar_to_singleton_list_array() {
992 let scalar = Scalar::from(Some(vec![1, 2, 3]));
993 let array = list_scalar_to_singleton_list_array(scalar.as_list()).unwrap();
994 assert!(array.is_some());
995 let array = array.unwrap();
996 assert_eq!(array.scalar_at(0), scalar);
997 assert_eq!(array.len(), 1);
998
999 let scalar = Scalar::null_typed::<Vec<i32>>();
1000 let array = list_scalar_to_singleton_list_array(scalar.as_list()).unwrap();
1001 assert!(array.is_none());
1002 }
1003
1004 #[test]
1005 fn test_sparse_list_null_fill() {
1006 let elements = buffer![1i32, 2, 1, 2].into_array();
1007 let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1008 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1009 .unwrap()
1010 .into_array();
1011
1012 let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1013 let fill_value = Scalar::null(lists.dtype().clone());
1014 let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1015 .unwrap()
1016 .into_array();
1017
1018 let actual = sparse.to_canonical().unwrap().into_array();
1019 let expected = ListArray::try_new(
1020 buffer![1i32, 2, 1, 2].into_array(),
1021 buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1022 Validity::Array(
1023 BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1024 ),
1025 )
1026 .unwrap()
1027 .into_array();
1028
1029 let actual = actual.into_arrow_preferred().unwrap();
1030 let expected = expected.into_arrow_preferred().unwrap();
1031
1032 assert_eq!(actual.data_type(), expected.data_type());
1033 assert_eq!(&actual, &expected);
1034 }
1035
1036 #[test]
1037 fn test_sparse_list_null_fill_sliced_sparse_values() {
1038 let elements = buffer![1i32, 2, 1, 2, 1, 2, 1, 2].into_array();
1039 let offsets = buffer![0u32, 1, 2, 3, 4, 5, 6, 7, 8].into_array();
1040 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1041 .unwrap()
1042 .into_array();
1043 let lists = lists.slice(2..6);
1044
1045 let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1046 let fill_value = Scalar::null(lists.dtype().clone());
1047 let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1048 .unwrap()
1049 .into_array();
1050
1051 let actual = sparse.to_canonical().unwrap().into_array();
1052 let expected = ListArray::try_new(
1053 buffer![1i32, 2, 1, 2].into_array(),
1054 buffer![0u32, 1, 1, 1, 2, 3, 4].into_array(),
1055 Validity::Array(
1056 BoolArray::from_iter([true, false, false, true, true, true]).into_array(),
1057 ),
1058 )
1059 .unwrap()
1060 .into_array();
1061
1062 let actual = actual.into_arrow_preferred().unwrap();
1063 let expected = expected.into_arrow_preferred().unwrap();
1064
1065 assert_eq!(actual.data_type(), expected.data_type());
1066 assert_eq!(&actual, &expected);
1067 }
1068
1069 #[test]
1070 fn test_sparse_list_non_null_fill() {
1071 let elements = buffer![1i32, 2, 1, 2].into_array();
1072 let offsets = buffer![0u32, 1, 2, 3, 4].into_array();
1073 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1074 .unwrap()
1075 .into_array();
1076
1077 let indices = buffer![0u8, 3u8, 4u8, 5u8].into_array();
1078 let fill_value = Scalar::from(Some(vec![5i32, 6, 7, 8]));
1079 let sparse = SparseArray::try_new(indices, lists, 6, fill_value)
1080 .unwrap()
1081 .into_array();
1082
1083 let actual = sparse.to_canonical().unwrap().into_array();
1084 let expected = ListArray::try_new(
1085 buffer![1i32, 5, 6, 7, 8, 5, 6, 7, 8, 2, 1, 2].into_array(),
1086 buffer![0u32, 1, 5, 9, 10, 11, 12].into_array(),
1087 Validity::AllValid,
1088 )
1089 .unwrap()
1090 .into_array();
1091
1092 let actual = actual.into_arrow_preferred().unwrap();
1093 let expected = expected.into_arrow_preferred().unwrap();
1094
1095 assert_eq!(actual.data_type(), expected.data_type());
1096 assert_eq!(&actual, &expected);
1097 }
1098
1099 #[test]
1100 fn test_sparse_binary_varbin_null_fill() {
1101 let strings = <VarBinArray as FromIterator<_>>::from_iter([
1102 Some(b"hello" as &[u8]),
1103 Some(b"goodbye"),
1104 Some(b"hello"),
1105 None,
1106 Some(b"\x00"),
1107 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1108 None,
1109 ])
1110 .into_array();
1111
1112 let array = SparseArray::try_new(
1113 buffer![0u16, 3, 4, 5, 7, 9, 10].into_array(),
1114 strings,
1115 12,
1116 Scalar::null(DType::Binary(Nullable)),
1117 )
1118 .unwrap();
1119
1120 let actual = array.to_varbinview().unwrap().into_array();
1121 let expected = VarBinViewArray::from_iter_nullable_bin([
1122 Some(b"hello" as &[u8]),
1123 None,
1124 None,
1125 Some(b"goodbye"),
1126 Some(b"hello"),
1127 None,
1128 None,
1129 Some(b"\x00"),
1130 None,
1131 Some(b"\xE4\xBD\xA0\xE5\xA5\xBD"),
1132 None,
1133 None,
1134 ])
1135 .into_array();
1136 let actual = actual.into_arrow_preferred().unwrap();
1137 let expected = expected.into_arrow_preferred().unwrap();
1138
1139 assert_eq!(actual.data_type(), expected.data_type());
1140 assert_eq!(&actual, &expected);
1141 }
1142
1143 #[test]
1144 fn test_sparse_list_grows_offset_type() {
1145 let elements = buffer![1i32, 2, 1, 2].into_array();
1146 let offsets = buffer![0u8, 1, 2, 3, 4].into_array();
1147 let lists = ListArray::try_new(elements, offsets, Validity::AllValid)
1148 .unwrap()
1149 .into_array();
1150
1151 let indices = buffer![0u8, 1u8, 2u8, 3u8].into_array();
1152 let fill_value = Scalar::from(Some(vec![42i32; 252])); let sparse = SparseArray::try_new(indices, lists, 5, fill_value)
1154 .unwrap()
1155 .into_array();
1156
1157 let actual = sparse.to_canonical().unwrap().into_array();
1158 let mut expected_elements = buffer_mut![1, 2, 1, 2];
1159 expected_elements.extend(buffer![42i32; 252]);
1160 let expected = ListArray::try_new(
1161 expected_elements.freeze().into_array(),
1162 buffer![0u16, 1, 2, 3, 4, 256].into_array(),
1163 Validity::AllValid,
1164 )
1165 .unwrap()
1166 .into_array();
1167
1168 assert_eq!(
1169 actual.to_list().unwrap().offsets().dtype(),
1170 &DType::Primitive(PType::U16, NonNullable)
1171 );
1172
1173 let actual = actual.into_arrow_preferred().unwrap();
1174 let expected = expected.into_arrow_preferred().unwrap();
1175
1176 assert_eq!(actual.data_type(), expected.data_type());
1177 assert_eq!(&actual, &expected);
1178 }
1179}