1use std::fmt::Debug;
5use std::fmt::Display;
6use std::fmt::Formatter;
7use std::hash::Hash;
8use std::hash::Hasher;
9
10use kernel::PARENT_KERNELS;
11use prost::Message as _;
12use vortex_array::AnyCanonical;
13use vortex_array::Array;
14use vortex_array::ArrayEq;
15use vortex_array::ArrayHash;
16use vortex_array::ArrayId;
17use vortex_array::ArrayParts;
18use vortex_array::ArrayRef;
19use vortex_array::ArraySlots;
20use vortex_array::ArrayView;
21use vortex_array::Canonical;
22use vortex_array::ExecutionCtx;
23use vortex_array::ExecutionResult;
24use vortex_array::IntoArray;
25use vortex_array::Precision;
26use vortex_array::arrays::BoolArray;
27use vortex_array::arrays::ConstantArray;
28use vortex_array::arrays::Primitive;
29use vortex_array::arrays::PrimitiveArray;
30use vortex_array::arrays::bool::BoolArrayExt;
31use vortex_array::buffer::BufferHandle;
32use vortex_array::builtins::ArrayBuiltins;
33use vortex_array::dtype::DType;
34use vortex_array::dtype::Nullability;
35use vortex_array::patches::PatchSlotIndices;
36use vortex_array::patches::Patches;
37use vortex_array::patches::PatchesData;
38use vortex_array::patches::PatchesMetadata;
39use vortex_array::require_child;
40use vortex_array::require_opt_child;
41use vortex_array::scalar::Scalar;
42use vortex_array::scalar::ScalarValue;
43use vortex_array::scalar_fn::fns::operators::Operator;
44use vortex_array::serde::ArrayChildren;
45use vortex_array::validity::Validity;
46use vortex_array::vtable::VTable;
47use vortex_array::vtable::ValidityVTable;
48use vortex_buffer::Buffer;
49use vortex_buffer::ByteBufferMut;
50use vortex_error::VortexExpect as _;
51use vortex_error::VortexResult;
52use vortex_error::vortex_bail;
53use vortex_error::vortex_ensure;
54use vortex_error::vortex_ensure_eq;
55use vortex_error::vortex_panic;
56use vortex_mask::AllOr;
57use vortex_mask::Mask;
58use vortex_session::VortexSession;
59use vortex_session::registry::CachedId;
60
61use crate::canonical::execute_sparse;
62use crate::rules::RULES;
63
64mod canonical;
65mod compute;
66mod kernel;
67mod ops;
68mod rules;
69mod slice;
70
71pub type SparseArray = Array<Sparse>;
73
74#[vortex_array::array_slots(Sparse)]
75pub struct SparseSlots {
76 pub patch_indices: ArrayRef,
77 pub patch_values: ArrayRef,
78 pub patch_chunk_offsets: Option<ArrayRef>,
79}
80
81pub(crate) struct SparseParts {
83 pub patches: Patches,
84 pub fill_value: Scalar,
85 pub dtype: DType,
86 pub len: usize,
87}
88
89pub(crate) trait SparseOwnedExt {
90 fn into_parts(self) -> VortexResult<SparseParts>;
91}
92
93impl SparseOwnedExt for Array<Sparse> {
94 fn into_parts(self) -> VortexResult<SparseParts> {
95 let patches = Patches::new(
96 self.len(),
97 self.patches().offset(),
98 self.as_ref().slots()[SparseSlots::PATCH_INDICES]
99 .clone()
100 .vortex_expect("indices"),
101 self.as_ref().slots()[SparseSlots::PATCH_VALUES]
102 .clone()
103 .vortex_expect("values"),
104 self.as_ref().slots()[SparseSlots::PATCH_CHUNK_OFFSETS].clone(),
105 )?;
106 Ok(SparseParts {
107 patches,
108 fill_value: self.fill_scalar().clone(),
109 dtype: self.dtype().clone(),
110 len: self.len(),
111 })
112 }
113}
114
115#[derive(Clone, prost::Message)]
116#[repr(C)]
117pub struct SparseMetadata {
118 #[prost(message, required, tag = "1")]
119 patches: PatchesMetadata,
120}
121
122impl ArrayHash for SparseData {
123 fn array_hash<H: Hasher>(&self, state: &mut H, _precision: Precision) {
124 self.array_len.hash(state);
125 self.patches_data.hash(state);
126 self.fill_value.hash(state);
127 }
128}
129
130impl ArrayEq for SparseData {
131 fn array_eq(&self, other: &Self, _precision: Precision) -> bool {
132 self.array_len == other.array_len
133 && self.patches_data == other.patches_data
134 && self.fill_value == other.fill_value
135 }
136}
137
138impl VTable for Sparse {
139 type TypedArrayData = SparseData;
140
141 type OperationsVTable = Self;
142 type ValidityVTable = Self;
143
144 fn id(&self) -> ArrayId {
145 static ID: CachedId = CachedId::new("vortex.sparse");
146 *ID
147 }
148
149 fn validate(
150 &self,
151 data: &Self::TypedArrayData,
152 dtype: &DType,
153 len: usize,
154 slots: &[Option<ArrayRef>],
155 ) -> VortexResult<()> {
156 let patches = SparseData::patches_from_slots(data, len, slots);
157 SparseData::validate(&patches, data.fill_scalar(), dtype, len)
158 }
159
160 fn nbuffers(_array: ArrayView<'_, Self>) -> usize {
161 1
162 }
163
164 fn buffer(array: ArrayView<'_, Self>, idx: usize) -> BufferHandle {
165 match idx {
166 0 => {
167 let fill_value_buffer =
168 ScalarValue::to_proto_bytes::<ByteBufferMut>(array.fill_value.value()).freeze();
169 BufferHandle::new_host(fill_value_buffer)
170 }
171 _ => vortex_panic!("SparseArray buffer index {idx} out of bounds"),
172 }
173 }
174
175 fn buffer_name(_array: ArrayView<'_, Self>, idx: usize) -> Option<String> {
176 match idx {
177 0 => Some("fill_value".to_string()),
178 _ => vortex_panic!("SparseArray buffer_name index {idx} out of bounds"),
179 }
180 }
181
182 fn serialize(
183 array: ArrayView<'_, Self>,
184 _session: &VortexSession,
185 ) -> VortexResult<Option<Vec<u8>>> {
186 let patches = array.patches().to_metadata(array.len(), array.dtype())?;
187 let metadata = SparseMetadata { patches };
188
189 Ok(Some(metadata.encode_to_vec()))
191 }
192
193 fn deserialize(
194 &self,
195 dtype: &DType,
196 len: usize,
197 metadata: &[u8],
198 buffers: &[BufferHandle],
199 children: &dyn ArrayChildren,
200 session: &VortexSession,
201 ) -> VortexResult<ArrayParts<Self>> {
202 let metadata = SparseMetadata::decode(metadata)?;
203
204 if buffers.len() != 1 {
207 vortex_bail!("Expected 1 buffer, got {}", buffers.len());
208 }
209 let scalar_bytes: &[u8] = &buffers[0].clone().try_to_host_sync()?;
210
211 let scalar_value = ScalarValue::from_proto_bytes(scalar_bytes, dtype, session)?;
212 let fill_value = Scalar::try_new(dtype.clone(), scalar_value)?;
213
214 vortex_ensure_eq!(
215 children.len(),
216 2,
217 "SparseArray expects 2 children for sparse encoding, found {}",
218 children.len()
219 );
220
221 let patch_indices = children.get(
222 0,
223 &metadata.patches.indices_dtype()?,
224 metadata.patches.len()?,
225 )?;
226 let patch_values = children.get(1, dtype, metadata.patches.len()?)?;
227
228 let patches = Patches::new(
229 len,
230 metadata.patches.offset()?,
231 patch_indices,
232 patch_values,
233 None,
234 )?;
235 let slots = SparseData::make_slots(&patches);
236 let data = SparseData::from_patches(&patches, fill_value)?;
237 Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots))
238 }
239
240 fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String {
241 SparseSlots::NAMES[idx].to_string()
242 }
243
244 fn reduce_parent(
245 array: ArrayView<'_, Self>,
246 parent: &ArrayRef,
247 child_idx: usize,
248 ) -> VortexResult<Option<ArrayRef>> {
249 RULES.evaluate(array, parent, child_idx)
250 }
251
252 fn execute_parent(
253 array: ArrayView<'_, Self>,
254 parent: &ArrayRef,
255 child_idx: usize,
256 ctx: &mut ExecutionCtx,
257 ) -> VortexResult<Option<ArrayRef>> {
258 PARENT_KERNELS.execute(array, parent, child_idx, ctx)
259 }
260
261 fn execute(array: Array<Self>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
262 let array = if array.patches().offset() != 0 {
266 let offset = array.patches().offset();
267 let indices = array.patch_indices();
268 let values = array.patch_values().clone();
269 let len = array.len();
270 let offset_scalar = Scalar::from(offset).cast(indices.dtype())?;
271 let resolved_indices = indices.binary(
272 ConstantArray::new(offset_scalar, indices.len()).into_array(),
273 Operator::Sub,
274 )?;
275 let patches = Patches::new(len, 0, resolved_indices.clone(), values, None)?;
276 match array.try_into_parts() {
278 Ok(mut parts) => {
279 parts.data.patches_data = PatchesData::from_patches(&patches);
280 parts.slots[SparseSlots::PATCH_INDICES] = Some(resolved_indices);
281 parts.slots[SparseSlots::PATCH_CHUNK_OFFSETS] = None;
282 unsafe { Array::from_parts_unchecked(parts) }
283 }
284 Err(array) => unsafe {
285 Sparse::new_unchecked(patches, array.fill_scalar().clone())
286 },
287 }
288 } else {
289 array
290 };
291
292 let array = require_child!(
295 array, array.patch_indices(), SparseSlots::PATCH_INDICES => Primitive
296 );
297 let array = require_child!(
298 array, array.patch_values(), SparseSlots::PATCH_VALUES => AnyCanonical
299 );
300 require_opt_child!(
301 array,
302 array.patch_chunk_offsets(),
303 SparseSlots::PATCH_CHUNK_OFFSETS => Primitive
304 );
305
306 let parts = array.into_parts()?;
307 execute_sparse(parts, ctx).map(ExecutionResult::done)
309 }
310}
311
312const PATCH_SLOTS: PatchSlotIndices = PatchSlotIndices {
313 indices: SparseSlots::PATCH_INDICES,
314 values: SparseSlots::PATCH_VALUES,
315 chunk_offsets: SparseSlots::PATCH_CHUNK_OFFSETS,
316};
317
318#[derive(Clone, Debug)]
319pub struct SparseData {
320 array_len: usize,
322 patches_data: PatchesData,
324 fill_value: Scalar,
325}
326
327impl Display for SparseData {
328 fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
329 write!(f, "fill_value: {}", self.fill_value)
330 }
331}
332
333#[derive(Clone, Debug)]
334pub struct Sparse;
335
336impl Sparse {
337 pub fn try_new(
339 indices: ArrayRef,
340 values: ArrayRef,
341 len: usize,
342 fill_value: Scalar,
343 ) -> VortexResult<SparseArray> {
344 let dtype = fill_value.dtype().clone();
345 let patches = Patches::new(len, 0, indices, values, None)?;
346 let slots = SparseData::make_slots(&patches);
347 let data = SparseData::from_patches(&patches, fill_value)?;
348 Ok(unsafe {
349 Array::from_parts_unchecked(ArrayParts::new(Sparse, dtype, len, data).with_slots(slots))
350 })
351 }
352
353 pub fn try_new_from_patches(patches: Patches, fill_value: Scalar) -> VortexResult<SparseArray> {
354 let dtype = fill_value.dtype().clone();
355 let len = patches.array_len();
356 let slots = SparseData::make_slots(&patches);
357 let data = SparseData::from_patches(&patches, fill_value)?;
358 Ok(unsafe {
359 Array::from_parts_unchecked(ArrayParts::new(Sparse, dtype, len, data).with_slots(slots))
360 })
361 }
362
363 pub(crate) unsafe fn new_unchecked(patches: Patches, fill_value: Scalar) -> SparseArray {
364 let dtype = fill_value.dtype().clone();
365 let len = patches.array_len();
366 let slots = SparseData::make_slots(&patches);
367 let data = SparseData::from_patches_unchecked(&patches, fill_value);
368 unsafe {
369 Array::from_parts_unchecked(ArrayParts::new(Sparse, dtype, len, data).with_slots(slots))
370 }
371 }
372
373 pub fn encode(
375 array: &ArrayRef,
376 fill_value: Option<Scalar>,
377 ctx: &mut ExecutionCtx,
378 ) -> VortexResult<ArrayRef> {
379 SparseData::encode(array, fill_value, ctx)
380 }
381}
382
383impl SparseData {
384 fn normalize_patches_dtype(patches: Patches, fill_value: &Scalar) -> VortexResult<Patches> {
385 let fill_dtype = fill_value.dtype();
386 let values_dtype = patches.values().dtype();
387
388 vortex_ensure!(
389 values_dtype.eq_ignore_nullability(fill_dtype),
390 "fill value, {:?}, should be instance of values dtype, {} but was {}.",
391 fill_value,
392 values_dtype,
393 fill_dtype,
394 );
395
396 if values_dtype == fill_dtype {
397 Ok(patches)
398 } else {
399 patches.cast_values(fill_dtype)
400 }
401 }
402
403 pub fn validate(
404 patches: &Patches,
405 fill_value: &Scalar,
406 dtype: &DType,
407 len: usize,
408 ) -> VortexResult<()> {
409 vortex_ensure!(
410 fill_value.dtype() == dtype,
411 "fill value dtype {} does not match array dtype {}",
412 fill_value.dtype(),
413 dtype,
414 );
415 vortex_ensure!(
416 patches.array_len() == len,
417 "patches length {} does not match array length {}",
418 patches.array_len(),
419 len
420 );
421 vortex_ensure!(
422 patches.values().dtype() == dtype,
423 "patch values dtype {} does not match array dtype {}",
424 patches.values().dtype(),
425 dtype,
426 );
427 Ok(())
428 }
429
430 fn make_slots(patches: &Patches) -> ArraySlots {
431 let mut slots = ArraySlots::with_capacity(SparseSlots::COUNT);
432 PatchesData::push_slots(&mut slots, Some(patches));
433 slots
434 }
435
436 fn patches_from_slots(data: &SparseData, len: usize, slots: &[Option<ArrayRef>]) -> Patches {
438 PatchesData::patches_from_slots(Some(&data.patches_data), len, slots, PATCH_SLOTS)
439 .vortex_expect("SparseArray patch slots must be present")
440 }
441
442 pub fn try_new_from_patches(patches: Patches, fill_value: Scalar) -> VortexResult<Self> {
444 let patches = Self::normalize_patches_dtype(patches, &fill_value)?;
445 Ok(Self::from_patches_unchecked(&patches, fill_value))
446 }
447
448 fn from_patches(patches: &Patches, fill_value: Scalar) -> VortexResult<Self> {
450 let patches = Self::normalize_patches_dtype(patches.clone(), &fill_value)?;
451 Ok(Self::from_patches_unchecked(&patches, fill_value))
452 }
453
454 fn from_patches_unchecked(patches: &Patches, fill_value: Scalar) -> Self {
456 Self {
457 array_len: patches.array_len(),
458 patches_data: PatchesData::from_patches(patches),
459 fill_value,
460 }
461 }
462
463 #[inline]
465 pub fn len(&self) -> usize {
466 self.array_len
467 }
468
469 #[inline]
471 pub fn is_empty(&self) -> bool {
472 self.array_len == 0
473 }
474
475 #[inline]
477 pub fn dtype(&self) -> &DType {
478 self.fill_scalar().dtype()
479 }
480
481 #[inline]
483 pub fn offset(&self) -> usize {
484 self.patches_data.offset()
485 }
486
487 #[inline]
488 pub fn fill_scalar(&self) -> &Scalar {
489 &self.fill_value
490 }
491
492 pub fn encode(
496 array: &ArrayRef,
497 fill_value: Option<Scalar>,
498 ctx: &mut ExecutionCtx,
499 ) -> VortexResult<ArrayRef> {
500 if let Some(fill_value) = fill_value.as_ref()
501 && !array.dtype().eq_ignore_nullability(fill_value.dtype())
502 {
503 vortex_bail!(
504 "Array and fill value types must have the same base type. got {} and {}",
505 array.dtype(),
506 fill_value.dtype()
507 )
508 }
509 let mask = array.validity()?.execute_mask(array.len(), ctx)?;
510
511 if mask.all_false() {
512 return Ok(
514 ConstantArray::new(Scalar::null(array.dtype().clone()), array.len()).into_array(),
515 );
516 } else if mask.false_count() as f64 > (0.9 * mask.len() as f64) {
517 let non_null_values = array
519 .filter(mask.clone())?
520 .execute::<Canonical>(ctx)?
521 .into_array();
522 let non_null_indices = match mask.indices() {
523 AllOr::All => {
524 unreachable!("Mask is mostly null")
526 }
527 AllOr::None => {
528 unreachable!("Mask is mostly null but not all null")
530 }
531 AllOr::Some(values) => {
532 let buffer: Buffer<u32> = values
533 .iter()
534 .map(|&v| v.try_into().vortex_expect("indices must fit in u32"))
535 .collect();
536
537 buffer.into_array()
538 }
539 };
540
541 return Sparse::try_new(
542 non_null_indices,
543 non_null_values,
544 array.len(),
545 Scalar::null(array.dtype().clone()),
546 )
547 .map(IntoArray::into_array);
548 }
549
550 let fill = if let Some(fill) = fill_value {
551 fill.cast(array.dtype())?
552 } else {
553 let primitive = array.clone().execute::<PrimitiveArray>(ctx)?;
555 let (top_pvalue, _) = primitive
556 .top_value()?
557 .vortex_expect("Non empty or all null array");
558
559 Scalar::primitive_value(top_pvalue, top_pvalue.ptype(), array.dtype().nullability())
560 };
561
562 let fill_array = ConstantArray::new(fill.clone(), array.len()).into_array();
563 let non_top_bool = array
564 .binary(fill_array.clone(), Operator::NotEq)?
565 .fill_null(Scalar::bool(true, Nullability::NonNullable))?
566 .execute::<BoolArray>(ctx)?;
567 let non_top_mask = Mask::from_buffer(non_top_bool.to_bit_buffer());
568
569 let non_top_values = array
570 .filter(non_top_mask.clone())?
571 .execute::<Canonical>(ctx)?
572 .into_array();
573
574 let indices: Buffer<u64> = match non_top_mask {
575 Mask::AllTrue(count) => {
576 (0u64..count as u64).collect()
578 }
579 Mask::AllFalse(_) => {
580 return Ok(fill_array);
582 }
583 Mask::Values(values) => values.indices().iter().map(|v| *v as u64).collect(),
584 };
585
586 Sparse::try_new(indices.into_array(), non_top_values, array.len(), fill)
587 .map(IntoArray::into_array)
588 }
589}
590
591pub trait SparseExt {
595 fn patches(&self) -> Patches;
597
598 fn resolved_patches(&self) -> VortexResult<Patches> {
600 let patches = self.patches();
601 let indices_offset = Scalar::from(patches.offset()).cast(patches.indices().dtype())?;
602 let indices = patches.indices().binary(
603 ConstantArray::new(indices_offset, patches.indices().len()).into_array(),
604 Operator::Sub,
605 )?;
606
607 Patches::new(
608 patches.array_len(),
609 0,
610 indices,
611 patches.values().clone(),
612 None,
614 )
615 }
616}
617
618impl SparseExt for ArrayView<'_, Sparse> {
619 fn patches(&self) -> Patches {
620 SparseData::patches_from_slots(self.data(), self.len(), self.slots())
621 }
622}
623
624impl SparseExt for Array<Sparse> {
625 fn patches(&self) -> Patches {
626 SparseData::patches_from_slots(self.data(), self.as_array().len(), self.slots())
627 }
628}
629
630impl ValidityVTable<Sparse> for Sparse {
631 fn validity(array: ArrayView<'_, Sparse>) -> VortexResult<Validity> {
632 let orig_patches = array.patches();
633 let validity_patches = unsafe {
634 Patches::new_unchecked(
635 orig_patches.array_len(),
636 orig_patches.offset(),
637 orig_patches.indices().clone(),
638 orig_patches
639 .values()
640 .validity()?
641 .to_array(orig_patches.values().len()),
642 orig_patches.chunk_offsets().clone(),
643 orig_patches.offset_within_chunk(),
644 )
645 };
646
647 Ok(Validity::Array(
648 unsafe { Sparse::new_unchecked(validity_patches, array.fill_value.is_valid().into()) }
649 .into_array(),
650 ))
651 }
652}
653
654#[cfg(test)]
655mod test {
656 use itertools::Itertools;
657 use vortex_array::IntoArray;
658 use vortex_array::LEGACY_SESSION;
659 use vortex_array::VortexSessionExecute;
660 use vortex_array::arrays::ConstantArray;
661 use vortex_array::arrays::PrimitiveArray;
662 use vortex_array::assert_arrays_eq;
663 use vortex_array::builtins::ArrayBuiltins;
664 use vortex_array::dtype::DType;
665 use vortex_array::dtype::Nullability;
666 use vortex_array::dtype::PType;
667 use vortex_array::scalar::Scalar;
668 use vortex_array::validity::Validity;
669 use vortex_buffer::buffer;
670 use vortex_error::VortexExpect;
671
672 use super::*;
673 use crate::Sparse;
674
675 fn nullable_fill() -> Scalar {
676 Scalar::null(DType::Primitive(PType::I32, Nullability::Nullable))
677 }
678
679 fn non_nullable_fill() -> Scalar {
680 Scalar::from(42i32)
681 }
682
683 fn sparse_array(fill_value: Scalar) -> ArrayRef {
684 let mut values = buffer![100i32, 200, 300].into_array();
686 values = values.cast(fill_value.dtype().clone()).unwrap();
687
688 Sparse::try_new(buffer![2u64, 5, 8].into_array(), values, 10, fill_value)
689 .unwrap()
690 .into_array()
691 }
692
693 #[test]
694 pub fn test_scalar_at() {
695 let array = sparse_array(nullable_fill());
696
697 assert_eq!(
698 array
699 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
700 .unwrap(),
701 nullable_fill()
702 );
703 assert_eq!(
704 array
705 .execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())
706 .unwrap(),
707 Scalar::from(Some(100_i32))
708 );
709 assert_eq!(
710 array
711 .execute_scalar(5, &mut LEGACY_SESSION.create_execution_ctx())
712 .unwrap(),
713 Scalar::from(Some(200_i32))
714 );
715 }
716
717 #[test]
718 #[should_panic(expected = "out of bounds")]
719 fn test_scalar_at_oob() {
720 let array = sparse_array(nullable_fill());
721 array
722 .execute_scalar(10, &mut LEGACY_SESSION.create_execution_ctx())
723 .unwrap();
724 }
725
726 #[test]
727 pub fn test_scalar_at_again() {
728 let arr = Sparse::try_new(
729 ConstantArray::new(10u32, 1).into_array(),
730 ConstantArray::new(Scalar::primitive(1234u32, Nullability::Nullable), 1).into_array(),
731 100,
732 Scalar::null(DType::Primitive(PType::U32, Nullability::Nullable)),
733 )
734 .unwrap();
735
736 assert_eq!(
737 arr.execute_scalar(10, &mut LEGACY_SESSION.create_execution_ctx())
738 .unwrap()
739 .as_primitive()
740 .typed_value::<u32>(),
741 Some(1234)
742 );
743 assert!(
744 arr.execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
745 .unwrap()
746 .is_null()
747 );
748 assert!(
749 arr.execute_scalar(99, &mut LEGACY_SESSION.create_execution_ctx())
750 .unwrap()
751 .is_null()
752 );
753 }
754
755 #[test]
756 pub fn scalar_at_sliced() {
757 let sliced = sparse_array(nullable_fill()).slice(2..7).unwrap();
758 assert_eq!(
759 usize::try_from(
760 &sliced
761 .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
762 .unwrap()
763 )
764 .unwrap(),
765 100
766 );
767 }
768
769 #[test]
770 pub fn validity_mask_sliced_null_fill() {
771 let sliced = sparse_array(nullable_fill()).slice(2..7).unwrap();
772 assert_eq!(
773 sliced
774 .validity()
775 .unwrap()
776 .execute_mask(sliced.len(), &mut LEGACY_SESSION.create_execution_ctx())
777 .unwrap(),
778 Mask::from_iter(vec![true, false, false, true, false])
779 );
780 }
781
782 #[test]
783 pub fn validity_mask_sliced_nonnull_fill() {
784 let sliced = Sparse::try_new(
785 buffer![2u64, 5, 8].into_array(),
786 ConstantArray::new(
787 Scalar::null(DType::Primitive(PType::F32, Nullability::Nullable)),
788 3,
789 )
790 .into_array(),
791 10,
792 Scalar::primitive(1.0f32, Nullability::Nullable),
793 )
794 .unwrap()
795 .slice(2..7)
796 .unwrap();
797
798 assert_eq!(
799 sliced
800 .validity()
801 .unwrap()
802 .execute_mask(sliced.len(), &mut LEGACY_SESSION.create_execution_ctx())
803 .unwrap(),
804 Mask::from_iter(vec![false, true, true, false, true])
805 );
806 }
807
808 #[test]
809 pub fn scalar_at_sliced_twice() {
810 let sliced_once = sparse_array(nullable_fill()).slice(1..8).unwrap();
811 assert_eq!(
812 usize::try_from(
813 &sliced_once
814 .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
815 .unwrap()
816 )
817 .unwrap(),
818 100
819 );
820
821 let sliced_twice = sliced_once.slice(1..6).unwrap();
822 assert_eq!(
823 usize::try_from(
824 &sliced_twice
825 .execute_scalar(3, &mut LEGACY_SESSION.create_execution_ctx())
826 .unwrap()
827 )
828 .unwrap(),
829 200
830 );
831 }
832
833 #[test]
834 pub fn sparse_validity_mask() {
835 let array = sparse_array(nullable_fill());
836 assert_eq!(
837 array
838 .validity()
839 .unwrap()
840 .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
841 .unwrap()
842 .to_bit_buffer()
843 .iter()
844 .collect_vec(),
845 [
846 false, false, true, false, false, true, false, false, true, false
847 ]
848 );
849 }
850
851 #[test]
852 fn sparse_validity_mask_non_null_fill() {
853 let array = sparse_array(non_nullable_fill());
854 assert!(
855 array
856 .validity()
857 .unwrap()
858 .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
859 .unwrap()
860 .all_true()
861 );
862 }
863
864 #[test]
865 #[should_panic]
866 fn test_invalid_length() {
867 let values = buffer![15_u32, 135, 13531, 42].into_array();
868 let indices = buffer![10_u64, 11, 50, 100].into_array();
869
870 Sparse::try_new(indices, values, 100, 0_u32.into()).unwrap();
871 }
872
873 #[test]
874 fn test_valid_length() {
875 let values = buffer![15_u32, 135, 13531, 42].into_array();
876 let indices = buffer![10_u64, 11, 50, 100].into_array();
877
878 Sparse::try_new(indices, values, 101, 0_u32.into()).unwrap();
879 }
880
881 #[test]
882 fn encode_with_nulls() {
883 let mut ctx = LEGACY_SESSION.create_execution_ctx();
884 let original = PrimitiveArray::new(
885 buffer![0i32, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4],
886 Validity::from_iter(vec![
887 true, true, false, true, false, true, false, true, true, false, true, false,
888 ]),
889 );
890 let sparse = Sparse::encode(&original.clone().into_array(), None, &mut ctx)
891 .vortex_expect("Sparse::encode should succeed for test data");
892 assert_eq!(
893 sparse
894 .validity()
895 .unwrap()
896 .execute_mask(sparse.len(), &mut ctx)
897 .unwrap(),
898 Mask::from_iter(vec![
899 true, true, false, true, false, true, false, true, true, false, true, false,
900 ])
901 );
902 let sparse_primitive = sparse.execute::<PrimitiveArray>(&mut ctx).unwrap();
903 assert_arrays_eq!(sparse_primitive, original);
904 }
905
906 #[test]
907 fn validity_mask_includes_null_values_when_fill_is_null() {
908 let indices = buffer![0u8, 2, 4, 6, 8].into_array();
909 let values = PrimitiveArray::from_option_iter([Some(0i16), Some(1), None, None, Some(4)])
910 .into_array();
911 let array = Sparse::try_new(indices, values, 10, Scalar::null_native::<i16>()).unwrap();
912 let actual = array
913 .validity()
914 .unwrap()
915 .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
916 .unwrap();
917 let expected = Mask::from_iter([
918 true, false, true, false, false, false, false, false, true, false,
919 ]);
920
921 assert_eq!(actual, expected);
922 }
923}