Skip to main content

vortex_sparse/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::fmt::Display;
6use std::fmt::Formatter;
7use std::hash::Hash;
8use std::hash::Hasher;
9
10use kernel::PARENT_KERNELS;
11use prost::Message as _;
12use vortex_array::Array;
13use vortex_array::ArrayEq;
14use vortex_array::ArrayHash;
15use vortex_array::ArrayId;
16use vortex_array::ArrayParts;
17use vortex_array::ArrayRef;
18use vortex_array::ArrayView;
19use vortex_array::Canonical;
20use vortex_array::ExecutionCtx;
21use vortex_array::ExecutionResult;
22use vortex_array::IntoArray;
23use vortex_array::LEGACY_SESSION;
24use vortex_array::Precision;
25use vortex_array::ToCanonical;
26use vortex_array::VortexSessionExecute;
27use vortex_array::arrays::ConstantArray;
28use vortex_array::arrays::bool::BoolArrayExt;
29use vortex_array::buffer::BufferHandle;
30use vortex_array::builtins::ArrayBuiltins;
31use vortex_array::dtype::DType;
32use vortex_array::dtype::Nullability;
33use vortex_array::patches::Patches;
34use vortex_array::patches::PatchesMetadata;
35use vortex_array::scalar::Scalar;
36use vortex_array::scalar::ScalarValue;
37use vortex_array::scalar_fn::fns::operators::Operator;
38use vortex_array::serde::ArrayChildren;
39use vortex_array::validity::Validity;
40use vortex_array::vtable::VTable;
41use vortex_array::vtable::ValidityVTable;
42use vortex_buffer::Buffer;
43use vortex_buffer::ByteBufferMut;
44use vortex_error::VortexExpect as _;
45use vortex_error::VortexResult;
46use vortex_error::vortex_bail;
47use vortex_error::vortex_ensure;
48use vortex_error::vortex_ensure_eq;
49use vortex_error::vortex_panic;
50use vortex_mask::AllOr;
51use vortex_mask::Mask;
52use vortex_session::VortexSession;
53use vortex_session::registry::CachedId;
54
55use crate::canonical::execute_sparse;
56use crate::rules::RULES;
57
58mod canonical;
59mod compute;
60mod kernel;
61mod ops;
62mod rules;
63mod slice;
64
/// A [`Sparse`]-encoded Vortex array: a fill value plus a set of patches (see [`SparseData`]).
pub type SparseArray = Array<Sparse>;
67
/// Serialized metadata for a [`SparseArray`].
///
/// Only the patches metadata is stored here; the fill value is not part of the metadata
/// because it is written out as the array's single buffer.
#[derive(Clone, prost::Message)]
#[repr(C)]
pub struct SparseMetadata {
    /// Metadata describing the patches, as produced by `Patches::to_metadata`.
    #[prost(message, required, tag = "1")]
    patches: PatchesMetadata,
}
74
75impl ArrayHash for SparseData {
76    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
77        self.patches.array_hash(state, precision);
78        self.fill_value.hash(state);
79    }
80}
81
82impl ArrayEq for SparseData {
83    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
84        self.patches.array_eq(&other.patches, precision) && self.fill_value == other.fill_value
85    }
86}
87
88impl VTable for Sparse {
89    type ArrayData = SparseData;
90
91    type OperationsVTable = Self;
92    type ValidityVTable = Self;
93
94    fn id(&self) -> ArrayId {
95        static ID: CachedId = CachedId::new("vortex.sparse");
96        *ID
97    }
98
99    fn validate(
100        &self,
101        data: &Self::ArrayData,
102        dtype: &DType,
103        len: usize,
104        _slots: &[Option<ArrayRef>],
105    ) -> VortexResult<()> {
106        SparseData::validate(data.patches(), data.fill_scalar(), dtype, len)
107    }
108
109    fn nbuffers(_array: ArrayView<'_, Self>) -> usize {
110        1
111    }
112
113    fn buffer(array: ArrayView<'_, Self>, idx: usize) -> BufferHandle {
114        match idx {
115            0 => {
116                let fill_value_buffer =
117                    ScalarValue::to_proto_bytes::<ByteBufferMut>(array.fill_value.value()).freeze();
118                BufferHandle::new_host(fill_value_buffer)
119            }
120            _ => vortex_panic!("SparseArray buffer index {idx} out of bounds"),
121        }
122    }
123
124    fn buffer_name(_array: ArrayView<'_, Self>, idx: usize) -> Option<String> {
125        match idx {
126            0 => Some("fill_value".to_string()),
127            _ => vortex_panic!("SparseArray buffer_name index {idx} out of bounds"),
128        }
129    }
130
131    fn serialize(
132        array: ArrayView<'_, Self>,
133        _session: &VortexSession,
134    ) -> VortexResult<Option<Vec<u8>>> {
135        let patches = array.patches().to_metadata(array.len(), array.dtype())?;
136        let metadata = SparseMetadata { patches };
137
138        // Note that we DO NOT serialize the fill value since that is stored in the buffers.
139        Ok(Some(metadata.encode_to_vec()))
140    }
141
142    fn deserialize(
143        &self,
144        dtype: &DType,
145        len: usize,
146        metadata: &[u8],
147        buffers: &[BufferHandle],
148        children: &dyn ArrayChildren,
149        session: &VortexSession,
150    ) -> VortexResult<ArrayParts<Self>> {
151        let metadata = SparseMetadata::decode(metadata)?;
152
153        // Once we have the patches metadata, we need to get the fill value from the buffers.
154
155        if buffers.len() != 1 {
156            vortex_bail!("Expected 1 buffer, got {}", buffers.len());
157        }
158        let scalar_bytes: &[u8] = &buffers[0].clone().try_to_host_sync()?;
159
160        let scalar_value = ScalarValue::from_proto_bytes(scalar_bytes, dtype, session)?;
161        let fill_value = Scalar::try_new(dtype.clone(), scalar_value)?;
162
163        vortex_ensure_eq!(
164            children.len(),
165            2,
166            "SparseArray expects 2 children for sparse encoding, found {}",
167            children.len()
168        );
169
170        let patch_indices = children.get(
171            0,
172            &metadata.patches.indices_dtype()?,
173            metadata.patches.len()?,
174        )?;
175        let patch_values = children.get(1, dtype, metadata.patches.len()?)?;
176
177        let patches = Patches::new(
178            len,
179            metadata.patches.offset()?,
180            patch_indices,
181            patch_values,
182            None,
183        )?;
184        let slots = SparseData::make_slots(&patches);
185        let data = SparseData::try_new_from_patches(patches, fill_value)?;
186        Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots))
187    }
188
189    fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String {
190        SLOT_NAMES[idx].to_string()
191    }
192
193    fn reduce_parent(
194        array: ArrayView<'_, Self>,
195        parent: &ArrayRef,
196        child_idx: usize,
197    ) -> VortexResult<Option<ArrayRef>> {
198        RULES.evaluate(array, parent, child_idx)
199    }
200
201    fn execute_parent(
202        array: ArrayView<'_, Self>,
203        parent: &ArrayRef,
204        child_idx: usize,
205        ctx: &mut ExecutionCtx,
206    ) -> VortexResult<Option<ArrayRef>> {
207        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
208    }
209
210    fn execute(array: Array<Self>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
211        execute_sparse(&array, ctx).map(ExecutionResult::done)
212    }
213}
214
/// Number of slots a sparse array carries.
pub(crate) const NUM_SLOTS: usize = 3;
/// Names of the slots, indexed by slot position: patch indices, patch values, and the
/// (optional) patch chunk offsets.
pub(crate) const SLOT_NAMES: [&str; NUM_SLOTS] =
    ["patch_indices", "patch_values", "patch_chunk_offsets"];
218
/// The data backing a [`SparseArray`]: a set of patches plus a fill value.
#[derive(Clone, Debug)]
pub struct SparseData {
    /// The patched positions and their values.
    patches: Patches,
    /// The fill value; its dtype is reported as the dtype of the array.
    fill_value: Scalar,
}
224
225impl Display for SparseData {
226    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
227        write!(f, "fill_value: {}", self.fill_value)
228    }
229}
230
/// The sparse encoding: the vtable type behind [`SparseArray`] and the home of its constructors.
#[derive(Clone, Debug)]
pub struct Sparse;
233
234impl Sparse {
235    /// Construct a new [`SparseArray`] from indices, values, length, and fill value.
236    pub fn try_new(
237        indices: ArrayRef,
238        values: ArrayRef,
239        len: usize,
240        fill_value: Scalar,
241    ) -> VortexResult<SparseArray> {
242        let dtype = fill_value.dtype().clone();
243        let patches = Patches::new(len, 0, indices, values, None)?;
244        let slots = SparseData::make_slots(&patches);
245        let data = SparseData::try_new_from_patches(patches, fill_value)?;
246        Ok(unsafe {
247            Array::from_parts_unchecked(ArrayParts::new(Sparse, dtype, len, data).with_slots(slots))
248        })
249    }
250
251    pub fn try_new_from_patches(patches: Patches, fill_value: Scalar) -> VortexResult<SparseArray> {
252        let dtype = fill_value.dtype().clone();
253        let len = patches.array_len();
254        let slots = SparseData::make_slots(&patches);
255        let data = SparseData::try_new_from_patches(patches, fill_value)?;
256        Ok(unsafe {
257            Array::from_parts_unchecked(ArrayParts::new(Sparse, dtype, len, data).with_slots(slots))
258        })
259    }
260
261    pub(crate) unsafe fn new_unchecked(patches: Patches, fill_value: Scalar) -> SparseArray {
262        let dtype = fill_value.dtype().clone();
263        let len = patches.array_len();
264        let slots = SparseData::make_slots(&patches);
265        let data = unsafe { SparseData::new_unchecked(patches, fill_value) };
266        unsafe {
267            Array::from_parts_unchecked(ArrayParts::new(Sparse, dtype, len, data).with_slots(slots))
268        }
269    }
270
271    /// Encode the given array as a [`SparseArray`].
272    pub fn encode(array: &ArrayRef, fill_value: Option<Scalar>) -> VortexResult<ArrayRef> {
273        SparseData::encode(array, fill_value)
274    }
275}
276
277impl SparseData {
278    fn normalize_patches_dtype(patches: Patches, fill_value: &Scalar) -> VortexResult<Patches> {
279        let fill_dtype = fill_value.dtype();
280        let values_dtype = patches.values().dtype();
281
282        vortex_ensure!(
283            values_dtype.eq_ignore_nullability(fill_dtype),
284            "fill value, {:?}, should be instance of values dtype, {} but was {}.",
285            fill_value,
286            values_dtype,
287            fill_dtype,
288        );
289
290        if values_dtype == fill_dtype {
291            Ok(patches)
292        } else {
293            patches.cast_values(fill_dtype)
294        }
295    }
296
297    pub fn validate(
298        patches: &Patches,
299        fill_value: &Scalar,
300        dtype: &DType,
301        len: usize,
302    ) -> VortexResult<()> {
303        vortex_ensure!(
304            fill_value.dtype() == dtype,
305            "fill value dtype {} does not match array dtype {}",
306            fill_value.dtype(),
307            dtype,
308        );
309        vortex_ensure!(
310            patches.array_len() == len,
311            "patches length {} does not match array length {}",
312            patches.array_len(),
313            len
314        );
315        vortex_ensure!(
316            patches.values().dtype() == dtype,
317            "patch values dtype {} does not match array dtype {}",
318            patches.values().dtype(),
319            dtype,
320        );
321        Ok(())
322    }
323
324    fn make_slots(patches: &Patches) -> Vec<Option<ArrayRef>> {
325        vec![
326            Some(patches.indices().clone()),
327            Some(patches.values().clone()),
328            patches.chunk_offsets().clone(),
329        ]
330    }
331
332    /// Build a new SparseArray from an existing set of patches.
333    pub fn try_new_from_patches(patches: Patches, fill_value: Scalar) -> VortexResult<Self> {
334        let patches = Self::normalize_patches_dtype(patches, &fill_value)?;
335        Ok(Self {
336            patches,
337            fill_value,
338        })
339    }
340
341    pub(crate) unsafe fn new_unchecked(patches: Patches, fill_value: Scalar) -> Self {
342        Self {
343            patches,
344            fill_value,
345        }
346    }
347
348    /// Returns the length of the array.
349    #[inline]
350    pub fn len(&self) -> usize {
351        self.patches.array_len()
352    }
353
354    /// Returns whether the array is empty.
355    #[inline]
356    pub fn is_empty(&self) -> bool {
357        self.patches.array_len() == 0
358    }
359
360    /// Returns the logical data type of the array.
361    #[inline]
362    pub fn dtype(&self) -> &DType {
363        self.fill_scalar().dtype()
364    }
365
366    #[inline]
367    pub fn patches(&self) -> &Patches {
368        &self.patches
369    }
370
371    #[inline]
372    pub fn resolved_patches(&self) -> VortexResult<Patches> {
373        let patches = self.patches();
374        let indices_offset = Scalar::from(patches.offset()).cast(patches.indices().dtype())?;
375        let indices = patches.indices().binary(
376            ConstantArray::new(indices_offset, patches.indices().len()).into_array(),
377            Operator::Sub,
378        )?;
379
380        Patches::new(
381            patches.array_len(),
382            0,
383            indices,
384            patches.values().clone(),
385            // TODO(0ax1): handle chunk offsets
386            None,
387        )
388    }
389
390    #[inline]
391    pub fn fill_scalar(&self) -> &Scalar {
392        &self.fill_value
393    }
394
395    /// Encode given array as a SparseArray.
396    ///
397    /// Optionally provided fill value will be respected if the array is less than 90% null.
398    pub fn encode(array: &ArrayRef, fill_value: Option<Scalar>) -> VortexResult<ArrayRef> {
399        if let Some(fill_value) = fill_value.as_ref()
400            && !array.dtype().eq_ignore_nullability(fill_value.dtype())
401        {
402            vortex_bail!(
403                "Array and fill value types must have the same base type. got {} and {}",
404                array.dtype(),
405                fill_value.dtype()
406            )
407        }
408        let mask = array
409            .validity()?
410            .to_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())?;
411
412        if mask.all_false() {
413            // Array is constant NULL
414            return Ok(
415                ConstantArray::new(Scalar::null(array.dtype().clone()), array.len()).into_array(),
416            );
417        } else if mask.false_count() as f64 > (0.9 * mask.len() as f64) {
418            // Array is dominated by NULL but has non-NULL values
419            // TODO(joe): use exe ctx?
420            let non_null_values = array
421                .filter(mask.clone())?
422                .execute::<Canonical>(&mut LEGACY_SESSION.create_execution_ctx())?
423                .into_array();
424            let non_null_indices = match mask.indices() {
425                AllOr::All => {
426                    // We already know that the mask is 90%+ false
427                    unreachable!("Mask is mostly null")
428                }
429                AllOr::None => {
430                    // we know there are some non-NULL values
431                    unreachable!("Mask is mostly null but not all null")
432                }
433                AllOr::Some(values) => {
434                    let buffer: Buffer<u32> = values
435                        .iter()
436                        .map(|&v| v.try_into().vortex_expect("indices must fit in u32"))
437                        .collect();
438
439                    buffer.into_array()
440                }
441            };
442
443            return Sparse::try_new(
444                non_null_indices,
445                non_null_values,
446                array.len(),
447                Scalar::null(array.dtype().clone()),
448            )
449            .map(IntoArray::into_array);
450        }
451
452        let fill = if let Some(fill) = fill_value {
453            fill.cast(array.dtype())?
454        } else {
455            // TODO(robert): Support other dtypes, only thing missing is getting most common value out of the array
456            let (top_pvalue, _) = array
457                .to_primitive()
458                .top_value()?
459                .vortex_expect("Non empty or all null array");
460
461            Scalar::primitive_value(top_pvalue, top_pvalue.ptype(), array.dtype().nullability())
462        };
463
464        let fill_array = ConstantArray::new(fill.clone(), array.len()).into_array();
465        let non_top_mask = Mask::from_buffer(
466            array
467                .binary(fill_array.clone(), Operator::NotEq)?
468                .fill_null(Scalar::bool(true, Nullability::NonNullable))?
469                .to_bool()
470                .to_bit_buffer(),
471        );
472
473        let non_top_values = array
474            .filter(non_top_mask.clone())?
475            .execute::<Canonical>(&mut LEGACY_SESSION.create_execution_ctx())?
476            .into_array();
477
478        let indices: Buffer<u64> = match non_top_mask {
479            Mask::AllTrue(count) => {
480                // all true -> complete slice
481                (0u64..count as u64).collect()
482            }
483            Mask::AllFalse(_) => {
484                // All values are equal to the top value
485                return Ok(fill_array);
486            }
487            Mask::Values(values) => values.indices().iter().map(|v| *v as u64).collect(),
488        };
489
490        Sparse::try_new(indices.into_array(), non_top_values, array.len(), fill)
491            .map(IntoArray::into_array)
492    }
493}
494
495impl ValidityVTable<Sparse> for Sparse {
496    fn validity(array: ArrayView<'_, Sparse>) -> VortexResult<Validity> {
497        let patches = unsafe {
498            Patches::new_unchecked(
499                array.patches.array_len(),
500                array.patches.offset(),
501                array.patches.indices().clone(),
502                array
503                    .patches
504                    .values()
505                    .validity()?
506                    .to_array(array.patches.values().len()),
507                array.patches.chunk_offsets().clone(),
508                array.patches.offset_within_chunk(),
509            )
510        };
511
512        Ok(Validity::Array(
513            unsafe { Sparse::new_unchecked(patches, array.fill_value.is_valid().into()) }
514                .into_array(),
515        ))
516    }
517}
518
// Unit tests for construction, scalar access, slicing, validity and encoding of sparse arrays.
#[cfg(test)]
mod test {
    use itertools::Itertools;
    use vortex_array::IntoArray;
    use vortex_array::LEGACY_SESSION;
    use vortex_array::VortexSessionExecute;
    use vortex_array::arrays::ConstantArray;
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::assert_arrays_eq;
    use vortex_array::builtins::ArrayBuiltins;
    use vortex_array::dtype::DType;
    use vortex_array::dtype::Nullability;
    use vortex_array::dtype::PType;
    use vortex_array::scalar::Scalar;
    use vortex_array::validity::Validity;
    use vortex_buffer::buffer;
    use vortex_error::VortexExpect;

    use super::*;
    use crate::Sparse;

    /// A null fill value of nullable `i32` dtype.
    fn nullable_fill() -> Scalar {
        Scalar::null(DType::Primitive(PType::I32, Nullability::Nullable))
    }

    /// A non-null `i32` fill value (42).
    fn non_nullable_fill() -> Scalar {
        Scalar::from(42i32)
    }

    /// Builds a length-10 sparse array with patches 100, 200, 300 at positions 2, 5, 8; every
    /// other position takes `fill_value`.
    fn sparse_array(fill_value: Scalar) -> ArrayRef {
        // With a null fill the logical array is:
        // [null, null, 100, null, null, 200, null, null, 300, null]
        let mut values = buffer![100i32, 200, 300].into_array();
        // Cast the values so that they have exactly the fill value's dtype.
        values = values.cast(fill_value.dtype().clone()).unwrap();

        Sparse::try_new(buffer![2u64, 5, 8].into_array(), values, 10, fill_value)
            .unwrap()
            .into_array()
    }

    // Unpatched positions yield the fill value; patched positions yield the patch value.
    #[test]
    pub fn test_scalar_at() {
        let array = sparse_array(nullable_fill());

        assert_eq!(
            array
                .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap(),
            nullable_fill()
        );
        assert_eq!(
            array
                .execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap(),
            Scalar::from(Some(100_i32))
        );
        assert_eq!(
            array
                .execute_scalar(5, &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap(),
            Scalar::from(Some(200_i32))
        );
    }

    // Index 10 is one past the end of the length-10 array.
    #[test]
    #[should_panic(expected = "out of bounds")]
    fn test_scalar_at_oob() {
        let array = sparse_array(nullable_fill());
        array
            .execute_scalar(10, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();
    }

    // Same as `test_scalar_at`, but with constant arrays as indices and values and a single
    // patch at position 10 of a length-100 array.
    #[test]
    pub fn test_scalar_at_again() {
        let arr = Sparse::try_new(
            ConstantArray::new(10u32, 1).into_array(),
            ConstantArray::new(Scalar::primitive(1234u32, Nullability::Nullable), 1).into_array(),
            100,
            Scalar::null(DType::Primitive(PType::U32, Nullability::Nullable)),
        )
        .unwrap();

        assert_eq!(
            arr.execute_scalar(10, &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap()
                .as_primitive()
                .typed_value::<u32>(),
            Some(1234)
        );
        assert!(
            arr.execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap()
                .is_null()
        );
        assert!(
            arr.execute_scalar(99, &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap()
                .is_null()
        );
    }

    // After slicing 2..7, the patch at original position 2 sits at position 0.
    #[test]
    pub fn scalar_at_sliced() {
        let sliced = sparse_array(nullable_fill()).slice(2..7).unwrap();
        assert_eq!(
            usize::try_from(
                &sliced
                    .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
                    .unwrap()
            )
            .unwrap(),
            100
        );
    }

    // With a null fill, only the patched positions (original 2 and 5) are valid in 2..7.
    #[test]
    pub fn validity_mask_sliced_null_fill() {
        let sliced = sparse_array(nullable_fill()).slice(2..7).unwrap();
        assert_eq!(
            sliced
                .validity()
                .unwrap()
                .to_mask(sliced.len(), &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap(),
            Mask::from_iter(vec![true, false, false, true, false])
        );
    }

    // The inverse case: null patch values over a valid fill, so the patched positions are
    // the only invalid ones in 2..7.
    #[test]
    pub fn validity_mask_sliced_nonnull_fill() {
        let sliced = Sparse::try_new(
            buffer![2u64, 5, 8].into_array(),
            ConstantArray::new(
                Scalar::null(DType::Primitive(PType::F32, Nullability::Nullable)),
                3,
            )
            .into_array(),
            10,
            Scalar::primitive(1.0f32, Nullability::Nullable),
        )
        .unwrap()
        .slice(2..7)
        .unwrap();

        assert_eq!(
            sliced
                .validity()
                .unwrap()
                .to_mask(sliced.len(), &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap(),
            Mask::from_iter(vec![false, true, true, false, true])
        );
    }

    // Slicing an already sliced array: 1..8 followed by 1..6 covers original positions 2..7.
    #[test]
    pub fn scalar_at_sliced_twice() {
        let sliced_once = sparse_array(nullable_fill()).slice(1..8).unwrap();
        assert_eq!(
            usize::try_from(
                &sliced_once
                    .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
                    .unwrap()
            )
            .unwrap(),
            100
        );

        let sliced_twice = sliced_once.slice(1..6).unwrap();
        assert_eq!(
            usize::try_from(
                &sliced_twice
                    .execute_scalar(3, &mut LEGACY_SESSION.create_execution_ctx())
                    .unwrap()
            )
            .unwrap(),
            200
        );
    }

    // With a null fill, exactly the patched positions 2, 5 and 8 are valid.
    #[test]
    pub fn sparse_validity_mask() {
        let array = sparse_array(nullable_fill());
        assert_eq!(
            array
                .validity()
                .unwrap()
                .to_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap()
                .to_bit_buffer()
                .iter()
                .collect_vec(),
            [
                false, false, true, false, false, true, false, false, true, false
            ]
        );
    }

    // With a non-null fill and non-null patches, every position is valid.
    #[test]
    fn sparse_validity_mask_non_null_fill() {
        let array = sparse_array(non_nullable_fill());
        assert!(
            array
                .validity()
                .unwrap()
                .to_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap()
                .all_true()
        );
    }

    // A patch index of 100 does not fit an array of length 100, so `unwrap` is expected to
    // panic (compare `test_valid_length`, where length 101 is accepted).
    #[test]
    #[should_panic]
    fn test_invalid_length() {
        let values = buffer![15_u32, 135, 13531, 42].into_array();
        let indices = buffer![10_u64, 11, 50, 100].into_array();

        Sparse::try_new(indices, values, 100, 0_u32.into()).unwrap();
    }

    // A patch index of 100 is accepted once the array length is 101.
    #[test]
    fn test_valid_length() {
        let values = buffer![15_u32, 135, 13531, 42].into_array();
        let indices = buffer![10_u64, 11, 50, 100].into_array();

        Sparse::try_new(indices, values, 101, 0_u32.into()).unwrap();
    }

    // Encoding an array with interleaved nulls must preserve both validity and values.
    #[test]
    fn encode_with_nulls() {
        let original = PrimitiveArray::new(
            buffer![0i32, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4],
            Validity::from_iter(vec![
                true, true, false, true, false, true, false, true, true, false, true, false,
            ]),
        );
        let sparse = Sparse::encode(&original.clone().into_array(), None)
            .vortex_expect("Sparse::encode should succeed for test data");
        assert_eq!(
            sparse
                .validity()
                .unwrap()
                .to_mask(sparse.len(), &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap(),
            Mask::from_iter(vec![
                true, true, false, true, false, true, false, true, true, false, true, false,
            ])
        );
        assert_arrays_eq!(sparse.to_primitive(), original);
    }

    // Null patch values (positions 4 and 6) must be reported as invalid, just like the
    // unpatched positions that take the null fill.
    #[test]
    fn validity_mask_includes_null_values_when_fill_is_null() {
        let indices = buffer![0u8, 2, 4, 6, 8].into_array();
        let values = PrimitiveArray::from_option_iter([Some(0i16), Some(1), None, None, Some(4)])
            .into_array();
        let array = Sparse::try_new(indices, values, 10, Scalar::null_native::<i16>()).unwrap();
        let actual = array
            .validity()
            .unwrap()
            .to_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();
        let expected = Mask::from_iter([
            true, false, true, false, false, false, false, false, true, false,
        ]);

        assert_eq!(actual, expected);
    }
}