Skip to main content

vortex_sparse/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::fmt::Display;
6use std::fmt::Formatter;
7use std::hash::Hash;
8use std::hash::Hasher;
9
10use kernel::PARENT_KERNELS;
11use prost::Message as _;
12use vortex_array::Array;
13use vortex_array::ArrayEq;
14use vortex_array::ArrayHash;
15use vortex_array::ArrayId;
16use vortex_array::ArrayParts;
17use vortex_array::ArrayRef;
18use vortex_array::ArrayView;
19use vortex_array::Canonical;
20use vortex_array::ExecutionCtx;
21use vortex_array::ExecutionResult;
22use vortex_array::IntoArray;
23use vortex_array::Precision;
24use vortex_array::arrays::BoolArray;
25use vortex_array::arrays::ConstantArray;
26use vortex_array::arrays::PrimitiveArray;
27use vortex_array::arrays::bool::BoolArrayExt;
28use vortex_array::buffer::BufferHandle;
29use vortex_array::builtins::ArrayBuiltins;
30use vortex_array::dtype::DType;
31use vortex_array::dtype::Nullability;
32use vortex_array::patches::Patches;
33use vortex_array::patches::PatchesMetadata;
34use vortex_array::scalar::Scalar;
35use vortex_array::scalar::ScalarValue;
36use vortex_array::scalar_fn::fns::operators::Operator;
37use vortex_array::serde::ArrayChildren;
38use vortex_array::validity::Validity;
39use vortex_array::vtable::VTable;
40use vortex_array::vtable::ValidityVTable;
41use vortex_buffer::Buffer;
42use vortex_buffer::ByteBufferMut;
43use vortex_error::VortexExpect as _;
44use vortex_error::VortexResult;
45use vortex_error::vortex_bail;
46use vortex_error::vortex_ensure;
47use vortex_error::vortex_ensure_eq;
48use vortex_error::vortex_panic;
49use vortex_mask::AllOr;
50use vortex_mask::Mask;
51use vortex_session::VortexSession;
52use vortex_session::registry::CachedId;
53
54use crate::canonical::execute_sparse;
55use crate::rules::RULES;
56
57mod canonical;
58mod compute;
59mod kernel;
60mod ops;
61mod rules;
62mod slice;
63
/// A [`Sparse`]-encoded Vortex array.
///
/// This is an alias for `Array<Sparse>`; instances are built with [`Sparse::try_new`],
/// [`Sparse::try_new_from_patches`] or [`Sparse::encode`].
pub type SparseArray = Array<Sparse>;
66
/// Protobuf metadata written when a [`SparseArray`] is serialized.
///
/// Only the patches metadata is recorded here. The fill value is not part of this message: it
/// is exposed as the array's single buffer (`"fill_value"`) by the `VTable` implementation.
#[derive(Clone, prost::Message)]
#[repr(C)]
pub struct SparseMetadata {
    /// Metadata describing the patches (the explicitly stored indices and values).
    #[prost(message, required, tag = "1")]
    patches: PatchesMetadata,
}
73
74impl ArrayHash for SparseData {
75    fn array_hash<H: Hasher>(&self, state: &mut H, precision: Precision) {
76        self.patches.array_hash(state, precision);
77        self.fill_value.hash(state);
78    }
79}
80
81impl ArrayEq for SparseData {
82    fn array_eq(&self, other: &Self, precision: Precision) -> bool {
83        self.patches.array_eq(&other.patches, precision) && self.fill_value == other.fill_value
84    }
85}
86
87impl VTable for Sparse {
88    type ArrayData = SparseData;
89
90    type OperationsVTable = Self;
91    type ValidityVTable = Self;
92
93    fn id(&self) -> ArrayId {
94        static ID: CachedId = CachedId::new("vortex.sparse");
95        *ID
96    }
97
98    fn validate(
99        &self,
100        data: &Self::ArrayData,
101        dtype: &DType,
102        len: usize,
103        _slots: &[Option<ArrayRef>],
104    ) -> VortexResult<()> {
105        SparseData::validate(data.patches(), data.fill_scalar(), dtype, len)
106    }
107
108    fn nbuffers(_array: ArrayView<'_, Self>) -> usize {
109        1
110    }
111
112    fn buffer(array: ArrayView<'_, Self>, idx: usize) -> BufferHandle {
113        match idx {
114            0 => {
115                let fill_value_buffer =
116                    ScalarValue::to_proto_bytes::<ByteBufferMut>(array.fill_value.value()).freeze();
117                BufferHandle::new_host(fill_value_buffer)
118            }
119            _ => vortex_panic!("SparseArray buffer index {idx} out of bounds"),
120        }
121    }
122
123    fn buffer_name(_array: ArrayView<'_, Self>, idx: usize) -> Option<String> {
124        match idx {
125            0 => Some("fill_value".to_string()),
126            _ => vortex_panic!("SparseArray buffer_name index {idx} out of bounds"),
127        }
128    }
129
130    fn serialize(
131        array: ArrayView<'_, Self>,
132        _session: &VortexSession,
133    ) -> VortexResult<Option<Vec<u8>>> {
134        let patches = array.patches().to_metadata(array.len(), array.dtype())?;
135        let metadata = SparseMetadata { patches };
136
137        // Note that we DO NOT serialize the fill value since that is stored in the buffers.
138        Ok(Some(metadata.encode_to_vec()))
139    }
140
141    fn deserialize(
142        &self,
143        dtype: &DType,
144        len: usize,
145        metadata: &[u8],
146        buffers: &[BufferHandle],
147        children: &dyn ArrayChildren,
148        session: &VortexSession,
149    ) -> VortexResult<ArrayParts<Self>> {
150        let metadata = SparseMetadata::decode(metadata)?;
151
152        // Once we have the patches metadata, we need to get the fill value from the buffers.
153
154        if buffers.len() != 1 {
155            vortex_bail!("Expected 1 buffer, got {}", buffers.len());
156        }
157        let scalar_bytes: &[u8] = &buffers[0].clone().try_to_host_sync()?;
158
159        let scalar_value = ScalarValue::from_proto_bytes(scalar_bytes, dtype, session)?;
160        let fill_value = Scalar::try_new(dtype.clone(), scalar_value)?;
161
162        vortex_ensure_eq!(
163            children.len(),
164            2,
165            "SparseArray expects 2 children for sparse encoding, found {}",
166            children.len()
167        );
168
169        let patch_indices = children.get(
170            0,
171            &metadata.patches.indices_dtype()?,
172            metadata.patches.len()?,
173        )?;
174        let patch_values = children.get(1, dtype, metadata.patches.len()?)?;
175
176        let patches = Patches::new(
177            len,
178            metadata.patches.offset()?,
179            patch_indices,
180            patch_values,
181            None,
182        )?;
183        let slots = SparseData::make_slots(&patches);
184        let data = SparseData::try_new_from_patches(patches, fill_value)?;
185        Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots))
186    }
187
188    fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String {
189        SLOT_NAMES[idx].to_string()
190    }
191
192    fn reduce_parent(
193        array: ArrayView<'_, Self>,
194        parent: &ArrayRef,
195        child_idx: usize,
196    ) -> VortexResult<Option<ArrayRef>> {
197        RULES.evaluate(array, parent, child_idx)
198    }
199
200    fn execute_parent(
201        array: ArrayView<'_, Self>,
202        parent: &ArrayRef,
203        child_idx: usize,
204        ctx: &mut ExecutionCtx,
205    ) -> VortexResult<Option<ArrayRef>> {
206        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
207    }
208
209    fn execute(array: Array<Self>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
210        execute_sparse(&array, ctx).map(ExecutionResult::done)
211    }
212}
213
/// Number of child slots of a sparse array.
pub(crate) const NUM_SLOTS: usize = 3;
/// Names of the child slots, in slot order: the patch indices, the patch values and the
/// (optional) patch chunk offsets. This is the same order in which `SparseData::make_slots`
/// lays out the slots.
pub(crate) const SLOT_NAMES: [&str; NUM_SLOTS] =
    ["patch_indices", "patch_values", "patch_chunk_offsets"];
217
/// The encoding-specific data of a [`SparseArray`]: a set of patches plus the fill value used
/// for every position that is not patched.
#[derive(Clone, Debug)]
pub struct SparseData {
    /// The explicitly stored positions and their values.
    patches: Patches,
    /// The scalar standing in for every position not covered by `patches`; its dtype is also
    /// reported as the dtype of the data.
    fill_value: Scalar,
}
223
224impl Display for SparseData {
225    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
226        write!(f, "fill_value: {}", self.fill_value)
227    }
228}
229
/// The sparse encoding: a marker type that implements the array vtable and provides the
/// constructors for [`SparseArray`].
#[derive(Clone, Debug)]
pub struct Sparse;
232
233impl Sparse {
234    /// Construct a new [`SparseArray`] from indices, values, length, and fill value.
235    pub fn try_new(
236        indices: ArrayRef,
237        values: ArrayRef,
238        len: usize,
239        fill_value: Scalar,
240    ) -> VortexResult<SparseArray> {
241        let dtype = fill_value.dtype().clone();
242        let patches = Patches::new(len, 0, indices, values, None)?;
243        let slots = SparseData::make_slots(&patches);
244        let data = SparseData::try_new_from_patches(patches, fill_value)?;
245        Ok(unsafe {
246            Array::from_parts_unchecked(ArrayParts::new(Sparse, dtype, len, data).with_slots(slots))
247        })
248    }
249
250    pub fn try_new_from_patches(patches: Patches, fill_value: Scalar) -> VortexResult<SparseArray> {
251        let dtype = fill_value.dtype().clone();
252        let len = patches.array_len();
253        let slots = SparseData::make_slots(&patches);
254        let data = SparseData::try_new_from_patches(patches, fill_value)?;
255        Ok(unsafe {
256            Array::from_parts_unchecked(ArrayParts::new(Sparse, dtype, len, data).with_slots(slots))
257        })
258    }
259
260    pub(crate) unsafe fn new_unchecked(patches: Patches, fill_value: Scalar) -> SparseArray {
261        let dtype = fill_value.dtype().clone();
262        let len = patches.array_len();
263        let slots = SparseData::make_slots(&patches);
264        let data = unsafe { SparseData::new_unchecked(patches, fill_value) };
265        unsafe {
266            Array::from_parts_unchecked(ArrayParts::new(Sparse, dtype, len, data).with_slots(slots))
267        }
268    }
269
270    /// Encode the given array as a [`SparseArray`].
271    pub fn encode(
272        array: &ArrayRef,
273        fill_value: Option<Scalar>,
274        ctx: &mut ExecutionCtx,
275    ) -> VortexResult<ArrayRef> {
276        SparseData::encode(array, fill_value, ctx)
277    }
278}
279
280impl SparseData {
281    fn normalize_patches_dtype(patches: Patches, fill_value: &Scalar) -> VortexResult<Patches> {
282        let fill_dtype = fill_value.dtype();
283        let values_dtype = patches.values().dtype();
284
285        vortex_ensure!(
286            values_dtype.eq_ignore_nullability(fill_dtype),
287            "fill value, {:?}, should be instance of values dtype, {} but was {}.",
288            fill_value,
289            values_dtype,
290            fill_dtype,
291        );
292
293        if values_dtype == fill_dtype {
294            Ok(patches)
295        } else {
296            patches.cast_values(fill_dtype)
297        }
298    }
299
300    pub fn validate(
301        patches: &Patches,
302        fill_value: &Scalar,
303        dtype: &DType,
304        len: usize,
305    ) -> VortexResult<()> {
306        vortex_ensure!(
307            fill_value.dtype() == dtype,
308            "fill value dtype {} does not match array dtype {}",
309            fill_value.dtype(),
310            dtype,
311        );
312        vortex_ensure!(
313            patches.array_len() == len,
314            "patches length {} does not match array length {}",
315            patches.array_len(),
316            len
317        );
318        vortex_ensure!(
319            patches.values().dtype() == dtype,
320            "patch values dtype {} does not match array dtype {}",
321            patches.values().dtype(),
322            dtype,
323        );
324        Ok(())
325    }
326
327    fn make_slots(patches: &Patches) -> Vec<Option<ArrayRef>> {
328        vec![
329            Some(patches.indices().clone()),
330            Some(patches.values().clone()),
331            patches.chunk_offsets().clone(),
332        ]
333    }
334
335    /// Build a new SparseArray from an existing set of patches.
336    pub fn try_new_from_patches(patches: Patches, fill_value: Scalar) -> VortexResult<Self> {
337        let patches = Self::normalize_patches_dtype(patches, &fill_value)?;
338        Ok(Self {
339            patches,
340            fill_value,
341        })
342    }
343
344    pub(crate) unsafe fn new_unchecked(patches: Patches, fill_value: Scalar) -> Self {
345        Self {
346            patches,
347            fill_value,
348        }
349    }
350
351    /// Returns the length of the array.
352    #[inline]
353    pub fn len(&self) -> usize {
354        self.patches.array_len()
355    }
356
357    /// Returns whether the array is empty.
358    #[inline]
359    pub fn is_empty(&self) -> bool {
360        self.patches.array_len() == 0
361    }
362
363    /// Returns the logical data type of the array.
364    #[inline]
365    pub fn dtype(&self) -> &DType {
366        self.fill_scalar().dtype()
367    }
368
369    #[inline]
370    pub fn patches(&self) -> &Patches {
371        &self.patches
372    }
373
374    #[inline]
375    pub fn resolved_patches(&self) -> VortexResult<Patches> {
376        let patches = self.patches();
377        let indices_offset = Scalar::from(patches.offset()).cast(patches.indices().dtype())?;
378        let indices = patches.indices().binary(
379            ConstantArray::new(indices_offset, patches.indices().len()).into_array(),
380            Operator::Sub,
381        )?;
382
383        Patches::new(
384            patches.array_len(),
385            0,
386            indices,
387            patches.values().clone(),
388            // TODO(0ax1): handle chunk offsets
389            None,
390        )
391    }
392
393    #[inline]
394    pub fn fill_scalar(&self) -> &Scalar {
395        &self.fill_value
396    }
397
398    /// Encode given array as a SparseArray.
399    ///
400    /// Optionally provided fill value will be respected if the array is less than 90% null.
401    pub fn encode(
402        array: &ArrayRef,
403        fill_value: Option<Scalar>,
404        ctx: &mut ExecutionCtx,
405    ) -> VortexResult<ArrayRef> {
406        if let Some(fill_value) = fill_value.as_ref()
407            && !array.dtype().eq_ignore_nullability(fill_value.dtype())
408        {
409            vortex_bail!(
410                "Array and fill value types must have the same base type. got {} and {}",
411                array.dtype(),
412                fill_value.dtype()
413            )
414        }
415        let mask = array.validity()?.execute_mask(array.len(), ctx)?;
416
417        if mask.all_false() {
418            // Array is constant NULL
419            return Ok(
420                ConstantArray::new(Scalar::null(array.dtype().clone()), array.len()).into_array(),
421            );
422        } else if mask.false_count() as f64 > (0.9 * mask.len() as f64) {
423            // Array is dominated by NULL but has non-NULL values
424            let non_null_values = array
425                .filter(mask.clone())?
426                .execute::<Canonical>(ctx)?
427                .into_array();
428            let non_null_indices = match mask.indices() {
429                AllOr::All => {
430                    // We already know that the mask is 90%+ false
431                    unreachable!("Mask is mostly null")
432                }
433                AllOr::None => {
434                    // we know there are some non-NULL values
435                    unreachable!("Mask is mostly null but not all null")
436                }
437                AllOr::Some(values) => {
438                    let buffer: Buffer<u32> = values
439                        .iter()
440                        .map(|&v| v.try_into().vortex_expect("indices must fit in u32"))
441                        .collect();
442
443                    buffer.into_array()
444                }
445            };
446
447            return Sparse::try_new(
448                non_null_indices,
449                non_null_values,
450                array.len(),
451                Scalar::null(array.dtype().clone()),
452            )
453            .map(IntoArray::into_array);
454        }
455
456        let fill = if let Some(fill) = fill_value {
457            fill.cast(array.dtype())?
458        } else {
459            // TODO(robert): Support other dtypes, only thing missing is getting most common value out of the array
460            let primitive = array.clone().execute::<PrimitiveArray>(ctx)?;
461            let (top_pvalue, _) = primitive
462                .top_value()?
463                .vortex_expect("Non empty or all null array");
464
465            Scalar::primitive_value(top_pvalue, top_pvalue.ptype(), array.dtype().nullability())
466        };
467
468        let fill_array = ConstantArray::new(fill.clone(), array.len()).into_array();
469        let non_top_bool = array
470            .binary(fill_array.clone(), Operator::NotEq)?
471            .fill_null(Scalar::bool(true, Nullability::NonNullable))?
472            .execute::<BoolArray>(ctx)?;
473        let non_top_mask = Mask::from_buffer(non_top_bool.to_bit_buffer());
474
475        let non_top_values = array
476            .filter(non_top_mask.clone())?
477            .execute::<Canonical>(ctx)?
478            .into_array();
479
480        let indices: Buffer<u64> = match non_top_mask {
481            Mask::AllTrue(count) => {
482                // all true -> complete slice
483                (0u64..count as u64).collect()
484            }
485            Mask::AllFalse(_) => {
486                // All values are equal to the top value
487                return Ok(fill_array);
488            }
489            Mask::Values(values) => values.indices().iter().map(|v| *v as u64).collect(),
490        };
491
492        Sparse::try_new(indices.into_array(), non_top_values, array.len(), fill)
493            .map(IntoArray::into_array)
494    }
495}
496
497impl ValidityVTable<Sparse> for Sparse {
498    fn validity(array: ArrayView<'_, Sparse>) -> VortexResult<Validity> {
499        let patches = unsafe {
500            Patches::new_unchecked(
501                array.patches.array_len(),
502                array.patches.offset(),
503                array.patches.indices().clone(),
504                array
505                    .patches
506                    .values()
507                    .validity()?
508                    .to_array(array.patches.values().len()),
509                array.patches.chunk_offsets().clone(),
510                array.patches.offset_within_chunk(),
511            )
512        };
513
514        Ok(Validity::Array(
515            unsafe { Sparse::new_unchecked(patches, array.fill_value.is_valid().into()) }
516                .into_array(),
517        ))
518    }
519}
520
// Unit tests for constructing, slicing, encoding and computing the validity of sparse arrays.
#[cfg(test)]
mod test {
    use itertools::Itertools;
    use vortex_array::IntoArray;
    use vortex_array::LEGACY_SESSION;
    use vortex_array::VortexSessionExecute;
    use vortex_array::arrays::ConstantArray;
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::assert_arrays_eq;
    use vortex_array::builtins::ArrayBuiltins;
    use vortex_array::dtype::DType;
    use vortex_array::dtype::Nullability;
    use vortex_array::dtype::PType;
    use vortex_array::scalar::Scalar;
    use vortex_array::validity::Validity;
    use vortex_buffer::buffer;
    use vortex_error::VortexExpect;

    use super::*;
    use crate::Sparse;

    /// A null fill value of type nullable `i32`.
    fn nullable_fill() -> Scalar {
        Scalar::null(DType::Primitive(PType::I32, Nullability::Nullable))
    }

    /// A non-null fill value: `42i32`.
    fn non_nullable_fill() -> Scalar {
        Scalar::from(42i32)
    }

    /// Builds a sparse array of length 10 with the values 100, 200 and 300 at positions 2, 5
    /// and 8; every other position holds `fill_value`.
    fn sparse_array(fill_value: Scalar) -> ArrayRef {
        // merged array (with a null fill): [null, null, 100, null, null, 200, null, null, 300, null]
        let mut values = buffer![100i32, 200, 300].into_array();
        // Cast the values up front so that they have exactly the fill value's dtype.
        values = values.cast(fill_value.dtype().clone()).unwrap();

        Sparse::try_new(buffer![2u64, 5, 8].into_array(), values, 10, fill_value)
            .unwrap()
            .into_array()
    }

    /// Unpatched positions yield the fill value, patched positions yield the patch value.
    #[test]
    pub fn test_scalar_at() {
        let array = sparse_array(nullable_fill());

        assert_eq!(
            array
                .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap(),
            nullable_fill()
        );
        assert_eq!(
            array
                .execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap(),
            Scalar::from(Some(100_i32))
        );
        assert_eq!(
            array
                .execute_scalar(5, &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap(),
            Scalar::from(Some(200_i32))
        );
    }

    /// Reading one past the end of the array (index 10 of a length-10 array) must fail.
    #[test]
    #[should_panic(expected = "out of bounds")]
    fn test_scalar_at_oob() {
        let array = sparse_array(nullable_fill());
        array
            .execute_scalar(10, &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();
    }

    /// Same as `test_scalar_at`, but with constant arrays as indices and values and a single
    /// patch at position 10 of a length-100 array.
    #[test]
    pub fn test_scalar_at_again() {
        let arr = Sparse::try_new(
            ConstantArray::new(10u32, 1).into_array(),
            ConstantArray::new(Scalar::primitive(1234u32, Nullability::Nullable), 1).into_array(),
            100,
            Scalar::null(DType::Primitive(PType::U32, Nullability::Nullable)),
        )
        .unwrap();

        assert_eq!(
            arr.execute_scalar(10, &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap()
                .as_primitive()
                .typed_value::<u32>(),
            Some(1234)
        );
        assert!(
            arr.execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap()
                .is_null()
        );
        assert!(
            arr.execute_scalar(99, &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap()
                .is_null()
        );
    }

    /// After slicing to 2..7 the patch originally at position 2 is found at position 0.
    #[test]
    pub fn scalar_at_sliced() {
        let sliced = sparse_array(nullable_fill()).slice(2..7).unwrap();
        assert_eq!(
            usize::try_from(
                &sliced
                    .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
                    .unwrap()
            )
            .unwrap(),
            100
        );
    }

    /// With a null fill, only the patched positions (0 and 3 of the slice) are valid.
    #[test]
    pub fn validity_mask_sliced_null_fill() {
        let sliced = sparse_array(nullable_fill()).slice(2..7).unwrap();
        assert_eq!(
            sliced
                .validity()
                .unwrap()
                .execute_mask(sliced.len(), &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap(),
            Mask::from_iter(vec![true, false, false, true, false])
        );
    }

    /// With a non-null fill and null patch values, the patched positions (0 and 3 of the
    /// slice) are the invalid ones.
    #[test]
    pub fn validity_mask_sliced_nonnull_fill() {
        let sliced = Sparse::try_new(
            buffer![2u64, 5, 8].into_array(),
            ConstantArray::new(
                Scalar::null(DType::Primitive(PType::F32, Nullability::Nullable)),
                3,
            )
            .into_array(),
            10,
            Scalar::primitive(1.0f32, Nullability::Nullable),
        )
        .unwrap()
        .slice(2..7)
        .unwrap();

        assert_eq!(
            sliced
                .validity()
                .unwrap()
                .execute_mask(sliced.len(), &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap(),
            Mask::from_iter(vec![false, true, true, false, true])
        );
    }

    /// Slicing an already sliced array keeps the patches at the expected positions.
    #[test]
    pub fn scalar_at_sliced_twice() {
        // 1..8 of the original: the value 100 (originally at 2) is now at position 1.
        let sliced_once = sparse_array(nullable_fill()).slice(1..8).unwrap();
        assert_eq!(
            usize::try_from(
                &sliced_once
                    .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
                    .unwrap()
            )
            .unwrap(),
            100
        );

        // 1..6 of the slice, i.e. 2..7 of the original: the value 200 (originally at 5) is now
        // at position 3.
        let sliced_twice = sliced_once.slice(1..6).unwrap();
        assert_eq!(
            usize::try_from(
                &sliced_twice
                    .execute_scalar(3, &mut LEGACY_SESSION.create_execution_ctx())
                    .unwrap()
            )
            .unwrap(),
            200
        );
    }

    /// With a null fill, exactly the patched positions 2, 5 and 8 are valid.
    #[test]
    pub fn sparse_validity_mask() {
        let array = sparse_array(nullable_fill());
        assert_eq!(
            array
                .validity()
                .unwrap()
                .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap()
                .to_bit_buffer()
                .iter()
                .collect_vec(),
            [
                false, false, true, false, false, true, false, false, true, false
            ]
        );
    }

    /// With a non-null fill and non-null patch values, every position is valid.
    #[test]
    fn sparse_validity_mask_non_null_fill() {
        let array = sparse_array(non_nullable_fill());
        assert!(
            array
                .validity()
                .unwrap()
                .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
                .unwrap()
                .all_true()
        );
    }

    /// A patch index equal to the array length (100) is out of range, so construction is
    /// expected to fail and the `unwrap` to panic.
    #[test]
    #[should_panic]
    fn test_invalid_length() {
        let values = buffer![15_u32, 135, 13531, 42].into_array();
        let indices = buffer![10_u64, 11, 50, 100].into_array();

        Sparse::try_new(indices, values, 100, 0_u32.into()).unwrap();
    }

    /// The same patches are accepted once the array is long enough (101) to contain index 100.
    #[test]
    fn test_valid_length() {
        let values = buffer![15_u32, 135, 13531, 42].into_array();
        let indices = buffer![10_u64, 11, 50, 100].into_array();

        Sparse::try_new(indices, values, 101, 0_u32.into()).unwrap();
    }

    /// Encoding an array with some nulls (fewer than 90%) and no explicit fill value preserves
    /// both the validity and the values.
    #[test]
    fn encode_with_nulls() {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let original = PrimitiveArray::new(
            buffer![0i32, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4],
            Validity::from_iter(vec![
                true, true, false, true, false, true, false, true, true, false, true, false,
            ]),
        );
        let sparse = Sparse::encode(&original.clone().into_array(), None, &mut ctx)
            .vortex_expect("Sparse::encode should succeed for test data");
        assert_eq!(
            sparse
                .validity()
                .unwrap()
                .execute_mask(sparse.len(), &mut ctx)
                .unwrap(),
            Mask::from_iter(vec![
                true, true, false, true, false, true, false, true, true, false, true, false,
            ])
        );
        // Decoding back to a primitive array must round-trip the original.
        let sparse_primitive = sparse.execute::<PrimitiveArray>(&mut ctx).unwrap();
        assert_arrays_eq!(sparse_primitive, original);
    }

    /// Null patch values (at positions 4 and 6) must be reported as invalid, just like the
    /// positions covered by the null fill value.
    #[test]
    fn validity_mask_includes_null_values_when_fill_is_null() {
        let indices = buffer![0u8, 2, 4, 6, 8].into_array();
        let values = PrimitiveArray::from_option_iter([Some(0i16), Some(1), None, None, Some(4)])
            .into_array();
        let array = Sparse::try_new(indices, values, 10, Scalar::null_native::<i16>()).unwrap();
        let actual = array
            .validity()
            .unwrap()
            .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
            .unwrap();
        let expected = Mask::from_iter([
            true, false, true, false, false, false, false, false, true, false,
        ]);

        assert_eq!(actual, expected);
    }
}