Skip to main content

vortex_sparse/
lib.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::fmt::Display;
6use std::fmt::Formatter;
7use std::hash::Hash;
8use std::hash::Hasher;
9
10use kernel::PARENT_KERNELS;
11use prost::Message as _;
12use vortex_array::AnyCanonical;
13use vortex_array::Array;
14use vortex_array::ArrayEq;
15use vortex_array::ArrayHash;
16use vortex_array::ArrayId;
17use vortex_array::ArrayParts;
18use vortex_array::ArrayRef;
19use vortex_array::ArraySlots;
20use vortex_array::ArrayView;
21use vortex_array::Canonical;
22use vortex_array::ExecutionCtx;
23use vortex_array::ExecutionResult;
24use vortex_array::IntoArray;
25use vortex_array::Precision;
26use vortex_array::arrays::BoolArray;
27use vortex_array::arrays::ConstantArray;
28use vortex_array::arrays::Primitive;
29use vortex_array::arrays::PrimitiveArray;
30use vortex_array::arrays::bool::BoolArrayExt;
31use vortex_array::buffer::BufferHandle;
32use vortex_array::builtins::ArrayBuiltins;
33use vortex_array::dtype::DType;
34use vortex_array::dtype::Nullability;
35use vortex_array::patches::PatchSlotIndices;
36use vortex_array::patches::Patches;
37use vortex_array::patches::PatchesData;
38use vortex_array::patches::PatchesMetadata;
39use vortex_array::require_child;
40use vortex_array::require_opt_child;
41use vortex_array::scalar::Scalar;
42use vortex_array::scalar::ScalarValue;
43use vortex_array::scalar_fn::fns::operators::Operator;
44use vortex_array::serde::ArrayChildren;
45use vortex_array::validity::Validity;
46use vortex_array::vtable::VTable;
47use vortex_array::vtable::ValidityVTable;
48use vortex_buffer::Buffer;
49use vortex_buffer::ByteBufferMut;
50use vortex_error::VortexExpect as _;
51use vortex_error::VortexResult;
52use vortex_error::vortex_bail;
53use vortex_error::vortex_ensure;
54use vortex_error::vortex_ensure_eq;
55use vortex_error::vortex_panic;
56use vortex_mask::AllOr;
57use vortex_mask::Mask;
58use vortex_session::VortexSession;
59use vortex_session::registry::CachedId;
60
61use crate::canonical::execute_sparse;
62use crate::rules::RULES;
63
64mod canonical;
65mod compute;
66mod kernel;
67mod ops;
68mod rules;
69mod slice;
70
71/// A [`Sparse`]-encoded Vortex array.
72pub type SparseArray = Array<Sparse>;
73
74#[vortex_array::array_slots(Sparse)]
75pub struct SparseSlots {
76    pub patch_indices: ArrayRef,
77    pub patch_values: ArrayRef,
78    pub patch_chunk_offsets: Option<ArrayRef>,
79}
80
81/// Concrete parts of a [`SparseArray`] after iterative execution.
82pub(crate) struct SparseParts {
83    pub patches: Patches,
84    pub fill_value: Scalar,
85    pub dtype: DType,
86    pub len: usize,
87}
88
89pub(crate) trait SparseOwnedExt {
90    fn into_parts(self) -> VortexResult<SparseParts>;
91}
92
93impl SparseOwnedExt for Array<Sparse> {
94    fn into_parts(self) -> VortexResult<SparseParts> {
95        let patches = Patches::new(
96            self.len(),
97            self.patches().offset(),
98            self.as_ref().slots()[SparseSlots::PATCH_INDICES]
99                .clone()
100                .vortex_expect("indices"),
101            self.as_ref().slots()[SparseSlots::PATCH_VALUES]
102                .clone()
103                .vortex_expect("values"),
104            self.as_ref().slots()[SparseSlots::PATCH_CHUNK_OFFSETS].clone(),
105        )?;
106        Ok(SparseParts {
107            patches,
108            fill_value: self.fill_scalar().clone(),
109            dtype: self.dtype().clone(),
110            len: self.len(),
111        })
112    }
113}
114
115#[derive(Clone, prost::Message)]
116#[repr(C)]
117pub struct SparseMetadata {
118    #[prost(message, required, tag = "1")]
119    patches: PatchesMetadata,
120}
121
122impl ArrayHash for SparseData {
123    fn array_hash<H: Hasher>(&self, state: &mut H, _precision: Precision) {
124        self.array_len.hash(state);
125        self.patches_data.hash(state);
126        self.fill_value.hash(state);
127    }
128}
129
130impl ArrayEq for SparseData {
131    fn array_eq(&self, other: &Self, _precision: Precision) -> bool {
132        self.array_len == other.array_len
133            && self.patches_data == other.patches_data
134            && self.fill_value == other.fill_value
135    }
136}
137
138impl VTable for Sparse {
139    type TypedArrayData = SparseData;
140
141    type OperationsVTable = Self;
142    type ValidityVTable = Self;
143
144    fn id(&self) -> ArrayId {
145        static ID: CachedId = CachedId::new("vortex.sparse");
146        *ID
147    }
148
149    fn validate(
150        &self,
151        data: &Self::TypedArrayData,
152        dtype: &DType,
153        len: usize,
154        slots: &[Option<ArrayRef>],
155    ) -> VortexResult<()> {
156        let patches = SparseData::patches_from_slots(data, len, slots);
157        SparseData::validate(&patches, data.fill_scalar(), dtype, len)
158    }
159
160    fn nbuffers(_array: ArrayView<'_, Self>) -> usize {
161        1
162    }
163
164    fn buffer(array: ArrayView<'_, Self>, idx: usize) -> BufferHandle {
165        match idx {
166            0 => {
167                let fill_value_buffer =
168                    ScalarValue::to_proto_bytes::<ByteBufferMut>(array.fill_value.value()).freeze();
169                BufferHandle::new_host(fill_value_buffer)
170            }
171            _ => vortex_panic!("SparseArray buffer index {idx} out of bounds"),
172        }
173    }
174
175    fn buffer_name(_array: ArrayView<'_, Self>, idx: usize) -> Option<String> {
176        match idx {
177            0 => Some("fill_value".to_string()),
178            _ => vortex_panic!("SparseArray buffer_name index {idx} out of bounds"),
179        }
180    }
181
182    fn serialize(
183        array: ArrayView<'_, Self>,
184        _session: &VortexSession,
185    ) -> VortexResult<Option<Vec<u8>>> {
186        let patches = array.patches().to_metadata(array.len(), array.dtype())?;
187        let metadata = SparseMetadata { patches };
188
189        // Note that we DO NOT serialize the fill value since that is stored in the buffers.
190        Ok(Some(metadata.encode_to_vec()))
191    }
192
193    fn deserialize(
194        &self,
195        dtype: &DType,
196        len: usize,
197        metadata: &[u8],
198        buffers: &[BufferHandle],
199        children: &dyn ArrayChildren,
200        session: &VortexSession,
201    ) -> VortexResult<ArrayParts<Self>> {
202        let metadata = SparseMetadata::decode(metadata)?;
203
204        // Once we have the patches metadata, we need to get the fill value from the buffers.
205
206        if buffers.len() != 1 {
207            vortex_bail!("Expected 1 buffer, got {}", buffers.len());
208        }
209        let scalar_bytes: &[u8] = &buffers[0].clone().try_to_host_sync()?;
210
211        let scalar_value = ScalarValue::from_proto_bytes(scalar_bytes, dtype, session)?;
212        let fill_value = Scalar::try_new(dtype.clone(), scalar_value)?;
213
214        vortex_ensure_eq!(
215            children.len(),
216            2,
217            "SparseArray expects 2 children for sparse encoding, found {}",
218            children.len()
219        );
220
221        let patch_indices = children.get(
222            0,
223            &metadata.patches.indices_dtype()?,
224            metadata.patches.len()?,
225        )?;
226        let patch_values = children.get(1, dtype, metadata.patches.len()?)?;
227
228        let patches = Patches::new(
229            len,
230            metadata.patches.offset()?,
231            patch_indices,
232            patch_values,
233            None,
234        )?;
235        let slots = SparseData::make_slots(&patches);
236        let data = SparseData::from_patches(&patches, fill_value)?;
237        Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots))
238    }
239
240    fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String {
241        SparseSlots::NAMES[idx].to_string()
242    }
243
244    fn reduce_parent(
245        array: ArrayView<'_, Self>,
246        parent: &ArrayRef,
247        child_idx: usize,
248    ) -> VortexResult<Option<ArrayRef>> {
249        RULES.evaluate(array, parent, child_idx)
250    }
251
252    fn execute_parent(
253        array: ArrayView<'_, Self>,
254        parent: &ArrayRef,
255        child_idx: usize,
256        ctx: &mut ExecutionCtx,
257    ) -> VortexResult<Option<ArrayRef>> {
258        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
259    }
260
261    fn execute(array: Array<Self>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
262        // Resolve offset first: wrap indices in Binary(indices, offset, Sub) and
263        // reassemble with offset=0. Uses slot children (not data) since the executor
264        // may have updated slots via reduce_parent/execute_parent.
265        let array = if array.patches().offset() != 0 {
266            let offset = array.patches().offset();
267            let indices = array.patch_indices();
268            let values = array.patch_values().clone();
269            let len = array.len();
270            let offset_scalar = Scalar::from(offset).cast(indices.dtype())?;
271            let resolved_indices = indices.binary(
272                ConstantArray::new(offset_scalar, indices.len()).into_array(),
273                Operator::Sub,
274            )?;
275            let patches = Patches::new(len, 0, resolved_indices.clone(), values, None)?;
276            // Decompose, update in place, and reassemble without re-validation.
277            match array.try_into_parts() {
278                Ok(mut parts) => {
279                    parts.data.patches_data = PatchesData::from_patches(&patches);
280                    parts.slots[SparseSlots::PATCH_INDICES] = Some(resolved_indices);
281                    parts.slots[SparseSlots::PATCH_CHUNK_OFFSETS] = None;
282                    unsafe { Array::from_parts_unchecked(parts) }
283                }
284                Err(array) => unsafe {
285                    Sparse::new_unchecked(patches, array.fill_scalar().clone())
286                },
287            }
288        } else {
289            array
290        };
291
292        // Require children to be executed through the scheduler,
293        // enabling cross-step optimization via reduce_parent rules.
294        let array = require_child!(
295            array, array.patch_indices(), SparseSlots::PATCH_INDICES => Primitive
296        );
297        let array = require_child!(
298            array, array.patch_values(), SparseSlots::PATCH_VALUES => AnyCanonical
299        );
300        require_opt_child!(
301            array,
302            array.patch_chunk_offsets(),
303            SparseSlots::PATCH_CHUNK_OFFSETS => Primitive
304        );
305
306        let parts = array.into_parts()?;
307        // TODO(joe): remove ctx from execute_sparse since all slots should be canonical.
308        execute_sparse(parts, ctx).map(ExecutionResult::done)
309    }
310}
311
312const PATCH_SLOTS: PatchSlotIndices = PatchSlotIndices {
313    indices: SparseSlots::PATCH_INDICES,
314    values: SparseSlots::PATCH_VALUES,
315    chunk_offsets: SparseSlots::PATCH_CHUNK_OFFSETS,
316};
317
318#[derive(Clone, Debug)]
319pub struct SparseData {
320    /// The total length of the sparse array.
321    array_len: usize,
322    /// Patch metadata (offset, offset_within_chunk) for reconstructing Patches from slots.
323    patches_data: PatchesData,
324    fill_value: Scalar,
325}
326
327impl Display for SparseData {
328    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
329        write!(f, "fill_value: {}", self.fill_value)
330    }
331}
332
333#[derive(Clone, Debug)]
334pub struct Sparse;
335
336impl Sparse {
337    /// Construct a new [`SparseArray`] from indices, values, length, and fill value.
338    pub fn try_new(
339        indices: ArrayRef,
340        values: ArrayRef,
341        len: usize,
342        fill_value: Scalar,
343    ) -> VortexResult<SparseArray> {
344        let dtype = fill_value.dtype().clone();
345        let patches = Patches::new(len, 0, indices, values, None)?;
346        let slots = SparseData::make_slots(&patches);
347        let data = SparseData::from_patches(&patches, fill_value)?;
348        Ok(unsafe {
349            Array::from_parts_unchecked(ArrayParts::new(Sparse, dtype, len, data).with_slots(slots))
350        })
351    }
352
353    pub fn try_new_from_patches(patches: Patches, fill_value: Scalar) -> VortexResult<SparseArray> {
354        let dtype = fill_value.dtype().clone();
355        let len = patches.array_len();
356        let slots = SparseData::make_slots(&patches);
357        let data = SparseData::from_patches(&patches, fill_value)?;
358        Ok(unsafe {
359            Array::from_parts_unchecked(ArrayParts::new(Sparse, dtype, len, data).with_slots(slots))
360        })
361    }
362
363    pub(crate) unsafe fn new_unchecked(patches: Patches, fill_value: Scalar) -> SparseArray {
364        let dtype = fill_value.dtype().clone();
365        let len = patches.array_len();
366        let slots = SparseData::make_slots(&patches);
367        let data = SparseData::from_patches_unchecked(&patches, fill_value);
368        unsafe {
369            Array::from_parts_unchecked(ArrayParts::new(Sparse, dtype, len, data).with_slots(slots))
370        }
371    }
372
373    /// Encode the given array as a [`SparseArray`].
374    pub fn encode(
375        array: &ArrayRef,
376        fill_value: Option<Scalar>,
377        ctx: &mut ExecutionCtx,
378    ) -> VortexResult<ArrayRef> {
379        SparseData::encode(array, fill_value, ctx)
380    }
381}
382
383impl SparseData {
384    fn normalize_patches_dtype(patches: Patches, fill_value: &Scalar) -> VortexResult<Patches> {
385        let fill_dtype = fill_value.dtype();
386        let values_dtype = patches.values().dtype();
387
388        vortex_ensure!(
389            values_dtype.eq_ignore_nullability(fill_dtype),
390            "fill value, {:?}, should be instance of values dtype, {} but was {}.",
391            fill_value,
392            values_dtype,
393            fill_dtype,
394        );
395
396        if values_dtype == fill_dtype {
397            Ok(patches)
398        } else {
399            patches.cast_values(fill_dtype)
400        }
401    }
402
403    pub fn validate(
404        patches: &Patches,
405        fill_value: &Scalar,
406        dtype: &DType,
407        len: usize,
408    ) -> VortexResult<()> {
409        vortex_ensure!(
410            fill_value.dtype() == dtype,
411            "fill value dtype {} does not match array dtype {}",
412            fill_value.dtype(),
413            dtype,
414        );
415        vortex_ensure!(
416            patches.array_len() == len,
417            "patches length {} does not match array length {}",
418            patches.array_len(),
419            len
420        );
421        vortex_ensure!(
422            patches.values().dtype() == dtype,
423            "patch values dtype {} does not match array dtype {}",
424            patches.values().dtype(),
425            dtype,
426        );
427        Ok(())
428    }
429
430    fn make_slots(patches: &Patches) -> ArraySlots {
431        let mut slots = ArraySlots::with_capacity(SparseSlots::COUNT);
432        PatchesData::push_slots(&mut slots, Some(patches));
433        slots
434    }
435
436    /// Reconstruct a [`Patches`] from the stored metadata and the array's slots.
437    fn patches_from_slots(data: &SparseData, len: usize, slots: &[Option<ArrayRef>]) -> Patches {
438        PatchesData::patches_from_slots(Some(&data.patches_data), len, slots, PATCH_SLOTS)
439            .vortex_expect("SparseArray patch slots must be present")
440    }
441
442    /// Build a new SparseData from an existing set of patches, normalizing dtypes.
443    pub fn try_new_from_patches(patches: Patches, fill_value: Scalar) -> VortexResult<Self> {
444        let patches = Self::normalize_patches_dtype(patches, &fill_value)?;
445        Ok(Self::from_patches_unchecked(&patches, fill_value))
446    }
447
448    /// Extract metadata from patches to create SparseData, with dtype normalization.
449    fn from_patches(patches: &Patches, fill_value: Scalar) -> VortexResult<Self> {
450        let patches = Self::normalize_patches_dtype(patches.clone(), &fill_value)?;
451        Ok(Self::from_patches_unchecked(&patches, fill_value))
452    }
453
454    /// Extract metadata from patches to create SparseData, without validation.
455    fn from_patches_unchecked(patches: &Patches, fill_value: Scalar) -> Self {
456        Self {
457            array_len: patches.array_len(),
458            patches_data: PatchesData::from_patches(patches),
459            fill_value,
460        }
461    }
462
463    /// Returns the length of the array.
464    #[inline]
465    pub fn len(&self) -> usize {
466        self.array_len
467    }
468
469    /// Returns whether the array is empty.
470    #[inline]
471    pub fn is_empty(&self) -> bool {
472        self.array_len == 0
473    }
474
475    /// Returns the logical data type of the array.
476    #[inline]
477    pub fn dtype(&self) -> &DType {
478        self.fill_scalar().dtype()
479    }
480
481    /// Returns the offset of the patches within the parent array.
482    #[inline]
483    pub fn offset(&self) -> usize {
484        self.patches_data.offset()
485    }
486
487    #[inline]
488    pub fn fill_scalar(&self) -> &Scalar {
489        &self.fill_value
490    }
491
492    /// Encode given array as a SparseArray.
493    ///
494    /// Optionally provided fill value will be respected if the array is less than 90% null.
495    pub fn encode(
496        array: &ArrayRef,
497        fill_value: Option<Scalar>,
498        ctx: &mut ExecutionCtx,
499    ) -> VortexResult<ArrayRef> {
500        if let Some(fill_value) = fill_value.as_ref()
501            && !array.dtype().eq_ignore_nullability(fill_value.dtype())
502        {
503            vortex_bail!(
504                "Array and fill value types must have the same base type. got {} and {}",
505                array.dtype(),
506                fill_value.dtype()
507            )
508        }
509        let mask = array.validity()?.execute_mask(array.len(), ctx)?;
510
511        if mask.all_false() {
512            // Array is constant NULL
513            return Ok(
514                ConstantArray::new(Scalar::null(array.dtype().clone()), array.len()).into_array(),
515            );
516        } else if mask.false_count() as f64 > (0.9 * mask.len() as f64) {
517            // Array is dominated by NULL but has non-NULL values
518            let non_null_values = array
519                .filter(mask.clone())?
520                .execute::<Canonical>(ctx)?
521                .into_array();
522            let non_null_indices = match mask.indices() {
523                AllOr::All => {
524                    // We already know that the mask is 90%+ false
525                    unreachable!("Mask is mostly null")
526                }
527                AllOr::None => {
528                    // we know there are some non-NULL values
529                    unreachable!("Mask is mostly null but not all null")
530                }
531                AllOr::Some(values) => {
532                    let buffer: Buffer<u32> = values
533                        .iter()
534                        .map(|&v| v.try_into().vortex_expect("indices must fit in u32"))
535                        .collect();
536
537                    buffer.into_array()
538                }
539            };
540
541            return Sparse::try_new(
542                non_null_indices,
543                non_null_values,
544                array.len(),
545                Scalar::null(array.dtype().clone()),
546            )
547            .map(IntoArray::into_array);
548        }
549
550        let fill = if let Some(fill) = fill_value {
551            fill.cast(array.dtype())?
552        } else {
553            // TODO(robert): Support other dtypes, only thing missing is getting most common value out of the array
554            let primitive = array.clone().execute::<PrimitiveArray>(ctx)?;
555            let (top_pvalue, _) = primitive
556                .top_value()?
557                .vortex_expect("Non empty or all null array");
558
559            Scalar::primitive_value(top_pvalue, top_pvalue.ptype(), array.dtype().nullability())
560        };
561
562        let fill_array = ConstantArray::new(fill.clone(), array.len()).into_array();
563        let non_top_bool = array
564            .binary(fill_array.clone(), Operator::NotEq)?
565            .fill_null(Scalar::bool(true, Nullability::NonNullable))?
566            .execute::<BoolArray>(ctx)?;
567        let non_top_mask = Mask::from_buffer(non_top_bool.to_bit_buffer());
568
569        let non_top_values = array
570            .filter(non_top_mask.clone())?
571            .execute::<Canonical>(ctx)?
572            .into_array();
573
574        let indices: Buffer<u64> = match non_top_mask {
575            Mask::AllTrue(count) => {
576                // all true -> complete slice
577                (0u64..count as u64).collect()
578            }
579            Mask::AllFalse(_) => {
580                // All values are equal to the top value
581                return Ok(fill_array);
582            }
583            Mask::Values(values) => values.indices().iter().map(|v| *v as u64).collect(),
584        };
585
586        Sparse::try_new(indices.into_array(), non_top_values, array.len(), fill)
587            .map(IntoArray::into_array)
588    }
589}
590
591/// Extension trait for accessing patches on [`SparseArray`] and [`ArrayView<'_, Sparse>`].
592///
593/// Patches are reconstructed from the array's slots and stored metadata on each call.
594pub trait SparseExt {
595    /// Reconstruct patches from the array's slots and metadata.
596    fn patches(&self) -> Patches;
597
598    /// Return patches with offset-resolved indices (offset subtracted from each index).
599    fn resolved_patches(&self) -> VortexResult<Patches> {
600        let patches = self.patches();
601        let indices_offset = Scalar::from(patches.offset()).cast(patches.indices().dtype())?;
602        let indices = patches.indices().binary(
603            ConstantArray::new(indices_offset, patches.indices().len()).into_array(),
604            Operator::Sub,
605        )?;
606
607        Patches::new(
608            patches.array_len(),
609            0,
610            indices,
611            patches.values().clone(),
612            // TODO(0ax1): handle chunk offsets
613            None,
614        )
615    }
616}
617
618impl SparseExt for ArrayView<'_, Sparse> {
619    fn patches(&self) -> Patches {
620        SparseData::patches_from_slots(self.data(), self.len(), self.slots())
621    }
622}
623
624impl SparseExt for Array<Sparse> {
625    fn patches(&self) -> Patches {
626        SparseData::patches_from_slots(self.data(), self.as_array().len(), self.slots())
627    }
628}
629
630impl ValidityVTable<Sparse> for Sparse {
631    fn validity(array: ArrayView<'_, Sparse>) -> VortexResult<Validity> {
632        let orig_patches = array.patches();
633        let validity_patches = unsafe {
634            Patches::new_unchecked(
635                orig_patches.array_len(),
636                orig_patches.offset(),
637                orig_patches.indices().clone(),
638                orig_patches
639                    .values()
640                    .validity()?
641                    .to_array(orig_patches.values().len()),
642                orig_patches.chunk_offsets().clone(),
643                orig_patches.offset_within_chunk(),
644            )
645        };
646
647        Ok(Validity::Array(
648            unsafe { Sparse::new_unchecked(validity_patches, array.fill_value.is_valid().into()) }
649                .into_array(),
650        ))
651    }
652}
653
654#[cfg(test)]
655mod test {
656    use itertools::Itertools;
657    use vortex_array::IntoArray;
658    use vortex_array::LEGACY_SESSION;
659    use vortex_array::VortexSessionExecute;
660    use vortex_array::arrays::ConstantArray;
661    use vortex_array::arrays::PrimitiveArray;
662    use vortex_array::assert_arrays_eq;
663    use vortex_array::builtins::ArrayBuiltins;
664    use vortex_array::dtype::DType;
665    use vortex_array::dtype::Nullability;
666    use vortex_array::dtype::PType;
667    use vortex_array::scalar::Scalar;
668    use vortex_array::validity::Validity;
669    use vortex_buffer::buffer;
670    use vortex_error::VortexExpect;
671
672    use super::*;
673    use crate::Sparse;
674
675    fn nullable_fill() -> Scalar {
676        Scalar::null(DType::Primitive(PType::I32, Nullability::Nullable))
677    }
678
679    fn non_nullable_fill() -> Scalar {
680        Scalar::from(42i32)
681    }
682
683    fn sparse_array(fill_value: Scalar) -> ArrayRef {
684        // merged array: [null, null, 100, null, null, 200, null, null, 300, null]
685        let mut values = buffer![100i32, 200, 300].into_array();
686        values = values.cast(fill_value.dtype().clone()).unwrap();
687
688        Sparse::try_new(buffer![2u64, 5, 8].into_array(), values, 10, fill_value)
689            .unwrap()
690            .into_array()
691    }
692
693    #[test]
694    pub fn test_scalar_at() {
695        let array = sparse_array(nullable_fill());
696
697        assert_eq!(
698            array
699                .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
700                .unwrap(),
701            nullable_fill()
702        );
703        assert_eq!(
704            array
705                .execute_scalar(2, &mut LEGACY_SESSION.create_execution_ctx())
706                .unwrap(),
707            Scalar::from(Some(100_i32))
708        );
709        assert_eq!(
710            array
711                .execute_scalar(5, &mut LEGACY_SESSION.create_execution_ctx())
712                .unwrap(),
713            Scalar::from(Some(200_i32))
714        );
715    }
716
717    #[test]
718    #[should_panic(expected = "out of bounds")]
719    fn test_scalar_at_oob() {
720        let array = sparse_array(nullable_fill());
721        array
722            .execute_scalar(10, &mut LEGACY_SESSION.create_execution_ctx())
723            .unwrap();
724    }
725
726    #[test]
727    pub fn test_scalar_at_again() {
728        let arr = Sparse::try_new(
729            ConstantArray::new(10u32, 1).into_array(),
730            ConstantArray::new(Scalar::primitive(1234u32, Nullability::Nullable), 1).into_array(),
731            100,
732            Scalar::null(DType::Primitive(PType::U32, Nullability::Nullable)),
733        )
734        .unwrap();
735
736        assert_eq!(
737            arr.execute_scalar(10, &mut LEGACY_SESSION.create_execution_ctx())
738                .unwrap()
739                .as_primitive()
740                .typed_value::<u32>(),
741            Some(1234)
742        );
743        assert!(
744            arr.execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
745                .unwrap()
746                .is_null()
747        );
748        assert!(
749            arr.execute_scalar(99, &mut LEGACY_SESSION.create_execution_ctx())
750                .unwrap()
751                .is_null()
752        );
753    }
754
755    #[test]
756    pub fn scalar_at_sliced() {
757        let sliced = sparse_array(nullable_fill()).slice(2..7).unwrap();
758        assert_eq!(
759            usize::try_from(
760                &sliced
761                    .execute_scalar(0, &mut LEGACY_SESSION.create_execution_ctx())
762                    .unwrap()
763            )
764            .unwrap(),
765            100
766        );
767    }
768
769    #[test]
770    pub fn validity_mask_sliced_null_fill() {
771        let sliced = sparse_array(nullable_fill()).slice(2..7).unwrap();
772        assert_eq!(
773            sliced
774                .validity()
775                .unwrap()
776                .execute_mask(sliced.len(), &mut LEGACY_SESSION.create_execution_ctx())
777                .unwrap(),
778            Mask::from_iter(vec![true, false, false, true, false])
779        );
780    }
781
782    #[test]
783    pub fn validity_mask_sliced_nonnull_fill() {
784        let sliced = Sparse::try_new(
785            buffer![2u64, 5, 8].into_array(),
786            ConstantArray::new(
787                Scalar::null(DType::Primitive(PType::F32, Nullability::Nullable)),
788                3,
789            )
790            .into_array(),
791            10,
792            Scalar::primitive(1.0f32, Nullability::Nullable),
793        )
794        .unwrap()
795        .slice(2..7)
796        .unwrap();
797
798        assert_eq!(
799            sliced
800                .validity()
801                .unwrap()
802                .execute_mask(sliced.len(), &mut LEGACY_SESSION.create_execution_ctx())
803                .unwrap(),
804            Mask::from_iter(vec![false, true, true, false, true])
805        );
806    }
807
808    #[test]
809    pub fn scalar_at_sliced_twice() {
810        let sliced_once = sparse_array(nullable_fill()).slice(1..8).unwrap();
811        assert_eq!(
812            usize::try_from(
813                &sliced_once
814                    .execute_scalar(1, &mut LEGACY_SESSION.create_execution_ctx())
815                    .unwrap()
816            )
817            .unwrap(),
818            100
819        );
820
821        let sliced_twice = sliced_once.slice(1..6).unwrap();
822        assert_eq!(
823            usize::try_from(
824                &sliced_twice
825                    .execute_scalar(3, &mut LEGACY_SESSION.create_execution_ctx())
826                    .unwrap()
827            )
828            .unwrap(),
829            200
830        );
831    }
832
833    #[test]
834    pub fn sparse_validity_mask() {
835        let array = sparse_array(nullable_fill());
836        assert_eq!(
837            array
838                .validity()
839                .unwrap()
840                .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
841                .unwrap()
842                .to_bit_buffer()
843                .iter()
844                .collect_vec(),
845            [
846                false, false, true, false, false, true, false, false, true, false
847            ]
848        );
849    }
850
851    #[test]
852    fn sparse_validity_mask_non_null_fill() {
853        let array = sparse_array(non_nullable_fill());
854        assert!(
855            array
856                .validity()
857                .unwrap()
858                .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
859                .unwrap()
860                .all_true()
861        );
862    }
863
864    #[test]
865    #[should_panic]
866    fn test_invalid_length() {
867        let values = buffer![15_u32, 135, 13531, 42].into_array();
868        let indices = buffer![10_u64, 11, 50, 100].into_array();
869
870        Sparse::try_new(indices, values, 100, 0_u32.into()).unwrap();
871    }
872
873    #[test]
874    fn test_valid_length() {
875        let values = buffer![15_u32, 135, 13531, 42].into_array();
876        let indices = buffer![10_u64, 11, 50, 100].into_array();
877
878        Sparse::try_new(indices, values, 101, 0_u32.into()).unwrap();
879    }
880
881    #[test]
882    fn encode_with_nulls() {
883        let mut ctx = LEGACY_SESSION.create_execution_ctx();
884        let original = PrimitiveArray::new(
885            buffer![0i32, 1, 2, 3, 3, 3, 3, 3, 3, 3, 4, 4],
886            Validity::from_iter(vec![
887                true, true, false, true, false, true, false, true, true, false, true, false,
888            ]),
889        );
890        let sparse = Sparse::encode(&original.clone().into_array(), None, &mut ctx)
891            .vortex_expect("Sparse::encode should succeed for test data");
892        assert_eq!(
893            sparse
894                .validity()
895                .unwrap()
896                .execute_mask(sparse.len(), &mut ctx)
897                .unwrap(),
898            Mask::from_iter(vec![
899                true, true, false, true, false, true, false, true, true, false, true, false,
900            ])
901        );
902        let sparse_primitive = sparse.execute::<PrimitiveArray>(&mut ctx).unwrap();
903        assert_arrays_eq!(sparse_primitive, original);
904    }
905
906    #[test]
907    fn validity_mask_includes_null_values_when_fill_is_null() {
908        let indices = buffer![0u8, 2, 4, 6, 8].into_array();
909        let values = PrimitiveArray::from_option_iter([Some(0i16), Some(1), None, None, Some(4)])
910            .into_array();
911        let array = Sparse::try_new(indices, values, 10, Scalar::null_native::<i16>()).unwrap();
912        let actual = array
913            .validity()
914            .unwrap()
915            .execute_mask(array.len(), &mut LEGACY_SESSION.create_execution_ctx())
916            .unwrap();
917        let expected = Mask::from_iter([
918            true, false, true, false, false, false, false, false, true, false,
919        ]);
920
921        assert_eq!(actual, expected);
922    }
923}