// vortex_alp/alp/array.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::fmt::Display;
6use std::fmt::Formatter;
7use std::hash::Hash;
8use std::hash::Hasher;
9
10use prost::Message;
11use vortex_array::Array;
12use vortex_array::ArrayEq;
13use vortex_array::ArrayHash;
14use vortex_array::ArrayId;
15use vortex_array::ArrayParts;
16use vortex_array::ArrayRef;
17use vortex_array::ArrayView;
18use vortex_array::ExecutionCtx;
19use vortex_array::ExecutionResult;
20use vortex_array::IntoArray;
21use vortex_array::Precision;
22use vortex_array::TypedArrayRef;
23use vortex_array::array_slots;
24use vortex_array::arrays::Primitive;
25use vortex_array::buffer::BufferHandle;
26use vortex_array::dtype::DType;
27use vortex_array::dtype::PType;
28use vortex_array::patches::Patches;
29use vortex_array::patches::PatchesMetadata;
30use vortex_array::require_child;
31use vortex_array::require_patches;
32use vortex_array::serde::ArrayChildren;
33use vortex_array::vtable::VTable;
34use vortex_array::vtable::ValidityChild;
35use vortex_array::vtable::ValidityVTableFromChild;
36use vortex_error::VortexExpect;
37use vortex_error::VortexResult;
38use vortex_error::vortex_bail;
39use vortex_error::vortex_ensure;
40use vortex_error::vortex_panic;
41use vortex_session::VortexSession;
42
43use crate::ALPFloat;
44use crate::alp::Exponents;
45use crate::alp::decompress::execute_decompress;
46use crate::alp::rules::PARENT_KERNELS;
47use crate::alp::rules::RULES;
48
/// An [`ALP`]-encoded Vortex array.
pub type ALPArray = Array<ALP>;
51
52impl ArrayHash for ALPData {
53    fn array_hash<H: Hasher>(&self, state: &mut H, _precision: Precision) {
54        self.exponents.hash(state);
55        self.patch_offset.hash(state);
56        self.patch_offset_within_chunk.hash(state);
57    }
58}
59
60impl ArrayEq for ALPData {
61    fn array_eq(&self, other: &Self, _precision: Precision) -> bool {
62        self.exponents == other.exponents
63            && self.patch_offset == other.patch_offset
64            && self.patch_offset_within_chunk == other.patch_offset_within_chunk
65    }
66}
67
impl VTable for ALP {
    type ArrayData = ALPData;

    type OperationsVTable = Self;
    // Validity queries are delegated to a child array (the encoded child, see
    // the `ValidityChild` impl at the bottom of this file).
    type ValidityVTable = ValidityVTableFromChild;

    fn id(&self) -> ArrayId {
        Self::ID
    }

    /// Validate the metadata, dtype, length, and child slots as a whole.
    fn validate(
        &self,
        data: &ALPData,
        dtype: &DType,
        len: usize,
        slots: &[Option<ArrayRef>],
    ) -> VortexResult<()> {
        let slots = ALPSlotsView::from_slots(slots);
        validate_parts(
            dtype,
            len,
            data.exponents,
            slots.encoded,
            patches_from_slots(
                &slots,
                data.patch_offset,
                data.patch_offset_within_chunk,
                len,
            ),
        )
    }

    // ALP owns no buffers; all data lives in the child arrays.
    fn nbuffers(_array: ArrayView<'_, Self>) -> usize {
        0
    }

    fn buffer(_array: ArrayView<'_, Self>, idx: usize) -> BufferHandle {
        // nbuffers() == 0, so every index is out of bounds.
        vortex_panic!("ALPArray buffer index {idx} out of bounds")
    }

    fn buffer_name(_array: ArrayView<'_, Self>, _idx: usize) -> Option<String> {
        None
    }

    /// Serialize the exponents and optional patch metadata as protobuf bytes.
    fn serialize(
        array: ArrayView<'_, Self>,
        _session: &VortexSession,
    ) -> VortexResult<Option<Vec<u8>>> {
        let exponents = array.exponents();
        Ok(Some(
            ALPMetadata {
                // Exponents are u8 in memory; widened to u32 for the wire format.
                exp_e: exponents.e as u32,
                exp_f: exponents.f as u32,
                patches: array
                    .patches()
                    .map(|p| p.to_metadata(array.len(), array.dtype()))
                    .transpose()?,
            }
            .encode_to_vec(),
        ))
    }

    /// Rebuild the array parts from serialized metadata plus child arrays.
    ///
    /// Child order: 0 = encoded ints, 1 = patch indices, 2 = patch values,
    /// 3 = patch chunk offsets (the patch children exist only when
    /// `metadata.patches` is present).
    fn deserialize(
        &self,
        dtype: &DType,
        len: usize,
        metadata: &[u8],
        _buffers: &[BufferHandle],
        children: &dyn ArrayChildren,
        _session: &VortexSession,
    ) -> VortexResult<ArrayParts<Self>> {
        let metadata = ALPMetadata::decode(metadata)?;
        // The encoded child carries the integer dtype matching the float width.
        let encoded_ptype = match &dtype {
            DType::Primitive(PType::F32, n) => DType::Primitive(PType::I32, *n),
            DType::Primitive(PType::F64, n) => DType::Primitive(PType::I64, *n),
            d => vortex_bail!(MismatchedTypes: "f32 or f64", d),
        };
        let encoded = children.get(0, &encoded_ptype, len)?;

        let patches = metadata
            .patches
            .map(|p| {
                let indices = children.get(1, &p.indices_dtype()?, p.len()?)?;
                // Patch values share the array's logical (float) dtype.
                let values = children.get(2, dtype, p.len()?)?;
                let chunk_offsets = p
                    .chunk_offsets_dtype()?
                    .map(|dtype| children.get(3, &dtype, usize::try_from(p.chunk_offsets_len())?))
                    .transpose()?;

                Patches::new(len, p.offset()?, indices, values, chunk_offsets)
            })
            .transpose()?;

        let slots = ALPData::make_slots(&encoded, &patches);
        let data = ALPData::new(
            Exponents {
                e: u8::try_from(metadata.exp_e)?,
                f: u8::try_from(metadata.exp_f)?,
            },
            patches,
        );
        Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots))
    }

    fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String {
        ALPSlots::NAMES[idx].to_string()
    }

    /// Decompress this ALP array into its canonical form.
    fn execute(array: Array<Self>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
        // Require the encoded child to be Primitive, and the patch children to
        // be in the form the decompression kernel expects, before executing.
        let array = require_child!(array, array.encoded(), ALPSlots::ENCODED => Primitive);
        require_patches!(
            array,
            ALPSlots::PATCH_INDICES,
            ALPSlots::PATCH_VALUES,
            ALPSlots::PATCH_CHUNK_OFFSETS
        );

        Ok(ExecutionResult::done(
            execute_decompress(array, ctx)?.into_array(),
        ))
    }

    /// Try to push a parent operation down through this ALP array using the
    /// crate's rewrite rules.
    fn reduce_parent(
        array: ArrayView<'_, Self>,
        parent: &ArrayRef,
        child_idx: usize,
    ) -> VortexResult<Option<ArrayRef>> {
        RULES.evaluate(array, parent, child_idx)
    }

    /// Execute a parent kernel specialized for ALP children, if one is registered.
    fn execute_parent(
        array: ArrayView<'_, Self>,
        parent: &ArrayRef,
        child_idx: usize,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<Option<ArrayRef>> {
        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
    }
}
207
/// Child-array slots of an [`ALP`] array; the order matches
/// [`ALPData::make_slots`] (encoded, then the three optional patch slots).
#[array_slots(ALP)]
pub struct ALPSlots {
    /// The ALP-encoded values array.
    pub encoded: ArrayRef,
    /// The indices of exception values that could not be ALP-encoded.
    pub patch_indices: Option<ArrayRef>,
    /// The exception values that could not be ALP-encoded.
    pub patch_values: Option<ArrayRef>,
    /// Chunk offsets for the patch indices/values.
    pub patch_chunk_offsets: Option<ArrayRef>,
}
219
/// Per-array metadata for [`ALP`]: the exponents plus the patch offsets, which
/// cannot be recovered from the child slot arrays alone.
#[derive(Clone, Debug)]
pub struct ALPData {
    // Copied from `Patches::offset()` when patches are present; `None` otherwise.
    patch_offset: Option<usize>,
    // Copied from `Patches::offset_within_chunk()` when patches are present.
    patch_offset_within_chunk: Option<usize>,
    // The ALP exponents (e, f) used for the float <-> int conversion.
    exponents: Exponents,
}
226
227impl Display for ALPData {
228    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
229        write!(f, "exponents: {}", self.exponents)?;
230        if let Some(offset) = self.patch_offset {
231            write!(f, ", patch_offset: {offset}")?;
232        }
233        Ok(())
234    }
235}
236
/// Marker type implementing the ALP encoding's [`VTable`].
#[derive(Clone, Debug)]
pub struct ALP;

impl ALP {
    /// Stable identifier for the ALP encoding.
    pub const ID: ArrayId = ArrayId::new_ref("vortex.alp");
}
243
/// Wire-format (protobuf) metadata for an ALP array; produced by
/// [`VTable::serialize`] and consumed by [`VTable::deserialize`].
#[derive(Clone, prost::Message)]
pub struct ALPMetadata {
    /// Exponent `e`, widened from `u8` for the wire format.
    #[prost(uint32, tag = "1")]
    pub(crate) exp_e: u32,
    /// Exponent `f`, widened from `u8` for the wire format.
    #[prost(uint32, tag = "2")]
    pub(crate) exp_f: u32,
    /// Patch metadata, present only when the array has patches.
    #[prost(message, optional, tag = "3")]
    pub(crate) patches: Option<PatchesMetadata>,
}
253
254impl ALPData {
255    fn validate_components(
256        encoded: &ArrayRef,
257        exponents: Exponents,
258        patches: Option<&Patches>,
259    ) -> VortexResult<()> {
260        vortex_ensure!(
261            matches!(
262                encoded.dtype(),
263                DType::Primitive(PType::I32 | PType::I64, _)
264            ),
265            "ALP encoded ints have invalid DType {}",
266            encoded.dtype(),
267        );
268
269        // Validate exponents are in-bounds for the float, and that patches have the proper
270        // length and type.
271        let Exponents { e, f } = exponents;
272        match encoded.dtype().as_ptype() {
273            PType::I32 => {
274                vortex_ensure!(exponents.e <= f32::MAX_EXPONENT, "e out of bounds: {e}");
275                vortex_ensure!(exponents.f <= f32::MAX_EXPONENT, "f out of bounds: {f}");
276                if let Some(patches) = patches {
277                    Self::validate_patches::<f32>(patches, encoded)?;
278                }
279            }
280            PType::I64 => {
281                vortex_ensure!(e <= f64::MAX_EXPONENT, "e out of bounds: {e}");
282                vortex_ensure!(f <= f64::MAX_EXPONENT, "f out of bounds: {f}");
283
284                if let Some(patches) = patches {
285                    Self::validate_patches::<f64>(patches, encoded)?;
286                }
287            }
288            _ => unreachable!(),
289        }
290
291        // Validate patches
292        if let Some(patches) = patches {
293            vortex_ensure!(
294                patches.array_len() == encoded.len(),
295                "patches array_len != encoded len: {} != {}",
296                patches.array_len(),
297                encoded.len()
298            );
299
300            // Verify that the patches DType are of the proper DType.
301        }
302
303        Ok(())
304    }
305
306    fn logical_dtype(encoded: &ArrayRef) -> VortexResult<DType> {
307        match encoded.dtype() {
308            DType::Primitive(PType::I32, nullability) => {
309                Ok(DType::Primitive(PType::F32, *nullability))
310            }
311            DType::Primitive(PType::I64, nullability) => {
312                Ok(DType::Primitive(PType::F64, *nullability))
313            }
314            _ => vortex_bail!("ALP encoded ints have invalid DType {}", encoded.dtype(),),
315        }
316    }
317
318    /// Validate that any patches provided are valid for the ALPArray.
319    fn validate_patches<T: ALPFloat>(patches: &Patches, encoded: &ArrayRef) -> VortexResult<()> {
320        vortex_ensure!(
321            patches.array_len() == encoded.len(),
322            "patches array_len != encoded len: {} != {}",
323            patches.array_len(),
324            encoded.len()
325        );
326
327        let expected_type = DType::Primitive(T::PTYPE, encoded.dtype().nullability());
328        vortex_ensure!(
329            patches.dtype() == &expected_type,
330            "Expected patches type {expected_type}, actual {}",
331            patches.dtype(),
332        );
333
334        Ok(())
335    }
336}
337
impl ALPData {
    /// Build the per-array metadata from exponents and optional patches.
    ///
    /// The patch offsets are copied out of `patches` (when present) so that
    /// they can be recovered later without the patch slot arrays. No
    /// validation is performed here; see [`ALP::try_new`] for the checks that
    /// apply to a full array.
    pub fn new(exponents: Exponents, patches: Option<Patches>) -> Self {
        let (patch_offset, patch_offset_within_chunk) = match &patches {
            Some(p) => (Some(p.offset()), p.offset_within_chunk()),
            None => (None, None),
        };

        Self {
            patch_offset,
            patch_offset_within_chunk,
            exponents,
        }
    }

    /// Build `ALPData` from components without validation.
    ///
    /// # Safety
    /// See [`ALP::try_new`] for information about the preconditions that
    /// should be checked **before** calling this method.
    ///
    /// NOTE(review): currently delegates to [`ALPData::new`], which performs
    /// no validation either, so the two are behaviorally identical today.
    pub(crate) unsafe fn new_unchecked(exponents: Exponents, patches: Option<Patches>) -> Self {
        Self::new(exponents, patches)
    }
}
370
/// Constructors for [`ALPArray`].
impl ALP {
    /// Build an `ALPArray`, panicking if `encoded` does not have an `i32`/`i64`
    /// primitive dtype.
    ///
    /// NOTE(review): this constructs via `from_parts_unchecked`, so beyond the
    /// dtype check the full component validation performed by [`ALP::try_new`]
    /// is skipped — callers are trusted to supply consistent components.
    pub fn new(encoded: ArrayRef, exponents: Exponents, patches: Option<Patches>) -> ALPArray {
        let dtype = ALPData::logical_dtype(&encoded).vortex_expect("ALP encoded dtype");
        let len = encoded.len();
        let slots = ALPData::make_slots(&encoded, &patches);
        unsafe {
            Array::from_parts_unchecked(
                ArrayParts::new(ALP, dtype, len, ALPData::new(exponents, patches))
                    .with_slots(slots),
            )
        }
    }

    /// Build an `ALPArray`, validating all components.
    ///
    /// # Errors
    /// Fails when `encoded` is not an `i32`/`i64` primitive array, or when the
    /// parts do not pass the vtable's `validate` (exponent range, patch
    /// length/dtype).
    pub fn try_new(
        encoded: ArrayRef,
        exponents: Exponents,
        patches: Option<Patches>,
    ) -> VortexResult<ALPArray> {
        let dtype = ALPData::logical_dtype(&encoded)?;
        let len = encoded.len();
        let slots = ALPData::make_slots(&encoded, &patches);
        let data = ALPData::new(exponents, patches);
        Array::try_from_parts(ArrayParts::new(ALP, dtype, len, data).with_slots(slots))
    }

    /// Build an `ALPArray` without validation.
    ///
    /// # Safety
    /// See [`ALP::try_new`] for preconditions.
    pub unsafe fn new_unchecked(
        encoded: ArrayRef,
        exponents: Exponents,
        patches: Option<Patches>,
    ) -> ALPArray {
        let dtype = ALPData::logical_dtype(&encoded).vortex_expect("ALP encoded dtype");
        let len = encoded.len();
        let slots = ALPData::make_slots(&encoded, &patches);
        // SAFETY: caller upholds the try_new preconditions.
        let data = unsafe { ALPData::new_unchecked(exponents, patches) };
        unsafe {
            Array::from_parts_unchecked(ArrayParts::new(ALP, dtype, len, data).with_slots(slots))
        }
    }
}
413
414impl ALPData {
415    fn make_slots(encoded: &ArrayRef, patches: &Option<Patches>) -> Vec<Option<ArrayRef>> {
416        let (patch_indices, patch_values, patch_chunk_offsets) = match patches {
417            Some(p) => (
418                Some(p.indices().clone()),
419                Some(p.values().clone()),
420                p.chunk_offsets().clone(),
421            ),
422            None => (None, None, None),
423        };
424        vec![
425            Some(encoded.clone()),
426            patch_indices,
427            patch_values,
428            patch_chunk_offsets,
429        ]
430    }
431
432    #[inline]
433    pub fn exponents(&self) -> Exponents {
434        self.exponents
435    }
436}
437
/// Read-only accessors available on any typed ALP array reference.
pub trait ALPArrayExt: ALPArraySlotsExt {
    /// The ALP exponents used to encode this array.
    fn exponents(&self) -> Exponents {
        self.exponents
    }

    /// Reconstruct the [`Patches`] from the patch slot children, or `None`
    /// when the array has no patches.
    fn patches(&self) -> Option<Patches> {
        patches_from_slots(
            &self.slots_view(),
            self.patch_offset,
            self.patch_offset_within_chunk,
            self.as_ref().len(),
        )
    }
}
452
453fn patches_from_slots(
454    slots: &ALPSlotsView,
455    patch_offset: Option<usize>,
456    patch_offset_within_chunk: Option<usize>,
457    len: usize,
458) -> Option<Patches> {
459    match (slots.patch_indices, slots.patch_values) {
460        (Some(indices), Some(values)) => {
461            let patch_offset = patch_offset.vortex_expect("has patch slots but no patch_offset");
462            Some(unsafe {
463                Patches::new_unchecked(
464                    len,
465                    patch_offset,
466                    indices.clone(),
467                    values.clone(),
468                    slots.patch_chunk_offsets.cloned(),
469                    patch_offset_within_chunk,
470                )
471            })
472        }
473        _ => None,
474    }
475}
476
/// Validate the full set of ALP array parts against the outer `dtype` and `len`.
///
/// Error order is deliberate: dtype of the encoded child first (via
/// `logical_dtype`), then component-level checks, then the length and
/// logical-dtype consistency checks.
fn validate_parts(
    dtype: &DType,
    len: usize,
    exponents: Exponents,
    encoded: &ArrayRef,
    patches: Option<Patches>,
) -> VortexResult<()> {
    let logical_dtype = ALPData::logical_dtype(encoded)?;
    ALPData::validate_components(encoded, exponents, patches.as_ref())?;
    vortex_ensure!(
        encoded.len() == len,
        "ALP encoded len {} != outer len {len}",
        encoded.len(),
    );
    vortex_ensure!(
        &logical_dtype == dtype,
        "ALP dtype {} does not match encoded logical dtype {}",
        dtype,
        logical_dtype,
    );
    Ok(())
}
499
// Blanket impl: every typed ALP array reference gets the accessor extension methods.
impl<T: TypedArrayRef<ALP>> ALPArrayExt for T {}
501
/// Consuming accessors for an owned [`ALPArray`].
pub trait ALPArrayOwnedExt {
    /// Decompose the array into `(encoded ints, exponents, patches)`.
    fn into_parts(self) -> (ArrayRef, Exponents, Option<Patches>);
}

impl ALPArrayOwnedExt for Array<ALP> {
    #[inline]
    fn into_parts(self) -> (ArrayRef, Exponents, Option<Patches>) {
        // Read the borrowed accessors before cloning the encoded child out.
        let patches = self.patches();
        let exponents = self.exponents();
        let encoded = self.encoded().clone();
        (encoded, exponents, patches)
    }
}
515
impl ValidityChild<ALP> for ALP {
    /// Validity is carried by the encoded child array.
    fn validity_child(array: ArrayView<'_, ALP>) -> ArrayRef {
        array.encoded().clone()
    }
}
521
// Tests compare the execute-based decompression path against the legacy
// array-based `decompress_into_array` path across sizes chosen to straddle
// chunk boundaries (1024/2048), with and without patches and validity.
#[cfg(test)]
mod tests {
    use std::f64::consts::PI;
    use std::sync::LazyLock;

    use rstest::rstest;
    use vortex_array::Canonical;
    use vortex_array::IntoArray;
    use vortex_array::LEGACY_SESSION;
    use vortex_array::ToCanonical;
    use vortex_array::VortexSessionExecute;
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::assert_arrays_eq;
    use vortex_array::session::ArraySession;
    use vortex_error::VortexExpect;
    use vortex_session::VortexSession;

    use super::*;
    use crate::alp_encode;
    use crate::decompress_into_array;

    // Shared session for driving the execute-based decompression path.
    static SESSION: LazyLock<VortexSession> =
        LazyLock::new(|| VortexSession::empty().with::<ArraySession>());

    // Round-trip f32 values through the execute path.
    #[rstest]
    #[case(0)]
    #[case(1)]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_f32(#[case] size: usize) {
        let values = PrimitiveArray::from_iter((0..size).map(|i| i as f32));
        let encoded = alp_encode(&values, None).unwrap();

        let result_canonical = {
            let mut ctx = SESSION.create_execution_ctx();
            encoded
                .clone()
                .into_array()
                .execute::<Canonical>(&mut ctx)
                .unwrap()
        };
        // Compare against the traditional array-based decompress path
        let expected =
            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();

        assert_arrays_eq!(result_canonical.into_array(), expected);
    }

    // Round-trip f64 values through the execute path.
    #[rstest]
    #[case(0)]
    #[case(1)]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_f64(#[case] size: usize) {
        let values = PrimitiveArray::from_iter((0..size).map(|i| i as f64));
        let encoded = alp_encode(&values, None).unwrap();

        let result_canonical = {
            let mut ctx = SESSION.create_execution_ctx();
            encoded
                .clone()
                .into_array()
                .execute::<Canonical>(&mut ctx)
                .unwrap()
        };
        // Compare against the traditional array-based decompress path
        let expected =
            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();

        assert_arrays_eq!(result_canonical.into_array(), expected);
    }

    // PI does not ALP-encode cleanly, so every fourth value becomes a patch.
    #[rstest]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_with_patches(#[case] size: usize) {
        let values: Vec<f64> = (0..size)
            .map(|i| match i % 4 {
                0..=2 => 1.0,
                _ => PI,
            })
            .collect();

        let array = PrimitiveArray::from_iter(values);
        let encoded = alp_encode(&array, None).unwrap();
        assert!(encoded.patches().unwrap().array_len() > 0);

        let result_canonical = {
            let mut ctx = SESSION.create_execution_ctx();
            encoded
                .clone()
                .into_array()
                .execute::<Canonical>(&mut ctx)
                .unwrap()
        };
        // Compare against the traditional array-based decompress path
        let expected =
            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();

        assert_arrays_eq!(result_canonical.into_array(), expected);
    }

    // Alternating nulls exercise the validity handling of the execute path.
    #[rstest]
    #[case(0)]
    #[case(1)]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_with_validity(#[case] size: usize) {
        let values: Vec<Option<f32>> = (0..size)
            .map(|i| if i % 2 == 1 { None } else { Some(1.0) })
            .collect();

        let array = PrimitiveArray::from_option_iter(values);
        let encoded = alp_encode(&array, None).unwrap();

        let result_canonical = {
            let mut ctx = SESSION.create_execution_ctx();
            encoded
                .clone()
                .into_array()
                .execute::<Canonical>(&mut ctx)
                .unwrap()
        };
        // Compare against the traditional array-based decompress path
        let expected =
            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();

        assert_arrays_eq!(result_canonical.into_array(), expected);
    }

    // Combine patches (PI values) with nulls in the same array.
    #[rstest]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_with_patches_and_validity(#[case] size: usize) {
        let values: Vec<Option<f64>> = (0..size)
            .map(|idx| match idx % 3 {
                0 => Some(1.0),
                1 => None,
                _ => Some(PI),
            })
            .collect();

        let array = PrimitiveArray::from_option_iter(values);
        let encoded = alp_encode(&array, None).unwrap();
        assert!(encoded.patches().unwrap().array_len() > 0);

        let result_canonical = {
            let mut ctx = SESSION.create_execution_ctx();
            encoded
                .clone()
                .into_array()
                .execute::<Canonical>(&mut ctx)
                .unwrap()
        };
        // Compare against the traditional array-based decompress path
        let expected =
            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();

        assert_arrays_eq!(result_canonical.into_array(), expected);
    }

    // Slice the encoded array symmetrically and verify every element and
    // validity bit of the executed result against the source values.
    #[rstest]
    #[case(500, 100)]
    #[case(1000, 200)]
    #[case(2048, 512)]
    fn test_execute_sliced_vector(#[case] size: usize, #[case] slice_start: usize) {
        let values: Vec<Option<f64>> = (0..size)
            .map(|i| {
                if i % 5 == 0 {
                    None
                } else if i % 4 == 3 {
                    Some(PI)
                } else {
                    Some(1.0)
                }
            })
            .collect();

        let array = PrimitiveArray::from_option_iter(values.clone());
        let encoded = alp_encode(&array, None).unwrap();

        let slice_end = size - slice_start;
        let slice_len = slice_end - slice_start;
        let sliced_encoded = encoded.slice(slice_start..slice_end).unwrap();

        let result_canonical = {
            let mut ctx = SESSION.create_execution_ctx();
            sliced_encoded.execute::<Canonical>(&mut ctx).unwrap()
        };
        let result_primitive = result_canonical.into_primitive();

        for idx in 0..slice_len {
            let expected_value = values[slice_start + idx];

            let result_valid = result_primitive
                .validity()
                .vortex_expect("result validity should be derivable")
                .is_valid(idx)
                .unwrap();
            assert_eq!(
                result_valid,
                expected_value.is_some(),
                "Validity mismatch at idx={idx}",
            );

            if let Some(expected_val) = expected_value {
                let result_val = result_primitive.as_slice::<f64>()[idx];
                assert_eq!(result_val, expected_val, "Value mismatch at idx={idx}",);
            }
        }
    }

    // Same slicing scenario but through the `to_primitive` canonicalization path.
    #[rstest]
    #[case(500, 100)]
    #[case(1000, 200)]
    #[case(2048, 512)]
    fn test_sliced_to_primitive(#[case] size: usize, #[case] slice_start: usize) {
        let values: Vec<Option<f64>> = (0..size)
            .map(|i| {
                if i % 5 == 0 {
                    None
                } else if i % 4 == 3 {
                    Some(PI)
                } else {
                    Some(1.0)
                }
            })
            .collect();

        let array = PrimitiveArray::from_option_iter(values.clone());
        let encoded = alp_encode(&array, None).unwrap();

        let slice_end = size - slice_start;
        let slice_len = slice_end - slice_start;
        let sliced_encoded = encoded.slice(slice_start..slice_end).unwrap();

        let result_primitive = sliced_encoded.to_primitive();

        for idx in 0..slice_len {
            let expected_value = values[slice_start + idx];

            let result_valid = result_primitive.validity_mask().unwrap().value(idx);
            assert_eq!(
                result_valid,
                expected_value.is_some(),
                "Validity mismatch at idx={idx}",
            );

            if let Some(expected_val) = expected_value {
                let buf = result_primitive.to_buffer::<f64>();
                let result_val = buf.as_slice()[idx];
                assert_eq!(result_val, expected_val, "Value mismatch at idx={idx}",);
            }
        }
    }

    /// Regression test for issue #5948: execute_decompress drops patches when chunk_offsets is
    /// None.
    ///
    /// When patches exist but do NOT have chunk_offsets, the execute path incorrectly passes
    /// `None` to `decompress_unchunked_core` instead of the actual patches.
    ///
    /// This can happen after file IO serialization/deserialization where chunk_offsets may not
    /// be preserved, or when building ALPArrays manually without chunk_offsets.
    #[test]
    fn test_execute_decompress_with_patches_no_chunk_offsets_regression_5948() {
        // Create an array with values that will produce patches. PI doesn't encode cleanly.
        let values: Vec<f64> = vec![1.0, 2.0, PI, 4.0, 5.0];
        let original = PrimitiveArray::from_iter(values);

        // First encode normally to get a properly formed ALPArray with patches.
        let normally_encoded = alp_encode(&original, None).unwrap();
        assert!(
            normally_encoded.patches().is_some(),
            "Test requires patches to be present"
        );

        let original_patches = normally_encoded.patches().unwrap();
        assert!(
            original_patches.chunk_offsets().is_some(),
            "Normal encoding should have chunk_offsets"
        );

        // Rebuild the patches WITHOUT chunk_offsets to simulate deserialized patches.
        let patches_without_chunk_offsets = Patches::new(
            original_patches.array_len(),
            original_patches.offset(),
            original_patches.indices().clone(),
            original_patches.values().clone(),
            None, // NO chunk_offsets - this triggers the bug!
        )
        .unwrap();

        // Build a new ALPArray with the same encoded data but patches without chunk_offsets.
        let alp_without_chunk_offsets = ALP::new(
            normally_encoded.encoded().clone(),
            normally_encoded.exponents(),
            Some(patches_without_chunk_offsets),
        );

        // The legacy decompress_into_array path should work correctly.
        let result_legacy = decompress_into_array(
            alp_without_chunk_offsets.clone(),
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();
        let legacy_slice = result_legacy.as_slice::<f64>();

        // Verify the legacy path produces correct values.
        assert!(
            (legacy_slice[2] - PI).abs() < 1e-10,
            "Legacy path should have PI at index 2, got {}",
            legacy_slice[2]
        );

        // The execute path has the bug - it drops patches when chunk_offsets is None.
        let result_execute = {
            let mut ctx = SESSION.create_execution_ctx();
            execute_decompress(alp_without_chunk_offsets, &mut ctx).unwrap()
        };
        let execute_slice = result_execute.as_slice::<f64>();

        // This assertion FAILS until the bug is fixed because execute_decompress drops patches.
        assert!(
            (execute_slice[2] - PI).abs() < 1e-10,
            "Execute path should have PI at index 2, but got {} (patches were dropped!)",
            execute_slice[2]
        );
    }
}