// vortex_alp/alp/array.rs
1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::fmt::Display;
6use std::fmt::Formatter;
7use std::hash::Hash;
8use std::hash::Hasher;
9
10use prost::Message;
11use vortex_array::Array;
12use vortex_array::ArrayEq;
13use vortex_array::ArrayHash;
14use vortex_array::ArrayId;
15use vortex_array::ArrayParts;
16use vortex_array::ArrayRef;
17use vortex_array::ArrayView;
18use vortex_array::ExecutionCtx;
19use vortex_array::ExecutionResult;
20use vortex_array::IntoArray;
21use vortex_array::Precision;
22use vortex_array::TypedArrayRef;
23use vortex_array::array_slots;
24use vortex_array::arrays::Primitive;
25use vortex_array::buffer::BufferHandle;
26use vortex_array::dtype::DType;
27use vortex_array::dtype::PType;
28use vortex_array::patches::Patches;
29use vortex_array::patches::PatchesMetadata;
30use vortex_array::require_child;
31use vortex_array::require_patches;
32use vortex_array::serde::ArrayChildren;
33use vortex_array::vtable::VTable;
34use vortex_array::vtable::ValidityChild;
35use vortex_array::vtable::ValidityVTableFromChild;
36use vortex_error::VortexExpect;
37use vortex_error::VortexResult;
38use vortex_error::vortex_bail;
39use vortex_error::vortex_ensure;
40use vortex_error::vortex_panic;
41use vortex_session::VortexSession;
42use vortex_session::registry::CachedId;
43
44use crate::ALPFloat;
45use crate::alp::Exponents;
46use crate::alp::decompress::execute_decompress;
47use crate::alp::rules::PARENT_KERNELS;
48use crate::alp::rules::RULES;
49
/// An [`ALP`]-encoded Vortex array.
///
/// Type alias binding the generic [`Array`] container to the [`ALP`] vtable.
pub type ALPArray = Array<ALP>;
52
impl ArrayHash for ALPData {
    /// Hash the metadata that distinguishes two ALP arrays: the exponents and
    /// the patch offsets (the same three fields compared by [`ArrayEq`] below).
    fn array_hash<H: Hasher>(&self, state: &mut H, _precision: Precision) {
        // Precision is irrelevant here: none of these fields are float-valued.
        self.exponents.hash(state);
        self.patch_offset.hash(state);
        self.patch_offset_within_chunk.hash(state);
    }
}
60
impl ArrayEq for ALPData {
    /// Compare the same three metadata fields hashed by [`ArrayHash`], keeping
    /// the hash/eq contract consistent. Child arrays (slots) are not compared
    /// here — presumably handled by the caller; TODO confirm.
    fn array_eq(&self, other: &Self, _precision: Precision) -> bool {
        self.exponents == other.exponents
            && self.patch_offset == other.patch_offset
            && self.patch_offset_within_chunk == other.patch_offset_within_chunk
    }
}
68
impl VTable for ALP {
    type ArrayData = ALPData;

    type OperationsVTable = Self;
    // Validity is delegated to the encoded child; see the `ValidityChild` impl
    // at the bottom of this file.
    type ValidityVTable = ValidityVTableFromChild;

    fn id(&self) -> ArrayId {
        // Lazily-cached stable identifier for this encoding.
        static ID: CachedId = CachedId::new("vortex.alp");
        *ID
    }

    /// Validate externally-supplied parts (dtype, len, slots) against the ALP
    /// invariants via [`validate_parts`].
    fn validate(
        &self,
        data: &ALPData,
        dtype: &DType,
        len: usize,
        slots: &[Option<ArrayRef>],
    ) -> VortexResult<()> {
        let slots = ALPSlotsView::from_slots(slots);
        validate_parts(
            dtype,
            len,
            data.exponents,
            slots.encoded,
            patches_from_slots(
                &slots,
                data.patch_offset,
                data.patch_offset_within_chunk,
                len,
            ),
        )
    }

    /// ALP stores no buffers of its own; all data lives in child arrays.
    fn nbuffers(_array: ArrayView<'_, Self>) -> usize {
        0
    }

    /// Unreachable in practice since `nbuffers` is 0.
    fn buffer(_array: ArrayView<'_, Self>, idx: usize) -> BufferHandle {
        vortex_panic!("ALPArray buffer index {idx} out of bounds")
    }

    fn buffer_name(_array: ArrayView<'_, Self>, _idx: usize) -> Option<String> {
        None
    }

    /// Serialize the array metadata (exponents plus optional patch metadata) as
    /// a protobuf [`ALPMetadata`] message.
    fn serialize(
        array: ArrayView<'_, Self>,
        _session: &VortexSession,
    ) -> VortexResult<Option<Vec<u8>>> {
        let exponents = array.exponents();
        Ok(Some(
            ALPMetadata {
                // Exponents are u8 at rest; widened to u32 for the protobuf encoding
                // (and narrowed back with `u8::try_from` in `deserialize`).
                exp_e: exponents.e as u32,
                exp_f: exponents.f as u32,
                patches: array
                    .patches()
                    .map(|p| p.to_metadata(array.len(), array.dtype()))
                    .transpose()?,
            }
            .encode_to_vec(),
        ))
    }

    /// Deserialize from protobuf metadata plus child arrays.
    ///
    /// Child indices match the [`ALPSlots`] field order: 0 = encoded,
    /// 1 = patch indices, 2 = patch values, 3 = patch chunk offsets.
    fn deserialize(
        &self,
        dtype: &DType,
        len: usize,
        metadata: &[u8],
        _buffers: &[BufferHandle],
        children: &dyn ArrayChildren,
        _session: &VortexSession,
    ) -> VortexResult<ArrayParts<Self>> {
        let metadata = ALPMetadata::decode(metadata)?;
        // The encoded child holds the integer encodings: i32 for f32, i64 for f64.
        let encoded_ptype = match &dtype {
            DType::Primitive(PType::F32, n) => DType::Primitive(PType::I32, *n),
            DType::Primitive(PType::F64, n) => DType::Primitive(PType::I64, *n),
            d => vortex_bail!(MismatchedTypes: "f32 or f64", d),
        };
        let encoded = children.get(0, &encoded_ptype, len)?;

        let patches = metadata
            .patches
            .map(|p| {
                let indices = children.get(1, &p.indices_dtype()?, p.len()?)?;
                // Patch values carry the logical (float) dtype of the array.
                let values = children.get(2, dtype, p.len()?)?;
                // Chunk offsets are optional; they may be absent after round-tripping.
                let chunk_offsets = p
                    .chunk_offsets_dtype()?
                    .map(|dtype| children.get(3, &dtype, usize::try_from(p.chunk_offsets_len())?))
                    .transpose()?;

                Patches::new(len, p.offset()?, indices, values, chunk_offsets)
            })
            .transpose()?;

        let slots = ALPData::make_slots(&encoded, patches.as_ref());
        let data = ALPData::new(
            Exponents {
                e: u8::try_from(metadata.exp_e)?,
                f: u8::try_from(metadata.exp_f)?,
            },
            patches,
        );
        Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots))
    }

    fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String {
        ALPSlots::NAMES[idx].to_string()
    }

    /// Canonicalize by decompressing the ALP-encoded ints back into floats.
    fn execute(array: Array<Self>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
        // Decompression needs the encoded child as a Primitive array and the patch
        // children available; the macros handle the not-yet-ready case —
        // presumably by re-enqueueing; see `require_child!`/`require_patches!`.
        let array = require_child!(array, array.encoded(), ALPSlots::ENCODED => Primitive);
        require_patches!(
            array,
            ALPSlots::PATCH_INDICES,
            ALPSlots::PATCH_VALUES,
            ALPSlots::PATCH_CHUNK_OFFSETS
        );

        Ok(ExecutionResult::done(
            execute_decompress(array, ctx)?.into_array(),
        ))
    }

    /// Push-down hook: rules that may rewrite a parent operation over this array.
    fn reduce_parent(
        array: ArrayView<'_, Self>,
        parent: &ArrayRef,
        child_idx: usize,
    ) -> VortexResult<Option<ArrayRef>> {
        RULES.evaluate(array, parent, child_idx)
    }

    /// Push-down hook: kernels that may execute a parent operation directly.
    fn execute_parent(
        array: ArrayView<'_, Self>,
        parent: &ArrayRef,
        child_idx: usize,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<Option<ArrayRef>> {
        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
    }
}
209
/// Child-array slots of an [`ALPArray`].
///
/// Field order is significant: it matches the child indices used in
/// `VTable::deserialize` and the slot vector built by `ALPData::make_slots`.
#[array_slots(ALP)]
pub struct ALPSlots {
    /// The ALP-encoded values array.
    pub encoded: ArrayRef,
    /// The indices of exception values that could not be ALP-encoded.
    pub patch_indices: Option<ArrayRef>,
    /// The exception values that could not be ALP-encoded.
    pub patch_values: Option<ArrayRef>,
    /// Chunk offsets for the patch indices/values.
    pub patch_chunk_offsets: Option<ArrayRef>,
}
221
/// Per-array metadata for an [`ALPArray`]. The child arrays themselves live in
/// the slots ([`ALPSlots`]); this struct only records what cannot be recovered
/// from them.
#[derive(Clone, Debug)]
pub struct ALPData {
    // `Patches::offset()` of the patches, or `None` when the array has no patches.
    patch_offset: Option<usize>,
    // `Patches::offset_within_chunk()` — set only for chunked patches.
    patch_offset_within_chunk: Option<usize>,
    // The ALP exponents (e, f) used to encode the floats as integers.
    exponents: Exponents,
}
228
229impl Display for ALPData {
230    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
231        write!(f, "exponents: {}", self.exponents)?;
232        if let Some(offset) = self.patch_offset {
233            write!(f, ", patch_offset: {offset}")?;
234        }
235        Ok(())
236    }
237}
238
/// Marker type for the ALP encoding; implements [`VTable`] for [`ALPArray`].
#[derive(Clone, Debug)]
pub struct ALP;
241
/// Protobuf-serialized metadata for an [`ALPArray`]; produced/consumed by the
/// `serialize`/`deserialize` methods of the [`VTable`] impl.
#[derive(Clone, prost::Message)]
pub struct ALPMetadata {
    /// The `e` exponent (u8 at rest, widened to u32 for protobuf).
    #[prost(uint32, tag = "1")]
    pub(crate) exp_e: u32,
    /// The `f` exponent (u8 at rest, widened to u32 for protobuf).
    #[prost(uint32, tag = "2")]
    pub(crate) exp_f: u32,
    /// Patch metadata, present only when the array carries patches.
    #[prost(message, optional, tag = "3")]
    pub(crate) patches: Option<PatchesMetadata>,
}
251
252impl ALPData {
253    fn validate_components(
254        encoded: &ArrayRef,
255        exponents: Exponents,
256        patches: Option<&Patches>,
257    ) -> VortexResult<()> {
258        vortex_ensure!(
259            matches!(
260                encoded.dtype(),
261                DType::Primitive(PType::I32 | PType::I64, _)
262            ),
263            "ALP encoded ints have invalid DType {}",
264            encoded.dtype(),
265        );
266
267        // Validate exponents are in-bounds for the float, and that patches have the proper
268        // length and type.
269        let Exponents { e, f } = exponents;
270        match encoded.dtype().as_ptype() {
271            PType::I32 => {
272                vortex_ensure!(exponents.e <= f32::MAX_EXPONENT, "e out of bounds: {e}");
273                vortex_ensure!(exponents.f <= f32::MAX_EXPONENT, "f out of bounds: {f}");
274                if let Some(patches) = patches {
275                    Self::validate_patches::<f32>(patches, encoded)?;
276                }
277            }
278            PType::I64 => {
279                vortex_ensure!(e <= f64::MAX_EXPONENT, "e out of bounds: {e}");
280                vortex_ensure!(f <= f64::MAX_EXPONENT, "f out of bounds: {f}");
281
282                if let Some(patches) = patches {
283                    Self::validate_patches::<f64>(patches, encoded)?;
284                }
285            }
286            _ => unreachable!(),
287        }
288
289        // Validate patches
290        if let Some(patches) = patches {
291            vortex_ensure!(
292                patches.array_len() == encoded.len(),
293                "patches array_len != encoded len: {} != {}",
294                patches.array_len(),
295                encoded.len()
296            );
297
298            // Verify that the patches DType are of the proper DType.
299        }
300
301        Ok(())
302    }
303
304    fn logical_dtype(encoded: &ArrayRef) -> VortexResult<DType> {
305        match encoded.dtype() {
306            DType::Primitive(PType::I32, nullability) => {
307                Ok(DType::Primitive(PType::F32, *nullability))
308            }
309            DType::Primitive(PType::I64, nullability) => {
310                Ok(DType::Primitive(PType::F64, *nullability))
311            }
312            _ => vortex_bail!("ALP encoded ints have invalid DType {}", encoded.dtype(),),
313        }
314    }
315
316    /// Validate that any patches provided are valid for the ALPArray.
317    fn validate_patches<T: ALPFloat>(patches: &Patches, encoded: &ArrayRef) -> VortexResult<()> {
318        vortex_ensure!(
319            patches.array_len() == encoded.len(),
320            "patches array_len != encoded len: {} != {}",
321            patches.array_len(),
322            encoded.len()
323        );
324
325        let expected_type = DType::Primitive(T::PTYPE, encoded.dtype().nullability());
326        vortex_ensure!(
327            patches.dtype() == &expected_type,
328            "Expected patches type {expected_type}, actual {}",
329            patches.dtype(),
330        );
331
332        Ok(())
333    }
334}
335
impl ALPData {
    /// Build a new [`ALPData`] from exponents and optional patches.
    ///
    /// This only records metadata (the exponents and the patch offsets); it
    /// performs no validation. See [`ALP::try_new`] for the preconditions that
    /// should hold before this data is used to assemble an array.
    pub fn new(exponents: Exponents, patches: Option<Patches>) -> Self {
        // Record the patch offsets here; the patch child arrays themselves are
        // stored separately in the slots (see `make_slots`), and `patches` is
        // dropped after this.
        let (patch_offset, patch_offset_within_chunk) = match &patches {
            Some(p) => (Some(p.offset()), p.offset_within_chunk()),
            None => (None, None),
        };

        Self {
            patch_offset,
            patch_offset_within_chunk,
            exponents,
        }
    }

    /// Build a new [`ALPData`] from components without validation.
    ///
    /// Currently identical to [`Self::new`] (which also does not validate); the
    /// `unsafe` marker documents that callers are expected to have checked the
    /// preconditions described on [`ALP::try_new`].
    pub(crate) unsafe fn new_unchecked(exponents: Exponents, patches: Option<Patches>) -> Self {
        Self::new(exponents, patches)
    }
}
368
/// Constructors for [`ALPArray`].
impl ALP {
    /// Build an [`ALPArray`] from parts without validating them.
    ///
    /// # Panics
    /// Panics if `encoded` is not an `i32`/`i64` primitive array. The remaining
    /// [`ALP::try_new`] preconditions are NOT checked here.
    pub fn new(encoded: ArrayRef, exponents: Exponents, patches: Option<Patches>) -> ALPArray {
        let dtype = ALPData::logical_dtype(&encoded).vortex_expect("ALP encoded dtype");
        let len = encoded.len();
        let slots = ALPData::make_slots(&encoded, patches.as_ref());
        // SAFETY: the caller is trusted to supply parts satisfying the `try_new`
        // preconditions; only the encoded dtype was checked above.
        unsafe {
            Array::from_parts_unchecked(
                ArrayParts::new(ALP, dtype, len, ALPData::new(exponents, patches))
                    .with_slots(slots),
            )
        }
    }

    /// Build an [`ALPArray`] from parts via the checked construction path.
    ///
    /// # Errors
    /// Fails if `encoded` is not an `i32`/`i64` primitive array, or if
    /// `Array::try_from_parts` rejects the parts (presumably by running
    /// `VTable::validate` — confirm in vortex_array).
    pub fn try_new(
        encoded: ArrayRef,
        exponents: Exponents,
        patches: Option<Patches>,
    ) -> VortexResult<ALPArray> {
        let dtype = ALPData::logical_dtype(&encoded)?;
        let len = encoded.len();
        let slots = ALPData::make_slots(&encoded, patches.as_ref());
        let data = ALPData::new(exponents, patches);
        Array::try_from_parts(ArrayParts::new(ALP, dtype, len, data).with_slots(slots))
    }

    /// # Safety
    /// See [`ALP::try_new`] for preconditions.
    pub unsafe fn new_unchecked(
        encoded: ArrayRef,
        exponents: Exponents,
        patches: Option<Patches>,
    ) -> ALPArray {
        let dtype = ALPData::logical_dtype(&encoded).vortex_expect("ALP encoded dtype");
        let len = encoded.len();
        let slots = ALPData::make_slots(&encoded, patches.as_ref());
        // SAFETY: the caller upholds the `try_new` preconditions (see `# Safety`).
        let data = unsafe { ALPData::new_unchecked(exponents, patches) };
        // SAFETY: same contract as above — parts are caller-verified.
        unsafe {
            Array::from_parts_unchecked(ArrayParts::new(ALP, dtype, len, data).with_slots(slots))
        }
    }
}
411
412impl ALPData {
413    fn make_slots(encoded: &ArrayRef, patches: Option<&Patches>) -> Vec<Option<ArrayRef>> {
414        let (patch_indices, patch_values, patch_chunk_offsets) = match patches {
415            Some(p) => (
416                Some(p.indices().clone()),
417                Some(p.values().clone()),
418                p.chunk_offsets().clone(),
419            ),
420            None => (None, None, None),
421        };
422        vec![
423            Some(encoded.clone()),
424            patch_indices,
425            patch_values,
426            patch_chunk_offsets,
427        ]
428    }
429
430    #[inline]
431    pub fn exponents(&self) -> Exponents {
432        self.exponents
433    }
434}
435
/// Read accessors available on any typed reference to an ALP array.
pub trait ALPArrayExt: ALPArraySlotsExt {
    /// The ALP exponents used to encode this array.
    fn exponents(&self) -> Exponents {
        self.exponents
    }

    /// Reassemble the [`Patches`] from the patch slots, if both the indices and
    /// values slots are populated.
    fn patches(&self) -> Option<Patches> {
        patches_from_slots(
            &self.slots_view(),
            self.patch_offset,
            self.patch_offset_within_chunk,
            self.as_ref().len(),
        )
    }
}
450
451fn patches_from_slots(
452    slots: &ALPSlotsView,
453    patch_offset: Option<usize>,
454    patch_offset_within_chunk: Option<usize>,
455    len: usize,
456) -> Option<Patches> {
457    match (slots.patch_indices, slots.patch_values) {
458        (Some(indices), Some(values)) => {
459            let patch_offset = patch_offset.vortex_expect("has patch slots but no patch_offset");
460            Some(unsafe {
461                Patches::new_unchecked(
462                    len,
463                    patch_offset,
464                    indices.clone(),
465                    values.clone(),
466                    slots.patch_chunk_offsets.cloned(),
467                    patch_offset_within_chunk,
468                )
469            })
470        }
471        _ => None,
472    }
473}
474
/// Validate a fully-assembled set of ALP parts against the outer dtype/len.
///
/// Runs the component checks from [`ALPData::validate_components`], then
/// confirms that the encoded child spans the whole array and that the outer
/// dtype matches the float dtype implied by the encoded integer dtype.
/// The order of checks (and hence which error fires first) is deliberate.
fn validate_parts(
    dtype: &DType,
    len: usize,
    exponents: Exponents,
    encoded: &ArrayRef,
    patches: Option<Patches>,
) -> VortexResult<()> {
    let logical_dtype = ALPData::logical_dtype(encoded)?;
    ALPData::validate_components(encoded, exponents, patches.as_ref())?;
    vortex_ensure!(
        encoded.len() == len,
        "ALP encoded len {} != outer len {len}",
        encoded.len(),
    );
    vortex_ensure!(
        &logical_dtype == dtype,
        "ALP dtype {} does not match encoded logical dtype {}",
        dtype,
        logical_dtype,
    );
    Ok(())
}
497
// Blanket impl: every typed reference to an ALP array gets the read accessors.
impl<T: TypedArrayRef<ALP>> ALPArrayExt for T {}
499
/// Consuming accessors for an owned [`ALPArray`].
pub trait ALPArrayOwnedExt {
    /// Decompose the array into its encoded child, exponents, and optional patches.
    fn into_parts(self) -> (ArrayRef, Exponents, Option<Patches>);
}
503
504impl ALPArrayOwnedExt for Array<ALP> {
505    #[inline]
506    fn into_parts(self) -> (ArrayRef, Exponents, Option<Patches>) {
507        let patches = self.patches();
508        let exponents = self.exponents();
509        let encoded = self.encoded().clone();
510        (encoded, exponents, patches)
511    }
512}
513
impl ValidityChild<ALP> for ALP {
    /// Validity is carried by the encoded child; delegate to it.
    fn validity_child(array: ArrayView<'_, ALP>) -> ArrayRef {
        array.encoded().clone()
    }
}
519
#[cfg(test)]
mod tests {
    use std::f64::consts::PI;
    use std::sync::LazyLock;

    use rstest::rstest;
    use vortex_array::Canonical;
    use vortex_array::IntoArray;
    use vortex_array::LEGACY_SESSION;
    use vortex_array::VortexSessionExecute;
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::assert_arrays_eq;
    use vortex_array::session::ArraySession;
    use vortex_error::VortexExpect;
    use vortex_session::VortexSession;

    use super::*;
    use crate::alp_encode;
    use crate::decompress_into_array;

    // Shared session for the execute-path tests.
    static SESSION: LazyLock<VortexSession> =
        LazyLock::new(|| VortexSession::empty().with::<ArraySession>());

    // Round-trip f32 values through the execute path and compare against the
    // legacy decompress path. Case sizes straddle 1024/2048 — presumably chunk
    // boundaries; TODO confirm the chunk size constant.
    #[rstest]
    #[case(0)]
    #[case(1)]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_f32(#[case] size: usize) {
        let mut ctx = SESSION.create_execution_ctx();
        let values = PrimitiveArray::from_iter((0..size).map(|i| i as f32));
        let encoded = alp_encode(values.as_view(), None, &mut ctx).unwrap();

        let result_canonical = {
            encoded
                .clone()
                .into_array()
                .execute::<Canonical>(&mut ctx)
                .unwrap()
        };
        // Compare against the traditional array-based decompress path
        let expected =
            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();

        assert_arrays_eq!(result_canonical.into_array(), expected);
    }

    // Same as test_execute_f32 but for f64 inputs.
    #[rstest]
    #[case(0)]
    #[case(1)]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_f64(#[case] size: usize) {
        let values = PrimitiveArray::from_iter((0..size).map(|i| i as f64));
        let encoded = alp_encode(
            values.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();

        let result_canonical = {
            let mut ctx = SESSION.create_execution_ctx();
            encoded
                .clone()
                .into_array()
                .execute::<Canonical>(&mut ctx)
                .unwrap()
        };
        // Compare against the traditional array-based decompress path
        let expected =
            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();

        assert_arrays_eq!(result_canonical.into_array(), expected);
    }

    // PI does not encode cleanly under ALP, so every 4th element forces a patch.
    #[rstest]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_with_patches(#[case] size: usize) {
        let values: Vec<f64> = (0..size)
            .map(|i| match i % 4 {
                0..=2 => 1.0,
                _ => PI,
            })
            .collect();

        let array = PrimitiveArray::from_iter(values);
        let encoded = alp_encode(
            array.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();
        // Sanity-check that the input actually produced patches.
        assert!(encoded.patches().unwrap().array_len() > 0);

        let result_canonical = {
            let mut ctx = SESSION.create_execution_ctx();
            encoded
                .clone()
                .into_array()
                .execute::<Canonical>(&mut ctx)
                .unwrap()
        };
        // Compare against the traditional array-based decompress path
        let expected =
            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();

        assert_arrays_eq!(result_canonical.into_array(), expected);
    }

    // Alternating nulls exercise the validity-delegation path.
    #[rstest]
    #[case(0)]
    #[case(1)]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_with_validity(#[case] size: usize) {
        let values: Vec<Option<f32>> = (0..size)
            .map(|i| if i % 2 == 1 { None } else { Some(1.0) })
            .collect();

        let array = PrimitiveArray::from_option_iter(values);
        let encoded = alp_encode(
            array.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();

        let result_canonical = {
            let mut ctx = SESSION.create_execution_ctx();
            encoded
                .clone()
                .into_array()
                .execute::<Canonical>(&mut ctx)
                .unwrap()
        };
        // Compare against the traditional array-based decompress path
        let expected =
            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();

        assert_arrays_eq!(result_canonical.into_array(), expected);
    }

    // Combined case: patches (PI) interleaved with nulls.
    #[rstest]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_with_patches_and_validity(#[case] size: usize) {
        let values: Vec<Option<f64>> = (0..size)
            .map(|idx| match idx % 3 {
                0 => Some(1.0),
                1 => None,
                _ => Some(PI),
            })
            .collect();

        let array = PrimitiveArray::from_option_iter(values);
        let encoded = alp_encode(
            array.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();
        assert!(encoded.patches().unwrap().array_len() > 0);

        let result_canonical = {
            let mut ctx = SESSION.create_execution_ctx();
            encoded
                .clone()
                .into_array()
                .execute::<Canonical>(&mut ctx)
                .unwrap()
        };
        // Compare against the traditional array-based decompress path
        let expected =
            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();

        assert_arrays_eq!(result_canonical.into_array(), expected);
    }

    // Slice the encoded array symmetrically (slice_start..size-slice_start) and
    // verify values and validity element-by-element against the source vec.
    #[rstest]
    #[case(500, 100)]
    #[case(1000, 200)]
    #[case(2048, 512)]
    fn test_execute_sliced_vector(#[case] size: usize, #[case] slice_start: usize) {
        let values: Vec<Option<f64>> = (0..size)
            .map(|i| {
                if i % 5 == 0 {
                    None
                } else if i % 4 == 3 {
                    Some(PI)
                } else {
                    Some(1.0)
                }
            })
            .collect();

        let array = PrimitiveArray::from_option_iter(values.clone());
        let encoded = alp_encode(
            array.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();

        let slice_end = size - slice_start;
        let slice_len = slice_end - slice_start;
        let sliced_encoded = encoded.slice(slice_start..slice_end).unwrap();

        let result_canonical = {
            let mut ctx = SESSION.create_execution_ctx();
            sliced_encoded.execute::<Canonical>(&mut ctx).unwrap()
        };
        let result_primitive = result_canonical.into_primitive();

        for idx in 0..slice_len {
            let expected_value = values[slice_start + idx];

            let result_valid = result_primitive
                .validity()
                .vortex_expect("result validity should be derivable")
                .is_valid(idx)
                .unwrap();
            assert_eq!(
                result_valid,
                expected_value.is_some(),
                "Validity mismatch at idx={idx}",
            );

            if let Some(expected_val) = expected_value {
                let result_val = result_primitive.as_slice::<f64>()[idx];
                assert_eq!(result_val, expected_val, "Value mismatch at idx={idx}",);
            }
        }
    }

    // Same slicing scenario, but executing directly to PrimitiveArray and
    // checking validity through the mask path.
    #[rstest]
    #[case(500, 100)]
    #[case(1000, 200)]
    #[case(2048, 512)]
    fn test_sliced_to_primitive(#[case] size: usize, #[case] slice_start: usize) {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let values: Vec<Option<f64>> = (0..size)
            .map(|i| {
                if i % 5 == 0 {
                    None
                } else if i % 4 == 3 {
                    Some(PI)
                } else {
                    Some(1.0)
                }
            })
            .collect();

        let array = PrimitiveArray::from_option_iter(values.clone());
        let encoded = alp_encode(array.as_view(), None, &mut ctx).unwrap();

        let slice_end = size - slice_start;
        let slice_len = slice_end - slice_start;
        let sliced_encoded = encoded.slice(slice_start..slice_end).unwrap();

        let result_primitive = sliced_encoded.execute::<PrimitiveArray>(&mut ctx).unwrap();

        for idx in 0..slice_len {
            let expected_value = values[slice_start + idx];

            let result_valid = result_primitive
                .as_ref()
                .validity()
                .unwrap()
                .execute_mask(result_primitive.as_ref().len(), &mut ctx)
                .unwrap()
                .value(idx);
            assert_eq!(
                result_valid,
                expected_value.is_some(),
                "Validity mismatch at idx={idx}",
            );

            if let Some(expected_val) = expected_value {
                let buf = result_primitive.to_buffer::<f64>();
                let result_val = buf.as_slice()[idx];
                assert_eq!(result_val, expected_val, "Value mismatch at idx={idx}",);
            }
        }
    }

    /// Regression test for issue #5948: execute_decompress drops patches when chunk_offsets is
    /// None.
    ///
    /// When patches exist but do NOT have chunk_offsets, the execute path incorrectly passes
    /// `None` to `decompress_unchunked_core` instead of the actual patches.
    ///
    /// This can happen after file IO serialization/deserialization where chunk_offsets may not
    /// be preserved, or when building ALPArrays manually without chunk_offsets.
    #[test]
    fn test_execute_decompress_with_patches_no_chunk_offsets_regression_5948() {
        // Create an array with values that will produce patches. PI doesn't encode cleanly.
        let values: Vec<f64> = vec![1.0, 2.0, PI, 4.0, 5.0];
        let original = PrimitiveArray::from_iter(values);

        // First encode normally to get a properly formed ALPArray with patches.
        let normally_encoded = alp_encode(
            original.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();
        assert!(
            normally_encoded.patches().is_some(),
            "Test requires patches to be present"
        );

        let original_patches = normally_encoded.patches().unwrap();
        assert!(
            original_patches.chunk_offsets().is_some(),
            "Normal encoding should have chunk_offsets"
        );

        // Rebuild the patches WITHOUT chunk_offsets to simulate deserialized patches.
        let patches_without_chunk_offsets = Patches::new(
            original_patches.array_len(),
            original_patches.offset(),
            original_patches.indices().clone(),
            original_patches.values().clone(),
            None, // NO chunk_offsets - this triggers the bug!
        )
        .unwrap();

        // Build a new ALPArray with the same encoded data but patches without chunk_offsets.
        let alp_without_chunk_offsets = ALP::new(
            normally_encoded.encoded().clone(),
            normally_encoded.exponents(),
            Some(patches_without_chunk_offsets),
        );

        // The legacy decompress_into_array path should work correctly.
        let result_legacy = decompress_into_array(
            alp_without_chunk_offsets.clone(),
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();
        let legacy_slice = result_legacy.as_slice::<f64>();

        // Verify the legacy path produces correct values.
        assert!(
            (legacy_slice[2] - PI).abs() < 1e-10,
            "Legacy path should have PI at index 2, got {}",
            legacy_slice[2]
        );

        // The execute path has the bug - it drops patches when chunk_offsets is None.
        let result_execute = {
            let mut ctx = SESSION.create_execution_ctx();
            execute_decompress(alp_without_chunk_offsets, &mut ctx).unwrap()
        };
        let execute_slice = result_execute.as_slice::<f64>();

        // This assertion FAILS until the bug is fixed because execute_decompress drops patches.
        assert!(
            (execute_slice[2] - PI).abs() < 1e-10,
            "Execute path should have PI at index 2, but got {} (patches were dropped!)",
            execute_slice[2]
        );
    }
}