Skip to main content

vortex_alp/alp/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::fmt::Display;
6use std::fmt::Formatter;
7use std::hash::Hash;
8use std::hash::Hasher;
9
10use prost::Message;
11use vortex_array::Array;
12use vortex_array::ArrayEq;
13use vortex_array::ArrayHash;
14use vortex_array::ArrayId;
15use vortex_array::ArrayParts;
16use vortex_array::ArrayRef;
17use vortex_array::ArraySlots;
18use vortex_array::ArrayView;
19use vortex_array::ExecutionCtx;
20use vortex_array::ExecutionResult;
21use vortex_array::IntoArray;
22use vortex_array::Precision;
23use vortex_array::TypedArrayRef;
24use vortex_array::array_slots;
25use vortex_array::arrays::Primitive;
26use vortex_array::buffer::BufferHandle;
27use vortex_array::dtype::DType;
28use vortex_array::dtype::PType;
29use vortex_array::patches::PatchSlotIndices;
30use vortex_array::patches::Patches;
31use vortex_array::patches::PatchesData;
32use vortex_array::patches::PatchesMetadata;
33use vortex_array::require_child;
34use vortex_array::require_patches;
35use vortex_array::serde::ArrayChildren;
36use vortex_array::smallvec::smallvec;
37use vortex_array::vtable::VTable;
38use vortex_array::vtable::ValidityChild;
39use vortex_array::vtable::ValidityVTableFromChild;
40use vortex_error::VortexExpect;
41use vortex_error::VortexResult;
42use vortex_error::vortex_bail;
43use vortex_error::vortex_ensure;
44use vortex_error::vortex_panic;
45use vortex_session::VortexSession;
46use vortex_session::registry::CachedId;
47
48use crate::ALPFloat;
49use crate::alp::Exponents;
50use crate::alp::decompress::execute_decompress;
51use crate::alp::rules::PARENT_KERNELS;
52use crate::alp::rules::RULES;
53
54/// An [`ALP`]-encoded Vortex array.
55pub type ALPArray = Array<ALP>;
56
57impl ArrayHash for ALPData {
58    fn array_hash<H: Hasher>(&self, state: &mut H, _precision: Precision) {
59        self.exponents.hash(state);
60        self.patches_data.hash(state);
61    }
62}
63
64impl ArrayEq for ALPData {
65    fn array_eq(&self, other: &Self, _precision: Precision) -> bool {
66        self.exponents == other.exponents && self.patches_data == other.patches_data
67    }
68}
69
70impl VTable for ALP {
71    type TypedArrayData = ALPData;
72
73    type OperationsVTable = Self;
74    type ValidityVTable = ValidityVTableFromChild;
75
76    fn id(&self) -> ArrayId {
77        static ID: CachedId = CachedId::new("vortex.alp");
78        *ID
79    }
80
81    fn validate(
82        &self,
83        data: &ALPData,
84        dtype: &DType,
85        len: usize,
86        slots: &[Option<ArrayRef>],
87    ) -> VortexResult<()> {
88        let alp_slots = ALPSlotsView::from_slots(slots);
89        let patches =
90            PatchesData::patches_from_slots(data.patches_data.as_ref(), len, slots, PATCH_SLOTS);
91        validate_parts(dtype, len, data.exponents, alp_slots.encoded, patches)
92    }
93
94    fn nbuffers(_array: ArrayView<'_, Self>) -> usize {
95        0
96    }
97
98    fn buffer(_array: ArrayView<'_, Self>, idx: usize) -> BufferHandle {
99        vortex_panic!("ALPArray buffer index {idx} out of bounds")
100    }
101
102    fn buffer_name(_array: ArrayView<'_, Self>, _idx: usize) -> Option<String> {
103        None
104    }
105
106    fn serialize(
107        array: ArrayView<'_, Self>,
108        _session: &VortexSession,
109    ) -> VortexResult<Option<Vec<u8>>> {
110        let exponents = array.exponents();
111        Ok(Some(
112            ALPMetadata {
113                exp_e: exponents.e as u32,
114                exp_f: exponents.f as u32,
115                patches: array
116                    .patches()
117                    .map(|p| p.to_metadata(array.len(), array.dtype()))
118                    .transpose()?,
119            }
120            .encode_to_vec(),
121        ))
122    }
123
124    fn deserialize(
125        &self,
126        dtype: &DType,
127        len: usize,
128        metadata: &[u8],
129        _buffers: &[BufferHandle],
130        children: &dyn ArrayChildren,
131        _session: &VortexSession,
132    ) -> VortexResult<ArrayParts<Self>> {
133        let metadata = ALPMetadata::decode(metadata)?;
134        let encoded_ptype = match &dtype {
135            DType::Primitive(PType::F32, n) => DType::Primitive(PType::I32, *n),
136            DType::Primitive(PType::F64, n) => DType::Primitive(PType::I64, *n),
137            d => vortex_bail!(MismatchedTypes: "f32 or f64", d),
138        };
139        let encoded = children.get(0, &encoded_ptype, len)?;
140
141        let patches = metadata
142            .patches
143            .map(|p| {
144                let indices = children.get(1, &p.indices_dtype()?, p.len()?)?;
145                let values = children.get(2, dtype, p.len()?)?;
146                let chunk_offsets = p
147                    .chunk_offsets_dtype()?
148                    .map(|dtype| children.get(3, &dtype, usize::try_from(p.chunk_offsets_len())?))
149                    .transpose()?;
150
151                Patches::new(len, p.offset()?, indices, values, chunk_offsets)
152            })
153            .transpose()?;
154
155        let slots = ALPData::make_slots(&encoded, patches.as_ref());
156        let data = ALPData::new(
157            Exponents {
158                e: u8::try_from(metadata.exp_e)?,
159                f: u8::try_from(metadata.exp_f)?,
160            },
161            patches,
162        );
163        Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots))
164    }
165
166    fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String {
167        ALPSlots::NAMES[idx].to_string()
168    }
169
170    fn execute(array: Array<Self>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
171        let array = require_child!(array, array.encoded(), ALPSlots::ENCODED => Primitive);
172        require_patches!(
173            array,
174            ALPSlots::PATCH_INDICES,
175            ALPSlots::PATCH_VALUES,
176            ALPSlots::PATCH_CHUNK_OFFSETS
177        );
178
179        Ok(ExecutionResult::done(
180            execute_decompress(array, ctx)?.into_array(),
181        ))
182    }
183
184    fn reduce_parent(
185        array: ArrayView<'_, Self>,
186        parent: &ArrayRef,
187        child_idx: usize,
188    ) -> VortexResult<Option<ArrayRef>> {
189        RULES.evaluate(array, parent, child_idx)
190    }
191
192    fn execute_parent(
193        array: ArrayView<'_, Self>,
194        parent: &ArrayRef,
195        child_idx: usize,
196        ctx: &mut ExecutionCtx,
197    ) -> VortexResult<Option<ArrayRef>> {
198        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
199    }
200}
201
202#[array_slots(ALP)]
203pub struct ALPSlots {
204    /// The ALP-encoded values array.
205    pub encoded: ArrayRef,
206    /// The indices of exception values that could not be ALP-encoded.
207    pub patch_indices: Option<ArrayRef>,
208    /// The exception values that could not be ALP-encoded.
209    pub patch_values: Option<ArrayRef>,
210    /// Chunk offsets for the patch indices/values.
211    pub patch_chunk_offsets: Option<ArrayRef>,
212}
213
214const PATCH_SLOTS: PatchSlotIndices = PatchSlotIndices {
215    indices: ALPSlots::PATCH_INDICES,
216    values: ALPSlots::PATCH_VALUES,
217    chunk_offsets: ALPSlots::PATCH_CHUNK_OFFSETS,
218};
219
220#[derive(Clone, Debug)]
221pub struct ALPData {
222    patches_data: Option<PatchesData>,
223    exponents: Exponents,
224}
225
226impl Display for ALPData {
227    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
228        write!(f, "exponents: {}", self.exponents)?;
229        if let Some(pd) = &self.patches_data {
230            write!(f, ", patch_offset: {}", pd.offset())?;
231        }
232        Ok(())
233    }
234}
235
/// Marker type for the ALP encoding; it implements the encoding's `VTable` and hosts the
/// [`ALPArray`] constructors.
#[derive(Debug, Clone)]
pub struct ALP;
238
239#[derive(Clone, prost::Message)]
240pub struct ALPMetadata {
241    #[prost(uint32, tag = "1")]
242    pub(crate) exp_e: u32,
243    #[prost(uint32, tag = "2")]
244    pub(crate) exp_f: u32,
245    #[prost(message, optional, tag = "3")]
246    pub(crate) patches: Option<PatchesMetadata>,
247}
248
249impl ALPData {
250    fn validate_components(
251        encoded: &ArrayRef,
252        exponents: Exponents,
253        patches: Option<&Patches>,
254    ) -> VortexResult<()> {
255        vortex_ensure!(
256            matches!(
257                encoded.dtype(),
258                DType::Primitive(PType::I32 | PType::I64, _)
259            ),
260            "ALP encoded ints have invalid DType {}",
261            encoded.dtype(),
262        );
263
264        // Validate exponents are in-bounds for the float, and that patches have the proper
265        // length and type.
266        let Exponents { e, f } = exponents;
267        match encoded.dtype().as_ptype() {
268            PType::I32 => {
269                vortex_ensure!(exponents.e <= f32::MAX_EXPONENT, "e out of bounds: {e}");
270                vortex_ensure!(exponents.f <= f32::MAX_EXPONENT, "f out of bounds: {f}");
271                if let Some(patches) = patches {
272                    Self::validate_patches::<f32>(patches, encoded)?;
273                }
274            }
275            PType::I64 => {
276                vortex_ensure!(e <= f64::MAX_EXPONENT, "e out of bounds: {e}");
277                vortex_ensure!(f <= f64::MAX_EXPONENT, "f out of bounds: {f}");
278
279                if let Some(patches) = patches {
280                    Self::validate_patches::<f64>(patches, encoded)?;
281                }
282            }
283            _ => unreachable!(),
284        }
285
286        // Validate patches
287        if let Some(patches) = patches {
288            vortex_ensure!(
289                patches.array_len() == encoded.len(),
290                "patches array_len != encoded len: {} != {}",
291                patches.array_len(),
292                encoded.len()
293            );
294
295            // Verify that the patches DType are of the proper DType.
296        }
297
298        Ok(())
299    }
300
301    fn logical_dtype(encoded: &ArrayRef) -> VortexResult<DType> {
302        match encoded.dtype() {
303            DType::Primitive(PType::I32, nullability) => {
304                Ok(DType::Primitive(PType::F32, *nullability))
305            }
306            DType::Primitive(PType::I64, nullability) => {
307                Ok(DType::Primitive(PType::F64, *nullability))
308            }
309            _ => vortex_bail!("ALP encoded ints have invalid DType {}", encoded.dtype(),),
310        }
311    }
312
313    /// Validate that any patches provided are valid for the ALPArray.
314    fn validate_patches<T: ALPFloat>(patches: &Patches, encoded: &ArrayRef) -> VortexResult<()> {
315        vortex_ensure!(
316            patches.array_len() == encoded.len(),
317            "patches array_len != encoded len: {} != {}",
318            patches.array_len(),
319            encoded.len()
320        );
321
322        let expected_type = DType::Primitive(T::PTYPE, encoded.dtype().nullability());
323        vortex_ensure!(
324            patches.dtype() == &expected_type,
325            "Expected patches type {expected_type}, actual {}",
326            patches.dtype(),
327        );
328
329        Ok(())
330    }
331}
332
333impl ALPData {
334    /// Build a new `ALPArray` from components, panicking on validation failure.
335    ///
336    /// See [`ALP::try_new`] for reference on preconditions that must pass before
337    /// calling this method.
338    pub fn new(exponents: Exponents, patches: Option<Patches>) -> Self {
339        Self {
340            patches_data: patches.as_ref().map(PatchesData::from_patches),
341            exponents,
342        }
343    }
344
345    /// Build a new `ALPArray` from components:
346    ///
347    /// * `encoded` contains the ALP-encoded ints. Any null values are replaced with placeholders
348    /// * `exponents` are the ALP exponents, valid range depends on the data type
349    /// * `patches` are any patch values that don't cleanly encode using the ALP conversion function
350    ///
351    /// Build a new `ALPArray` from components without validation.
352    ///
353    /// See [`ALP::try_new`] for information about the preconditions that should be checked
354    /// **before** calling this method.
355    pub(crate) unsafe fn new_unchecked(exponents: Exponents, patches: Option<Patches>) -> Self {
356        Self::new(exponents, patches)
357    }
358}
359
360/// Constructors for [`ALPArray`].
361impl ALP {
362    pub fn new(encoded: ArrayRef, exponents: Exponents, patches: Option<Patches>) -> ALPArray {
363        let dtype = ALPData::logical_dtype(&encoded).vortex_expect("ALP encoded dtype");
364        let len = encoded.len();
365        let slots = ALPData::make_slots(&encoded, patches.as_ref());
366        unsafe {
367            Array::from_parts_unchecked(
368                ArrayParts::new(ALP, dtype, len, ALPData::new(exponents, patches))
369                    .with_slots(slots),
370            )
371        }
372    }
373
374    pub fn try_new(
375        encoded: ArrayRef,
376        exponents: Exponents,
377        patches: Option<Patches>,
378    ) -> VortexResult<ALPArray> {
379        let dtype = ALPData::logical_dtype(&encoded)?;
380        let len = encoded.len();
381        let slots = ALPData::make_slots(&encoded, patches.as_ref());
382        let data = ALPData::new(exponents, patches);
383        Array::try_from_parts(ArrayParts::new(ALP, dtype, len, data).with_slots(slots))
384    }
385
386    /// # Safety
387    /// See [`ALP::try_new`] for preconditions.
388    pub unsafe fn new_unchecked(
389        encoded: ArrayRef,
390        exponents: Exponents,
391        patches: Option<Patches>,
392    ) -> ALPArray {
393        let dtype = ALPData::logical_dtype(&encoded).vortex_expect("ALP encoded dtype");
394        let len = encoded.len();
395        let slots = ALPData::make_slots(&encoded, patches.as_ref());
396        let data = unsafe { ALPData::new_unchecked(exponents, patches) };
397        unsafe {
398            Array::from_parts_unchecked(ArrayParts::new(ALP, dtype, len, data).with_slots(slots))
399        }
400    }
401}
402
403impl ALPData {
404    fn make_slots(encoded: &ArrayRef, patches: Option<&Patches>) -> ArraySlots {
405        let mut slots: ArraySlots = smallvec![Some(encoded.clone())];
406        PatchesData::push_slots(&mut slots, patches);
407        slots
408    }
409
410    #[inline]
411    pub fn exponents(&self) -> Exponents {
412        self.exponents
413    }
414}
415
416pub trait ALPArrayExt: ALPArraySlotsExt {
417    fn exponents(&self) -> Exponents {
418        self.exponents
419    }
420
421    fn patches(&self) -> Option<Patches> {
422        PatchesData::patches_from_slots(
423            self.patches_data.as_ref(),
424            self.as_ref().len(),
425            self.as_ref().slots(),
426            PATCH_SLOTS,
427        )
428    }
429}
430
431fn validate_parts(
432    dtype: &DType,
433    len: usize,
434    exponents: Exponents,
435    encoded: &ArrayRef,
436    patches: Option<Patches>,
437) -> VortexResult<()> {
438    let logical_dtype = ALPData::logical_dtype(encoded)?;
439    ALPData::validate_components(encoded, exponents, patches.as_ref())?;
440    vortex_ensure!(
441        encoded.len() == len,
442        "ALP encoded len {} != outer len {len}",
443        encoded.len(),
444    );
445    vortex_ensure!(
446        &logical_dtype == dtype,
447        "ALP dtype {} does not match encoded logical dtype {}",
448        dtype,
449        logical_dtype,
450    );
451    Ok(())
452}
453
454impl<T: TypedArrayRef<ALP>> ALPArrayExt for T {}
455
456pub trait ALPArrayOwnedExt {
457    fn into_parts(self) -> (ArrayRef, Exponents, Option<Patches>);
458}
459
460impl ALPArrayOwnedExt for Array<ALP> {
461    #[inline]
462    fn into_parts(self) -> (ArrayRef, Exponents, Option<Patches>) {
463        let patches = self.patches();
464        let exponents = self.exponents();
465        let encoded = self.encoded().clone();
466        (encoded, exponents, patches)
467    }
468}
469
470impl ValidityChild<ALP> for ALP {
471    fn validity_child(array: ArrayView<'_, ALP>) -> ArrayRef {
472        array.encoded().clone()
473    }
474}
475
#[cfg(test)]
mod tests {
    use std::f64::consts::PI;
    use std::sync::LazyLock;

    use rstest::rstest;
    use vortex_array::Canonical;
    use vortex_array::IntoArray;
    use vortex_array::LEGACY_SESSION;
    use vortex_array::VortexSessionExecute;
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::assert_arrays_eq;
    use vortex_array::session::ArraySession;
    use vortex_error::VortexExpect;
    use vortex_session::VortexSession;

    use super::*;
    use crate::alp_encode;
    use crate::decompress_into_array;

    static SESSION: LazyLock<VortexSession> =
        LazyLock::new(|| VortexSession::empty().with::<ArraySession>());

    /// Executes `encoded` to canonical form with `ctx` and asserts that the result equals the
    /// output of the traditional array-based `decompress_into_array` path.
    fn assert_execute_matches_legacy(encoded: ALPArray, ctx: &mut ExecutionCtx) {
        let result_canonical = encoded
            .clone()
            .into_array()
            .execute::<Canonical>(ctx)
            .unwrap();
        // Compare against the traditional array-based decompress path
        let expected =
            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();

        assert_arrays_eq!(result_canonical.into_array(), expected);
    }

    /// Asserts that `encoded` carries at least one patch.
    ///
    /// `Patches::array_len` is the length of the patched array rather than the number of
    /// patches, so the check looks at the patch indices instead.
    fn assert_has_patches(encoded: &ALPArray) {
        let patches = encoded.patches().expect("encoding should produce patches");
        assert_ne!(
            patches.indices().len(),
            0,
            "expected at least one patch index"
        );
    }

    /// Builds `size` optional values: null at multiples of 5, `PI` where `i % 4 == 3`, and
    /// `1.0` everywhere else.
    fn nullable_values_with_exceptions(size: usize) -> Vec<Option<f64>> {
        (0..size)
            .map(|i| {
                if i % 5 == 0 {
                    None
                } else if i % 4 == 3 {
                    Some(PI)
                } else {
                    Some(1.0)
                }
            })
            .collect()
    }

    #[rstest]
    #[case(0)]
    #[case(1)]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_f32(#[case] size: usize) {
        let mut ctx = SESSION.create_execution_ctx();
        let values = PrimitiveArray::from_iter((0..size).map(|i| i as f32));
        let encoded = alp_encode(values.as_view(), None, &mut ctx).unwrap();

        assert_execute_matches_legacy(encoded, &mut ctx);
    }

    #[rstest]
    #[case(0)]
    #[case(1)]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_f64(#[case] size: usize) {
        let values = PrimitiveArray::from_iter((0..size).map(|i| i as f64));
        let encoded = alp_encode(
            values.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();

        assert_execute_matches_legacy(encoded, &mut SESSION.create_execution_ctx());
    }

    #[rstest]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_with_patches(#[case] size: usize) {
        let values: Vec<f64> = (0..size)
            .map(|i| match i % 4 {
                0..=2 => 1.0,
                _ => PI,
            })
            .collect();

        let array = PrimitiveArray::from_iter(values);
        let encoded = alp_encode(
            array.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();
        assert_has_patches(&encoded);

        assert_execute_matches_legacy(encoded, &mut SESSION.create_execution_ctx());
    }

    #[rstest]
    #[case(0)]
    #[case(1)]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_with_validity(#[case] size: usize) {
        let values: Vec<Option<f32>> = (0..size)
            .map(|i| if i % 2 == 1 { None } else { Some(1.0) })
            .collect();

        let array = PrimitiveArray::from_option_iter(values);
        let encoded = alp_encode(
            array.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();

        assert_execute_matches_legacy(encoded, &mut SESSION.create_execution_ctx());
    }

    #[rstest]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_with_patches_and_validity(#[case] size: usize) {
        let values: Vec<Option<f64>> = (0..size)
            .map(|idx| match idx % 3 {
                0 => Some(1.0),
                1 => None,
                _ => Some(PI),
            })
            .collect();

        let array = PrimitiveArray::from_option_iter(values);
        let encoded = alp_encode(
            array.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();
        assert_has_patches(&encoded);

        assert_execute_matches_legacy(encoded, &mut SESSION.create_execution_ctx());
    }

    #[rstest]
    #[case(500, 100)]
    #[case(1000, 200)]
    #[case(2048, 512)]
    fn test_execute_sliced_vector(#[case] size: usize, #[case] slice_start: usize) {
        let values = nullable_values_with_exceptions(size);

        let array = PrimitiveArray::from_option_iter(values.clone());
        let encoded = alp_encode(
            array.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();

        let slice_end = size - slice_start;
        let slice_len = slice_end - slice_start;
        let sliced_encoded = encoded.slice(slice_start..slice_end).unwrap();

        let result_canonical = {
            let mut ctx = SESSION.create_execution_ctx();
            sliced_encoded.execute::<Canonical>(&mut ctx).unwrap()
        };
        let result_primitive = result_canonical.into_primitive();

        for idx in 0..slice_len {
            let expected_value = values[slice_start + idx];

            let result_valid = result_primitive
                .validity()
                .vortex_expect("result validity should be derivable")
                .is_valid(idx)
                .unwrap();
            assert_eq!(
                result_valid,
                expected_value.is_some(),
                "Validity mismatch at idx={idx}",
            );

            if let Some(expected_val) = expected_value {
                let result_val = result_primitive.as_slice::<f64>()[idx];
                assert_eq!(result_val, expected_val, "Value mismatch at idx={idx}",);
            }
        }
    }

    #[rstest]
    #[case(500, 100)]
    #[case(1000, 200)]
    #[case(2048, 512)]
    fn test_sliced_to_primitive(#[case] size: usize, #[case] slice_start: usize) {
        let mut ctx = LEGACY_SESSION.create_execution_ctx();
        let values = nullable_values_with_exceptions(size);

        let array = PrimitiveArray::from_option_iter(values.clone());
        let encoded = alp_encode(array.as_view(), None, &mut ctx).unwrap();

        let slice_end = size - slice_start;
        let slice_len = slice_end - slice_start;
        let sliced_encoded = encoded.slice(slice_start..slice_end).unwrap();

        let result_primitive = sliced_encoded.execute::<PrimitiveArray>(&mut ctx).unwrap();

        // Materialize the validity mask and the value buffer once, not once per index.
        let validity_mask = result_primitive
            .as_ref()
            .validity()
            .unwrap()
            .execute_mask(result_primitive.as_ref().len(), &mut ctx)
            .unwrap();
        let buf = result_primitive.to_buffer::<f64>();

        for idx in 0..slice_len {
            let expected_value = values[slice_start + idx];

            assert_eq!(
                validity_mask.value(idx),
                expected_value.is_some(),
                "Validity mismatch at idx={idx}",
            );

            if let Some(expected_val) = expected_value {
                let result_val = buf.as_slice()[idx];
                assert_eq!(result_val, expected_val, "Value mismatch at idx={idx}",);
            }
        }
    }

    /// Regression test for issue #5948: execute_decompress drops patches when chunk_offsets is
    /// None.
    ///
    /// The issue: when patches exist but do NOT have chunk_offsets, the execute path passed
    /// `None` to `decompress_unchunked_core` instead of the actual patches.
    ///
    /// This can happen after file IO serialization/deserialization where chunk_offsets may not
    /// be preserved, or when building ALPArrays manually without chunk_offsets.
    #[test]
    fn test_execute_decompress_with_patches_no_chunk_offsets_regression_5948() {
        // Create an array with values that will produce patches. PI doesn't encode cleanly.
        let values: Vec<f64> = vec![1.0, 2.0, PI, 4.0, 5.0];
        let original = PrimitiveArray::from_iter(values);

        // First encode normally to get a properly formed ALPArray with patches.
        let normally_encoded = alp_encode(
            original.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();
        assert!(
            normally_encoded.patches().is_some(),
            "Test requires patches to be present"
        );

        let original_patches = normally_encoded.patches().unwrap();
        assert!(
            original_patches.chunk_offsets().is_some(),
            "Normal encoding should have chunk_offsets"
        );

        // Rebuild the patches WITHOUT chunk_offsets to simulate deserialized patches.
        let patches_without_chunk_offsets = Patches::new(
            original_patches.array_len(),
            original_patches.offset(),
            original_patches.indices().clone(),
            original_patches.values().clone(),
            None, // NO chunk_offsets - the configuration covered by this regression test.
        )
        .unwrap();

        // Build a new ALPArray with the same encoded data but patches without chunk_offsets.
        let alp_without_chunk_offsets = ALP::new(
            normally_encoded.encoded().clone(),
            normally_encoded.exponents(),
            Some(patches_without_chunk_offsets),
        );

        // The legacy decompress_into_array path should work correctly.
        let result_legacy = decompress_into_array(
            alp_without_chunk_offsets.clone(),
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();
        let legacy_slice = result_legacy.as_slice::<f64>();

        // Verify the legacy path produces correct values.
        assert!(
            (legacy_slice[2] - PI).abs() < 1e-10,
            "Legacy path should have PI at index 2, got {}",
            legacy_slice[2]
        );

        // The execute path must apply the patches even though chunk_offsets is None.
        let result_execute = {
            let mut ctx = SESSION.create_execution_ctx();
            execute_decompress(alp_without_chunk_offsets, &mut ctx).unwrap()
        };
        let execute_slice = result_execute.as_slice::<f64>();

        // This assertion fails if execute_decompress drops the patches.
        assert!(
            (execute_slice[2] - PI).abs() < 1e-10,
            "Execute path should have PI at index 2, but got {} (patches were dropped!)",
            execute_slice[2]
        );
    }
}
862            "Execute path should have PI at index 2, but got {} (patches were dropped!)",
863            execute_slice[2]
864        );
865    }
866}