Skip to main content

vortex_alp/alp/
array.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::fmt::Debug;
5use std::fmt::Display;
6use std::fmt::Formatter;
7use std::hash::Hash;
8use std::hash::Hasher;
9
10use prost::Message;
11use vortex_array::Array;
12use vortex_array::ArrayEq;
13use vortex_array::ArrayHash;
14use vortex_array::ArrayId;
15use vortex_array::ArrayParts;
16use vortex_array::ArrayRef;
17use vortex_array::ArrayView;
18use vortex_array::ExecutionCtx;
19use vortex_array::ExecutionResult;
20use vortex_array::IntoArray;
21use vortex_array::Precision;
22use vortex_array::TypedArrayRef;
23use vortex_array::array_slots;
24use vortex_array::arrays::Primitive;
25use vortex_array::buffer::BufferHandle;
26use vortex_array::dtype::DType;
27use vortex_array::dtype::PType;
28use vortex_array::patches::Patches;
29use vortex_array::patches::PatchesMetadata;
30use vortex_array::require_child;
31use vortex_array::require_patches;
32use vortex_array::serde::ArrayChildren;
33use vortex_array::vtable::VTable;
34use vortex_array::vtable::ValidityChild;
35use vortex_array::vtable::ValidityVTableFromChild;
36use vortex_error::VortexExpect;
37use vortex_error::VortexResult;
38use vortex_error::vortex_bail;
39use vortex_error::vortex_ensure;
40use vortex_error::vortex_panic;
41use vortex_session::VortexSession;
42use vortex_session::registry::CachedId;
43
44use crate::ALPFloat;
45use crate::alp::Exponents;
46use crate::alp::decompress::execute_decompress;
47use crate::alp::rules::PARENT_KERNELS;
48use crate::alp::rules::RULES;
49
/// An [`ALP`]-encoded Vortex array.
pub type ALPArray = Array<ALP>;
52
53impl ArrayHash for ALPData {
54    fn array_hash<H: Hasher>(&self, state: &mut H, _precision: Precision) {
55        self.exponents.hash(state);
56        self.patch_offset.hash(state);
57        self.patch_offset_within_chunk.hash(state);
58    }
59}
60
61impl ArrayEq for ALPData {
62    fn array_eq(&self, other: &Self, _precision: Precision) -> bool {
63        self.exponents == other.exponents
64            && self.patch_offset == other.patch_offset
65            && self.patch_offset_within_chunk == other.patch_offset_within_chunk
66    }
67}
68
impl VTable for ALP {
    type ArrayData = ALPData;

    type OperationsVTable = Self;
    type ValidityVTable = ValidityVTableFromChild;

    fn id(&self) -> ArrayId {
        // Cached so repeated id() calls don't re-intern the string.
        static ID: CachedId = CachedId::new("vortex.alp");
        *ID
    }

    // Validate constructed/deserialized parts: dtype, length, exponents, and any
    // patches reconstructed from the slot arrays.
    fn validate(
        &self,
        data: &ALPData,
        dtype: &DType,
        len: usize,
        slots: &[Option<ArrayRef>],
    ) -> VortexResult<()> {
        let slots = ALPSlotsView::from_slots(slots);
        validate_parts(
            dtype,
            len,
            data.exponents,
            slots.encoded,
            patches_from_slots(
                &slots,
                data.patch_offset,
                data.patch_offset_within_chunk,
                len,
            ),
        )
    }

    // ALP stores no buffers of its own; all data lives in the child arrays.
    fn nbuffers(_array: ArrayView<'_, Self>) -> usize {
        0
    }

    fn buffer(_array: ArrayView<'_, Self>, idx: usize) -> BufferHandle {
        // Unreachable in practice since nbuffers() is always 0.
        vortex_panic!("ALPArray buffer index {idx} out of bounds")
    }

    fn buffer_name(_array: ArrayView<'_, Self>, _idx: usize) -> Option<String> {
        None
    }

    // Serialize the exponents (widened u8 -> u32) plus optional patch metadata
    // as protobuf bytes.
    fn serialize(
        array: ArrayView<'_, Self>,
        _session: &VortexSession,
    ) -> VortexResult<Option<Vec<u8>>> {
        let exponents = array.exponents();
        Ok(Some(
            ALPMetadata {
                exp_e: exponents.e as u32,
                exp_f: exponents.f as u32,
                patches: array
                    .patches()
                    .map(|p| p.to_metadata(array.len(), array.dtype()))
                    .transpose()?,
            }
            .encode_to_vec(),
        ))
    }

    // Rebuild array parts from protobuf metadata and child arrays. Child indices
    // mirror the slot order: 0 = encoded, 1 = patch indices, 2 = patch values,
    // 3 = patch chunk offsets.
    fn deserialize(
        &self,
        dtype: &DType,
        len: usize,
        metadata: &[u8],
        _buffers: &[BufferHandle],
        children: &dyn ArrayChildren,
        _session: &VortexSession,
    ) -> VortexResult<ArrayParts<Self>> {
        let metadata = ALPMetadata::decode(metadata)?;
        // The encoded child is the integer twin of the logical float dtype.
        let encoded_ptype = match &dtype {
            DType::Primitive(PType::F32, n) => DType::Primitive(PType::I32, *n),
            DType::Primitive(PType::F64, n) => DType::Primitive(PType::I64, *n),
            d => vortex_bail!(MismatchedTypes: "f32 or f64", d),
        };
        let encoded = children.get(0, &encoded_ptype, len)?;

        let patches = metadata
            .patches
            .map(|p| {
                let indices = children.get(1, &p.indices_dtype()?, p.len()?)?;
                let values = children.get(2, dtype, p.len()?)?;
                // Chunk offsets are optional; the metadata may omit them.
                let chunk_offsets = p
                    .chunk_offsets_dtype()?
                    .map(|dtype| children.get(3, &dtype, usize::try_from(p.chunk_offsets_len())?))
                    .transpose()?;

                Patches::new(len, p.offset()?, indices, values, chunk_offsets)
            })
            .transpose()?;

        let slots = ALPData::make_slots(&encoded, &patches);
        let data = ALPData::new(
            Exponents {
                // try_from rejects metadata values that overflow u8.
                e: u8::try_from(metadata.exp_e)?,
                f: u8::try_from(metadata.exp_f)?,
            },
            patches,
        );
        Ok(ArrayParts::new(self.clone(), dtype.clone(), len, data).with_slots(slots))
    }

    fn slot_name(_array: ArrayView<'_, Self>, idx: usize) -> String {
        ALPSlots::NAMES[idx].to_string()
    }

    // Execute by fully decompressing ALP back to primitive floats.
    fn execute(array: Array<Self>, ctx: &mut ExecutionCtx) -> VortexResult<ExecutionResult> {
        let array = require_child!(array, array.encoded(), ALPSlots::ENCODED => Primitive);
        require_patches!(
            array,
            ALPSlots::PATCH_INDICES,
            ALPSlots::PATCH_VALUES,
            ALPSlots::PATCH_CHUNK_OFFSETS
        );

        Ok(ExecutionResult::done(
            execute_decompress(array, ctx)?.into_array(),
        ))
    }

    // Delegate parent-reduction rules to the rule table in `alp::rules`.
    fn reduce_parent(
        array: ArrayView<'_, Self>,
        parent: &ArrayRef,
        child_idx: usize,
    ) -> VortexResult<Option<ArrayRef>> {
        RULES.evaluate(array, parent, child_idx)
    }

    // Delegate parent-execution kernels to the kernel table in `alp::rules`.
    fn execute_parent(
        array: ArrayView<'_, Self>,
        parent: &ArrayRef,
        child_idx: usize,
        ctx: &mut ExecutionCtx,
    ) -> VortexResult<Option<ArrayRef>> {
        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
    }
}
209
/// The child-array slots of an [`ALP`] array (see `#[array_slots]`).
#[array_slots(ALP)]
pub struct ALPSlots {
    /// The ALP-encoded values array.
    pub encoded: ArrayRef,
    /// The indices of exception values that could not be ALP-encoded.
    pub patch_indices: Option<ArrayRef>,
    /// The exception values that could not be ALP-encoded.
    pub patch_values: Option<ArrayRef>,
    /// Chunk offsets for the patch indices/values.
    pub patch_chunk_offsets: Option<ArrayRef>,
}
221
/// Per-array metadata for an [`ALP`]-encoded array.
#[derive(Clone, Debug)]
pub struct ALPData {
    /// Cached `Patches::offset`; `Some` exactly when the array has patches.
    patch_offset: Option<usize>,
    /// Cached `Patches::offset_within_chunk`; may be `None` even when patches exist.
    patch_offset_within_chunk: Option<usize>,
    /// The ALP exponents `(e, f)` used to encode the floats as integers.
    exponents: Exponents,
}
228
229impl Display for ALPData {
230    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
231        write!(f, "exponents: {}", self.exponents)?;
232        if let Some(offset) = self.patch_offset {
233            write!(f, ", patch_offset: {offset}")?;
234        }
235        Ok(())
236    }
237}
238
/// Marker type carrying the [`VTable`] implementation for the ALP encoding.
#[derive(Clone, Debug)]
pub struct ALP;
241
/// Serialized metadata for an [`ALP`] array, protobuf-encoded via `prost`.
#[derive(Clone, prost::Message)]
pub struct ALPMetadata {
    /// ALP exponent `e`, stored widened from `u8` to `u32`.
    #[prost(uint32, tag = "1")]
    pub(crate) exp_e: u32,
    /// ALP exponent `f`, stored widened from `u8` to `u32`.
    #[prost(uint32, tag = "2")]
    pub(crate) exp_f: u32,
    /// Patch metadata, present only when the array has patches.
    #[prost(message, optional, tag = "3")]
    pub(crate) patches: Option<PatchesMetadata>,
}
251
252impl ALPData {
253    fn validate_components(
254        encoded: &ArrayRef,
255        exponents: Exponents,
256        patches: Option<&Patches>,
257    ) -> VortexResult<()> {
258        vortex_ensure!(
259            matches!(
260                encoded.dtype(),
261                DType::Primitive(PType::I32 | PType::I64, _)
262            ),
263            "ALP encoded ints have invalid DType {}",
264            encoded.dtype(),
265        );
266
267        // Validate exponents are in-bounds for the float, and that patches have the proper
268        // length and type.
269        let Exponents { e, f } = exponents;
270        match encoded.dtype().as_ptype() {
271            PType::I32 => {
272                vortex_ensure!(exponents.e <= f32::MAX_EXPONENT, "e out of bounds: {e}");
273                vortex_ensure!(exponents.f <= f32::MAX_EXPONENT, "f out of bounds: {f}");
274                if let Some(patches) = patches {
275                    Self::validate_patches::<f32>(patches, encoded)?;
276                }
277            }
278            PType::I64 => {
279                vortex_ensure!(e <= f64::MAX_EXPONENT, "e out of bounds: {e}");
280                vortex_ensure!(f <= f64::MAX_EXPONENT, "f out of bounds: {f}");
281
282                if let Some(patches) = patches {
283                    Self::validate_patches::<f64>(patches, encoded)?;
284                }
285            }
286            _ => unreachable!(),
287        }
288
289        // Validate patches
290        if let Some(patches) = patches {
291            vortex_ensure!(
292                patches.array_len() == encoded.len(),
293                "patches array_len != encoded len: {} != {}",
294                patches.array_len(),
295                encoded.len()
296            );
297
298            // Verify that the patches DType are of the proper DType.
299        }
300
301        Ok(())
302    }
303
304    fn logical_dtype(encoded: &ArrayRef) -> VortexResult<DType> {
305        match encoded.dtype() {
306            DType::Primitive(PType::I32, nullability) => {
307                Ok(DType::Primitive(PType::F32, *nullability))
308            }
309            DType::Primitive(PType::I64, nullability) => {
310                Ok(DType::Primitive(PType::F64, *nullability))
311            }
312            _ => vortex_bail!("ALP encoded ints have invalid DType {}", encoded.dtype(),),
313        }
314    }
315
316    /// Validate that any patches provided are valid for the ALPArray.
317    fn validate_patches<T: ALPFloat>(patches: &Patches, encoded: &ArrayRef) -> VortexResult<()> {
318        vortex_ensure!(
319            patches.array_len() == encoded.len(),
320            "patches array_len != encoded len: {} != {}",
321            patches.array_len(),
322            encoded.len()
323        );
324
325        let expected_type = DType::Primitive(T::PTYPE, encoded.dtype().nullability());
326        vortex_ensure!(
327            patches.dtype() == &expected_type,
328            "Expected patches type {expected_type}, actual {}",
329            patches.dtype(),
330        );
331
332        Ok(())
333    }
334}
335
impl ALPData {
    /// Build a new [`ALPData`] from exponents and optional patches.
    ///
    /// This performs no validation of its own: it only caches the patch offsets
    /// out of `patches` so they remain available on the metadata. See
    /// [`ALP::try_new`] for the constructor that validates the full array.
    pub fn new(exponents: Exponents, patches: Option<Patches>) -> Self {
        let (patch_offset, patch_offset_within_chunk) = match &patches {
            Some(p) => (Some(p.offset()), p.offset_within_chunk()),
            None => (None, None),
        };

        Self {
            patch_offset,
            patch_offset_within_chunk,
            exponents,
        }
    }

    /// Build a new [`ALPData`] from components without validation.
    ///
    /// Currently identical to [`ALPData::new`]; the `unsafe` marker documents
    /// that callers must have checked the [`ALP::try_new`] preconditions
    /// **before** calling this method.
    pub(crate) unsafe fn new_unchecked(exponents: Exponents, patches: Option<Patches>) -> Self {
        Self::new(exponents, patches)
    }
}
368
/// Constructors for [`ALPArray`].
impl ALP {
    /// Build a new [`ALPArray`] without running full validation.
    ///
    /// # Panics
    ///
    /// Panics (via `vortex_expect`) if `encoded` is not an i32/i64 primitive
    /// array, since the logical float dtype cannot be derived. The remaining
    /// [`ALP::try_new`] preconditions are NOT checked.
    pub fn new(encoded: ArrayRef, exponents: Exponents, patches: Option<Patches>) -> ALPArray {
        let dtype = ALPData::logical_dtype(&encoded).vortex_expect("ALP encoded dtype");
        let len = encoded.len();
        let slots = ALPData::make_slots(&encoded, &patches);
        // SAFETY: callers are responsible for upholding the try_new preconditions.
        unsafe {
            Array::from_parts_unchecked(
                ArrayParts::new(ALP, dtype, len, ALPData::new(exponents, patches))
                    .with_slots(slots),
            )
        }
    }

    /// Build a new [`ALPArray`], returning an error if the parts fail
    /// validation (see [`VTable::validate`]).
    pub fn try_new(
        encoded: ArrayRef,
        exponents: Exponents,
        patches: Option<Patches>,
    ) -> VortexResult<ALPArray> {
        let dtype = ALPData::logical_dtype(&encoded)?;
        let len = encoded.len();
        let slots = ALPData::make_slots(&encoded, &patches);
        let data = ALPData::new(exponents, patches);
        Array::try_from_parts(ArrayParts::new(ALP, dtype, len, data).with_slots(slots))
    }

    /// Build a new [`ALPArray`] without validation.
    ///
    /// # Safety
    /// See [`ALP::try_new`] for preconditions.
    pub unsafe fn new_unchecked(
        encoded: ArrayRef,
        exponents: Exponents,
        patches: Option<Patches>,
    ) -> ALPArray {
        let dtype = ALPData::logical_dtype(&encoded).vortex_expect("ALP encoded dtype");
        let len = encoded.len();
        let slots = ALPData::make_slots(&encoded, &patches);
        // SAFETY: the caller has checked the try_new preconditions.
        let data = unsafe { ALPData::new_unchecked(exponents, patches) };
        unsafe {
            Array::from_parts_unchecked(ArrayParts::new(ALP, dtype, len, data).with_slots(slots))
        }
    }
}
411
412impl ALPData {
413    fn make_slots(encoded: &ArrayRef, patches: &Option<Patches>) -> Vec<Option<ArrayRef>> {
414        let (patch_indices, patch_values, patch_chunk_offsets) = match patches {
415            Some(p) => (
416                Some(p.indices().clone()),
417                Some(p.values().clone()),
418                p.chunk_offsets().clone(),
419            ),
420            None => (None, None, None),
421        };
422        vec![
423            Some(encoded.clone()),
424            patch_indices,
425            patch_values,
426            patch_chunk_offsets,
427        ]
428    }
429
430    #[inline]
431    pub fn exponents(&self) -> Exponents {
432        self.exponents
433    }
434}
435
/// Read-only accessors available on any reference to an ALP array.
pub trait ALPArrayExt: ALPArraySlotsExt {
    /// The ALP exponents used to encode this array.
    fn exponents(&self) -> Exponents {
        self.exponents
    }

    /// The patches for values that could not be ALP-encoded, if any.
    fn patches(&self) -> Option<Patches> {
        patches_from_slots(
            &self.slots_view(),
            self.patch_offset,
            self.patch_offset_within_chunk,
            self.as_ref().len(),
        )
    }
}
450
451fn patches_from_slots(
452    slots: &ALPSlotsView,
453    patch_offset: Option<usize>,
454    patch_offset_within_chunk: Option<usize>,
455    len: usize,
456) -> Option<Patches> {
457    match (slots.patch_indices, slots.patch_values) {
458        (Some(indices), Some(values)) => {
459            let patch_offset = patch_offset.vortex_expect("has patch slots but no patch_offset");
460            Some(unsafe {
461                Patches::new_unchecked(
462                    len,
463                    patch_offset,
464                    indices.clone(),
465                    values.clone(),
466                    slots.patch_chunk_offsets.cloned(),
467                    patch_offset_within_chunk,
468                )
469            })
470        }
471        _ => None,
472    }
473}
474
475fn validate_parts(
476    dtype: &DType,
477    len: usize,
478    exponents: Exponents,
479    encoded: &ArrayRef,
480    patches: Option<Patches>,
481) -> VortexResult<()> {
482    let logical_dtype = ALPData::logical_dtype(encoded)?;
483    ALPData::validate_components(encoded, exponents, patches.as_ref())?;
484    vortex_ensure!(
485        encoded.len() == len,
486        "ALP encoded len {} != outer len {len}",
487        encoded.len(),
488    );
489    vortex_ensure!(
490        &logical_dtype == dtype,
491        "ALP dtype {} does not match encoded logical dtype {}",
492        dtype,
493        logical_dtype,
494    );
495    Ok(())
496}
497
// Blanket impl: every typed reference to an ALP array gets the accessor extensions.
impl<T: TypedArrayRef<ALP>> ALPArrayExt for T {}
499
/// Consuming accessors for an owned ALP array.
pub trait ALPArrayOwnedExt {
    /// Decompose the array into its encoded ints, exponents, and optional patches.
    fn into_parts(self) -> (ArrayRef, Exponents, Option<Patches>);
}
503
504impl ALPArrayOwnedExt for Array<ALP> {
505    #[inline]
506    fn into_parts(self) -> (ArrayRef, Exponents, Option<Patches>) {
507        let patches = self.patches();
508        let exponents = self.exponents();
509        let encoded = self.encoded().clone();
510        (encoded, exponents, patches)
511    }
512}
513
impl ValidityChild<ALP> for ALP {
    /// Validity is delegated to the encoded child, whose dtype shares this
    /// array's nullability (see `logical_dtype`).
    fn validity_child(array: ArrayView<'_, ALP>) -> ArrayRef {
        array.encoded().clone()
    }
}
519
#[cfg(test)]
mod tests {
    //! Round-trip tests: arrays encoded with `alp_encode` are executed through
    //! the vtable path and compared against the legacy `decompress_into_array`
    //! path. Sizes straddle 1024/2048 to cover chunk boundaries.
    use std::f64::consts::PI;
    use std::sync::LazyLock;

    use rstest::rstest;
    use vortex_array::Canonical;
    use vortex_array::IntoArray;
    use vortex_array::LEGACY_SESSION;
    use vortex_array::ToCanonical;
    use vortex_array::VortexSessionExecute;
    use vortex_array::arrays::PrimitiveArray;
    use vortex_array::assert_arrays_eq;
    use vortex_array::session::ArraySession;
    use vortex_error::VortexExpect;
    use vortex_session::VortexSession;

    use super::*;
    use crate::alp_encode;
    use crate::decompress_into_array;

    // Shared session for the execute path (built once, lazily).
    static SESSION: LazyLock<VortexSession> =
        LazyLock::new(|| VortexSession::empty().with::<ArraySession>());

    // Execute path vs legacy decompress on f32 inputs of various sizes.
    #[rstest]
    #[case(0)]
    #[case(1)]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_f32(#[case] size: usize) {
        let mut ctx = SESSION.create_execution_ctx();
        let values = PrimitiveArray::from_iter((0..size).map(|i| i as f32));
        let encoded = alp_encode(values.as_view(), None, &mut ctx).unwrap();

        let result_canonical = {
            encoded
                .clone()
                .into_array()
                .execute::<Canonical>(&mut ctx)
                .unwrap()
        };
        // Compare against the traditional array-based decompress path
        let expected =
            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();

        assert_arrays_eq!(result_canonical.into_array(), expected);
    }

    // Execute path vs legacy decompress on f64 inputs of various sizes.
    #[rstest]
    #[case(0)]
    #[case(1)]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_f64(#[case] size: usize) {
        let values = PrimitiveArray::from_iter((0..size).map(|i| i as f64));
        let encoded = alp_encode(
            values.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();

        let result_canonical = {
            let mut ctx = SESSION.create_execution_ctx();
            encoded
                .clone()
                .into_array()
                .execute::<Canonical>(&mut ctx)
                .unwrap()
        };
        // Compare against the traditional array-based decompress path
        let expected =
            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();

        assert_arrays_eq!(result_canonical.into_array(), expected);
    }

    // PI does not ALP-encode cleanly, so every 4th element becomes a patch.
    #[rstest]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_with_patches(#[case] size: usize) {
        let values: Vec<f64> = (0..size)
            .map(|i| match i % 4 {
                0..=2 => 1.0,
                _ => PI,
            })
            .collect();

        let array = PrimitiveArray::from_iter(values);
        let encoded = alp_encode(
            array.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();
        assert!(encoded.patches().unwrap().array_len() > 0);

        let result_canonical = {
            let mut ctx = SESSION.create_execution_ctx();
            encoded
                .clone()
                .into_array()
                .execute::<Canonical>(&mut ctx)
                .unwrap()
        };
        // Compare against the traditional array-based decompress path
        let expected =
            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();

        assert_arrays_eq!(result_canonical.into_array(), expected);
    }

    // Every other element is null: exercises the validity child path.
    #[rstest]
    #[case(0)]
    #[case(1)]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_with_validity(#[case] size: usize) {
        let values: Vec<Option<f32>> = (0..size)
            .map(|i| if i % 2 == 1 { None } else { Some(1.0) })
            .collect();

        let array = PrimitiveArray::from_option_iter(values);
        let encoded = alp_encode(
            array.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();

        let result_canonical = {
            let mut ctx = SESSION.create_execution_ctx();
            encoded
                .clone()
                .into_array()
                .execute::<Canonical>(&mut ctx)
                .unwrap()
        };
        // Compare against the traditional array-based decompress path
        let expected =
            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();

        assert_arrays_eq!(result_canonical.into_array(), expected);
    }

    // Nulls and patches together in one array.
    #[rstest]
    #[case(100)]
    #[case(1023)]
    #[case(1024)]
    #[case(1025)]
    #[case(2047)]
    #[case(2048)]
    #[case(2049)]
    fn test_execute_with_patches_and_validity(#[case] size: usize) {
        let values: Vec<Option<f64>> = (0..size)
            .map(|idx| match idx % 3 {
                0 => Some(1.0),
                1 => None,
                _ => Some(PI),
            })
            .collect();

        let array = PrimitiveArray::from_option_iter(values);
        let encoded = alp_encode(
            array.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();
        assert!(encoded.patches().unwrap().array_len() > 0);

        let result_canonical = {
            let mut ctx = SESSION.create_execution_ctx();
            encoded
                .clone()
                .into_array()
                .execute::<Canonical>(&mut ctx)
                .unwrap()
        };
        // Compare against the traditional array-based decompress path
        let expected =
            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();

        assert_arrays_eq!(result_canonical.into_array(), expected);
    }

    // Slice a symmetric window out of the middle and execute; check values and
    // validity element-by-element against the unsliced input.
    #[rstest]
    #[case(500, 100)]
    #[case(1000, 200)]
    #[case(2048, 512)]
    fn test_execute_sliced_vector(#[case] size: usize, #[case] slice_start: usize) {
        let values: Vec<Option<f64>> = (0..size)
            .map(|i| {
                if i % 5 == 0 {
                    None
                } else if i % 4 == 3 {
                    Some(PI)
                } else {
                    Some(1.0)
                }
            })
            .collect();

        let array = PrimitiveArray::from_option_iter(values.clone());
        let encoded = alp_encode(
            array.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();

        let slice_end = size - slice_start;
        let slice_len = slice_end - slice_start;
        let sliced_encoded = encoded.slice(slice_start..slice_end).unwrap();

        let result_canonical = {
            let mut ctx = SESSION.create_execution_ctx();
            sliced_encoded.execute::<Canonical>(&mut ctx).unwrap()
        };
        let result_primitive = result_canonical.into_primitive();

        for idx in 0..slice_len {
            let expected_value = values[slice_start + idx];

            let result_valid = result_primitive
                .validity()
                .vortex_expect("result validity should be derivable")
                .is_valid(idx)
                .unwrap();
            assert_eq!(
                result_valid,
                expected_value.is_some(),
                "Validity mismatch at idx={idx}",
            );

            if let Some(expected_val) = expected_value {
                let result_val = result_primitive.as_slice::<f64>()[idx];
                assert_eq!(result_val, expected_val, "Value mismatch at idx={idx}",);
            }
        }
    }

    // Same slicing scenario, but through `to_primitive` instead of `execute`.
    #[rstest]
    #[case(500, 100)]
    #[case(1000, 200)]
    #[case(2048, 512)]
    fn test_sliced_to_primitive(#[case] size: usize, #[case] slice_start: usize) {
        let values: Vec<Option<f64>> = (0..size)
            .map(|i| {
                if i % 5 == 0 {
                    None
                } else if i % 4 == 3 {
                    Some(PI)
                } else {
                    Some(1.0)
                }
            })
            .collect();

        let array = PrimitiveArray::from_option_iter(values.clone());
        let encoded = alp_encode(
            array.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();

        let slice_end = size - slice_start;
        let slice_len = slice_end - slice_start;
        let sliced_encoded = encoded.slice(slice_start..slice_end).unwrap();

        let result_primitive = sliced_encoded.to_primitive();

        for idx in 0..slice_len {
            let expected_value = values[slice_start + idx];

            let result_valid = result_primitive
                .as_ref()
                .validity()
                .unwrap()
                .to_mask(
                    result_primitive.as_ref().len(),
                    &mut LEGACY_SESSION.create_execution_ctx(),
                )
                .unwrap()
                .value(idx);
            assert_eq!(
                result_valid,
                expected_value.is_some(),
                "Validity mismatch at idx={idx}",
            );

            if let Some(expected_val) = expected_value {
                let buf = result_primitive.to_buffer::<f64>();
                let result_val = buf.as_slice()[idx];
                assert_eq!(result_val, expected_val, "Value mismatch at idx={idx}",);
            }
        }
    }

    /// Regression test for issue #5948: execute_decompress drops patches when chunk_offsets is
    /// None.
    ///
    /// When patches exist but do NOT have chunk_offsets, the execute path incorrectly passes
    /// `None` to `decompress_unchunked_core` instead of the actual patches.
    ///
    /// This can happen after file IO serialization/deserialization where chunk_offsets may not
    /// be preserved, or when building ALPArrays manually without chunk_offsets.
    #[test]
    fn test_execute_decompress_with_patches_no_chunk_offsets_regression_5948() {
        // Create an array with values that will produce patches. PI doesn't encode cleanly.
        let values: Vec<f64> = vec![1.0, 2.0, PI, 4.0, 5.0];
        let original = PrimitiveArray::from_iter(values);

        // First encode normally to get a properly formed ALPArray with patches.
        let normally_encoded = alp_encode(
            original.as_view(),
            None,
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();
        assert!(
            normally_encoded.patches().is_some(),
            "Test requires patches to be present"
        );

        let original_patches = normally_encoded.patches().unwrap();
        assert!(
            original_patches.chunk_offsets().is_some(),
            "Normal encoding should have chunk_offsets"
        );

        // Rebuild the patches WITHOUT chunk_offsets to simulate deserialized patches.
        let patches_without_chunk_offsets = Patches::new(
            original_patches.array_len(),
            original_patches.offset(),
            original_patches.indices().clone(),
            original_patches.values().clone(),
            None, // NO chunk_offsets - this triggers the bug!
        )
        .unwrap();

        // Build a new ALPArray with the same encoded data but patches without chunk_offsets.
        let alp_without_chunk_offsets = ALP::new(
            normally_encoded.encoded().clone(),
            normally_encoded.exponents(),
            Some(patches_without_chunk_offsets),
        );

        // The legacy decompress_into_array path should work correctly.
        let result_legacy = decompress_into_array(
            alp_without_chunk_offsets.clone(),
            &mut LEGACY_SESSION.create_execution_ctx(),
        )
        .unwrap();
        let legacy_slice = result_legacy.as_slice::<f64>();

        // Verify the legacy path produces correct values.
        assert!(
            (legacy_slice[2] - PI).abs() < 1e-10,
            "Legacy path should have PI at index 2, got {}",
            legacy_slice[2]
        );

        // The execute path has the bug - it drops patches when chunk_offsets is None.
        let result_execute = {
            let mut ctx = SESSION.create_execution_ctx();
            execute_decompress(alp_without_chunk_offsets, &mut ctx).unwrap()
        };
        let execute_slice = result_execute.as_slice::<f64>();

        // This assertion FAILS until the bug is fixed because execute_decompress drops patches.
        assert!(
            (execute_slice[2] - PI).abs() < 1e-10,
            "Execute path should have PI at index 2, but got {} (patches were dropped!)",
            execute_slice[2]
        );
    }
}