Skip to main content

vortex_alp/alp/
compress.rs

// SPDX-License-Identifier: Apache-2.0
// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use itertools::Itertools;
5use vortex_array::ArrayRef;
6use vortex_array::ArrayView;
7use vortex_array::ExecutionCtx;
8use vortex_array::IntoArray;
9use vortex_array::arrays::Primitive;
10use vortex_array::arrays::PrimitiveArray;
11use vortex_array::dtype::PType;
12use vortex_array::patches::Patches;
13use vortex_array::validity::Validity;
14use vortex_buffer::Buffer;
15use vortex_buffer::BufferMut;
16use vortex_error::VortexResult;
17use vortex_error::vortex_bail;
18use vortex_mask::Mask;
19
20use crate::ALP;
21use crate::Exponents;
22use crate::alp::ALPArray;
23use crate::alp::ALPFloat;
24
/// Dispatches on a `PType` and evaluates `$body` with the identifier given between the bars
/// aliased to the matching Rust float type (`f32` for `PType::F32`, `f64` for `PType::F64`).
///
/// # Panics
///
/// Panics via `vortex_panic!` when the ptype is anything other than `F32` or `F64`.
#[macro_export]
macro_rules! match_each_alp_float_ptype {
    ($ptype:expr, | $float:ident | $body:block) => {{
        use vortex_array::dtype::PType;
        use vortex_error::vortex_panic;
        // Evaluate the caller's expression exactly once; it is reused in the panic message.
        let alp_ptype = $ptype;
        match alp_ptype {
            PType::F64 => {
                type $float = f64;
                $body
            }
            PType::F32 => {
                type $float = f32;
                $body
            }
            _ => vortex_panic!("ALP can only encode f32 and f64, got {}", alp_ptype),
        }
    }};
}
44
45pub fn alp_encode(
46    parray: ArrayView<'_, Primitive>,
47    exponents: Option<Exponents>,
48    ctx: &mut ExecutionCtx,
49) -> VortexResult<ALPArray> {
50    let (exponents, encoded, patches) = match parray.ptype() {
51        PType::F32 => alp_encode_components_typed::<f32>(parray, exponents, ctx)?,
52        PType::F64 => alp_encode_components_typed::<f64>(parray, exponents, ctx)?,
53        _ => vortex_bail!("ALP can only encode f32 and f64"),
54    };
55
56    // SAFETY: alp_encode_components_typed must return well-formed components
57    unsafe { Ok(ALP::new_unchecked(encoded, exponents, patches)) }
58}
59
60#[expect(
61    clippy::cast_possible_truncation,
62    reason = "u64 index cast to usize is safe for reasonable array sizes"
63)]
64fn alp_encode_components_typed<T>(
65    values: ArrayView<'_, Primitive>,
66    exponents: Option<Exponents>,
67    ctx: &mut ExecutionCtx,
68) -> VortexResult<(Exponents, ArrayRef, Option<Patches>)>
69where
70    T: ALPFloat,
71{
72    let values_slice = values.as_slice::<T>();
73
74    let (exponents, encoded, exceptional_positions, exceptional_values, mut chunk_offsets) =
75        T::encode(values_slice, exponents);
76
77    let encoded_array = PrimitiveArray::new(encoded, values.validity()?).into_array();
78
79    let validity = values
80        .array()
81        .validity()?
82        .execute_mask(values.array().len(), ctx)?;
83    // exceptional_positions may contain exceptions at invalid positions (which contain garbage
84    // data). We remove null exceptions in order to keep the Patches small.
85    let (valid_exceptional_positions, valid_exceptional_values): (Buffer<u64>, Buffer<T>) =
86        match validity {
87            Mask::AllTrue(_) => (exceptional_positions, exceptional_values),
88            Mask::AllFalse(_) => {
89                // no valid positions, ergo nothing worth patching
90                (Buffer::empty(), Buffer::empty())
91            }
92            Mask::Values(is_valid) => {
93                let (pos, vals): (BufferMut<u64>, BufferMut<T>) = exceptional_positions
94                    .into_iter()
95                    .zip_eq(exceptional_values)
96                    .filter(|(index, _)| {
97                        let is_valid = is_valid.value(*index as usize);
98                        if !is_valid {
99                            let patch_chunk = *index as usize / 1024;
100                            for chunk_idx in (patch_chunk + 1)..chunk_offsets.len() {
101                                chunk_offsets[chunk_idx] -= 1;
102                            }
103                        }
104                        is_valid
105                    })
106                    .unzip();
107                (pos.freeze(), vals.freeze())
108            }
109        };
110    let patches = if valid_exceptional_positions.is_empty() {
111        None
112    } else {
113        let patches_validity = if values.dtype().is_nullable() {
114            Validity::AllValid
115        } else {
116            Validity::NonNullable
117        };
118        let valid_exceptional_values =
119            PrimitiveArray::new(valid_exceptional_values, patches_validity).into_array();
120
121        Some(Patches::new(
122            values_slice.len(),
123            0,
124            valid_exceptional_positions.into_array(),
125            valid_exceptional_values,
126            Some(chunk_offsets.into_array()),
127        )?)
128    };
129    Ok((exponents, encoded_array, patches))
130}
131
#[cfg(test)]
mod tests {
    use core::f32;
    use core::f64;

    use f64::consts::E;
    use f64::consts::PI;
    use vortex_array::LEGACY_SESSION;
    use vortex_array::VortexSessionExecute;
    use vortex_array::assert_arrays_eq;
    use vortex_array::dtype::NativePType;
    use vortex_array::validity::Validity;
    use vortex_buffer::Buffer;
    use vortex_buffer::buffer;

    use super::*;
    use crate::alp::array::ALPArrayExt;
    use crate::alp::array::ALPArraySlotsExt;
    use crate::decompress_into_array;

    /// A constant, non-nullable f32 column encodes without patches and round-trips.
    #[test]
    fn test_compress() {
        let input = PrimitiveArray::new(buffer![1.234f32; 1025], Validity::NonNullable);

        let mut encode_ctx = LEGACY_SESSION.create_execution_ctx();
        let alp = alp_encode(input.as_view(), None, &mut encode_ctx).unwrap();

        assert!(alp.patches().is_none());
        let expected_encoded = PrimitiveArray::from_iter(vec![1234i32; 1025]);
        assert_arrays_eq!(alp.encoded(), expected_encoded);
        assert_eq!(alp.exponents(), Exponents { e: 9, f: 6 });

        let mut decode_ctx = LEGACY_SESSION.create_execution_ctx();
        let decoded = decompress_into_array(alp, &mut decode_ctx).unwrap();
        assert_arrays_eq!(decoded, input);
    }

    /// Nulls are carried through the encoded array and survive decompression.
    #[test]
    fn test_nullable_compress() {
        let input = PrimitiveArray::from_option_iter([None, Some(1.234f32), None]);

        let mut encode_ctx = LEGACY_SESSION.create_execution_ctx();
        let alp = alp_encode(input.as_view(), None, &mut encode_ctx).unwrap();

        assert!(alp.patches().is_none());
        let expected_encoded = PrimitiveArray::from_option_iter([None, Some(1234i32), None]);
        assert_arrays_eq!(alp.encoded(), expected_encoded);
        assert_eq!(alp.exponents(), Exponents { e: 9, f: 6 });

        let mut decode_ctx = LEGACY_SESSION.create_execution_ctx();
        let decoded = decompress_into_array(alp, &mut decode_ctx).unwrap();
        let expected = PrimitiveArray::from_option_iter(vec![None, Some(1.234f32), None]);
        assert_arrays_eq!(decoded, expected);
    }

    /// PI cannot be encoded with the chosen exponents, so it must come back through a patch.
    #[test]
    #[expect(clippy::approx_constant)] // Clippy objects to 2.718, an approximation of e, the base of the natural logarithm.
    fn test_patched_compress() {
        let raw = buffer![1.234f64, 2.718, PI, 4.0];
        let input = PrimitiveArray::new(raw.clone(), Validity::NonNullable);

        let mut encode_ctx = LEGACY_SESSION.create_execution_ctx();
        let alp = alp_encode(input.as_view(), None, &mut encode_ctx).unwrap();

        assert!(alp.patches().is_some());
        let expected_encoded = PrimitiveArray::from_iter(vec![1234i64, 2718, 1234, 4000]);
        assert_arrays_eq!(alp.encoded(), expected_encoded);
        assert_eq!(alp.exponents(), Exponents { e: 16, f: 13 });

        let mut decode_ctx = LEGACY_SESSION.create_execution_ctx();
        let decoded = decompress_into_array(alp, &mut decode_ctx).unwrap();
        let expected_decoded = PrimitiveArray::new(raw, Validity::NonNullable);
        assert_arrays_eq!(decoded, expected_decoded);
    }

    /// An exception that sits at a null position must not produce a patch.
    #[test]
    #[expect(clippy::approx_constant)] // Clippy objects to 2.718, an approximation of e, the base of the natural logarithm.
    fn test_compress_ignores_invalid_exceptional_values() {
        let raw = buffer![1.234f64, 2.718, PI, 4.0];
        // Only PI (index 2) is null.
        let input = PrimitiveArray::new(raw, Validity::from_iter([true, true, false, true]));

        let mut encode_ctx = LEGACY_SESSION.create_execution_ctx();
        let alp = alp_encode(input.as_view(), None, &mut encode_ctx).unwrap();

        assert!(alp.patches().is_none());
        let expected_encoded =
            PrimitiveArray::from_option_iter(buffer![Some(1234i64), Some(2718), None, Some(4000)]);
        assert_arrays_eq!(alp.encoded(), expected_encoded);
        assert_eq!(alp.exponents(), Exponents { e: 16, f: 13 });

        let mut decode_ctx = LEGACY_SESSION.create_execution_ctx();
        let decoded = decompress_into_array(alp, &mut decode_ctx).unwrap();
        assert_arrays_eq!(decoded, input);
    }

    /// A nullable input with a valid exception keeps its patch and compares equal to the input.
    #[test]
    #[expect(clippy::approx_constant)] // ALP doesn't like E
    fn test_nullable_patched_scalar_at() {
        let input = PrimitiveArray::from_option_iter([
            Some(1.234f64),
            Some(2.718),
            Some(PI),
            Some(4.0),
            None,
        ]);

        let mut encode_ctx = LEGACY_SESSION.create_execution_ctx();
        let alp = alp_encode(input.as_view(), None, &mut encode_ctx).unwrap();

        assert!(alp.patches().is_some());
        assert_eq!(alp.exponents(), Exponents { e: 16, f: 13 });
        assert_arrays_eq!(alp, input);

        // Decompression only has to succeed here; its output is not inspected.
        let mut decode_ctx = LEGACY_SESSION.create_execution_ctx();
        let _decoded = decompress_into_array(alp, &mut decode_ctx).unwrap();
    }

    /// Values that differ only in low fractional digits must round-trip.
    #[test]
    fn roundtrips_close_fractional() {
        let input = PrimitiveArray::from_iter([195.26274f32, 195.27837, -48.815685]);

        let mut encode_ctx = LEGACY_SESSION.create_execution_ctx();
        let alp = alp_encode(input.as_view(), None, &mut encode_ctx).unwrap();

        assert_arrays_eq!(alp, input);
    }

    /// With every position null, no patches are applied to the decoded buffer.
    #[test]
    fn roundtrips_all_null() {
        let mut exec_ctx = LEGACY_SESSION.create_execution_ctx();
        let input =
            PrimitiveArray::new(buffer![195.26274f64, PI, -48.815685], Validity::AllInvalid);

        let alp = alp_encode(input.as_view(), None, &mut exec_ctx).unwrap();
        let decoded = alp
            .into_array()
            .execute::<PrimitiveArray>(&mut exec_ctx)
            .unwrap();

        assert_eq!(
            // The second and third values become exceptions and are replaced
            [195.26274; 3],
            decoded.as_slice::<f64>()
        );

        assert_arrays_eq!(decoded, input);
    }

    /// Signed zeros, NaN and the infinities must all survive encoding.
    #[test]
    fn non_finite_numbers() {
        let mut exec_ctx = LEGACY_SESSION.create_execution_ctx();
        let input = PrimitiveArray::new(
            buffer![0.0f32, -0.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY],
            Validity::NonNullable,
        );

        let alp = alp_encode(input.as_view(), None, &mut exec_ctx).unwrap();
        let decoded = alp
            .as_array()
            .clone()
            .execute::<PrimitiveArray>(&mut exec_ctx)
            .unwrap();

        // `is_eq` is used rather than `==` so the NaN entry can be compared as well.
        let decoded_values = decoded.as_slice::<f32>();
        for (idx, &original_val) in input.as_slice::<f32>().iter().enumerate() {
            let decoded_val = decoded_values[idx];
            assert!(
                decoded_val.is_eq(original_val),
                "Expected {original_val} but got {decoded_val}"
            );
        }
    }

    /// Exceptions on both sides of the first 1024-row boundary.
    #[test]
    fn test_chunk_offsets() {
        let mut exec_ctx = LEGACY_SESSION.create_execution_ctx();

        let mut raw = vec![1.0f64; 3072];
        raw[1023] = PI;
        raw[1024] = E;
        raw[1025] = PI;
        let input = PrimitiveArray::new(Buffer::from(raw), Validity::NonNullable);

        let alp = alp_encode(input.as_view(), None, &mut exec_ctx).unwrap();
        let patches = alp.patches().unwrap();

        let offsets = patches.chunk_offsets().clone().unwrap();
        let offsets = offsets.execute::<PrimitiveArray>(&mut exec_ctx).unwrap();
        let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 3]);
        assert_arrays_eq!(offsets, expected_offsets);

        let indices = patches.indices().clone();
        let indices = indices.execute::<PrimitiveArray>(&mut exec_ctx).unwrap();
        let expected_indices = PrimitiveArray::from_iter(vec![1023u64, 1024, 1025]);
        assert_arrays_eq!(indices, expected_indices);

        let patch_values = patches.values().clone();
        let patch_values = patch_values
            .execute::<PrimitiveArray>(&mut exec_ctx)
            .unwrap();
        let expected_values = PrimitiveArray::from_iter(vec![PI, E, PI]);
        assert_arrays_eq!(patch_values, expected_values);
    }

    /// The middle chunk holds no exception, so its offset equals that of the chunk after it.
    #[test]
    fn test_chunk_offsets_no_patches_in_middle() {
        let mut exec_ctx = LEGACY_SESSION.create_execution_ctx();

        let mut raw = vec![1.0f64; 3072];
        raw[0] = PI;
        raw[2048] = E;
        let input = PrimitiveArray::new(Buffer::from(raw), Validity::NonNullable);

        let alp = alp_encode(input.as_view(), None, &mut exec_ctx).unwrap();
        let patches = alp.patches().unwrap();

        let offsets = patches.chunk_offsets().clone().unwrap();
        let offsets = offsets.execute::<PrimitiveArray>(&mut exec_ctx).unwrap();
        let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 1]);
        assert_arrays_eq!(offsets, expected_offsets);

        let indices = patches.indices().clone();
        let indices = indices.execute::<PrimitiveArray>(&mut exec_ctx).unwrap();
        let expected_indices = PrimitiveArray::from_iter(vec![0u64, 2048]);
        assert_arrays_eq!(indices, expected_indices);

        let patch_values = patches.values().clone();
        let patch_values = patch_values
            .execute::<PrimitiveArray>(&mut exec_ctx)
            .unwrap();
        let expected_values = PrimitiveArray::from_iter(vec![PI, E]);
        assert_arrays_eq!(patch_values, expected_values);
    }

    /// Only the first chunk holds an exception; the trailing chunks repeat its count.
    #[test]
    fn test_chunk_offsets_trailing_empty_chunks() {
        let mut exec_ctx = LEGACY_SESSION.create_execution_ctx();

        let mut raw = vec![1.0f64; 3072];
        raw[0] = PI;
        let input = PrimitiveArray::new(Buffer::from(raw), Validity::NonNullable);

        let alp = alp_encode(input.as_view(), None, &mut exec_ctx).unwrap();
        let patches = alp.patches().unwrap();

        let offsets = patches.chunk_offsets().clone().unwrap();
        let offsets = offsets.execute::<PrimitiveArray>(&mut exec_ctx).unwrap();
        let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 1]);
        assert_arrays_eq!(offsets, expected_offsets);

        let indices = patches.indices().clone();
        let indices = indices.execute::<PrimitiveArray>(&mut exec_ctx).unwrap();
        let expected_indices = PrimitiveArray::from_iter(vec![0u64]);
        assert_arrays_eq!(indices, expected_indices);

        let patch_values = patches.values().clone();
        let patch_values = patch_values
            .execute::<PrimitiveArray>(&mut exec_ctx)
            .unwrap();
        let expected_values = PrimitiveArray::from_iter(vec![PI]);
        assert_arrays_eq!(patch_values, expected_values);
    }

    /// An input shorter than 1024 rows yields a single chunk offset.
    #[test]
    fn test_chunk_offsets_single_chunk() {
        let mut exec_ctx = LEGACY_SESSION.create_execution_ctx();

        let mut raw = vec![1.0f64; 512];
        raw[0] = PI;
        raw[100] = E;
        let input = PrimitiveArray::new(Buffer::from(raw), Validity::NonNullable);

        let alp = alp_encode(input.as_view(), None, &mut exec_ctx).unwrap();
        let patches = alp.patches().unwrap();

        let offsets = patches.chunk_offsets().clone().unwrap();
        let offsets = offsets.execute::<PrimitiveArray>(&mut exec_ctx).unwrap();
        let expected_offsets = PrimitiveArray::from_iter(vec![0u64]);
        assert_arrays_eq!(offsets, expected_offsets);

        let indices = patches.indices().clone();
        let indices = indices.execute::<PrimitiveArray>(&mut exec_ctx).unwrap();
        let expected_indices = PrimitiveArray::from_iter(vec![0u64, 100]);
        assert_arrays_eq!(indices, expected_indices);

        let patch_values = patches.values().clone();
        let patch_values = patch_values
            .execute::<PrimitiveArray>(&mut exec_ctx)
            .unwrap();
        let expected_values = PrimitiveArray::from_iter(vec![PI, E]);
        assert_arrays_eq!(patch_values, expected_values);
    }

    #[test]
    fn test_slice_half_chunk_f32_roundtrip() {
        // Encode 1024 elements, then compare the upper half (512..1024) of the encoded
        // array against the same slice of the input.
        let input = PrimitiveArray::new(Buffer::from(vec![1.234f32; 1024]), Validity::NonNullable);

        let mut encode_ctx = LEGACY_SESSION.create_execution_ctx();
        let alp = alp_encode(input.as_view(), None, &mut encode_ctx).unwrap();

        let sliced_alp = alp.slice(512..1024).unwrap();
        let expected_slice = input.slice(512..1024).unwrap();
        assert_arrays_eq!(sliced_alp, expected_slice);
    }

    /// Same as the f32 half-chunk test, for f64.
    #[test]
    fn test_slice_half_chunk_f64_roundtrip() {
        let input = PrimitiveArray::new(Buffer::from(vec![5.678f64; 1024]), Validity::NonNullable);

        let mut encode_ctx = LEGACY_SESSION.create_execution_ctx();
        let alp = alp_encode(input.as_view(), None, &mut encode_ctx).unwrap();

        let sliced_alp = alp.slice(512..1024).unwrap();
        let expected_slice = input.slice(512..1024).unwrap();
        assert_arrays_eq!(sliced_alp, expected_slice);
    }

    /// Slicing the upper half of a chunk keeps only the exception that falls inside it.
    #[test]
    fn test_slice_half_chunk_with_patches_roundtrip() {
        let mut raw = vec![1.0f64; 1024];
        raw[100] = PI;
        raw[200] = E;
        raw[600] = 42.42;
        let input = PrimitiveArray::new(Buffer::from(raw), Validity::NonNullable);

        let mut encode_ctx = LEGACY_SESSION.create_execution_ctx();
        let alp = alp_encode(input.as_view(), None, &mut encode_ctx).unwrap();

        let sliced_alp = alp.slice(512..1024).unwrap();
        let expected_slice = input.slice(512..1024).unwrap();
        assert_arrays_eq!(sliced_alp, expected_slice);
        assert!(alp.patches().is_some());
    }

    /// A two-element slice straddling the 1024-row boundary, with an exception at row 1023.
    #[test]
    fn test_slice_across_chunks_with_patches_roundtrip() {
        let mut raw = vec![1.0f64; 2048];
        raw[100] = PI;
        raw[200] = E;
        for idx in [600, 800, 1000, 1023] {
            raw[idx] = 42.42;
        }
        let input = PrimitiveArray::new(Buffer::from(raw), Validity::NonNullable);

        let mut encode_ctx = LEGACY_SESSION.create_execution_ctx();
        let alp = alp_encode(input.as_view(), None, &mut encode_ctx).unwrap();

        let sliced_alp = alp.slice(1023..1025).unwrap();
        let expected_slice = input.slice(1023..1025).unwrap();
        assert_arrays_eq!(sliced_alp, expected_slice);
        assert!(alp.patches().is_some());
    }

    /// Half-chunk slice of a nullable input where every third row is null.
    #[test]
    fn test_slice_half_chunk_nullable_roundtrip() {
        let mut exec_ctx = LEGACY_SESSION.create_execution_ctx();
        let rows: Vec<Option<f32>> = (0..1024)
            .map(|i| (i % 3 != 0).then_some(2.5f32))
            .collect();
        let input = PrimitiveArray::from_option_iter(rows);

        let alp = alp_encode(input.as_view(), None, &mut exec_ctx).unwrap();
        let decoded = alp
            .slice(512..1024)
            .unwrap()
            .execute::<PrimitiveArray>(&mut exec_ctx)
            .unwrap();

        let expected_slice = input.slice(512..1024).unwrap();
        assert_arrays_eq!(decoded, expected_slice);
    }

    /// 10,000 identical f32 values need no patches.
    #[test]
    fn test_large_f32_array_uniform_values() {
        let len = 10_000;
        let input = PrimitiveArray::new(buffer![42.125f32; len], Validity::NonNullable);

        let mut encode_ctx = LEGACY_SESSION.create_execution_ctx();
        let alp = alp_encode(input.as_view(), None, &mut encode_ctx).unwrap();
        assert!(alp.patches().is_none());

        let mut decode_ctx = LEGACY_SESSION.create_execution_ctx();
        let decoded = decompress_into_array(alp, &mut decode_ctx).unwrap();
        assert_arrays_eq!(decoded, input);
    }

    /// 50,000 identical f64 values need no patches.
    #[test]
    fn test_large_f64_array_uniform_values() {
        let len = 50_000;
        let input = PrimitiveArray::new(buffer![123.456789f64; len], Validity::NonNullable);

        let mut encode_ctx = LEGACY_SESSION.create_execution_ctx();
        let alp = alp_encode(input.as_view(), None, &mut encode_ctx).unwrap();
        assert!(alp.patches().is_none());

        let mut decode_ctx = LEGACY_SESSION.create_execution_ctx();
        let decoded = decompress_into_array(alp, &mut decode_ctx).unwrap();
        assert_arrays_eq!(decoded, input);
    }

    /// Irrational and infinite outliers spread over a multi-chunk f32 array.
    #[test]
    fn test_large_f32_array_with_patches() {
        let len = 5_000;
        let mut raw = vec![1.5f32; len];
        raw[100] = f32::consts::PI;
        raw[1500] = f32::consts::E;
        raw[3000] = f32::NEG_INFINITY;
        raw[4500] = f32::INFINITY;
        let input = PrimitiveArray::new(Buffer::from(raw), Validity::NonNullable);

        let mut encode_ctx = LEGACY_SESSION.create_execution_ctx();
        let alp = alp_encode(input.as_view(), None, &mut encode_ctx).unwrap();
        assert!(alp.patches().is_some());

        let mut decode_ctx = LEGACY_SESSION.create_execution_ctx();
        let decoded = decompress_into_array(alp, &mut decode_ctx).unwrap();
        assert_arrays_eq!(decoded, input);
    }

    /// Outliers of several kinds (irrational, NaN, infinities, signed zeros, many digits)
    /// spread over a multi-chunk f64 array.
    #[test]
    fn test_large_f64_array_with_patches() {
        let size = 8_000;
        let mut values = vec![2.2184f64; size];
        values[0] = PI;
        values[1000] = E;
        values[2000] = f64::NAN;
        values[3000] = f64::INFINITY;
        values[4000] = f64::NEG_INFINITY;
        values[5000] = 0.0;
        values[6000] = -0.0;
        values[7000] = 999.999999999;
        let input = PrimitiveArray::new(Buffer::from(values.clone()), Validity::NonNullable);

        let mut encode_ctx = LEGACY_SESSION.create_execution_ctx();
        let alp = alp_encode(input.as_view(), None, &mut encode_ctx).unwrap();
        assert!(alp.patches().is_some());

        let mut decode_ctx = LEGACY_SESSION.create_execution_ctx();
        let decoded = decompress_into_array(alp, &mut decode_ctx).unwrap();

        // Compared element by element with `is_eq` because the data contains NaN.
        let decoded_values = decoded.as_slice::<f64>();
        for (idx, &original_val) in values.iter().enumerate() {
            let decoded_val = decoded_values[idx];
            assert!(
                decoded_val.is_eq(original_val),
                "At index {idx}: Expected {original_val} but got {decoded_val}"
            );
        }
    }

    /// A 12,000-row nullable f32 array where every seventh row is null.
    #[test]
    fn test_large_nullable_array() {
        let size = 12_000;
        let rows: Vec<Option<f32>> = (0..size)
            .map(|i| (i % 7 != 0).then(|| (i as f32) * 0.1))
            .collect();
        let input = PrimitiveArray::from_option_iter(rows);

        let mut encode_ctx = LEGACY_SESSION.create_execution_ctx();
        let alp = alp_encode(input.as_view(), None, &mut encode_ctx).unwrap();

        let mut decode_ctx = LEGACY_SESSION.create_execution_ctx();
        let decoded = decompress_into_array(alp, &mut decode_ctx).unwrap();
        assert_arrays_eq!(decoded, input);
    }

    /// Outliers at both valid and null positions in a multi-chunk array.
    #[test]
    fn test_large_mixed_validity_with_patches() {
        let size = 6_000;
        let mut raw = vec![10.125f64; size];
        raw[500] = PI;
        raw[1500] = E;
        raw[2500] = f64::INFINITY;
        raw[3500] = f64::NEG_INFINITY;
        raw[4500] = f64::NAN;

        // Two of the outliers (rows 500 and 2500) are null.
        let validity = Validity::from_iter((0..size).map(|i| !matches!(i, 500 | 2500)));
        let input = PrimitiveArray::new(Buffer::from(raw), validity);

        let mut encode_ctx = LEGACY_SESSION.create_execution_ctx();
        let alp = alp_encode(input.as_view(), None, &mut encode_ctx).unwrap();

        let mut decode_ctx = LEGACY_SESSION.create_execution_ctx();
        let decoded = decompress_into_array(alp, &mut decode_ctx).unwrap();
        assert_arrays_eq!(decoded, input);
    }

    /// Regression test for an index-out-of-bounds panic when a multi-chunk ALP array with
    /// patches in its trailing chunk is sliced mid-chunk.
    ///
    /// The bug: chunk_offsets are sliced at chunk granularity (1024-row boundaries) while
    /// patch indices/values are sliced at element granularity. For a slice ending mid-chunk,
    /// patches_end_idx could exceed patches_indices.len(), which panicked during
    /// decompression.
    #[test]
    fn test_slice_mid_chunk_with_patches_in_trailing_chunk() {
        // 3 chunks (3072 elements) with patches in every chunk:
        // chunk 0 = 0..1024, chunk 1 = 1024..2048, chunk 2 = 2048..3072.
        let mut raw = vec![1.0f64; 3072];
        for idx in [100, 1100, 1900, 2100, 2900] {
            raw[idx] = PI;
        }
        for idx in [500, 1500, 2500] {
            raw[idx] = E;
        }
        let input = PrimitiveArray::new(Buffer::from(raw), Validity::NonNullable);

        let mut encode_ctx = LEGACY_SESSION.create_execution_ctx();
        let alp = alp_encode(input.as_view(), None, &mut encode_ctx).unwrap();
        assert!(alp.patches().is_some());

        // Both ranges end at element 2500, inside chunk 2, so chunk_offsets still covers the
        // whole of chunk 2 while the patch indices stop at element 2500. The second range
        // also starts mid-chunk.
        for range in [0..2500, 500..2500] {
            let sliced_alp = alp.slice(range.clone()).unwrap();
            let expected = input.slice(range).unwrap();
            assert_arrays_eq!(sliced_alp, expected);
        }
    }
}