Skip to main content

vortex_alp/alp/
compress.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use itertools::Itertools;
5use vortex_array::ArrayRef;
6use vortex_array::ArrayView;
7use vortex_array::ExecutionCtx;
8use vortex_array::IntoArray;
9use vortex_array::arrays::Primitive;
10use vortex_array::arrays::PrimitiveArray;
11use vortex_array::dtype::PType;
12use vortex_array::patches::Patches;
13use vortex_array::validity::Validity;
14use vortex_buffer::Buffer;
15use vortex_buffer::BufferMut;
16use vortex_error::VortexResult;
17use vortex_error::vortex_bail;
18use vortex_mask::Mask;
19
20use crate::ALP;
21use crate::Exponents;
22use crate::alp::ALPArray;
23use crate::alp::ALPFloat;
24
/// Dispatches on a float ptype supported by ALP.
///
/// Evaluates the ptype expression once, then runs `$body` with the identifier given between
/// the pipes aliased to `f32` (for `PType::F32`) or `f64` (for `PType::F64`).
///
/// # Panics
///
/// Panics through `vortex_panic!` when the ptype is anything other than `F32` or `F64`.
#[macro_export]
macro_rules! match_each_alp_float_ptype {
    ($ptype_expr:expr, | $float:ident | $body:block) => {{
        use vortex_array::dtype::PType;
        use vortex_error::vortex_panic;
        let alp_ptype = $ptype_expr;
        match alp_ptype {
            PType::F32 => {
                type $float = f32;
                $body
            }
            PType::F64 => {
                type $float = f64;
                $body
            }
            unsupported => vortex_panic!("ALP can only encode f32 and f64, got {}", unsupported),
        }
    }};
}
44
45pub fn alp_encode(
46    parray: ArrayView<'_, Primitive>,
47    exponents: Option<Exponents>,
48    ctx: &mut ExecutionCtx,
49) -> VortexResult<ALPArray> {
50    let (exponents, encoded, patches) = match parray.ptype() {
51        PType::F32 => alp_encode_components_typed::<f32>(parray, exponents, ctx)?,
52        PType::F64 => alp_encode_components_typed::<f64>(parray, exponents, ctx)?,
53        _ => vortex_bail!("ALP can only encode f32 and f64"),
54    };
55
56    // SAFETY: alp_encode_components_typed must return well-formed components
57    unsafe { Ok(ALP::new_unchecked(encoded, exponents, patches)) }
58}
59
60#[expect(
61    clippy::cast_possible_truncation,
62    reason = "u64 index cast to usize is safe for reasonable array sizes"
63)]
64fn alp_encode_components_typed<T>(
65    values: ArrayView<'_, Primitive>,
66    exponents: Option<Exponents>,
67    ctx: &mut ExecutionCtx,
68) -> VortexResult<(Exponents, ArrayRef, Option<Patches>)>
69where
70    T: ALPFloat,
71{
72    let values_slice = values.as_slice::<T>();
73
74    let (exponents, encoded, exceptional_positions, exceptional_values, mut chunk_offsets) =
75        T::encode(values_slice, exponents);
76
77    let encoded_array = PrimitiveArray::new(encoded, values.validity()?).into_array();
78
79    let validity = values
80        .array()
81        .validity()?
82        .execute_mask(values.array().len(), ctx)?;
83    // exceptional_positions may contain exceptions at invalid positions (which contain garbage
84    // data). We remove null exceptions in order to keep the Patches small.
85    let (valid_exceptional_positions, valid_exceptional_values): (Buffer<u64>, Buffer<T>) =
86        match validity {
87            Mask::AllTrue(_) => (exceptional_positions, exceptional_values),
88            Mask::AllFalse(_) => {
89                // no valid positions, ergo nothing worth patching
90                (Buffer::empty(), Buffer::empty())
91            }
92            Mask::Values(is_valid) => {
93                let (pos, vals): (BufferMut<u64>, BufferMut<T>) = exceptional_positions
94                    .into_iter()
95                    .zip_eq(exceptional_values)
96                    .filter(|(index, _)| {
97                        let is_valid = is_valid.value(*index as usize);
98                        if !is_valid {
99                            let patch_chunk = *index as usize / 1024;
100                            for chunk_idx in (patch_chunk + 1)..chunk_offsets.len() {
101                                chunk_offsets[chunk_idx] -= 1;
102                            }
103                        }
104                        is_valid
105                    })
106                    .unzip();
107                (pos.freeze(), vals.freeze())
108            }
109        };
110    let patches = if valid_exceptional_positions.is_empty() {
111        None
112    } else {
113        let patches_validity = if values.dtype().is_nullable() {
114            Validity::AllValid
115        } else {
116            Validity::NonNullable
117        };
118        let valid_exceptional_values =
119            PrimitiveArray::new(valid_exceptional_values, patches_validity).into_array();
120
121        Some(Patches::new(
122            values_slice.len(),
123            0,
124            valid_exceptional_positions.into_array(),
125            valid_exceptional_values,
126            Some(chunk_offsets.into_array()),
127        )?)
128    };
129    Ok((exponents, encoded_array, patches))
130}
131
132#[cfg(test)]
133mod tests {
134    use core::f64;
135
136    use f64::consts::E;
137    use f64::consts::PI;
138    use vortex_array::LEGACY_SESSION;
139    use vortex_array::VortexSessionExecute;
140    use vortex_array::assert_arrays_eq;
141    use vortex_array::dtype::NativePType;
142    use vortex_array::validity::Validity;
143    use vortex_buffer::Buffer;
144    use vortex_buffer::buffer;
145
146    use super::*;
147    use crate::alp::array::ALPArrayExt;
148    use crate::alp::array::ALPArraySlotsExt;
149    use crate::decompress_into_array;
150
151    #[test]
152    fn test_compress() {
153        let array = PrimitiveArray::new(buffer![1.234f32; 1025], Validity::NonNullable);
154        let encoded = alp_encode(
155            array.as_view(),
156            None,
157            &mut LEGACY_SESSION.create_execution_ctx(),
158        )
159        .unwrap();
160        assert!(encoded.patches().is_none());
161        let expected_encoded = PrimitiveArray::from_iter(vec![1234i32; 1025]);
162        assert_arrays_eq!(encoded.encoded(), expected_encoded);
163        assert_eq!(encoded.exponents(), Exponents { e: 9, f: 6 });
164
165        let decoded =
166            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
167        assert_arrays_eq!(decoded, array);
168    }
169
170    #[test]
171    fn test_nullable_compress() {
172        let array = PrimitiveArray::from_option_iter([None, Some(1.234f32), None]);
173        let encoded = alp_encode(
174            array.as_view(),
175            None,
176            &mut LEGACY_SESSION.create_execution_ctx(),
177        )
178        .unwrap();
179        assert!(encoded.patches().is_none());
180        let expected_encoded = PrimitiveArray::from_option_iter([None, Some(1234i32), None]);
181        assert_arrays_eq!(encoded.encoded(), expected_encoded);
182        assert_eq!(encoded.exponents(), Exponents { e: 9, f: 6 });
183
184        let decoded =
185            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
186        let expected = PrimitiveArray::from_option_iter(vec![None, Some(1.234f32), None]);
187        assert_arrays_eq!(decoded, expected);
188    }
189
190    #[test]
191    #[expect(clippy::approx_constant)] // Clippy objects to 2.718, an approximation of e, the base of the natural logarithm.
192    fn test_patched_compress() {
193        let values = buffer![1.234f64, 2.718, PI, 4.0];
194        let array = PrimitiveArray::new(values.clone(), Validity::NonNullable);
195        let encoded = alp_encode(
196            array.as_view(),
197            None,
198            &mut LEGACY_SESSION.create_execution_ctx(),
199        )
200        .unwrap();
201        assert!(encoded.patches().is_some());
202        let expected_encoded = PrimitiveArray::from_iter(vec![1234i64, 2718, 1234, 4000]);
203        assert_arrays_eq!(encoded.encoded(), expected_encoded);
204        assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });
205
206        let decoded =
207            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
208        let expected_decoded = PrimitiveArray::new(values, Validity::NonNullable);
209        assert_arrays_eq!(decoded, expected_decoded);
210    }
211
212    #[test]
213    #[expect(clippy::approx_constant)] // Clippy objects to 2.718, an approximation of e, the base of the natural logarithm.
214    fn test_compress_ignores_invalid_exceptional_values() {
215        let values = buffer![1.234f64, 2.718, PI, 4.0];
216        let array = PrimitiveArray::new(values, Validity::from_iter([true, true, false, true]));
217        let encoded = alp_encode(
218            array.as_view(),
219            None,
220            &mut LEGACY_SESSION.create_execution_ctx(),
221        )
222        .unwrap();
223        assert!(encoded.patches().is_none());
224        let expected_encoded =
225            PrimitiveArray::from_option_iter(buffer![Some(1234i64), Some(2718), None, Some(4000)]);
226        assert_arrays_eq!(encoded.encoded(), expected_encoded);
227        assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });
228
229        let decoded =
230            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
231        assert_arrays_eq!(decoded, array);
232    }
233
234    #[test]
235    #[expect(clippy::approx_constant)] // ALP doesn't like E
236    fn test_nullable_patched_scalar_at() {
237        let array = PrimitiveArray::from_option_iter([
238            Some(1.234f64),
239            Some(2.718),
240            Some(PI),
241            Some(4.0),
242            None,
243        ]);
244        let encoded = alp_encode(
245            array.as_view(),
246            None,
247            &mut LEGACY_SESSION.create_execution_ctx(),
248        )
249        .unwrap();
250        assert!(encoded.patches().is_some());
251
252        assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });
253
254        assert_arrays_eq!(encoded, array);
255
256        let _decoded =
257            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
258    }
259
260    #[test]
261    fn roundtrips_close_fractional() {
262        let original = PrimitiveArray::from_iter([195.26274f32, 195.27837, -48.815685]);
263        let alp_arr = alp_encode(
264            original.as_view(),
265            None,
266            &mut LEGACY_SESSION.create_execution_ctx(),
267        )
268        .unwrap();
269        assert_arrays_eq!(alp_arr, original);
270    }
271
272    #[test]
273    fn roundtrips_all_null() {
274        let mut ctx = LEGACY_SESSION.create_execution_ctx();
275        let original =
276            PrimitiveArray::new(buffer![195.26274f64, PI, -48.815685], Validity::AllInvalid);
277        let alp_arr = alp_encode(original.as_view(), None, &mut ctx).unwrap();
278        let decompressed = alp_arr
279            .into_array()
280            .execute::<PrimitiveArray>(&mut ctx)
281            .unwrap();
282
283        assert_eq!(
284            // The second and third values become exceptions and are replaced
285            [195.26274, 195.26274, 195.26274],
286            decompressed.as_slice::<f64>()
287        );
288
289        assert_arrays_eq!(decompressed, original);
290    }
291
292    #[test]
293    fn non_finite_numbers() {
294        let mut ctx = LEGACY_SESSION.create_execution_ctx();
295        let original = PrimitiveArray::new(
296            buffer![0.0f32, -0.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY],
297            Validity::NonNullable,
298        );
299        let encoded = alp_encode(original.as_view(), None, &mut ctx).unwrap();
300        let decoded = encoded
301            .as_array()
302            .clone()
303            .execute::<PrimitiveArray>(&mut ctx)
304            .unwrap();
305        for idx in 0..original.len() {
306            let decoded_val = decoded.as_slice::<f32>()[idx];
307            let original_val = original.as_slice::<f32>()[idx];
308            assert!(
309                decoded_val.is_eq(original_val),
310                "Expected {original_val} but got {decoded_val}"
311            );
312        }
313    }
314
315    #[test]
316    fn test_chunk_offsets() {
317        let mut ctx = LEGACY_SESSION.create_execution_ctx();
318        let mut values = vec![1.0f64; 3072];
319
320        values[1023] = PI;
321        values[1024] = E;
322        values[1025] = PI;
323
324        let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
325        let encoded = alp_encode(array.as_view(), None, &mut ctx).unwrap();
326        let patches = encoded.patches().unwrap();
327
328        let chunk_offsets = patches
329            .chunk_offsets()
330            .clone()
331            .unwrap()
332            .execute::<PrimitiveArray>(&mut ctx)
333            .unwrap();
334        let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 3]);
335        assert_arrays_eq!(chunk_offsets, expected_offsets);
336
337        let patch_indices = patches
338            .indices()
339            .clone()
340            .execute::<PrimitiveArray>(&mut ctx)
341            .unwrap();
342        let expected_indices = PrimitiveArray::from_iter(vec![1023u64, 1024, 1025]);
343        assert_arrays_eq!(patch_indices, expected_indices);
344
345        let patch_values = patches
346            .values()
347            .clone()
348            .execute::<PrimitiveArray>(&mut ctx)
349            .unwrap();
350        let expected_values = PrimitiveArray::from_iter(vec![PI, E, PI]);
351        assert_arrays_eq!(patch_values, expected_values);
352    }
353
354    #[test]
355    fn test_chunk_offsets_no_patches_in_middle() {
356        let mut ctx = LEGACY_SESSION.create_execution_ctx();
357        let mut values = vec![1.0f64; 3072];
358        values[0] = PI;
359        values[2048] = E;
360
361        let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
362        let encoded = alp_encode(array.as_view(), None, &mut ctx).unwrap();
363        let patches = encoded.patches().unwrap();
364
365        let chunk_offsets = patches
366            .chunk_offsets()
367            .clone()
368            .unwrap()
369            .execute::<PrimitiveArray>(&mut ctx)
370            .unwrap();
371        let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 1]);
372        assert_arrays_eq!(chunk_offsets, expected_offsets);
373
374        let patch_indices = patches
375            .indices()
376            .clone()
377            .execute::<PrimitiveArray>(&mut ctx)
378            .unwrap();
379        let expected_indices = PrimitiveArray::from_iter(vec![0u64, 2048]);
380        assert_arrays_eq!(patch_indices, expected_indices);
381
382        let patch_values = patches
383            .values()
384            .clone()
385            .execute::<PrimitiveArray>(&mut ctx)
386            .unwrap();
387        let expected_values = PrimitiveArray::from_iter(vec![PI, E]);
388        assert_arrays_eq!(patch_values, expected_values);
389    }
390
391    #[test]
392    fn test_chunk_offsets_trailing_empty_chunks() {
393        let mut ctx = LEGACY_SESSION.create_execution_ctx();
394        let mut values = vec![1.0f64; 3072];
395        values[0] = PI;
396
397        let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
398        let encoded = alp_encode(array.as_view(), None, &mut ctx).unwrap();
399        let patches = encoded.patches().unwrap();
400
401        let chunk_offsets = patches
402            .chunk_offsets()
403            .clone()
404            .unwrap()
405            .execute::<PrimitiveArray>(&mut ctx)
406            .unwrap();
407        let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 1]);
408        assert_arrays_eq!(chunk_offsets, expected_offsets);
409
410        let patch_indices = patches
411            .indices()
412            .clone()
413            .execute::<PrimitiveArray>(&mut ctx)
414            .unwrap();
415        let expected_indices = PrimitiveArray::from_iter(vec![0u64]);
416        assert_arrays_eq!(patch_indices, expected_indices);
417
418        let patch_values = patches
419            .values()
420            .clone()
421            .execute::<PrimitiveArray>(&mut ctx)
422            .unwrap();
423        let expected_values = PrimitiveArray::from_iter(vec![PI]);
424        assert_arrays_eq!(patch_values, expected_values);
425    }
426
427    #[test]
428    fn test_chunk_offsets_single_chunk() {
429        let mut ctx = LEGACY_SESSION.create_execution_ctx();
430        let mut values = vec![1.0f64; 512];
431        values[0] = PI;
432        values[100] = E;
433
434        let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
435        let encoded = alp_encode(array.as_view(), None, &mut ctx).unwrap();
436        let patches = encoded.patches().unwrap();
437
438        let chunk_offsets = patches
439            .chunk_offsets()
440            .clone()
441            .unwrap()
442            .execute::<PrimitiveArray>(&mut ctx)
443            .unwrap();
444        let expected_offsets = PrimitiveArray::from_iter(vec![0u64]);
445        assert_arrays_eq!(chunk_offsets, expected_offsets);
446
447        let patch_indices = patches
448            .indices()
449            .clone()
450            .execute::<PrimitiveArray>(&mut ctx)
451            .unwrap();
452        let expected_indices = PrimitiveArray::from_iter(vec![0u64, 100]);
453        assert_arrays_eq!(patch_indices, expected_indices);
454
455        let patch_values = patches
456            .values()
457            .clone()
458            .execute::<PrimitiveArray>(&mut ctx)
459            .unwrap();
460        let expected_values = PrimitiveArray::from_iter(vec![PI, E]);
461        assert_arrays_eq!(patch_values, expected_values);
462    }
463
464    #[test]
465    fn test_slice_half_chunk_f32_roundtrip() {
466        // Create 1024 elements, encode, slice to first 512, then decode
467        let values = vec![1.234f32; 1024];
468        let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
469        let encoded = alp_encode(
470            original.as_view(),
471            None,
472            &mut LEGACY_SESSION.create_execution_ctx(),
473        )
474        .unwrap();
475
476        let sliced_alp = encoded.slice(512..1024).unwrap();
477
478        let expected_slice = original.slice(512..1024).unwrap();
479        assert_arrays_eq!(sliced_alp, expected_slice);
480    }
481
482    #[test]
483    fn test_slice_half_chunk_f64_roundtrip() {
484        let values = vec![5.678f64; 1024];
485        let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
486        let encoded = alp_encode(
487            original.as_view(),
488            None,
489            &mut LEGACY_SESSION.create_execution_ctx(),
490        )
491        .unwrap();
492
493        let sliced_alp = encoded.slice(512..1024).unwrap();
494
495        let expected_slice = original.slice(512..1024).unwrap();
496        assert_arrays_eq!(sliced_alp, expected_slice);
497    }
498
499    #[test]
500    fn test_slice_half_chunk_with_patches_roundtrip() {
501        let mut values = vec![1.0f64; 1024];
502        values[100] = PI;
503        values[200] = E;
504        values[600] = 42.42;
505
506        let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
507        let encoded = alp_encode(
508            original.as_view(),
509            None,
510            &mut LEGACY_SESSION.create_execution_ctx(),
511        )
512        .unwrap();
513
514        let sliced_alp = encoded.slice(512..1024).unwrap();
515
516        let expected_slice = original.slice(512..1024).unwrap();
517        assert_arrays_eq!(sliced_alp, expected_slice);
518        assert!(encoded.patches().is_some());
519    }
520
521    #[test]
522    fn test_slice_across_chunks_with_patches_roundtrip() {
523        let mut values = vec![1.0f64; 2048];
524        values[100] = PI;
525        values[200] = E;
526        values[600] = 42.42;
527        values[800] = 42.42;
528        values[1000] = 42.42;
529        values[1023] = 42.42;
530
531        let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
532        let encoded = alp_encode(
533            original.as_view(),
534            None,
535            &mut LEGACY_SESSION.create_execution_ctx(),
536        )
537        .unwrap();
538
539        let sliced_alp = encoded.slice(1023..1025).unwrap();
540
541        let expected_slice = original.slice(1023..1025).unwrap();
542        assert_arrays_eq!(sliced_alp, expected_slice);
543        assert!(encoded.patches().is_some());
544    }
545
546    #[test]
547    fn test_slice_half_chunk_nullable_roundtrip() {
548        let mut ctx = LEGACY_SESSION.create_execution_ctx();
549        let values = (0..1024)
550            .map(|i| if i % 3 == 0 { None } else { Some(2.5f32) })
551            .collect::<Vec<_>>();
552
553        let original = PrimitiveArray::from_option_iter(values);
554        let encoded = alp_encode(original.as_view(), None, &mut ctx).unwrap();
555
556        let sliced_alp = encoded.slice(512..1024).unwrap();
557        let decoded = sliced_alp.execute::<PrimitiveArray>(&mut ctx).unwrap();
558
559        let expected_slice = original.slice(512..1024).unwrap();
560        assert_arrays_eq!(decoded, expected_slice);
561    }
562
563    #[test]
564    fn test_large_f32_array_uniform_values() {
565        let size = 10_000;
566        let array = PrimitiveArray::new(buffer![42.125f32; size], Validity::NonNullable);
567        let encoded = alp_encode(
568            array.as_view(),
569            None,
570            &mut LEGACY_SESSION.create_execution_ctx(),
571        )
572        .unwrap();
573
574        assert!(encoded.patches().is_none());
575        let decoded =
576            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
577        assert_arrays_eq!(decoded, array);
578    }
579
580    #[test]
581    fn test_large_f64_array_uniform_values() {
582        let size = 50_000;
583        let array = PrimitiveArray::new(buffer![123.456789f64; size], Validity::NonNullable);
584        let encoded = alp_encode(
585            array.as_view(),
586            None,
587            &mut LEGACY_SESSION.create_execution_ctx(),
588        )
589        .unwrap();
590
591        assert!(encoded.patches().is_none());
592        let decoded =
593            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
594        assert_arrays_eq!(decoded, array);
595    }
596
597    #[test]
598    fn test_large_f32_array_with_patches() {
599        let size = 5_000;
600        let mut values = vec![1.5f32; size];
601        values[100] = std::f32::consts::PI;
602        values[1500] = std::f32::consts::E;
603        values[3000] = f32::NEG_INFINITY;
604        values[4500] = f32::INFINITY;
605
606        let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
607        let encoded = alp_encode(
608            array.as_view(),
609            None,
610            &mut LEGACY_SESSION.create_execution_ctx(),
611        )
612        .unwrap();
613
614        assert!(encoded.patches().is_some());
615        let decoded =
616            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
617        assert_arrays_eq!(decoded, array);
618    }
619
620    #[test]
621    fn test_large_f64_array_with_patches() {
622        let size = 8_000;
623        let mut values = vec![2.2184f64; size];
624        values[0] = PI;
625        values[1000] = E;
626        values[2000] = f64::NAN;
627        values[3000] = f64::INFINITY;
628        values[4000] = f64::NEG_INFINITY;
629        values[5000] = 0.0;
630        values[6000] = -0.0;
631        values[7000] = 999.999999999;
632
633        let array = PrimitiveArray::new(Buffer::from(values.clone()), Validity::NonNullable);
634        let encoded = alp_encode(
635            array.as_view(),
636            None,
637            &mut LEGACY_SESSION.create_execution_ctx(),
638        )
639        .unwrap();
640
641        assert!(encoded.patches().is_some());
642        let decoded =
643            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
644
645        for idx in 0..size {
646            let decoded_val = decoded.as_slice::<f64>()[idx];
647            let original_val = values[idx];
648            assert!(
649                decoded_val.is_eq(original_val),
650                "At index {idx}: Expected {original_val} but got {decoded_val}"
651            );
652        }
653    }
654
655    #[test]
656    fn test_large_nullable_array() {
657        let size = 12_000;
658        let values: Vec<Option<f32>> = (0..size)
659            .map(|i| {
660                if i % 7 == 0 {
661                    None
662                } else {
663                    Some((i as f32) * 0.1)
664                }
665            })
666            .collect();
667
668        let array = PrimitiveArray::from_option_iter(values);
669        let encoded = alp_encode(
670            array.as_view(),
671            None,
672            &mut LEGACY_SESSION.create_execution_ctx(),
673        )
674        .unwrap();
675        let decoded =
676            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
677
678        assert_arrays_eq!(decoded, array);
679    }
680
681    #[test]
682    fn test_large_mixed_validity_with_patches() {
683        let size = 6_000;
684        let mut values = vec![10.125f64; size];
685
686        values[500] = PI;
687        values[1500] = E;
688        values[2500] = f64::INFINITY;
689        values[3500] = f64::NEG_INFINITY;
690        values[4500] = f64::NAN;
691
692        let validity = Validity::from_iter((0..size).map(|i| !matches!(i, 500 | 2500)));
693
694        let array = PrimitiveArray::new(Buffer::from(values), validity);
695        let encoded = alp_encode(
696            array.as_view(),
697            None,
698            &mut LEGACY_SESSION.create_execution_ctx(),
699        )
700        .unwrap();
701        let decoded =
702            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
703
704        assert_arrays_eq!(decoded, array);
705    }
706
707    /// Regression test for patch_chunk index-out-of-bounds when slicing a multi-chunk
708    /// ALP array mid-chunk with patches in the trailing chunk.
709    ///
710    /// The bug: chunk_offsets are sliced at chunk granularity (1024-row boundaries)
711    /// but patches indices/values are sliced at element granularity. When a slice ends
712    /// mid-chunk, patches_end_idx could exceed patches_indices.len(), causing OOB panic
713    /// during decompression.
714    #[test]
715    fn test_slice_mid_chunk_with_patches_in_trailing_chunk() {
716        // 3 chunks (3072 elements), patches scattered across all chunks.
717        let mut values = vec![1.0f64; 3072];
718        // Chunk 0 patches (indices 0..1024)
719        values[100] = PI;
720        values[500] = E;
721        // Chunk 1 patches (indices 1024..2048)
722        values[1100] = PI;
723        values[1500] = E;
724        values[1900] = PI;
725        // Chunk 2 patches (indices 2048..3072)
726        values[2100] = PI;
727        values[2500] = E;
728        values[2900] = PI;
729
730        let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
731        let encoded = alp_encode(
732            original.as_view(),
733            None,
734            &mut LEGACY_SESSION.create_execution_ctx(),
735        )
736        .unwrap();
737        assert!(encoded.patches().is_some());
738
739        // Slice ending mid-chunk-2 (element 2500 is inside chunk 2 = 2048..3072).
740        // This creates a mismatch: chunk_offsets includes the full chunk 2 offset,
741        // but patches_indices only includes patches up to element 2500.
742        let sliced_alp = encoded.slice(0..2500).unwrap();
743        let expected = original.slice(0..2500).unwrap();
744        assert_arrays_eq!(sliced_alp, expected);
745
746        // Also test slicing that starts mid-chunk (both start and end mid-chunk).
747        let sliced_alp = encoded.slice(500..2500).unwrap();
748        let expected = original.slice(500..2500).unwrap();
749        assert_arrays_eq!(sliced_alp, expected);
750    }
751}