Skip to main content

vortex_alp/alp/
compress.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use itertools::Itertools;
5use vortex_array::ArrayRef;
6use vortex_array::IntoArray;
7use vortex_array::arrays::PrimitiveArray;
8use vortex_array::dtype::PType;
9use vortex_array::patches::Patches;
10use vortex_array::validity::Validity;
11use vortex_array::vtable::ValidityHelper;
12use vortex_buffer::Buffer;
13use vortex_buffer::BufferMut;
14use vortex_error::VortexResult;
15use vortex_error::vortex_bail;
16use vortex_mask::Mask;
17
18use crate::Exponents;
19use crate::alp::ALPArray;
20use crate::alp::ALPFloat;
21
22#[macro_export]
23macro_rules! match_each_alp_float_ptype {
24    ($self:expr, | $enc:ident | $body:block) => {{
25        use vortex_array::dtype::PType;
26        use vortex_error::vortex_panic;
27        let ptype = $self;
28        match ptype {
29            PType::F32 => {
30                type $enc = f32;
31                $body
32            }
33            PType::F64 => {
34                type $enc = f64;
35                $body
36            }
37            _ => vortex_panic!("ALP can only encode f32 and f64, got {}", ptype),
38        }
39    }};
40}
41
42pub fn alp_encode(parray: &PrimitiveArray, exponents: Option<Exponents>) -> VortexResult<ALPArray> {
43    let (exponents, encoded, patches) = match parray.ptype() {
44        PType::F32 => alp_encode_components_typed::<f32>(parray, exponents)?,
45        PType::F64 => alp_encode_components_typed::<f64>(parray, exponents)?,
46        _ => vortex_bail!("ALP can only encode f32 and f64"),
47    };
48
49    // SAFETY: alp_encode_components_typed must return well-formed components
50    unsafe {
51        Ok(ALPArray::new_unchecked(
52            encoded,
53            exponents,
54            patches,
55            parray.dtype().clone(),
56        ))
57    }
58}
59
60#[expect(
61    clippy::cast_possible_truncation,
62    reason = "u64 index cast to usize is safe for reasonable array sizes"
63)]
64fn alp_encode_components_typed<T>(
65    values: &PrimitiveArray,
66    exponents: Option<Exponents>,
67) -> VortexResult<(Exponents, ArrayRef, Option<Patches>)>
68where
69    T: ALPFloat,
70{
71    let values_slice = values.as_slice::<T>();
72
73    let (exponents, encoded, exceptional_positions, exceptional_values, mut chunk_offsets) =
74        T::encode(values_slice, exponents);
75
76    let encoded_array = PrimitiveArray::new(encoded, values.validity().clone()).into_array();
77
78    let validity = values.validity_mask()?;
79    // exceptional_positions may contain exceptions at invalid positions (which contain garbage
80    // data). We remove null exceptions in order to keep the Patches small.
81    let (valid_exceptional_positions, valid_exceptional_values): (Buffer<u64>, Buffer<T>) =
82        match validity {
83            Mask::AllTrue(_) => (exceptional_positions, exceptional_values),
84            Mask::AllFalse(_) => {
85                // no valid positions, ergo nothing worth patching
86                (Buffer::empty(), Buffer::empty())
87            }
88            Mask::Values(is_valid) => {
89                let (pos, vals): (BufferMut<u64>, BufferMut<T>) = exceptional_positions
90                    .into_iter()
91                    .zip_eq(exceptional_values)
92                    .filter(|(index, _)| {
93                        let is_valid = is_valid.value(*index as usize);
94                        if !is_valid {
95                            let patch_chunk = *index as usize / 1024;
96                            for chunk_idx in (patch_chunk + 1)..chunk_offsets.len() {
97                                chunk_offsets[chunk_idx] -= 1;
98                            }
99                        }
100                        is_valid
101                    })
102                    .unzip();
103                (pos.freeze(), vals.freeze())
104            }
105        };
106    let patches = if valid_exceptional_positions.is_empty() {
107        None
108    } else {
109        let patches_validity = if values.dtype().is_nullable() {
110            Validity::AllValid
111        } else {
112            Validity::NonNullable
113        };
114        let valid_exceptional_values =
115            PrimitiveArray::new(valid_exceptional_values, patches_validity).into_array();
116
117        Some(Patches::new(
118            values_slice.len(),
119            0,
120            valid_exceptional_positions.into_array(),
121            valid_exceptional_values,
122            Some(chunk_offsets.into_array()),
123        )?)
124    };
125    Ok((exponents, encoded_array, patches))
126}
127
128#[cfg(test)]
129mod tests {
130    use core::f64;
131
132    use f64::consts::E;
133    use f64::consts::PI;
134    use vortex_array::ToCanonical;
135    use vortex_array::assert_arrays_eq;
136    use vortex_array::dtype::NativePType;
137    use vortex_array::validity::Validity;
138    use vortex_buffer::Buffer;
139    use vortex_buffer::buffer;
140
141    use super::*;
142    use crate::decompress_into_array;
143
144    #[test]
145    fn test_compress() {
146        let array = PrimitiveArray::new(buffer![1.234f32; 1025], Validity::NonNullable);
147        let encoded = alp_encode(&array, None).unwrap();
148        assert!(encoded.patches().is_none());
149        let expected_encoded = PrimitiveArray::from_iter(vec![1234i32; 1025]);
150        assert_arrays_eq!(encoded.encoded(), expected_encoded);
151        assert_eq!(encoded.exponents(), Exponents { e: 9, f: 6 });
152
153        let decoded = decompress_into_array(encoded).unwrap();
154        assert_arrays_eq!(decoded, array);
155    }
156
157    #[test]
158    fn test_nullable_compress() {
159        let array = PrimitiveArray::from_option_iter([None, Some(1.234f32), None]);
160        let encoded = alp_encode(&array, None).unwrap();
161        assert!(encoded.patches().is_none());
162        let expected_encoded = PrimitiveArray::from_option_iter([None, Some(1234i32), None]);
163        assert_arrays_eq!(encoded.encoded(), expected_encoded);
164        assert_eq!(encoded.exponents(), Exponents { e: 9, f: 6 });
165
166        let decoded = decompress_into_array(encoded).unwrap();
167        let expected = PrimitiveArray::from_option_iter(vec![None, Some(1.234f32), None]);
168        assert_arrays_eq!(decoded, expected);
169    }
170
171    #[test]
172    #[allow(clippy::approx_constant)] // Clippy objects to 2.718, an approximation of e, the base of the natural logarithm.
173    fn test_patched_compress() {
174        let values = buffer![1.234f64, 2.718, PI, 4.0];
175        let array = PrimitiveArray::new(values.clone(), Validity::NonNullable);
176        let encoded = alp_encode(&array, None).unwrap();
177        assert!(encoded.patches().is_some());
178        let expected_encoded = PrimitiveArray::from_iter(vec![1234i64, 2718, 1234, 4000]);
179        assert_arrays_eq!(encoded.encoded(), expected_encoded);
180        assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });
181
182        let decoded = decompress_into_array(encoded).unwrap();
183        let expected_decoded = PrimitiveArray::new(values, Validity::NonNullable);
184        assert_arrays_eq!(decoded, expected_decoded);
185    }
186
187    #[test]
188    #[allow(clippy::approx_constant)] // Clippy objects to 2.718, an approximation of e, the base of the natural logarithm.
189    fn test_compress_ignores_invalid_exceptional_values() {
190        let values = buffer![1.234f64, 2.718, PI, 4.0];
191        let array = PrimitiveArray::new(values, Validity::from_iter([true, true, false, true]));
192        let encoded = alp_encode(&array, None).unwrap();
193        assert!(encoded.patches().is_none());
194        let expected_encoded =
195            PrimitiveArray::from_option_iter(buffer![Some(1234i64), Some(2718), None, Some(4000)]);
196        assert_arrays_eq!(encoded.encoded(), expected_encoded);
197        assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });
198
199        let decoded = decompress_into_array(encoded).unwrap();
200        assert_arrays_eq!(decoded, array);
201    }
202
203    #[test]
204    #[allow(clippy::approx_constant)] // ALP doesn't like E
205    fn test_nullable_patched_scalar_at() {
206        let array = PrimitiveArray::from_option_iter([
207            Some(1.234f64),
208            Some(2.718),
209            Some(PI),
210            Some(4.0),
211            None,
212        ]);
213        let encoded = alp_encode(&array, None).unwrap();
214        assert!(encoded.patches().is_some());
215
216        assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });
217
218        assert_arrays_eq!(encoded, array);
219
220        let _decoded = decompress_into_array(encoded).unwrap();
221    }
222
223    #[test]
224    fn roundtrips_close_fractional() {
225        let original = PrimitiveArray::from_iter([195.26274f32, 195.27837, -48.815685]);
226        let alp_arr = alp_encode(&original, None).unwrap();
227        assert_arrays_eq!(alp_arr, original);
228    }
229
230    #[test]
231    fn roundtrips_all_null() {
232        let original =
233            PrimitiveArray::new(buffer![195.26274f64, PI, -48.815685], Validity::AllInvalid);
234        let alp_arr = alp_encode(&original, None).unwrap();
235        let decompressed = alp_arr.to_primitive();
236
237        assert_eq!(
238            // The second and third values become exceptions and are replaced
239            [195.26274, 195.26274, 195.26274],
240            decompressed.as_slice::<f64>()
241        );
242
243        assert_arrays_eq!(decompressed, original);
244    }
245
246    #[test]
247    fn non_finite_numbers() {
248        let original = PrimitiveArray::new(
249            buffer![0.0f32, -0.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY],
250            Validity::NonNullable,
251        );
252        let encoded = alp_encode(&original, None).unwrap();
253        let decoded = encoded.to_primitive();
254        for idx in 0..original.len() {
255            let decoded_val = decoded.as_slice::<f32>()[idx];
256            let original_val = original.as_slice::<f32>()[idx];
257            assert!(
258                decoded_val.is_eq(original_val),
259                "Expected {original_val} but got {decoded_val}"
260            );
261        }
262    }
263
264    #[test]
265    fn test_chunk_offsets() {
266        let mut values = vec![1.0f64; 3072];
267
268        values[1023] = PI;
269        values[1024] = E;
270        values[1025] = PI;
271
272        let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
273        let encoded = alp_encode(&array, None).unwrap();
274        let patches = encoded.patches().unwrap();
275
276        let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
277        let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 3]);
278        assert_arrays_eq!(chunk_offsets, expected_offsets);
279
280        let patch_indices = patches.indices().to_primitive();
281        let expected_indices = PrimitiveArray::from_iter(vec![1023u64, 1024, 1025]);
282        assert_arrays_eq!(patch_indices, expected_indices);
283
284        let patch_values = patches.values().to_primitive();
285        let expected_values = PrimitiveArray::from_iter(vec![PI, E, PI]);
286        assert_arrays_eq!(patch_values, expected_values);
287    }
288
289    #[test]
290    fn test_chunk_offsets_no_patches_in_middle() {
291        let mut values = vec![1.0f64; 3072];
292        values[0] = PI;
293        values[2048] = E;
294
295        let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
296        let encoded = alp_encode(&array, None).unwrap();
297        let patches = encoded.patches().unwrap();
298
299        let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
300        let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 1]);
301        assert_arrays_eq!(chunk_offsets, expected_offsets);
302
303        let patch_indices = patches.indices().to_primitive();
304        let expected_indices = PrimitiveArray::from_iter(vec![0u64, 2048]);
305        assert_arrays_eq!(patch_indices, expected_indices);
306
307        let patch_values = patches.values().to_primitive();
308        let expected_values = PrimitiveArray::from_iter(vec![PI, E]);
309        assert_arrays_eq!(patch_values, expected_values);
310    }
311
312    #[test]
313    fn test_chunk_offsets_trailing_empty_chunks() {
314        let mut values = vec![1.0f64; 3072];
315        values[0] = PI;
316
317        let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
318        let encoded = alp_encode(&array, None).unwrap();
319        let patches = encoded.patches().unwrap();
320
321        let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
322        let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 1]);
323        assert_arrays_eq!(chunk_offsets, expected_offsets);
324
325        let patch_indices = patches.indices().to_primitive();
326        let expected_indices = PrimitiveArray::from_iter(vec![0u64]);
327        assert_arrays_eq!(patch_indices, expected_indices);
328
329        let patch_values = patches.values().to_primitive();
330        let expected_values = PrimitiveArray::from_iter(vec![PI]);
331        assert_arrays_eq!(patch_values, expected_values);
332    }
333
334    #[test]
335    fn test_chunk_offsets_single_chunk() {
336        let mut values = vec![1.0f64; 512];
337        values[0] = PI;
338        values[100] = E;
339
340        let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
341        let encoded = alp_encode(&array, None).unwrap();
342        let patches = encoded.patches().unwrap();
343
344        let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
345        let expected_offsets = PrimitiveArray::from_iter(vec![0u64]);
346        assert_arrays_eq!(chunk_offsets, expected_offsets);
347
348        let patch_indices = patches.indices().to_primitive();
349        let expected_indices = PrimitiveArray::from_iter(vec![0u64, 100]);
350        assert_arrays_eq!(patch_indices, expected_indices);
351
352        let patch_values = patches.values().to_primitive();
353        let expected_values = PrimitiveArray::from_iter(vec![PI, E]);
354        assert_arrays_eq!(patch_values, expected_values);
355    }
356
357    #[test]
358    fn test_slice_half_chunk_f32_roundtrip() {
359        // Create 1024 elements, encode, slice to first 512, then decode
360        let values = vec![1.234f32; 1024];
361        let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
362        let encoded = alp_encode(&original, None).unwrap();
363
364        let sliced_alp = encoded.slice(512..1024).unwrap();
365
366        let expected_slice = original.slice(512..1024).unwrap();
367        assert_arrays_eq!(sliced_alp, expected_slice);
368    }
369
370    #[test]
371    fn test_slice_half_chunk_f64_roundtrip() {
372        let values = vec![5.678f64; 1024];
373        let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
374        let encoded = alp_encode(&original, None).unwrap();
375
376        let sliced_alp = encoded.slice(512..1024).unwrap();
377
378        let expected_slice = original.slice(512..1024).unwrap();
379        assert_arrays_eq!(sliced_alp, expected_slice);
380    }
381
382    #[test]
383    fn test_slice_half_chunk_with_patches_roundtrip() {
384        let mut values = vec![1.0f64; 1024];
385        values[100] = PI;
386        values[200] = E;
387        values[600] = 42.42;
388
389        let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
390        let encoded = alp_encode(&original, None).unwrap();
391
392        let sliced_alp = encoded.slice(512..1024).unwrap();
393
394        let expected_slice = original.slice(512..1024).unwrap();
395        assert_arrays_eq!(sliced_alp, expected_slice);
396        assert!(encoded.patches().is_some());
397    }
398
399    #[test]
400    fn test_slice_across_chunks_with_patches_roundtrip() {
401        let mut values = vec![1.0f64; 2048];
402        values[100] = PI;
403        values[200] = E;
404        values[600] = 42.42;
405        values[800] = 42.42;
406        values[1000] = 42.42;
407        values[1023] = 42.42;
408
409        let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
410        let encoded = alp_encode(&original, None).unwrap();
411
412        let sliced_alp = encoded.slice(1023..1025).unwrap();
413
414        let expected_slice = original.slice(1023..1025).unwrap();
415        assert_arrays_eq!(sliced_alp, expected_slice);
416        assert!(encoded.patches().is_some());
417    }
418
419    #[test]
420    fn test_slice_half_chunk_nullable_roundtrip() {
421        let values = (0..1024)
422            .map(|i| if i % 3 == 0 { None } else { Some(2.5f32) })
423            .collect::<Vec<_>>();
424
425        let original = PrimitiveArray::from_option_iter(values);
426        let encoded = alp_encode(&original, None).unwrap();
427
428        let sliced_alp = encoded.slice(512..1024).unwrap();
429        let decoded = sliced_alp.to_primitive();
430
431        let expected_slice = original.slice(512..1024).unwrap();
432        assert_arrays_eq!(decoded, expected_slice);
433    }
434
435    #[test]
436    fn test_large_f32_array_uniform_values() {
437        let size = 10_000;
438        let array = PrimitiveArray::new(buffer![42.125f32; size], Validity::NonNullable);
439        let encoded = alp_encode(&array, None).unwrap();
440
441        assert!(encoded.patches().is_none());
442        let decoded = decompress_into_array(encoded).unwrap();
443        assert_arrays_eq!(decoded, array);
444    }
445
446    #[test]
447    fn test_large_f64_array_uniform_values() {
448        let size = 50_000;
449        let array = PrimitiveArray::new(buffer![123.456789f64; size], Validity::NonNullable);
450        let encoded = alp_encode(&array, None).unwrap();
451
452        assert!(encoded.patches().is_none());
453        let decoded = decompress_into_array(encoded).unwrap();
454        assert_arrays_eq!(decoded, array);
455    }
456
457    #[test]
458    fn test_large_f32_array_with_patches() {
459        let size = 5_000;
460        let mut values = vec![1.5f32; size];
461        values[100] = std::f32::consts::PI;
462        values[1500] = std::f32::consts::E;
463        values[3000] = f32::NEG_INFINITY;
464        values[4500] = f32::INFINITY;
465
466        let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
467        let encoded = alp_encode(&array, None).unwrap();
468
469        assert!(encoded.patches().is_some());
470        let decoded = decompress_into_array(encoded).unwrap();
471        assert_arrays_eq!(decoded, array);
472    }
473
474    #[test]
475    fn test_large_f64_array_with_patches() {
476        let size = 8_000;
477        let mut values = vec![2.2184f64; size];
478        values[0] = PI;
479        values[1000] = E;
480        values[2000] = f64::NAN;
481        values[3000] = f64::INFINITY;
482        values[4000] = f64::NEG_INFINITY;
483        values[5000] = 0.0;
484        values[6000] = -0.0;
485        values[7000] = 999.999999999;
486
487        let array = PrimitiveArray::new(Buffer::from(values.clone()), Validity::NonNullable);
488        let encoded = alp_encode(&array, None).unwrap();
489
490        assert!(encoded.patches().is_some());
491        let decoded = decompress_into_array(encoded).unwrap();
492
493        for idx in 0..size {
494            let decoded_val = decoded.as_slice::<f64>()[idx];
495            let original_val = values[idx];
496            assert!(
497                decoded_val.is_eq(original_val),
498                "At index {idx}: Expected {original_val} but got {decoded_val}"
499            );
500        }
501    }
502
503    #[test]
504    fn test_large_nullable_array() {
505        let size = 12_000;
506        let values: Vec<Option<f32>> = (0..size)
507            .map(|i| {
508                if i % 7 == 0 {
509                    None
510                } else {
511                    Some((i as f32) * 0.1)
512                }
513            })
514            .collect();
515
516        let array = PrimitiveArray::from_option_iter(values);
517        let encoded = alp_encode(&array, None).unwrap();
518        let decoded = decompress_into_array(encoded).unwrap();
519
520        assert_arrays_eq!(decoded, array);
521    }
522
523    #[test]
524    fn test_large_mixed_validity_with_patches() {
525        let size = 6_000;
526        let mut values = vec![10.125f64; size];
527
528        values[500] = PI;
529        values[1500] = E;
530        values[2500] = f64::INFINITY;
531        values[3500] = f64::NEG_INFINITY;
532        values[4500] = f64::NAN;
533
534        let validity = Validity::from_iter((0..size).map(|i| !matches!(i, 500 | 2500)));
535
536        let array = PrimitiveArray::new(Buffer::from(values), validity);
537        let encoded = alp_encode(&array, None).unwrap();
538        let decoded = decompress_into_array(encoded).unwrap();
539
540        assert_arrays_eq!(decoded, array);
541    }
542}