Skip to main content

vortex_alp/alp/
compress.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use itertools::Itertools;
5use vortex_array::ArrayRef;
6use vortex_array::IntoArray;
7use vortex_array::arrays::PrimitiveArray;
8use vortex_array::dtype::PType;
9use vortex_array::patches::Patches;
10use vortex_array::validity::Validity;
11use vortex_array::vtable::ValidityHelper;
12use vortex_buffer::Buffer;
13use vortex_buffer::BufferMut;
14use vortex_error::VortexResult;
15use vortex_error::vortex_bail;
16use vortex_mask::Mask;
17
18use crate::Exponents;
19use crate::alp::ALPArray;
20use crate::alp::ALPFloat;
21
22#[macro_export]
23macro_rules! match_each_alp_float_ptype {
24    ($self:expr, | $enc:ident | $body:block) => {{
25        use vortex_array::dtype::PType;
26        use vortex_error::vortex_panic;
27        let ptype = $self;
28        match ptype {
29            PType::F32 => {
30                type $enc = f32;
31                $body
32            }
33            PType::F64 => {
34                type $enc = f64;
35                $body
36            }
37            _ => vortex_panic!("ALP can only encode f32 and f64, got {}", ptype),
38        }
39    }};
40}
41
42pub fn alp_encode(parray: &PrimitiveArray, exponents: Option<Exponents>) -> VortexResult<ALPArray> {
43    let (exponents, encoded, patches) = match parray.ptype() {
44        PType::F32 => alp_encode_components_typed::<f32>(parray, exponents)?,
45        PType::F64 => alp_encode_components_typed::<f64>(parray, exponents)?,
46        _ => vortex_bail!("ALP can only encode f32 and f64"),
47    };
48
49    // SAFETY: alp_encode_components_typed must return well-formed components
50    unsafe {
51        Ok(ALPArray::new_unchecked(
52            encoded,
53            exponents,
54            patches,
55            parray.dtype().clone(),
56        ))
57    }
58}
59
60#[expect(
61    clippy::cast_possible_truncation,
62    reason = "u64 index cast to usize is safe for reasonable array sizes"
63)]
64fn alp_encode_components_typed<T>(
65    values: &PrimitiveArray,
66    exponents: Option<Exponents>,
67) -> VortexResult<(Exponents, ArrayRef, Option<Patches>)>
68where
69    T: ALPFloat,
70{
71    let values_slice = values.as_slice::<T>();
72
73    let (exponents, encoded, exceptional_positions, exceptional_values, mut chunk_offsets) =
74        T::encode(values_slice, exponents);
75
76    let encoded_array = PrimitiveArray::new(encoded, values.validity().clone()).into_array();
77
78    let validity = values.validity_mask()?;
79    // exceptional_positions may contain exceptions at invalid positions (which contain garbage
80    // data). We remove null exceptions in order to keep the Patches small.
81    let (valid_exceptional_positions, valid_exceptional_values): (Buffer<u64>, Buffer<T>) =
82        match validity {
83            Mask::AllTrue(_) => (exceptional_positions, exceptional_values),
84            Mask::AllFalse(_) => {
85                // no valid positions, ergo nothing worth patching
86                (Buffer::empty(), Buffer::empty())
87            }
88            Mask::Values(is_valid) => {
89                let (pos, vals): (BufferMut<u64>, BufferMut<T>) = exceptional_positions
90                    .into_iter()
91                    .zip_eq(exceptional_values)
92                    .filter(|(index, _)| {
93                        let is_valid = is_valid.value(*index as usize);
94                        if !is_valid {
95                            let patch_chunk = *index as usize / 1024;
96                            for chunk_idx in (patch_chunk + 1)..chunk_offsets.len() {
97                                chunk_offsets[chunk_idx] -= 1;
98                            }
99                        }
100                        is_valid
101                    })
102                    .unzip();
103                (pos.freeze(), vals.freeze())
104            }
105        };
106    let patches = if valid_exceptional_positions.is_empty() {
107        None
108    } else {
109        let patches_validity = if values.dtype().is_nullable() {
110            Validity::AllValid
111        } else {
112            Validity::NonNullable
113        };
114        let valid_exceptional_values =
115            PrimitiveArray::new(valid_exceptional_values, patches_validity).into_array();
116
117        Some(Patches::new(
118            values_slice.len(),
119            0,
120            valid_exceptional_positions.into_array(),
121            valid_exceptional_values,
122            Some(chunk_offsets.into_array()),
123        )?)
124    };
125    Ok((exponents, encoded_array, patches))
126}
127
128#[cfg(test)]
129mod tests {
130    use core::f64;
131
132    use f64::consts::E;
133    use f64::consts::PI;
134    use vortex_array::LEGACY_SESSION;
135    use vortex_array::ToCanonical;
136    use vortex_array::VortexSessionExecute;
137    use vortex_array::assert_arrays_eq;
138    use vortex_array::dtype::NativePType;
139    use vortex_array::validity::Validity;
140    use vortex_buffer::Buffer;
141    use vortex_buffer::buffer;
142
143    use super::*;
144    use crate::decompress_into_array;
145
146    #[test]
147    fn test_compress() {
148        let array = PrimitiveArray::new(buffer![1.234f32; 1025], Validity::NonNullable);
149        let encoded = alp_encode(&array, None).unwrap();
150        assert!(encoded.patches().is_none());
151        let expected_encoded = PrimitiveArray::from_iter(vec![1234i32; 1025]);
152        assert_arrays_eq!(encoded.encoded(), expected_encoded);
153        assert_eq!(encoded.exponents(), Exponents { e: 9, f: 6 });
154
155        let decoded =
156            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
157        assert_arrays_eq!(decoded, array);
158    }
159
160    #[test]
161    fn test_nullable_compress() {
162        let array = PrimitiveArray::from_option_iter([None, Some(1.234f32), None]);
163        let encoded = alp_encode(&array, None).unwrap();
164        assert!(encoded.patches().is_none());
165        let expected_encoded = PrimitiveArray::from_option_iter([None, Some(1234i32), None]);
166        assert_arrays_eq!(encoded.encoded(), expected_encoded);
167        assert_eq!(encoded.exponents(), Exponents { e: 9, f: 6 });
168
169        let decoded =
170            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
171        let expected = PrimitiveArray::from_option_iter(vec![None, Some(1.234f32), None]);
172        assert_arrays_eq!(decoded, expected);
173    }
174
175    #[test]
176    #[allow(clippy::approx_constant)] // Clippy objects to 2.718, an approximation of e, the base of the natural logarithm.
177    fn test_patched_compress() {
178        let values = buffer![1.234f64, 2.718, PI, 4.0];
179        let array = PrimitiveArray::new(values.clone(), Validity::NonNullable);
180        let encoded = alp_encode(&array, None).unwrap();
181        assert!(encoded.patches().is_some());
182        let expected_encoded = PrimitiveArray::from_iter(vec![1234i64, 2718, 1234, 4000]);
183        assert_arrays_eq!(encoded.encoded(), expected_encoded);
184        assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });
185
186        let decoded =
187            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
188        let expected_decoded = PrimitiveArray::new(values, Validity::NonNullable);
189        assert_arrays_eq!(decoded, expected_decoded);
190    }
191
192    #[test]
193    #[allow(clippy::approx_constant)] // Clippy objects to 2.718, an approximation of e, the base of the natural logarithm.
194    fn test_compress_ignores_invalid_exceptional_values() {
195        let values = buffer![1.234f64, 2.718, PI, 4.0];
196        let array = PrimitiveArray::new(values, Validity::from_iter([true, true, false, true]));
197        let encoded = alp_encode(&array, None).unwrap();
198        assert!(encoded.patches().is_none());
199        let expected_encoded =
200            PrimitiveArray::from_option_iter(buffer![Some(1234i64), Some(2718), None, Some(4000)]);
201        assert_arrays_eq!(encoded.encoded(), expected_encoded);
202        assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });
203
204        let decoded =
205            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
206        assert_arrays_eq!(decoded, array);
207    }
208
209    #[test]
210    #[allow(clippy::approx_constant)] // ALP doesn't like E
211    fn test_nullable_patched_scalar_at() {
212        let array = PrimitiveArray::from_option_iter([
213            Some(1.234f64),
214            Some(2.718),
215            Some(PI),
216            Some(4.0),
217            None,
218        ]);
219        let encoded = alp_encode(&array, None).unwrap();
220        assert!(encoded.patches().is_some());
221
222        assert_eq!(encoded.exponents(), Exponents { e: 16, f: 13 });
223
224        assert_arrays_eq!(encoded, array);
225
226        let _decoded =
227            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
228    }
229
230    #[test]
231    fn roundtrips_close_fractional() {
232        let original = PrimitiveArray::from_iter([195.26274f32, 195.27837, -48.815685]);
233        let alp_arr = alp_encode(&original, None).unwrap();
234        assert_arrays_eq!(alp_arr, original);
235    }
236
237    #[test]
238    fn roundtrips_all_null() {
239        let original =
240            PrimitiveArray::new(buffer![195.26274f64, PI, -48.815685], Validity::AllInvalid);
241        let alp_arr = alp_encode(&original, None).unwrap();
242        let decompressed = alp_arr.to_primitive();
243
244        assert_eq!(
245            // The second and third values become exceptions and are replaced
246            [195.26274, 195.26274, 195.26274],
247            decompressed.as_slice::<f64>()
248        );
249
250        assert_arrays_eq!(decompressed, original);
251    }
252
253    #[test]
254    fn non_finite_numbers() {
255        let original = PrimitiveArray::new(
256            buffer![0.0f32, -0.0, f32::NAN, f32::NEG_INFINITY, f32::INFINITY],
257            Validity::NonNullable,
258        );
259        let encoded = alp_encode(&original, None).unwrap();
260        let decoded = encoded.to_primitive();
261        for idx in 0..original.len() {
262            let decoded_val = decoded.as_slice::<f32>()[idx];
263            let original_val = original.as_slice::<f32>()[idx];
264            assert!(
265                decoded_val.is_eq(original_val),
266                "Expected {original_val} but got {decoded_val}"
267            );
268        }
269    }
270
271    #[test]
272    fn test_chunk_offsets() {
273        let mut values = vec![1.0f64; 3072];
274
275        values[1023] = PI;
276        values[1024] = E;
277        values[1025] = PI;
278
279        let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
280        let encoded = alp_encode(&array, None).unwrap();
281        let patches = encoded.patches().unwrap();
282
283        let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
284        let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 3]);
285        assert_arrays_eq!(chunk_offsets, expected_offsets);
286
287        let patch_indices = patches.indices().to_primitive();
288        let expected_indices = PrimitiveArray::from_iter(vec![1023u64, 1024, 1025]);
289        assert_arrays_eq!(patch_indices, expected_indices);
290
291        let patch_values = patches.values().to_primitive();
292        let expected_values = PrimitiveArray::from_iter(vec![PI, E, PI]);
293        assert_arrays_eq!(patch_values, expected_values);
294    }
295
296    #[test]
297    fn test_chunk_offsets_no_patches_in_middle() {
298        let mut values = vec![1.0f64; 3072];
299        values[0] = PI;
300        values[2048] = E;
301
302        let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
303        let encoded = alp_encode(&array, None).unwrap();
304        let patches = encoded.patches().unwrap();
305
306        let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
307        let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 1]);
308        assert_arrays_eq!(chunk_offsets, expected_offsets);
309
310        let patch_indices = patches.indices().to_primitive();
311        let expected_indices = PrimitiveArray::from_iter(vec![0u64, 2048]);
312        assert_arrays_eq!(patch_indices, expected_indices);
313
314        let patch_values = patches.values().to_primitive();
315        let expected_values = PrimitiveArray::from_iter(vec![PI, E]);
316        assert_arrays_eq!(patch_values, expected_values);
317    }
318
319    #[test]
320    fn test_chunk_offsets_trailing_empty_chunks() {
321        let mut values = vec![1.0f64; 3072];
322        values[0] = PI;
323
324        let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
325        let encoded = alp_encode(&array, None).unwrap();
326        let patches = encoded.patches().unwrap();
327
328        let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
329        let expected_offsets = PrimitiveArray::from_iter(vec![0u64, 1, 1]);
330        assert_arrays_eq!(chunk_offsets, expected_offsets);
331
332        let patch_indices = patches.indices().to_primitive();
333        let expected_indices = PrimitiveArray::from_iter(vec![0u64]);
334        assert_arrays_eq!(patch_indices, expected_indices);
335
336        let patch_values = patches.values().to_primitive();
337        let expected_values = PrimitiveArray::from_iter(vec![PI]);
338        assert_arrays_eq!(patch_values, expected_values);
339    }
340
341    #[test]
342    fn test_chunk_offsets_single_chunk() {
343        let mut values = vec![1.0f64; 512];
344        values[0] = PI;
345        values[100] = E;
346
347        let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
348        let encoded = alp_encode(&array, None).unwrap();
349        let patches = encoded.patches().unwrap();
350
351        let chunk_offsets = patches.chunk_offsets().clone().unwrap().to_primitive();
352        let expected_offsets = PrimitiveArray::from_iter(vec![0u64]);
353        assert_arrays_eq!(chunk_offsets, expected_offsets);
354
355        let patch_indices = patches.indices().to_primitive();
356        let expected_indices = PrimitiveArray::from_iter(vec![0u64, 100]);
357        assert_arrays_eq!(patch_indices, expected_indices);
358
359        let patch_values = patches.values().to_primitive();
360        let expected_values = PrimitiveArray::from_iter(vec![PI, E]);
361        assert_arrays_eq!(patch_values, expected_values);
362    }
363
364    #[test]
365    fn test_slice_half_chunk_f32_roundtrip() {
366        // Create 1024 elements, encode, slice to first 512, then decode
367        let values = vec![1.234f32; 1024];
368        let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
369        let encoded = alp_encode(&original, None).unwrap();
370
371        let sliced_alp = encoded.slice(512..1024).unwrap();
372
373        let expected_slice = original.slice(512..1024).unwrap();
374        assert_arrays_eq!(sliced_alp, expected_slice);
375    }
376
377    #[test]
378    fn test_slice_half_chunk_f64_roundtrip() {
379        let values = vec![5.678f64; 1024];
380        let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
381        let encoded = alp_encode(&original, None).unwrap();
382
383        let sliced_alp = encoded.slice(512..1024).unwrap();
384
385        let expected_slice = original.slice(512..1024).unwrap();
386        assert_arrays_eq!(sliced_alp, expected_slice);
387    }
388
389    #[test]
390    fn test_slice_half_chunk_with_patches_roundtrip() {
391        let mut values = vec![1.0f64; 1024];
392        values[100] = PI;
393        values[200] = E;
394        values[600] = 42.42;
395
396        let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
397        let encoded = alp_encode(&original, None).unwrap();
398
399        let sliced_alp = encoded.slice(512..1024).unwrap();
400
401        let expected_slice = original.slice(512..1024).unwrap();
402        assert_arrays_eq!(sliced_alp, expected_slice);
403        assert!(encoded.patches().is_some());
404    }
405
406    #[test]
407    fn test_slice_across_chunks_with_patches_roundtrip() {
408        let mut values = vec![1.0f64; 2048];
409        values[100] = PI;
410        values[200] = E;
411        values[600] = 42.42;
412        values[800] = 42.42;
413        values[1000] = 42.42;
414        values[1023] = 42.42;
415
416        let original = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
417        let encoded = alp_encode(&original, None).unwrap();
418
419        let sliced_alp = encoded.slice(1023..1025).unwrap();
420
421        let expected_slice = original.slice(1023..1025).unwrap();
422        assert_arrays_eq!(sliced_alp, expected_slice);
423        assert!(encoded.patches().is_some());
424    }
425
426    #[test]
427    fn test_slice_half_chunk_nullable_roundtrip() {
428        let values = (0..1024)
429            .map(|i| if i % 3 == 0 { None } else { Some(2.5f32) })
430            .collect::<Vec<_>>();
431
432        let original = PrimitiveArray::from_option_iter(values);
433        let encoded = alp_encode(&original, None).unwrap();
434
435        let sliced_alp = encoded.slice(512..1024).unwrap();
436        let decoded = sliced_alp.to_primitive();
437
438        let expected_slice = original.slice(512..1024).unwrap();
439        assert_arrays_eq!(decoded, expected_slice);
440    }
441
442    #[test]
443    fn test_large_f32_array_uniform_values() {
444        let size = 10_000;
445        let array = PrimitiveArray::new(buffer![42.125f32; size], Validity::NonNullable);
446        let encoded = alp_encode(&array, None).unwrap();
447
448        assert!(encoded.patches().is_none());
449        let decoded =
450            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
451        assert_arrays_eq!(decoded, array);
452    }
453
454    #[test]
455    fn test_large_f64_array_uniform_values() {
456        let size = 50_000;
457        let array = PrimitiveArray::new(buffer![123.456789f64; size], Validity::NonNullable);
458        let encoded = alp_encode(&array, None).unwrap();
459
460        assert!(encoded.patches().is_none());
461        let decoded =
462            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
463        assert_arrays_eq!(decoded, array);
464    }
465
466    #[test]
467    fn test_large_f32_array_with_patches() {
468        let size = 5_000;
469        let mut values = vec![1.5f32; size];
470        values[100] = std::f32::consts::PI;
471        values[1500] = std::f32::consts::E;
472        values[3000] = f32::NEG_INFINITY;
473        values[4500] = f32::INFINITY;
474
475        let array = PrimitiveArray::new(Buffer::from(values), Validity::NonNullable);
476        let encoded = alp_encode(&array, None).unwrap();
477
478        assert!(encoded.patches().is_some());
479        let decoded =
480            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
481        assert_arrays_eq!(decoded, array);
482    }
483
484    #[test]
485    fn test_large_f64_array_with_patches() {
486        let size = 8_000;
487        let mut values = vec![2.2184f64; size];
488        values[0] = PI;
489        values[1000] = E;
490        values[2000] = f64::NAN;
491        values[3000] = f64::INFINITY;
492        values[4000] = f64::NEG_INFINITY;
493        values[5000] = 0.0;
494        values[6000] = -0.0;
495        values[7000] = 999.999999999;
496
497        let array = PrimitiveArray::new(Buffer::from(values.clone()), Validity::NonNullable);
498        let encoded = alp_encode(&array, None).unwrap();
499
500        assert!(encoded.patches().is_some());
501        let decoded =
502            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
503
504        for idx in 0..size {
505            let decoded_val = decoded.as_slice::<f64>()[idx];
506            let original_val = values[idx];
507            assert!(
508                decoded_val.is_eq(original_val),
509                "At index {idx}: Expected {original_val} but got {decoded_val}"
510            );
511        }
512    }
513
514    #[test]
515    fn test_large_nullable_array() {
516        let size = 12_000;
517        let values: Vec<Option<f32>> = (0..size)
518            .map(|i| {
519                if i % 7 == 0 {
520                    None
521                } else {
522                    Some((i as f32) * 0.1)
523                }
524            })
525            .collect();
526
527        let array = PrimitiveArray::from_option_iter(values);
528        let encoded = alp_encode(&array, None).unwrap();
529        let decoded =
530            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
531
532        assert_arrays_eq!(decoded, array);
533    }
534
535    #[test]
536    fn test_large_mixed_validity_with_patches() {
537        let size = 6_000;
538        let mut values = vec![10.125f64; size];
539
540        values[500] = PI;
541        values[1500] = E;
542        values[2500] = f64::INFINITY;
543        values[3500] = f64::NEG_INFINITY;
544        values[4500] = f64::NAN;
545
546        let validity = Validity::from_iter((0..size).map(|i| !matches!(i, 500 | 2500)));
547
548        let array = PrimitiveArray::new(Buffer::from(values), validity);
549        let encoded = alp_encode(&array, None).unwrap();
550        let decoded =
551            decompress_into_array(encoded, &mut LEGACY_SESSION.create_execution_ctx()).unwrap();
552
553        assert_arrays_eq!(decoded, array);
554    }
555}