vortex_alp/alp/
decompress.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::mem::transmute;
5
6use num_traits::AsPrimitive;
7use vortex_array::ArrayRef;
8use vortex_array::ToCanonical;
9use vortex_array::arrays::PrimitiveArray;
10use vortex_array::arrays::patch_chunk;
11use vortex_array::patches::Patches;
12use vortex_array::vtable::ValidityHelper;
13use vortex_buffer::BufferMut;
14use vortex_dtype::DType;
15use vortex_dtype::NativePType;
16use vortex_dtype::match_each_unsigned_integer_ptype;
17use vortex_error::VortexResult;
18use vortex_vector::Vector;
19use vortex_vector::VectorMutOps;
20use vortex_vector::VectorOps;
21use vortex_vector::primitive::PVectorMut;
22
23use crate::ALPArray;
24use crate::ALPFloat;
25use crate::Exponents;
26use crate::match_each_alp_float_ptype;
27
28/// Decompresses an ALP-encoded array.
29///
30/// # Returns
31///
32/// A `PrimitiveArray` containing the decompressed floating-point values with all patches applied.
33pub fn decompress_into_array(array: ALPArray) -> PrimitiveArray {
34    let (encoded, exponents, patches, dtype) = array.into_parts();
35    if let Some(ref patches) = patches
36        && let Some(chunk_offsets) = patches.chunk_offsets()
37    {
38        decompress_chunked(
39            encoded,
40            exponents,
41            patches,
42            &chunk_offsets.as_ref().to_primitive(),
43            dtype,
44        )
45    } else {
46        decompress_unchunked(encoded, exponents, patches, dtype)
47    }
48}
49
50/// Decompresses an ALP-encoded array.
51///
52/// # Returns
53///
54/// A `Vector` containing the decompressed floating-point values with all patches applied.
55pub fn decompress_into_vector<T: ALPFloat>(
56    encoded_vector: Vector,
57    exponents: Exponents,
58    patches_vectors: Option<(Vector, Vector, Option<Vector>)>,
59    patches_offset: usize,
60) -> VortexResult<Vector> {
61    let encoded_primitive = encoded_vector.into_primitive().into_mut();
62    let (mut alp_buffer, mask) = T::ALPInt::downcast(encoded_primitive).into_parts();
63    <T>::decode_slice_inplace(alp_buffer.as_mut_slice(), exponents);
64
65    // SAFETY: `Buffer<T::ALPInt> and `BufferMut<T>` have the same layout.
66    let mut decoded_buffer: BufferMut<T> = unsafe { transmute(alp_buffer) };
67
68    // Apply patches if they exist.
69    if let Some((patches_indices, patches_values, _)) = patches_vectors {
70        let patches_indices = patches_indices.into_primitive();
71        let patches_values = patches_values.into_primitive();
72
73        let values_buffer = T::downcast(patches_values.into_mut()).into_parts().0;
74        let values_slice = values_buffer.as_slice();
75        let decoded_slice = decoded_buffer.as_mut_slice();
76
77        match_each_unsigned_integer_ptype!(patches_indices.ptype(), |I| {
78            let indices_buffer = I::downcast(patches_indices.into_mut()).into_parts().0;
79            let indices_slice = indices_buffer.as_slice();
80
81            for (&idx, &value) in indices_slice.iter().zip(values_slice.iter()) {
82                decoded_slice[AsPrimitive::<usize>::as_(idx) - patches_offset] = value;
83            }
84        });
85    }
86
87    Ok(PVectorMut::<T>::new(decoded_buffer, mask).freeze().into())
88}
89
90/// Decompresses an ALP-encoded array in 1024-element chunks.
91///
92/// # Returns
93///
94/// A `PrimitiveArray` containing the decompressed values with all patches applied.
95#[expect(
96    clippy::cognitive_complexity,
97    reason = "complexity is from nested match_each_* macros"
98)]
99fn decompress_chunked(
100    array: ArrayRef,
101    exponents: Exponents,
102    patches: &Patches,
103    patches_chunk_offsets: &PrimitiveArray,
104    dtype: DType,
105) -> PrimitiveArray {
106    let encoded = array.to_primitive();
107
108    let validity = encoded.validity().clone();
109
110    let patches_indices = patches.indices().to_primitive();
111    let patches_values = patches.values().to_primitive();
112    let ptype = dtype.as_ptype();
113    let array_len = array.len();
114    let patches_offset = patches.offset();
115
116    // We need to drop ALPArray here in case converting encoded buffer into
117    // primitive didn't create a copy. In that case both alp_encoded and array
118    // will hold a reference to the buffer we want to mutate.
119    drop(array);
120
121    match_each_alp_float_ptype!(ptype, |T| {
122        let patches_values = patches_values.as_slice::<T>();
123        let mut alp_buffer = encoded.into_buffer_mut();
124        match_each_unsigned_integer_ptype!(patches_chunk_offsets.ptype(), |C| {
125            let patches_chunk_offsets = patches_chunk_offsets.as_slice::<C>();
126            // There always is at least one chunk offset.
127            let base_offset = patches_chunk_offsets[0];
128            let offset_within_chunk = patches.offset_within_chunk().unwrap_or(0);
129
130            match_each_unsigned_integer_ptype!(patches_indices.ptype(), |I| {
131                let patches_indices = patches_indices.as_slice::<I>();
132
133                for (chunk_idx, chunk_start) in (0..array_len).step_by(1024).enumerate() {
134                    let chunk_end = (chunk_start + 1024).min(array_len);
135                    let chunk_slice = &mut alp_buffer.as_mut_slice()[chunk_start..chunk_end];
136
137                    <T>::decode_slice_inplace(chunk_slice, exponents);
138
139                    let decoded_chunk: &mut [T] = unsafe { transmute(chunk_slice) };
140                    patch_chunk(
141                        decoded_chunk,
142                        patches_indices,
143                        patches_values,
144                        patches_offset,
145                        patches_chunk_offsets,
146                        chunk_idx,
147                        base_offset.as_(),
148                        offset_within_chunk,
149                    );
150                }
151
152                let decoded_buffer: BufferMut<T> = unsafe { transmute(alp_buffer) };
153                PrimitiveArray::new::<T>(decoded_buffer.freeze(), validity)
154            })
155        })
156    })
157}
158
159/// Decompresses an ALP-encoded array without chunk offsets.
160///
161/// # Returns
162///
163/// A `PrimitiveArray` containing the decompressed values with all patches applied.
164fn decompress_unchunked(
165    array: ArrayRef,
166    exponents: Exponents,
167    patches: Option<Patches>,
168    dtype: DType,
169) -> PrimitiveArray {
170    let encoded = array.to_primitive();
171
172    // We need to drop ALPArray here in case converting encoded buffer into
173    // primitive didn't create a copy. In that case both alp_encoded and array
174    // will hold a reference to the buffer we want to mutate.
175    drop(array);
176
177    let validity = encoded.validity().clone();
178    let ptype = dtype.as_ptype();
179
180    let decoded = match_each_alp_float_ptype!(ptype, |T| {
181        let mut alp_buffer = encoded.into_buffer_mut();
182        <T>::decode_slice_inplace(alp_buffer.as_mut_slice(), exponents);
183        let decoded_buffer: BufferMut<T> = unsafe { transmute(alp_buffer) };
184        PrimitiveArray::new::<T>(decoded_buffer.freeze(), validity)
185    });
186
187    if let Some(patches) = patches {
188        decoded.patch(&patches)
189    } else {
190        decoded
191    }
192}