Skip to main content

vortex_alp/alp/
decompress.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use std::mem::transmute;
5
6use vortex_array::ExecutionCtx;
7use vortex_array::ToCanonical;
8use vortex_array::arrays::PrimitiveArray;
9use vortex_array::arrays::chunk_range;
10use vortex_array::arrays::patch_chunk;
11use vortex_array::dtype::DType;
12use vortex_array::match_each_unsigned_integer_ptype;
13use vortex_array::patches::Patches;
14use vortex_array::vtable::ValidityHelper;
15use vortex_buffer::BufferMut;
16use vortex_error::VortexResult;
17
18use crate::ALPArray;
19use crate::ALPFloat;
20use crate::Exponents;
21use crate::match_each_alp_float_ptype;
22
23/// Decompresses an ALP-encoded array using `to_primitive` (legacy path).
24///
25/// # Returns
26///
27/// A `PrimitiveArray` containing the decompressed floating-point values with all patches applied.
28pub fn decompress_into_array(array: ALPArray) -> VortexResult<PrimitiveArray> {
29    let (encoded, exponents, patches, dtype) = array.into_parts();
30    if let Some(ref patches) = patches
31        && let Some(chunk_offsets) = patches.chunk_offsets()
32    {
33        let prim_encoded = encoded.to_primitive();
34        // We need to drop ALPArray here in case converting encoded buffer into
35        // primitive didn't create a copy. In that case both alp_encoded and array
36        // will hold a reference to the buffer we want to mutate.
37        drop(encoded);
38        let patches_chunk_offsets = chunk_offsets.as_ref().to_primitive();
39        let patches_indices = patches.indices().to_primitive();
40        let patches_values = patches.values().to_primitive();
41        Ok(decompress_chunked_core(
42            prim_encoded,
43            exponents,
44            &patches_indices,
45            &patches_values,
46            &patches_chunk_offsets,
47            patches,
48            dtype,
49        ))
50    } else {
51        let encoded_prim = encoded.to_primitive();
52        // We need to drop ALPArray here in case converting encoded buffer into
53        // primitive didn't create a copy. In that case both alp_encoded and array
54        // will hold a reference to the buffer we want to mutate.
55        drop(encoded);
56        decompress_unchunked_core(encoded_prim, exponents, patches, dtype)
57    }
58}
59
60/// Decompresses an ALP-encoded array using `execute` (execution path).
61///
62/// This version uses `execute` on child arrays instead of `to_primitive`,
63/// ensuring proper recursive execution through the execution context.
64///
65/// # Returns
66///
67/// A `PrimitiveArray` containing the decompressed floating-point values with all patches applied.
68pub fn execute_decompress(array: ALPArray, ctx: &mut ExecutionCtx) -> VortexResult<PrimitiveArray> {
69    let (encoded, exponents, patches, dtype) = array.into_parts();
70    if let Some(ref patches) = patches
71        && let Some(chunk_offsets) = patches.chunk_offsets()
72    {
73        // TODO(joe): have into parts.
74        let encoded = encoded.execute::<PrimitiveArray>(ctx)?;
75        let patches_chunk_offsets = chunk_offsets.clone().execute::<PrimitiveArray>(ctx)?;
76        let patches_indices = patches.indices().clone().execute::<PrimitiveArray>(ctx)?;
77        let patches_values = patches.values().clone().execute::<PrimitiveArray>(ctx)?;
78        Ok(decompress_chunked_core(
79            encoded,
80            exponents,
81            &patches_indices,
82            &patches_values,
83            &patches_chunk_offsets,
84            patches,
85            dtype,
86        ))
87    } else {
88        let encoded = encoded.execute::<PrimitiveArray>(ctx)?;
89        decompress_unchunked_core(encoded, exponents, patches, dtype)
90    }
91}
92
93/// Core decompression logic for chunked ALP arrays.
94///
95/// Takes pre-resolved `PrimitiveArray` inputs to avoid duplication between
96/// the `to_primitive` and `execute` paths.
97#[expect(
98    clippy::cognitive_complexity,
99    reason = "complexity is from nested match_each_* macros"
100)]
101fn decompress_chunked_core(
102    encoded: PrimitiveArray,
103    exponents: Exponents,
104    patches_indices: &PrimitiveArray,
105    patches_values: &PrimitiveArray,
106    patches_chunk_offsets: &PrimitiveArray,
107    patches: &Patches,
108    dtype: DType,
109) -> PrimitiveArray {
110    let validity = encoded.validity().clone();
111    let ptype = dtype.as_ptype();
112    let array_len = encoded.len();
113    let offset_within_chunk = patches.offset_within_chunk().unwrap_or(0);
114
115    match_each_alp_float_ptype!(ptype, |T| {
116        let patches_values = patches_values.as_slice::<T>();
117        let mut alp_buffer = encoded.into_buffer_mut();
118        match_each_unsigned_integer_ptype!(patches_chunk_offsets.ptype(), |C| {
119            let patches_chunk_offsets = patches_chunk_offsets.as_slice::<C>();
120
121            match_each_unsigned_integer_ptype!(patches_indices.ptype(), |I| {
122                let patches_indices = patches_indices.as_slice::<I>();
123
124                for chunk_idx in 0..patches_chunk_offsets.len() {
125                    let chunk_range = chunk_range(chunk_idx, patches.offset(), array_len);
126                    let chunk_slice = &mut alp_buffer.as_mut_slice()[chunk_range];
127
128                    <T>::decode_slice_inplace(chunk_slice, exponents);
129
130                    let decoded_chunk: &mut [T] = unsafe { transmute(chunk_slice) };
131                    patch_chunk(
132                        decoded_chunk,
133                        patches_indices,
134                        patches_values,
135                        patches.offset(),
136                        patches_chunk_offsets,
137                        chunk_idx,
138                        offset_within_chunk,
139                    );
140                }
141
142                let decoded_buffer: BufferMut<T> = unsafe { transmute(alp_buffer) };
143                PrimitiveArray::new::<T>(decoded_buffer.freeze(), validity)
144            })
145        })
146    })
147}
148
149/// Core decompression logic for unchunked ALP arrays.
150///
151/// Takes a pre-resolved `PrimitiveArray` to avoid duplication between
152/// the `to_primitive` and `execute` paths.
153fn decompress_unchunked_core(
154    encoded: PrimitiveArray,
155    exponents: Exponents,
156    patches: Option<Patches>,
157    dtype: DType,
158) -> VortexResult<PrimitiveArray> {
159    let validity = encoded.validity().clone();
160    let ptype = dtype.as_ptype();
161
162    let decoded = match_each_alp_float_ptype!(ptype, |T| {
163        let mut alp_buffer = encoded.into_buffer_mut();
164        <T>::decode_slice_inplace(alp_buffer.as_mut_slice(), exponents);
165        let decoded_buffer: BufferMut<T> = unsafe { transmute(alp_buffer) };
166        PrimitiveArray::new::<T>(decoded_buffer.freeze(), validity)
167    });
168
169    if let Some(patches) = patches {
170        decoded.patch(&patches)
171    } else {
172        Ok(decoded)
173    }
174}