vortex_fastlanes/bitpacking/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_array::ArrayRef;
5use vortex_array::DeserializeMetadata;
6use vortex_array::ExecutionCtx;
7use vortex_array::ProstMetadata;
8use vortex_array::SerializeMetadata;
9use vortex_array::buffer::BufferHandle;
10use vortex_array::patches::Patches;
11use vortex_array::patches::PatchesMetadata;
12use vortex_array::serde::ArrayChildren;
13use vortex_array::validity::Validity;
14use vortex_array::vtable;
15use vortex_array::vtable::ArrayId;
16use vortex_array::vtable::ArrayVTable;
17use vortex_array::vtable::ArrayVTableExt;
18use vortex_array::vtable::NotSupported;
19use vortex_array::vtable::VTable;
20use vortex_array::vtable::ValidityVTableFromValidityHelper;
21use vortex_dtype::DType;
22use vortex_dtype::PType;
23use vortex_error::VortexError;
24use vortex_error::VortexResult;
25use vortex_error::vortex_bail;
26use vortex_error::vortex_ensure;
27use vortex_error::vortex_err;
28use vortex_vector::Vector;
29use vortex_vector::VectorMutOps;
30
31use crate::BitPackedArray;
32use crate::bitpack_decompress::unpack_to_primitive_vector;
33use crate::bitpacking::vtable::kernels::filter::PARENT_KERNELS;
34
35mod array;
36mod canonical;
37mod encode;
38mod kernels;
39mod operations;
40mod validity;
41mod visitor;
42
// NOTE(review): presumably generates the glue that ties `BitPackedArray` to
// `BitPackedVTable` — confirm against the `vtable!` macro in `vortex_array`.
vtable!(BitPacked);
44
/// Serialized metadata of a bit-packed array, encoded as a protobuf message.
///
/// Produced by `VTable::metadata` for `BitPackedVTable` and consumed again by
/// its `build` method when the array is reconstructed.
#[derive(Clone, prost::Message)]
pub struct BitPackedMetadata {
    /// Bit width of the array, as reported by `BitPackedArray::bit_width`.
    /// Must fit in a `u8` for the array to be rebuilt.
    #[prost(uint32, tag = "1")]
    pub(crate) bit_width: u32,
    /// Offset of the array, as reported by `BitPackedArray::offset`.
    /// Must fit in a `u16` for the array to be rebuilt.
    #[prost(uint32, tag = "2")]
    pub(crate) offset: u32, // must be <1024
    /// Metadata describing the array's patches; `None` when it has no patches.
    #[prost(message, optional, tag = "3")]
    pub(crate) patches: Option<PatchesMetadata>,
}
54
55impl VTable for BitPackedVTable {
56    type Array = BitPackedArray;
57
58    type Metadata = ProstMetadata<BitPackedMetadata>;
59
60    type ArrayVTable = Self;
61    type CanonicalVTable = Self;
62    type OperationsVTable = Self;
63    type ValidityVTable = ValidityVTableFromValidityHelper;
64    type VisitorVTable = Self;
65    type ComputeVTable = NotSupported;
66    type EncodeVTable = Self;
67
68    fn id(&self) -> ArrayId {
69        ArrayId::new_ref("fastlanes.bitpacked")
70    }
71
72    fn encoding(_array: &Self::Array) -> ArrayVTable {
73        BitPackedVTable.as_vtable()
74    }
75
76    fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
77        // Children: patches (if present): indices, values, chunk_offsets; then validity (if present)
78        let patches_info = array
79            .patches()
80            .map(|p| (p.offset(), p.chunk_offsets().is_some()));
81
82        let mut child_idx = 0;
83        let patches = if let Some((patch_offset, has_chunk_offsets)) = patches_info {
84            let patch_indices = children
85                .get(child_idx)
86                .ok_or_else(|| vortex_err!("Expected patch_indices child at index {}", child_idx))?
87                .clone();
88            child_idx += 1;
89
90            let patch_values = children
91                .get(child_idx)
92                .ok_or_else(|| vortex_err!("Expected patch_values child at index {}", child_idx))?
93                .clone();
94            child_idx += 1;
95
96            let patch_chunk_offsets = if has_chunk_offsets {
97                let offsets = children
98                    .get(child_idx)
99                    .ok_or_else(|| {
100                        vortex_err!("Expected patch_chunk_offsets child at index {}", child_idx)
101                    })?
102                    .clone();
103                child_idx += 1;
104                Some(offsets)
105            } else {
106                None
107            };
108
109            Some(Patches::new(
110                array.len(),
111                patch_offset,
112                patch_indices,
113                patch_values,
114                patch_chunk_offsets,
115            ))
116        } else {
117            None
118        };
119
120        let validity = if child_idx < children.len() {
121            Validity::Array(children[child_idx].clone())
122        } else {
123            Validity::from(array.dtype().nullability())
124        };
125
126        let expected_children = child_idx
127            + if matches!(validity, Validity::Array(_)) {
128                1
129            } else {
130                0
131            };
132        vortex_ensure!(
133            children.len() == expected_children,
134            "Expected {} children, got {}",
135            expected_children,
136            children.len()
137        );
138
139        array.patches = patches;
140        array.validity = validity;
141
142        Ok(())
143    }
144
145    fn metadata(array: &BitPackedArray) -> VortexResult<Self::Metadata> {
146        Ok(ProstMetadata(BitPackedMetadata {
147            bit_width: array.bit_width() as u32,
148            offset: array.offset() as u32,
149            patches: array
150                .patches()
151                .map(|p| p.to_metadata(array.len(), array.dtype()))
152                .transpose()?,
153        }))
154    }
155
156    fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
157        Ok(Some(metadata.serialize()))
158    }
159
160    fn deserialize(buffer: &[u8]) -> VortexResult<Self::Metadata> {
161        let inner = <ProstMetadata<BitPackedMetadata> as DeserializeMetadata>::deserialize(buffer)?;
162        Ok(ProstMetadata(inner))
163    }
164
165    /// Deserialize a BitPackedArray from its components.
166    ///
167    /// Note that the layout depends on whether patches and chunk_offsets are present:
168    /// - No patches: `[validity?]`
169    /// - With patches: `[patch_indices, patch_values, chunk_offsets?, validity?]`
170    fn build(
171        &self,
172        dtype: &DType,
173        len: usize,
174        metadata: &Self::Metadata,
175        buffers: &[BufferHandle],
176        children: &dyn ArrayChildren,
177    ) -> VortexResult<BitPackedArray> {
178        if buffers.len() != 1 {
179            vortex_bail!("Expected 1 buffer, got {}", buffers.len());
180        }
181        let packed = buffers[0].clone().try_to_bytes()?;
182
183        let load_validity = |child_idx: usize| {
184            if children.len() == child_idx {
185                Ok(Validity::from(dtype.nullability()))
186            } else if children.len() == child_idx + 1 {
187                let validity = children.get(child_idx, &Validity::DTYPE, len)?;
188                Ok(Validity::Array(validity))
189            } else {
190                vortex_bail!(
191                    "Expected {} or {} children, got {}",
192                    child_idx,
193                    child_idx + 1,
194                    children.len()
195                );
196            }
197        };
198
199        let validity_idx = match &metadata.patches {
200            None => 0,
201            Some(patches_meta) if patches_meta.chunk_offsets_dtype().is_some() => 3,
202            Some(_) => 2,
203        };
204
205        let validity = load_validity(validity_idx)?;
206
207        let patches = metadata
208            .patches
209            .map(|p| {
210                let indices = children.get(0, &p.indices_dtype(), p.len())?;
211                let values = children.get(1, dtype, p.len())?;
212                let chunk_offsets = p
213                    .chunk_offsets_dtype()
214                    .map(|dtype| children.get(2, &dtype, p.chunk_offsets_len() as usize))
215                    .transpose()?;
216
217                Ok::<_, VortexError>(Patches::new(
218                    len,
219                    p.offset(),
220                    indices,
221                    values,
222                    chunk_offsets,
223                ))
224            })
225            .transpose()?;
226
227        BitPackedArray::try_new(
228            packed,
229            PType::try_from(dtype)?,
230            validity,
231            patches,
232            u8::try_from(metadata.bit_width).map_err(|_| {
233                vortex_err!(
234                    "BitPackedMetadata bit_width {} does not fit in u8",
235                    metadata.bit_width
236                )
237            })?,
238            len,
239            u16::try_from(metadata.offset).map_err(|_| {
240                vortex_err!(
241                    "BitPackedMetadata offset {} does not fit in u16",
242                    metadata.offset
243                )
244            })?,
245        )
246    }
247
248    fn execute(array: &Self::Array, _ctx: &mut ExecutionCtx) -> VortexResult<Vector> {
249        Ok(unpack_to_primitive_vector(array).freeze().into())
250    }
251
252    fn execute_parent(
253        array: &Self::Array,
254        parent: &ArrayRef,
255        child_idx: usize,
256        ctx: &mut ExecutionCtx,
257    ) -> VortexResult<Option<Vector>> {
258        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
259    }
260}
261
/// Unit type carrying the [`VTable`] implementation of the
/// `fastlanes.bitpacked` encoding for [`BitPackedArray`].
#[derive(Debug)]
pub struct BitPackedVTable;