Skip to main content

vortex_fastlanes/bitpacking/vtable/
mod.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4use vortex_array::ArrayRef;
5use vortex_array::DeserializeMetadata;
6use vortex_array::ExecutionCtx;
7use vortex_array::IntoArray;
8use vortex_array::ProstMetadata;
9use vortex_array::SerializeMetadata;
10use vortex_array::buffer::BufferHandle;
11use vortex_array::builders::ArrayBuilder;
12use vortex_array::dtype::DType;
13use vortex_array::dtype::PType;
14use vortex_array::match_each_integer_ptype;
15use vortex_array::patches::Patches;
16use vortex_array::patches::PatchesMetadata;
17use vortex_array::serde::ArrayChildren;
18use vortex_array::validity::Validity;
19use vortex_array::vtable;
20use vortex_array::vtable::ArrayId;
21use vortex_array::vtable::VTable;
22use vortex_array::vtable::ValidityVTableFromValidityHelper;
23use vortex_error::VortexExpect;
24use vortex_error::VortexResult;
25use vortex_error::vortex_bail;
26use vortex_error::vortex_ensure;
27use vortex_error::vortex_err;
28use vortex_session::VortexSession;
29
30use crate::BitPackedArray;
31use crate::bitpack_decompress::unpack_array;
32use crate::bitpack_decompress::unpack_into_primitive_builder;
33use crate::bitpacking::vtable::kernels::PARENT_KERNELS;
34use crate::bitpacking::vtable::rules::RULES;
35mod array;
36mod kernels;
37mod operations;
38mod rules;
39mod validity;
40mod visitor;
41
// Invokes the `vtable!` macro imported from `vortex_array` for the `BitPacked` encoding.
// NOTE(review): presumably this generates the glue tying `BitPackedArray` to
// `BitPackedVTable` — confirm against the macro definition.
vtable!(BitPacked);
43
/// Protobuf-encoded metadata stored alongside a bit-packed array.
///
/// Produced by `VTable::metadata` and consumed by `VTable::build` in this module.
#[derive(Clone, prost::Message)]
pub struct BitPackedMetadata {
    /// Bit width as reported by `BitPackedArray::bit_width`. Stored as `u32` on the wire;
    /// `build` rejects values that do not fit in a `u8`.
    #[prost(uint32, tag = "1")]
    pub(crate) bit_width: u32,
    /// Offset as reported by `BitPackedArray::offset`. Must be < 1024. Stored as `u32` on
    /// the wire; `build` rejects values that do not fit in a `u16`.
    #[prost(uint32, tag = "2")]
    pub(crate) offset: u32,
    /// Metadata describing the patches; `None` when the array has no patches.
    #[prost(message, optional, tag = "3")]
    pub(crate) patches: Option<PatchesMetadata>,
}
53
54impl VTable for BitPackedVTable {
55    type Array = BitPackedArray;
56
57    type Metadata = ProstMetadata<BitPackedMetadata>;
58
59    type ArrayVTable = Self;
60    type OperationsVTable = Self;
61    type ValidityVTable = ValidityVTableFromValidityHelper;
62    type VisitorVTable = Self;
63
64    fn id(_array: &Self::Array) -> ArrayId {
65        Self::ID
66    }
67
68    fn reduce_parent(
69        array: &Self::Array,
70        parent: &ArrayRef,
71        child_idx: usize,
72    ) -> VortexResult<Option<ArrayRef>> {
73        RULES.evaluate(array, parent, child_idx)
74    }
75
76    fn with_children(array: &mut Self::Array, children: Vec<ArrayRef>) -> VortexResult<()> {
77        // Children: patches (if present): indices, values, chunk_offsets; then validity (if present)
78        let patches_info = array
79            .patches()
80            .map(|p| (p.offset(), p.chunk_offsets().is_some()));
81
82        let mut child_idx = 0;
83        let patches = if let Some((patch_offset, has_chunk_offsets)) = patches_info {
84            let patch_indices = children
85                .get(child_idx)
86                .ok_or_else(|| vortex_err!("Expected patch_indices child at index {}", child_idx))?
87                .clone();
88            child_idx += 1;
89
90            let patch_values = children
91                .get(child_idx)
92                .ok_or_else(|| vortex_err!("Expected patch_values child at index {}", child_idx))?
93                .clone();
94            child_idx += 1;
95
96            let patch_chunk_offsets = if has_chunk_offsets {
97                let offsets = children
98                    .get(child_idx)
99                    .ok_or_else(|| {
100                        vortex_err!("Expected patch_chunk_offsets child at index {}", child_idx)
101                    })?
102                    .clone();
103                child_idx += 1;
104                Some(offsets)
105            } else {
106                None
107            };
108
109            Some(Patches::new(
110                array.len(),
111                patch_offset,
112                patch_indices,
113                patch_values,
114                patch_chunk_offsets,
115            )?)
116        } else {
117            None
118        };
119
120        let validity = if child_idx < children.len() {
121            Validity::Array(children[child_idx].clone())
122        } else {
123            Validity::from(array.dtype().nullability())
124        };
125
126        let expected_children = child_idx
127            + if matches!(validity, Validity::Array(_)) {
128                1
129            } else {
130                0
131            };
132        vortex_ensure!(
133            children.len() == expected_children,
134            "Expected {} children, got {}",
135            expected_children,
136            children.len()
137        );
138
139        array.patches = patches;
140        array.validity = validity;
141
142        Ok(())
143    }
144
145    fn metadata(array: &BitPackedArray) -> VortexResult<Self::Metadata> {
146        Ok(ProstMetadata(BitPackedMetadata {
147            bit_width: array.bit_width() as u32,
148            offset: array.offset() as u32,
149            patches: array
150                .patches()
151                .map(|p| p.to_metadata(array.len(), array.dtype()))
152                .transpose()?,
153        }))
154    }
155
156    fn serialize(metadata: Self::Metadata) -> VortexResult<Option<Vec<u8>>> {
157        Ok(Some(metadata.serialize()))
158    }
159
160    fn deserialize(
161        bytes: &[u8],
162        _dtype: &DType,
163        _len: usize,
164        _buffers: &[BufferHandle],
165        _session: &VortexSession,
166    ) -> VortexResult<Self::Metadata> {
167        let inner = <ProstMetadata<BitPackedMetadata> as DeserializeMetadata>::deserialize(bytes)?;
168        Ok(ProstMetadata(inner))
169    }
170
171    /// Deserialize a BitPackedArray from its components.
172    ///
173    /// Note that the layout depends on whether patches and chunk_offsets are present:
174    /// - No patches: `[validity?]`
175    /// - With patches: `[patch_indices, patch_values, chunk_offsets?, validity?]`
176    fn build(
177        dtype: &DType,
178        len: usize,
179        metadata: &Self::Metadata,
180        buffers: &[BufferHandle],
181        children: &dyn ArrayChildren,
182    ) -> VortexResult<BitPackedArray> {
183        if buffers.len() != 1 {
184            vortex_bail!("Expected 1 buffer, got {}", buffers.len());
185        }
186        let packed = buffers[0].clone();
187
188        let load_validity = |child_idx: usize| {
189            if children.len() == child_idx {
190                Ok(Validity::from(dtype.nullability()))
191            } else if children.len() == child_idx + 1 {
192                let validity = children.get(child_idx, &Validity::DTYPE, len)?;
193                Ok(Validity::Array(validity))
194            } else {
195                vortex_bail!(
196                    "Expected {} or {} children, got {}",
197                    child_idx,
198                    child_idx + 1,
199                    children.len()
200                );
201            }
202        };
203
204        let validity_idx = match &metadata.patches {
205            None => 0,
206            Some(patches_meta) if patches_meta.chunk_offsets_dtype()?.is_some() => 3,
207            Some(_) => 2,
208        };
209
210        let validity = load_validity(validity_idx)?;
211
212        let patches = metadata
213            .patches
214            .map(|p| {
215                let indices = children.get(0, &p.indices_dtype()?, p.len()?)?;
216                let values = children.get(1, dtype, p.len()?)?;
217                let chunk_offsets = p
218                    .chunk_offsets_dtype()?
219                    .map(|dtype| children.get(2, &dtype, p.chunk_offsets_len() as usize))
220                    .transpose()?;
221
222                Patches::new(len, p.offset()?, indices, values, chunk_offsets)
223            })
224            .transpose()?;
225
226        BitPackedArray::try_new(
227            packed,
228            PType::try_from(dtype)?,
229            validity,
230            patches,
231            u8::try_from(metadata.bit_width).map_err(|_| {
232                vortex_err!(
233                    "BitPackedMetadata bit_width {} does not fit in u8",
234                    metadata.bit_width
235                )
236            })?,
237            len,
238            u16::try_from(metadata.offset).map_err(|_| {
239                vortex_err!(
240                    "BitPackedMetadata offset {} does not fit in u16",
241                    metadata.offset
242                )
243            })?,
244        )
245    }
246
247    fn append_to_builder(
248        array: &BitPackedArray,
249        builder: &mut dyn ArrayBuilder,
250        ctx: &mut ExecutionCtx,
251    ) -> VortexResult<()> {
252        match_each_integer_ptype!(array.ptype(), |T| {
253            unpack_into_primitive_builder::<T>(
254                array,
255                builder
256                    .as_any_mut()
257                    .downcast_mut()
258                    .vortex_expect("bit packed array must canonicalize into a primitive array"),
259                ctx,
260            )
261        })
262    }
263
264    fn execute(array: &Self::Array, ctx: &mut ExecutionCtx) -> VortexResult<ArrayRef> {
265        Ok(unpack_array(array, ctx)?.into_array())
266    }
267
268    fn execute_parent(
269        array: &Self::Array,
270        parent: &ArrayRef,
271        child_idx: usize,
272        ctx: &mut ExecutionCtx,
273    ) -> VortexResult<Option<ArrayRef>> {
274        PARENT_KERNELS.execute(array, parent, child_idx, ctx)
275    }
276}
277
/// Unit struct that carries the [`VTable`] implementation for [`BitPackedArray`].
#[derive(Debug)]
pub struct BitPackedVTable;
280
impl BitPackedVTable {
    /// Identifier of the bit-packed encoding; this is the value returned by `VTable::id`.
    pub const ID: ArrayId = ArrayId::new_ref("fastlanes.bitpacked");
}