Skip to main content

vortex_btrblocks/schemes/integer/
bitpacking.rs

1// SPDX-License-Identifier: Apache-2.0
2// SPDX-FileCopyrightText: Copyright the Vortex contributors
3
4//! BitPacking integer encoding.
5
6use vortex_array::ArrayRef;
7use vortex_array::Canonical;
8use vortex_array::ExecutionCtx;
9use vortex_array::IntoArray;
10use vortex_array::arrays::Patched;
11use vortex_array::arrays::patched::use_experimental_patches;
12use vortex_array::arrays::primitive::PrimitiveArrayExt;
13use vortex_compressor::estimate::CompressionEstimate;
14use vortex_compressor::estimate::DeferredEstimate;
15use vortex_compressor::estimate::EstimateVerdict;
16use vortex_error::VortexResult;
17use vortex_fastlanes::BitPacked;
18use vortex_fastlanes::bitpack_compress::bit_width_histogram;
19use vortex_fastlanes::bitpack_compress::bitpack_encode;
20use vortex_fastlanes::bitpack_compress::find_best_bit_width;
21
22use crate::ArrayAndStats;
23use crate::CascadingCompressor;
24use crate::CompressorContext;
25use crate::Scheme;
26use crate::compress_patches;
27
28/// BitPacking encoding for non-negative integers.
29#[derive(Debug, Copy, Clone, PartialEq, Eq)]
30pub struct BitPackingScheme;
31
32impl Scheme for BitPackingScheme {
33    fn scheme_name(&self) -> &'static str {
34        "vortex.int.bitpacking"
35    }
36
37    fn matches(&self, canonical: &Canonical) -> bool {
38        canonical.dtype().is_int()
39    }
40
41    fn expected_compression_ratio(
42        &self,
43        data: &ArrayAndStats,
44        _compress_ctx: CompressorContext,
45        exec_ctx: &mut ExecutionCtx,
46    ) -> CompressionEstimate {
47        let stats = data.integer_stats(exec_ctx);
48
49        // BitPacking only works for non-negative values.
50        if stats.erased().min_is_negative() {
51            return CompressionEstimate::Verdict(EstimateVerdict::Skip);
52        }
53
54        CompressionEstimate::Deferred(DeferredEstimate::Sample)
55    }
56
57    fn compress(
58        &self,
59        _compressor: &CascadingCompressor,
60        data: &ArrayAndStats,
61        _compress_ctx: CompressorContext,
62        exec_ctx: &mut ExecutionCtx,
63    ) -> VortexResult<ArrayRef> {
64        let primitive_array = data.array_as_primitive();
65
66        let histogram = bit_width_histogram(primitive_array, exec_ctx)?;
67        let bw = find_best_bit_width(primitive_array.ptype(), &histogram)?;
68
69        // If best bw is determined to be the current bit-width, return the original array.
70        if bw as usize == primitive_array.ptype().bit_width() {
71            return Ok(primitive_array.array().clone());
72        }
73
74        // Otherwise we can bitpack the array.
75        let primitive_array = primitive_array.into_owned();
76        let packed = bitpack_encode(&primitive_array, bw, Some(&histogram), exec_ctx)?;
77
78        let packed_stats = packed.statistics().to_owned();
79        let ptype = packed.dtype().as_ptype();
80        let mut parts = BitPacked::into_parts(packed);
81
82        let array = if use_experimental_patches() {
83            let patches = parts.patches.take();
84            // Transpose patches into G-ALP style PatchedArray, wrapping an inner BitPackedArray.
85            let array = BitPacked::try_new(
86                parts.packed,
87                ptype,
88                parts.validity,
89                None,
90                parts.bit_width,
91                parts.len,
92                parts.offset,
93            )?
94            .into_array();
95
96            match patches {
97                None => array,
98                Some(p) => Patched::from_array_and_patches(array, &p, exec_ctx)?
99                    .with_stats_set(packed_stats)
100                    .into_array(),
101            }
102        } else {
103            // Compress patches and place back into BitPackedArray.
104            let patches = parts
105                .patches
106                .take()
107                .map(|p| compress_patches(p, exec_ctx))
108                .transpose()?;
109            parts.patches = patches;
110            BitPacked::try_new(
111                parts.packed,
112                ptype,
113                parts.validity,
114                parts.patches,
115                parts.bit_width,
116                parts.len,
117                parts.offset,
118            )?
119            .with_stats_set(packed_stats)
120            .into_array()
121        };
122
123        Ok(array)
124    }
125}