vortex_btrblocks/schemes/integer/
bitpacking.rs1use vortex_array::ArrayRef;
7use vortex_array::Canonical;
8use vortex_array::ExecutionCtx;
9use vortex_array::IntoArray;
10use vortex_array::arrays::Patched;
11use vortex_array::arrays::patched::use_experimental_patches;
12use vortex_array::arrays::primitive::PrimitiveArrayExt;
13use vortex_compressor::estimate::CompressionEstimate;
14use vortex_compressor::estimate::DeferredEstimate;
15use vortex_compressor::estimate::EstimateVerdict;
16use vortex_error::VortexResult;
17use vortex_fastlanes::BitPacked;
18use vortex_fastlanes::bitpack_compress::bit_width_histogram;
19use vortex_fastlanes::bitpack_compress::bitpack_encode;
20use vortex_fastlanes::bitpack_compress::find_best_bit_width;
21
22use crate::ArrayAndStats;
23use crate::CascadingCompressor;
24use crate::CompressorContext;
25use crate::Scheme;
26use crate::compress_patches;
27
28#[derive(Debug, Copy, Clone, PartialEq, Eq)]
30pub struct BitPackingScheme;
31
32impl Scheme for BitPackingScheme {
33 fn scheme_name(&self) -> &'static str {
34 "vortex.int.bitpacking"
35 }
36
37 fn matches(&self, canonical: &Canonical) -> bool {
38 canonical.dtype().is_int()
39 }
40
41 fn expected_compression_ratio(
42 &self,
43 data: &ArrayAndStats,
44 _compress_ctx: CompressorContext,
45 exec_ctx: &mut ExecutionCtx,
46 ) -> CompressionEstimate {
47 let stats = data.integer_stats(exec_ctx);
48
49 if stats.erased().min_is_negative() {
51 return CompressionEstimate::Verdict(EstimateVerdict::Skip);
52 }
53
54 CompressionEstimate::Deferred(DeferredEstimate::Sample)
55 }
56
57 fn compress(
58 &self,
59 _compressor: &CascadingCompressor,
60 data: &ArrayAndStats,
61 _compress_ctx: CompressorContext,
62 exec_ctx: &mut ExecutionCtx,
63 ) -> VortexResult<ArrayRef> {
64 let primitive_array = data.array_as_primitive();
65
66 let histogram = bit_width_histogram(primitive_array, exec_ctx)?;
67 let bw = find_best_bit_width(primitive_array.ptype(), &histogram)?;
68
69 if bw as usize == primitive_array.ptype().bit_width() {
71 return Ok(primitive_array.array().clone());
72 }
73
74 let primitive_array = primitive_array.into_owned();
76 let packed = bitpack_encode(&primitive_array, bw, Some(&histogram), exec_ctx)?;
77
78 let packed_stats = packed.statistics().to_owned();
79 let ptype = packed.dtype().as_ptype();
80 let mut parts = BitPacked::into_parts(packed);
81
82 let array = if use_experimental_patches() {
83 let patches = parts.patches.take();
84 let array = BitPacked::try_new(
86 parts.packed,
87 ptype,
88 parts.validity,
89 None,
90 parts.bit_width,
91 parts.len,
92 parts.offset,
93 )?
94 .into_array();
95
96 match patches {
97 None => array,
98 Some(p) => Patched::from_array_and_patches(array, &p, exec_ctx)?
99 .with_stats_set(packed_stats)
100 .into_array(),
101 }
102 } else {
103 let patches = parts
105 .patches
106 .take()
107 .map(|p| compress_patches(p, exec_ctx))
108 .transpose()?;
109 parts.patches = patches;
110 BitPacked::try_new(
111 parts.packed,
112 ptype,
113 parts.validity,
114 parts.patches,
115 parts.bit_width,
116 parts.len,
117 parts.offset,
118 )?
119 .with_stats_set(packed_stats)
120 .into_array()
121 };
122
123 Ok(array)
124 }
125}