vortex_sampling_compressor/compressors/
bitpacked.rs1#![allow(clippy::cast_possible_truncation)]
2use vortex_array::aliases::hash_set::HashSet;
3use vortex_array::array::PrimitiveArray;
4use vortex_array::variants::PrimitiveArrayTrait;
5use vortex_array::{Array, Encoding, EncodingId, IntoArray, IntoArrayVariant};
6use vortex_dtype::match_each_integer_ptype;
7use vortex_error::{vortex_bail, vortex_err, vortex_panic, VortexResult};
8use vortex_fastlanes::{
9 bitpack_unchecked, count_exceptions, find_best_bit_width, find_min_patchless_bit_width,
10 gather_patches, BitPackedArray, BitPackedEncoding,
11};
12
13use crate::compressors::{CompressedArray, CompressionTree, EncodingCompressor};
14use crate::{constants, SamplingCompressor};
15
16pub const BITPACK_WITH_PATCHES: BitPackedCompressor = BitPackedCompressor {
17 allow_patches: true,
18};
19pub const BITPACK_NO_PATCHES: BitPackedCompressor = BitPackedCompressor {
20 allow_patches: false,
21};
22
23#[derive(Debug)]
24pub struct BitPackedCompressor {
25 allow_patches: bool,
26}
27
28impl BitPackedCompressor {
29 fn find_bit_width(&self, array: &PrimitiveArray) -> VortexResult<u8> {
30 if self.allow_patches {
31 find_best_bit_width(array)
32 } else {
33 find_min_patchless_bit_width(array)
34 }
35 }
36}
37
38impl EncodingCompressor for BitPackedCompressor {
39 fn id(&self) -> &str {
40 if self.allow_patches {
41 "fastlanes.bitpacked"
42 } else {
43 "fastlanes.bitpacked_no_patches"
44 }
45 }
46
47 fn cost(&self) -> u8 {
48 if self.allow_patches {
49 constants::BITPACKED_WITH_PATCHES_COST
50 } else {
51 constants::BITPACKED_NO_PATCHES_COST
52 }
53 }
54
55 fn can_compress(&self, array: &Array) -> Option<&dyn EncodingCompressor> {
56 let parray = PrimitiveArray::maybe_from(array)?;
58
59 if !parray.ptype().is_int() {
61 return None;
62 }
63
64 if !parray.ptype().is_unsigned_int() {
66 let has_negative_elements = match_each_integer_ptype!(parray.ptype(), |$P| {
67 parray.statistics().compute_min::<Option<$P>>().unwrap_or_default().unwrap_or_default() < 0
68 });
69
70 if has_negative_elements {
71 return None;
72 }
73 }
74
75 let bit_width = self.find_bit_width(&parray).ok()?;
76
77 if bit_width == parray.ptype().bit_width() as u8 {
79 return None;
80 }
81
82 Some(self)
83 }
84
85 fn compress<'a>(
86 &'a self,
87 array: &Array,
88 _like: Option<CompressionTree<'a>>,
89 ctx: SamplingCompressor<'a>,
90 ) -> VortexResult<CompressedArray<'a>> {
91 let parray = array.clone().into_primitive()?;
92 if !parray.ptype().is_unsigned_int() {
94 let has_negative_elements = match_each_integer_ptype!(parray.ptype(), |$P| {
95 parray.statistics().compute_min::<Option<$P>>().unwrap_or_default().unwrap_or_default() < 0
96 });
97
98 if has_negative_elements {
99 vortex_bail!("Cannot BitPackCompressor::compress an array with negative values");
100 }
101 }
102
103 let bit_width_freq = parray
104 .statistics()
105 .compute_bit_width_freq()
106 .ok_or_else(|| vortex_err!(ComputeError: "missing bit width frequency"))?;
107
108 let bit_width = self.find_bit_width(&parray)?;
109 let num_exceptions = count_exceptions(bit_width, &bit_width_freq);
110 if !self.allow_patches && num_exceptions > 0 {
111 vortex_panic!(
112 "Found {} exceptions with patchless bit width {}",
113 num_exceptions,
114 bit_width
115 )
116 }
117
118 if bit_width == parray.ptype().bit_width() as u8 {
119 return Ok(CompressedArray::uncompressed(array.clone()));
121 }
122
123 let validity = ctx.compress_validity(parray.validity())?;
124 let packed_buffer = unsafe { bitpack_unchecked(&parray, bit_width)? };
126 let patches = (num_exceptions > 0)
127 .then(|| {
128 gather_patches(&parray, bit_width, num_exceptions).map(|p| {
129 ctx.auxiliary("patches")
130 .excluding(&BITPACK_WITH_PATCHES)
131 .including(&BITPACK_NO_PATCHES)
132 .compress_patches(p)
133 })
134 })
135 .flatten()
136 .transpose()?;
137
138 Ok(CompressedArray::compressed(
139 unsafe {
141 BitPackedArray::new_unchecked(
142 packed_buffer,
143 parray.ptype(),
144 validity,
145 patches,
146 bit_width,
147 parray.len(),
148 )?
149 }
150 .into_array(),
151 Some(CompressionTree::new(self, vec![])),
152 array,
153 ))
154 }
155
156 fn used_encodings(&self) -> HashSet<EncodingId> {
157 HashSet::from([BitPackedEncoding::ID])
158 }
159}
160
161#[cfg(test)]
162mod tests {
163 use vortex_array::array::ConstantArray;
164 use vortex_array::IntoArray;
165 use vortex_buffer::buffer;
166
167 use crate::compressors::bitpacked::{BITPACK_NO_PATCHES, BITPACK_WITH_PATCHES};
168 use crate::compressors::EncodingCompressor;
169 use crate::SamplingCompressor;
170
171 #[test]
172 fn cannot_compress() {
173 assert!(BITPACK_NO_PATCHES
175 .can_compress(&buffer![-1i32, 0i32, 1i32].into_array())
176 .is_none());
177
178 assert!(BITPACK_NO_PATCHES
180 .can_compress(&buffer![0f32, 1f32].into_array())
181 .is_none());
182
183 assert!(BITPACK_NO_PATCHES
185 .can_compress(&ConstantArray::new(3u32, 10).into_array())
186 .is_none());
187 }
188
189 #[test]
190 fn can_compress() {
191 assert!(BITPACK_NO_PATCHES
193 .can_compress(&buffer![0u32, 1u32, 2u32].into_array())
194 .is_some());
195
196 assert!(BITPACK_WITH_PATCHES
198 .can_compress(&buffer![0i32, 1i32, 2i32].into_array())
199 .is_some());
200 }
201
202 #[test]
203 fn compress_negatives_fails() {
204 assert!(BITPACK_NO_PATCHES
205 .compress(
206 &buffer![-1i32, 0i32].into_array(),
207 None,
208 SamplingCompressor::default(),
209 )
210 .is_err());
211 }
212}