bitpacking_plus/lib.rs
1/*! # Extra [bitpacking](https://docs.rs/bitpacking/latest/bitpacking/) formats
2
3This crate wraps crate `bitpacking`. It contains variant bitpacking formats, inspired by [BPCells](https://github.com/bnprks/BPCells).
4
5See also this [article](https://bnprks.github.io/BPCells/articles/web-only/bitpacking-format.html)
6
7## Bitpacking format
8
9### The vanilla format
10Same as behaviors in vanilla compression of [bitpacking](https://docs.rs/bitpacking/latest/bitpacking/trait.BitPacker.html#examples-without-delta-encoding).
11
12### `m1` format
13Same as behaviors in vanilla compression of [bitpacking](https://docs.rs/bitpacking/latest/bitpacking/trait.BitPacker.html#examples-without-delta-encoding), but with 1 subtracted from each value prior to compression.
14
15### `d1` format
16Same as behaviors in delta compression of [bitpacking](https://docs.rs/bitpacking/latest/bitpacking/trait.BitPacker.html#examples-with-delta-encoding), which transforms the original input into the difference between consecutive values prior to bitpacking. Therefore, the original input block must be sorted.
17
18### `d1z` format
19Similar to `d1` format but with zigzag encoding applied after difference encoding, where $zigzag(x) = 2x$ if $x > 0$, while $x < 0$, $zigzag(x) = -2x - 1$. This is best for lists of close but not fully sorted runs of integers.
20
21*/
22
23#[cfg(test)]
24#[macro_use]
25pub(crate) mod tests;
26
27pub mod convert;
28pub use crate::convert::*;
29
30use bitpacking::{BitPacker, BitPacker1x, BitPacker4x, BitPacker8x};
31
32/** # Wrappers of the orignial bitpacking trait
33The `panic` rules of `compressed` and `decompressed` array data are exactly the same as `bitpacking` crate. Details can
34be found in [bitpacking::BitPacker](https://docs.rs/bitpacking/latest/bitpacking/trait.BitPacker.html).
35
36## Note:
37* The `num_bits` parameter in `pack*` methods can be set to 0, which means to detect actual `num_bits` by internally call
38[`bitpacker.num_bits()`](https://docs.rs/bitpacking/latest/bitpacking/trait.BitPacker.html#tymethod.num_bits) or
39[`bitpacker.num_bits_sorted()`](https://docs.rs/bitpacking/latest/bitpacking/trait.BitPacker.html#tymethod.num_bits_sorted)
40
41## Examples
42Here are some examples for the variant bitpacking formats (from tests.rs):
43
44```
45use bitpacking_plus::BitPackOps;
46
47use bitpacking::{BitPacker, BitPacker1x, BitPacker4x, BitPacker8x};
48use rand::{thread_rng, Rng};
49
50#[derive(Debug)]
51enum PackMethod {
52 Vanilla,
53 M1,
54 D1,
55 D1Z,
56}
57
58fn test_unpack_helper(
59 bitpacker: &dyn BitPackOps,
60 decompressed: &[u32],
61 compressed: &mut [u8],
62 block_size: usize,
63 pack_method: PackMethod,
64) {
65 // We only test one block.
66 println!("Test method: {:?}\nBlock size: {}", &pack_method, &block_size);
67 let initial = decompressed[0];
68 let n1 = match pack_method {
69 PackMethod::Vanilla => {
70 bitpacker.pack(decompressed.get(0..block_size).unwrap(), compressed, 0)
71 }
72 PackMethod::M1 => {
73 bitpacker.pack_m1(decompressed.get(0..block_size).unwrap(), compressed, 0)
74 }
75 PackMethod::D1 => {
76 bitpacker.pack_d1(decompressed.get(0..block_size).unwrap(), compressed, 0)
77 }
78 PackMethod::D1Z => {
79 bitpacker.pack_d1z(decompressed.get(0..block_size).unwrap(), compressed, 0)
80 }
81 };
82 let num_bits = 8 * n1 / block_size;
83 let mut new_decompressed = [0_u32; 256];
84 let n2 = match pack_method {
85 PackMethod::Vanilla => bitpacker.unpack(compressed, &mut new_decompressed, num_bits as u8),
86 PackMethod::M1 => bitpacker.unpack_m1(compressed, &mut new_decompressed, num_bits as u8),
87 PackMethod::D1 => {
88 bitpacker.unpack_d1(initial, compressed, &mut new_decompressed, num_bits as u8)
89 }
90 PackMethod::D1Z => {
91 bitpacker.unpack_d1z(initial, compressed, &mut new_decompressed, num_bits as u8)
92 }
93 };
94 assert_eq!(n1, n2);
95 assert_eq!(
96 decompressed.get(0..block_size).unwrap(),
97 new_decompressed.get(0..block_size).unwrap()
98 );
99 println!("Bytes used: {}", n1);
100 println!(
101 "Decompresed: {:?}\n",
102 new_decompressed.get(0..block_size).unwrap()
103 );
104}
105
106fn main() {
107 let mut my_data: [u32; 256] = [(); 256].map(|_| thread_rng().gen_range(0..20000));
108 println!("Orignial: {:?}\n", my_data);
109
110 let mut compressed = [0_u8; 8192];
111
112 let bitpacker4 = BitPacker4x::new();
113
114 test_unpack_helper(&bitpacker4, &my_data, &mut compressed, BitPacker4x::BLOCK_LEN, PackMethod::Vanilla);
115 test_unpack_helper(&bitpacker4, &my_data, &mut compressed, BitPacker4x::BLOCK_LEN, PackMethod::M1);
116 test_unpack_helper(&bitpacker4, &my_data, &mut compressed, BitPacker4x::BLOCK_LEN, PackMethod::D1Z);
117
118 // For `(un)pack_d1`, the decompressed data must be sorted.
119 my_data.sort();
120
121 test_unpack_helper(&bitpacker4, &my_data, &mut compressed, BitPacker4x::BLOCK_LEN, PackMethod::D1);
122}
123```
124*/
125pub trait BitPackOps {
126 fn pack(&self, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize;
127
128 fn unpack(&self, compressed: &[u8], decompressed: &mut [u32], num_bits: u8) -> usize;
129
130 fn pack_m1(&self, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize;
131
132 fn unpack_m1(&self, compressed: &[u8], decompressed: &mut [u32], num_bits: u8) -> usize;
133
134 fn pack_d1(&self, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize;
135
136 fn unpack_d1(
137 &self,
138 initial: u32,
139 compressed: &[u8],
140 decompressed: &mut [u32],
141 num_bits: u8,
142 ) -> usize;
143
144 fn pack_d1z(&self, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize;
145
146 fn unpack_d1z(
147 &self,
148 initial: u32,
149 compressed: &[u8],
150 decompressed: &mut [u32],
151 num_bits: u8,
152 ) -> usize;
153}
154
155macro_rules! impl_bit_pack_ops {
156 ($bitpacker:ident) => {
157 impl BitPackOps for $bitpacker {
158 fn pack(&self, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize {
159 let num_bits: u8 = match num_bits {
160 0 => self.num_bits(decompressed),
161 _ => num_bits,
162 };
163 self.compress(decompressed, compressed, num_bits)
164 }
165
166 fn unpack(&self, compressed: &[u8], decompressed: &mut [u32], num_bits: u8) -> usize {
167 self.decompress(compressed, decompressed, num_bits)
168 }
169
170 fn pack_m1(&self, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize {
171 let mut tmp = vec![0_u32; decompressed.len()];
172 vanilla_to_m1(decompressed, tmp.as_mut_slice());
173 self.pack(tmp.as_slice(), compressed, num_bits)
174 }
175
176 fn unpack_m1(
177 &self,
178 compressed: &[u8],
179 decompressed: &mut [u32],
180 num_bits: u8,
181 ) -> usize {
182 let n = self.unpack(compressed, decompressed, num_bits);
183 let n_decompressed = n * 8 / num_bits as usize;
184 m1_to_vanilla_self(decompressed.get_mut(0..n_decompressed).unwrap());
185 return n;
186 }
187
188 fn pack_d1(&self, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize {
189 let initial = decompressed[0];
190 let num_bits: u8 = match num_bits {
191 0 => self.num_bits_sorted(initial, decompressed),
192 _ => num_bits,
193 };
194 self.compress_sorted(initial, decompressed, compressed, num_bits)
195 }
196
197 fn unpack_d1(
198 &self,
199 initial: u32,
200 compressed: &[u8],
201 decompressed: &mut [u32],
202 num_bits: u8,
203 ) -> usize {
204 self.decompress_sorted(initial, compressed, decompressed, num_bits)
205 }
206
207 fn pack_d1z(&self, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize {
208 let mut tmp = vec![0_u32; decompressed.len()];
209 vanilla_to_d1z(decompressed, tmp.as_mut_slice());
210 self.pack(tmp.as_slice(), compressed, num_bits)
211 }
212
213 fn unpack_d1z(
214 &self,
215 initial: u32,
216 compressed: &[u8],
217 decompressed: &mut [u32],
218 num_bits: u8,
219 ) -> usize {
220 let n = self.decompress(compressed, decompressed, num_bits);
221 d1z_to_vanilla_self(decompressed, initial);
222 return n;
223 }
224 }
225 };
226}
227
228impl_bit_pack_ops!(BitPacker1x);
229impl_bit_pack_ops!(BitPacker4x);
230impl_bit_pack_ops!(BitPacker8x);