bitpacking_plus/
lib.rs

1/*! # Extra [bitpacking](https://docs.rs/bitpacking/latest/bitpacking/) formats
2
3This crate wraps crate `bitpacking`. It contains variant bitpacking formats, inspired by [BPCells](https://github.com/bnprks/BPCells).
4
5See also this [article](https://bnprks.github.io/BPCells/articles/web-only/bitpacking-format.html)
6
7## Bitpacking format
8
9### The vanilla format
10Same as behaviors in vanilla compression of [bitpacking](https://docs.rs/bitpacking/latest/bitpacking/trait.BitPacker.html#examples-without-delta-encoding).
11
12### `m1` format
13Same as behaviors in vanilla compression of [bitpacking](https://docs.rs/bitpacking/latest/bitpacking/trait.BitPacker.html#examples-without-delta-encoding), but with 1 subtracted from each value prior to compression.
14
15### `d1` format
16Same as behaviors in delta compression of [bitpacking](https://docs.rs/bitpacking/latest/bitpacking/trait.BitPacker.html#examples-with-delta-encoding), which transforms the original input into the difference between consecutive values prior to bitpacking. Therefore, the original input block must be sorted.
17
18### `d1z` format
19Similar to `d1` format but with zigzag encoding applied after difference encoding, where $zigzag(x) = 2x$ if $x > 0$, while $x < 0$, $zigzag(x) = -2x - 1$. This is best for lists of close but not fully sorted runs of integers.
20
21*/
22
23#[cfg(test)]
24#[macro_use]
25pub(crate) mod tests;
26
27pub mod convert;
28pub use crate::convert::*;
29
30use bitpacking::{BitPacker, BitPacker1x, BitPacker4x, BitPacker8x};
31
32/** # Wrappers of the orignial bitpacking trait
33The `panic` rules of `compressed` and `decompressed` array data are exactly the same as `bitpacking` crate. Details can
34be found in [bitpacking::BitPacker](https://docs.rs/bitpacking/latest/bitpacking/trait.BitPacker.html).
35
36## Note:
37* The `num_bits` parameter in `pack*` methods can be set to 0, which means to detect actual `num_bits` by internally call
38[`bitpacker.num_bits()`](https://docs.rs/bitpacking/latest/bitpacking/trait.BitPacker.html#tymethod.num_bits) or
39[`bitpacker.num_bits_sorted()`](https://docs.rs/bitpacking/latest/bitpacking/trait.BitPacker.html#tymethod.num_bits_sorted)
40
41## Examples
42Here are some examples for the variant bitpacking formats (from tests.rs):
43
44```
45use bitpacking_plus::BitPackOps;
46
47use bitpacking::{BitPacker, BitPacker1x, BitPacker4x, BitPacker8x};
48use rand::{thread_rng, Rng};
49
50#[derive(Debug)]
51enum PackMethod {
52    Vanilla,
53    M1,
54    D1,
55    D1Z,
56}
57
58fn test_unpack_helper(
59    bitpacker: &dyn BitPackOps,
60    decompressed: &[u32],
61    compressed: &mut [u8],
62    block_size: usize,
63    pack_method: PackMethod,
64) {
65    // We only test one block.
66    println!("Test method: {:?}\nBlock size: {}", &pack_method, &block_size);
67    let initial = decompressed[0];
68    let n1 = match pack_method {
69        PackMethod::Vanilla => {
70            bitpacker.pack(decompressed.get(0..block_size).unwrap(), compressed, 0)
71        }
72        PackMethod::M1 => {
73            bitpacker.pack_m1(decompressed.get(0..block_size).unwrap(), compressed, 0)
74        }
75        PackMethod::D1 => {
76            bitpacker.pack_d1(decompressed.get(0..block_size).unwrap(), compressed, 0)
77        }
78        PackMethod::D1Z => {
79            bitpacker.pack_d1z(decompressed.get(0..block_size).unwrap(), compressed, 0)
80        }
81    };
82    let num_bits = 8 * n1 / block_size;
83    let mut new_decompressed = [0_u32; 256];
84    let n2 = match pack_method {
85        PackMethod::Vanilla => bitpacker.unpack(compressed, &mut new_decompressed, num_bits as u8),
86        PackMethod::M1 => bitpacker.unpack_m1(compressed, &mut new_decompressed, num_bits as u8),
87        PackMethod::D1 => {
88            bitpacker.unpack_d1(initial, compressed, &mut new_decompressed, num_bits as u8)
89        }
90        PackMethod::D1Z => {
91            bitpacker.unpack_d1z(initial, compressed, &mut new_decompressed, num_bits as u8)
92        }
93    };
94    assert_eq!(n1, n2);
95    assert_eq!(
96        decompressed.get(0..block_size).unwrap(),
97        new_decompressed.get(0..block_size).unwrap()
98    );
99    println!("Bytes used: {}", n1);
100    println!(
101        "Decompresed: {:?}\n",
102        new_decompressed.get(0..block_size).unwrap()
103    );
104}
105
106fn main() {
107    let mut my_data: [u32; 256] = [(); 256].map(|_| thread_rng().gen_range(0..20000));
108    println!("Orignial: {:?}\n", my_data);
109
110    let mut compressed = [0_u8; 8192];
111
112    let bitpacker4 = BitPacker4x::new();
113
114    test_unpack_helper(&bitpacker4, &my_data, &mut compressed, BitPacker4x::BLOCK_LEN, PackMethod::Vanilla);
115    test_unpack_helper(&bitpacker4, &my_data, &mut compressed, BitPacker4x::BLOCK_LEN, PackMethod::M1);
116    test_unpack_helper(&bitpacker4, &my_data, &mut compressed, BitPacker4x::BLOCK_LEN, PackMethod::D1Z);
117
118    // For `(un)pack_d1`, the decompressed data must be sorted.
119    my_data.sort();
120
121    test_unpack_helper(&bitpacker4, &my_data, &mut compressed, BitPacker4x::BLOCK_LEN, PackMethod::D1);
122}
123```
124*/
125pub trait BitPackOps {
126    fn pack(&self, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize;
127
128    fn unpack(&self, compressed: &[u8], decompressed: &mut [u32], num_bits: u8) -> usize;
129
130    fn pack_m1(&self, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize;
131
132    fn unpack_m1(&self, compressed: &[u8], decompressed: &mut [u32], num_bits: u8) -> usize;
133
134    fn pack_d1(&self, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize;
135
136    fn unpack_d1(
137        &self,
138        initial: u32,
139        compressed: &[u8],
140        decompressed: &mut [u32],
141        num_bits: u8,
142    ) -> usize;
143
144    fn pack_d1z(&self, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize;
145
146    fn unpack_d1z(
147        &self,
148        initial: u32,
149        compressed: &[u8],
150        decompressed: &mut [u32],
151        num_bits: u8,
152    ) -> usize;
153}
154
155macro_rules! impl_bit_pack_ops {
156    ($bitpacker:ident) => {
157        impl BitPackOps for $bitpacker {
158            fn pack(&self, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize {
159                let num_bits: u8 = match num_bits {
160                    0 => self.num_bits(decompressed),
161                    _ => num_bits,
162                };
163                self.compress(decompressed, compressed, num_bits)
164            }
165
166            fn unpack(&self, compressed: &[u8], decompressed: &mut [u32], num_bits: u8) -> usize {
167                self.decompress(compressed, decompressed, num_bits)
168            }
169
170            fn pack_m1(&self, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize {
171                let mut tmp = vec![0_u32; decompressed.len()];
172                vanilla_to_m1(decompressed, tmp.as_mut_slice());
173                self.pack(tmp.as_slice(), compressed, num_bits)
174            }
175
176            fn unpack_m1(
177                &self,
178                compressed: &[u8],
179                decompressed: &mut [u32],
180                num_bits: u8,
181            ) -> usize {
182                let n = self.unpack(compressed, decompressed, num_bits);
183                let n_decompressed = n * 8 / num_bits as usize;
184                m1_to_vanilla_self(decompressed.get_mut(0..n_decompressed).unwrap());
185                return n;
186            }
187
188            fn pack_d1(&self, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize {
189                let initial = decompressed[0];
190                let num_bits: u8 = match num_bits {
191                    0 => self.num_bits_sorted(initial, decompressed),
192                    _ => num_bits,
193                };
194                self.compress_sorted(initial, decompressed, compressed, num_bits)
195            }
196
197            fn unpack_d1(
198                &self,
199                initial: u32,
200                compressed: &[u8],
201                decompressed: &mut [u32],
202                num_bits: u8,
203            ) -> usize {
204                self.decompress_sorted(initial, compressed, decompressed, num_bits)
205            }
206
207            fn pack_d1z(&self, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize {
208                let mut tmp = vec![0_u32; decompressed.len()];
209                vanilla_to_d1z(decompressed, tmp.as_mut_slice());
210                self.pack(tmp.as_slice(), compressed, num_bits)
211            }
212
213            fn unpack_d1z(
214                &self,
215                initial: u32,
216                compressed: &[u8],
217                decompressed: &mut [u32],
218                num_bits: u8,
219            ) -> usize {
220                let n = self.decompress(compressed, decompressed, num_bits);
221                d1z_to_vanilla_self(decompressed, initial);
222                return n;
223            }
224        }
225    };
226}
227
228impl_bit_pack_ops!(BitPacker1x);
229impl_bit_pack_ops!(BitPacker4x);
230impl_bit_pack_ops!(BitPacker8x);