1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
/*! # Fast Bitpacking algorithms This crate is a **Rust port of [Daniel Lemire's simdcomp C library](https://github.com/lemire/simdcomp)**. It contains different implementation of integers compression via bitpacking. Each implementation requires on a different CPU SIMD instruction set, for state of the art performance. This crate is used by [`tantivy`](https://github.com/tantivy-search/tantivy). For instance, with `SSE3`, you can typically expect more than 4 billions ints per seconds. Check the [Benchmark](#benchmark) for more accurate details. # Usage This crate contains different implementation for bitpacking depending on the instruction set available with your processor. **The resulting format are not compatible one with each other.** Currently the following are available : - scalar: implementation not using any SIMD instruction. A block has a size of 32. This implementation is still more performant naive solutions. - SSE3: bitpacking 4 integer at once. (block size of 128). *Requires the sse3 feature to be enabled. This feature is enabled by default.* - AVX: butpacking 8 integers at once. (block size of 256). *Delta integration is comparatively expensive*. Requires to enable the avx feature. I recommend using the SSE3 implementation if you are not sure what you are doing and you are targetting x86_64 CPUs that have been produced after 2006. Make sure to compile with RUSTFLAGS="-C target-cpu=native" # Examples without delta-encoding ``` extern crate bitpacking; use bitpacking::{SSE3BitPacker, BitPacker}; # fn main() { # let my_data = vec![7, 7, 7, 7, 11, 10, 15, 13, 6, 5, 3, 14, 5, 7, # 15, 12, 1, 10, 8, 10, 12, 14, 13, 1, 10, 1, 1, 10, 4, 15, 12, # 1, 2, 0, 8, 5, 14, 5, 2, 4, 1, 6, 14, 13, 5, 10, 10, 1, 6, 4, # 1, 12, 1, 1, 5, 15, 15, 2, 8, 6, 4, 3, 10, 8, 8, 9, 2, 6, 10, # 5, 7, 9, 0, 13, 15, 5, 13, 10, 0, 2, 10, 14, 5, 9, 12, 8, 5, 10, # 8, 8, 10, 5, 13, 8, 11, 14, 7, 14, 4, 2, 9, 12, 14, 5, 15, 12, 0, # 12, 13, 3, 13, 5, 4, 15, 9, 8, 9, 3, 3, 3, 1, 12, 0, 6, 11, 11, 12, 4]; let num_bits: u8 = SSE3BitPacker::num_bits(&my_data); // A block will be take at most 4 bytes per-integers. let mut compressed = vec![0u8; 4 * SSE3BitPacker::BLOCK_LEN]; # assert_eq!(num_bits, 4); let compressed_len = SSE3BitPacker::compress(&my_data, &mut compressed[..], num_bits); assert_eq!((num_bits as usize) * SSE3BitPacker::BLOCK_LEN / 8, compressed_len); // Decompressing let mut decompressed = vec![0u32; SSE3BitPacker::BLOCK_LEN]; SSE3BitPacker::decompress(&compressed[..compressed_len], &mut decompressed[..], num_bits); assert_eq!(&my_data, &decompressed); # } ``` # Examples with delta-encoding ``` extern crate bitpacking; use bitpacking::{SSE3BitPacker, BitPacker}; # fn main() { # let sorted_array = (17..145).collect(); let num_bits: u8 = SSE3BitPacker::num_bits_sorted(16u32, &my_data); // A block will be take at most 4 bytes per-integers. let mut compressed = vec![0u8; 4 * SSE3BitPacker::BLOCK_LEN]; # assert_eq!(num_bits, 4); let compressed_len = SSE3BitPacker::compress_sorted(&sorted_array, &mut compressed[..], num_bits); assert_eq!((num_bits as usize) * SSE3BitPacker::BLOCK_LEN / 8, compressed_len); // Decompressing let mut decompressed = vec![0u32; SSE3BitPacker::BLOCK_LEN]; SSE3BitPacker::decompress(17, &compressed[..compressed_len], &mut decompressed[..], num_bits); assert_eq!(&sorted_array, &decompressed); # } ``` */ #![allow(unused_unsafe)] #![feature(stdsimd)] #![feature(test)] #[macro_use] extern crate crunchy; #[cfg(test)] #[macro_use] pub(crate) mod tests; #[macro_use] mod macros; mod scalar; pub use scalar::ScalarBitPacker; #[cfg(feature = "sse3")] mod sse3; #[cfg(feature = "sse3")] pub use sse3::SSE3BitPacker; #[cfg(feature = "avx2")] mod avx2; #[cfg(feature = "avx2")] pub use avx2::AVX2BitPacker; pub trait BitPacker { /// Integers are compressed in pack of `BLOCK_LEN` `u32`-integers. /// /// `BLOCK_LEN` is required to be a power of 2, greater than 8. /// A high `BLOCK_LEN` may negatively impact the compression rate. /// /// Indeed all integers of a given block will take as many bits as the /// most significant bit of the largest integer. const BLOCK_LEN: usize; /// Type of the register used by the `BitPacker`. type DataType; /// Compress a block of `u32` /// /// Assumes that the integers are all lower than `2^num_bits`. /// The result is undefined if they are larger. /// /// Returns the amount of bytes of the compressed block. /// /// # Panics /// /// Panics if the compressed destination array is assumed to be large enough. /// Panics if `decompressed` length is not exactly the `BLOCK_LEN`. fn compress(decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize; /// Delta encode and compressed the `decompressed` array. /// /// Assumes that the `decompressed` array is sorted. /// The first element will assume the previous element is `initial`. /// /// Returns the amount of bytes of the compressed block. /// /// # Panics /// /// Panics if the compressed array is too short. /// Panics if the decompressed array is not exactly the `BLOCK_LEN`. fn compress_sorted(initial: u32, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize; /// Decompresses the `compressed` array and streams registers full of `u32` /// to the output functions. fn decompress_to<Output: FnMut(Self::DataType)>(compressed: &[u8], output: Output, num_bits: u8) -> usize; // Decompress the `compress` array to the `decompressed` array. // // Returns the amount of bytes that were consumed. // // # Panics // // Panics if the compressed array is too short, or the decompressed array is too short. fn decompress(compressed: &[u8], decompressed: &mut [u32], num_bits: u8) -> usize; // Decompress the`compress`array to the `decompressed` array. // The `compressed` array is assumed to have been delta-encoded and compressed. // // `initial` contains the previous used to delta-decode the first element. // // Returns the amount of bytes that were consumed. // // # Panics // // Panics if the compressed array is too short, or the decompressed array is too short. fn decompress_sorted(initial: u32, compressed: &[u8], decompressed: &mut [u32], num_bits: u8) -> usize; /// Returns the minimum number of bits used to represent all integers in the /// `decompressed` array. fn num_bits(decompressed: &[u32]) -> u8; /// Returns the minimum number of bits used to represent all the deltas in the /// `decompressed` array. fn num_bits_sorted(initial: u32, decompressed: &[u32]) -> u8; /// Returns the size of a compressed block. fn compressed_block_size(num_bits: u8) -> usize { Self::BLOCK_LEN * ( num_bits as usize) / 8 } } /// Returns the most significant bit. fn most_significant_bit(v: u32) -> u8 { if v == 0 { 0 } else { 32u8 - (v.leading_zeros() as u8) } }