1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
/*!  # Fast Bitpacking algorithms

This crate is a **Rust port of [Daniel Lemire's simdcomp C library](https://github.com/lemire/simdcomp)**.
It contains different flavors of integer compression via bitpacking: `BitPacker1x`, `BitPacker4x`, and `BitPacker8x`.

Each produces a different format, incompatible with the others,
and each requires integers to be encoded in blocks of a different size.

`BitPacker4x` and `BitPacker8x` are designed specifically to leverage `SSE3`
and `AVX2` instructions respectively.

The library will fall back to a scalar implementation if these instruction
sets are not available. For instance:
- because your compilation target architecture is not `x86_64`
- because the CPU you use is from an older generation

I recommend using `BitPacker4x` if you are in doubt.

See the [`BitPacker` trait](./trait.BitPacker.html) for example usage.

*/

#![allow(unused_unsafe)]
#![warn(missing_docs)]

#[macro_use]
extern crate crunchy;

use std::marker::Sized;

#[cfg(test)]
#[macro_use]
pub(crate) mod tests;

#[macro_use]
mod macros;


/// Crate-private trait exposing a single associated function, `available`.
///
/// NOTE(review): presumably implemented by each instruction-set specific
/// backend so the crate can decide at runtime whether to use it or fall back
/// to the scalar implementation; the implementors are not visible here, confirm.
trait Available {
    /// Returns a `bool`; presumably `true` when the implementor can be used
    /// on the current CPU (TODO confirm against the implementors).
    fn available() -> bool;
}

/// Crate-private trait whose operations mirror those of the public
/// `BitPacker` trait, declared as `unsafe` associated functions
/// (no `&self` receiver).
///
/// NOTE(review): the preconditions that make these functions safe to call are
/// not stated in this file. Presumably the caller must have checked the slice
/// lengths and the availability of the required instruction set; confirm
/// against the implementors before relying on this.
trait UnsafeBitPacker {
    /// Block length; presumably the number of `u32` per block, like
    /// `BitPacker::BLOCK_LEN` (TODO confirm).
    const BLOCK_LEN: usize;
    /// Unsafe counterpart of `BitPacker::compress`; returns a `usize`,
    /// presumably the number of bytes written to `compressed`.
    unsafe fn compress(decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize;
    /// Unsafe counterpart of `BitPacker::compress_sorted` (delta-encoding variant).
    unsafe fn compress_sorted(initial: u32, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize;
    /// Unsafe counterpart of `BitPacker::decompress`; returns a `usize`,
    /// presumably the number of bytes read from `compressed`.
    unsafe fn decompress(compressed: &[u8], decompressed: &mut [u32], num_bits: u8) -> usize;
    /// Unsafe counterpart of `BitPacker::decompress_sorted` (delta-encoding variant).
    unsafe fn decompress_sorted(initial: u32, compressed: &[u8], decompressed: &mut [u32], num_bits: u8) -> usize;
    /// Unsafe counterpart of `BitPacker::num_bits`.
    unsafe fn num_bits(decompressed: &[u32]) -> u8;
    /// Unsafe counterpart of `BitPacker::num_bits_sorted`.
    unsafe fn num_bits_sorted(initial: u32, decompressed: &[u32]) -> u8;
}


/// # Examples without delta-encoding
/// ```
/// extern crate bitpacking;
///
/// use bitpacking::{BitPacker4x, BitPacker};
///
/// # fn main() {
/// # let my_data: Vec<u32> = vec![7, 7, 7, 7, 11, 10, 15, 13, 6, 5, 3, 14, 5, 7,
/// #    15, 12, 1, 10, 8, 10, 12, 14, 13, 1, 10, 1, 1, 10, 4, 15, 12,
/// #    1, 2, 0, 8, 5, 14, 5, 2, 4, 1, 6, 14, 13, 5, 10, 10, 1, 6, 4,
/// #    1, 12, 1, 1, 5, 15, 15, 2, 8, 6, 4, 3, 10, 8, 8, 9, 2, 6, 10,
/// #    5, 7, 9, 0, 13, 15, 5, 13, 10, 0, 2, 10, 14, 5, 9, 12, 8, 5, 10,
/// #    8, 8, 10, 5, 13, 8, 11, 14, 7, 14, 4, 2, 9, 12, 14, 5, 15, 12, 0,
/// #    12, 13, 3, 13, 5, 4, 15, 9, 8, 9, 3, 3, 3, 1, 12, 0, 6, 11, 11, 12, 4];
///
/// let bitpacker = BitPacker4x::new();
///
/// let num_bits: u8 = bitpacker.num_bits(&my_data);
///
/// // A block will take at most 4 bytes per integer.
/// let mut compressed = vec![0u8; 4 * BitPacker4x::BLOCK_LEN];
///
/// # assert_eq!(num_bits, 4);
/// let compressed_len = bitpacker.compress(&my_data, &mut compressed[..], num_bits);
///
/// assert_eq!((num_bits as usize) *  BitPacker4x::BLOCK_LEN / 8, compressed_len);
///
/// // Decompressing
/// let mut decompressed = vec![0u32; BitPacker4x::BLOCK_LEN];
/// bitpacker.decompress(&compressed[..compressed_len], &mut decompressed[..], num_bits);
///
/// assert_eq!(&my_data, &decompressed);
/// # }
/// ```
///
///
///
/// # Examples with delta-encoding
///
/// Delta-encoding makes it possible to store sorted integers in an efficient manner.
/// Rather than encoding the integers directly, the interval (or deltas) between each of them
/// are computed and then encoded.
///
/// Decoding then requires first decoding the deltas and then computing a cumulative sum
/// (also called integration or prefix sum) over them.
///
/// ```
/// extern crate bitpacking;
///
/// use bitpacking::{BitPacker4x, BitPacker};
///
/// # fn main() {
/// # let my_data: Vec<u32> = vec![0, 5, 6, 8, 12, 21, 30, 38,
/// # 46, 52, 59, 61, 62, 62, 71, 71, 73, 74, 76,
/// # 77, 80, 87, 96, 99, 105, 114, 119, 121, 128,
/// # 133, 138, 145, 152, 161, 161, 166, 175, 176,
/// # 180, 186, 189, 193, 202, 211, 220, 224, 229,
/// # 238, 247, 255, 261, 267, 267, 268, 269, 269,
/// # 270, 271, 279, 283, 285, 291, 297, 303, 305,
/// # 309, 310, 315, 316, 316, 321, 324, 329, 337,
/// # 339, 342, 350, 355, 364, 373, 382, 386, 392,
/// # 400, 408, 414, 423, 431, 433, 436, 441, 444,
/// # 445, 454, 463, 463, 465, 472, 474, 477, 480,
/// # 488, 493, 496, 501, 503, 509, 515, 519, 526,
/// # 526, 532, 539, 542, 542, 542, 549, 557, 565,
/// # 566, 573, 578, 580, 581, 585, 588, 588, 591];
///
///
/// // The initial value is used to compute the first delta.
/// // In most use cases, you will be compressing long increasing
/// // integer sequences.
/// //
/// // You should probably pass an initial value of `0u32` to the
/// // first block if you do not have any information.
/// //
/// // When encoding the second block however, you will want to pass the last
/// // value of the first block.
/// let initial_value = 0u32;
///
/// let bitpacker = BitPacker4x::new();
///
/// let num_bits: u8 = bitpacker.num_bits_sorted(initial_value, &my_data);
///
/// // A block will take at most 4 bytes per integer.
/// let mut compressed = vec![0u8; 4 * BitPacker4x::BLOCK_LEN];
///
/// # assert_eq!(num_bits, 4);
///
/// let compressed_len = bitpacker.compress_sorted(initial_value, &my_data, &mut compressed[..], num_bits);
///
/// assert_eq!((num_bits as usize) *  BitPacker4x::BLOCK_LEN / 8, compressed_len);
///
/// // Decompressing
/// let mut decompressed = vec![0u32; BitPacker4x::BLOCK_LEN];
///
/// // The initial value must be the same as the one passed
/// // when compressing the block.
/// bitpacker.decompress_sorted(initial_value, &compressed[..compressed_len], &mut decompressed[..], num_bits);
///
/// assert_eq!(&my_data, &decompressed);
/// # }
/// ```
pub trait BitPacker: Sized {
    /// Number of `u32` values contained in one compressed block.
    const BLOCK_LEN: usize;

    /// Inspects the instruction sets available on the current CPU
    /// and returns the best implementation for it.
    ///
    /// This call is extremely cheap and performs no heap allocation,
    /// so there is no need to cache the returned value aggressively.
    fn new() -> Self;

    /// Bitpacks one block of `u32`.
    ///
    /// All integers are assumed to be lower than `2^num_bits`.
    /// The result is undefined if some of them are larger.
    ///
    /// Returns the number of bytes of the compressed block.
    ///
    /// # Panics
    ///
    /// - Panics if the `compressed` destination array is too small.
    /// - Panics if the length of `decompressed` is not exactly `BLOCK_LEN`.
    fn compress(&self, decompressed: &[u32], compressed: &mut [u8], num_bits: u8) -> usize;

    /// Delta-encodes and then bitpacks the `decompressed` array.
    ///
    /// The elements of `decompressed` are assumed to be sorted.
    /// `initial` is the value used to compute the first delta.
    ///
    /// Returns the number of bytes of the compressed block.
    ///
    /// # Panics
    ///
    /// - Panics if `initial` is greater than `decompressed[0]`.
    /// - Panics if `decompressed` is not sorted.
    /// - Panics if the length of `decompressed` is not exactly `BLOCK_LEN`.
    /// - Panics if `compressed` is not large enough to receive the compressed data.
    fn compress_sorted(
        &self,
        initial: u32,
        decompressed: &[u32],
        compressed: &mut [u8],
        num_bits: u8,
    ) -> usize;

    /// Unpacks the `compressed` array into the `decompressed` array.
    ///
    /// Returns the number of bytes that were consumed.
    ///
    /// # Panics
    ///
    /// Panics if the `compressed` array is too short, or if the
    /// `decompressed` array is too short.
    fn decompress(&self, compressed: &[u8], decompressed: &mut [u32], num_bits: u8) -> usize;

    /// Unpacks the `compressed` array into the `decompressed` array,
    /// assuming the block was delta-encoded before being bitpacked.
    ///
    /// `initial` must be the very value that was given as the `initial`
    /// argument when the block was compressed.
    ///
    /// Returns the number of bytes that have been read.
    ///
    /// # Panics
    ///
    /// - Panics if the `compressed` array is too short to contain `BLOCK_LEN` elements.
    /// - Panics if the `decompressed` array is too short.
    fn decompress_sorted(
        &self,
        initial: u32,
        compressed: &[u8],
        decompressed: &mut [u32],
        num_bits: u8,
    ) -> usize;

    /// Returns the minimum number of bits needed to represent the largest
    /// integer of the `decompressed` array.
    fn num_bits(&self, decompressed: &[u32]) -> u8;

    /// Returns the minimum number of bits needed to represent the largest
    /// `delta` among the deltas of the `decompressed` array, the first delta
    /// being computed against `initial`.
    fn num_bits_sorted(&self, initial: u32, decompressed: &[u32]) -> u8;

    /// Returns the size, in bytes, of a block compressed using `num_bits`
    /// bits per integer.
    fn compressed_block_size(num_bits: u8) -> usize {
        // Total number of bits in the block, then converted to bytes.
        let bits_per_block = Self::BLOCK_LEN * usize::from(num_bits);
        bits_per_block / 8
    }
}

/// Returns the 1-based position of the most significant set bit of `v`,
/// i.e. the minimum number of bits required to represent `v`.
///
/// Returns `0` when `v` is `0`, and `32` when the top bit is set.
fn most_significant_bit(v: u32) -> u8 {
    // `leading_zeros()` yields 32 for zero, so the subtraction already
    // produces 0 in that case and no dedicated branch is required.
    let used_bits = 32 - v.leading_zeros();
    // `used_bits` is within 0..=32, so narrowing to `u8` cannot truncate.
    used_bits as u8
}

mod bitpacker1x;
mod bitpacker4x;
mod bitpacker8x;

pub use bitpacker1x::BitPacker1x;
pub use bitpacker4x::BitPacker4x;
pub use bitpacker8x::BitPacker8x;