quick_compression/
lib.rs

1mod sorting;
2
3#[cfg(test)]
4mod tests {
5    use super::*;
6
7    #[test]
8    fn it_works() {
9        let cases = [
10            [0u32, 0, 0, 0],
11            [7, 6, 4, 3],
12            [0, 1, 2, 3],
13            [123, 123, 123, 234],
14            [255, 0, 0, 0],
15            [255, 2, 3, 6],
16        ];
17
18        let mut buffer = Vec::new();
19        for c in cases {
20            encode_4(&mut buffer, c);
21            let (rest, result) = decode_4(&buffer);
22            assert_eq!(rest, b"", "no data remains after decoding");
23
24            assert_eq!(result, c, "same numbers before and after decoding");
25            buffer.clear();
26        }
27    }
28}
29
30pub fn decode_4(data: &[u8]) -> (&[u8], [u32; 4]) {
31    let encoded_decoding_info = data[0];
32    // the first 5 bits are the encoded order.
33    let order = encoded_decoding_info & 0b11111;
34    let offset = encoded_decoding_info >> 5;
35    let offset = decode_offset(offset);
36
37    let (rest, mut nums) = group_varint_encoding::decompress_4(&data[1..]);
38
39    // resolve delta encoding
40    // afterwards all numbers should be sorted.
41    nums[0] += offset;
42    for i in [1, 2, 3] {
43        nums[i] += nums[i - 1];
44    }
45
46    // TODO we don't want to apply the order a second time
47    // we want the inverse
48
49    let order = sorting::inverse_encoding(order);
50    let nums = sorting::apply_encoding(nums, order);
51
52    (rest, nums)
53}
54
55pub fn encode_4(buffer: &mut Vec<u8>, ns: [u32; 4]) {
56    use group_varint_encoding::compress_block;
57
58    let (block, order, offset) = delta_encode(ns);
59
60    let encoded_decoding_info = sorting::encode_sorting_order(order) | (offset << 5);
61
62    buffer.push(encoded_decoding_info);
63
64    // let group varint encoding compress the remaining block for us.
65    compress_block(buffer, block);
66}
67
68fn decode_offset(offset: u8) -> u32 {
69    match offset {
70        0b000 => 0x0,
71        0b001 => 0xff,
72        0b010 => 0xfff,
73        0b011 => 0xffff,
74        0b100 => 0xfffff,
75        0b101 => 0xffffff,
76        0b110 => 0xfffffff,
77        0b111 => 0xffffffff,
78        _ => unreachable!("error in implementation, offset: 0b{offset:b}"),
79    }
80}
81
82// returns the best offset for a given number, encoded
83fn best_offset(n: u32) -> u8 {
84    for offset in (1..=0b111).rev() {
85        // check that this offset is not greater than n itself.
86        // and pick the first one for which it applies.
87        // Works because we start checking the big ones.
88        if decode_offset(offset) <= n {
89            return offset;
90        }
91    }
92    0
93}
94
95// returns delta encoded numbers, sorting order and initial offset (pre encoded)
96fn delta_encode(nums: [u32; 4]) -> ([u32; 4], [u8; 4], u8) {
97    // sort numbers
98    let (order, nums) = sorting::sorting_order(nums);
99    // pick lowest number first.
100    let first = nums[0];
101    let offset = best_offset(first);
102    let first_ = first - decode_offset(offset);
103
104    // TODO SIMD here? (and then overwrite first)
105
106    // res are the values subtracted from one another
107    // works fine because sorting order is preserved
108    let mut res = [first_, 0, 0, 0];
109    for i in [1, 2, 3] {
110        // TODO ensure that subtraction is not checked
111        res[i] = nums[i] - nums[i - 1];
112    }
113
114    (res, order, offset)
115}