varint_compression/
lib.rs

1#[cfg(test)]
2mod tests {
3    use super::*;
4
5    macro_rules! single_test {
6        ($n:ident, $i:expr) => {
7            #[test]
8            fn $n() {
9                let i = $i;
10                let c = compress(i);
11                let (d, rest) = decompress(&c).unwrap();
12                assert_eq!(i, d);
13                assert!(rest.is_empty(), "no rest should be remaining.");
14            }
15        };
16    }
17
18    #[test]
19    fn simple1() {
20        let i = 36;
21        let c = compress(i);
22        let (d, rest) = decompress(&c).unwrap();
23        assert_eq!(i, d);
24        assert!(rest.is_empty(), "no rest should be remaining.");
25    }
26
27    single_test!(simple0, 0);
28    single_test!(simple2, 2);
29    single_test!(simple3, 3);
30    single_test!(simple32, 32);
31    single_test!(simple64, 64);
32    single_test!(simple65, 65);
33    single_test!(simple127, 127);
34    single_test!(simple128, 128);
35    single_test!(simple244, 244);
36    single_test!(simple12341234, 12341234);
37
38    #[test]
39    fn test_set() {
40        let ints = [
41            2134123213213u64,
42            2313,
43            3,
44            3213,
45            21321,
46            3213,
47            213,
48            213,
49            5435,
50            5654,
51            6,
52            5437,
53            567,
54            3465241345,
55            677,
56            90,
57            98765,
58            4,
59            324567897654321,
60            3456,
61            7754,
62            32,
63            4567,
64            432,
65            56789654321,
66            4,
67            5678906543,
68            256,
69            7895432,
70            56789654,
71            3256,
72            78543,
73        ];
74
75        for i in ints {
76            let c = compress(i);
77            let (d, rest) = decompress(&c).unwrap();
78
79            assert_eq!(d, i);
80            assert_eq!(rest, Vec::new());
81        }
82
83        // now test if rest is parsed correctly
84
85        for i in ints {
86            let mut c = compress(i);
87            c.push(1);
88            c.push(2);
89            c.push(3);
90            c.push(4);
91            let (d, rest) = decompress(&c).unwrap();
92
93            assert_eq!(d, i);
94            assert_eq!(rest, vec![1, 2, 3, 4]);
95        }
96    }
97
98    #[test]
99    fn list_compression() {
100        let list = (0..100000).map(|n| n * 13).collect::<Vec<u64>>();
101        let c = compress_list(&list);
102        let d = decompress_list(&c).unwrap();
103
104        assert_eq!(d, list);
105    }
106
107    #[test]
108    fn compression_fuzzing() {
109        let list = (0..100000u64).map(|n| n * 13);
110        for elem in list {
111            let c = compress(elem);
112            let (d, rest) = decompress(&c).unwrap();
113
114            assert_eq!(d, elem);
115            assert!(rest.is_empty(), "no data should be remaining.");
116        }
117    }
118}
119
120pub fn compress(mut val: u64) -> Vec<u8> {
121    if val == 0 {
122        return vec![0];
123    }
124
125    let mut v = Vec::new();
126
127    while val > 0 {
128        // take the first 7 bytes of the value
129        let mut byte = (val & 0b111_1111) as u8;
130
131        // decrement value
132        val >>= 7;
133
134        // Set the `follow` byte,
135        // if there remains information to be encoded
136        if val > 0 {
137            byte |= 0b1000_0000;
138        }
139
140        v.push(byte);
141    }
142
143    v.shrink_to_fit();
144    v
145}
146
147pub fn compress_list(vs: &[u64]) -> Vec<u8> {
148    let mut buffer = Vec::new();
149    for v in vs {
150        let c = compress(*v);
151        buffer.extend(c);
152    }
153
154    buffer
155}
156
157/// decompresses a string, returning the rest of the input as second argument.
158/// If an error occured, it means that more data was expected
159pub fn decompress(data: &[u8]) -> Result<(u64, &[u8]), &str> {
160    let mut val = 0u64;
161
162    for i in 0..data.len() {
163        let byte = data[i];
164        let byte_index = i as u64 * 7;
165
166        // update value
167        {
168            // cut of leading byte, if present
169            let byte = (byte & 0b0111_1111) as u64;
170            // decode proper position in value
171            let byte = byte << byte_index;
172            // assign to value
173            val |= byte;
174        }
175
176        // continue?
177        if byte & 0b1000_0000 != 0 {
178            continue;
179        }
180
181        // end of value is reached, return
182
183        let i = i + 1;
184        let rest = &data[i..];
185        return Ok((val, rest));
186    }
187
188    Err("end of input reached")
189}
190
191pub fn decompress_n<const N: usize>(mut data: &[u8]) -> Result<([u64; N], &[u8]), &str> {
192    let mut out = [0; N];
193    for entry in out.iter_mut() {
194        let (val, rest) = decompress(data)?;
195        *entry = val;
196        data = rest;
197    }
198
199    Ok((out, data))
200}
201
202pub fn decompress_list(mut data: &[u8]) -> Result<Vec<u64>, &str> {
203    let mut out = Vec::with_capacity(data.len());
204    while !data.is_empty() {
205        let (val, rest) = decompress(data)?;
206        out.push(val);
207        data = rest;
208    }
209
210    Ok(out)
211}