1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
//! A integer compression library which wraps the c [streamvbyte](https://github.com/lemire/streamvbyte) encoder/decoder
//!
//! Encodes 32 bit integers into variable length byte sequences in `O(n)` time.
//!
//! Input can be at most `u32::MAX` integers.
//!
//! # Examples
//!
//! Encode an u32 slice into a new buf in with [`encode`]:
//!
//! ```
//! use streamvbyte::encode;
//! let out_bytes: Vec<u8> = encode(&[1,2,44,5123,43,534]);
//! ```
//!
//! ...or by using the [`encode_to_buf`] function into an existing buffer:
//!
//! ```
//! use streamvbyte::{max_compressedbytes,encode_to_buf};
//! let input = vec![1,2,44,5123,43,534];
//! let max_bytes = max_compressedbytes(input.len());
//! let mut out_buf = vec![0;max_bytes];
//! let bytes_written = encode_to_buf(&input,&mut out_buf);
//! assert_eq!(bytes_written.unwrap(),10);
//! ```
//!
//! You can use [`encode_delta`] to encode increasing sequences more effectively:
//!
//! ```
//! use streamvbyte::encode_delta;
//! let out_bytes: Vec<u8> = encode_delta(&[1,2,44,64,71,534],0);
//! ```
//!
//! Decoding values works in much the same way:
//!
//! ```
//! use streamvbyte::{decode_delta,encode_delta};
//! let out_bytes: Vec<u8> = encode_delta(&[1,2,44,64,71,534],0);
//! let mut recovered = vec![0;6];
//! let bytes_read = decode_delta(&out_bytes,&mut recovered,0);
//! assert_eq!(out_bytes.len(),bytes_read);
//! assert_eq!(&recovered,&[1,2,44,64,71,534]);
//! ```
//! Note: **length** of the output buf `recovered` needs to match the input length. This information needs to be stored
//! external to compressed output.
//!

use thiserror::Error;

///! Errors that can be emitted from the streamvbyte crate and the underlying -sys crate
#[derive(Error, Debug)]
pub enum StreamVbyteError {
    /// Output buffer might overflow as it is not at least max_compressedbytes long
    #[error("insufficient output buffer len: is {0}, expected {1}")]
    OutbufOverflow(usize, usize),
}

///! Returns the maximum number of bytes required by the compressor to encode `length` u32s
pub fn max_compressedbytes(length: usize) -> usize {
    // number of control bytes:
    let cb = (length + 3) / 4;
    // maximum number of control bytes:
    let db = length * std::mem::size_of::<u32>();
    cb + db
}

/// Encode a sequence of u32 integers into a vbyte encoded byte representation.
/// Internally a buffer of length [`max_compressedbytes`] is allocated to store the compressed result.
///
/// The buffer will be truncated to the correct length before returning.
///
/// # Examples
///
/// ```
/// use streamvbyte::encode;
/// let out_bytes: Vec<u8> = encode(&[1,2,44,5123,43,534]);
/// ```
/// # Return
///
/// Returns the encoded output as a byte buffer
///
pub fn encode(input: &[u32]) -> Vec<u8> {
    let output_bytes_req = max_compressedbytes(input.len()) as usize;
    let mut buf = vec![0; output_bytes_req];
    // SAFETY: unwrap ok as we compute required max bytes beforehand
    let bytes_written = encode_to_buf(input, &mut buf).unwrap();
    buf.truncate(bytes_written);
    buf
}

/// Decode a sequence of u32 integers from a vbyte encoded byte representation into an existing buffer `output`.
///
/// # Arguments
///
/// * `input` - The input sequence of vbyte encoding (u8s)
/// * `output` - The output buf to store the recovered u32 integers. **MUST** be the same size as the original input sequence
///
/// # Examples
///
/// ```
/// use streamvbyte::{max_compressedbytes,encode,decode};
/// let input = vec![1,2,44,5123,43,534];
/// let out_buf = encode(&input);
/// let mut recovered = vec![0;6];
/// let bytes_read = decode(&out_buf,&mut recovered);
/// assert_eq!(bytes_read,out_buf.len());
/// ```
///
/// # Return
///
/// Returns the number of bytes processed from input during decoding
///
pub fn decode(input: &[u8], output: &mut [u32]) -> usize {
    unsafe {
        streamvbyte_sys::streamvbyte_decode(
            input.as_ptr(),
            output.as_mut_ptr(),
            output.len() as u32,
        ) as usize
    }
}

/// Encode a sequence of u32 integers into a vbyte encoded byte representation into an existing buffer `output`.
///
/// Required: output buf is at least [`max_compressedbytes`] long.
///
///
/// # Examples
///
/// ```
/// use streamvbyte::{max_compressedbytes,encode_to_buf};
/// let input = vec![1,2,44,5123,43,534];
/// let max_bytes = max_compressedbytes(input.len());
/// let mut out_buf = vec![0;max_bytes];
/// let bytes_written = encode_to_buf(&input,&mut out_buf);
/// assert_eq!(bytes_written.unwrap(),10);
/// ```
/// # Return
///
/// Returns the number of bytes written to output during encoding
///
pub fn encode_to_buf(input: &[u32], output: &mut [u8]) -> Result<usize, StreamVbyteError> {
    let output_bytes_req = max_compressedbytes(input.len());
    if output.len() < output_bytes_req {
        return Err(StreamVbyteError::OutbufOverflow(
            output.len(),
            output_bytes_req,
        ));
    }
    // SAFETY: output buf is as long as max compressed size
    unsafe {
        Ok(streamvbyte_sys::streamvbyte_encode(
            input.as_ptr(),
            input.len() as u32,
            output.as_mut_ptr(),
        ) as usize)
    }
}

/// Encode a sequence **non decreasing** of u32 integers into a vbyte encoded byte representation.
/// Internally a buffer of length [`max_compressedbytes`] is allocated to store the compressed result.
///
/// The buffer will be truncated to the correct length before returning.
///
/// # Arguments
///
/// * `input` - The input sequence of non decreasing u32 integers
/// * `initial` - The intial value to substract from the all the value in the array to decrease the universe. **MUST** be <= input\[0\]
///
/// # Examples
///
/// ```
/// use streamvbyte::encode_delta;
/// let out_bytes: Vec<u8> = encode_delta(&[1,2,44,5123,43,534],1);
/// ```
/// # Return
///
/// Returns the encoded output as a byte buffer
///
pub fn encode_delta(input: &[u32], intial: u32) -> Vec<u8> {
    let output_bytes_req = max_compressedbytes(input.len());
    let mut buf = vec![0; output_bytes_req];
    // SAFETY: unwrap ok as we compute required max bytes beforehand
    let bytes_written = encode_delta_to_buf(input, &mut buf, intial).unwrap();
    buf.truncate(bytes_written);
    buf
}

/// Encode a sequence **non decreasing** of u32 integers into a vbyte encoded byte representation into an existing buffer `output`.
///
/// # Arguments
///
/// * `input` - The input sequence of non decreasing u32 integers
/// * `output` - The output u8 slice of at least  [`max_compressedbytes`]
/// * `initial` - The intial value to substract from the all the value in the array to decrease the universe. **MUST** be <= input\[0\]
///
/// Required: output buf is at least [`max_compressedbytes`] long.
///
///
/// # Examples
///
/// ```
/// use streamvbyte::{max_compressedbytes,encode_delta_to_buf};
/// let input = vec![1,2,44,54,433,534];
/// let max_bytes = max_compressedbytes(input.len());
/// let mut out_buf = vec![0;max_bytes];
/// let bytes_written = encode_delta_to_buf(&input,&mut out_buf,0);
/// assert_eq!(bytes_written.unwrap(),9);
/// ```
/// # Return
///
/// Returns the number of bytes written to output during encoding
///
pub fn encode_delta_to_buf(
    input: &[u32],
    output: &mut [u8],
    initial: u32,
) -> Result<usize, StreamVbyteError> {
    let output_bytes_req = max_compressedbytes(input.len());
    if output.len() < output_bytes_req {
        return Err(StreamVbyteError::OutbufOverflow(
            output.len(),
            output_bytes_req,
        ));
    }
    unsafe {
        Ok(streamvbyte_sys::streamvbyte_delta_encode(
            input.as_ptr(),
            input.len() as u32,
            output.as_mut_ptr(),
            initial,
        ) as usize)
    }
}

/// Decode a sequence of non decreasing u32 integers from a vbyte encoded byte representation into an existing buffer `output`.
///
/// # Arguments
///
/// * `input` - The input sequence of vbyte encoding (u8s)
/// * `output` - The output buf to store the recovered non decreasing u32 integers. **MUST** be the same size as the original input sequence
/// * `initial` - The intial value thaw was substract from the all the value in the array during encoding.
///
/// # Examples
///
/// ```
/// use streamvbyte::{max_compressedbytes,encode_delta,decode_delta};
/// let input = vec![1,2,44,5123,43,534];
/// let out_buf = encode_delta(&input,1);
/// let mut recovered = vec![0;6];
/// let bytes_read = decode_delta(&out_buf,&mut recovered,1);
/// assert_eq!(bytes_read,out_buf.len());
/// ```
/// # Return
///
/// Returns the number of bytes processed from input during decoding
///
pub fn decode_delta(input: &[u8], output: &mut [u32], initial: u32) -> usize {
    unsafe {
        streamvbyte_sys::streamvbyte_delta_decode(
            input.as_ptr(),
            output.as_mut_ptr(),
            output.len() as u32,
            initial,
        ) as usize
    }
}

#[cfg(test)]
mod tests {

    fn create_input(bits: u32, len: usize) -> Vec<u32> {
        use rand::distributions::{Distribution, Uniform};
        let min = 0;
        let max: u64 = (1 << bits) - 1;
        let between = Uniform::from(min..=max);
        let mut rng = rand::thread_rng();
        let mut vec = Vec::with_capacity(len);
        for _ in 0..len {
            vec.push(between.sample(&mut rng) as u32);
        }
        vec
    }

    #[test]
    fn encode_decode_roundtrip() {
        let len = 10000;
        for bits in 1..=32 {
            for _ in 0..2 {
                let input = create_input(bits, len);
                let output_buf = super::encode(&input);
                let mut recovered: Vec<u32> = vec![0; len];
                let read_bytes = super::decode(&output_buf, &mut recovered);
                assert_eq!(read_bytes, output_buf.len());
                assert_eq!(recovered, input);
            }
        }
    }

    fn create_delta_input(bits: u32, len: usize) -> Vec<u32> {
        use rand::distributions::{Distribution, Uniform};
        let min = 0;
        let max: u64 = (1 << bits) - 1;
        let between = Uniform::from(min..=max);
        let mut rng = rand::thread_rng();
        let mut vec = Vec::with_capacity(len);
        let mut prev: u32 = 0;
        for _ in 0..len {
            let gap = between.sample(&mut rng) as u32;
            let new = prev + gap;
            prev = new;
            vec.push(new);
        }
        vec
    }

    #[test]
    fn encode_decode_delta_roundtrip() {
        let len = 10000;
        for bits in 1..=16 {
            for _ in 0..2 {
                let input = create_delta_input(bits, len);
                let output_buf = super::encode_delta(&input, 0);
                let mut recovered: Vec<u32> = vec![0; len];
                let read_bytes = super::decode_delta(&output_buf, &mut recovered, 0);
                assert_eq!(read_bytes, output_buf.len());
                assert_eq!(recovered, input);
            }
        }
    }
}