stream_vbyte/encode/
mod.rs

1use std::cmp;
2
3use crate::encoded_shape;
4use crate::scalar::Scalar;
5
6#[cfg(feature = "x86_sse41")]
7pub mod sse41;
8
/// Turns `u32` numbers into their encoded byte form.
pub trait Encoder {
    /// Encode complete quads (groups of 4 numbers) taken from the front of `input`.
    ///
    /// `control_bytes` is exactly as long as the number of complete quads in `input`. One control
    /// byte is written there per encoded quad, while the encoded numbers themselves go to
    /// `output`.
    ///
    /// An implementation is free to stop before the end of the provided input, but whatever it
    /// does write must cover whole quads only.
    ///
    /// An implementation must not touch any part of `output` beyond the region that will hold
    /// encoded numbers once every control byte has been processed.
    ///
    /// Returns a pair: how many numbers were encoded, and how many bytes were written to
    /// `output`.
    fn encode_quads(
        input: &[u32],
        control_bytes: &mut [u8],
        output: &mut [u8],
    ) -> (usize, usize);
}
26
27/// Encode the `input` slice into the `output` slice.
28///
29/// If you don't have specific knowledge of the input that would let you determine the encoded
30/// length ahead of time, make `output` 5x as long as `input`. The worst-case encoded length is 4
31/// bytes per `u32` plus another byte for every 4 `u32`s, including any trailing partial 4-some.
32///
33/// Returns the number of bytes written to the `output` slice.
34pub fn encode<E: Encoder>(input: &[u32], output: &mut [u8]) -> usize {
35    if input.len() == 0 {
36        return 0;
37    }
38
39    let shape = encoded_shape(input.len());
40
41    let (control_bytes, encoded_bytes) = output.split_at_mut(shape.control_bytes_len);
42
43    let (nums_encoded, mut num_bytes_written) = E::encode_quads(
44        &input[..],
45        &mut control_bytes[0..shape.complete_control_bytes_len],
46        &mut encoded_bytes[..],
47    );
48
49    // may be some input left, use Scalar to finish it
50    let control_bytes_written = nums_encoded / 4;
51
52    let (more_nums_encoded, more_bytes_written) = Scalar::encode_quads(
53        &input[nums_encoded..],
54        &mut control_bytes[control_bytes_written..shape.complete_control_bytes_len],
55        &mut encoded_bytes[num_bytes_written..],
56    );
57
58    num_bytes_written += more_bytes_written;
59
60    debug_assert_eq!(
61        shape.complete_control_bytes_len * 4,
62        nums_encoded + more_nums_encoded
63    );
64
65    // last control byte, if there were leftovers
66    if shape.leftover_numbers > 0 {
67        let mut control_byte = 0;
68        let mut nums_encoded = shape.complete_control_bytes_len * 4;
69
70        for i in 0..shape.leftover_numbers {
71            let num = input[nums_encoded];
72            let len = encode_num_scalar(num, &mut encoded_bytes[num_bytes_written..]);
73
74            control_byte |= ((len - 1) as u8) << (i * 2);
75
76            num_bytes_written += len;
77            nums_encoded += 1;
78        }
79        control_bytes[shape.complete_control_bytes_len] = control_byte;
80    }
81
82    control_bytes.len() + num_bytes_written
83}
84
/// Encode a single number as the shortest little-endian byte sequence that can represent it.
///
/// The 1 to 4 low-order bytes of `num` that are needed to hold its value are written to the
/// start of `output`. Zero is encoded as a single `0x00` byte. Bytes of `output` past the
/// returned length are left unmodified.
///
/// Returns the number of bytes written (always in `1..=4`).
///
/// # Panics
///
/// Panics if `output` is shorter than the encoded length. Nothing is written to `output` in that
/// case.
#[inline]
pub fn encode_num_scalar(num: u32, output: &mut [u8]) -> usize {
    // 0_u32 has no significant bytes at all, but it must still occupy one byte of output.
    let len = cmp::max(1_usize, 4 - num.leading_zeros() as usize / 8);

    // Little-endian order puts the significant bytes first, so the encoding is simply a prefix
    // of the full 4-byte representation. Slicing up front checks the length before any write.
    output[..len].copy_from_slice(&num.to_le_bytes()[..len]);

    len
}
97
#[cfg(test)]
mod tests {
    use super::*;

    /// Encodes `num` into a zeroed 4-byte scratch buffer and hands back the reported length
    /// together with the buffer contents.
    fn encode_to_scratch(num: u32) -> (usize, [u8; 4]) {
        let mut scratch = [0_u8; 4];
        let len = encode_num_scalar(num, &mut scratch);
        (len, scratch)
    }

    #[test]
    fn encode_num_zero() {
        // Zero still takes up one byte.
        assert_eq!((1, [0x00, 0x00, 0x00, 0x00]), encode_to_scratch(0));
    }

    #[test]
    fn encode_num_bottom_two_bytes() {
        // Largest value that fits in two bytes.
        assert_eq!((2, [0xFF, 0xFF, 0x00, 0x00]), encode_to_scratch((1 << 16) - 1));
    }

    #[test]
    fn encode_num_middleish() {
        // Just past the two-byte boundary, so three bytes are needed.
        assert_eq!((3, [0x03, 0x00, 0x01, 0x00]), encode_to_scratch((1 << 16) + 3));
    }

    #[test]
    fn encode_num_u32_max() {
        // Every byte is significant.
        assert_eq!((4, [0xFF, 0xFF, 0xFF, 0xFF]), encode_to_scratch(u32::MAX));
    }
}