1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
use std::cmp;

use crate::encoded_shape;
use crate::scalar::Scalar;

#[cfg(feature = "x86_sse41")]
pub mod sse41;

/// Encode numbers to bytes.
pub trait Encoder {
    /// Encode complete quads of input numbers.
    ///
    /// `control_bytes` will be exactly as long as the number of complete 4-number quads in `input`.
    ///
    /// Control bytes are written to `control_bytes` and encoded numbers to `output`.
    ///
    /// Implementations may choose to encode fewer than the full provided input, but any writes done
    /// must be for full quads.
    ///
    /// Implementations must not write to `output` outside of the area that will be populated by
    /// encoded numbers when all control bytes are processed..
    ///
    /// Returns the number of numbers encoded and the number of bytes written to `output`.
    fn encode_quads(input: &[u32], control_bytes: &mut [u8], output: &mut [u8]) -> (usize, usize);
}

/// Encode the `input` slice into the `output` slice.
///
/// If you don't have specific knowledge of the input that would let you determine the encoded
/// length ahead of time, make `output` 5x as long as `input`. The worst-case encoded length is 4
/// bytes per `u32` plus another byte for every 4 `u32`s, including any trailing partial 4-some.
///
/// Returns the number of bytes written to the `output` slice.
pub fn encode<E: Encoder>(input: &[u32], output: &mut [u8]) -> usize {
    if input.len() == 0 {
        return 0;
    }

    let shape = encoded_shape(input.len());

    let (control_bytes, encoded_bytes) = output.split_at_mut(shape.control_bytes_len);

    let (nums_encoded, mut num_bytes_written) = E::encode_quads(
        &input[..],
        &mut control_bytes[0..shape.complete_control_bytes_len],
        &mut encoded_bytes[..],
    );

    // may be some input left, use Scalar to finish it
    let control_bytes_written = nums_encoded / 4;

    let (more_nums_encoded, more_bytes_written) = Scalar::encode_quads(
        &input[nums_encoded..],
        &mut control_bytes[control_bytes_written..shape.complete_control_bytes_len],
        &mut encoded_bytes[num_bytes_written..],
    );

    num_bytes_written += more_bytes_written;

    debug_assert_eq!(
        shape.complete_control_bytes_len * 4,
        nums_encoded + more_nums_encoded
    );

    // last control byte, if there were leftovers
    if shape.leftover_numbers > 0 {
        let mut control_byte = 0;
        let mut nums_encoded = shape.complete_control_bytes_len * 4;

        for i in 0..shape.leftover_numbers {
            let num = input[nums_encoded];
            let len = encode_num_scalar(num, &mut encoded_bytes[num_bytes_written..]);

            control_byte |= ((len - 1) as u8) << (i * 2);

            num_bytes_written += len;
            nums_encoded += 1;
        }
        control_bytes[shape.complete_control_bytes_len] = control_byte;
    }

    control_bytes.len() + num_bytes_written
}

#[inline]
pub fn encode_num_scalar(num: u32, output: &mut [u8]) -> usize {
    // this will calculate 0_u32 as taking 0 bytes, so ensure at least 1 byte
    let len = cmp::max(1_usize, 4 - num.leading_zeros() as usize / 8);
    let buf = num.to_le_bytes();

    for i in 0..len {
        output[i] = buf[i];
    }

    len
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn encode_num_zero() {
        let mut buf = [0; 4];

        assert_eq!(1, encode_num_scalar(0, &mut buf));
        assert_eq!(&[0x00_u8, 0x00_u8, 0x00_u8, 0x00_u8], &buf);
    }

    #[test]
    fn encode_num_bottom_two_bytes() {
        let mut buf = [0; 4];

        assert_eq!(2, encode_num_scalar((1 << 16) - 1, &mut buf));
        assert_eq!(&[0xFF_u8, 0xFF_u8, 0x00_u8, 0x00_u8], &buf);
    }

    #[test]
    fn encode_num_middleish() {
        let mut buf = [0; 4];

        assert_eq!(3, encode_num_scalar((1 << 16) + 3, &mut buf));
        assert_eq!(&[0x03_u8, 0x00_u8, 0x01_u8, 0x00_u8], &buf);
    }

    #[test]
    fn encode_num_u32_max() {
        let mut buf = [0; 4];

        assert_eq!(4, encode_num_scalar(u32::MAX, &mut buf));
        assert_eq!(&[0xFF_u8, 0xFF_u8, 0xFF_u8, 0xFF_u8], &buf);
    }
}