1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
//! Implementation of the [varu64 format](https://github.com/AljoschaMeyer/varu64-rs) in rust.

use std::{fmt, error};

/// Return how many bytes the encoding of `n` will take up.
pub fn encoding_length(n: u64) -> usize {
    if n < 248 {
        1
    } else if n < 256 {
        2
    } else if n < 65536 {
        3
    } else if n < 16777216 {
        4
    } else if n < 4294967296 {
        5
    } else if n < 1099511627776 {
        6
    } else if n < 281474976710656 {
        7
    } else if n < 72057594037927936 {
        8
    } else {
        9
    }
}

/// Encodes `n` into the output buffer, returning how many bytes have been written.
///
/// # Panics
/// Panics if the buffer is not large enough to hold the encoding.
pub fn encode(n: u64, out: &mut [u8]) -> usize {
    if n < 248 {
        out[0] = n as u8;
        1
    } else if n < 256 {
        out[0] = 248;
        write_bytes(n, 1, &mut out[1..]);
        2
    } else if n < 65536 {
        out[0] = 249;
        write_bytes(n, 2, &mut out[1..]);
        3
    } else if n < 16777216 {
        out[0] = 250;
        write_bytes(n, 3, &mut out[1..]);
        4
    } else if n < 4294967296 {
        out[0] = 251;
        write_bytes(n, 4, &mut out[1..]);
        5
    } else if n < 1099511627776 {
        out[0] = 252;
        write_bytes(n, 5, &mut out[1..]);
        6
    } else if n < 281474976710656 {
        out[0] = 253;
        write_bytes(n, 6, &mut out[1..]);
        7
    } else if n < 72057594037927936 {
        out[0] = 254;
        write_bytes(n, 7, &mut out[1..]);
        8
    } else {
        out[0] = 255;
        write_bytes(n, 8, &mut out[1..]);
        9
    }
}

// Write the k least significant bytes of n into out, in big-endian byteorder, panicking
// if out is too small.
//
// k must be smaller than 8.
fn write_bytes(n: u64, k: usize, out: &mut [u8]) {
    let bytes: [u8; 8] = unsafe { std::mem::transmute(u64::to_be(n)) };
    for i in 0..k {
        out[i] = bytes[(8 - k) + i];
    }
}

/// Decode a `u64` from the `input` buffer, returning the number and how many bytes were read.
///
/// # Errors
/// On error, this also returns how many bytes were read (including the erroneous byte). In case
/// of noncanonical data (encodings that are valid except they are not the smallest possible
/// encoding), the full data is parsed, even if the non-canonicty can be detected early on.
///
/// If there is not enough input data, an `UnexpectedEndOfInput` error is returned, never
/// a `NonCanonical` error (even if the partial input could already be detected to be
/// noncanonical).
pub fn decode(input: &[u8]) -> Result<(u64, usize), (DecodeError, usize)> {
    let first: u8;
    match input.get(0) {
        Some(b) => first = *b,
        None => return Err((UnexpectedEndOfInput, 0)),
    }

    if (first | 0b0000_0111) == 0b1111_1111 {
        // first five bytes are ones, value is 248 or more

        // Total length of the encoded data is 1 byte for the tag plus the value of
        // the three least sgnificant bits incremented by 1.
        let length = (first & 0b0000_0111) as usize + 2;
        let mut out: u64 = 0;

        for i in 1..length {
            out <<= 8;
            match input.get(i) {
                Some(b) => out += *b as u64,
                None => return Err((UnexpectedEndOfInput, i)),
            }
        }

        if length > encoding_length(out) {
            return Err((NonCanonical(out), length));
        } else {
            return Ok((out, length));
        }
    } else {
        // value is less than 248
        return Ok((first as u64, 1));
    }
}

/// Everything that can go wrong when decoding a varu64.
#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub enum DecodeError {
    /// The encoding is not the shortest possible one for the number.
    /// Contains the encoded number.
    NonCanonical(u64),
    /// The slice contained less data than the encoding needs.
    UnexpectedEndOfInput,
}
use DecodeError::*;

impl fmt::Display for DecodeError {
    fn fmt(&self, f: &mut fmt::Formatter) -> std::result::Result<(), fmt::Error> {
        match self {
            NonCanonical(n) => write!(f, "Invalid varu64: NonCanonical encoding of {}", n),
            UnexpectedEndOfInput => write!(f, "Invalid varu64: Not enough bytes"),
        }
    }
}

impl error::Error for DecodeError {}

#[cfg(test)]
mod tests {
    use super::*;

    // Assert that the given u64 encodes to the expected encoding, and that the
    // expected encoding decodes to the u64.
    fn test_fixture(n: u64, exp: &[u8]) {
        let mut foo = [0u8; 9];

        let enc_len = encode(n, &mut foo[..]);
        assert_eq!(&foo[..enc_len], exp);

        let (dec, dec_len) = decode(exp).unwrap();
        assert_eq!(dec, n);
        assert_eq!(dec_len, exp.len());
    }

    #[test]
    fn fixtures() {
        test_fixture(0, &[0]);
        test_fixture(1, &[1]);
        test_fixture(247, &[247]);
        test_fixture(248, &[248, 248]);
        test_fixture(255, &[248, 255]);
        test_fixture(256, &[249, 1, 0]);
        test_fixture(65535, &[249, 255, 255]);
        test_fixture(65536, &[250, 1, 0, 0]);
        test_fixture(72057594037927935, &[254, 255, 255, 255, 255, 255, 255, 255]);
        test_fixture(72057594037927936, &[255, 1, 0, 0, 0, 0, 0, 0, 0]);

        assert_eq!(decode(&[]).unwrap_err(), (UnexpectedEndOfInput, 0));
        assert_eq!(decode(&[248]).unwrap_err(), (UnexpectedEndOfInput, 1));
        assert_eq!(decode(&[255, 0, 1, 2, 3, 4, 5]).unwrap_err(),
                   (UnexpectedEndOfInput, 7));
        assert_eq!(decode(&[255, 0, 1, 2, 3, 4, 5, 6]).unwrap_err(),
                   (UnexpectedEndOfInput, 8));

        assert_eq!(decode(&[248, 42]).unwrap_err(), (NonCanonical(42), 2));
        assert_eq!(decode(&[249, 0, 42]).unwrap_err(), (NonCanonical(42), 3));
    }
}