varu64/
lib.rs

1//! Implementation of the [varu64 format](https://github.com/AljoschaMeyer/varu64-rs) in rust.
2#![cfg_attr(not(feature = "std"), no_std)]
3
4use core::convert::TryFrom;
5use core::num::NonZeroU64;
6use snafu::{OptionExt, Snafu};
7
8#[cfg(feature = "std")]
9use std::io;
10
11#[cfg(feature = "std")]
12pub mod nb;
13
14/// Return how many bytes the encoding of `n` will take up.
15pub fn encoding_length(n: u64) -> usize {
16    if n < 248 {
17        1
18    } else if n < 256 {
19        2
20    } else if n < 65536 {
21        3
22    } else if n < 16777216 {
23        4
24    } else if n < 4294967296 {
25        5
26    } else if n < 1099511627776 {
27        6
28    } else if n < 281474976710656 {
29        7
30    } else if n < 72057594037927936 {
31        8
32    } else {
33        9
34    }
35}
36
37/// Return how many bytes the encoding of `n: NonZeroU64` will take up.
38pub fn encoding_length_non_zero_u64(n: NonZeroU64) -> usize {
39    encoding_length_gt_x(u64::from(n), 0)
40}
41
42fn encoding_length_gt_x(n: u64, x: u64) -> usize {
43    encoding_length(u64::from(n) - 1 - x)
44}
45
46/// Encodes `n: NonZeroU64` into the output buffer, returning how many bytes have been written.
47///
48/// # Panics
49/// Panics if the buffer is not large enough to hold the encoding.
50pub fn encode_non_zero_u64(n: NonZeroU64, out: &mut [u8]) -> usize {
51    encode_gt_x(n.into(), 0, out)
52}
53
54fn encode_gt_x(n: u64, x: u64, out: &mut [u8]) -> usize {
55    encode(n - 1 - x, out)
56}
57
58/// Decode a `NonZeroU64` from the `input` buffer, returning the number and the remaining bytes.
59///
60/// # Errors
61/// On error, this also returns how many bytes were read (including the erroneous byte). In case
62/// of noncanonical data (encodings that are valid except they are not the smallest possible
63/// encoding), the full data is parsed, even if the non-canonicty could be detected early on.
64///
65/// If there is not enough input data, an `UnexpectedEndOfInput` error is returned, never
66/// a `NonCanonical` error (even if the partial input could already be detected to be
67/// noncanonical).
68pub fn decode_non_zero_u64(input: &[u8]) -> Result<(NonZeroU64, &[u8]), (DecodeError, &[u8])> {
69    decode_gt_x(input, 0).and_then(|(n, buff)| match NonZeroU64::try_from(n) {
70        Ok(num) => Ok((num, buff)),
71        Err(_) => Err((DecodeError::ExpectedANonZeroU64, buff)),
72    })
73}
74
75fn decode_gt_x(input: &[u8], x: u64) -> Result<(u64, &[u8]), (DecodeError, &[u8])> {
76    decode(input).and_then(|(n, buff)| {
77        let res = n.checked_add(x + 1)
78            .context(EncodedNonZeroValueOverflowed)
79            .map_err(|err| (err, buff))?;
80        Ok((res, buff))
81    })
82}
83
84/// Encodes `n` into the output buffer, returning how many bytes have been written.
85///
86/// # Panics
87/// Panics if the buffer is not large enough to hold the encoding.
88pub fn encode(n: u64, out: &mut [u8]) -> usize {
89    if n < 248 {
90        out[0] = n as u8;
91        1
92    } else if n < 256 {
93        out[0] = 248;
94        write_bytes(n, 1, &mut out[1..]);
95        2
96    } else if n < 65536 {
97        out[0] = 249;
98        write_bytes(n, 2, &mut out[1..]);
99        3
100    } else if n < 16777216 {
101        out[0] = 250;
102        write_bytes(n, 3, &mut out[1..]);
103        4
104    } else if n < 4294967296 {
105        out[0] = 251;
106        write_bytes(n, 4, &mut out[1..]);
107        5
108    } else if n < 1099511627776 {
109        out[0] = 252;
110        write_bytes(n, 5, &mut out[1..]);
111        6
112    } else if n < 281474976710656 {
113        out[0] = 253;
114        write_bytes(n, 6, &mut out[1..]);
115        7
116    } else if n < 72057594037927936 {
117        out[0] = 254;
118        write_bytes(n, 7, &mut out[1..]);
119        8
120    } else {
121        out[0] = 255;
122        write_bytes(n, 8, &mut out[1..]);
123        9
124    }
125}
126
127/// Encodes `n` into the writer, returning how many bytes have been written.
128#[cfg(feature = "std")]
129pub fn encode_write<W: io::Write>(n: u64, mut w: W) -> Result<usize, io::Error> {
130    let mut tmp = [0u8; 9];
131    let written = encode(n, &mut tmp[..]);
132    w.write_all(&tmp[..written]).map(|_| written)
133}
134
135// Write the k least significant bytes of n into out, in big-endian byteorder, panicking
136// if out is too small.
137//
138// k must be smaller than 8.
139fn write_bytes(n: u64, k: usize, out: &mut [u8]) {
140    let bytes: [u8; 8] = unsafe { core::mem::transmute(u64::to_be(n)) };
141    for i in 0..k {
142        out[i] = bytes[(8 - k) + i];
143    }
144}
145
146/// Decode a `u64` from the `input` buffer, returning the number and the remaining bytes.
147///
148/// # Errors
149/// On error, this also returns how many bytes were read (including the erroneous byte). In case
150/// of noncanonical data (encodings that are valid except they are not the smallest possible
151/// encoding), the full data is parsed, even if the non-canonicty could be detected early on.
152///
153/// If there is not enough input data, an `UnexpectedEndOfInput` error is returned, never
154/// a `NonCanonical` error (even if the partial input could already be detected to be
155/// noncanonical).
156pub fn decode(input: &[u8]) -> Result<(u64, &[u8]), (DecodeError, &[u8])> {
157    let first = input
158        .get(0)
159        .context(UnexpectedEndOfInput)
160        .map_err(|err| (err, input))?;
161
162    if (first | 0b0000_0111) == 0b1111_1111 {
163        // first five bytes are ones, value is 248 or more
164
165        // Total length of the encoded data is 1 byte for the tag plus the value of
166        // the three least sgnificant bits incremented by 1.
167        let length = (first & 0b0000_0111) as usize + 2;
168        let mut out: u64 = 0;
169
170        for i in 1..length {
171            out <<= 8;
172
173            input
174                .get(i)
175                .context(UnexpectedEndOfInput)
176                .map_err(|err| (err, &input[i..]))
177                .map(|b| out += *b as u64)?;
178        }
179
180        if length > encoding_length(out) {
181            return Err((
182                DecodeError::NonCanonical {
183                    encoded_number: out,
184                },
185                &input[length..],
186            ));
187        } else {
188            return Ok((out, &input[length..]));
189        }
190    } else {
191        // value is less than 248
192        return Ok((*first as u64, &input[1..]));
193    }
194}
195
196/// Everything that can go wrong when decoding a varu64.
197#[derive(Debug, Snafu, PartialEq)]
198pub enum DecodeError {
199    /// The encoding is not the shortest possible one for the number.
200    /// Contains the encoded number.
201    NonCanonical { encoded_number: u64 },
202    /// The slice contains less data than the encoding needs.
203    UnexpectedEndOfInput,
204    /// Did not encode a non-zero u64
205    ExpectedANonZeroU64,
206    /// Encoded a value that overflowed when converting from varu64 to non_zero value
207    EncodedNonZeroValueOverflowed,
208}
209
210#[cfg(test)]
211mod tests {
212    use super::*;
213
214    // Assert that the given u64 encodes to the expected encoding, and that the
215    // expected encoding decodes to the u64.
216    fn test_fixture(n: u64, exp: &[u8]) {
217        let mut foo = [0u8; 9];
218
219        let enc_len = encode(n, &mut foo[..]);
220        assert_eq!(&foo[..enc_len], exp);
221
222        let (dec, tail) = decode(exp).unwrap();
223        assert_eq!(dec, n);
224        assert_eq!(tail, &[][..]);
225    }
226
227    #[test]
228    fn fixtures() {
229        test_fixture(0, &[0]);
230        test_fixture(1, &[1]);
231        test_fixture(247, &[247]);
232        test_fixture(248, &[248, 248]);
233        test_fixture(255, &[248, 255]);
234        test_fixture(256, &[249, 1, 0]);
235        test_fixture(65535, &[249, 255, 255]);
236        test_fixture(65536, &[250, 1, 0, 0]);
237        test_fixture(72057594037927935, &[254, 255, 255, 255, 255, 255, 255, 255]);
238        test_fixture(72057594037927936, &[255, 1, 0, 0, 0, 0, 0, 0, 0]);
239
240        assert_eq!(
241            decode(&[]).unwrap_err(),
242            (DecodeError::UnexpectedEndOfInput, &[][..])
243        );
244        assert_eq!(
245            decode(&[248]).unwrap_err(),
246            (DecodeError::UnexpectedEndOfInput, &[][..])
247        );
248        assert_eq!(
249            decode(&[255, 0, 1, 2, 3, 4, 5]).unwrap_err(),
250            (DecodeError::UnexpectedEndOfInput, &[][..])
251        );
252        assert_eq!(
253            decode(&[255, 0, 1, 2, 3, 4, 5, 6]).unwrap_err(),
254            (DecodeError::UnexpectedEndOfInput, &[][..])
255        );
256
257        assert_eq!(
258            decode(&[248, 42]).unwrap_err(),
259            (DecodeError::NonCanonical { encoded_number: 42 }, &[][..])
260        );
261        assert_eq!(
262            decode(&[249, 0, 42]).unwrap_err(),
263            (DecodeError::NonCanonical { encoded_number: 42 }, &[][..])
264        );
265    }
266
267    #[test]
268    fn encode_decode_non_zero_u64() {
269        let expected = NonZeroU64::new(3).unwrap();
270        let mut buffer = [0; 4];
271        let encoded_length = encode_non_zero_u64(expected, &mut buffer);
272
273        let (decoded, _) = decode_non_zero_u64(&buffer[..encoded_length]).unwrap();
274
275        assert_eq!(decoded, expected);
276    }
277    #[test]
278    fn decode_non_zero_u64_no_panic_with_too_large_encoded_value() {
279        let mut buffer = [0; 9];
280        let encoded_length = encode(u64::MAX, &mut buffer);
281
282        let res = decode_non_zero_u64(&buffer[..encoded_length]);
283
284        assert!(res.is_err());
285    }
286
287}