sigma_ser/
vlq_encode.rs

1use super::zig_zag_encode;
2use std::convert::TryFrom;
3use std::io;
4
5use bitvec::order::Lsb0;
6use bitvec::prelude::BitVec;
7#[cfg(test)]
8use proptest::{num::u64, prelude::*};
9use thiserror::Error;
10
11/// Ways VLQ encoding/decoding might fail
12#[derive(Error, Debug, Clone, Eq, PartialEq)]
13pub enum VlqEncodingError {
14    /// IO fail (EOF, etc.)
15    #[error("IO error: {0}")]
16    Io(String),
17    /// value bounds check error
18    #[error("Bounds check error: {1} for input: {0}")]
19    TryFrom(String, std::num::TryFromIntError),
20    /// Fail to decode a value from bytes
21    #[error("VLQ decoding failed")]
22    VlqDecodingFailed,
23}
24
25impl From<io::Error> for VlqEncodingError {
26    fn from(error: io::Error) -> Self {
27        VlqEncodingError::Io(error.to_string())
28    }
29}
30
31/// Write encoded unsigned values using VLQ and signed values first with ZigZag, then using VLQ
32/// for VLQ see <https://en.wikipedia.org/wiki/Variable-length_quantity> (GLE)
33/// for ZigZag see <https://developers.google.com/protocol-buffers/docs/encoding#types>
34pub trait WriteSigmaVlqExt: io::Write {
35    /// Write i8 without encoding
36    fn put_i8(&mut self, v: i8) -> io::Result<()> {
37        Self::put_u8(self, v as u8)
38    }
39
40    /// Write u8 without encoding
41    fn put_u8(&mut self, v: u8) -> io::Result<()> {
42        self.write_all(&[v])
43    }
44
45    /// Encode using ZigZag and then VLQ.
46    fn put_i16(&mut self, v: i16) -> io::Result<()> {
47        Self::put_u32(self, zig_zag_encode::encode_i32(v as i32) as u32)
48    }
49
50    /// Encode using VLQ.
51    fn put_u16(&mut self, v: u16) -> io::Result<()> {
52        Self::put_u64(self, v as u64)
53    }
54
55    /// Cast to u16 (panics if out of range) and encode using VLQ
56    fn put_usize_as_u16_unwrapped(&mut self, v: usize) -> io::Result<()> {
57        #[allow(clippy::unwrap_used)]
58        Self::put_u16(self, u16::try_from(v).unwrap())
59    }
60
61    /// Cast to u32 (panics if out of range) and encode using VLQ
62    fn put_usize_as_u32_unwrapped(&mut self, v: usize) -> io::Result<()> {
63        #[allow(clippy::unwrap_used)]
64        Self::put_u32(self, u32::try_from(v).unwrap())
65    }
66
67    /// Encode using ZigZag and then VLQ.
68    fn put_i32(&mut self, v: i32) -> io::Result<()> {
69        Self::put_u64(self, zig_zag_encode::encode_i32(v))
70    }
71
72    /// Encode using VLQ.
73    fn put_u32(&mut self, v: u32) -> io::Result<()> {
74        Self::put_u64(self, v as u64)
75    }
76
77    /// Write bytes of v directly (big-endian format)
78    fn put_u32_be_bytes(&mut self, v: u32) -> io::Result<()> {
79        self.write_all(&v.to_be_bytes())?;
80        Ok(())
81    }
82
83    /// Encode using ZigZag and then VLQ.
84    fn put_i64(&mut self, v: i64) -> io::Result<()> {
85        Self::put_u64(self, zig_zag_encode::encode_i64(v))
86    }
87
88    /// Encode using VLQ.
89    fn put_u64(&mut self, v: u64) -> io::Result<()> {
90        let mut buffer: [u8; 10] = [0; 10];
91        let mut position = 0;
92        let mut value = v;
93        // from https://github.com/ScorexFoundation/scorex-util/blob/3dc334f68ebefbfab6d33b57f2373e80245ab34d/src/main/scala/scorex/util/serialization/VLQWriter.scala#L97-L117
94        // original source: http://github.com/google/protobuf/blob/a7252bf42df8f0841cf3a0c85fdbf1a5172adecb/java/core/src/main/java/com/google/protobuf/CodedOutputStream.java#L1387
95        loop {
96            if (value & !0x7F) == 0 {
97                buffer[position] = value as u8;
98                position += 1;
99                break;
100            } else {
101                buffer[position] = (((value as i32) & 0x7F) | 0x80) as u8;
102                position += 1;
103                value >>= 7;
104            };
105        }
106        self.write_all(&buffer[..position])
107    }
108
109    /// Encode bool array as bit vector, filling trailing bits with `false`
110    fn put_bits(&mut self, bools: &[bool]) -> io::Result<()> {
111        let mut bits = BitVec::<u8, Lsb0>::new();
112        for b in bools {
113            bits.push(*b);
114        }
115        for c in bits.as_bitslice().domain() {
116            self.put_u8(c)?;
117        }
118        Ok(())
119    }
120
121    /// Put the two bytes of the big-endian representation of the i16 value into the writer.
122    fn put_i16_be_bytes(&mut self, v: i16) -> io::Result<()> {
123        self.write_all(v.to_be_bytes().as_ref())
124    }
125
126    /// Put a short string (< 256 bytes) into the writer. Writes length (as u8) and string bytes to the writer
127    fn put_short_string(&mut self, s: &str) -> io::Result<()> {
128        if s.len() > 255 {
129            return Err(io::Error::new(
130                io::ErrorKind::Unsupported,
131                "Serializing strings with more than 255 bytes is not allowed",
132            ));
133        }
134        self.put_u8(s.len() as u8)?;
135        self.write_all(s.as_bytes())?;
136        Ok(())
137    }
138
139    /// Encode an optional value
140    fn put_option<T>(
141        &mut self,
142        opt: Option<T>,
143        put_value: &dyn Fn(&mut Self, T) -> io::Result<()>,
144    ) -> io::Result<()> {
145        match opt {
146            Some(s) => {
147                self.put_u8(1)?;
148                put_value(self, s)?;
149            }
150            None => self.put_u8(0)?,
151        }
152        Ok(())
153    }
154}
155
156/// Mark all types implementing `Write` as implementing the extension.
157impl<W: io::Write + ?Sized> WriteSigmaVlqExt for W {}
158
159/// Read and decode values using VLQ (+ ZigZag for signed values) encoded and written with [`WriteSigmaVlqExt`]
160/// for VLQ see <https://en.wikipedia.org/wiki/Variable-length_quantity> (GLE)
161/// for ZigZag see <https://developers.google.com/protocol-buffers/docs/encoding#types>
162pub trait ReadSigmaVlqExt: io::Read {
163    /// Read i8 without decoding
164    fn get_i8(&mut self) -> Result<i8, io::Error> {
165        Self::get_u8(self).map(|v| v as i8)
166    }
167
168    /// Read u8 without decoding
169    fn get_u8(&mut self) -> std::result::Result<u8, io::Error> {
170        let mut slice = [0u8; 1];
171        self.read_exact(&mut slice)?;
172        Ok(slice[0])
173    }
174
175    /// Read and decode using VLQ and ZigZag value written with [`WriteSigmaVlqExt::put_i16`]
176    fn get_i16(&mut self) -> Result<i16, VlqEncodingError> {
177        Self::get_u64(self).and_then(|v| {
178            let vd = zig_zag_encode::decode_u32(v);
179            i16::try_from(vd).map_err(|err| VlqEncodingError::TryFrom(vd.to_string(), err))
180        })
181    }
182
183    /// Read and decode using VLQ value written with [`WriteSigmaVlqExt::put_u16`]
184    fn get_u16(&mut self) -> Result<u16, VlqEncodingError> {
185        Self::get_u64(self).and_then(|v| {
186            u16::try_from(v).map_err(|err| VlqEncodingError::TryFrom(v.to_string(), err))
187        })
188    }
189
190    /// Read and decode using VLQ and ZigZag value written with [`WriteSigmaVlqExt::put_i32`]
191    fn get_i32(&mut self) -> Result<i32, VlqEncodingError> {
192        Self::get_u64(self).map(zig_zag_encode::decode_u32)
193    }
194
195    /// Read and decode using VLQ value written with [`WriteSigmaVlqExt::put_u32`]
196    fn get_u32(&mut self) -> Result<u32, VlqEncodingError> {
197        Self::get_u64(self).and_then(|v| {
198            u32::try_from(v).map_err(|err| VlqEncodingError::TryFrom(v.to_string(), err))
199        })
200    }
201
202    /// Read and decode using VLQ and ZigZag value written with [`WriteSigmaVlqExt::put_i64`]
203    fn get_i64(&mut self) -> Result<i64, VlqEncodingError> {
204        Self::get_u64(self).map(zig_zag_encode::decode_u64)
205    }
206
207    /// Read and decode using VLQ value written with [`WriteSigmaVlqExt::put_u64`]
208    fn get_u64(&mut self) -> Result<u64, VlqEncodingError> {
209        // source: http://github.com/google/protobuf/blob/a7252bf42df8f0841cf3a0c85fdbf1a5172adecb/java/core/src/main/java/com/google/protobuf/CodedInputStream.java#L2653
210        // for faster version see: http://github.com/google/protobuf/blob/a7252bf42df8f0841cf3a0c85fdbf1a5172adecb/java/core/src/main/java/com/google/protobuf/CodedInputStream.java#L1085
211        let mut result: i64 = 0;
212        let mut shift = 0;
213        while shift < 64 {
214            let b = self.get_u8()?;
215            result |= ((b & 0x7F) as i64) << shift;
216            if (b & 0x80) == 0 {
217                return Ok(result as u64);
218            }
219            shift += 7;
220        }
221        Err(VlqEncodingError::VlqDecodingFailed)
222    }
223
224    /// Read a vector of bits with the given size
225    fn get_bits(&mut self, size: usize) -> Result<Vec<bool>, VlqEncodingError> {
226        let byte_num = (size + 7) / 8;
227        let mut buf = vec![0u8; byte_num];
228        self.read_exact(&mut buf)?;
229        // May fail if number of bits in buf is larger that maximum value of usize
230        let mut bits = BitVec::<u8, Lsb0>::from_vec(buf);
231        bits.truncate(size);
232        Ok(bits.iter().map(|x| *x).collect::<Vec<bool>>())
233    }
234
235    /// Reads a string from the reader. Reads a byte (size), and the string
236    fn get_short_string(&mut self) -> Result<String, VlqEncodingError> {
237        let size_bytes = self.get_u8()?;
238        let mut bytes = vec![0u8; size_bytes as usize];
239        self.read_exact(&mut bytes)?;
240        let string = String::from_utf8(bytes).map_err(|_| VlqEncodingError::VlqDecodingFailed)?;
241        Ok(string)
242    }
243
244    /// Read and decode an optional value using supplied function
245    fn get_option<T>(
246        &mut self,
247        get_value: &dyn Fn(&mut Self) -> Result<T, VlqEncodingError>,
248    ) -> Option<T> {
249        let is_opt = self.get_u8().ok()?;
250        match is_opt {
251            1 => Some(get_value(self).ok()?),
252            // Should only ever be 0 or 1
253            _ => None,
254        }
255    }
256}
257
258/// Mark all types implementing `Read` as implementing the extension.
259impl<R: io::Read + ?Sized> ReadSigmaVlqExt for R {}
260
261#[allow(clippy::unwrap_used)]
262#[cfg(test)]
263#[allow(clippy::panic)]
264mod tests {
265    // See corresponding test suite in
266    // https://github.com/ScorexFoundation/scorex-util/blob/9adb6c68b8a1c00ec17730e6da11c2976a892ad8/src/test/scala/scorex/util/serialization/VLQReaderWriterSpecification.scala#L11
267    use super::*;
268    use proptest::collection;
269    use std::io::Cursor;
270    use std::io::Read;
271    use std::io::Write;
272
273    extern crate derive_more;
274    use derive_more::From;
275
276    #[derive(Debug, From, Clone, PartialEq)]
277    enum Val {
278        I8(i8),
279        U8(u8),
280        I16(i16),
281        U16(u16),
282        I32(i32),
283        U32(u32),
284        I64(i64),
285        U64(u64),
286        Bytes(Vec<u8>),
287        Bits(Vec<bool>),
288    }
289
290    impl Arbitrary for Val {
291        type Strategy = BoxedStrategy<Self>;
292        type Parameters = ();
293
294        fn arbitrary_with(_args: Self::Parameters) -> Self::Strategy {
295            prop_oneof![
296                any::<i8>().prop_map_into(),
297                any::<u8>().prop_map_into(),
298                any::<i16>().prop_map_into(),
299                any::<u16>().prop_map_into(),
300                any::<i32>().prop_map_into(),
301                any::<u32>().prop_map_into(),
302                any::<i64>().prop_map_into(),
303                any::<u64>().prop_map_into(),
304                any::<Vec<u8>>().prop_map_into(),
305                any::<Vec<bool>>().prop_map_into(),
306            ]
307            .boxed()
308        }
309    }
310
311    fn bytes_u64(v: u64) -> Vec<u8> {
312        let mut w = Cursor::new(vec![]);
313        w.put_u64(v).unwrap();
314        w.into_inner()
315    }
316
317    fn bytes_i64(v: i64) -> Vec<u8> {
318        let mut w = Cursor::new(vec![]);
319        w.put_i64(v).unwrap();
320        w.into_inner()
321    }
322
323    fn bytes_u32(v: u32) -> Vec<u8> {
324        let mut w = Cursor::new(vec![]);
325        w.put_u32(v).unwrap();
326        w.into_inner()
327    }
328
329    fn bytes_i32(v: i32) -> Vec<u8> {
330        let mut w = Cursor::new(vec![]);
331        w.put_i32(v).unwrap();
332        w.into_inner()
333    }
334    fn bytes_u16(v: u16) -> Vec<u8> {
335        let mut w = Cursor::new(vec![]);
336        w.put_u16(v).unwrap();
337        w.into_inner()
338    }
339
340    fn bytes_i16(v: i16) -> Vec<u8> {
341        let mut w = Cursor::new(vec![]);
342        w.put_i16(v).unwrap();
343        w.into_inner()
344    }
345
346    #[test]
347    fn test_write_u8() {
348        let mut w = Cursor::new(vec![]);
349        w.put_u8(0).unwrap();
350        w.put_u8(1).unwrap();
351        w.put_u8(255).unwrap();
352
353        assert_eq!(w.into_inner(), vec![0, 1, 255])
354    }
355
356    #[test]
357    fn test_read_u8() {
358        let mut r = Cursor::new(vec![0, 1, 255]);
359        assert_eq!(r.get_u8().unwrap(), 0);
360        assert_eq!(r.get_u8().unwrap(), 1);
361        assert_eq!(r.get_u8().unwrap(), 255);
362    }
363
364    // from https://github.com/ScorexFoundation/scorex-util/blob/3dc334f68ebefbfab6d33b57f2373e80245ab34d/src/test/scala/scorex/util/serialization/VLQReaderWriterSpecification.scala#L32-L32
365    // original source: http://github.com/google/protobuf/blob/a7252bf42df8f0841cf3a0c85fdbf1a5172adecb/java/core/src/test/java/com/google/protobuf/CodedInputStreamTest.java#L239
366    #[allow(clippy::identity_op)]
367    fn expected_values() -> Vec<(Vec<u8>, u64)> {
368        vec![
369            (vec![0x00], 0),
370            (vec![0x01], 1),
371            (vec![0x7f], 127),
372            // 14882
373            (vec![0xa2, 0x74], (0x22 << 0) | (0x74 << 7)),
374            // 2961488830
375            (
376                vec![0xbe, 0xf7, 0x92, 0x84, 0x0b],
377                (0x3e << 0) | (0x77 << 7) | (0x12 << 14) | (0x04 << 21) | (0x0b << 28),
378            ),
379            // 64-bit
380            // 7256456126
381            (
382                vec![0xbe, 0xf7, 0x92, 0x84, 0x1b],
383                (0x3e << 0) | (0x77 << 7) | (0x12 << 14) | (0x04 << 21) | (0x1b << 28),
384            ),
385            // 41256202580718336
386            (
387                vec![0x80, 0xe6, 0xeb, 0x9c, 0xc3, 0xc9, 0xa4, 0x49],
388                (0x00 << 0)
389                    | (0x66 << 7)
390                    | (0x6b << 14)
391                    | (0x1c << 21)
392                    | (0x43 << 28)
393                    | (0x49 << 35)
394                    | (0x24 << 42)
395                    | (0x49 << 49),
396            ),
397            // 11964378330978735131 (-6482365742730816485)
398            (
399                vec![0x9b, 0xa8, 0xf9, 0xc2, 0xbb, 0xd6, 0x80, 0x85, 0xa6, 0x01],
400                (0x1b << 0)
401                    | (0x28 << 7)
402                    | (0x79 << 14)
403                    | (0x42 << 21)
404                    | (0x3b << 28)
405                    | (0x56 << 35)
406                    | (0x00 << 42)
407                    | (0x05 << 49)
408                    | (0x26 << 56)
409                    | (0x01 << 63),
410            ),
411        ]
412    }
413
414    #[test]
415    fn test_write_u64_expected_values() {
416        for pair in expected_values() {
417            let (bytes, value) = pair;
418            let mut w = Cursor::new(vec![]);
419            w.put_u64(value).unwrap();
420            assert_eq!(w.into_inner(), bytes)
421        }
422    }
423
424    #[test]
425    fn test_read_u64_expected_values() {
426        for pair in expected_values() {
427            let (bytes, value) = pair;
428            let mut r = Cursor::new(bytes);
429            let decoded_value = r.get_u64().unwrap();
430            assert_eq!(decoded_value, value)
431        }
432    }
433
434    #[test]
435    fn test_i32_ten_bytes_case() {
436        let input = 1234567890i32;
437        let mut w = Cursor::new(vec![]);
438        w.put_i32(input).unwrap();
439        let bytes = w.into_inner();
440        assert_eq!(bytes.len(), 10);
441        // 164, 139, 176, 153, 9,
442        let mut r = Cursor::new(bytes);
443        let decoded_value = r.get_i32().unwrap();
444        assert_eq!(decoded_value, input);
445    }
446
447    #[test]
448    fn malformed_input() {
449        // source: http://github.com/google/protobuf/blob/a7252bf42df8f0841cf3a0c85fdbf1a5172adecb/java/core/src/test/java/com/google/protobuf/CodedInputStreamTest.java#L281
450        assert!(Cursor::new([0x80]).get_u64().is_err());
451        assert!(
452            Cursor::new([0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x00])
453                .get_u64()
454                .is_err()
455        );
456    }
457
458    #[test]
459    fn i16_corner_cases() {
460        fn roundtrip(v: i16, expected_bytes: &[u8]) {
461            let mut w = Cursor::new(vec![]);
462            w.put_i16(v).unwrap();
463            let bytes = w.into_inner();
464            assert_eq!(bytes, expected_bytes);
465            let mut r = Cursor::new(expected_bytes);
466            let decoded_value = r.get_i16().unwrap();
467            assert_eq!(decoded_value, v);
468        }
469
470        roundtrip(i16::MIN, &[0xFF, 0xFF, 0x03]);
471        roundtrip(-8194, &[0x83, 0x80, 0x01]);
472        roundtrip(-8193, &[0x81, 0x80, 0x01]);
473        roundtrip(-8192, &[0xFF, 0x7F]);
474        roundtrip(-8191, &[0xFD, 0x7F]);
475        roundtrip(-66, &[0x83, 0x01]);
476        assert_eq!(Cursor::new([0x83, 0x00]).get_i16().unwrap(), -2);
477        roundtrip(-65, &[0x81, 0x01]);
478        assert_eq!(Cursor::new([0x81, 0x00]).get_i16().unwrap(), -1);
479        roundtrip(-64, &[0x7F]);
480        roundtrip(-63, &[0x7D]);
481        roundtrip(-1, &[0x01]);
482        roundtrip(0, &[0]);
483        roundtrip(1, &[0x02]);
484        roundtrip(62, &[0x7C]);
485        roundtrip(63, &[0x7E]);
486        assert_eq!(Cursor::new([0x80, 0x00]).get_i16().unwrap(), 0);
487        roundtrip(64, &[0x80, 0x01]);
488        assert_eq!(Cursor::new([0x82, 0x00]).get_i16().unwrap(), 1);
489        roundtrip(65, &[0x82, 0x01]);
490        roundtrip(8190, &[0xFC, 0x7F]);
491        roundtrip(8191, &[0xFE, 0x7F]);
492        roundtrip(8192, &[0x80, 0x80, 0x01]);
493        roundtrip(8193, &[0x82, 0x80, 0x01]);
494        roundtrip(i16::MAX, &[0xFE, 0xFF, 0x03]);
495    }
496
497    #[test]
498    fn u16_corner_cases() {
499        fn roundtrip(v: u16, expected_bytes: &[u8]) {
500            let mut w = Cursor::new(vec![]);
501            w.put_u16(v).unwrap();
502            let bytes = w.into_inner();
503            assert_eq!(bytes, expected_bytes);
504            let mut r = Cursor::new(expected_bytes);
505            let decoded_value = r.get_u16().unwrap();
506            assert_eq!(decoded_value, v);
507        }
508
509        roundtrip(0, &[0x00]);
510        roundtrip(1, &[0x01]);
511        roundtrip(126, &[0x7E]);
512        roundtrip(127, &[0x7F]);
513        roundtrip(128, &[0x80, 0x01]);
514        roundtrip(129, &[0x81, 0x01]);
515        roundtrip(16382, &[0xFE, 0x7F]);
516        roundtrip(16383, &[0xFF, 0x7F]);
517        roundtrip(16384, &[0x80, 0x80, 0x01]);
518        roundtrip(16385, &[0x81, 0x80, 0x01]);
519        roundtrip(65534, &[0xFE, 0xFF, 0x03]);
520        roundtrip(65535, &[0xFF, 0xFF, 0x03]);
521    }
522
523    #[test]
524    fn i32_corner_cases() {
525        fn roundtrip(v: i32, expected_bytes: &[u8]) {
526            let mut w = Cursor::new(vec![]);
527            w.put_i32(v).unwrap();
528            let bytes = w.into_inner();
529            assert_eq!(
530                bytes,
531                expected_bytes,
532                "for {}, zigzag: {}",
533                v,
534                zig_zag_encode::encode_i32(v)
535            );
536            let mut r = Cursor::new(expected_bytes);
537            let decoded_value = r.get_i32().unwrap();
538            assert_eq!(decoded_value, v);
539        }
540
541        roundtrip(
542            i32::MIN,
543            &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01],
544        ); // 10 bytes
545        roundtrip(
546            -1073741825,
547            &[0x81, 0x80, 0x80, 0x80, 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0x01],
548        ); // 10 bytes
549        roundtrip(-1073741824, &[0xFF, 0xFF, 0xFF, 0xFF, 0x07]); // 5 bytes
550        roundtrip(-134217729, &[0x81, 0x80, 0x80, 0x80, 0x01]); // 5 bytes
551        roundtrip(-134217728, &[0xFF, 0xFF, 0xFF, 0x7F]); // 4 bytes
552        roundtrip(-1048577, &[0x81, 0x80, 0x80, 0x01]); // 4 bytes
553        roundtrip(-1048576, &[0xFF, 0xFF, 0x7F]);
554        roundtrip(-8194, &[0x83, 0x80, 0x01]);
555        roundtrip(-8193, &[0x81, 0x80, 0x01]);
556        roundtrip(-8192, &[0xFF, 0x7F]);
557        roundtrip(-8191, &[0xFD, 0x7F]);
558        roundtrip(-66, &[0x83, 0x01]);
559        roundtrip(-65, &[0x81, 0x01]);
560        roundtrip(-64, &[0x7F]);
561        roundtrip(-63, &[0x7D]);
562        roundtrip(-1, &[0x01]);
563        roundtrip(0, &[0]);
564        roundtrip(1, &[0x02]);
565        roundtrip(62, &[0x7C]);
566        roundtrip(63, &[0x7E]);
567        roundtrip(64, &[0x80, 0x01]);
568        roundtrip(65, &[0x82, 0x01]);
569        roundtrip(8190, &[0xFC, 0x7F]);
570        roundtrip(8191, &[0xFE, 0x7F]);
571        roundtrip(8192, &[0x80, 0x80, 0x01]);
572        roundtrip(8193, &[0x82, 0x80, 0x01]);
573        roundtrip(1048575, &[0xFE, 0xFF, 0x7F]);
574        roundtrip(1048576, &[0x80, 0x80, 0x80, 0x01]); // 4 bytes
575        roundtrip(134217727, &[0xFE, 0xFF, 0xFF, 0x7F]); // 4 bytes
576        roundtrip(134217728, &[0x80, 0x80, 0x80, 0x80, 0x01]); // 5 bytes
577        roundtrip(1073741823, &[0xFE, 0xFF, 0xFF, 0xFF, 0x07]); // 5 bytes
578        roundtrip(
579            1073741824,
580            &[0x80, 0x80, 0x80, 0x80, 0xF8, 0xFF, 0xFF, 0xFF, 0xFF, 0x01],
581        ); // 10 bytes
582        roundtrip(
583            i32::MAX,
584            &[0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01],
585        ); // 10 bytes
586    }
587
588    #[test]
589    fn u32_corner_cases() {
590        fn roundtrip(v: u32, expected_bytes: &[u8]) {
591            let mut w = Cursor::new(vec![]);
592            w.put_u32(v).unwrap();
593            let bytes = w.into_inner();
594            assert_eq!(bytes, expected_bytes, "for {}", v);
595            let mut r = Cursor::new(expected_bytes);
596            let decoded_value = r.get_u32().unwrap();
597            assert_eq!(decoded_value, v);
598        }
599
600        roundtrip(0, &[0]);
601        roundtrip(126, &[0x7E]);
602        roundtrip(127, &[0x7F]);
603        roundtrip(128, &[0x80, 0x01]);
604        roundtrip(129, &[0x81, 0x01]);
605        roundtrip(16383, &[0xFF, 0x7F]);
606        roundtrip(16384, &[0x80, 0x80, 0x01]);
607        roundtrip(16385, &[0x81, 0x80, 0x01]);
608        roundtrip(2097151, &[0xFF, 0xFF, 0x7F]);
609        roundtrip(2097152, &[0x80, 0x80, 0x80, 0x01]);
610        roundtrip(268435455, &[0xFF, 0xFF, 0xFF, 0x7F]);
611        roundtrip(268435456, &[0x80, 0x80, 0x80, 0x80, 0x01]);
612        roundtrip(u32::MAX, &[0xFF, 0xFF, 0xFF, 0xFF, 0x0F]);
613    }
614
615    #[test]
616    fn i64_corner_cases() {
617        fn roundtrip(v: i64, expected_bytes: &[u8]) {
618            let mut w = Cursor::new(vec![]);
619            w.put_i64(v).unwrap();
620            let bytes = w.into_inner();
621            assert_eq!(bytes, expected_bytes, "for {}", v);
622            let mut r = Cursor::new(expected_bytes);
623            let decoded_value = r.get_i64().unwrap();
624            assert_eq!(decoded_value, v);
625        }
626
627        roundtrip(
628            i64::MIN,
629            &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01],
630        ); // 10 bytes
631        roundtrip(
632            i64::MIN / 2 - 1,
633            &[0x81, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01],
634        ); // 10 bytes
635        roundtrip(
636            i64::MIN / 2,
637            &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F],
638        ); // 9 bytes
639        roundtrip(
640            -36028797018963969,
641            &[0x81, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01],
642        ); // 9 bytes
643        roundtrip(
644            -36028797018963968,
645            &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F],
646        ); // 8 bytes
647        roundtrip(
648            -281474976710657,
649            &[0x81, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01],
650        ); // 8 bytes
651        roundtrip(
652            -281474976710656,
653            &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F],
654        ); // 7 bytes
655        roundtrip(-2199023255553, &[0x81, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01]); // 7 bytes
656        roundtrip(-2199023255552, &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F]); // 6 bytes
657        roundtrip(-17179869185, &[0x81, 0x80, 0x80, 0x80, 0x80, 0x01]); // 6 bytes
658        roundtrip(-17179869184, &[0xFF, 0xFF, 0xFF, 0xFF, 0x7F]); // 5 bytes
659        roundtrip(-1073741824, &[0xFF, 0xFF, 0xFF, 0xFF, 0x07]); // 5 bytes
660        roundtrip(-134217729, &[0x81, 0x80, 0x80, 0x80, 0x01]); // 5 bytes
661        roundtrip(-134217728, &[0xFF, 0xFF, 0xFF, 0x7F]); // 4 bytes
662        roundtrip(-1048577, &[0x81, 0x80, 0x80, 0x01]); // 4 bytes
663        roundtrip(-1048576, &[0xFF, 0xFF, 0x7F]);
664        roundtrip(-8194, &[0x83, 0x80, 0x01]);
665        roundtrip(-8193, &[0x81, 0x80, 0x01]);
666        roundtrip(-8192, &[0xFF, 0x7F]);
667        roundtrip(-8191, &[0xFD, 0x7F]);
668        roundtrip(-66, &[0x83, 0x01]);
669        roundtrip(-65, &[0x81, 0x01]);
670        roundtrip(-64, &[0x7F]);
671        roundtrip(-63, &[0x7D]);
672        roundtrip(-1, &[0x01]);
673        roundtrip(0, &[0]);
674        roundtrip(1, &[0x02]);
675        roundtrip(62, &[0x7C]);
676        roundtrip(63, &[0x7E]);
677        roundtrip(64, &[0x80, 0x01]);
678        roundtrip(65, &[0x82, 0x01]);
679        roundtrip(8190, &[0xFC, 0x7F]);
680        roundtrip(8191, &[0xFE, 0x7F]);
681        roundtrip(8192, &[0x80, 0x80, 0x01]);
682        roundtrip(8193, &[0x82, 0x80, 0x01]);
683        roundtrip(1048575, &[0xFE, 0xFF, 0x7F]);
684        roundtrip(1048576, &[0x80, 0x80, 0x80, 0x01]); // 4 bytes
685        roundtrip(134217727, &[0xFE, 0xFF, 0xFF, 0x7F]); // 4 bytes
686        roundtrip(134217728, &[0x80, 0x80, 0x80, 0x80, 0x01]); // 5 bytes
687        roundtrip(1073741823, &[0xFE, 0xFF, 0xFF, 0xFF, 0x07]); // 5 bytes
688        roundtrip(17179869183, &[0xFE, 0xFF, 0xFF, 0xFF, 0x7F]); // 5 bytes
689        roundtrip(17179869184, &[0x80, 0x80, 0x80, 0x80, 0x80, 0x01]); // 6 bytes
690        roundtrip(2199023255551, &[0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F]); // 6 bytes
691        roundtrip(2199023255552, &[0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01]); // 7 bytes
692        roundtrip(281474976710655, &[0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F]); // 7 bytes
693        roundtrip(
694            281474976710656,
695            &[0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01],
696        ); // 8 bytes
697        roundtrip(
698            36028797018963967,
699            &[0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F],
700        ); // 8 bytes
701        roundtrip(
702            36028797018963968,
703            &[0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01],
704        ); // 9 bytes
705        roundtrip(
706            i64::MAX / 2,
707            &[0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F],
708        ); // 9 bytes
709        roundtrip(
710            i64::MAX / 2 + 1,
711            &[0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01],
712        ); // 10 bytes
713        roundtrip(
714            i64::MAX,
715            &[0xFE, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x01],
716        ); // 10 bytes
717    }
718
719    #[test]
720    fn u64_corner_cases() {
721        fn roundtrip(v: u64, expected_bytes: &[u8]) {
722            let mut w = Cursor::new(vec![]);
723            w.put_u64(v).unwrap();
724            let bytes = w.into_inner();
725            assert_eq!(bytes, expected_bytes, "for {}", v);
726            let mut r = Cursor::new(expected_bytes);
727            let decoded_value = r.get_u64().unwrap();
728            assert_eq!(decoded_value, v);
729        }
730
731        roundtrip(0, &[0]);
732
733        roundtrip(126, &[0x7E]);
734        roundtrip(127, &[0x7F]);
735        roundtrip(128, &[0x80, 0x01]);
736        roundtrip(129, &[0x81, 0x01]);
737        roundtrip(16383, &[0xFF, 0x7F]);
738        roundtrip(16384, &[0x80, 0x80, 0x01]);
739        roundtrip(16385, &[0x81, 0x80, 0x01]);
740        roundtrip(2097151, &[0xFF, 0xFF, 0x7F]);
741        roundtrip(2097152, &[0x80, 0x80, 0x80, 0x01]); // 4 bytes
742        roundtrip(268435455, &[0xFF, 0xFF, 0xFF, 0x7F]); // 4 bytes
743        roundtrip(268435456, &[0x80, 0x80, 0x80, 0x80, 0x01]); // 5 bytes
744        roundtrip(34359738367, &[0xFF, 0xFF, 0xFF, 0xFF, 0x7F]); // 5 bytes
745        roundtrip(34359738368, &[0x80, 0x80, 0x80, 0x80, 0x80, 0x01]); // 6 bytes
746        roundtrip(4398046511103, &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F]); // 6 bytes
747        roundtrip(4398046511104, &[0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01]); // 7 bytes
748        roundtrip(562949953421311, &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F]); // 7 bytes
749        roundtrip(
750            562949953421312,
751            &[0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01],
752        ); // 8 bytes
753        roundtrip(
754            72057594037927935,
755            &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F],
756        ); // 8 bytes
757        roundtrip(
758            72057594037927936,
759            &[0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x01],
760        ); // 9 bytes
761        roundtrip(
762            i64::MAX as u64,
763            &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F],
764        ); // 10 bytes
765           // roundtrip(
766           //     i64::MAX as u64 + 1,
767           //     &[0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x7F],
768           // ); // 10 bytes
769    }
770
771    #[cfg(test)]
772    proptest! {
773
774        #[test]
775        fn u64_check_size_1(v in 0u64..=127u64) {
776            prop_assert_eq!(bytes_u64(v).len(), 1);
777        }
778
779        #[test]
780        fn u64_check_size_2(v in 128u64..=16383u64) {
781            prop_assert_eq!(bytes_u64(v).len(), 2);
782        }
783        #[test]
784        fn u64_check_size_3(v in 16384u64..=2097151u64) {
785            prop_assert_eq!(bytes_u64(v).len(), 3);
786        }
787        #[test]
788        fn u64_check_size_4(v in 2097152u64..=268435455u64) {
789            prop_assert_eq!(bytes_u64(v).len(), 4);
790        }
791
792        #[test]
793        fn u64_check_size_5(v in 268435456u64..=34359738367u64) {
794            prop_assert_eq!(bytes_u64(v).len(), 5);
795        }
796
797        #[test]
798        fn u64_check_size_6(v in 34359738368u64..=4398046511103u64) {
799            prop_assert_eq!(bytes_u64(v).len(), 6);
800        }
801
802        #[test]
803        fn u64_check_size_7(v in 4398046511104u64..=562949953421311u64) {
804            prop_assert_eq!(bytes_u64(v).len(), 7);
805        }
806
807        #[test]
808        fn u64_check_size_8(v in 562949953421312u64..=72057594037927935u64) {
809            prop_assert_eq!(bytes_u64(v).len(), 8);
810        }
811
812        #[test]
813        fn u64_check_size_9(v in 72057594037927936u64..=i64::MAX as u64) {
814            prop_assert_eq!(bytes_u64(v).len(), 9);
815        }
816
817        #[test]
818        fn i64_check_size_1(v in -64i64..=64i64 - 1) {
819            prop_assert_eq!(bytes_i64(v).len(), 1);
820        }
821
822        #[test]
823        fn i64_check_size_2_part1(v in -8192i64..=-64i64 - 1) {
824            prop_assert_eq!(bytes_i64(v).len(), 2);
825        }
826        #[test]
827        fn i64_check_size_2_part2(v in 64i64..=8192i64 - 1) {
828            prop_assert_eq!(bytes_i64(v).len(), 2);
829        }
830
831        #[test]
832        fn i64_check_size_3_part1(v in -1048576i64..=-8192i64 - 1) {
833            prop_assert_eq!(bytes_i64(v).len(), 3);
834        }
835        #[test]
836        fn i64_check_size_3_part2(v in 8192i64..=1048576i64 - 1) {
837            prop_assert_eq!(bytes_i64(v).len(), 3);
838        }
839
840        #[test]
841        fn i64_check_size_4_part1(v in -134217728i64..=-1048576i64 - 1) {
842            prop_assert_eq!(bytes_i64(v).len(), 4);
843        }
844        #[test]
845        fn i64_check_size_4_part2(v in 1048576i64..=134217728i64 - 1) {
846            prop_assert_eq!(bytes_i64(v).len(), 4);
847        }
848
849        #[test]
850        fn i64_check_size_5_part1(v in -17179869184i64..=-134217728i64 - 1) {
851            prop_assert_eq!(bytes_i64(v).len(), 5);
852        }
853        #[test]
854        fn i64_check_size_5_part2(v in 134217728i64..=17179869184i64 - 1) {
855            prop_assert_eq!(bytes_i64(v).len(), 5);
856        }
857
858        #[test]
859        fn i64_check_size_6_part1(v in -2199023255552i64..=-17179869184i64 - 1) {
860            prop_assert_eq!(bytes_i64(v).len(), 6);
861        }
862        #[test]
863        fn i64_check_size_6_part2(v in 17179869184i64..=2199023255552i64 - 1) {
864            prop_assert_eq!(bytes_i64(v).len(), 6);
865        }
866
867        #[test]
868        fn i64_check_size_7_part1(v in -281474976710656i64..=-2199023255552i64 - 1) {
869            prop_assert_eq!(bytes_i64(v).len(), 7);
870        }
871        #[test]
872        fn i64_check_size_7_part2(v in 2199023255552i64..=281474976710656i64 - 1) {
873            prop_assert_eq!(bytes_i64(v).len(), 7);
874        }
875
876        #[test]
877        fn i64_check_size_8_part1(v in -36028797018963968i64..=-281474976710656i64 - 1) {
878            prop_assert_eq!(bytes_i64(v).len(), 8);
879        }
880        #[test]
881        fn i64_check_size_8_part2(v in 281474976710656i64..=36028797018963968i64 - 1) {
882            prop_assert_eq!(bytes_i64(v).len(), 8);
883        }
884
885        #[test]
886        fn i64_check_size_9_part1(v in i64::MIN / 2..=-36028797018963968i64 - 1) {
887            prop_assert_eq!(bytes_i64(v).len(), 9);
888        }
889        #[test]
890        fn i64_check_size_9_part2(v in 36028797018963968i64..=i64::MAX / 2) {
891            prop_assert_eq!(bytes_i64(v).len(), 9);
892        }
893
894        #[test]
895        fn i64_check_size_10_part1(v in i64::MIN..=i64::MIN / 2 - 1) {
896            prop_assert_eq!(bytes_i64(v).len(), 10);
897        }
898        #[test]
899        fn i64_check_size_10_part2(v in i64::MAX / 2 + 1..=i64::MAX) {
900            prop_assert_eq!(bytes_i64(v).len(), 10);
901        }
902
903        #[test]
904        fn u64_roundtrip(i in u64::ANY) {
905            let mut w = Cursor::new(vec![]);
906            w.put_u64(i).unwrap();
907            let mut r = Cursor::new(w.into_inner());
908            prop_assert_eq![i, r.get_u64().unwrap()];
909        }
910
911        #[test]
912        fn i64_roundtrip(i in any::<i64>()) {
913            let mut w = Cursor::new(vec![]);
914            w.put_i64(i).unwrap();
915            let mut r = Cursor::new(w.into_inner());
916            prop_assert_eq![i, r.get_i64().unwrap()];
917        }
918
919        #[test]
920        fn prop_u64_array_roundtrip(arr in any::<[u64; 32]>()) {
921            let mut w = Cursor::new(vec![]);
922            for a in arr.iter() {
923                w.put_u64(*a).unwrap();
924            }
925            let mut dec = Vec::new();
926            let mut r = Cursor::new(w.into_inner());
927            for _ in 0..arr.len() {
928                dec.push(r.get_u64().unwrap());
929            }
930            prop_assert_eq![dec, arr];
931        }
932
933        #[test]
934        fn prop_bits_roundtrip(bits in collection::vec(any::<bool>(), 0..400)) {
935            let mut w = Cursor::new(vec![]);
936            w.put_bits(&bits).unwrap();
937            let mut r = Cursor::new(w.into_inner());
938            prop_assert_eq![bits.clone(), r.get_bits(bits.len()).unwrap()];
939        }
940
941        #[test]
942        fn prop_short_string_roundtrip(s in ".{1,255}".prop_filter("Filter strings that are too large", |s| s.len() < 256)) {
943            let mut w = Cursor::new(vec![]);
944            w.put_short_string(&s).unwrap();
945            let inner = w.into_inner();
946            prop_assert_eq!(inner[0] as usize, s.len());
947            prop_assert_eq!(std::str::from_utf8(&inner[1..]), Ok(&*s));
948        }
949
950        #[test]
951        fn arbitrary_values_list(vals in collection::vec(any::<Val>(), 0..100)) {
952            let mut w = Cursor::new(vec![]);
953            for val in vals.clone() {
954                match val {
955                    Val::I8(v) => w.put_i8(v).unwrap(),
956                    Val::U8(v) => w.put_u8(v).unwrap(),
957                    Val::I16(v) => w.put_i16(v).unwrap(),
958                    Val::U16(v) => w.put_u16(v).unwrap(),
959                    Val::I32(v) => w.put_i32(v).unwrap(),
960                    Val::U32(v) => w.put_u32(v).unwrap(),
961                    Val::I64(v) => w.put_i64(v).unwrap(),
962                    Val::U64(v) => w.put_u64(v).unwrap(),
963                    Val::Bytes(v) => w.write_all(&v).unwrap(),
964                    Val::Bits(v) => w.put_bits(&v).unwrap(),
965                }
966
967            }
968            let mut r = Cursor::new(w.into_inner());
969            let mut parsed_vals: Vec<Val> = Vec::new();
970            for val in vals.clone() {
971                match val {
972                    Val::I8(_) => parsed_vals.push(r.get_i8().unwrap().into()),
973                    Val::U8(_) => parsed_vals.push(r.get_u8().unwrap().into()),
974                    Val::I16(_) => parsed_vals.push(r.get_i16().unwrap().into()),
975                    Val::U16(_) => parsed_vals.push(r.get_u16().unwrap().into()),
976                    Val::I32(_) => parsed_vals.push(r.get_i32().unwrap().into()),
977                    Val::U32(_) => parsed_vals.push(r.get_u32().unwrap().into()),
978                    Val::I64(_) => parsed_vals.push(r.get_i64().unwrap().into()),
979                    Val::U64(_) => parsed_vals.push(r.get_u64().unwrap().into()),
980                    Val::Bytes(bytes) => {
981                        let mut buf = vec![0u8; bytes.len()];
982                        r.read_exact(&mut buf).unwrap();
983                        parsed_vals.push(buf.to_vec().into());
984                    },
985                    Val::Bits(bits) => parsed_vals.push(r.get_bits(bits.len()).unwrap().into()),
986                }
987            }
988            prop_assert_eq!(parsed_vals, vals);
989        }
990
991        #[test]
992        fn u16_u32_u64_equivalence(i in any::<u16>()) {
993            let expected_bytes = bytes_u16(i);
994            prop_assert_eq!(&bytes_u64(i as u64), &expected_bytes);
995            prop_assert_eq!(&bytes_u32(i as u32), &expected_bytes);
996        }
997
998        #[test]
999        fn i16_i32_i64_equivalence(i in any::<i16>()) {
1000            let expected_bytes = bytes_i16(i);
1001            prop_assert_eq!(&bytes_i64(i as i64), &expected_bytes);
1002            prop_assert_eq!(&bytes_i32(i as i32), &expected_bytes);
1003        }
1004    }
1005}
sigma_ser/vlq_encode.rs

sigma_ser/
vlq_encode.rs