integer_encoding/
varint.rs

1use std::mem::size_of;
2
3/// Most-significant byte, == 0x80
4pub const MSB: u8 = 0b1000_0000;
5/// All bits except for the most significant. Can be used as bitmask to drop the most-significant
6/// bit using `&` (binary-and).
7const DROP_MSB: u8 = 0b0111_1111;
8
9/// How many bytes an integer uses when being encoded as a `VarInt`.
10#[inline]
11fn required_encoded_space_unsigned(mut v: u64) -> usize {
12    if v == 0 {
13        return 1;
14    }
15
16    let mut logcounter = 0;
17    while v > 0 {
18        logcounter += 1;
19        v >>= 7;
20    }
21    logcounter
22}
23
24/// How many bytes an integer uses when being encoded as a [`VarInt`].
25#[inline]
26fn required_encoded_space_signed(v: i64) -> usize {
27    required_encoded_space_unsigned(zigzag_encode(v))
28}
29
30/// Varint (variable length integer) encoding, as described in
31/// <https://developers.google.com/protocol-buffers/docs/encoding>.
32///
33/// Uses zigzag encoding (also described there) for signed integer representation.
34pub trait VarInt: Sized + Copy {
35    /// Returns the number of bytes this number needs in its encoded form. Note: This varies
36    /// depending on the actual number you want to encode.
37    fn required_space(self) -> usize;
38    /// Decode a value from the slice. Returns the value and the number of bytes read from the
39    /// slice (can be used to read several consecutive values from a big slice)
40    /// return None if all bytes has MSB set.
41    fn decode_var(src: &[u8]) -> Option<(Self, usize)>;
42    /// Encode a value into the slice. The slice must be at least `required_space()` bytes long.
43    /// The number of bytes taken by the encoded integer is returned.
44    fn encode_var(self, src: &mut [u8]) -> usize;
45
46    /// Helper: Encode a value and return the encoded form as Vec. The Vec must be at least
47    /// `required_space()` bytes long.
48    fn encode_var_vec(self) -> Vec<u8> {
49        let mut v = vec![0; self.required_space()];
50        self.encode_var(&mut v);
51        v
52    }
53}
54
55#[inline]
56fn zigzag_encode(from: i64) -> u64 {
57    ((from << 1) ^ (from >> 63)) as u64
58}
59
60// see: http://stackoverflow.com/a/2211086/56332
61// casting required because operations like unary negation
62// cannot be performed on unsigned integers
63#[inline]
64fn zigzag_decode(from: u64) -> i64 {
65    ((from >> 1) ^ (-((from & 1) as i64)) as u64) as i64
66}
67
68pub(crate) trait VarIntMaxSize {
69    fn varint_max_size() -> usize;
70}
71
72impl<VI: VarInt> VarIntMaxSize for VI {
73    fn varint_max_size() -> usize {
74        (size_of::<VI>() * 8 + 7) / 7
75    }
76}
77
78macro_rules! impl_varint {
79    ($t:ty, unsigned) => {
80        impl VarInt for $t {
81            fn required_space(self) -> usize {
82                required_encoded_space_unsigned(self as u64)
83            }
84
85            fn decode_var(src: &[u8]) -> Option<(Self, usize)> {
86                let (n, s) = u64::decode_var(src)?;
87                Some((<Self as std::convert::TryFrom<u64>>::try_from(n).ok()?, s))
88            }
89
90            fn encode_var(self, dst: &mut [u8]) -> usize {
91                (self as u64).encode_var(dst)
92            }
93        }
94    };
95    ($t:ty, signed) => {
96        impl VarInt for $t {
97            fn required_space(self) -> usize {
98                required_encoded_space_signed(self as i64)
99            }
100
101            fn decode_var(src: &[u8]) -> Option<(Self, usize)> {
102                let (n, s) = i64::decode_var(src)?;
103                Some((<Self as std::convert::TryFrom<i64>>::try_from(n).ok()?, s))
104            }
105
106            fn encode_var(self, dst: &mut [u8]) -> usize {
107                (self as i64).encode_var(dst)
108            }
109        }
110    };
111}
112
113impl_varint!(usize, unsigned);
114impl_varint!(u32, unsigned);
115impl_varint!(u16, unsigned);
116impl_varint!(u8, unsigned);
117
118impl_varint!(isize, signed);
119impl_varint!(i32, signed);
120impl_varint!(i16, signed);
121impl_varint!(i8, signed);
122
123// Below are the "base implementations" doing the actual encodings; all other integer types are
124// first cast to these biggest types before being encoded.
125
126impl VarInt for u64 {
127    fn required_space(self) -> usize {
128        required_encoded_space_unsigned(self)
129    }
130
131    #[inline]
132    fn decode_var(src: &[u8]) -> Option<(Self, usize)> {
133        let mut result: u64 = 0;
134        let mut shift = 0;
135
136        let mut success = false;
137        for b in src {
138            let msb_dropped = b & DROP_MSB;
139            result |= u64::from(msb_dropped) << shift;
140            shift += 7;
141
142            if shift > (9 * 7) {
143                success = *b < 2;
144                break;
145            } else if b & MSB == 0 {
146                success = true;
147                break;
148            }
149        }
150
151        if success {
152            Some((result, shift / 7))
153        } else {
154            None
155        }
156    }
157
158    #[inline]
159    fn encode_var(self, dst: &mut [u8]) -> usize {
160        debug_assert!(dst.len() >= self.required_space());
161        let mut n = self;
162        let mut i = 0;
163
164        while n >= 0x80 {
165            dst[i] = MSB | (n as u8);
166            i += 1;
167            n >>= 7;
168        }
169
170        dst[i] = n as u8;
171        i + 1
172    }
173}
174
175impl VarInt for i64 {
176    fn required_space(self) -> usize {
177        required_encoded_space_signed(self)
178    }
179
180    #[inline]
181    fn decode_var(src: &[u8]) -> Option<(Self, usize)> {
182        if let Some((result, size)) = u64::decode_var(src) {
183            Some((zigzag_decode(result), size))
184        } else {
185            None
186        }
187    }
188
189    #[inline]
190    fn encode_var(self, dst: &mut [u8]) -> usize {
191        debug_assert!(dst.len() >= self.required_space());
192        let mut n: u64 = zigzag_encode(self);
193        let mut i = 0;
194
195        while n >= 0x80 {
196            dst[i] = MSB | (n as u8);
197            i += 1;
198            n >>= 7;
199        }
200
201        dst[i] = n as u8;
202        i + 1
203    }
204}