integer_encoding/
varint.rs

1use std::mem::size_of;
2
3/// Most-significant byte, == 0x80
4pub const MSB: u8 = 0b1000_0000;
5/// All bits except for the most significant. Can be used as bitmask to drop the most-signficant
6/// bit using `&` (binary-and).
7const DROP_MSB: u8 = 0b0111_1111;
8
9/// How many bytes an integer uses when being encoded as a VarInt.
10#[inline]
11fn required_encoded_space_unsigned(mut v: u64) -> usize {
12    if v == 0 {
13        return 1;
14    }
15
16    let mut logcounter = 0;
17    while v > 0 {
18        logcounter += 1;
19        v >>= 7;
20    }
21    logcounter
22}
23
24/// How many bytes an integer uses when being encoded as a VarInt.
25#[inline]
26fn required_encoded_space_signed(v: i64) -> usize {
27    required_encoded_space_unsigned(zigzag_encode(v))
28}
29
30/// Varint (variable length integer) encoding, as described in
31/// https://developers.google.com/protocol-buffers/docs/encoding.
32///
33/// Uses zigzag encoding (also described there) for signed integer representation.
34pub trait VarInt: Sized + Copy {
35    /// Returns the number of bytes this number needs in its encoded form. Note: This varies
36    /// depending on the actual number you want to encode.
37    fn required_space(self) -> usize;
38    /// Decode a value from the slice. Returns the value and the number of bytes read from the
39    /// slice (can be used to read several consecutive values from a big slice)
40    /// return None if all bytes has MSB set.
41    fn decode_var(src: &[u8]) -> Option<(Self, usize)>;
42    /// Encode a value into the slice. The slice must be at least `required_space()` bytes long.
43    /// The number of bytes taken by the encoded integer is returned.
44    fn encode_var(self, src: &mut [u8]) -> usize;
45
46    /// Helper: Encode a value and return the encoded form as Vec. The Vec must be at least
47    /// `required_space()` bytes long.
48    fn encode_var_vec(self) -> Vec<u8> {
49        let mut v = vec![0; self.required_space()];
50        self.encode_var(&mut v);
51        v
52    }
53}
54
55#[inline]
56fn zigzag_encode(from: i64) -> u64 {
57    ((from << 1) ^ (from >> 63)) as u64
58}
59
60// see: http://stackoverflow.com/a/2211086/56332
61// casting required because operations like unary negation
62// cannot be performed on unsigned integers
63#[inline]
64fn zigzag_decode(from: u64) -> i64 {
65    ((from >> 1) ^ (-((from & 1) as i64)) as u64) as i64
66}
67
68pub(crate) trait VarIntMaxSize {
69    fn varint_max_size() -> usize;
70}
71
72impl<VI: VarInt> VarIntMaxSize for VI {
73    fn varint_max_size() -> usize {
74        (size_of::<VI>() * 8 + 7) / 7
75    }
76}
77
78macro_rules! impl_varint {
79    ($t:ty, unsigned) => {
80        impl VarInt for $t {
81            fn required_space(self) -> usize {
82                required_encoded_space_unsigned(self as u64)
83            }
84
85            fn decode_var(src: &[u8]) -> Option<(Self, usize)> {
86                let (n, s) = u64::decode_var(src)?;
87                Some((n as Self, s))
88            }
89
90            fn encode_var(self, dst: &mut [u8]) -> usize {
91                (self as u64).encode_var(dst)
92            }
93        }
94    };
95    ($t:ty, signed) => {
96        impl VarInt for $t {
97            fn required_space(self) -> usize {
98                required_encoded_space_signed(self as i64)
99            }
100
101            fn decode_var(src: &[u8]) -> Option<(Self, usize)> {
102                let (n, s) = i64::decode_var(src)?;
103                Some((n as Self, s))
104            }
105
106            fn encode_var(self, dst: &mut [u8]) -> usize {
107                (self as i64).encode_var(dst)
108            }
109        }
110    };
111}
112
113impl_varint!(usize, unsigned);
114impl_varint!(u32, unsigned);
115impl_varint!(u16, unsigned);
116impl_varint!(u8, unsigned);
117
118impl_varint!(isize, signed);
119impl_varint!(i32, signed);
120impl_varint!(i16, signed);
121impl_varint!(i8, signed);
122
123// Below are the "base implementations" doing the actual encodings; all other integer types are
124// first cast to these biggest types before being encoded.
125
126impl VarInt for u64 {
127    fn required_space(self) -> usize {
128        required_encoded_space_unsigned(self)
129    }
130
131    #[inline]
132    fn decode_var(src: &[u8]) -> Option<(Self, usize)> {
133        let mut result: u64 = 0;
134        let mut shift = 0;
135
136        let mut success = false;
137        for b in src.iter() {
138            let msb_dropped = b & DROP_MSB;
139            result |= (msb_dropped as u64) << shift;
140            shift += 7;
141
142            if b & MSB == 0 || shift > (9 * 7) {
143                success = b & MSB == 0;
144                break;
145            }
146        }
147
148        if success {
149            Some((result, shift / 7))
150        } else {
151            None
152        }
153    }
154
155    #[inline]
156    fn encode_var(self, dst: &mut [u8]) -> usize {
157        debug_assert!(dst.len() >= self.required_space());
158        let mut n = self;
159        let mut i = 0;
160
161        while n >= 0x80 {
162            dst[i] = MSB | (n as u8);
163            i += 1;
164            n >>= 7;
165        }
166
167        dst[i] = n as u8;
168        i + 1
169    }
170}
171
172impl VarInt for i64 {
173    fn required_space(self) -> usize {
174        required_encoded_space_signed(self)
175    }
176
177    #[inline]
178    fn decode_var(src: &[u8]) -> Option<(Self, usize)> {
179        if let Some((result, size)) = u64::decode_var(src) {
180            Some((zigzag_decode(result) as Self, size))
181        } else {
182            None
183        }
184    }
185
186    #[inline]
187    fn encode_var(self, dst: &mut [u8]) -> usize {
188        debug_assert!(dst.len() >= self.required_space());
189        let mut n: u64 = zigzag_encode(self);
190        let mut i = 0;
191
192        while n >= 0x80 {
193            dst[i] = MSB | (n as u8);
194            i += 1;
195            n >>= 7;
196        }
197
198        dst[i] = n as u8;
199        i + 1
200    }
201}