parquet_format_safe/thrift/varint/
decode.rs1use std::io;
2use std::io::Result;
3use std::mem::size_of;
4
/// Continuation flag: the top bit of an encoded byte. The encoders in this
/// file set it on every byte except the last one of a varint.
pub const MSB: u8 = 1 << 7;
/// Mask that keeps the seven payload bits of an encoded byte (everything but [`MSB`]).
const DROP_MSB: u8 = !MSB;
10
/// Returns how many bytes `v` occupies as a varint, at seven payload bits per byte.
///
/// Zero still needs one byte; `u64::MAX` needs ten.
#[inline]
fn required_encoded_space_unsigned(v: u64) -> usize {
    // Index of the highest set bit plus one; 0 when `v` is zero.
    let significant_bits = (64 - v.leading_zeros()) as usize;
    // Round up to whole 7-bit groups, with a floor of one byte for `v == 0`.
    ((significant_bits + 6) / 7).max(1)
}
25
26#[inline]
28fn required_encoded_space_signed(v: i64) -> usize {
29 required_encoded_space_unsigned(zigzag_encode(v))
30}
31
/// An integer type that can be written to and read from a variable-length
/// byte encoding.
pub trait VarInt: Sized + Copy {
    /// Returns the number of bytes needed to encode `self`.
    fn required_space(self) -> usize;

    /// Decodes one value from the start of `src`.
    ///
    /// Returns the value together with the number of bytes consumed, or
    /// `None` when `src` does not hold a decodable value.
    fn decode_var(src: &[u8]) -> Option<(Self, usize)>;

    /// Encodes `self` into the start of `dst` and returns the number of bytes
    /// written.
    ///
    /// The implementations in this file panic when `dst` is shorter than
    /// [`required_space`](VarInt::required_space).
    fn encode_var(self, dst: &mut [u8]) -> usize;

    /// Encodes `self` into a freshly allocated vector of
    /// [`required_space`](VarInt::required_space) bytes.
    fn encode_var_vec(self) -> Vec<u8> {
        let mut encoded = vec![0_u8; self.required_space()];
        self.encode_var(&mut encoded);
        encoded
    }
}
57
/// Maps a signed integer to an unsigned one so that values of small magnitude
/// stay small: 0 -> 0, -1 -> 1, 1 -> 2, -2 -> 3, ...
#[inline]
fn zigzag_encode(from: i64) -> u64 {
    // Arithmetic shift: all ones for negative input, all zeros otherwise.
    let sign_mask = (from >> 63) as u64;
    let doubled = (from as u64) << 1;
    doubled ^ sign_mask
}
62
/// Inverse of the zigzag mapping: 0 -> 0, 1 -> -1, 2 -> 1, 3 -> -2, ...
#[inline]
fn zigzag_decode(from: u64) -> i64 {
    let magnitude = from >> 1;
    // The lowest bit carries the sign: all ones when set, all zeros otherwise.
    let sign_mask = (from & 1).wrapping_neg();
    (magnitude ^ sign_mask) as i64
}
70
71pub(crate) trait VarIntMaxSize {
72 fn varint_max_size() -> usize;
73}
74
75impl<VI: VarInt> VarIntMaxSize for VI {
76 fn varint_max_size() -> usize {
77 (size_of::<VI>() * 8 + 7) / 7
78 }
79}
80
81macro_rules! impl_varint {
82 ($t:ty, unsigned) => {
83 impl VarInt for $t {
84 fn required_space(self) -> usize {
85 required_encoded_space_unsigned(self as u64)
86 }
87
88 fn decode_var(src: &[u8]) -> Option<(Self, usize)> {
89 let (n, s) = u64::decode_var(src)?;
90 Some((n as Self, s))
91 }
92
93 fn encode_var(self, dst: &mut [u8]) -> usize {
94 (self as u64).encode_var(dst)
95 }
96 }
97 };
98 ($t:ty, signed) => {
99 impl VarInt for $t {
100 fn required_space(self) -> usize {
101 required_encoded_space_signed(self as i64)
102 }
103
104 fn decode_var(src: &[u8]) -> Option<(Self, usize)> {
105 let (n, s) = i64::decode_var(src)?;
106 Some((n as Self, s))
107 }
108
109 fn encode_var(self, dst: &mut [u8]) -> usize {
110 (self as i64).encode_var(dst)
111 }
112 }
113 };
114}
115
116impl_varint!(u32, unsigned);
117impl_varint!(u16, unsigned);
118impl_varint!(u8, unsigned);
119
120impl_varint!(i32, signed);
121impl_varint!(i16, signed);
122impl_varint!(i8, signed);
123
124impl VarInt for u64 {
128 fn required_space(self) -> usize {
129 required_encoded_space_unsigned(self)
130 }
131
132 #[inline]
133 fn decode_var(src: &[u8]) -> Option<(Self, usize)> {
134 let mut result: u64 = 0;
135 let mut shift = 0;
136
137 let mut success = false;
138 for b in src.iter() {
139 let msb_dropped = b & DROP_MSB;
140 result |= (msb_dropped as u64) << shift;
141 shift += 7;
142
143 if b & MSB == 0 || shift > (9 * 7) {
144 success = b & MSB == 0;
145 break;
146 }
147 }
148
149 if success {
150 Some((result, shift / 7))
151 } else {
152 None
153 }
154 }
155
156 #[inline]
157 fn encode_var(self, dst: &mut [u8]) -> usize {
158 assert!(dst.len() >= self.required_space());
159 let mut n = self;
160 let mut i = 0;
161
162 while n >= 0x80 {
163 dst[i] = MSB | (n as u8);
164 i += 1;
165 n >>= 7;
166 }
167
168 dst[i] = n as u8;
169 i + 1
170 }
171}
172
173impl VarInt for i64 {
174 fn required_space(self) -> usize {
175 required_encoded_space_signed(self)
176 }
177
178 #[inline]
179 fn decode_var(src: &[u8]) -> Option<(Self, usize)> {
180 if let Some((result, size)) = u64::decode_var(src) {
181 Some((zigzag_decode(result) as Self, size))
182 } else {
183 None
184 }
185 }
186
187 #[inline]
188 fn encode_var(self, dst: &mut [u8]) -> usize {
189 assert!(dst.len() >= self.required_space());
190 let mut n: u64 = zigzag_encode(self as i64);
191 let mut i = 0;
192
193 while n >= 0x80 {
194 dst[i] = MSB | (n as u8);
195 i += 1;
196 n >>= 7;
197 }
198
199 dst[i] = n as u8;
200 i + 1
201 }
202}
203
204pub trait VarIntReader {
208 fn read_varint<VI: VarInt>(&mut self) -> Result<VI>;
215}
216
217#[derive(Default)]
219pub(crate) struct VarIntProcessor {
220 buf: [u8; 10],
221 maxsize: usize,
222 pub i: usize,
223}
224
225impl VarIntProcessor {
226 pub fn new<VI: VarIntMaxSize>() -> VarIntProcessor {
227 VarIntProcessor {
228 maxsize: VI::varint_max_size(),
229 ..VarIntProcessor::default()
230 }
231 }
232
233 pub fn push(&mut self, b: u8) -> Result<()> {
234 if self.i >= self.maxsize {
235 return Err(io::Error::new(
236 io::ErrorKind::InvalidData,
237 "Unterminated varint",
238 ));
239 }
240 self.buf[self.i] = b;
241 self.i += 1;
242 Ok(())
243 }
244
245 pub fn finished(&self) -> bool {
246 self.i > 0 && (self.buf[self.i - 1] & MSB == 0)
247 }
248
249 pub fn decode<VI: VarInt>(&self) -> Option<VI> {
250 Some(VI::decode_var(&self.buf[0..self.i])?.0)
251 }
252}
253
254impl<R: io::Read> VarIntReader for R {
255 fn read_varint<VI: VarInt>(&mut self) -> Result<VI> {
256 let mut buf = [0_u8; 1];
257 let mut p = VarIntProcessor::new::<VI>();
258
259 while !p.finished() {
260 let read = self.read(&mut buf)?;
261
262 if read == 0 && p.i == 0 {
264 return Err(io::Error::new(io::ErrorKind::UnexpectedEof, "Reached EOF"));
265 }
266 if read == 0 {
267 break;
268 }
269
270 p.push(buf[0])?;
271 }
272
273 p.decode()
274 .ok_or_else(|| io::Error::new(io::ErrorKind::UnexpectedEof, "Reached EOF"))
275 }
276}