mini_leb128/lib.rs
1//! A minimal library to read and write integers encoded using [LEB128].
2//!
3//! Unlike other LEB128 libraries there are three notable changes:
4//!
5//! 1. Uses [`zc_io`] instead of the standard library for `no_std`
//! compatibility. The standard library can be used through
7//! [`zc_io::IoReader`] and [`zc_io::IoWriter`] instances.
8//! 2. When writing encoded integers, an internal buffer on the stack is used to
9//! possibly reduce system calls; each encoded integer makes a single call to
10//! [`write_all`]. This is particularly useful since buffered writers are
11//! frequently underutilized (and not native to [`zc_io`]).
12//! 3. Methods always return how many bytes were used when reading or writing
13//! the integers, which may help in instances where that information would
14//! have to get retrospectively computed.
15//!
16//! If none of these changes are meaningful to you, consider another LEB128
17//! project, as they would have less friction when just using the standard
18//! library's I/O interfaces.
19//!
20//! # Examples
21//!
22//! Read and write unsigned integers:
23//!
24//! ```
25//! # fn main() -> zc_io::Result<()> {
26//! let mut buf = [0; 5];
27//!
28//! let encoded_length = mini_leb128::write_u32(buf.as_mut_slice(), 624_485)?;
29//! assert_eq!(encoded_length.get(), 3);
30//! assert_eq!(buf, [0xE5, 0x8E, 0x26, 0x00, 0x00]);
31//!
32//! let (value, bytes_read) = mini_leb128::read_u32(buf.as_slice())?;
33//! assert_eq!(value, 624_485);
34//! assert_eq!(bytes_read.get(), 3);
35//! # Ok(())
36//! # }
37//! ```
38//!
39//! Read and write signed integers:
40//!
41//! ```
42//! # fn main() -> zc_io::Result<()> {
43//! let mut buf = [0; 5];
44//!
45//! let encoded_length = mini_leb128::write_i32(buf.as_mut_slice(), -123_456)?;
46//! assert_eq!(encoded_length.get(), 3);
47//! assert_eq!(buf, [0xC0, 0xBB, 0x78, 0x00, 0x00]);
48//!
49//! let (value, bytes_read) = mini_leb128::read_i32(buf.as_slice())?;
50//! assert_eq!(value, -123_456);
51//! assert_eq!(bytes_read.get(), 3);
52//! # Ok(())
53//! # }
54//! ```
55//!
56//! [LEB128]: https://en.wikipedia.org/wiki/LEB128
57//! [`zc_io::IoReader`]: https://docs.rs/zc_io/latest/zc_io/struct.IoReader.html
58//! [`zc_io::IoWriter`]: https://docs.rs/zc_io/latest/zc_io/struct.IoWriter.html
59//! [`write_all`]: Write::write_all
60
61#![no_std]
62#![doc(html_root_url = "https://docs.rs/mini_leb128/0.1.1")]
63#![deny(missing_docs, clippy::pedantic)]
64
65use core::{mem, num::NonZeroUsize};
66
67use arrayvec::ArrayVec;
68use zc_io::{error, Error, Read, Result, Write};
69
70////////////////////////////////////////////////////////////////////////////////
71// LEB128 constants
72////////////////////////////////////////////////////////////////////////////////
73
// Highest bit of an encoded byte: set when more bytes follow.
const CONTINUATION_BIT: u8 = 1 << 7;
// Highest payload bit of an encoded byte: the sign of a signed integer's
// final byte.
const SIGN_BIT: u8 = 1 << 6;
// Selects the payload bits of an encoded byte (everything but the
// continuation bit).
const VALUE_MASK: u8 = !CONTINUATION_BIT;
// Number of payload bits carried by each encoded byte.
const VALUE_LENGTH: u32 = VALUE_MASK.count_ones();
78
79////////////////////////////////////////////////////////////////////////////////
80// Encoding
81////////////////////////////////////////////////////////////////////////////////
82
// Expands to the body of a `write_u*` function: encodes `$n` (an unsigned
// integer of type `$int`) into a stack buffer, seven bits per byte starting
// with the least significant ones, then hands the whole buffer to `$writer`
// in a single `write_all` call.
macro_rules! write_unsigned {
    ($writer:ident, $n:ident, $int:ty) => {{
        // Five output bytes are reserved for every four bytes of integer,
        // which is enough for 32-, 64-, and 128-bit values.
        const CAPACITY: usize = (mem::size_of::<$int>() / 4) * 5;

        let mut encoded = ArrayVec::<u8, CAPACITY>::new();

        loop {
            #[allow(clippy::cast_possible_truncation)]
            let payload = ($n as u8) & VALUE_MASK;
            $n >>= VALUE_LENGTH;

            if $n == 0 {
                // Nothing but zeros remain, so this is the final byte.
                encoded.push(payload);
                break;
            }

            encoded.push(payload | CONTINUATION_BIT);
        }

        $writer.write_all(&encoded)?;

        // SAFETY: the loop above pushes at least one byte before it exits, so
        // the buffer's length is never zero.
        Ok(unsafe { NonZeroUsize::new_unchecked(encoded.len()) })
    }};
}
109
110/// Encodes an unsigned 32-bit integer using LEB128.
111///
112/// Returns a [`NonZeroUsize`] that stores how many bytes it took to encode the
113/// integer.
114///
115/// # Errors
116///
117/// Propagates any I/O errors originating from the writer. However, encoding the
118/// integer itself is infallible.
119pub fn write_u32<W>(mut writer: W, mut n: u32) -> Result<NonZeroUsize>
120where
121 W: Write,
122{
123 write_unsigned!(writer, n, u32)
124}
125
126/// Encodes an unsigned 64-bit integer using LEB128.
127///
128/// Returns a [`NonZeroUsize`] that stores how many bytes it took to encode the
129/// integer.
130///
131/// # Errors
132///
133/// Propagates any I/O errors originating from the writer. However, encoding the
134/// integer itself is infallible.
135pub fn write_u64<W>(mut writer: W, mut n: u64) -> Result<NonZeroUsize>
136where
137 W: Write,
138{
139 write_unsigned!(writer, n, u64)
140}
141
142/// Encodes an unsigned 128-bit integer using LEB128.
143///
144/// Returns a [`NonZeroUsize`] that stores how many bytes it took to encode the
145/// integer.
146///
147/// # Errors
148///
149/// Propagates any I/O errors originating from the writer. However, encoding the
150/// integer itself is infallible.
151pub fn write_u128<W>(mut writer: W, mut n: u128) -> Result<NonZeroUsize>
152where
153 W: Write,
154{
155 write_unsigned!(writer, n, u128)
156}
157
// Expands to the body of a `write_i*` function: encodes `$n` (a signed
// integer of type `$int`) into a stack buffer, seven bits per byte starting
// with the least significant ones, then hands the whole buffer to `$writer`
// in a single `write_all` call.
macro_rules! write_signed {
    ($writer:ident, $n:ident, $int:ty) => {{
        // Five output bytes are reserved for every four bytes of integer,
        // which is enough for 32-, 64-, and 128-bit values.
        const CAPACITY: usize = (mem::size_of::<$int>() / 4) * 5;

        let mut encoded = ArrayVec::<u8, CAPACITY>::new();

        loop {
            #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
            let low_byte = $n as u8;

            // Shift out all payload bits except the topmost one, which stays
            // at the bottom of `$n` as the would-be sign bit of this byte.
            $n >>= SIGN_BIT.trailing_zeros();

            // If what is left is pure sign extension (all zeros or all ones),
            // the payload's sign bit already agrees with the integer's sign
            // and this byte can be the final one.
            if matches!($n, 0 | -1) {
                encoded.push(low_byte & VALUE_MASK);
                break;
            }

            // More bytes are needed: drop the remaining payload bit as well
            // and flag that the integer continues.
            $n >>= 1;
            encoded.push(low_byte | CONTINUATION_BIT);
        }

        $writer.write_all(&encoded)?;

        // SAFETY: the loop above pushes at least one byte before it exits, so
        // the buffer's length is never zero.
        Ok(unsafe { NonZeroUsize::new_unchecked(encoded.len()) })
    }};
}
189
190/// Encodes a signed 32-bit integer using LEB128.
191///
192/// Returns a [`NonZeroUsize`] that stores how many bytes it took to encode the
193/// integer.
194///
195/// # Errors
196///
197/// Propagates any I/O errors originating from the writer. However, encoding the
198/// integer itself is infallible.
199pub fn write_i32<W>(mut writer: W, mut n: i32) -> Result<NonZeroUsize>
200where
201 W: Write,
202{
203 write_signed!(writer, n, i32)
204}
205
206/// Encodes a signed 64-bit integer using LEB128.
207///
208/// Returns a [`NonZeroUsize`] that stores how many bytes it took to encode the
209/// integer.
210///
211/// # Errors
212///
213/// Propagates any I/O errors originating from the writer. However, encoding the
214/// integer itself is infallible.
215pub fn write_i64<W>(mut writer: W, mut n: i64) -> Result<NonZeroUsize>
216where
217 W: Write,
218{
219 write_signed!(writer, n, i64)
220}
221
222/// Encodes a signed 128-bit integer using LEB128.
223///
224/// Returns a [`NonZeroUsize`] that stores how many bytes it took to encode the
225/// integer.
226///
227/// # Errors
228///
229/// Propagates any I/O errors originating from the writer. However, encoding the
230/// integer itself is infallible.
231pub fn write_i128<W>(mut writer: W, mut n: i128) -> Result<NonZeroUsize>
232where
233 W: Write,
234{
235 write_signed!(writer, n, i128)
236}
237
238////////////////////////////////////////////////////////////////////////////////
239// Decoding
240////////////////////////////////////////////////////////////////////////////////
241
242/// This consumes all of the bytes that, theoretically, are a continuation of
243/// the LEB128-encoded integer. However, it has exceeded what the datatype could
244/// store, so this is more so the stream isn't left in, again, theoretically, an
245/// invalid state.
246///
247/// For instance, a developer could test for the `InvalidData` error variant and
248/// substitute the value as the max that datatype could represent if it was not
249/// critical. Then, resuming the data stream, it wouldn't be in the middle of
250/// an oversized integer.
251#[cold]
252fn discard_leb128<'a, R>(mut reader: R) -> Error
253where
254 R: Read<'a>,
255{
256 loop {
257 let byte = match reader.read_next() {
258 Ok(b) => b,
259 Err(error) => return error,
260 };
261
262 if byte & CONTINUATION_BIT == 0 {
263 break;
264 }
265 }
266
267 error!(
268 InvalidData,
269 "the LEB128-encoded integer getting read exceeded what the datatype could represent"
270 )
271}
272
// Expands to the body of a `read_u*` function: decodes an unsigned LEB128
// integer of type `$int` from `$reader`, returning the value together with the
// number of bytes consumed.
//
// Up to `ceil(BITS / 7)` bytes are accepted, so every value of `$int` can be
// decoded (five bytes for `u32`, ten for `u64`, nineteen for `u128`). An
// `InvalidData` error is produced when the integer keeps going past that
// length, or when a byte carries payload bits that fall beyond the width of
// `$int`.
macro_rules! read_unsigned {
    ($reader:expr, $int:ty) => {{
        let mut value: $int = 0;
        // Invariant: `shift < <$int>::BITS` at the top of every iteration, so
        // the shifts below never overflow their shift amount.
        let mut shift = 0_u32;
        let mut bytes_read = 0_usize;

        loop {
            let byte = $reader.read_next()?;
            let more = byte & CONTINUATION_BIT != 0;
            let group = <$int>::from(byte & VALUE_MASK);
            let shifted = group << shift;

            bytes_read += 1;

            // Shifting back only recovers `group` if none of its set bits
            // were pushed off the top of `$int`.
            if shifted >> shift != group {
                return Err(if more {
                    discard_leb128(&mut $reader)
                } else {
                    // This was already the final byte of the integer, so
                    // there is nothing left to skip.
                    error!(
                        InvalidData,
                        "the LEB128-encoded integer getting read exceeded what the datatype could represent"
                    )
                });
            }

            value |= shifted;

            if !more {
                // SAFETY: `bytes_read` was incremented at least once above.
                return Ok((value, unsafe { NonZeroUsize::new_unchecked(bytes_read) }));
            }

            shift += VALUE_LENGTH;

            // Every bit of `$int` has been filled, yet the integer continues.
            if shift >= <$int>::BITS {
                return Err(discard_leb128(&mut $reader));
            }
        }
    }};
}
299
300/// Decodes an unsigned 32-bit integer using LEB128.
301///
302/// Returns a tuple of the integer read and a [`NonZeroUsize`] that stores the
303/// number of bytes read.
304///
305/// # Errors
306///
307/// Propagates any I/O errors originating from the reader. Otherwise, an
308/// error kind of `InvalidData` is thrown if the integer read does not fit in
309/// the integer's datatype.
310pub fn read_u32<'a, R>(mut reader: R) -> Result<(u32, NonZeroUsize)>
311where
312 R: Read<'a>,
313{
314 read_unsigned!(reader, u32)
315}
316
317/// Decodes an unsigned 64-bit integer using LEB128.
318///
319/// Returns a tuple of the integer read and a [`NonZeroUsize`] that stores the
320/// number of bytes read.
321///
322/// # Errors
323///
324/// Propagates any I/O errors originating from the reader. Otherwise, an
325/// error kind of `InvalidData` is thrown if the integer read does not fit in
326/// the integer's datatype.
327pub fn read_u64<'a, R>(mut reader: R) -> Result<(u64, NonZeroUsize)>
328where
329 R: Read<'a>,
330{
331 read_unsigned!(reader, u64)
332}
333
334/// Decodes an unsigned 128-bit integer using LEB128.
335///
336/// Returns a tuple of the integer read and a [`NonZeroUsize`] that stores the
337/// number of bytes read.
338///
339/// # Errors
340///
341/// Propagates any I/O errors originating from the reader. Otherwise, an
342/// error kind of `InvalidData` is thrown if the integer read does not fit in
343/// the integer's datatype.
344pub fn read_u128<'a, R>(mut reader: R) -> Result<(u128, NonZeroUsize)>
345where
346 R: Read<'a>,
347{
348 read_unsigned!(reader, u128)
349}
350
// Expands to the body of a `read_i*` function: decodes a signed LEB128
// integer of type `$int` from `$reader`, returning the value together with the
// number of bytes consumed.
//
// Up to `ceil(BITS / 7)` bytes are accepted, so every value of `$int` can be
// decoded (five bytes for `i32`, ten for `i64`, nineteen for `i128`). An
// `InvalidData` error is produced when the integer keeps going past that
// length, or when its final byte holds bits that are not a plain sign
// extension of a value of type `$int`.
macro_rules! read_signed {
    ($reader:expr, $int:ty) => {{
        let mut value: $int = 0;
        // Invariant: `shift < <$int>::BITS` at the top of every iteration, so
        // the shifts below never overflow their shift amount.
        let mut shift = 0_u32;
        let mut bytes_read = 0_usize;

        loop {
            let byte = $reader.read_next()?;
            let group = <$int>::from(byte & VALUE_MASK);

            bytes_read += 1;

            if byte & CONTINUATION_BIT == 0 {
                // Final byte: its topmost payload bit is the integer's sign,
                // so extend it across the rest of `$int`.
                let group = if byte & SIGN_BIT == 0 {
                    group
                } else {
                    group | !<$int>::from(VALUE_MASK)
                };
                let shifted = group << shift;

                // The arithmetic shift back only recovers `group` if the bits
                // pushed off the top were pure sign extension.
                if shifted >> shift != group {
                    return Err(error!(
                        InvalidData,
                        "the LEB128-encoded integer getting read exceeded what the datatype could represent"
                    ));
                }

                // SAFETY: `bytes_read` was incremented at least once above.
                return Ok((value | shifted, unsafe {
                    NonZeroUsize::new_unchecked(bytes_read)
                }));
            }

            value |= group << shift;
            shift += VALUE_LENGTH;

            // Every bit of `$int` has been filled, yet the integer continues.
            if shift >= <$int>::BITS {
                return Err(discard_leb128(&mut $reader));
            }
        }
    }};
}
380
381/// Decodes a signed 32-bit integer using LEB128.
382///
383/// Returns a tuple of the integer read and a [`NonZeroUsize`] that stores the
384/// number of bytes read.
385///
386/// # Errors
387///
388/// Propagates any I/O errors originating from the reader. Otherwise, an
389/// error kind of `InvalidData` is thrown if the integer read does not fit in
390/// the integer's datatype.
391pub fn read_i32<'a, R>(mut reader: R) -> Result<(i32, NonZeroUsize)>
392where
393 R: Read<'a>,
394{
395 read_signed!(reader, i32)
396}
397
398/// Decodes a signed 64-bit integer using LEB128.
399///
400/// Returns a tuple of the integer read and a [`NonZeroUsize`] that stores the
401/// number of bytes read.
402///
403/// # Errors
404///
405/// Propagates any I/O errors originating from the reader. Otherwise, an
406/// error kind of `InvalidData` is thrown if the integer read does not fit in
407/// the integer's datatype.
408pub fn read_i64<'a, R>(mut reader: R) -> Result<(i64, NonZeroUsize)>
409where
410 R: Read<'a>,
411{
412 read_signed!(reader, i64)
413}
414
415/// Decodes a signed 128-bit integer using LEB128.
416///
417/// Returns a tuple of the integer read and a [`NonZeroUsize`] that stores the
418/// number of bytes read.
419///
420/// # Errors
421///
422/// Propagates any I/O errors originating from the reader. Otherwise, an
423/// error kind of `InvalidData` is thrown if the integer read does not fit in
424/// the integer's datatype.
425pub fn read_i128<'a, R>(mut reader: R) -> Result<(i128, NonZeroUsize)>
426where
427 R: Read<'a>,
428{
429 read_signed!(reader, i128)
430}