mini_leb128/
lib.rs

1//! A minimal library to read and write integers encoded using [LEB128].
2//!
3//! Unlike other LEB128 libraries there are three notable changes:
4//!
//! 1. Uses [`zc_io`] instead of the standard library for `no_std`
//!    compatibility. The standard library can be used through
//!    [`zc_io::IoReader`] and [`zc_io::IoWriter`] instances.
8//! 2. When writing encoded integers, an internal buffer on the stack is used to
9//!    possibly reduce system calls; each encoded integer makes a single call to
10//!    [`write_all`]. This is particularly useful since buffered writers are
11//!    frequently underutilized (and not native to [`zc_io`]).
12//! 3. Methods always return how many bytes were used when reading or writing
13//!    the integers, which may help in instances where that information would
14//!    have to get retrospectively computed.
15//!
//! If none of these changes are meaningful to you, consider another LEB128
//! project: one built directly on the standard library's I/O interfaces will
//! have less friction.
19//!
20//! # Examples
21//!
22//! Read and write unsigned integers:
23//!
24//! ```
25//! # fn main() -> zc_io::Result<()> {
26//! let mut buf = [0; 5];
27//!
28//! let encoded_length = mini_leb128::write_u32(buf.as_mut_slice(), 624_485)?;
29//! assert_eq!(encoded_length.get(), 3);
30//! assert_eq!(buf, [0xE5, 0x8E, 0x26, 0x00, 0x00]);
31//!
32//! let (value, bytes_read) = mini_leb128::read_u32(buf.as_slice())?;
33//! assert_eq!(value, 624_485);
34//! assert_eq!(bytes_read.get(), 3);
35//! # Ok(())
36//! # }
37//! ```
38//!
39//! Read and write signed integers:
40//!
41//! ```
42//! # fn main() -> zc_io::Result<()> {
43//! let mut buf = [0; 5];
44//!
45//! let encoded_length = mini_leb128::write_i32(buf.as_mut_slice(), -123_456)?;
46//! assert_eq!(encoded_length.get(), 3);
47//! assert_eq!(buf, [0xC0, 0xBB, 0x78, 0x00, 0x00]);
48//!
49//! let (value, bytes_read) = mini_leb128::read_i32(buf.as_slice())?;
50//! assert_eq!(value, -123_456);
51//! assert_eq!(bytes_read.get(), 3);
52//! # Ok(())
53//! # }
54//! ```
55//!
56//! [LEB128]: https://en.wikipedia.org/wiki/LEB128
57//! [`zc_io::IoReader`]: https://docs.rs/zc_io/latest/zc_io/struct.IoReader.html
58//! [`zc_io::IoWriter`]: https://docs.rs/zc_io/latest/zc_io/struct.IoWriter.html
59//! [`write_all`]: Write::write_all
60
61#![no_std]
62#![doc(html_root_url = "https://docs.rs/mini_leb128/0.1.1")]
63#![deny(missing_docs, clippy::pedantic)]
64
65use core::{mem, num::NonZeroUsize};
66
67use arrayvec::ArrayVec;
68use zc_io::{error, Error, Read, Result, Write};
69
70////////////////////////////////////////////////////////////////////////////////
71// LEB128 constants
72////////////////////////////////////////////////////////////////////////////////
73
// Bit 7 of every encoded byte: set when at least one more byte follows.
const CONTINUATION_BIT: u8 = 0x80;
// Bit 6 of an encoded byte: the highest payload bit, which carries the sign of
// a signed integer in its final byte.
const SIGN_BIT: u8 = 0x40;
// The seven payload bits of an encoded byte (everything but the continuation
// bit).
const VALUE_MASK: u8 = !CONTINUATION_BIT;
// Number of payload bits stored per encoded byte.
const VALUE_LENGTH: u32 = VALUE_MASK.count_ones();
78
79////////////////////////////////////////////////////////////////////////////////
80// Encoding
81////////////////////////////////////////////////////////////////////////////////
82
// Body of the `write_u*` functions: encodes `$n` (of type `$int`) as unsigned
// LEB128 and hands the complete encoding to `$writer` with one `write_all`
// call. Evaluates to `Ok` with the number of bytes written; writer errors are
// propagated out of the enclosing function through `?`.
macro_rules! write_unsigned {
    ($writer:ident, $n:ident, $int:ty) => {{
        // Staging buffer on the stack. Its capacity is 5, 10 or 20 bytes for
        // 32-, 64- and 128-bit integers, which covers the longest possible
        // encodings (5, 10 and 19 bytes respectively).
        let mut encoded = ArrayVec::<u8, { (mem::size_of::<$int>() / 4) * 5 }>::new();

        loop {
            // Peel off the lowest seven bits, least-significant group first.
            #[allow(clippy::cast_possible_truncation)]
            let group = ($n as u8) & VALUE_MASK;
            $n >>= VALUE_LENGTH;

            if $n == 0 {
                // Nothing left to encode: the final byte has no continuation bit.
                encoded.push(group);
                break;
            }

            encoded.push(group | CONTINUATION_BIT);
        }

        $writer.write_all(&encoded)?;

        // SAFETY: the loop above pushes at least one byte before it can break,
        // so the buffer length is never zero.
        Ok(unsafe { NonZeroUsize::new_unchecked(encoded.len()) })
    }};
}
109
110/// Encodes an unsigned 32-bit integer using LEB128.
111///
112/// Returns a [`NonZeroUsize`] that stores how many bytes it took to encode the
113/// integer.
114///
115/// # Errors
116///
117/// Propagates any I/O errors originating from the writer. However, encoding the
118/// integer itself is infallible.
119pub fn write_u32<W>(mut writer: W, mut n: u32) -> Result<NonZeroUsize>
120where
121    W: Write,
122{
123    write_unsigned!(writer, n, u32)
124}
125
126/// Encodes an unsigned 64-bit integer using LEB128.
127///
128/// Returns a [`NonZeroUsize`] that stores how many bytes it took to encode the
129/// integer.
130///
131/// # Errors
132///
133/// Propagates any I/O errors originating from the writer. However, encoding the
134/// integer itself is infallible.
135pub fn write_u64<W>(mut writer: W, mut n: u64) -> Result<NonZeroUsize>
136where
137    W: Write,
138{
139    write_unsigned!(writer, n, u64)
140}
141
142/// Encodes an unsigned 128-bit integer using LEB128.
143///
144/// Returns a [`NonZeroUsize`] that stores how many bytes it took to encode the
145/// integer.
146///
147/// # Errors
148///
149/// Propagates any I/O errors originating from the writer. However, encoding the
150/// integer itself is infallible.
151pub fn write_u128<W>(mut writer: W, mut n: u128) -> Result<NonZeroUsize>
152where
153    W: Write,
154{
155    write_unsigned!(writer, n, u128)
156}
157
// Body of the `write_i*` functions: encodes `$n` (of type `$int`) as signed
// LEB128 and hands the complete encoding to `$writer` with one `write_all`
// call. Evaluates to `Ok` with the number of bytes written; writer errors are
// propagated out of the enclosing function through `?`.
macro_rules! write_signed {
    ($writer:ident, $n:ident, $int:ty) => {{
        // Staging buffer on the stack. Its capacity is 5, 10 or 20 bytes for
        // 32-, 64- and 128-bit integers, which covers the longest possible
        // encodings (5, 10 and 19 bytes respectively).
        let mut encoded = ArrayVec::<u8, { (mem::size_of::<$int>() / 4) * 5 }>::new();

        loop {
            // Lowest seven bits of what is left to encode.
            #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
            let group = ($n as u8) & VALUE_MASK;

            // Shift out everything below the group's sign bit, keeping that bit
            // as the lowest one. The (arithmetic) result is 0 or -1 exactly
            // when the sign bit and all higher bits agree, i.e. when this
            // group already represents the rest of the value.
            $n >>= SIGN_BIT.trailing_zeros();

            if matches!($n, 0 | -1) {
                encoded.push(group);
                break;
            }

            // More groups follow: drop the sign bit kept above and flag the
            // byte as continued.
            $n >>= 1;
            encoded.push(group | CONTINUATION_BIT);
        }

        $writer.write_all(&encoded)?;

        // SAFETY: the loop above pushes at least one byte before it can break,
        // so the buffer length is never zero.
        Ok(unsafe { NonZeroUsize::new_unchecked(encoded.len()) })
    }};
}
189
190/// Encodes a signed 32-bit integer using LEB128.
191///
192/// Returns a [`NonZeroUsize`] that stores how many bytes it took to encode the
193/// integer.
194///
195/// # Errors
196///
197/// Propagates any I/O errors originating from the writer. However, encoding the
198/// integer itself is infallible.
199pub fn write_i32<W>(mut writer: W, mut n: i32) -> Result<NonZeroUsize>
200where
201    W: Write,
202{
203    write_signed!(writer, n, i32)
204}
205
206/// Encodes a signed 64-bit integer using LEB128.
207///
208/// Returns a [`NonZeroUsize`] that stores how many bytes it took to encode the
209/// integer.
210///
211/// # Errors
212///
213/// Propagates any I/O errors originating from the writer. However, encoding the
214/// integer itself is infallible.
215pub fn write_i64<W>(mut writer: W, mut n: i64) -> Result<NonZeroUsize>
216where
217    W: Write,
218{
219    write_signed!(writer, n, i64)
220}
221
222/// Encodes a signed 128-bit integer using LEB128.
223///
224/// Returns a [`NonZeroUsize`] that stores how many bytes it took to encode the
225/// integer.
226///
227/// # Errors
228///
229/// Propagates any I/O errors originating from the writer. However, encoding the
230/// integer itself is infallible.
231pub fn write_i128<W>(mut writer: W, mut n: i128) -> Result<NonZeroUsize>
232where
233    W: Write,
234{
235    write_signed!(writer, n, i128)
236}
237
238////////////////////////////////////////////////////////////////////////////////
239// Decoding
240////////////////////////////////////////////////////////////////////////////////
241
242/// This consumes all of the bytes that, theoretically, are a continuation of
243/// the LEB128-encoded integer. However, it has exceeded what the datatype could
244/// store, so this is more so the stream isn't left in, again, theoretically, an
245/// invalid state.
246///
247/// For instance, a developer could test for the `InvalidData` error variant and
248/// substitute the value as the max that datatype could represent if it was not
249/// critical. Then, resuming the data stream, it wouldn't be in the middle of
250/// an oversized integer.
251#[cold]
252fn discard_leb128<'a, R>(mut reader: R) -> Error
253where
254    R: Read<'a>,
255{
256    loop {
257        let byte = match reader.read_next() {
258            Ok(b) => b,
259            Err(error) => return error,
260        };
261
262        if byte & CONTINUATION_BIT == 0 {
263            break;
264        }
265    }
266
267    error!(
268        InvalidData,
269        "the LEB128-encoded integer getting read exceeded what the datatype could represent"
270    )
271}
272
// Body of the `read_u*` functions: decodes one unsigned LEB128 integer of type
// `$int` from `$reader`.
//
// Returns (from the enclosing function) `Ok((value, bytes_read))` on success.
// Reader errors are propagated through `?`. An `InvalidData` error is returned
// when the encoding does not fit in `$int`: either it continues past the last
// byte that could contribute bits, or that last byte carries bits beyond the
// integer's width. In the former case the rest of the oversized integer is
// consumed via `discard_leb128` first.
macro_rules! read_unsigned {
    ($reader:expr, $int:ty) => {{
        let mut value: $int = 0;
        let mut shift: u32 = 0;
        let mut bytes_read: usize = 0;

        loop {
            let byte = $reader.read_next()?;
            let more = byte & CONTINUATION_BIT != 0;
            let group = byte & VALUE_MASK;
            bytes_read += 1;

            // Bits of `$int` not yet filled. This is never zero: `shift` is a
            // multiple of 7, the widths used here (32, 64, 128) are not, and
            // the loop always exits on the byte where `room` drops below 7.
            let room = <$int>::BITS - shift;

            if room < VALUE_LENGTH {
                // This is the last byte that can contribute bits (the 5th,
                // 10th or 19th for 32-, 64- and 128-bit integers).
                if more {
                    return Err(discard_leb128(&mut $reader));
                }

                // Only the low `room` payload bits fit; anything above them
                // would be silently lost by the shift below.
                if group >> room != 0 {
                    return Err(error!(
                        InvalidData,
                        "the LEB128-encoded integer getting read exceeded what the datatype could represent"
                    ));
                }
            }

            value |= <$int>::from(group) << shift;
            shift += VALUE_LENGTH;

            if !more {
                // SAFETY: `bytes_read` was incremented at least once above.
                return Ok((value, unsafe { NonZeroUsize::new_unchecked(bytes_read) }));
            }
        }
    }};
}
299
300/// Decodes an unsigned 32-bit integer using LEB128.
301///
302/// Returns a tuple of the integer read and a [`NonZeroUsize`] that stores the
303/// number of bytes read.
304///
305/// # Errors
306///
307/// Propagates any I/O errors originating from the reader. Otherwise, an
308/// error kind of `InvalidData` is thrown if the integer read does not fit in
309/// the integer's datatype.
310pub fn read_u32<'a, R>(mut reader: R) -> Result<(u32, NonZeroUsize)>
311where
312    R: Read<'a>,
313{
314    read_unsigned!(reader, u32)
315}
316
317/// Decodes an unsigned 64-bit integer using LEB128.
318///
319/// Returns a tuple of the integer read and a [`NonZeroUsize`] that stores the
320/// number of bytes read.
321///
322/// # Errors
323///
324/// Propagates any I/O errors originating from the reader. Otherwise, an
325/// error kind of `InvalidData` is thrown if the integer read does not fit in
326/// the integer's datatype.
327pub fn read_u64<'a, R>(mut reader: R) -> Result<(u64, NonZeroUsize)>
328where
329    R: Read<'a>,
330{
331    read_unsigned!(reader, u64)
332}
333
334/// Decodes an unsigned 128-bit integer using LEB128.
335///
336/// Returns a tuple of the integer read and a [`NonZeroUsize`] that stores the
337/// number of bytes read.
338///
339/// # Errors
340///
341/// Propagates any I/O errors originating from the reader. Otherwise, an
342/// error kind of `InvalidData` is thrown if the integer read does not fit in
343/// the integer's datatype.
344pub fn read_u128<'a, R>(mut reader: R) -> Result<(u128, NonZeroUsize)>
345where
346    R: Read<'a>,
347{
348    read_unsigned!(reader, u128)
349}
350
// Body of the `read_i*` functions: decodes one signed LEB128 integer of type
// `$int` from `$reader`.
//
// Returns (from the enclosing function) `Ok((value, bytes_read))` on success.
// Reader errors are propagated through `?`. An `InvalidData` error is returned
// when the encoding does not fit in `$int`: either it continues past the last
// byte that could contribute bits, or that last byte's excess bits are not a
// plain sign extension. In the former case the rest of the oversized integer
// is consumed via `discard_leb128` first.
macro_rules! read_signed {
    ($reader:expr, $int:ty) => {{
        let mut value: $int = 0;
        let mut shift: u32 = 0;
        let mut bytes_read: usize = 0;

        loop {
            let byte = $reader.read_next()?;
            let more = byte & CONTINUATION_BIT != 0;
            let group = byte & VALUE_MASK;
            bytes_read += 1;

            // Bits of `$int` not yet filled. This is never zero: `shift` is a
            // multiple of 7, the widths used here (32, 64, 128) are not, and
            // the loop always exits on the byte where `room` drops below 7.
            let room = <$int>::BITS - shift;

            if room < VALUE_LENGTH {
                // This is the last byte that can contribute bits (the 5th,
                // 10th or 19th for 32-, 64- and 128-bit integers).
                if more {
                    return Err(discard_leb128(&mut $reader));
                }

                // Payload bit `room - 1` lands on the integer's sign bit. The
                // payload bits above it fall outside the integer, so they must
                // all repeat that sign bit for the value to be representable.
                let sign_run = group >> (room - 1);
                if sign_run != 0 && sign_run != VALUE_MASK >> (room - 1) {
                    return Err(error!(
                        InvalidData,
                        "the LEB128-encoded integer getting read exceeded what the datatype could represent"
                    ));
                }
            }

            value |= <$int>::from(group) << shift;
            shift += VALUE_LENGTH;

            if !more {
                // Sign-extend when the final group ended below the integer's
                // top bit and its sign bit is set.
                if shift < <$int>::BITS && byte & SIGN_BIT != 0 {
                    value |= !0 << shift;
                }

                // SAFETY: `bytes_read` was incremented at least once above.
                return Ok((value, unsafe { NonZeroUsize::new_unchecked(bytes_read) }));
            }
        }
    }};
}
380
381/// Decodes a signed 32-bit integer using LEB128.
382///
383/// Returns a tuple of the integer read and a [`NonZeroUsize`] that stores the
384/// number of bytes read.
385///
386/// # Errors
387///
388/// Propagates any I/O errors originating from the reader. Otherwise, an
389/// error kind of `InvalidData` is thrown if the integer read does not fit in
390/// the integer's datatype.
391pub fn read_i32<'a, R>(mut reader: R) -> Result<(i32, NonZeroUsize)>
392where
393    R: Read<'a>,
394{
395    read_signed!(reader, i32)
396}
397
398/// Decodes a signed 64-bit integer using LEB128.
399///
400/// Returns a tuple of the integer read and a [`NonZeroUsize`] that stores the
401/// number of bytes read.
402///
403/// # Errors
404///
405/// Propagates any I/O errors originating from the reader. Otherwise, an
406/// error kind of `InvalidData` is thrown if the integer read does not fit in
407/// the integer's datatype.
408pub fn read_i64<'a, R>(mut reader: R) -> Result<(i64, NonZeroUsize)>
409where
410    R: Read<'a>,
411{
412    read_signed!(reader, i64)
413}
414
415/// Decodes a signed 128-bit integer using LEB128.
416///
417/// Returns a tuple of the integer read and a [`NonZeroUsize`] that stores the
418/// number of bytes read.
419///
420/// # Errors
421///
422/// Propagates any I/O errors originating from the reader. Otherwise, an
423/// error kind of `InvalidData` is thrown if the integer read does not fit in
424/// the integer's datatype.
425pub fn read_i128<'a, R>(mut reader: R) -> Result<(i128, NonZeroUsize)>
426where
427    R: Read<'a>,
428{
429    read_signed!(reader, i128)
430}