Skip to main content

qubit_io/stream/
leb128_reader.rs

/*******************************************************************************
 *
 *    Copyright (c) 2026 Haixing Hu.
 *
 *    SPDX-License-Identifier: Apache-2.0
 *
 *    Licensed under the Apache License, Version 2.0.
 *
 ******************************************************************************/
10
11use core::marker::PhantomData;
12use std::io::{
13    Error,
14    ErrorKind,
15    Read,
16    Result,
17    Seek,
18    SeekFrom,
19};
20
21use crate::ReadExt;
22use crate::codec::{
23    DecodePolicy,
24    Leb128Codec,
25    Leb128DecodeError,
26    NonStrict,
27    Strict,
28};
29use crate::util::read_utf8_payload;
30
31/// Reader wrapper for LEB128 integers.
32///
33/// The decoding policy is selected by the `P` type parameter. Use
34/// `Leb128Reader<R, NonStrict>` for permissive decoding and
35/// `Leb128Reader<R, Strict>` for canonical-only decoding.
36///
37/// # Target-width integers
38///
39/// `usize` and `isize` methods use the current Rust target's pointer width.
40/// Prefer fixed-width integer methods such as `read_u64` or `read_i64` for
41/// persistent files and cross-platform protocols.
42pub struct Leb128Reader<R, P = NonStrict> {
43    inner: R,
44    buffer: [u8; 19],
45    marker: PhantomData<fn() -> P>,
46}
47
48impl<R, P> Leb128Reader<R, P>
49where
50    P: DecodePolicy,
51{
52    /// Creates a LEB128 reader.
53    #[must_use]
54    #[inline]
55    pub const fn new(inner: R) -> Self {
56        Self {
57            inner,
58            buffer: [0; 19],
59            marker: PhantomData,
60        }
61    }
62
63    /// Returns whether this reader rejects non-canonical encodings.
64    #[must_use]
65    #[inline]
66    pub const fn is_strict(&self) -> bool {
67        P::STRICT
68    }
69
70    /// Returns a shared reference to the underlying reader.
71    #[must_use]
72    #[inline]
73    pub const fn get_ref(&self) -> &R {
74        &self.inner
75    }
76
77    /// Returns an exclusive reference to the underlying reader.
78    #[must_use]
79    #[inline]
80    pub fn get_mut(&mut self) -> &mut R {
81        &mut self.inner
82    }
83
84    /// Consumes this wrapper and returns the underlying reader.
85    #[must_use]
86    #[inline]
87    pub fn into_inner(self) -> R {
88        self.inner
89    }
90}
91
// Generates one public `read_*` method that decodes a single LEB128 value of
// type `$ty` under the decoding policy `$policy`.
//
// - `$policy`: decoding policy type passed on to `Leb128Codec`.
// - `$method`: name of the generated method.
// - `$ty`: integer type returned by the generated method.
// - `$doc`: documentation string attached to the generated method.
macro_rules! impl_read_value {
    ($policy:ty, $method:ident, $ty:ty, $doc:literal) => {
        #[doc = $doc]
        #[inline]
        pub fn $method(&mut self) -> Result<$ty> {
            type Codec = Leb128Codec<$ty, $policy>;
            // Number of bytes `read_leb128` may pull from the reader for one value.
            const READ_LEN: usize = Codec::REQUIRED_MIN_BUFFER_LEN;

            self.read_leb128::<$ty, READ_LEN, _>(|bytes| {
                // SAFETY: NOTE(review) the contract of `read_unchecked` is not
                // visible from this file; presumably it needs `READ_LEN`
                // readable bytes starting at offset 0, which the fixed 19-byte
                // buffer must cover — confirm against `Leb128Codec`.
                unsafe { Codec::read_unchecked(bytes, 0) }
            })
        }
    };
}
105
// Generates the public decoding API of `Leb128Reader<R, $policy>` for one
// concrete decoding policy: a `read_*` method per integer type plus the
// length-prefixed UTF-8 string reader.
macro_rules! impl_for_policy {
    ($policy:ty) => {
        impl<R> Leb128Reader<R, $policy>
        where
            R: Read,
        {
            impl_read_value!($policy, read_u8, u8, "Reads an unsigned LEB128 `u8`.");
            impl_read_value!($policy, read_i8, i8, "Reads a signed LEB128 `i8`.");
            impl_read_value!($policy, read_u16, u16, "Reads an unsigned LEB128 `u16`.");
            impl_read_value!($policy, read_i16, i16, "Reads a signed LEB128 `i16`.");
            impl_read_value!($policy, read_u32, u32, "Reads an unsigned LEB128 `u32`.");
            impl_read_value!($policy, read_i32, i32, "Reads a signed LEB128 `i32`.");
            impl_read_value!($policy, read_u64, u64, "Reads an unsigned LEB128 `u64`.");
            impl_read_value!($policy, read_i64, i64, "Reads a signed LEB128 `i64`.");
            impl_read_value!($policy, read_u128, u128, "Reads an unsigned LEB128 `u128`.");
            impl_read_value!($policy, read_i128, i128, "Reads a signed LEB128 `i128`.");
            impl_read_value!($policy, read_usize, usize, "Reads an unsigned LEB128 `usize`.");
            impl_read_value!($policy, read_isize, isize, "Reads a signed LEB128 `isize`.");

            /// Reads a UTF-8 string whose byte length is stored in front of it
            /// as an unsigned LEB128 integer.
            ///
            /// Because the length prefix is decoded as `usize`, the format
            /// depends on the target's pointer width. Use a fixed-width length
            /// prefix for persistent files and cross-platform protocols.
            ///
            /// # Parameters
            ///
            /// - `max_len`: Largest UTF-8 payload length, in bytes, that is accepted.
            ///
            /// # Returns
            ///
            /// Returns the decoded UTF-8 string.
            ///
            /// # Errors
            ///
            /// Returns an I/O error if reading the length or the payload fails,
            /// and [`std::io::ErrorKind::InvalidData`] if the encoded length is
            /// greater than `max_len` or the payload is not valid UTF-8.
            #[inline]
            pub fn read_utf8_string(&mut self, max_len: usize) -> Result<String> {
                self.read_usize()
                    .and_then(|len| read_utf8_payload(&mut self.inner, len, max_len))
            }
        }
    };
}
152
153impl<R, P> Leb128Reader<R, P>
154where
155    R: Read,
156    P: DecodePolicy,
157{
158    #[inline]
159    fn read_leb128<T, const N: usize, F>(&mut self, decode: F) -> Result<T>
160    where
161        F: FnOnce(&[u8; 19]) -> std::result::Result<(T, usize), Leb128DecodeError>,
162    {
163        debug_assert!(N <= self.buffer.len(), "LEB128 read length exceeds internal buffer");
164        for index in 0..N {
165            // SAFETY: `index` is produced by `0..N`, where `N` is a
166            // codec-declared length that fits the fixed internal buffer.
167            unsafe {
168                self.inner.read_exact_unchecked(&mut self.buffer, index, 1)?;
169            }
170            if read_byte(&self.buffer, index) & 0x80 == 0 {
171                return decode(&self.buffer)
172                    .map(|(value, _)| value)
173                    .map_err(map_leb128_decode_error);
174            }
175        }
176        decode(&self.buffer)
177            .map(|(value, _)| value)
178            .map_err(map_leb128_decode_error)
179    }
180}
181
182impl_for_policy!(NonStrict);
183impl_for_policy!(Strict);
184
185impl<R, P> Read for Leb128Reader<R, P>
186where
187    R: Read,
188{
189    /// Reads bytes from the wrapped reader.
190    ///
191    /// # Parameters
192    ///
193    /// - `buffer`: Destination byte buffer.
194    ///
195    /// # Returns
196    ///
197    /// Returns the number of bytes read.
198    ///
199    /// # Errors
200    ///
201    /// Returns the I/O error reported by the wrapped reader.
202    #[inline]
203    fn read(&mut self, buffer: &mut [u8]) -> Result<usize> {
204        self.inner.read(buffer)
205    }
206}
207
208impl<R, P> Seek for Leb128Reader<R, P>
209where
210    R: Seek,
211{
212    /// Seeks the wrapped reader.
213    ///
214    /// # Parameters
215    ///
216    /// - `position`: Target seek position.
217    ///
218    /// # Returns
219    ///
220    /// Returns the new stream position.
221    ///
222    /// # Errors
223    ///
224    /// Returns the seek error reported by the wrapped reader.
225    #[inline]
226    fn seek(&mut self, position: SeekFrom) -> Result<u64> {
227        self.inner.seek(position)
228    }
229}
230
231#[inline]
232fn map_leb128_decode_error(error: Leb128DecodeError) -> Error {
233    Error::new(ErrorKind::InvalidData, error)
234}
235
/// Returns the byte stored at `index` in the internal LEB128 buffer.
///
/// The lookup uses checked indexing: this function is safe to call, so it
/// must not read outside `buffer` for any argument, in any build profile.
/// When the caller's index comes from a constant-bounded loop the compiler
/// can remove the check after inlining.
///
/// # Panics
///
/// Panics if `index` is not smaller than the buffer length.
#[inline]
fn read_byte(buffer: &[u8; 19], index: usize) -> u8 {
    buffer[index]
}