Skip to main content

qubit_io/codec/
leb128_codec.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10
11use core::marker::PhantomData;
12
13use crate::{
14    DecodePolicy,
15    Leb128DecodeError,
16    Leb128DecodeErrorKind,
17    NonStrict,
18};
19
20/// Type-level unchecked LEB128 codec.
21///
22/// `T` selects the decoded integer type and `P` selects the decoding policy.
23/// Encoding is always canonical; `P` only affects decoding.
24#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
25pub struct Leb128Codec<T, P = NonStrict> {
26    marker: PhantomData<fn() -> (T, P)>,
27}
28
29macro_rules! impl_unsigned_leb128_codec {
30    ($ty:ty) => {
31        impl<P> Leb128Codec<$ty, P>
32        where
33            P: DecodePolicy,
34        {
35            /// Minimum number of bytes required to encode or decode this type.
36            pub const REQUIRED_MIN_BUFFER_LEN: usize = (<$ty>::BITS as usize).div_ceil(7);
37
38            /// Decodes a value from `input` starting at `index` without bounds checks.
39            ///
40            /// # Parameters
41            ///
42            /// - `input`: Source byte buffer.
43            /// - `index`: Start index in `input`.
44            ///
45            /// # Returns
46            ///
47            /// Returns the decoded value and the number of consumed bytes.
48            ///
49            /// # Errors
50            ///
51            /// Returns [`Leb128DecodeError`] if the bytes are malformed or strict
52            /// decoding rejects a non-canonical representation.
53            ///
54            /// # Safety
55            ///
56            /// The caller must guarantee that `input.as_ptr().add(index)` is valid to
57            /// read [`Self::REQUIRED_MIN_BUFFER_LEN`] bytes, or that a valid terminating byte
58            /// appears before that limit.
59            #[inline(always)]
60            pub unsafe fn read_unchecked(input: &[u8], index: usize) -> Result<($ty, usize), Leb128DecodeError> {
61                // SAFETY: The caller guarantees enough readable bytes for this type.
62                let (value, consumed) =
63                    unsafe { read_uleb_unchecked::<P>(input, index, <$ty>::BITS, Self::REQUIRED_MIN_BUFFER_LEN)? };
64                Ok((value as $ty, consumed))
65            }
66
67            /// Tries to decode from the currently available bytes without bounds checks.
68            ///
69            /// This internal entry point is used by buffered readers that may have
70            /// only part of a variable-length payload in their input buffer. It
71            /// decodes while scanning for the terminating byte, so callers do not
72            /// need a separate terminator pre-scan before calling the codec.
73            ///
74            /// # Parameters
75            ///
76            /// - `input`: Source byte buffer.
77            /// - `index`: Start index in `input`.
78            /// - `available`: Number of readable bytes currently available from
79            ///   `index`.
80            ///
81            /// # Returns
82            ///
83            /// Returns `Ok(Some((value, consumed)))` when a complete value is
84            /// decoded. Returns `Ok(None)` when more bytes are needed. Returns
85            /// `Err((error, consumed))` when the payload is invalid and should be
86            /// consumed before the error is reported.
87            ///
88            /// # Safety
89            ///
90            /// The caller must guarantee that `input.as_ptr().add(index)` is valid
91            /// to read `available` bytes and that `available` is no greater than
92            /// [`Self::REQUIRED_MIN_BUFFER_LEN`].
93            #[inline(always)]
94            pub(crate) unsafe fn read_available_unchecked(
95                input: &[u8],
96                index: usize,
97                available: usize,
98            ) -> Result<Option<($ty, usize)>, (Leb128DecodeError, usize)> {
99                // SAFETY: The caller guarantees that exactly `available` bytes
100                // are readable from `index`.
101                unsafe {
102                    read_uleb_available_unchecked::<P>(
103                        input,
104                        index,
105                        <$ty>::BITS,
106                        Self::REQUIRED_MIN_BUFFER_LEN,
107                        available,
108                    )
109                }
110                .map(|result| result.map(|(value, consumed)| (value as $ty, consumed)))
111            }
112
113            /// Encodes `value` into `output` starting at `index` without bounds checks.
114            ///
115            /// # Parameters
116            ///
117            /// - `output`: Destination byte buffer.
118            /// - `index`: Start index in `output`.
119            /// - `value`: Value to encode.
120            ///
121            /// # Returns
122            ///
123            /// Returns the number of written bytes.
124            ///
125            /// # Safety
126            ///
127            /// The caller must guarantee that `output.as_mut_ptr().add(index)` is valid
128            /// to write [`Self::REQUIRED_MIN_BUFFER_LEN`] bytes.
129            #[inline(always)]
130            pub unsafe fn write_unchecked(output: &mut [u8], index: usize, value: $ty) -> usize {
131                // SAFETY: The caller guarantees enough writable bytes for this type.
132                unsafe { write_uleb_unchecked(output, index, value as u128) }
133            }
134        }
135    };
136}
137
138macro_rules! impl_signed_leb128_codec {
139    ($ty:ty) => {
140        impl<P> Leb128Codec<$ty, P>
141        where
142            P: DecodePolicy,
143        {
144            /// Minimum number of bytes required to encode or decode this type.
145            pub const REQUIRED_MIN_BUFFER_LEN: usize = (<$ty>::BITS as usize).div_ceil(7);
146
147            /// Decodes a value from `input` starting at `index` without bounds checks.
148            ///
149            /// # Parameters
150            ///
151            /// - `input`: Source byte buffer.
152            /// - `index`: Start index in `input`.
153            ///
154            /// # Returns
155            ///
156            /// Returns the decoded value and the number of consumed bytes.
157            ///
158            /// # Errors
159            ///
160            /// Returns [`Leb128DecodeError`] if the bytes are malformed or strict
161            /// decoding rejects a non-canonical representation.
162            ///
163            /// # Safety
164            ///
165            /// The caller must guarantee that `input.as_ptr().add(index)` is valid to
166            /// read [`Self::REQUIRED_MIN_BUFFER_LEN`] bytes, or that a valid terminating byte
167            /// appears before that limit.
168            #[inline(always)]
169            pub unsafe fn read_unchecked(input: &[u8], index: usize) -> Result<($ty, usize), Leb128DecodeError> {
170                // SAFETY: The caller guarantees enough readable bytes for this type.
171                let (value, consumed) =
172                    unsafe { read_sleb_unchecked::<P>(input, index, <$ty>::BITS, Self::REQUIRED_MIN_BUFFER_LEN)? };
173                Ok((value as $ty, consumed))
174            }
175
176            /// Tries to decode from the currently available bytes without bounds checks.
177            ///
178            /// This internal entry point is used by buffered readers that may have
179            /// only part of a variable-length payload in their input buffer. It
180            /// decodes while scanning for the terminating byte, so callers do not
181            /// need a separate terminator pre-scan before calling the codec.
182            ///
183            /// # Parameters
184            ///
185            /// - `input`: Source byte buffer.
186            /// - `index`: Start index in `input`.
187            /// - `available`: Number of readable bytes currently available from
188            ///   `index`.
189            ///
190            /// # Returns
191            ///
192            /// Returns `Ok(Some((value, consumed)))` when a complete value is
193            /// decoded. Returns `Ok(None)` when more bytes are needed. Returns
194            /// `Err((error, consumed))` when the payload is invalid and should be
195            /// consumed before the error is reported.
196            ///
197            /// # Safety
198            ///
199            /// The caller must guarantee that `input.as_ptr().add(index)` is valid
200            /// to read `available` bytes and that `available` is no greater than
201            /// [`Self::REQUIRED_MIN_BUFFER_LEN`].
202            #[inline(always)]
203            pub(crate) unsafe fn read_available_unchecked(
204                input: &[u8],
205                index: usize,
206                available: usize,
207            ) -> Result<Option<($ty, usize)>, (Leb128DecodeError, usize)> {
208                // SAFETY: The caller guarantees that exactly `available` bytes
209                // are readable from `index`.
210                unsafe {
211                    read_sleb_available_unchecked::<P>(
212                        input,
213                        index,
214                        <$ty>::BITS,
215                        Self::REQUIRED_MIN_BUFFER_LEN,
216                        available,
217                    )
218                }
219                .map(|result| result.map(|(value, consumed)| (value as $ty, consumed)))
220            }
221
222            /// Encodes `value` into `output` starting at `index` without bounds checks.
223            ///
224            /// # Parameters
225            ///
226            /// - `output`: Destination byte buffer.
227            /// - `index`: Start index in `output`.
228            /// - `value`: Value to encode.
229            ///
230            /// # Returns
231            ///
232            /// Returns the number of written bytes.
233            ///
234            /// # Safety
235            ///
236            /// The caller must guarantee that `output.as_mut_ptr().add(index)` is valid
237            /// to write [`Self::REQUIRED_MIN_BUFFER_LEN`] bytes.
238            #[inline(always)]
239            pub unsafe fn write_unchecked(output: &mut [u8], index: usize, value: $ty) -> usize {
240                // SAFETY: The caller guarantees enough writable bytes for this type.
241                unsafe { write_sleb_unchecked(output, index, value as i128) }
242            }
243        }
244    };
245}
246
247impl_unsigned_leb128_codec!(u8);
248impl_unsigned_leb128_codec!(u16);
249impl_unsigned_leb128_codec!(u32);
250impl_unsigned_leb128_codec!(u64);
251impl_unsigned_leb128_codec!(u128);
252impl_unsigned_leb128_codec!(usize);
253
254impl_signed_leb128_codec!(i8);
255impl_signed_leb128_codec!(i16);
256impl_signed_leb128_codec!(i32);
257impl_signed_leb128_codec!(i64);
258impl_signed_leb128_codec!(i128);
259impl_signed_leb128_codec!(isize);
260
261#[inline(always)]
262unsafe fn read_uleb_unchecked<P>(
263    input: &[u8],
264    index: usize,
265    bits: u32,
266    max_bytes: usize,
267) -> Result<(u128, usize), Leb128DecodeError>
268where
269    P: DecodePolicy,
270{
271    // SAFETY: The caller guarantees enough readable bytes for the full maximum
272    // payload width, which is exactly the `available` value passed here.
273    match unsafe { read_uleb_available_unchecked::<P>(input, index, bits, max_bytes, max_bytes) } {
274        Ok(Some(value)) => Ok(value),
275        Ok(None) => unreachable!("maximum-width LEB128 input must complete or fail"),
276        Err((error, _consumed)) => Err(error),
277    }
278}
279
280#[inline(always)]
281unsafe fn read_uleb_available_unchecked<P>(
282    input: &[u8],
283    index: usize,
284    bits: u32,
285    max_bytes: usize,
286    available: usize,
287) -> Result<Option<(u128, usize)>, (Leb128DecodeError, usize)>
288where
289    P: DecodePolicy,
290{
291    debug_assert!(available <= max_bytes, "available bytes exceed LEB128 maximum width");
292    let mut value = 0u128;
293    let mut shift = 0u32;
294    // SAFETY: The caller guarantees that `available` bytes are readable from
295    // `index`, so this base pointer can be advanced by every loop offset.
296    let base = unsafe { input.as_ptr().add(index) };
297    for offset in 0..available {
298        // SAFETY: The caller guarantees enough readable bytes for this loop.
299        let byte = unsafe { *base.add(offset) };
300        let payload = u128::from(byte & 0x7F);
301        value |= payload << shift;
302        if byte & 0x80 == 0 {
303            if offset == max_bytes - 1 && !unsigned_final_payload_fits(byte, bits, offset) {
304                return Err(malformed_decode_error(index + offset, offset + 1));
305            }
306            let consumed = offset + 1;
307            if P::STRICT && !has_canonical_uleb_len(value, consumed) {
308                return Err(noncanonical_decode_error(index, consumed));
309            }
310            return Ok(Some((value, consumed)));
311        }
312        shift += 7;
313    }
314    if available < max_bytes {
315        return Ok(None);
316    }
317    Err(malformed_decode_error(index + max_bytes - 1, max_bytes))
318}
319
320#[inline(always)]
321unsafe fn read_sleb_unchecked<P>(
322    input: &[u8],
323    index: usize,
324    bits: u32,
325    max_bytes: usize,
326) -> Result<(i128, usize), Leb128DecodeError>
327where
328    P: DecodePolicy,
329{
330    // SAFETY: The caller guarantees enough readable bytes for the full maximum
331    // payload width, which is exactly the `available` value passed here.
332    match unsafe { read_sleb_available_unchecked::<P>(input, index, bits, max_bytes, max_bytes) } {
333        Ok(Some(value)) => Ok(value),
334        Ok(None) => unreachable!("maximum-width LEB128 input must complete or fail"),
335        Err((error, _consumed)) => Err(error),
336    }
337}
338
339#[inline(always)]
340unsafe fn read_sleb_available_unchecked<P>(
341    input: &[u8],
342    index: usize,
343    bits: u32,
344    max_bytes: usize,
345    available: usize,
346) -> Result<Option<(i128, usize)>, (Leb128DecodeError, usize)>
347where
348    P: DecodePolicy,
349{
350    debug_assert!(available <= max_bytes, "available bytes exceed LEB128 maximum width");
351    let mut value = 0i128;
352    let mut shift = 0u32;
353    // SAFETY: The caller guarantees that `available` bytes are readable from
354    // `index`, so this base pointer can be advanced by every loop offset.
355    let base = unsafe { input.as_ptr().add(index) };
356    for offset in 0..available {
357        // SAFETY: The caller guarantees enough readable bytes for this loop.
358        let byte = unsafe { *base.add(offset) };
359        let payload = i128::from(byte & 0x7F);
360        value |= payload << shift;
361        if byte & 0x80 == 0 {
362            if offset == max_bytes - 1 && !signed_final_payload_fits(byte, bits, offset) {
363                return Err(malformed_decode_error(index + offset, offset + 1));
364            }
365            if byte & 0x40 != 0 && shift + 7 < i128::BITS {
366                value |= (!0i128) << (shift + 7);
367            }
368            let consumed = offset + 1;
369            if P::STRICT && !has_canonical_sleb_len(value, consumed) {
370                return Err(noncanonical_decode_error(index, consumed));
371            }
372            return Ok(Some((value, consumed)));
373        }
374        shift += 7;
375    }
376    if available < max_bytes {
377        return Ok(None);
378    }
379    Err(malformed_decode_error(index + max_bytes - 1, max_bytes))
380}
381
382/// Builds a malformed LEB128 error on the cold error path.
383///
384/// # Parameters
385///
386/// - `index`: Absolute byte index associated with the malformed payload.
387/// - `consumed`: Number of bytes that should be consumed before reporting the
388///   error.
389///
390/// # Returns
391///
392/// Returns the error and the byte count to consume.
393#[cold]
394#[inline(never)]
395fn malformed_decode_error(index: usize, consumed: usize) -> (Leb128DecodeError, usize) {
396    (
397        Leb128DecodeError::new(Leb128DecodeErrorKind::Malformed, index),
398        consumed,
399    )
400}
401
402/// Builds a non-canonical LEB128 error on the cold error path.
403///
404/// # Parameters
405///
406/// - `index`: Absolute byte index at which the non-canonical payload starts.
407/// - `consumed`: Number of bytes that should be consumed before reporting the
408///   error.
409///
410/// # Returns
411///
412/// Returns the error and the byte count to consume.
413#[cold]
414#[inline(never)]
415fn noncanonical_decode_error(index: usize, consumed: usize) -> (Leb128DecodeError, usize) {
416    (
417        Leb128DecodeError::new(Leb128DecodeErrorKind::NonCanonical, index),
418        consumed,
419    )
420}
421
422#[must_use]
423#[inline(always)]
424fn unsigned_final_payload_fits(byte: u8, bits: u32, offset: usize) -> bool {
425    let used_bits = bits - offset as u32 * 7;
426    byte >> used_bits == 0
427}
428
429#[must_use]
430#[inline(always)]
431fn signed_final_payload_fits(byte: u8, bits: u32, offset: usize) -> bool {
432    let used_bits = bits - offset as u32 * 7;
433    let payload = byte & 0x7F;
434    let used_mask = ((1u16 << used_bits) - 1) as u8;
435    let unused_mask = 0x7F & !used_mask;
436    let sign_bit = 1u8 << (used_bits - 1);
437    if payload & sign_bit == 0 {
438        payload & unused_mask == 0
439    } else {
440        payload & unused_mask == unused_mask
441    }
442}
443
444/// Checks whether an unsigned LEB128 value used its canonical encoded length.
445///
446/// # Parameters
447///
448/// - `value`: Decoded unsigned value.
449/// - `actual_len`: Number of bytes consumed from the input.
450///
451/// # Returns
452///
453/// Returns `true` if `actual_len` is the canonical encoded length of `value`.
454#[must_use]
455#[inline(always)]
456fn has_canonical_uleb_len(value: u128, actual_len: usize) -> bool {
457    canonical_uleb_len(value) == actual_len
458}
459
460/// Checks whether a signed LEB128 value used its canonical encoded length.
461///
462/// # Parameters
463///
464/// - `value`: Decoded signed value.
465/// - `actual_len`: Number of bytes consumed from the input.
466///
467/// # Returns
468///
469/// Returns `true` if `actual_len` is the canonical encoded length of `value`.
470#[must_use]
471#[inline(always)]
472fn has_canonical_sleb_len(value: i128, actual_len: usize) -> bool {
473    canonical_sleb_len(value) == actual_len
474}
475
476/// Computes the canonical unsigned LEB128 encoded length.
477///
478/// # Parameters
479///
480/// - `value`: Unsigned value to measure.
481///
482/// # Returns
483///
484/// Returns the number of bytes used by the canonical unsigned LEB128 encoding.
485#[must_use]
486#[inline(always)]
487fn canonical_uleb_len(mut value: u128) -> usize {
488    let mut len = 1;
489    while value >= 0x80 {
490        value >>= 7;
491        len += 1;
492    }
493    len
494}
495
496/// Computes the canonical signed LEB128 encoded length.
497///
498/// # Parameters
499///
500/// - `value`: Signed value to measure.
501///
502/// # Returns
503///
504/// Returns the number of bytes used by the canonical signed LEB128 encoding.
505#[must_use]
506#[inline(always)]
507fn canonical_sleb_len(mut value: i128) -> usize {
508    let mut len = 0;
509    loop {
510        let byte = (value & 0x7F) as u8;
511        let sign_bit_set = byte & 0x40 != 0;
512        value >>= 7;
513        len += 1;
514        if (value == 0 && !sign_bit_set) || (value == -1 && sign_bit_set) {
515            return len;
516        }
517    }
518}
519
520unsafe fn write_uleb_unchecked(output: &mut [u8], index: usize, mut value: u128) -> usize {
521    let mut offset = 0;
522    loop {
523        let mut byte = (value & 0x7F) as u8;
524        value >>= 7;
525        if value != 0 {
526            byte |= 0x80;
527        }
528        // SAFETY: The caller guarantees enough writable bytes for the encoded value.
529        unsafe {
530            *output.as_mut_ptr().add(index + offset) = byte;
531        }
532        offset += 1;
533        if value == 0 {
534            return offset;
535        }
536    }
537}
538
539unsafe fn write_sleb_unchecked(output: &mut [u8], index: usize, mut value: i128) -> usize {
540    let mut offset = 0;
541    loop {
542        let mut byte = (value & 0x7F) as u8;
543        let sign_bit_set = byte & 0x40 != 0;
544        value >>= 7;
545        let done = (value == 0 && !sign_bit_set) || (value == -1 && sign_bit_set);
546        if !done {
547            byte |= 0x80;
548        }
549        // SAFETY: The caller guarantees enough writable bytes for the encoded value.
550        unsafe {
551            *output.as_mut_ptr().add(index + offset) = byte;
552        }
553        offset += 1;
554        if done {
555            return offset;
556        }
557    }
558}