Skip to main content

qubit_io/ext/
leb128_read_ext.rs

1/*******************************************************************************
2 *
3 *    Copyright (c) 2026 Haixing Hu.
4 *
5 *    SPDX-License-Identifier: Apache-2.0
6 *
7 *    Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10use std::io::{
11    Error,
12    ErrorKind,
13    Read,
14    Result,
15};
16
17use crate::BinaryReadExt;
18
/// Reader extension for integers stored in LEB128 form.
///
/// Every method decodes exactly one integer from the underlying reader. The
/// `read_uleb_*` family handles the unsigned encoding and the `read_sleb_*`
/// family handles the signed, sign-extended encoding. In both encodings each
/// byte carries seven payload bits, least-significant group first, and a set
/// high bit announces that another byte follows. See the integer section of
/// the WebAssembly Core binary format:
/// <https://webassembly.github.io/spec/core/binary/values.html#integers>.
///
/// The `_strict` variants additionally refuse non-canonical input, for example
/// a value padded with continuation bytes it does not need.
pub trait Leb128ReadExt: Read {
    /// Decodes one unsigned LEB128 integer into a `u8`.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `u8`.
    /// - Any other I/O error reported by the underlying reader.
    fn read_uleb_u8(&mut self) -> Result<u8>;

    /// Decodes one unsigned LEB128 integer into a `u8`, accepting only the
    /// canonical encoding.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `u8` or the encoding
    ///   is not canonical.
    /// - Any other I/O error reported by the underlying reader.
    fn read_uleb_u8_strict(&mut self) -> Result<u8>;

    /// Decodes one unsigned LEB128 integer into a `u16`.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `u16`.
    /// - Any other I/O error reported by the underlying reader.
    fn read_uleb_u16(&mut self) -> Result<u16>;

    /// Decodes one unsigned LEB128 integer into a `u16`, accepting only the
    /// canonical encoding.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `u16` or the encoding
    ///   is not canonical.
    /// - Any other I/O error reported by the underlying reader.
    fn read_uleb_u16_strict(&mut self) -> Result<u16>;

    /// Decodes one unsigned LEB128 integer into a `u32`.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `u32`.
    /// - Any other I/O error reported by the underlying reader.
    fn read_uleb_u32(&mut self) -> Result<u32>;

    /// Decodes one unsigned LEB128 integer into a `u32`, accepting only the
    /// canonical encoding.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `u32` or the encoding
    ///   is not canonical.
    /// - Any other I/O error reported by the underlying reader.
    fn read_uleb_u32_strict(&mut self) -> Result<u32>;

    /// Decodes one unsigned LEB128 integer into a `u64`.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `u64`.
    /// - Any other I/O error reported by the underlying reader.
    fn read_uleb_u64(&mut self) -> Result<u64>;

    /// Decodes one unsigned LEB128 integer into a `u64`, accepting only the
    /// canonical encoding.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `u64` or the encoding
    ///   is not canonical.
    /// - Any other I/O error reported by the underlying reader.
    fn read_uleb_u64_strict(&mut self) -> Result<u64>;

    /// Decodes one unsigned LEB128 integer into a `u128`.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `u128`.
    /// - Any other I/O error reported by the underlying reader.
    fn read_uleb_u128(&mut self) -> Result<u128>;

    /// Decodes one unsigned LEB128 integer into a `u128`, accepting only the
    /// canonical encoding.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `u128` or the encoding
    ///   is not canonical.
    /// - Any other I/O error reported by the underlying reader.
    fn read_uleb_u128_strict(&mut self) -> Result<u128>;

    /// Decodes one unsigned LEB128 integer into a `usize`.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `usize`.
    /// - Any other I/O error reported by the underlying reader.
    fn read_uleb_usize(&mut self) -> Result<usize>;

    /// Decodes one unsigned LEB128 integer into a `usize`, accepting only the
    /// canonical encoding.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `usize` or the
    ///   encoding is not canonical.
    /// - Any other I/O error reported by the underlying reader.
    fn read_uleb_usize_strict(&mut self) -> Result<usize>;

    /// Decodes one signed LEB128 integer into an `i8`.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `i8`.
    /// - Any other I/O error reported by the underlying reader.
    fn read_sleb_i8(&mut self) -> Result<i8>;

    /// Decodes one signed LEB128 integer into an `i8`, accepting only the
    /// canonical encoding.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `i8` or the encoding
    ///   is not canonical.
    /// - Any other I/O error reported by the underlying reader.
    fn read_sleb_i8_strict(&mut self) -> Result<i8>;

    /// Decodes one signed LEB128 integer into an `i16`.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `i16`.
    /// - Any other I/O error reported by the underlying reader.
    fn read_sleb_i16(&mut self) -> Result<i16>;

    /// Decodes one signed LEB128 integer into an `i16`, accepting only the
    /// canonical encoding.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `i16` or the encoding
    ///   is not canonical.
    /// - Any other I/O error reported by the underlying reader.
    fn read_sleb_i16_strict(&mut self) -> Result<i16>;

    /// Decodes one signed LEB128 integer into an `i32`.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `i32`.
    /// - Any other I/O error reported by the underlying reader.
    fn read_sleb_i32(&mut self) -> Result<i32>;

    /// Decodes one signed LEB128 integer into an `i32`, accepting only the
    /// canonical encoding.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `i32` or the encoding
    ///   is not canonical.
    /// - Any other I/O error reported by the underlying reader.
    fn read_sleb_i32_strict(&mut self) -> Result<i32>;

    /// Decodes one signed LEB128 integer into an `i64`.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `i64`.
    /// - Any other I/O error reported by the underlying reader.
    fn read_sleb_i64(&mut self) -> Result<i64>;

    /// Decodes one signed LEB128 integer into an `i64`, accepting only the
    /// canonical encoding.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `i64` or the encoding
    ///   is not canonical.
    /// - Any other I/O error reported by the underlying reader.
    fn read_sleb_i64_strict(&mut self) -> Result<i64>;

    /// Decodes one signed LEB128 integer into an `i128`.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `i128`.
    /// - Any other I/O error reported by the underlying reader.
    fn read_sleb_i128(&mut self) -> Result<i128>;

    /// Decodes one signed LEB128 integer into an `i128`, accepting only the
    /// canonical encoding.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `i128` or the encoding
    ///   is not canonical.
    /// - Any other I/O error reported by the underlying reader.
    fn read_sleb_i128_strict(&mut self) -> Result<i128>;

    /// Decodes one signed LEB128 integer into an `isize`.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `isize`.
    /// - Any other I/O error reported by the underlying reader.
    fn read_sleb_isize(&mut self) -> Result<isize>;

    /// Decodes one signed LEB128 integer into an `isize`, accepting only the
    /// canonical encoding.
    ///
    /// # Returns
    /// The value read from the stream.
    ///
    /// # Errors
    /// - `UnexpectedEof` when the input ends before the integer is complete.
    /// - `InvalidData` when the encoded value overflows `isize` or the
    ///   encoding is not canonical.
    /// - Any other I/O error reported by the underlying reader.
    fn read_sleb_isize_strict(&mut self) -> Result<isize>;
}
271
272impl<T> Leb128ReadExt for T
273where
274    T: Read + ?Sized,
275{
276    #[inline]
277    fn read_uleb_u8(&mut self) -> Result<u8> {
278        read_uleb(self, u8::BITS, "u8", false).map(u128_to_u8)
279    }
280
281    #[inline]
282    fn read_uleb_u8_strict(&mut self) -> Result<u8> {
283        read_uleb(self, u8::BITS, "u8", true).map(u128_to_u8)
284    }
285
286    #[inline]
287    fn read_uleb_u16(&mut self) -> Result<u16> {
288        read_uleb(self, u16::BITS, "u16", false).map(u128_to_u16)
289    }
290
291    #[inline]
292    fn read_uleb_u16_strict(&mut self) -> Result<u16> {
293        read_uleb(self, u16::BITS, "u16", true).map(u128_to_u16)
294    }
295
296    #[inline]
297    fn read_uleb_u32(&mut self) -> Result<u32> {
298        read_uleb(self, u32::BITS, "u32", false).map(u128_to_u32)
299    }
300
301    #[inline]
302    fn read_uleb_u32_strict(&mut self) -> Result<u32> {
303        read_uleb(self, u32::BITS, "u32", true).map(u128_to_u32)
304    }
305
306    #[inline]
307    fn read_uleb_u64(&mut self) -> Result<u64> {
308        read_uleb(self, u64::BITS, "u64", false).map(u128_to_u64)
309    }
310
311    #[inline]
312    fn read_uleb_u64_strict(&mut self) -> Result<u64> {
313        read_uleb(self, u64::BITS, "u64", true).map(u128_to_u64)
314    }
315
316    #[inline]
317    fn read_uleb_u128(&mut self) -> Result<u128> {
318        read_uleb(self, u128::BITS, "u128", false)
319    }
320
321    #[inline]
322    fn read_uleb_u128_strict(&mut self) -> Result<u128> {
323        read_uleb(self, u128::BITS, "u128", true)
324    }
325
326    #[inline]
327    fn read_uleb_usize(&mut self) -> Result<usize> {
328        read_uleb(self, usize::BITS, "usize", false).map(u128_to_usize)
329    }
330
331    #[inline]
332    fn read_uleb_usize_strict(&mut self) -> Result<usize> {
333        read_uleb(self, usize::BITS, "usize", true).map(u128_to_usize)
334    }
335
336    #[inline]
337    fn read_sleb_i8(&mut self) -> Result<i8> {
338        read_sleb(self, i8::BITS, "i8", false).map(i128_to_i8)
339    }
340
341    #[inline]
342    fn read_sleb_i8_strict(&mut self) -> Result<i8> {
343        read_sleb(self, i8::BITS, "i8", true).map(i128_to_i8)
344    }
345
346    #[inline]
347    fn read_sleb_i16(&mut self) -> Result<i16> {
348        read_sleb(self, i16::BITS, "i16", false).map(i128_to_i16)
349    }
350
351    #[inline]
352    fn read_sleb_i16_strict(&mut self) -> Result<i16> {
353        read_sleb(self, i16::BITS, "i16", true).map(i128_to_i16)
354    }
355
356    #[inline]
357    fn read_sleb_i32(&mut self) -> Result<i32> {
358        read_sleb(self, i32::BITS, "i32", false).map(i128_to_i32)
359    }
360
361    #[inline]
362    fn read_sleb_i32_strict(&mut self) -> Result<i32> {
363        read_sleb(self, i32::BITS, "i32", true).map(i128_to_i32)
364    }
365
366    #[inline]
367    fn read_sleb_i64(&mut self) -> Result<i64> {
368        read_sleb(self, i64::BITS, "i64", false).map(i128_to_i64)
369    }
370
371    #[inline]
372    fn read_sleb_i64_strict(&mut self) -> Result<i64> {
373        read_sleb(self, i64::BITS, "i64", true).map(i128_to_i64)
374    }
375
376    #[inline]
377    fn read_sleb_i128(&mut self) -> Result<i128> {
378        read_sleb(self, i128::BITS, "i128", false)
379    }
380
381    #[inline]
382    fn read_sleb_i128_strict(&mut self) -> Result<i128> {
383        read_sleb(self, i128::BITS, "i128", true)
384    }
385
386    #[inline]
387    fn read_sleb_isize(&mut self) -> Result<isize> {
388        read_sleb(self, isize::BITS, "isize", false).map(i128_to_isize)
389    }
390
391    #[inline]
392    fn read_sleb_isize_strict(&mut self) -> Result<isize> {
393        read_sleb(self, isize::BITS, "isize", true).map(i128_to_isize)
394    }
395}
396
/// Narrows a `u128` to `u8` by keeping only its low 8 bits.
///
/// Higher bits are discarded; the LEB128 readers in this file only pass
/// values that the decoder has already limited to the target width.
#[inline]
fn u128_to_u8(value: u128) -> u8 {
    (value & u128::from(u8::MAX)) as u8
}
401
/// Narrows a `u128` to `u16` by keeping only its low 16 bits.
///
/// Higher bits are discarded; the LEB128 readers in this file only pass
/// values that the decoder has already limited to the target width.
#[inline]
fn u128_to_u16(value: u128) -> u16 {
    (value & u128::from(u16::MAX)) as u16
}
406
/// Narrows a `u128` to `u32` by keeping only its low 32 bits.
///
/// Higher bits are discarded; the LEB128 readers in this file only pass
/// values that the decoder has already limited to the target width.
#[inline]
fn u128_to_u32(value: u128) -> u32 {
    (value & u128::from(u32::MAX)) as u32
}
411
/// Narrows a `u128` to `u64` by keeping only its low 64 bits.
///
/// Higher bits are discarded; the LEB128 readers in this file only pass
/// values that the decoder has already limited to the target width.
#[inline]
fn u128_to_u64(value: u128) -> u64 {
    (value & u128::from(u64::MAX)) as u64
}
416
/// Narrows a `u128` to `usize` by keeping only the low `usize::BITS` bits.
///
/// Higher bits are discarded; the LEB128 readers in this file only pass
/// values that the decoder has already limited to the target width.
#[inline]
fn u128_to_usize(value: u128) -> usize {
    (value & (usize::MAX as u128)) as usize
}
421
/// Narrows an `i128` to `i8` by reinterpreting its low 8 bits.
///
/// Higher bits are discarded; the LEB128 readers in this file only pass
/// sign-extended values that already fit the target width.
#[inline]
fn i128_to_i8(value: i128) -> i8 {
    (value & i128::from(u8::MAX)) as i8
}
426
/// Narrows an `i128` to `i16` by reinterpreting its low 16 bits.
///
/// Higher bits are discarded; the LEB128 readers in this file only pass
/// sign-extended values that already fit the target width.
#[inline]
fn i128_to_i16(value: i128) -> i16 {
    (value & i128::from(u16::MAX)) as i16
}
431
/// Narrows an `i128` to `i32` by reinterpreting its low 32 bits.
///
/// Higher bits are discarded; the LEB128 readers in this file only pass
/// sign-extended values that already fit the target width.
#[inline]
fn i128_to_i32(value: i128) -> i32 {
    (value & i128::from(u32::MAX)) as i32
}
436
/// Narrows an `i128` to `i64` by reinterpreting its low 64 bits.
///
/// Higher bits are discarded; the LEB128 readers in this file only pass
/// sign-extended values that already fit the target width.
#[inline]
fn i128_to_i64(value: i128) -> i64 {
    (value & i128::from(u64::MAX)) as i64
}
441
/// Narrows an `i128` to `isize` by reinterpreting its low `isize::BITS` bits.
///
/// Higher bits are discarded; the LEB128 readers in this file only pass
/// sign-extended values that already fit the target width.
#[inline]
fn i128_to_isize(value: i128) -> isize {
    (value & (usize::MAX as i128)) as isize
}
446
/// Outcome of decoding a single unsigned LEB128 integer.
struct DecodedUleb {
    /// The decoded integer, widened to `u128`.
    value: u128,
    /// The bytes consumed from the reader, in the order they were read.
    bytes: Vec<u8>,
}
452
/// Outcome of decoding a single signed LEB128 integer.
struct DecodedSleb {
    /// The decoded, sign-extended integer, widened to `i128`.
    value: i128,
    /// The bytes consumed from the reader, in the order they were read.
    bytes: Vec<u8>,
}
458
459/// Reads an unsigned LEB128 integer constrained to `bits`.
460///
461/// # Parameters
462/// - `reader`: Source reader. It may be a sized reader or a reader trait
463///   object.
464/// - `bits`: Target integer width in bits.
465/// - `type_name`: Target type name used in error messages.
466/// - `strict`: Whether to reject non-canonical encodings.
467///
468/// # Returns
469/// Decoded value as `u128`.
470///
471/// # Errors
472/// Returns `UnexpectedEof` for truncated input, `InvalidData` for malformed,
473/// overflowing, or non-canonical input, or another I/O error from `reader`.
474fn read_uleb<T>(reader: &mut T, bits: u32, type_name: &'static str, strict: bool) -> Result<u128>
475where
476    T: Read + ?Sized,
477{
478    let decoded = read_uleb_with_bytes(reader, bits, type_name)?;
479    if strict && !is_canonical_uleb(decoded.value, &decoded.bytes) {
480        return Err(noncanonical_leb128(type_name));
481    }
482    Ok(decoded.value)
483}
484
485/// Reads an unsigned LEB128 integer and keeps its raw bytes.
486///
487/// # Parameters
488/// - `reader`: Source reader.
489/// - `bits`: Target integer width in bits.
490/// - `type_name`: Target type name used in error messages.
491///
492/// # Returns
493/// Decoded unsigned value and raw bytes.
494///
495/// # Errors
496/// Returns an I/O error, truncated input error, or malformed data error.
497fn read_uleb_with_bytes<T>(
498    reader: &mut T,
499    bits: u32,
500    type_name: &'static str,
501) -> Result<DecodedUleb>
502where
503    T: Read + ?Sized,
504{
505    let max_bytes = bits.div_ceil(7);
506    let final_payload_bits = bits - (max_bytes - 1) * 7;
507    let max_last_payload = ((1u16 << final_payload_bits) - 1) as u8;
508
509    let mut value = 0u128;
510    let mut bytes = Vec::with_capacity(max_bytes as usize);
511    for index in 0..max_bytes {
512        let byte = reader.read_u8()?;
513        let payload = byte & 0x7f;
514        let is_too_wide_final_byte = (index == max_bytes - 1) && (payload > max_last_payload);
515        if is_too_wide_final_byte {
516            return Err(invalid_leb128(type_name));
517        }
518        value |= (payload as u128) << (index * 7);
519        bytes.push(byte);
520        if byte & 0x80 == 0 {
521            return Ok(DecodedUleb { value, bytes });
522        }
523    }
524    Err(invalid_leb128(type_name))
525}
526
527/// Reads a signed LEB128 integer constrained to `bits`.
528///
529/// # Parameters
530/// - `reader`: Source reader. It may be a sized reader or a reader trait
531///   object.
532/// - `bits`: Target integer width in bits.
533/// - `type_name`: Target type name used in error messages.
534/// - `strict`: Whether to reject non-canonical encodings.
535///
536/// # Returns
537/// Decoded value as `i128`.
538///
539/// # Errors
540/// Returns `UnexpectedEof` for truncated input, `InvalidData` for malformed,
541/// overflowing, or non-canonical input, or another I/O error from `reader`.
542fn read_sleb<T>(reader: &mut T, bits: u32, type_name: &'static str, strict: bool) -> Result<i128>
543where
544    T: Read + ?Sized,
545{
546    let decoded = read_sleb_with_bytes(reader, bits, type_name)?;
547    if strict && !is_canonical_sleb(decoded.value, &decoded.bytes) {
548        return Err(noncanonical_leb128(type_name));
549    }
550    Ok(decoded.value)
551}
552
553/// Reads a signed LEB128 integer and keeps its raw bytes.
554///
555/// # Parameters
556/// - `reader`: Source reader.
557/// - `bits`: Target integer width in bits.
558/// - `type_name`: Target type name used in error messages.
559///
560/// # Returns
561/// Decoded signed value and raw bytes.
562///
563/// # Errors
564/// Returns an I/O error, truncated input error, or malformed data error.
565fn read_sleb_with_bytes<T>(
566    reader: &mut T,
567    bits: u32,
568    type_name: &'static str,
569) -> Result<DecodedSleb>
570where
571    T: Read + ?Sized,
572{
573    let max_bytes = bits.div_ceil(7);
574    let mut value = 0i128;
575    let mut shift = 0u32;
576    let mut bytes = Vec::with_capacity(max_bytes as usize);
577    for index in 0..max_bytes {
578        let byte = reader.read_u8()?;
579        let payload = byte & 0x7f;
580        if is_too_wide_signed_final_payload(payload, index, bits) {
581            return Err(invalid_leb128(type_name));
582        }
583
584        value |= (payload as i128) << shift;
585        shift += 7;
586        bytes.push(byte);
587        if byte & 0x80 == 0 {
588            if shift < i128::BITS && byte & 0x40 != 0 {
589                value |= (!0i128) << shift;
590            }
591            return Ok(DecodedSleb { value, bytes });
592        }
593    }
594    Err(invalid_leb128(type_name))
595}
596
/// Tells whether the last permitted byte of a signed value overflows `bits`.
///
/// Only the byte at index `bits.div_ceil(7) - 1` is inspected; every earlier
/// index yields `false`. In that last byte the payload bits beyond the target
/// width must all repeat the sign bit: all zero for a non-negative value, all
/// one for a negative value.
///
/// # Parameters
/// - `payload`: Seven-bit payload taken from the current byte.
/// - `index`: Zero-based position of the current byte.
/// - `bits`: Width of the target signed integer type in bits.
///
/// # Returns
/// `true` when the payload cannot belong to a value of the target width.
fn is_too_wide_signed_final_payload(payload: u8, index: u32, bits: u32) -> bool {
    let last_index = bits.div_ceil(7) - 1;
    if index != last_index {
        return false;
    }

    // Bits of this payload that still belong to the value itself.
    let value_bits = bits - index * 7;
    let sign_bit = 1u8 << (value_bits - 1);
    let value_mask = (1u8 << value_bits) - 1;
    let padding_mask = !value_mask & 0x7f_u8;

    let expected_padding = if payload & sign_bit != 0 {
        padding_mask
    } else {
        0
    };
    (payload & padding_mask) != expected_padding
}
624
625/// Checks whether `bytes` are the canonical unsigned LEB128 encoding.
626///
627/// # Parameters
628/// - `value`: Decoded value.
629/// - `bytes`: Raw bytes read from the stream.
630///
631/// # Returns
632/// `true` when re-encoding `value` produces the same bytes.
633#[inline]
634fn is_canonical_uleb(value: u128, bytes: &[u8]) -> bool {
635    let mut expected = Vec::new();
636    encode_uleb(value, &mut expected);
637    expected == bytes
638}
639
640/// Checks whether `bytes` are the canonical signed LEB128 encoding.
641///
642/// # Parameters
643/// - `value`: Decoded value.
644/// - `bytes`: Raw bytes read from the stream.
645///
646/// # Returns
647/// `true` when re-encoding `value` produces the same bytes.
648fn is_canonical_sleb(value: i128, bytes: &[u8]) -> bool {
649    let mut expected = Vec::new();
650    encode_sleb(value, &mut expected);
651    expected == bytes
652}
653
/// Appends the shortest unsigned LEB128 encoding of `value` to `output`.
///
/// Existing contents of `output` are kept; the encoding is pushed after them.
///
/// # Parameters
/// - `value`: Value to encode.
/// - `output`: Buffer that receives the encoded bytes.
fn encode_uleb(mut value: u128, output: &mut Vec<u8>) {
    loop {
        let low_seven = (value & 0x7f) as u8;
        value >>= 7;
        if value == 0 {
            // Last group: emitted without the continuation bit.
            output.push(low_seven);
            return;
        }
        output.push(low_seven | 0x80);
    }
}
666
/// Appends the shortest signed LEB128 encoding of `value` to `output`.
///
/// Existing contents of `output` are kept; the encoding is pushed after them.
///
/// # Parameters
/// - `value`: Value to encode.
/// - `output`: Buffer that receives the encoded bytes.
fn encode_sleb(value: i128, output: &mut Vec<u8>) {
    let mut rest = value;
    let mut more = true;
    while more {
        let low_seven = (rest & 0x7f) as u8;
        // Arithmetic shift: negative values converge to -1, others to 0.
        rest >>= 7;
        let sign_bit_set = low_seven & 0x40 != 0;
        // Encoding ends once the remaining bits are pure sign extension and
        // the emitted group's bit 0x40 already matches that sign.
        more = match rest {
            0 => sign_bit_set,
            -1 => !sign_bit_set,
            _ => true,
        };
        output.push(if more { low_seven | 0x80 } else { low_seven });
    }
}
685
/// Creates the error reported when a LEB128 integer is malformed.
///
/// # Parameters
/// - `type_name`: Name of the target integer type, embedded in the message.
///
/// # Returns
/// An [`ErrorKind::InvalidData`] error whose message names `type_name`.
#[inline]
fn invalid_leb128(type_name: &'static str) -> Error {
    let message = format!("malformed LEB128 integer for {type_name}");
    Error::new(ErrorKind::InvalidData, message)
}
700
/// Creates the error reported when a LEB128 integer is not canonically
/// encoded.
///
/// # Parameters
/// - `type_name`: Name of the target integer type, embedded in the message.
///
/// # Returns
/// An [`ErrorKind::InvalidData`] error whose message names `type_name`.
#[inline]
fn noncanonical_leb128(type_name: &'static str) -> Error {
    let message = format!("non-canonical LEB128 integer for {type_name}");
    Error::new(ErrorKind::InvalidData, message)
}