qubit_io/ext/leb128_read_ext.rs
1/*******************************************************************************
2 *
3 * Copyright (c) 2026 Haixing Hu.
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 *
7 * Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10use std::io::{
11 Error,
12 ErrorKind,
13 Read,
14 Result,
15};
16
17use crate::BinaryReadExt;
18
/// Extension methods for decoding LEB128 integers from any [`Read`] source.
///
/// Methods named `read_uleb_*` decode unsigned LEB128 values and methods named
/// `read_sleb_*` decode signed LEB128 values. Both forms carry seven payload
/// bits per byte, least-significant group first, and use the high bit of each
/// byte as the continuation flag. The integer encoding is described by the
/// WebAssembly Core binary format:
/// <https://webassembly.github.io/spec/core/binary/values.html#integers>.
///
/// The blanket implementation in this file bounds every read: a value for an
/// `N`-bit target type is decoded from at most `ceil(N / 7)` bytes, and an
/// encoding that is longer or that carries payload bits outside the target
/// width is rejected with `InvalidData`. The reader is never rewound, so bytes
/// consumed before a failure stay consumed.
///
/// Methods with the `_strict` suffix also reject non-canonical encodings, that
/// is, byte sequences that differ from the shortest encoding of the decoded
/// value (for example values padded with unnecessary continuation bytes).
pub trait Leb128ReadExt: Read {
    /// Reads an unsigned LEB128 value that fits in `u8`.
    ///
    /// At most 2 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow,
    /// or another I/O error from the underlying reader.
    fn read_uleb_u8(&mut self) -> Result<u8>;

    /// Reads a canonical (shortest-form) unsigned LEB128 value that fits in
    /// `u8`.
    ///
    /// At most 2 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow
    /// or non-canonical encoding, or another I/O error from the underlying reader.
    fn read_uleb_u8_strict(&mut self) -> Result<u8>;

    /// Reads an unsigned LEB128 value that fits in `u16`.
    ///
    /// At most 3 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow,
    /// or another I/O error from the underlying reader.
    fn read_uleb_u16(&mut self) -> Result<u16>;

    /// Reads a canonical (shortest-form) unsigned LEB128 value that fits in
    /// `u16`.
    ///
    /// At most 3 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow
    /// or non-canonical encoding, or another I/O error from the underlying reader.
    fn read_uleb_u16_strict(&mut self) -> Result<u16>;

    /// Reads an unsigned LEB128 value that fits in `u32`.
    ///
    /// At most 5 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow,
    /// or another I/O error from the underlying reader.
    fn read_uleb_u32(&mut self) -> Result<u32>;

    /// Reads a canonical (shortest-form) unsigned LEB128 value that fits in
    /// `u32`.
    ///
    /// At most 5 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow
    /// or non-canonical encoding, or another I/O error from the underlying reader.
    fn read_uleb_u32_strict(&mut self) -> Result<u32>;

    /// Reads an unsigned LEB128 value that fits in `u64`.
    ///
    /// At most 10 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow,
    /// or another I/O error from the underlying reader.
    fn read_uleb_u64(&mut self) -> Result<u64>;

    /// Reads a canonical (shortest-form) unsigned LEB128 value that fits in
    /// `u64`.
    ///
    /// At most 10 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow
    /// or non-canonical encoding, or another I/O error from the underlying reader.
    fn read_uleb_u64_strict(&mut self) -> Result<u64>;

    /// Reads an unsigned LEB128 value that fits in `u128`.
    ///
    /// At most 19 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow,
    /// or another I/O error from the underlying reader.
    fn read_uleb_u128(&mut self) -> Result<u128>;

    /// Reads a canonical (shortest-form) unsigned LEB128 value that fits in
    /// `u128`.
    ///
    /// At most 19 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow
    /// or non-canonical encoding, or another I/O error from the underlying reader.
    fn read_uleb_u128_strict(&mut self) -> Result<u128>;

    /// Reads an unsigned LEB128 value that fits in `usize`.
    ///
    /// The byte limit follows the platform pointer width: at most
    /// `usize::BITS.div_ceil(7)` encoded bytes are consumed (10 on 64-bit
    /// targets, 5 on 32-bit targets).
    ///
    /// # Returns
    /// The decoded value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow,
    /// or another I/O error from the underlying reader.
    fn read_uleb_usize(&mut self) -> Result<usize>;

    /// Reads a canonical (shortest-form) unsigned LEB128 value that fits in
    /// `usize`.
    ///
    /// The byte limit follows the platform pointer width: at most
    /// `usize::BITS.div_ceil(7)` encoded bytes are consumed (10 on 64-bit
    /// targets, 5 on 32-bit targets).
    ///
    /// # Returns
    /// The decoded value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow
    /// or non-canonical encoding, or another I/O error from the underlying reader.
    fn read_uleb_usize_strict(&mut self) -> Result<usize>;

    /// Reads a signed LEB128 value that fits in `i8`.
    ///
    /// At most 2 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded, sign-extended value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow,
    /// or another I/O error from the underlying reader.
    fn read_sleb_i8(&mut self) -> Result<i8>;

    /// Reads a canonical (shortest-form) signed LEB128 value that fits in
    /// `i8`.
    ///
    /// At most 2 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded, sign-extended value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow
    /// or non-canonical encoding, or another I/O error from the underlying reader.
    fn read_sleb_i8_strict(&mut self) -> Result<i8>;

    /// Reads a signed LEB128 value that fits in `i16`.
    ///
    /// At most 3 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded, sign-extended value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow,
    /// or another I/O error from the underlying reader.
    fn read_sleb_i16(&mut self) -> Result<i16>;

    /// Reads a canonical (shortest-form) signed LEB128 value that fits in
    /// `i16`.
    ///
    /// At most 3 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded, sign-extended value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow
    /// or non-canonical encoding, or another I/O error from the underlying reader.
    fn read_sleb_i16_strict(&mut self) -> Result<i16>;

    /// Reads a signed LEB128 value that fits in `i32`.
    ///
    /// At most 5 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded, sign-extended value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow,
    /// or another I/O error from the underlying reader.
    fn read_sleb_i32(&mut self) -> Result<i32>;

    /// Reads a canonical (shortest-form) signed LEB128 value that fits in
    /// `i32`.
    ///
    /// At most 5 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded, sign-extended value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow
    /// or non-canonical encoding, or another I/O error from the underlying reader.
    fn read_sleb_i32_strict(&mut self) -> Result<i32>;

    /// Reads a signed LEB128 value that fits in `i64`.
    ///
    /// At most 10 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded, sign-extended value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow,
    /// or another I/O error from the underlying reader.
    fn read_sleb_i64(&mut self) -> Result<i64>;

    /// Reads a canonical (shortest-form) signed LEB128 value that fits in
    /// `i64`.
    ///
    /// At most 10 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded, sign-extended value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow
    /// or non-canonical encoding, or another I/O error from the underlying reader.
    fn read_sleb_i64_strict(&mut self) -> Result<i64>;

    /// Reads a signed LEB128 value that fits in `i128`.
    ///
    /// At most 19 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded, sign-extended value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow,
    /// or another I/O error from the underlying reader.
    fn read_sleb_i128(&mut self) -> Result<i128>;

    /// Reads a canonical (shortest-form) signed LEB128 value that fits in
    /// `i128`.
    ///
    /// At most 19 encoded bytes are consumed.
    ///
    /// # Returns
    /// The decoded, sign-extended value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow
    /// or non-canonical encoding, or another I/O error from the underlying reader.
    fn read_sleb_i128_strict(&mut self) -> Result<i128>;

    /// Reads a signed LEB128 value that fits in `isize`.
    ///
    /// The byte limit follows the platform pointer width: at most
    /// `isize::BITS.div_ceil(7)` encoded bytes are consumed (10 on 64-bit
    /// targets, 5 on 32-bit targets).
    ///
    /// # Returns
    /// The decoded, sign-extended value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow,
    /// or another I/O error from the underlying reader.
    fn read_sleb_isize(&mut self) -> Result<isize>;

    /// Reads a canonical (shortest-form) signed LEB128 value that fits in
    /// `isize`.
    ///
    /// The byte limit follows the platform pointer width: at most
    /// `isize::BITS.div_ceil(7)` encoded bytes are consumed (10 on 64-bit
    /// targets, 5 on 32-bit targets).
    ///
    /// # Returns
    /// The decoded, sign-extended value.
    ///
    /// # Errors
    /// Returns `UnexpectedEof` for truncated input, `InvalidData` for overflow
    /// or non-canonical encoding, or another I/O error from the underlying reader.
    fn read_sleb_isize_strict(&mut self) -> Result<isize>;
}
271
272impl<T> Leb128ReadExt for T
273where
274 T: Read + ?Sized,
275{
276 #[inline]
277 fn read_uleb_u8(&mut self) -> Result<u8> {
278 read_uleb(self, u8::BITS, "u8", false).map(u128_to_u8)
279 }
280
281 #[inline]
282 fn read_uleb_u8_strict(&mut self) -> Result<u8> {
283 read_uleb(self, u8::BITS, "u8", true).map(u128_to_u8)
284 }
285
286 #[inline]
287 fn read_uleb_u16(&mut self) -> Result<u16> {
288 read_uleb(self, u16::BITS, "u16", false).map(u128_to_u16)
289 }
290
291 #[inline]
292 fn read_uleb_u16_strict(&mut self) -> Result<u16> {
293 read_uleb(self, u16::BITS, "u16", true).map(u128_to_u16)
294 }
295
296 #[inline]
297 fn read_uleb_u32(&mut self) -> Result<u32> {
298 read_uleb(self, u32::BITS, "u32", false).map(u128_to_u32)
299 }
300
301 #[inline]
302 fn read_uleb_u32_strict(&mut self) -> Result<u32> {
303 read_uleb(self, u32::BITS, "u32", true).map(u128_to_u32)
304 }
305
306 #[inline]
307 fn read_uleb_u64(&mut self) -> Result<u64> {
308 read_uleb(self, u64::BITS, "u64", false).map(u128_to_u64)
309 }
310
311 #[inline]
312 fn read_uleb_u64_strict(&mut self) -> Result<u64> {
313 read_uleb(self, u64::BITS, "u64", true).map(u128_to_u64)
314 }
315
316 #[inline]
317 fn read_uleb_u128(&mut self) -> Result<u128> {
318 read_uleb(self, u128::BITS, "u128", false)
319 }
320
321 #[inline]
322 fn read_uleb_u128_strict(&mut self) -> Result<u128> {
323 read_uleb(self, u128::BITS, "u128", true)
324 }
325
326 #[inline]
327 fn read_uleb_usize(&mut self) -> Result<usize> {
328 read_uleb(self, usize::BITS, "usize", false).map(u128_to_usize)
329 }
330
331 #[inline]
332 fn read_uleb_usize_strict(&mut self) -> Result<usize> {
333 read_uleb(self, usize::BITS, "usize", true).map(u128_to_usize)
334 }
335
336 #[inline]
337 fn read_sleb_i8(&mut self) -> Result<i8> {
338 read_sleb(self, i8::BITS, "i8", false).map(i128_to_i8)
339 }
340
341 #[inline]
342 fn read_sleb_i8_strict(&mut self) -> Result<i8> {
343 read_sleb(self, i8::BITS, "i8", true).map(i128_to_i8)
344 }
345
346 #[inline]
347 fn read_sleb_i16(&mut self) -> Result<i16> {
348 read_sleb(self, i16::BITS, "i16", false).map(i128_to_i16)
349 }
350
351 #[inline]
352 fn read_sleb_i16_strict(&mut self) -> Result<i16> {
353 read_sleb(self, i16::BITS, "i16", true).map(i128_to_i16)
354 }
355
356 #[inline]
357 fn read_sleb_i32(&mut self) -> Result<i32> {
358 read_sleb(self, i32::BITS, "i32", false).map(i128_to_i32)
359 }
360
361 #[inline]
362 fn read_sleb_i32_strict(&mut self) -> Result<i32> {
363 read_sleb(self, i32::BITS, "i32", true).map(i128_to_i32)
364 }
365
366 #[inline]
367 fn read_sleb_i64(&mut self) -> Result<i64> {
368 read_sleb(self, i64::BITS, "i64", false).map(i128_to_i64)
369 }
370
371 #[inline]
372 fn read_sleb_i64_strict(&mut self) -> Result<i64> {
373 read_sleb(self, i64::BITS, "i64", true).map(i128_to_i64)
374 }
375
376 #[inline]
377 fn read_sleb_i128(&mut self) -> Result<i128> {
378 read_sleb(self, i128::BITS, "i128", false)
379 }
380
381 #[inline]
382 fn read_sleb_i128_strict(&mut self) -> Result<i128> {
383 read_sleb(self, i128::BITS, "i128", true)
384 }
385
386 #[inline]
387 fn read_sleb_isize(&mut self) -> Result<isize> {
388 read_sleb(self, isize::BITS, "isize", false).map(i128_to_isize)
389 }
390
391 #[inline]
392 fn read_sleb_isize_strict(&mut self) -> Result<isize> {
393 read_sleb(self, isize::BITS, "isize", true).map(i128_to_isize)
394 }
395}
396
/// Narrows a `u128` to `u8` by keeping only its low 8 bits.
///
/// The call sites in this file pass values decoded with an 8-bit limit, so no
/// set bit is dropped there; any other input is truncated.
#[inline]
fn u128_to_u8(value: u128) -> u8 {
    (value & u128::from(u8::MAX)) as u8
}
401
/// Narrows a `u128` to `u16` by keeping only its low 16 bits.
///
/// The call sites in this file pass values decoded with a 16-bit limit, so no
/// set bit is dropped there; any other input is truncated.
#[inline]
fn u128_to_u16(value: u128) -> u16 {
    (value & u128::from(u16::MAX)) as u16
}
406
/// Narrows a `u128` to `u32` by keeping only its low 32 bits.
///
/// The call sites in this file pass values decoded with a 32-bit limit, so no
/// set bit is dropped there; any other input is truncated.
#[inline]
fn u128_to_u32(value: u128) -> u32 {
    (value & u128::from(u32::MAX)) as u32
}
411
/// Narrows a `u128` to `u64` by keeping only its low 64 bits.
///
/// The call sites in this file pass values decoded with a 64-bit limit, so no
/// set bit is dropped there; any other input is truncated.
#[inline]
fn u128_to_u64(value: u128) -> u64 {
    (value & u128::from(u64::MAX)) as u64
}
416
/// Narrows a `u128` to `usize` by keeping only the low `usize::BITS` bits.
///
/// The call sites in this file pass values decoded with a `usize::BITS` limit,
/// so no set bit is dropped there; any other input is truncated.
#[inline]
fn u128_to_usize(value: u128) -> usize {
    // `u128: From<usize>` does not exist, so the mask is widened with `as`.
    let pointer_width_mask = usize::MAX as u128;
    (value & pointer_width_mask) as usize
}
421
/// Narrows an `i128` to `i8` by keeping its low 8 bits in two's complement.
///
/// The call sites in this file pass values decoded with an 8-bit limit, so the
/// result equals the input there; any other input wraps.
#[inline]
fn i128_to_i8(value: i128) -> i8 {
    // Truncate to the raw bit pattern first, then reinterpret it as signed.
    let low_bits = value as u8;
    low_bits as i8
}
426
/// Narrows an `i128` to `i16` by keeping its low 16 bits in two's complement.
///
/// The call sites in this file pass values decoded with a 16-bit limit, so the
/// result equals the input there; any other input wraps.
#[inline]
fn i128_to_i16(value: i128) -> i16 {
    // Truncate to the raw bit pattern first, then reinterpret it as signed.
    let low_bits = value as u16;
    low_bits as i16
}
431
/// Narrows an `i128` to `i32` by keeping its low 32 bits in two's complement.
///
/// The call sites in this file pass values decoded with a 32-bit limit, so the
/// result equals the input there; any other input wraps.
#[inline]
fn i128_to_i32(value: i128) -> i32 {
    // Truncate to the raw bit pattern first, then reinterpret it as signed.
    let low_bits = value as u32;
    low_bits as i32
}
436
/// Narrows an `i128` to `i64` by keeping its low 64 bits in two's complement.
///
/// The call sites in this file pass values decoded with a 64-bit limit, so the
/// result equals the input there; any other input wraps.
#[inline]
fn i128_to_i64(value: i128) -> i64 {
    // Truncate to the raw bit pattern first, then reinterpret it as signed.
    let low_bits = value as u64;
    low_bits as i64
}
441
/// Narrows an `i128` to `isize` by keeping its low `isize::BITS` bits in two's
/// complement.
///
/// The call sites in this file pass values decoded with an `isize::BITS`
/// limit, so the result equals the input there; any other input wraps.
#[inline]
fn i128_to_isize(value: i128) -> isize {
    // Truncate to the raw bit pattern first, then reinterpret it as signed.
    let low_bits = value as usize;
    low_bits as isize
}
446
/// Decoded unsigned LEB128 value and its raw bytes.
///
/// Produced by `read_uleb_with_bytes`; the raw bytes are kept so that
/// `read_uleb` can run the canonical-form check in strict mode.
struct DecodedUleb {
    /// Decoded integer, held in the widest unsigned type.
    value: u128,
    /// Encoded bytes exactly as read from the stream, terminator included.
    bytes: Vec<u8>,
}
452
/// Decoded signed LEB128 value and its raw bytes.
///
/// Produced by `read_sleb_with_bytes`; the raw bytes are kept so that
/// `read_sleb` can run the canonical-form check in strict mode.
struct DecodedSleb {
    /// Decoded, sign-extended integer, held in the widest signed type.
    value: i128,
    /// Encoded bytes exactly as read from the stream, terminator included.
    bytes: Vec<u8>,
}
458
459/// Reads an unsigned LEB128 integer constrained to `bits`.
460///
461/// # Parameters
462/// - `reader`: Source reader. It may be a sized reader or a reader trait
463/// object.
464/// - `bits`: Target integer width in bits.
465/// - `type_name`: Target type name used in error messages.
466/// - `strict`: Whether to reject non-canonical encodings.
467///
468/// # Returns
469/// Decoded value as `u128`.
470///
471/// # Errors
472/// Returns `UnexpectedEof` for truncated input, `InvalidData` for malformed,
473/// overflowing, or non-canonical input, or another I/O error from `reader`.
474fn read_uleb<T>(reader: &mut T, bits: u32, type_name: &'static str, strict: bool) -> Result<u128>
475where
476 T: Read + ?Sized,
477{
478 let decoded = read_uleb_with_bytes(reader, bits, type_name)?;
479 if strict && !is_canonical_uleb(decoded.value, &decoded.bytes) {
480 return Err(noncanonical_leb128(type_name));
481 }
482 Ok(decoded.value)
483}
484
485/// Reads an unsigned LEB128 integer and keeps its raw bytes.
486///
487/// # Parameters
488/// - `reader`: Source reader.
489/// - `bits`: Target integer width in bits.
490/// - `type_name`: Target type name used in error messages.
491///
492/// # Returns
493/// Decoded unsigned value and raw bytes.
494///
495/// # Errors
496/// Returns an I/O error, truncated input error, or malformed data error.
497fn read_uleb_with_bytes<T>(
498 reader: &mut T,
499 bits: u32,
500 type_name: &'static str,
501) -> Result<DecodedUleb>
502where
503 T: Read + ?Sized,
504{
505 let max_bytes = bits.div_ceil(7);
506 let final_payload_bits = bits - (max_bytes - 1) * 7;
507 let max_last_payload = ((1u16 << final_payload_bits) - 1) as u8;
508
509 let mut value = 0u128;
510 let mut bytes = Vec::with_capacity(max_bytes as usize);
511 for index in 0..max_bytes {
512 let byte = reader.read_u8()?;
513 let payload = byte & 0x7f;
514 let is_too_wide_final_byte = (index == max_bytes - 1) && (payload > max_last_payload);
515 if is_too_wide_final_byte {
516 return Err(invalid_leb128(type_name));
517 }
518 value |= (payload as u128) << (index * 7);
519 bytes.push(byte);
520 if byte & 0x80 == 0 {
521 return Ok(DecodedUleb { value, bytes });
522 }
523 }
524 Err(invalid_leb128(type_name))
525}
526
527/// Reads a signed LEB128 integer constrained to `bits`.
528///
529/// # Parameters
530/// - `reader`: Source reader. It may be a sized reader or a reader trait
531/// object.
532/// - `bits`: Target integer width in bits.
533/// - `type_name`: Target type name used in error messages.
534/// - `strict`: Whether to reject non-canonical encodings.
535///
536/// # Returns
537/// Decoded value as `i128`.
538///
539/// # Errors
540/// Returns `UnexpectedEof` for truncated input, `InvalidData` for malformed,
541/// overflowing, or non-canonical input, or another I/O error from `reader`.
542fn read_sleb<T>(reader: &mut T, bits: u32, type_name: &'static str, strict: bool) -> Result<i128>
543where
544 T: Read + ?Sized,
545{
546 let decoded = read_sleb_with_bytes(reader, bits, type_name)?;
547 if strict && !is_canonical_sleb(decoded.value, &decoded.bytes) {
548 return Err(noncanonical_leb128(type_name));
549 }
550 Ok(decoded.value)
551}
552
553/// Reads a signed LEB128 integer and keeps its raw bytes.
554///
555/// # Parameters
556/// - `reader`: Source reader.
557/// - `bits`: Target integer width in bits.
558/// - `type_name`: Target type name used in error messages.
559///
560/// # Returns
561/// Decoded signed value and raw bytes.
562///
563/// # Errors
564/// Returns an I/O error, truncated input error, or malformed data error.
565fn read_sleb_with_bytes<T>(
566 reader: &mut T,
567 bits: u32,
568 type_name: &'static str,
569) -> Result<DecodedSleb>
570where
571 T: Read + ?Sized,
572{
573 let max_bytes = bits.div_ceil(7);
574 let mut value = 0i128;
575 let mut shift = 0u32;
576 let mut bytes = Vec::with_capacity(max_bytes as usize);
577 for index in 0..max_bytes {
578 let byte = reader.read_u8()?;
579 let payload = byte & 0x7f;
580 if is_too_wide_signed_final_payload(payload, index, bits) {
581 return Err(invalid_leb128(type_name));
582 }
583
584 value |= (payload as i128) << shift;
585 shift += 7;
586 bytes.push(byte);
587 if byte & 0x80 == 0 {
588 if shift < i128::BITS && byte & 0x40 != 0 {
589 value |= (!0i128) << shift;
590 }
591 return Ok(DecodedSleb { value, bytes });
592 }
593 }
594 Err(invalid_leb128(type_name))
595}
596
/// Tells whether the payload of the last permitted byte overflows a signed
/// target of `bits` bits.
///
/// Only the byte at index `ceil(bits / 7) - 1` is inspected; every earlier
/// index yields `false`. In that last byte the low `bits - index * 7` payload
/// bits belong to the value and the topmost of them is the sign bit. The
/// remaining payload bits are padding and must all repeat the sign bit.
///
/// # Parameters
/// - `payload`: Seven-bit payload of the current byte.
/// - `index`: Zero-based position of the byte within the encoding.
/// - `bits`: Width of the signed target type in bits.
///
/// # Returns
/// `true` when the payload cannot belong to a valid value of the target width.
fn is_too_wide_signed_final_payload(payload: u8, index: u32, bits: u32) -> bool {
    let last_index = bits.div_ceil(7) - 1;
    if index == last_index {
        let significant = bits - index * 7;
        let value_mask = (1u8 << significant) - 1;
        let padding_mask = !value_mask & 0x7f_u8;
        let sign_is_set = payload & (1u8 << (significant - 1)) != 0;
        // Padding must be all ones for a negative value, all zeros otherwise.
        let expected_padding = if sign_is_set { padding_mask } else { 0 };
        payload & padding_mask != expected_padding
    } else {
        false
    }
}
624
625/// Checks whether `bytes` are the canonical unsigned LEB128 encoding.
626///
627/// # Parameters
628/// - `value`: Decoded value.
629/// - `bytes`: Raw bytes read from the stream.
630///
631/// # Returns
632/// `true` when re-encoding `value` produces the same bytes.
633#[inline]
634fn is_canonical_uleb(value: u128, bytes: &[u8]) -> bool {
635 let mut expected = Vec::new();
636 encode_uleb(value, &mut expected);
637 expected == bytes
638}
639
640/// Checks whether `bytes` are the canonical signed LEB128 encoding.
641///
642/// # Parameters
643/// - `value`: Decoded value.
644/// - `bytes`: Raw bytes read from the stream.
645///
646/// # Returns
647/// `true` when re-encoding `value` produces the same bytes.
648fn is_canonical_sleb(value: i128, bytes: &[u8]) -> bool {
649 let mut expected = Vec::new();
650 encode_sleb(value, &mut expected);
651 expected == bytes
652}
653
/// Appends the shortest unsigned LEB128 encoding of `value` to `output`.
///
/// Seven-bit groups are emitted least-significant first; every group except
/// the last has its high bit set. Existing contents of `output` are kept.
///
/// # Parameters
/// - `value`: Value to encode.
/// - `output`: Buffer the encoded bytes are appended to.
fn encode_uleb(mut value: u128, output: &mut Vec<u8>) {
    loop {
        let group = (value & 0x7f) as u8;
        value >>= 7;
        if value == 0 {
            // Nothing left above this group: emit it as the terminator.
            output.push(group);
            break;
        }
        output.push(group | 0x80);
    }
}
666
/// Appends the shortest signed LEB128 encoding of `value` to `output`.
///
/// Seven-bit groups are emitted least-significant first. Encoding stops once
/// the bits that remain are pure sign extension of the group just produced,
/// meaning its bit 6 already matches the sign of the value. Existing contents
/// of `output` are kept.
///
/// # Parameters
/// - `value`: Value to encode.
/// - `output`: Buffer the encoded bytes are appended to.
fn encode_sleb(value: i128, output: &mut Vec<u8>) {
    let mut rest = value;
    let mut more = true;
    while more {
        let group = (rest & 0x7f) as u8;
        rest >>= 7;
        let sign_bit_set = group & 0x40 != 0;
        more = match rest {
            // Non-negative remainder: done unless the group looks negative.
            0 => sign_bit_set,
            // All-ones remainder: done unless the group looks non-negative.
            -1 => !sign_bit_set,
            _ => true,
        };
        output.push(if more { group | 0x80 } else { group });
    }
}
685
/// Creates the error reported when a LEB128 integer is malformed for its
/// target type.
///
/// # Parameters
/// - `type_name`: Name of the target type, embedded in the message.
///
/// # Returns
/// An [`ErrorKind::InvalidData`] error whose message names the target type.
#[inline]
fn invalid_leb128(type_name: &'static str) -> Error {
    let message = format!("malformed LEB128 integer for {type_name}");
    Error::new(ErrorKind::InvalidData, message)
}
700
/// Creates the error reported when a LEB128 integer is valid but not encoded
/// in its canonical form.
///
/// # Parameters
/// - `type_name`: Name of the target type, embedded in the message.
///
/// # Returns
/// An [`ErrorKind::InvalidData`] error whose message names the target type.
#[inline]
fn noncanonical_leb128(type_name: &'static str) -> Error {
    let message = format!("non-canonical LEB128 integer for {type_name}");
    Error::new(ErrorKind::InvalidData, message)
}