qubit_io/codec/leb128_codec.rs
1/*******************************************************************************
2 *
3 * Copyright (c) 2026 Haixing Hu.
4 *
5 * SPDX-License-Identifier: Apache-2.0
6 *
7 * Licensed under the Apache License, Version 2.0.
8 *
9 ******************************************************************************/
10
11use core::marker::PhantomData;
12
13use crate::{
14 DecodePolicy,
15 Leb128DecodeError,
16 Leb128DecodeErrorKind,
17 NonStrict,
18};
19
20/// Type-level unchecked LEB128 codec.
21///
22/// `T` selects the decoded integer type and `P` selects the decoding policy.
23/// Encoding is always canonical; `P` only affects decoding.
24#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
25pub struct Leb128Codec<T, P = NonStrict> {
26 marker: PhantomData<fn() -> (T, P)>,
27}
28
29macro_rules! impl_unsigned_leb128_codec {
30 ($ty:ty) => {
31 impl<P> Leb128Codec<$ty, P>
32 where
33 P: DecodePolicy,
34 {
35 /// Minimum number of bytes required to encode or decode this type.
36 pub const REQUIRED_MIN_BUFFER_LEN: usize = (<$ty>::BITS as usize).div_ceil(7);
37
38 /// Decodes a value from `input` starting at `index` without bounds checks.
39 ///
40 /// # Parameters
41 ///
42 /// - `input`: Source byte buffer.
43 /// - `index`: Start index in `input`.
44 ///
45 /// # Returns
46 ///
47 /// Returns the decoded value and the number of consumed bytes.
48 ///
49 /// # Errors
50 ///
51 /// Returns [`Leb128DecodeError`] if the bytes are malformed or strict
52 /// decoding rejects a non-canonical representation.
53 ///
54 /// # Safety
55 ///
56 /// The caller must guarantee that `input.as_ptr().add(index)` is valid to
57 /// read [`Self::REQUIRED_MIN_BUFFER_LEN`] bytes, or that a valid terminating byte
58 /// appears before that limit.
59 #[inline(always)]
60 pub unsafe fn read_unchecked(input: &[u8], index: usize) -> Result<($ty, usize), Leb128DecodeError> {
61 // SAFETY: The caller guarantees enough readable bytes for this type.
62 let (value, consumed) =
63 unsafe { read_uleb_unchecked::<P>(input, index, <$ty>::BITS, Self::REQUIRED_MIN_BUFFER_LEN)? };
64 Ok((value as $ty, consumed))
65 }
66
67 /// Tries to decode from the currently available bytes without bounds checks.
68 ///
69 /// This internal entry point is used by buffered readers that may have
70 /// only part of a variable-length payload in their input buffer. It
71 /// decodes while scanning for the terminating byte, so callers do not
72 /// need a separate terminator pre-scan before calling the codec.
73 ///
74 /// # Parameters
75 ///
76 /// - `input`: Source byte buffer.
77 /// - `index`: Start index in `input`.
78 /// - `available`: Number of readable bytes currently available from
79 /// `index`.
80 ///
81 /// # Returns
82 ///
83 /// Returns `Ok(Some((value, consumed)))` when a complete value is
84 /// decoded. Returns `Ok(None)` when more bytes are needed. Returns
85 /// `Err((error, consumed))` when the payload is invalid and should be
86 /// consumed before the error is reported.
87 ///
88 /// # Safety
89 ///
90 /// The caller must guarantee that `input.as_ptr().add(index)` is valid
91 /// to read `available` bytes and that `available` is no greater than
92 /// [`Self::REQUIRED_MIN_BUFFER_LEN`].
93 #[inline(always)]
94 pub(crate) unsafe fn read_available_unchecked(
95 input: &[u8],
96 index: usize,
97 available: usize,
98 ) -> Result<Option<($ty, usize)>, (Leb128DecodeError, usize)> {
99 // SAFETY: The caller guarantees that exactly `available` bytes
100 // are readable from `index`.
101 unsafe {
102 read_uleb_available_unchecked::<P>(
103 input,
104 index,
105 <$ty>::BITS,
106 Self::REQUIRED_MIN_BUFFER_LEN,
107 available,
108 )
109 }
110 .map(|result| result.map(|(value, consumed)| (value as $ty, consumed)))
111 }
112
113 /// Encodes `value` into `output` starting at `index` without bounds checks.
114 ///
115 /// # Parameters
116 ///
117 /// - `output`: Destination byte buffer.
118 /// - `index`: Start index in `output`.
119 /// - `value`: Value to encode.
120 ///
121 /// # Returns
122 ///
123 /// Returns the number of written bytes.
124 ///
125 /// # Safety
126 ///
127 /// The caller must guarantee that `output.as_mut_ptr().add(index)` is valid
128 /// to write [`Self::REQUIRED_MIN_BUFFER_LEN`] bytes.
129 #[inline(always)]
130 pub unsafe fn write_unchecked(output: &mut [u8], index: usize, value: $ty) -> usize {
131 // SAFETY: The caller guarantees enough writable bytes for this type.
132 unsafe { write_uleb_unchecked(output, index, value as u128) }
133 }
134 }
135 };
136}
137
138macro_rules! impl_signed_leb128_codec {
139 ($ty:ty) => {
140 impl<P> Leb128Codec<$ty, P>
141 where
142 P: DecodePolicy,
143 {
144 /// Minimum number of bytes required to encode or decode this type.
145 pub const REQUIRED_MIN_BUFFER_LEN: usize = (<$ty>::BITS as usize).div_ceil(7);
146
147 /// Decodes a value from `input` starting at `index` without bounds checks.
148 ///
149 /// # Parameters
150 ///
151 /// - `input`: Source byte buffer.
152 /// - `index`: Start index in `input`.
153 ///
154 /// # Returns
155 ///
156 /// Returns the decoded value and the number of consumed bytes.
157 ///
158 /// # Errors
159 ///
160 /// Returns [`Leb128DecodeError`] if the bytes are malformed or strict
161 /// decoding rejects a non-canonical representation.
162 ///
163 /// # Safety
164 ///
165 /// The caller must guarantee that `input.as_ptr().add(index)` is valid to
166 /// read [`Self::REQUIRED_MIN_BUFFER_LEN`] bytes, or that a valid terminating byte
167 /// appears before that limit.
168 #[inline(always)]
169 pub unsafe fn read_unchecked(input: &[u8], index: usize) -> Result<($ty, usize), Leb128DecodeError> {
170 // SAFETY: The caller guarantees enough readable bytes for this type.
171 let (value, consumed) =
172 unsafe { read_sleb_unchecked::<P>(input, index, <$ty>::BITS, Self::REQUIRED_MIN_BUFFER_LEN)? };
173 Ok((value as $ty, consumed))
174 }
175
176 /// Tries to decode from the currently available bytes without bounds checks.
177 ///
178 /// This internal entry point is used by buffered readers that may have
179 /// only part of a variable-length payload in their input buffer. It
180 /// decodes while scanning for the terminating byte, so callers do not
181 /// need a separate terminator pre-scan before calling the codec.
182 ///
183 /// # Parameters
184 ///
185 /// - `input`: Source byte buffer.
186 /// - `index`: Start index in `input`.
187 /// - `available`: Number of readable bytes currently available from
188 /// `index`.
189 ///
190 /// # Returns
191 ///
192 /// Returns `Ok(Some((value, consumed)))` when a complete value is
193 /// decoded. Returns `Ok(None)` when more bytes are needed. Returns
194 /// `Err((error, consumed))` when the payload is invalid and should be
195 /// consumed before the error is reported.
196 ///
197 /// # Safety
198 ///
199 /// The caller must guarantee that `input.as_ptr().add(index)` is valid
200 /// to read `available` bytes and that `available` is no greater than
201 /// [`Self::REQUIRED_MIN_BUFFER_LEN`].
202 #[inline(always)]
203 pub(crate) unsafe fn read_available_unchecked(
204 input: &[u8],
205 index: usize,
206 available: usize,
207 ) -> Result<Option<($ty, usize)>, (Leb128DecodeError, usize)> {
208 // SAFETY: The caller guarantees that exactly `available` bytes
209 // are readable from `index`.
210 unsafe {
211 read_sleb_available_unchecked::<P>(
212 input,
213 index,
214 <$ty>::BITS,
215 Self::REQUIRED_MIN_BUFFER_LEN,
216 available,
217 )
218 }
219 .map(|result| result.map(|(value, consumed)| (value as $ty, consumed)))
220 }
221
222 /// Encodes `value` into `output` starting at `index` without bounds checks.
223 ///
224 /// # Parameters
225 ///
226 /// - `output`: Destination byte buffer.
227 /// - `index`: Start index in `output`.
228 /// - `value`: Value to encode.
229 ///
230 /// # Returns
231 ///
232 /// Returns the number of written bytes.
233 ///
234 /// # Safety
235 ///
236 /// The caller must guarantee that `output.as_mut_ptr().add(index)` is valid
237 /// to write [`Self::REQUIRED_MIN_BUFFER_LEN`] bytes.
238 #[inline(always)]
239 pub unsafe fn write_unchecked(output: &mut [u8], index: usize, value: $ty) -> usize {
240 // SAFETY: The caller guarantees enough writable bytes for this type.
241 unsafe { write_sleb_unchecked(output, index, value as i128) }
242 }
243 }
244 };
245}
246
247impl_unsigned_leb128_codec!(u8);
248impl_unsigned_leb128_codec!(u16);
249impl_unsigned_leb128_codec!(u32);
250impl_unsigned_leb128_codec!(u64);
251impl_unsigned_leb128_codec!(u128);
252impl_unsigned_leb128_codec!(usize);
253
254impl_signed_leb128_codec!(i8);
255impl_signed_leb128_codec!(i16);
256impl_signed_leb128_codec!(i32);
257impl_signed_leb128_codec!(i64);
258impl_signed_leb128_codec!(i128);
259impl_signed_leb128_codec!(isize);
260
261#[inline(always)]
262unsafe fn read_uleb_unchecked<P>(
263 input: &[u8],
264 index: usize,
265 bits: u32,
266 max_bytes: usize,
267) -> Result<(u128, usize), Leb128DecodeError>
268where
269 P: DecodePolicy,
270{
271 // SAFETY: The caller guarantees enough readable bytes for the full maximum
272 // payload width, which is exactly the `available` value passed here.
273 match unsafe { read_uleb_available_unchecked::<P>(input, index, bits, max_bytes, max_bytes) } {
274 Ok(Some(value)) => Ok(value),
275 Ok(None) => unreachable!("maximum-width LEB128 input must complete or fail"),
276 Err((error, _consumed)) => Err(error),
277 }
278}
279
280#[inline(always)]
281unsafe fn read_uleb_available_unchecked<P>(
282 input: &[u8],
283 index: usize,
284 bits: u32,
285 max_bytes: usize,
286 available: usize,
287) -> Result<Option<(u128, usize)>, (Leb128DecodeError, usize)>
288where
289 P: DecodePolicy,
290{
291 debug_assert!(available <= max_bytes, "available bytes exceed LEB128 maximum width");
292 let mut value = 0u128;
293 let mut shift = 0u32;
294 // SAFETY: The caller guarantees that `available` bytes are readable from
295 // `index`, so this base pointer can be advanced by every loop offset.
296 let base = unsafe { input.as_ptr().add(index) };
297 for offset in 0..available {
298 // SAFETY: The caller guarantees enough readable bytes for this loop.
299 let byte = unsafe { *base.add(offset) };
300 let payload = u128::from(byte & 0x7F);
301 value |= payload << shift;
302 if byte & 0x80 == 0 {
303 if offset == max_bytes - 1 && !unsigned_final_payload_fits(byte, bits, offset) {
304 return Err(malformed_decode_error(index + offset, offset + 1));
305 }
306 let consumed = offset + 1;
307 if P::STRICT && !has_canonical_uleb_len(value, consumed) {
308 return Err(noncanonical_decode_error(index, consumed));
309 }
310 return Ok(Some((value, consumed)));
311 }
312 shift += 7;
313 }
314 if available < max_bytes {
315 return Ok(None);
316 }
317 Err(malformed_decode_error(index + max_bytes - 1, max_bytes))
318}
319
320#[inline(always)]
321unsafe fn read_sleb_unchecked<P>(
322 input: &[u8],
323 index: usize,
324 bits: u32,
325 max_bytes: usize,
326) -> Result<(i128, usize), Leb128DecodeError>
327where
328 P: DecodePolicy,
329{
330 // SAFETY: The caller guarantees enough readable bytes for the full maximum
331 // payload width, which is exactly the `available` value passed here.
332 match unsafe { read_sleb_available_unchecked::<P>(input, index, bits, max_bytes, max_bytes) } {
333 Ok(Some(value)) => Ok(value),
334 Ok(None) => unreachable!("maximum-width LEB128 input must complete or fail"),
335 Err((error, _consumed)) => Err(error),
336 }
337}
338
339#[inline(always)]
340unsafe fn read_sleb_available_unchecked<P>(
341 input: &[u8],
342 index: usize,
343 bits: u32,
344 max_bytes: usize,
345 available: usize,
346) -> Result<Option<(i128, usize)>, (Leb128DecodeError, usize)>
347where
348 P: DecodePolicy,
349{
350 debug_assert!(available <= max_bytes, "available bytes exceed LEB128 maximum width");
351 let mut value = 0i128;
352 let mut shift = 0u32;
353 // SAFETY: The caller guarantees that `available` bytes are readable from
354 // `index`, so this base pointer can be advanced by every loop offset.
355 let base = unsafe { input.as_ptr().add(index) };
356 for offset in 0..available {
357 // SAFETY: The caller guarantees enough readable bytes for this loop.
358 let byte = unsafe { *base.add(offset) };
359 let payload = i128::from(byte & 0x7F);
360 value |= payload << shift;
361 if byte & 0x80 == 0 {
362 if offset == max_bytes - 1 && !signed_final_payload_fits(byte, bits, offset) {
363 return Err(malformed_decode_error(index + offset, offset + 1));
364 }
365 if byte & 0x40 != 0 && shift + 7 < i128::BITS {
366 value |= (!0i128) << (shift + 7);
367 }
368 let consumed = offset + 1;
369 if P::STRICT && !has_canonical_sleb_len(value, consumed) {
370 return Err(noncanonical_decode_error(index, consumed));
371 }
372 return Ok(Some((value, consumed)));
373 }
374 shift += 7;
375 }
376 if available < max_bytes {
377 return Ok(None);
378 }
379 Err(malformed_decode_error(index + max_bytes - 1, max_bytes))
380}
381
382/// Builds a malformed LEB128 error on the cold error path.
383///
384/// # Parameters
385///
386/// - `index`: Absolute byte index associated with the malformed payload.
387/// - `consumed`: Number of bytes that should be consumed before reporting the
388/// error.
389///
390/// # Returns
391///
392/// Returns the error and the byte count to consume.
393#[cold]
394#[inline(never)]
395fn malformed_decode_error(index: usize, consumed: usize) -> (Leb128DecodeError, usize) {
396 (
397 Leb128DecodeError::new(Leb128DecodeErrorKind::Malformed, index),
398 consumed,
399 )
400}
401
402/// Builds a non-canonical LEB128 error on the cold error path.
403///
404/// # Parameters
405///
406/// - `index`: Absolute byte index at which the non-canonical payload starts.
407/// - `consumed`: Number of bytes that should be consumed before reporting the
408/// error.
409///
410/// # Returns
411///
412/// Returns the error and the byte count to consume.
413#[cold]
414#[inline(never)]
415fn noncanonical_decode_error(index: usize, consumed: usize) -> (Leb128DecodeError, usize) {
416 (
417 Leb128DecodeError::new(Leb128DecodeErrorKind::NonCanonical, index),
418 consumed,
419 )
420}
421
422#[must_use]
423#[inline(always)]
424fn unsigned_final_payload_fits(byte: u8, bits: u32, offset: usize) -> bool {
425 let used_bits = bits - offset as u32 * 7;
426 byte >> used_bits == 0
427}
428
429#[must_use]
430#[inline(always)]
431fn signed_final_payload_fits(byte: u8, bits: u32, offset: usize) -> bool {
432 let used_bits = bits - offset as u32 * 7;
433 let payload = byte & 0x7F;
434 let used_mask = ((1u16 << used_bits) - 1) as u8;
435 let unused_mask = 0x7F & !used_mask;
436 let sign_bit = 1u8 << (used_bits - 1);
437 if payload & sign_bit == 0 {
438 payload & unused_mask == 0
439 } else {
440 payload & unused_mask == unused_mask
441 }
442}
443
444/// Checks whether an unsigned LEB128 value used its canonical encoded length.
445///
446/// # Parameters
447///
448/// - `value`: Decoded unsigned value.
449/// - `actual_len`: Number of bytes consumed from the input.
450///
451/// # Returns
452///
453/// Returns `true` if `actual_len` is the canonical encoded length of `value`.
454#[must_use]
455#[inline(always)]
456fn has_canonical_uleb_len(value: u128, actual_len: usize) -> bool {
457 canonical_uleb_len(value) == actual_len
458}
459
460/// Checks whether a signed LEB128 value used its canonical encoded length.
461///
462/// # Parameters
463///
464/// - `value`: Decoded signed value.
465/// - `actual_len`: Number of bytes consumed from the input.
466///
467/// # Returns
468///
469/// Returns `true` if `actual_len` is the canonical encoded length of `value`.
470#[must_use]
471#[inline(always)]
472fn has_canonical_sleb_len(value: i128, actual_len: usize) -> bool {
473 canonical_sleb_len(value) == actual_len
474}
475
476/// Computes the canonical unsigned LEB128 encoded length.
477///
478/// # Parameters
479///
480/// - `value`: Unsigned value to measure.
481///
482/// # Returns
483///
484/// Returns the number of bytes used by the canonical unsigned LEB128 encoding.
485#[must_use]
486#[inline(always)]
487fn canonical_uleb_len(mut value: u128) -> usize {
488 let mut len = 1;
489 while value >= 0x80 {
490 value >>= 7;
491 len += 1;
492 }
493 len
494}
495
496/// Computes the canonical signed LEB128 encoded length.
497///
498/// # Parameters
499///
500/// - `value`: Signed value to measure.
501///
502/// # Returns
503///
504/// Returns the number of bytes used by the canonical signed LEB128 encoding.
505#[must_use]
506#[inline(always)]
507fn canonical_sleb_len(mut value: i128) -> usize {
508 let mut len = 0;
509 loop {
510 let byte = (value & 0x7F) as u8;
511 let sign_bit_set = byte & 0x40 != 0;
512 value >>= 7;
513 len += 1;
514 if (value == 0 && !sign_bit_set) || (value == -1 && sign_bit_set) {
515 return len;
516 }
517 }
518}
519
520unsafe fn write_uleb_unchecked(output: &mut [u8], index: usize, mut value: u128) -> usize {
521 let mut offset = 0;
522 loop {
523 let mut byte = (value & 0x7F) as u8;
524 value >>= 7;
525 if value != 0 {
526 byte |= 0x80;
527 }
528 // SAFETY: The caller guarantees enough writable bytes for the encoded value.
529 unsafe {
530 *output.as_mut_ptr().add(index + offset) = byte;
531 }
532 offset += 1;
533 if value == 0 {
534 return offset;
535 }
536 }
537}
538
539unsafe fn write_sleb_unchecked(output: &mut [u8], index: usize, mut value: i128) -> usize {
540 let mut offset = 0;
541 loop {
542 let mut byte = (value & 0x7F) as u8;
543 let sign_bit_set = byte & 0x40 != 0;
544 value >>= 7;
545 let done = (value == 0 && !sign_bit_set) || (value == -1 && sign_bit_set);
546 if !done {
547 byte |= 0x80;
548 }
549 // SAFETY: The caller guarantees enough writable bytes for the encoded value.
550 unsafe {
551 *output.as_mut_ptr().add(index + offset) = byte;
552 }
553 offset += 1;
554 if done {
555 return offset;
556 }
557 }
558}