wasm_ast/leb128/
mod.rs

1//! Little-Endian Base 128 encoding and decoding of signed and unsigned integers.
2
3mod errors;
4
5pub use errors::LEB128Error;
6
7use std::convert::TryFrom;
8use std::io::Write;
9use std::mem::size_of;
10
11/// The radix (i.e. base) for LEB128 encoding.
12const RADIX: u8 = 128;
13
14/// The number of bits per LEB128 encoding group.
15const GROUP_BITS: usize = 7;
16
17/// The zero-indexed index of the sign bit per LEB128 encoding.
18const SIGN_BIT: usize = 6;
19
20/// Maximum size (in bytes) of an LEB128-encoded integer type
21///
22/// See <https://en.wikipedia.org/wiki/LEB128>
23const fn max_leb128_size<T>() -> usize {
24    let bits = size_of::<T>() * 8;
25
26    (bits / 7) + (bits % 7 != 0) as usize
27}
28
29trait Bits: Copy + Sized {
30    /// Sets the given bit to zero.
31    fn zero_bit_at(&self, bit: usize) -> Self;
32
33    /// Sets the given bit to one.
34    fn one_bit_at(&self, bit: usize) -> Self;
35
36    /// Gets the given bit at the index.
37    fn bit_at(&self, bit: usize) -> bool;
38}
39
40impl Bits for u8 {
41    /// Sets the given bit to zero.
42    fn zero_bit_at(&self, bit: usize) -> u8 {
43        self & !(1 << bit)
44    }
45
46    /// Sets the given bit to one.
47    fn one_bit_at(&self, bit: usize) -> u8 {
48        self | 1 << bit
49    }
50
51    /// Gets the given bit to one.
52    fn bit_at(&self, bit: usize) -> bool {
53        let mask = 1 << bit;
54        self & mask == mask
55    }
56}
57
58/// Parses an unsigned integer using LEB128 (Little-Endian Base 128) encoding.
59/// Returns the parsed integer and the remaining input.
60///
61/// See <https://en.wikipedia.org/wiki/LEB128>
62pub fn parse_unsigned<T>(input: &[u8]) -> Result<(&[u8], T), LEB128Error>
63where
64    T: TryFrom<u128, Error = std::num::TryFromIntError>,
65{
66    let end = input.iter().position(|x| x & RADIX == 0);
67    let max_size = max_leb128_size::<T>();
68    let length = match end {
69        Some(index) if index > max_size => Err(LEB128Error::Overflow(index, max_size)),
70        Some(index) => Ok(index + 1),
71        None => Err(LEB128Error::Invalid),
72    }?;
73
74    let mut result = 0;
75    for (index, &byte) in input[..length].iter().enumerate() {
76        let group = byte.zero_bit_at(GROUP_BITS) as u128;
77
78        result |= group << (index * GROUP_BITS);
79    }
80
81    Ok((&input[length..], T::try_from(result)?))
82}
83
84/// Encodes an unsigned integer using LEB128 (Little-Endian Base 128) encoding.
85///
86/// See <https://en.wikipedia.org/wiki/LEB128>
87pub fn encode_unsigned<I, O: Write>(input: I, mut output: O) -> Result<usize, LEB128Error>
88where
89    I: Into<u128>,
90{
91    let mut value = input.into();
92    let mut written = 0;
93
94    loop {
95        let mut byte = (value as u8).zero_bit_at(GROUP_BITS);
96        value >>= GROUP_BITS;
97
98        if value != 0 {
99            byte = byte.one_bit_at(GROUP_BITS);
100        }
101
102        output.write_all(&[byte])?;
103        written += 1;
104
105        if value == 0 {
106            break;
107        }
108    }
109
110    Ok(written)
111}
112
113/// Parses a signed integer using LEB128 (Little-Endian Base 128) encoding.
114/// Returns the parsed integer and the remaining input.
115///
116/// See <https://en.wikipedia.org/wiki/LEB128>
117pub fn parse_signed<T>(input: &[u8]) -> Result<(&[u8], T), LEB128Error>
118where
119    T: TryFrom<i128, Error = std::num::TryFromIntError>,
120{
121    let end = input.iter().position(|x| x & RADIX == 0);
122    let max_size = max_leb128_size::<T>();
123    let length = match end {
124        Some(index) if index > max_size => Err(LEB128Error::Overflow(index, max_size)),
125        Some(index) => Ok(index + 1),
126        None => Err(LEB128Error::Invalid),
127    }?;
128
129    let mut result = 0;
130    let remaining = &input[length..];
131    let input = &input[..length];
132    for (index, &byte) in input.iter().enumerate() {
133        let group = byte.zero_bit_at(GROUP_BITS) as i128;
134
135        result |= group << (index * GROUP_BITS);
136    }
137
138    if let Some(byte) = input.iter().last() {
139        if byte.bit_at(SIGN_BIT) {
140            result |= !0 << (length * GROUP_BITS);
141        }
142    }
143
144    Ok((remaining, T::try_from(result)?))
145}
146
147/// Encodes a signed integer using LEB128 (Little-Endian Base 128) encoding.
148///
149/// See <https://en.wikipedia.org/wiki/LEB128>
150pub fn encode_signed<I, O: Write>(input: I, mut output: O) -> Result<usize, LEB128Error>
151where
152    I: Into<i128>,
153{
154    let mut value = input.into();
155    let mut written = 0;
156
157    loop {
158        let mut byte = (value as u8).zero_bit_at(GROUP_BITS);
159        value >>= GROUP_BITS;
160
161        if value != 0 {
162            byte = byte.one_bit_at(GROUP_BITS);
163        }
164
165        output.write_all(&[byte])?;
166        written += 1;
167
168        if value == 0 {
169            break;
170        }
171    }
172
173    Ok(written)
174}
175
176#[cfg(test)]
177mod tests {
178    use super::*;
179
180    #[test]
181    fn parse_unsigned_leb128_large() {
182        let input = vec![0xE5, 0x8E, 0x26];
183        let (remaining, actual): (&[u8], u32) = parse_unsigned(input.as_slice()).unwrap();
184
185        assert_eq!(actual, 624485);
186        assert!(remaining.is_empty())
187    }
188
189    #[test]
190    fn parse_unsigned_leb128_small() {
191        let input = vec![64, 0xFF];
192        let (remaining, actual): (&[u8], u8) = parse_unsigned(input.as_slice()).unwrap();
193
194        assert_eq!(actual, 64);
195        assert_eq!(remaining, &[0xFF])
196    }
197
198    #[test]
199    fn parse_unsigned_leb128_zero() {
200        let input = vec![0x00, 0xFF];
201        let (remaining, actual): (&[u8], usize) = parse_unsigned(input.as_slice()).unwrap();
202
203        assert_eq!(actual, 0);
204        assert_eq!(remaining, &[0xFF])
205    }
206
207    #[test]
208    fn encode_unsigned_leb128_large() {
209        let mut output = Vec::new();
210        let written = encode_unsigned(624485u128, &mut output).unwrap();
211
212        assert_eq!(written, 3);
213        assert_eq!(output, vec![0xE5, 0x8E, 0x26]);
214    }
215
216    #[test]
217    fn encode_unsigned_leb128_small() {
218        let input = 64;
219        let mut output = Vec::new();
220        let written = encode_unsigned(input, &mut output).unwrap();
221
222        assert_eq!(written, 1);
223        assert_eq!(output, vec![input]);
224    }
225
226    #[test]
227    fn encode_unsigned_leb128_zero() {
228        let input = 0;
229        let mut output = Vec::new();
230        let written = encode_unsigned(input, &mut output).unwrap();
231
232        assert_eq!(written, 1);
233        assert_eq!(output, vec![input]);
234    }
235
236    #[test]
237    fn parse_signed_leb128_large() {
238        let input = vec![0xC0, 0xBB, 0x78];
239        let (remaining, actual): (&[u8], i32) = parse_signed(input.as_slice()).unwrap();
240
241        println!("{} = {:b}", -123456, -123456);
242        println!("{} = {:b}", actual, actual);
243        assert_eq!(actual, -123456);
244        assert!(remaining.is_empty())
245    }
246
247    #[test]
248    fn parse_signed_leb128_small() {
249        let input = vec![32, 0xFF];
250        let (remaining, actual): (&[u8], i8) = parse_signed(input.as_slice()).unwrap();
251
252        assert_eq!(actual, 32);
253        assert_eq!(remaining, &[0xFF])
254    }
255
256    #[test]
257    fn parse_signed_leb128_zero() {
258        let input = vec![0x00, 0xFF];
259        let (remaining, actual): (&[u8], u64) = parse_signed(input.as_slice()).unwrap();
260
261        assert_eq!(actual, 0);
262        assert_eq!(remaining, &[0xFF])
263    }
264
265    #[test]
266    fn encode_signed_leb128_large() {
267        let mut output = Vec::new();
268        let written = encode_signed(624485i128, &mut output).unwrap();
269
270        assert_eq!(written, 3);
271        assert_eq!(output, vec![0xE5, 0x8E, 0x26]);
272    }
273
274    #[test]
275    fn encode_signed_leb128_small() {
276        let input = 64;
277        let mut output = Vec::new();
278        let written = encode_signed(input, &mut output).unwrap();
279
280        assert_eq!(written, 1);
281        assert_eq!(output, vec![input]);
282    }
283
284    #[test]
285    fn encode_signed_leb128_zero() {
286        let input = 0;
287        let mut output = Vec::new();
288        let written = encode_signed(input, &mut output).unwrap();
289
290        assert_eq!(written, 1);
291        assert_eq!(output, vec![input]);
292    }
293}