ms_codeview/
parser.rs

1//! Support for parsing byte-oriented data
2
3#[cfg(test)]
4mod tests;
5
6use crate::types::TypeIndex;
7use bstr::{BStr, ByteSlice};
8use std::mem::{size_of, take};
9use zerocopy::byteorder::{I16, I32, I64, LE, U16, U32, U64};
10use zerocopy::{FromBytes, Immutable, IntoBytes, KnownLayout, Unaligned, I128, U128};
11
12pub use crate::types::number::Number;
13
/// A cursor over a borrowed byte slice, used for decoding CodeView records.
///
/// Parsing methods hand out data borrowed from the underlying slice (lifetime `'a`) and shrink
/// `bytes` from the front as input is consumed.
#[derive(Clone)]
pub struct Parser<'a> {
    /// The portion of the input that has not been consumed yet.
    pub bytes: &'a [u8],
}
20
21impl<'a> Parser<'a> {
22    /// Starts a new parser.
23    pub fn new(bytes: &'a [u8]) -> Self {
24        Self { bytes }
25    }
26
27    /// Gets the rest of the unparsed bytes in the parser. The parser still retains a reference to
28    /// the same data.
29    pub fn peek_rest(&self) -> &'a [u8] {
30        self.bytes
31    }
32
33    /// Gets the rest of the unparsed
34    pub fn take_rest(&mut self) -> &'a [u8] {
35        take(&mut self.bytes)
36    }
37
38    /// Consumes this `Parser` and returns the unparsed bytes within it.
39    ///
40    /// This should be used in situations where there is no valid reason to use the `Parser`
41    /// after taking the rest of the bytes within it. In situations where a `parse()` method only
42    /// has access to `&mut Parser`, then this function cannot be used, and the caller should use
43    /// `Parser::take_rest`.
44    pub fn into_rest(self) -> &'a [u8] {
45        self.bytes
46    }
47
48    /// Indicates whether there are any bytes left to parse.
49    pub fn is_empty(&self) -> bool {
50        self.bytes.is_empty()
51    }
52
53    /// Returns the number of unparsed bytes in the parser.
54    pub fn len(&self) -> usize {
55        self.bytes.len()
56    }
57
58    /// Checks that the buffer has at least `n` bytes.
59    ///
60    /// This can be used as an optimization improvement in some situations. Ordinarily, code like
61    /// this will compile to a series of bounds checks:
62    ///
63    /// ```ignore
64    /// let mut p = Parser::new(bytes);
65    /// let a = p.u32()?;
66    /// let b = p.u16()?;
67    /// let c = p.u16()?;
68    /// let d = p.u32()?;
69    /// ```
70    ///
71    /// Inserting a `a.needs(12)?` statement can sometimes enable the compiler to collapse a
72    /// series of bounds checks (4, in this case) to a single bounds check.
73    #[inline(always)]
74    pub fn needs(&self, n: usize) -> Result<(), ParserError> {
75        if n <= self.bytes.len() {
76            Ok(())
77        } else {
78            Err(ParserError::new())
79        }
80    }
81
82    /// Takes the next `n` bytes of input and returns a slice to it. The parser is advanced by `n`.
83    #[inline(always)]
84    pub fn bytes(&mut self, n: usize) -> Result<&'a [u8], ParserError> {
85        if self.bytes.len() < n {
86            return Err(ParserError::new());
87        }
88
89        let (lo, hi) = self.bytes.split_at(n);
90        self.bytes = hi;
91        Ok(lo)
92    }
93
94    /// Skips `n` bytes.
95    pub fn skip(&mut self, n: usize) -> Result<(), ParserError> {
96        if self.bytes.len() < n {
97            return Err(ParserError::new());
98        }
99
100        self.bytes = &self.bytes[n..];
101        Ok(())
102    }
103
104    /// Parses a reference to a structure. The input must contain at least [`size_of::<T>()`] bytes.
105    #[inline(always)]
106    pub fn get<T: FromBytes + Unaligned + KnownLayout + Immutable>(
107        &mut self,
108    ) -> Result<&'a T, ParserError> {
109        if let Ok((value, rest)) = T::ref_from_prefix(self.bytes) {
110            self.bytes = rest;
111            Ok(value)
112        } else {
113            Err(ParserError::new())
114        }
115    }
116
117    /// Parses a copy of a structure. The input must contain at least [`size_of::<T>()`] bytes.
118    #[inline(always)]
119    pub fn copy<T: FromBytes + Unaligned>(&mut self) -> Result<T, ParserError> {
120        let item = self.bytes(size_of::<T>())?;
121        Ok(T::read_from_bytes(item).unwrap())
122    }
123
124    /// Parses a `T` from the input, if `T` knows how to read from a `Parser`.
125    ///
126    /// This exists mainly to allow more succinct calls, using type inference.
127    #[inline(always)]
128    pub fn parse<T: Parse<'a>>(&mut self) -> Result<T, ParserError> {
129        T::from_parser(self)
130    }
131
132    /// Parses a slice of items. The input must contain at least [`size_of::<T>() * n`] bytes.
133    pub fn slice<T: FromBytes + Unaligned + Immutable>(
134        &mut self,
135        len: usize,
136    ) -> Result<&'a [T], ParserError> {
137        if let Ok((lo, hi)) = <[T]>::ref_from_prefix_with_elems(self.bytes, len) {
138            self.bytes = hi;
139            Ok(lo)
140        } else {
141            Err(ParserError::new())
142        }
143    }
144
145    /// Copies an array of items with a constant size and advances the parser.
146    pub fn array<const N: usize>(&mut self) -> Result<[u8; N], ParserError> {
147        let s = self.bytes(N)?;
148        Ok(<[u8; N]>::try_from(s).unwrap())
149    }
150
151    /// Reads one byte and advances.
152    pub fn u8(&mut self) -> Result<u8, ParserError> {
153        let b = self.bytes(1)?;
154        Ok(b[0])
155    }
156
157    /// Reads one signed byte and advances.
158    pub fn i8(&mut self) -> Result<i8, ParserError> {
159        let b = self.bytes(1)?;
160        Ok(b[0] as i8)
161    }
162
163    /// Reads an `i16` (in little-endian order) and advances.
164    pub fn i16(&mut self) -> Result<i16, ParserError> {
165        Ok(self.copy::<I16<LE>>()?.get())
166    }
167
168    /// Reads an `i32` (in little-endian order) and advances.
169    pub fn i32(&mut self) -> Result<i32, ParserError> {
170        Ok(self.copy::<I32<LE>>()?.get())
171    }
172
173    /// Reads an `i64` (in little-endian order) and advances.
174    pub fn i64(&mut self) -> Result<i64, ParserError> {
175        Ok(self.copy::<I64<LE>>()?.get())
176    }
177
178    /// Reads an `u16` (in little-endian order) and advances.
179    pub fn u16(&mut self) -> Result<u16, ParserError> {
180        Ok(self.copy::<U16<LE>>()?.get())
181    }
182
183    /// Reads an `u32` (in little-endian order) and advances.
184    pub fn u32(&mut self) -> Result<u32, ParserError> {
185        Ok(self.copy::<U32<LE>>()?.get())
186    }
187
188    /// Reads an `u64` (in little-endian order) and advances.
189    pub fn u64(&mut self) -> Result<u64, ParserError> {
190        Ok(self.copy::<U64<LE>>()?.get())
191    }
192
193    /// Reads an `u128` (in little-endian order) and advances.
194    pub fn u128(&mut self) -> Result<u128, ParserError> {
195        Ok(self.copy::<U128<LE>>()?.get())
196    }
197
198    /// Reads an `i128` (in little-endian order) and advances.
199    pub fn i128(&mut self) -> Result<i128, ParserError> {
200        Ok(self.copy::<I128<LE>>()?.get())
201    }
202
203    /// Reads an `f32` (in little-endian order) and advances.
204    pub fn f32(&mut self) -> Result<f32, ParserError> {
205        let bytes: [u8; 4] = self.copy()?;
206        Ok(f32::from_le_bytes(bytes))
207    }
208
209    /// Reads an `f64` (in little-endian order) and advances.
210    pub fn f64(&mut self) -> Result<f64, ParserError> {
211        let bytes: [u8; 8] = self.copy()?;
212        Ok(f64::from_le_bytes(bytes))
213    }
214
215    /// Skips over a NUL-terminated string.
216    pub fn skip_strz(&mut self) -> Result<(), ParserError> {
217        for i in 0..self.bytes.len() {
218            if self.bytes[i] == 0 {
219                self.bytes = &self.bytes[i + 1..];
220                return Ok(());
221            }
222        }
223
224        Err(ParserError::new())
225    }
226
227    /// Reads a NUL-terminated string, without checking that it is UTF-8 encoded.
228    pub fn strz(&mut self) -> Result<&'a BStr, ParserError> {
229        for i in 0..self.bytes.len() {
230            if self.bytes[i] == 0 {
231                let str_bytes = &self.bytes[..i];
232                self.bytes = &self.bytes[i + 1..];
233                return Ok(BStr::new(str_bytes));
234            }
235        }
236
237        Err(ParserError::new())
238    }
239
240    /// Reads a length-prefixed string, without checking that it is UTF-8 encoded.
241    pub fn strt_raw(&mut self) -> Result<&'a BStr, ParserError> {
242        let len = self.u8()?;
243        let bytes = self.bytes(len as usize)?;
244        Ok(BStr::new(bytes))
245    }
246
247    /// Reads a length-prefixed string.
248    pub fn strt(&mut self) -> Result<&'a str, ParserError> {
249        let bytes = self.strt_raw()?;
250        if let Ok(s) = core::str::from_utf8(bytes.as_ref()) {
251            Ok(s)
252        } else {
253            Err(ParserError::new())
254        }
255    }
256
257    /// Parses a 32-bit TypeIndex.
258    pub fn type_index(&mut self) -> Result<TypeIndex, ParserError> {
259        Ok(TypeIndex(self.u32()?))
260    }
261
262    /// Parses a generic number value.
263    ///
264    /// See Section 4, numeric leaves
265    pub fn number(&mut self) -> Result<crate::types::number::Number<'a>, ParserError> {
266        self.parse()
267    }
268}
269
/// A byte-oriented parser that can hand out mutable references to the data it parses.
///
/// Most of the methods defined on `ParserMut` are equivalent to the same methods on `Parser`.
pub struct ParserMut<'a> {
    /// The portion of the input that has not been consumed yet.
    pub bytes: &'a mut [u8],
}
277
278#[allow(missing_docs)]
279impl<'a> ParserMut<'a> {
280    pub fn new(bytes: &'a mut [u8]) -> Self {
281        Self { bytes }
282    }
283
284    pub fn peek_rest(&self) -> &[u8] {
285        self.bytes
286    }
287
288    pub fn peek_rest_mut(&mut self) -> &mut [u8] {
289        self.bytes
290    }
291
292    pub fn into_rest(self) -> &'a mut [u8] {
293        self.bytes
294    }
295
296    pub fn is_empty(&self) -> bool {
297        self.bytes.is_empty()
298    }
299
300    pub fn len(&self) -> usize {
301        self.bytes.len()
302    }
303
304    pub fn skip(&mut self, n: usize) -> Result<(), ParserError> {
305        if n <= self.bytes.len() {
306            let b = take(&mut self.bytes);
307            self.bytes = &mut b[n..];
308            Ok(())
309        } else {
310            Err(ParserError::new())
311        }
312    }
313
314    #[inline(always)]
315    pub fn bytes(&mut self, n: usize) -> Result<&'a [u8], ParserError> {
316        if self.bytes.len() < n {
317            return Err(ParserError::new());
318        }
319
320        let (lo, hi) = take(&mut self.bytes).split_at_mut(n);
321        self.bytes = hi;
322
323        Ok(lo)
324    }
325
326    #[inline(always)]
327    pub fn bytes_mut(&mut self, n: usize) -> Result<&'a mut [u8], ParserError> {
328        if self.bytes.len() < n {
329            return Err(ParserError::new());
330        }
331
332        let (lo, hi) = take(&mut self.bytes).split_at_mut(n);
333        self.bytes = hi;
334
335        Ok(lo)
336    }
337
338    #[inline(always)]
339    pub fn get<T: FromBytes + Unaligned + Immutable + KnownLayout>(
340        &mut self,
341    ) -> Result<&'a T, ParserError> {
342        let bytes = self.bytes(size_of::<T>())?;
343        Ok(T::ref_from_bytes(bytes).unwrap())
344    }
345
346    #[inline(always)]
347    pub fn get_mut<T: FromBytes + IntoBytes + Unaligned + Immutable + KnownLayout>(
348        &mut self,
349    ) -> Result<&'a mut T, ParserError> {
350        let bytes = self.bytes_mut(size_of::<T>())?;
351        Ok(T::mut_from_bytes(bytes).unwrap())
352    }
353
354    #[inline(always)]
355    pub fn copy<T: FromBytes + Unaligned + Immutable>(&mut self) -> Result<T, ParserError> {
356        let item = self.bytes(size_of::<T>())?;
357        Ok(T::read_from_bytes(item).unwrap())
358    }
359
360    pub fn slice_mut<T: FromBytes + IntoBytes + Unaligned>(
361        &mut self,
362        len: usize,
363    ) -> Result<&'a mut [T], ParserError> {
364        let d = take(&mut self.bytes);
365        if let Ok((lo, hi)) = <[T]>::mut_from_prefix_with_elems(d, len) {
366            self.bytes = hi;
367            Ok(lo)
368        } else {
369            Err(ParserError::new())
370        }
371    }
372
373    pub fn array<const N: usize>(&mut self) -> Result<[u8; N], ParserError> {
374        let s = self.bytes(N)?;
375        Ok(<[u8; N]>::try_from(s).unwrap())
376    }
377
378    pub fn u8(&mut self) -> Result<u8, ParserError> {
379        let b = self.bytes(1)?;
380        Ok(b[0])
381    }
382
383    pub fn i8(&mut self) -> Result<i8, ParserError> {
384        let b = self.bytes(1)?;
385        Ok(b[0] as i8)
386    }
387
388    pub fn i16(&mut self) -> Result<i16, ParserError> {
389        Ok(self.copy::<I16<LE>>()?.get())
390    }
391
392    pub fn i32(&mut self) -> Result<i32, ParserError> {
393        Ok(self.copy::<I32<LE>>()?.get())
394    }
395
396    pub fn i64(&mut self) -> Result<i64, ParserError> {
397        Ok(self.copy::<I64<LE>>()?.get())
398    }
399
400    pub fn u16(&mut self) -> Result<u16, ParserError> {
401        Ok(self.copy::<U16<LE>>()?.get())
402    }
403
404    pub fn u32(&mut self) -> Result<u32, ParserError> {
405        Ok(self.copy::<U32<LE>>()?.get())
406    }
407
408    pub fn u64(&mut self) -> Result<u64, ParserError> {
409        Ok(self.copy::<U64<LE>>()?.get())
410    }
411
412    pub fn skip_strz(&mut self) -> Result<(), ParserError> {
413        for i in 0..self.bytes.len() {
414            if self.bytes[i] == 0 {
415                let stolen_bytes = take(&mut self.bytes);
416                self.bytes = &mut stolen_bytes[i + 1..];
417                return Ok(());
418            }
419        }
420
421        Err(ParserError::new())
422    }
423
424    pub fn strz(&mut self) -> Result<&'a mut BStr, ParserError> {
425        for i in 0..self.bytes.len() {
426            if self.bytes[i] == 0 {
427                let stolen_bytes = take(&mut self.bytes);
428                let (str_bytes, hi) = stolen_bytes.split_at_mut(i);
429                self.bytes = &mut hi[1..];
430                return Ok(str_bytes.as_bstr_mut());
431            }
432        }
433
434        Err(ParserError::new())
435    }
436
437    pub fn type_index(&mut self) -> Result<TypeIndex, ParserError> {
438        Ok(TypeIndex(self.u32()?))
439    }
440
441    pub fn skip_number(&mut self) -> Result<(), ParserError> {
442        let mut p = Parser::new(self.bytes);
443        let len_before = p.len();
444        let _ = p.number()?;
445        let num_len = len_before - p.len();
446        self.skip(num_len)?;
447        Ok(())
448    }
449}
450
/// The error returned by all parsing operations in this module.
///
/// This is a zero-sized type: it records that parsing failed, but carries no further detail.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub struct ParserError;
454
455impl ParserError {
456    /// Constructor for ParserError, also logs an event. This is useful for setting breakpoints.
457    #[cfg_attr(debug_assertions, inline(never))]
458    #[cfg_attr(not(debug_assertions), inline(always))]
459    pub fn new() -> Self {
460        #[cfg(debug_assertions)]
461        {
462            tracing::debug!("ParserError");
463        }
464        Self
465    }
466}
467
468impl Default for ParserError {
469    fn default() -> Self {
470        Self::new()
471    }
472}
473
474impl std::error::Error for ParserError {}
475
476impl std::fmt::Display for ParserError {
477    fn fmt(&self, fmt: &mut std::fmt::Formatter) -> std::fmt::Result {
478        fmt.write_str("Parsing error")
479    }
480}
481
482/// Defines types that can parse from a byte stream
483pub trait Parse<'a>
484where
485    Self: Sized,
486{
487    /// Parses an instance of `Self` from a `Parser`.
488    /// This allows the caller to detect which bytes were not consumed at the end of the input.
489    fn from_parser(p: &mut Parser<'a>) -> Result<Self, ParserError>;
490
491    /// Parses an instance of `Self` from a byte slice.
492    fn parse(bytes: &'a [u8]) -> Result<Self, ParserError> {
493        let mut p = Parser::new(bytes);
494        Self::from_parser(&mut p)
495    }
496}