bddisasm/
decoder.rs

1/*
2 * Copyright (c) 2021 Bitdefender
3 * SPDX-License-Identifier: Apache-2.0
4 */
5//! Decodes instructions.
6
7use crate::decoded_instruction::{DecodeMode, DecodeResult, DecodedInstruction};
8
/// Decodes instructions.
///
/// Walks over a borrowed code chunk one instruction at a time, keeping track of the current offset inside the chunk
/// and of the instruction pointer that corresponds to that offset.
#[derive(Clone, Eq, PartialEq, Hash, Debug)]
pub struct Decoder<'a> {
    /// The code chunk that is being decoded.
    code: &'a [u8],
    /// The instruction pointer passed to the next decoding attempt; advanced together with `offset`.
    ip: u64,
    /// The mode in which every instruction in `code` is decoded.
    mode: DecodeMode,
    /// The offset inside `code` from which the next decoding attempt starts.
    offset: usize,
}
17
18impl<'a> Decoder<'a> {
19    /// Creates a new decoder.
20    ///
21    /// # Arguments
22    ///
23    /// * `code` - An [`u8`] slice that holds the code to be decoded.
24    /// * `mode` - The mode in which to decode the instruction.
25    /// * `ip` - The instruction pointer value to use when formatting the decoded instruction. Does not affect the
26    /// decoding process in any way.
27    #[must_use]
28    pub fn new(code: &'a [u8], mode: DecodeMode, ip: u64) -> Self {
29        Self {
30            code,
31            mode,
32            ip,
33            offset: 0,
34        }
35    }
36
37    /// Attempts to decode the next instruction from the given code chunk.
38    ///
39    /// # Returns
40    ///
41    /// * `Some(DecodeResult)` - if there are still undecoded bytes in the given code chunk. The decoding may have
42    /// still failed. See `Remarks`.
43    /// * `None` - if all the bytes in the given code chunk were decoded.
44    ///
45    /// # Remarks
46    ///
47    /// This function decodes one instruction from the given code chunk at a time. After each call, the offset inside
48    /// the code chunk is advanced by:
49    ///
50    /// - the size of the decoded instruction, if decoding was succesfull
51    /// - 1, if decoding was not succesfull
52    ///
53    /// The `ip` value specified when the decoder was created is automatically updated:
54    ///
55    /// - if the decoding was succesfull, it is incremented with the instruction size
56    /// - if the decoding was not succesfull, it is incremented with 1
57    ///
58    /// # Examples
59    ///
60    /// ```
61    /// # use bddisasm::DecodeError;
62    /// #
63    /// # fn main() -> Result<(), DecodeError> {
64    /// use bddisasm::{Decoder, DecodeMode};
65    ///
66    /// let mut decoder = Decoder::new(&[0x51, 0x53], DecodeMode::Bits32, 0);
67    ///
68    /// // As long as we have something to decode
69    /// while let Some(result) = decoder.decode_next() {
70    ///     // Check if the decoding succeeded
71    ///     match result {
72    ///         Ok(instruction) => println!("{}", instruction),
73    ///         Err(e) => println!("Unable to decode: {}", e),
74    ///     }
75    /// }
76    ///
77    /// # Ok(())
78    /// # }
79    /// ```
80    pub fn decode_next(&mut self) -> Option<DecodeResult> {
81        if self.offset >= self.code.len() {
82            None
83        } else {
84            let result =
85                DecodedInstruction::decode_with_ip(&self.code[self.offset..], self.mode, self.ip);
86            if let Ok(ins) = result {
87                self.offset += ins.length();
88                self.ip += ins.length() as u64;
89            } else {
90                self.offset += 1;
91                self.ip += 1;
92            };
93
94            Some(result)
95        }
96    }
97
98    /// Attempts to decode the next instruction from the given code chunk.
99    ///
100    /// Behaves like [`decode_next`](Decoder::decode_next), but in addition to the [`DecodeResult`] it
101    /// will also return the offset from which decoding was attempted, as well as the corresponding instruction pointer.
102    ///
103    /// # Examples
104    ///
105    /// ```
106    /// # use bddisasm::DecodeError;
107    /// #
108    /// # fn main() -> Result<(), DecodeError> {
109    /// use bddisasm::{Decoder, DecodeMode};
110    ///
111    /// let mut decoder = Decoder::new(&[0x51, 0x53], DecodeMode::Bits32, 0x1000);
112    ///
113    /// // As long as we have something to decode
114    /// while let Some((result, offset, ip)) = decoder.decode_next_with_info() {
115    ///     // Check if the decoding succeeded
116    ///     match result {
117    ///         Ok(instruction) => println!("IP: {:#x}    {}", ip, instruction),
118    ///         Err(e) => println!("Unable to decode at offset {:#x}: {}", offset, e),
119    ///     }
120    /// }
121    ///
122    /// # Ok(())
123    /// # }
124    /// ```
125    #[inline]
126    pub fn decode_next_with_info(&mut self) -> Option<(DecodeResult, usize, u64)> {
127        let offset = self.offset;
128        let ip = self.ip;
129
130        self.decode_next().map(|res| (res, offset, ip))
131    }
132
133    /// Attempts to decode the next instruction from the given code chunk.
134    ///
135    /// Behaves like [`decode_next`](Decoder::decode_next), but in addition to the [`DecodeResult`] it
136    /// will also return the offset from which decoding was attempted.
137    ///
138    /// # Examples
139    ///
140    /// ```
141    /// # use bddisasm::DecodeError;
142    /// #
143    /// # fn main() -> Result<(), DecodeError> {
144    /// use bddisasm::{Decoder, DecodeMode};
145    ///
146    /// let mut decoder = Decoder::new(&[0x51, 0x53], DecodeMode::Bits32, 0x1000);
147    ///
148    /// // As long as we have something to decode
149    /// while let Some((result, offset)) = decoder.decode_next_with_offset() {
150    ///     // Check if the decoding succeeded
151    ///     match result {
152    ///         Ok(instruction) => println!("{} at offset {:#x}", instruction, offset),
153    ///         Err(e) => println!("Unable to decode at offset {:#x}: {}", offset, e),
154    ///     }
155    /// }
156    ///
157    /// # Ok(())
158    /// # }
159    /// ```
160    #[inline]
161    pub fn decode_next_with_offset(&mut self) -> Option<(DecodeResult, usize)> {
162        let offset = self.offset;
163
164        self.decode_next().map(|res| (res, offset))
165    }
166
167    /// Attempts to decode the next instruction from the given code chunk.
168    ///
169    /// Behaves like [`decode_next`](Decoder::decode_next), but in addition to the [`DecodeResult`] it
170    /// will also return the corresponding instruction pointer.
171    ///
172    /// # Examples
173    ///
174    /// ```
175    /// # use bddisasm::DecodeError;
176    /// #
177    /// # fn main() -> Result<(), DecodeError> {
178    /// use bddisasm::{Decoder, DecodeMode};
179    ///
180    /// let mut decoder = Decoder::new(&[0x51, 0x53], DecodeMode::Bits32, 0x1000);
181    ///
182    /// // As long as we have something to decode
183    /// while let Some((result, ip)) = decoder.decode_next_with_ip() {
184    ///     // Check if the decoding succeeded
185    ///     match result {
186    ///         Ok(instruction) => println!("{:#x}    {}", ip, instruction),
187    ///         Err(e) => println!("Unable to decode: {}", e),
188    ///     }
189    /// }
190    ///
191    /// # Ok(())
192    /// # }
193    /// ```
194    #[inline]
195    pub fn decode_next_with_ip(&mut self) -> Option<(DecodeResult, u64)> {
196        let ip = self.ip;
197
198        self.decode_next().map(|res| (res, ip))
199    }
200}
201
202impl Iterator for Decoder<'_> {
203    type Item = DecodeResult;
204
205    #[inline]
206    fn next(&mut self) -> Option<Self::Item> {
207        self.decode_next()
208    }
209}
210
// `decode_next` returns `None` once the offset reaches the end of the code chunk, and the offset is only ever
// increased, so after the first `None` every later call returns `None` as well.
impl core::iter::FusedIterator for Decoder<'_> {}
212
#[cfg(test)]
mod tests {
    use super::*;
    use crate::*;

    /// Two valid `MOV` instructions followed by two `0xff` bytes that are expected to fail to decode.
    const CODE: [u8; 10] = [0xb8, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8b, 0xf9, 0xff, 0xff];

    /// Checks that `results` holds exactly the decoding results expected for [`CODE`] in 64-bit mode.
    ///
    /// Besides comparing every result, this also verifies the number of results, so a decoder that stops early
    /// does not go unnoticed.
    fn assert_expected_results(results: impl Iterator<Item = DecodeResult>) {
        let expected: Vec<Result<(Mnemonic, &str, &[u8]), DecodeError>> = vec![
            Ok((
                Mnemonic::MOV,
                "MOV       eax, 0x00000000",
                &[0xb8, 0x00, 0x00, 0x00, 0x00],
            )),
            Ok((Mnemonic::MOV, "MOV       rdi, rcx", &[0x48, 0x8b, 0xf9])),
            Err(DecodeError::InvalidEncoding),
            Err(DecodeError::BufferTooSmall),
        ];

        let mut checked = 0usize;
        for (index, result) in results.enumerate() {
            assert!(
                index < expected.len(),
                "decoder produced more results than expected"
            );

            match expected[index] {
                Ok((mnemonic, text, bytes)) => {
                    let ins = result.expect("Unable to decode");
                    assert_eq!(mnemonic, ins.mnemonic());
                    assert_eq!(bytes, ins.bytes());
                    assert_eq!(text, format!("{}", ins));
                }
                Err(e) => {
                    assert_eq!(e, result.expect_err("Expected error"));
                }
            };

            checked = index + 1;
        }

        assert_eq!(
            expected.len(),
            checked,
            "decoder stopped before the whole code chunk was consumed"
        );
    }

    #[test]
    fn decode_next() {
        let mut decoder = Decoder::new(&CODE, DecodeMode::Bits64, 0x1000);

        // Drive the decoder through the inherent method, not through the `Iterator` implementation.
        assert_expected_results(core::iter::from_fn(|| decoder.decode_next()));

        // Once the code chunk is exhausted the decoder must keep reporting that.
        assert!(decoder.decode_next().is_none());
    }

    #[test]
    fn decoder_iter() {
        let decoder = Decoder::new(&CODE, DecodeMode::Bits64, 0x1000);

        assert_expected_results(decoder);
    }
}