bddisasm/decoder.rs
1/*
2 * Copyright (c) 2021 Bitdefender
3 * SPDX-License-Identifier: Apache-2.0
4 */
5//! Decodes instructions.
6
7use crate::decoded_instruction::{DecodeMode, DecodeResult, DecodedInstruction};
8
9/// Decodes instructions.
10#[derive(Clone, Eq, PartialEq, Hash, Debug)]
11pub struct Decoder<'a> {
12 code: &'a [u8],
13 ip: u64,
14 mode: DecodeMode,
15 offset: usize,
16}
17
18impl<'a> Decoder<'a> {
19 /// Creates a new decoder.
20 ///
21 /// # Arguments
22 ///
23 /// * `code` - An [`u8`] slice that holds the code to be decoded.
24 /// * `mode` - The mode in which to decode the instruction.
25 /// * `ip` - The instruction pointer value to use when formatting the decoded instruction. Does not affect the
26 /// decoding process in any way.
27 #[must_use]
28 pub fn new(code: &'a [u8], mode: DecodeMode, ip: u64) -> Self {
29 Self {
30 code,
31 mode,
32 ip,
33 offset: 0,
34 }
35 }
36
37 /// Attempts to decode the next instruction from the given code chunk.
38 ///
39 /// # Returns
40 ///
41 /// * `Some(DecodeResult)` - if there are still undecoded bytes in the given code chunk. The decoding may have
42 /// still failed. See `Remarks`.
43 /// * `None` - if all the bytes in the given code chunk were decoded.
44 ///
45 /// # Remarks
46 ///
47 /// This function decodes one instruction from the given code chunk at a time. After each call, the offset inside
48 /// the code chunk is advanced by:
49 ///
50 /// - the size of the decoded instruction, if decoding was succesfull
51 /// - 1, if decoding was not succesfull
52 ///
53 /// The `ip` value specified when the decoder was created is automatically updated:
54 ///
55 /// - if the decoding was succesfull, it is incremented with the instruction size
56 /// - if the decoding was not succesfull, it is incremented with 1
57 ///
58 /// # Examples
59 ///
60 /// ```
61 /// # use bddisasm::DecodeError;
62 /// #
63 /// # fn main() -> Result<(), DecodeError> {
64 /// use bddisasm::{Decoder, DecodeMode};
65 ///
66 /// let mut decoder = Decoder::new(&[0x51, 0x53], DecodeMode::Bits32, 0);
67 ///
68 /// // As long as we have something to decode
69 /// while let Some(result) = decoder.decode_next() {
70 /// // Check if the decoding succeeded
71 /// match result {
72 /// Ok(instruction) => println!("{}", instruction),
73 /// Err(e) => println!("Unable to decode: {}", e),
74 /// }
75 /// }
76 ///
77 /// # Ok(())
78 /// # }
79 /// ```
80 pub fn decode_next(&mut self) -> Option<DecodeResult> {
81 if self.offset >= self.code.len() {
82 None
83 } else {
84 let result =
85 DecodedInstruction::decode_with_ip(&self.code[self.offset..], self.mode, self.ip);
86 if let Ok(ins) = result {
87 self.offset += ins.length();
88 self.ip += ins.length() as u64;
89 } else {
90 self.offset += 1;
91 self.ip += 1;
92 };
93
94 Some(result)
95 }
96 }
97
98 /// Attempts to decode the next instruction from the given code chunk.
99 ///
100 /// Behaves like [`decode_next`](Decoder::decode_next), but in addition to the [`DecodeResult`] it
101 /// will also return the offset from which decoding was attempted, as well as the corresponding instruction pointer.
102 ///
103 /// # Examples
104 ///
105 /// ```
106 /// # use bddisasm::DecodeError;
107 /// #
108 /// # fn main() -> Result<(), DecodeError> {
109 /// use bddisasm::{Decoder, DecodeMode};
110 ///
111 /// let mut decoder = Decoder::new(&[0x51, 0x53], DecodeMode::Bits32, 0x1000);
112 ///
113 /// // As long as we have something to decode
114 /// while let Some((result, offset, ip)) = decoder.decode_next_with_info() {
115 /// // Check if the decoding succeeded
116 /// match result {
117 /// Ok(instruction) => println!("IP: {:#x} {}", ip, instruction),
118 /// Err(e) => println!("Unable to decode at offset {:#x}: {}", offset, e),
119 /// }
120 /// }
121 ///
122 /// # Ok(())
123 /// # }
124 /// ```
125 #[inline]
126 pub fn decode_next_with_info(&mut self) -> Option<(DecodeResult, usize, u64)> {
127 let offset = self.offset;
128 let ip = self.ip;
129
130 self.decode_next().map(|res| (res, offset, ip))
131 }
132
133 /// Attempts to decode the next instruction from the given code chunk.
134 ///
135 /// Behaves like [`decode_next`](Decoder::decode_next), but in addition to the [`DecodeResult`] it
136 /// will also return the offset from which decoding was attempted.
137 ///
138 /// # Examples
139 ///
140 /// ```
141 /// # use bddisasm::DecodeError;
142 /// #
143 /// # fn main() -> Result<(), DecodeError> {
144 /// use bddisasm::{Decoder, DecodeMode};
145 ///
146 /// let mut decoder = Decoder::new(&[0x51, 0x53], DecodeMode::Bits32, 0x1000);
147 ///
148 /// // As long as we have something to decode
149 /// while let Some((result, offset)) = decoder.decode_next_with_offset() {
150 /// // Check if the decoding succeeded
151 /// match result {
152 /// Ok(instruction) => println!("{} at offset {:#x}", instruction, offset),
153 /// Err(e) => println!("Unable to decode at offset {:#x}: {}", offset, e),
154 /// }
155 /// }
156 ///
157 /// # Ok(())
158 /// # }
159 /// ```
160 #[inline]
161 pub fn decode_next_with_offset(&mut self) -> Option<(DecodeResult, usize)> {
162 let offset = self.offset;
163
164 self.decode_next().map(|res| (res, offset))
165 }
166
167 /// Attempts to decode the next instruction from the given code chunk.
168 ///
169 /// Behaves like [`decode_next`](Decoder::decode_next), but in addition to the [`DecodeResult`] it
170 /// will also return the corresponding instruction pointer.
171 ///
172 /// # Examples
173 ///
174 /// ```
175 /// # use bddisasm::DecodeError;
176 /// #
177 /// # fn main() -> Result<(), DecodeError> {
178 /// use bddisasm::{Decoder, DecodeMode};
179 ///
180 /// let mut decoder = Decoder::new(&[0x51, 0x53], DecodeMode::Bits32, 0x1000);
181 ///
182 /// // As long as we have something to decode
183 /// while let Some((result, ip)) = decoder.decode_next_with_ip() {
184 /// // Check if the decoding succeeded
185 /// match result {
186 /// Ok(instruction) => println!("{:#x} {}", ip, instruction),
187 /// Err(e) => println!("Unable to decode: {}", e),
188 /// }
189 /// }
190 ///
191 /// # Ok(())
192 /// # }
193 /// ```
194 #[inline]
195 pub fn decode_next_with_ip(&mut self) -> Option<(DecodeResult, u64)> {
196 let ip = self.ip;
197
198 self.decode_next().map(|res| (res, ip))
199 }
200}
201
202impl Iterator for Decoder<'_> {
203 type Item = DecodeResult;
204
205 #[inline]
206 fn next(&mut self) -> Option<Self::Item> {
207 self.decode_next()
208 }
209}
210
211impl core::iter::FusedIterator for Decoder<'_> {}
212
#[cfg(test)]
mod tests {
    use super::*;
    use crate::*;

    /// Drives the decoder through [`Decoder::decode_next`] and checks every result against the expected outcome.
    #[test]
    fn decode_next() {
        let code = vec![0xb8, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8b, 0xf9, 0xff, 0xff];
        let mut decoder = Decoder::new(&code, DecodeMode::Bits64, 0x1000);
        let expected: Vec<Result<(Mnemonic, &str, &[u8]), DecodeError>> = vec![
            Ok((
                Mnemonic::MOV,
                "MOV eax, 0x00000000",
                &[0xb8, 0x00, 0x00, 0x00, 0x00],
            )),
            Ok((Mnemonic::MOV, "MOV rdi, rcx", &[0x48, 0x8b, 0xf9])),
            Err(DecodeError::InvalidEncoding),
            Err(DecodeError::BufferTooSmall),
        ];
        let mut expected_index = 0usize;
        while let Some(ins) = decoder.decode_next() {
            match expected[expected_index] {
                Ok((i, s, b)) => {
                    let ins = ins.expect("Unable to decode");
                    assert_eq!(i, ins.mnemonic());
                    assert_eq!(b, ins.bytes());
                    assert_eq!(s, format!("{}", ins));
                }
                Err(e) => {
                    assert_eq!(e, ins.expect_err("Expected error"));
                }
            };

            expected_index += 1;
        }

        // A decoder that stops before the end of the code chunk must not pass silently.
        assert_eq!(expected.len(), expected_index);
    }

    /// Drives the decoder through its [`Iterator`] implementation and checks every result against the expected
    /// outcome.
    #[test]
    fn decoder_iter() {
        let code = vec![0xb8, 0x00, 0x00, 0x00, 0x00, 0x48, 0x8b, 0xf9, 0xff, 0xff];
        let decoder = Decoder::new(&code, DecodeMode::Bits64, 0x1000);
        let expected: Vec<Result<(Mnemonic, &str, &[u8]), DecodeError>> = vec![
            Ok((
                Mnemonic::MOV,
                "MOV eax, 0x00000000",
                &[0xb8, 0x00, 0x00, 0x00, 0x00],
            )),
            Ok((Mnemonic::MOV, "MOV rdi, rcx", &[0x48, 0x8b, 0xf9])),
            Err(DecodeError::InvalidEncoding),
            Err(DecodeError::BufferTooSmall),
        ];

        let mut decoded = 0usize;
        for (index, ins) in decoder.enumerate() {
            match expected[index] {
                Ok((i, s, b)) => {
                    let ins = ins.expect("Unable to decode");
                    assert_eq!(i, ins.mnemonic());
                    assert_eq!(b, ins.bytes());
                    assert_eq!(s, format!("{}", ins));
                }
                Err(e) => {
                    assert_eq!(e, ins.expect_err("Expected error"));
                }
            };

            decoded = index + 1;
        }

        // An iterator that ends before the end of the code chunk must not pass silently.
        assert_eq!(expected.len(), decoded);
    }
}