bad64/lib.rs
1//! # bad64
2//!
3//! bad64 is a set of Rust bindings to the Binja Arm64 Disassembler.
4//!
5//! For more information about the disassembler, please see the
6//! [upstream](https://github.com/Vector35/arch-arm64/tree/dev/disassembler)
7//! repo.
8//!
9//! There are two main entry points:
10//! 1. [`decode`] for decoding a single instruction.
11//! ```
12//! use bad64::{decode, Op};
13//! // nop - "\x1f\x20\x03\xd5"
14//! let decoded = decode(0xd503201f, 0x1000).unwrap();
15//!
16//! assert_eq!(decoded.address(), 0x1000);
17//! assert_eq!(decoded.operands().len(), 0);
18//! assert_eq!(decoded.op(), Op::NOP);
19//! assert_eq!(decoded.op().mnem(), "nop");
20//! ```
21//!
22//! 2. [`disasm`] for disassembling a byte sequence.
23//! ```
24//! use bad64::{disasm, Op, Operand, Reg, Imm};
25//!
26//! // 1000: str x0, [sp, #-16]! ; "\xe0\x0f\x1f\xf8"
27//! // 1004: ldr x0, [sp], #16 ; "\xe0\x07\x41\xf8"
28//! let mut decoded_iter = disasm(b"\xe0\x0f\x1f\xf8\xe0\x07\x41\xf8", 0x1000);
29//!
30//! let push = decoded_iter.next().unwrap().unwrap();
31//!
32//! // check out the push
33//! assert_eq!(push.address(), 0x1000);
34//! assert_eq!(push.operands().len(), 2);
35//! assert_eq!(push.op(), Op::STR);
36//! assert_eq!(
37//! push.operands()[0],
38//! Operand::Reg { reg: Reg::X0, arrspec: None }
39//! );
40//! assert_eq!(
41//! push.operands()[1],
42//! Operand::MemPreIdx { reg: Reg::SP, imm: Imm::Signed(-16) }
43//! );
44//! assert_eq!(push.operands().get(2), None);
45//!
46//! let pop = decoded_iter.next().unwrap().unwrap();
47//!
48//! // check out the pop
49//! assert_eq!(pop.address(), 0x1004);
50//! assert_eq!(pop.operands().len(), 2);
51//! assert_eq!(pop.op(), Op::LDR);
52//! assert_eq!(
53//! pop.operands().get(0),
54//! Some(&Operand::Reg { reg: Reg::X0, arrspec: None })
55//! );
56//! assert_eq!(
57//! pop.operands().get(1),
58//! Some(&Operand::MemPostIdxImm { reg: Reg::SP, imm: Imm::Signed(16) })
59//! );
60//! assert_eq!(pop.operands().get(2), None);
61//!
62//! // make sure there's nothing left
63//! assert_eq!(decoded_iter.next(), None);
64//! ```
65
66#![no_std]
67
68#[macro_use]
69extern crate num_derive;
70
71#[macro_use]
72extern crate static_assertions;
73
74#[cfg(feature = "std")]
75extern crate std;
76
77use core::convert::{TryFrom, TryInto};
78use core::fmt;
79use core::hash::{Hash, Hasher};
80
81use num_traits::FromPrimitive;
82
83use bad64_sys::*;
84
85mod arrspec;
86mod condition;
87mod flageffect;
88mod op;
89mod operand;
90mod reg;
91mod shift;
92mod sysreg;
93
94pub use arrspec::ArrSpec;
95pub use condition::Condition;
96pub use flageffect::FlagEffect;
97pub use op::Op;
98pub use operand::{Imm, Operand};
99pub use reg::Reg;
100pub use shift::Shift;
101pub use sysreg::SysReg;
102
103/// A decoded instruction
104#[derive(Clone)]
105pub struct Instruction {
106 address: u64,
107 opcode: u32,
108 op: Op,
109 num_operands: usize,
110 operands: [Operand; MAX_OPERANDS as usize],
111 flags_set: Option<FlagEffect>,
112}
113
114// Needed because MaybeUninit doesn't allow derives
115impl PartialEq for Instruction {
116 fn eq(&self, other: &Self) -> bool {
117 self.address() == other.address()
118 && self.op() == other.op()
119 && self.opcode() == other.opcode()
120 && self.num_operands == other.num_operands
121 && self.flags_set == other.flags_set
122 && self
123 .operands()
124 .iter()
125 .zip(other.operands().iter())
126 .all(|(a, b)| a == b)
127 }
128}
129
130impl Eq for Instruction {}
131
132impl Hash for Instruction {
133 fn hash<H: Hasher>(&self, state: &mut H) {
134 self.address.hash(state);
135 self.opcode.hash(state);
136 self.op.hash(state);
137 self.num_operands.hash(state);
138 self.flags_set.hash(state);
139
140 for o in self.operands() {
141 o.hash(state);
142 }
143 }
144}
145
146impl fmt::Display for Instruction {
147 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
148 write!(f, "{}", self.op())?;
149
150 let mut ops = self.operands().iter();
151
152 if let Some(op) = ops.next() {
153 write!(f, " {}", op)?;
154
155 for op in ops {
156 write!(f, ", {}", op)?;
157 }
158 }
159
160 Ok(())
161 }
162}
163
164impl fmt::Debug for Instruction {
165 fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
166 f.debug_struct("Instruction")
167 .field("address", &self.address)
168 .field("opcode", &self.opcode)
169 .field("op", &self.op)
170 .field("num_operands", &self.num_operands)
171 .field("operands", &self.operands())
172 .field("flags_set", &self.flags_set)
173 .finish()
174 }
175}
176
177impl Instruction {
178 /// Returns the instruction address
179 ///
180 /// # Example
181 /// ```
182 /// use bad64::decode;
183 /// // nop - "\x1f\x20\x03\xd5"
184 /// let decoded = decode(0xd503201f, 0x1000).unwrap();
185 /// assert_eq!(decoded.address(), 0x1000);
186 /// ```
187 pub fn address(&self) -> u64 {
188 self.address
189 }
190
191 /// Returns the instruction opcode
192 ///
193 /// # Example
194 /// ```
195 /// use bad64::decode;
196 /// // nop - "\x1f\x20\x03\xd5"
197 /// let decoded = decode(0xd503201f, 0x1000).unwrap();
198 /// assert_eq!(decoded.opcode(), 0xd503201f);
199 /// ```
200 pub fn opcode(&self) -> u32 {
201 self.opcode
202 }
203
204 /// Returns the instruction operation
205 ///
206 /// # Example
207 /// ```
208 /// use bad64::{decode, Op};
209 /// // nop - "\x1f\x20\x03\xd5"
210 /// let decoded = decode(0xd503201f, 0x1000).unwrap();
211 /// assert_eq!(decoded.op(), Op::NOP);
212 // ```
213 pub fn op(&self) -> Op {
214 self.op
215 }
216
217 /// Returns a slice of Operands
218 ///
219 /// # Example
220 /// ```
221 /// use bad64::{decode, Operand, Reg};
222 ///
223 /// // eor x0, x1, x2 - "\x20\x00\x02\xca"
224 /// let decoded = decode(0xca020020, 0x1000).unwrap();
225 ///
226 /// let mut ops = decoded.operands();
227 ///
228 /// assert_eq!(ops.len(), 3);
229 /// assert_eq!(ops[0], Operand::Reg { reg: Reg::X0, arrspec: None });
230 /// assert_eq!(ops[1], Operand::Reg { reg: Reg::X1, arrspec: None });
231 /// assert_eq!(ops[2], Operand::Reg { reg: Reg::X2, arrspec: None });
232 /// ```
233 pub fn operands(&self) -> &[Operand] {
234 &self.operands[..self.num_operands]
235 }
236
237 /// Returns if the instruction updates the flags
238 ///
239 /// # Example
240 /// ```
241 /// use bad64::{decode, FlagEffect};
242 ///
243 /// // cmp x0, #0x41 - "\x1f\x04\x01\xf1"
244 /// let decoded = decode(0xf101041f, 0x1000).unwrap();
245 /// assert_eq!(decoded.flags_set(), Some(FlagEffect::Integer));
246 ///
247 /// // nop - "\x1f\x20\x03\xd5"
248 /// let decoded = decode(0xd503201f, 0x1000).unwrap();
249 /// assert_eq!(decoded.flags_set(), None);
250 /// ```
251 pub fn flags_set(&self) -> Option<FlagEffect> {
252 self.flags_set
253 }
254}
/// Decoding error types
///
/// Every variant carries the address of the instruction that failed to
/// decode; it can be read back with [`DecodeError::address`].
#[derive(Clone, Copy, Debug, Eq, Hash, PartialEq)]
#[repr(i32)]
pub enum DecodeError {
    /// The decoder returned `DECODE_STATUS_RESERVED`.
    Reserved(u64),
    /// The decoder returned `DECODE_STATUS_UNMATCHED`.
    Unmatched(u64),
    /// The decoder returned `DECODE_STATUS_UNALLOCATED`.
    Unallocated(u64),
    /// The decoder returned `DECODE_STATUS_UNDEFINED`.
    Undefined(u64),
    /// The decoder returned `DECODE_STATUS_END_OF_INSTRUCTION`.
    EndOfInstruction(u64),
    /// The decoder returned `DECODE_STATUS_LOST`.
    Lost(u64),
    /// The decoder returned `DECODE_STATUS_UNREACHABLE`.
    Unreachable(u64),
    /// Fewer than four bytes were left for the instruction at this address
    /// (reported by [`disasm`] for a trailing partial chunk).
    Short(u64),
    /// The decoder returned `DECODE_STATUS_ERROR_OPERANDS`.
    ErrorOperands(u64),
}
269
270impl DecodeError {
271 fn new(code: i32, address: u64) -> Self {
272 match code {
273 DECODE_STATUS_RESERVED => Self::Reserved(address),
274 DECODE_STATUS_UNMATCHED => Self::Unmatched(address),
275 DECODE_STATUS_UNALLOCATED => Self::Unallocated(address),
276 DECODE_STATUS_UNDEFINED => Self::Undefined(address),
277 DECODE_STATUS_END_OF_INSTRUCTION => Self::EndOfInstruction(address),
278 DECODE_STATUS_LOST => Self::Lost(address),
279 DECODE_STATUS_UNREACHABLE => Self::Unreachable(address),
280 DECODE_STATUS_ERROR_OPERANDS => Self::ErrorOperands(address),
281 _ => panic!("unknown decode error code"),
282 }
283 }
284
285 pub fn address(&self) -> u64 {
286 match self {
287 Self::Reserved(a) => *a,
288 Self::Unmatched(a) => *a,
289 Self::Unallocated(a) => *a,
290 Self::Undefined(a) => *a,
291 Self::EndOfInstruction(a) => *a,
292 Self::Lost(a) => *a,
293 Self::Unreachable(a) => *a,
294 Self::Short(a) => *a,
295 Self::ErrorOperands(a) => *a,
296 }
297 }
298}
299
300impl fmt::Display for DecodeError {
301 fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
302 match self {
303 DecodeError::Reserved(x) => write!(f, "Reserved: {:#x}", x),
304 DecodeError::Unmatched(x) => write!(f, "Unmatched: {:#x}", x),
305 DecodeError::Unallocated(x) => write!(f, "Unallocated: {:#x}", x),
306 DecodeError::Undefined(x) => write!(f, "Undefined: {:#x}", x),
307 DecodeError::EndOfInstruction(x) => write!(f, "EndOfInstruction: {:#x}", x),
308 DecodeError::Lost(x) => write!(f, "Lost: {:#x}", x),
309 DecodeError::Unreachable(x) => write!(f, "Unreachable: {:#x}", x),
310 DecodeError::Short(x) => write!(f, "Short: {:#x}", x),
311 DecodeError::ErrorOperands(x) => write!(f, "ErrorOperands: {:#x}", x),
312 }
313 }
314}
315
316#[cfg(feature = "std")]
317impl std::error::Error for DecodeError {}
318
319/// Decode a single instruction
320///
321/// # Arguments
322///
323/// * `ins` - A little endian u32 of code to be decoded
324/// * `address` - Location of code in memory
325///
326/// # Examples
327/// ```
328/// use bad64::{decode, Op};
329///
330/// // NOTE: little endian
331/// let decoded = decode(0xd503201f, 0x1000).unwrap();
332///
333/// assert_eq!(decoded.operands().len(), 0);
334/// assert_eq!(decoded.operands(), &[]);
335/// assert_eq!(decoded.op(), Op::NOP);
336/// assert_eq!(decoded.op().mnem(), "nop");
337/// assert_eq!(decoded.address(), 0x1000);
338/// ```
339pub fn decode(ins: u32, address: u64) -> Result<Instruction, DecodeError> {
340 let (r, decoded) = unsafe {
341 let mut decoded: bad64_sys::Instruction = core::mem::zeroed();
342 let r = aarch64_decompose(ins, &mut decoded, address);
343 (r, decoded)
344 };
345
346 match r {
347 0 => {
348 assert_ne!(decoded.operation, Operation_ARM64_ERROR);
349
350 let op = Op::from_u32(decoded.operation as u32).unwrap();
351 let mut operands: [Operand; MAX_OPERANDS as usize] =
352 [Operand::Label(Imm::Unsigned(0)); MAX_OPERANDS as usize];
353 let mut num_operands = 0;
354
355 for (n, operand) in decoded.operands.iter().enumerate() {
356 match Operand::try_from(operand) {
357 Ok(o) => {
358 operands[n] = o;
359 num_operands += 1;
360 }
361 Err(_) => break,
362 }
363 }
364
365 let flags_set = FlagEffect::try_from(&decoded).ok();
366
367 Ok(Instruction {
368 address,
369 opcode: decoded.insword,
370 op,
371 num_operands,
372 operands,
373 flags_set,
374 })
375 }
376 _ => Err(DecodeError::new(r, address)),
377 }
378}
379
380/// Disassemble byte slice
381///
382/// # Arguments
383///
384/// * `code` - u8 slice to zero or more instructions
385/// * `address` - Location of code in memory
386///
387/// # Examples
388/// ```
389/// use bad64::{disasm, Op};
390///
391/// let mut decoded_iter = disasm(b"\x1f\x20\x03\xd5\x1f\x20\x03\xd5", 0x1000);
392///
393/// let decoded1 = decoded_iter.next().unwrap().unwrap();
394///
395/// assert_eq!(decoded1.address(), 0x1000);
396/// assert_eq!(decoded1.operands().len(), 0);
397/// assert_eq!(decoded1.op(), Op::NOP);
398/// assert_eq!(decoded1.op().mnem(), "nop");
399///
400/// let decoded2 = decoded_iter.next().unwrap().unwrap();
401///
402/// assert_eq!(decoded2.address(), 0x1004);
403/// assert_eq!(decoded2.operands().len(), 0);
404/// assert_eq!(decoded2.op(), Op::NOP);
405/// assert_eq!(decoded2.op().mnem(), "nop");
406///
407/// assert_eq!(decoded_iter.next(), None);
408/// ```
409pub fn disasm(
410 code: &[u8],
411 address: u64,
412) -> impl Iterator<Item = Result<Instruction, DecodeError>> + '_ {
413 (address..)
414 .step_by(4)
415 .zip(code.chunks(4))
416 .map(|(addr, bytes)| match bytes.try_into() {
417 Ok(v) => {
418 let vv = u32::from_le_bytes(v);
419
420 decode(vv, addr)
421 }
422 Err(_) => Err(DecodeError::Short(addr)),
423 })
424}