bad64/
lib.rs

1//! # bad64
2//!
3//! bad64 is a set of Rust bindings to the Binja Arm64 Disassembler.
4//!
5//! For more information about the disassembler, please see the
6//! [upstream](https://github.com/Vector35/arch-arm64/tree/dev/disassembler)
7//! repo.
8//!
9//! There are two main entry points:
10//! 1. [`decode`] for decoding a single instruction.
11//! ```
12//! use bad64::{decode, Op};
13//! // nop - "\x1f\x20\x03\xd5"
14//! let decoded = decode(0xd503201f, 0x1000).unwrap();
15//!
16//! assert_eq!(decoded.address(), 0x1000);
17//! assert_eq!(decoded.operands().len(), 0);
18//! assert_eq!(decoded.op(), Op::NOP);
19//! assert_eq!(decoded.op().mnem(), "nop");
20//! ```
21//!
22//! 2. [`disasm`] for disassembling a byte sequence.
23//! ```
24//! use bad64::{disasm, Op, Operand, Reg, Imm};
25//!
26//! // 1000: str   x0, [sp, #-16]! ; "\xe0\x0f\x1f\xf8"
27//! // 1004: ldr   x0, [sp], #16   ; "\xe0\x07\x41\xf8"
28//! let mut decoded_iter = disasm(b"\xe0\x0f\x1f\xf8\xe0\x07\x41\xf8", 0x1000);
29//!
30//! let push = decoded_iter.next().unwrap().unwrap();
31//!
32//! // check out the push
33//! assert_eq!(push.address(), 0x1000);
34//! assert_eq!(push.operands().len(), 2);
35//! assert_eq!(push.op(), Op::STR);
36//! assert_eq!(
37//!     push.operands()[0],
38//!     Operand::Reg { reg: Reg::X0, arrspec: None }
39//! );
40//! assert_eq!(
41//!     push.operands()[1],
42//!     Operand::MemPreIdx { reg: Reg::SP, imm: Imm::Signed(-16) }
43//! );
44//! assert_eq!(push.operands().get(2), None);
45//!
46//! let pop = decoded_iter.next().unwrap().unwrap();
47//!
48//! // check out the pop
49//! assert_eq!(pop.address(), 0x1004);
50//! assert_eq!(pop.operands().len(), 2);
51//! assert_eq!(pop.op(), Op::LDR);
52//! assert_eq!(
53//!     pop.operands().get(0),
54//!     Some(&Operand::Reg { reg: Reg::X0, arrspec: None })
55//! );
56//! assert_eq!(
57//!     pop.operands().get(1),
58//!     Some(&Operand::MemPostIdxImm { reg: Reg::SP, imm: Imm::Signed(16) })
59//! );
60//! assert_eq!(pop.operands().get(2), None);
61//!
62//! // make sure there's nothing left
63//! assert_eq!(decoded_iter.next(), None);
64//! ```
65
66#![no_std]
67
68#[macro_use]
69extern crate num_derive;
70
71#[macro_use]
72extern crate static_assertions;
73
74#[cfg(feature = "std")]
75extern crate std;
76
77use core::convert::{TryFrom, TryInto};
78use core::fmt;
79use core::hash::{Hash, Hasher};
80
81use num_traits::FromPrimitive;
82
83use bad64_sys::*;
84
85mod arrspec;
86mod condition;
87mod flageffect;
88mod op;
89mod operand;
90mod reg;
91mod shift;
92mod sysreg;
93
94pub use arrspec::ArrSpec;
95pub use condition::Condition;
96pub use flageffect::FlagEffect;
97pub use op::Op;
98pub use operand::{Imm, Operand};
99pub use reg::Reg;
100pub use shift::Shift;
101pub use sysreg::SysReg;
102
103/// A decoded instruction
104#[derive(Clone)]
105pub struct Instruction {
106    address: u64,
107    opcode: u32,
108    op: Op,
109    num_operands: usize,
110    operands: [Operand; MAX_OPERANDS as usize],
111    flags_set: Option<FlagEffect>,
112}
113
114// Needed because MaybeUninit doesn't allow derives
115impl PartialEq for Instruction {
116    fn eq(&self, other: &Self) -> bool {
117        self.address() == other.address()
118            && self.op() == other.op()
119            && self.opcode() == other.opcode()
120            && self.num_operands == other.num_operands
121            && self.flags_set == other.flags_set
122            && self
123                .operands()
124                .iter()
125                .zip(other.operands().iter())
126                .all(|(a, b)| a == b)
127    }
128}
129
130impl Eq for Instruction {}
131
132impl Hash for Instruction {
133    fn hash<H: Hasher>(&self, state: &mut H) {
134        self.address.hash(state);
135        self.opcode.hash(state);
136        self.op.hash(state);
137        self.num_operands.hash(state);
138        self.flags_set.hash(state);
139
140        for o in self.operands() {
141            o.hash(state);
142        }
143    }
144}
145
146impl fmt::Display for Instruction {
147    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
148        write!(f, "{}", self.op())?;
149
150        let mut ops = self.operands().iter();
151
152        if let Some(op) = ops.next() {
153            write!(f, " {}", op)?;
154
155            for op in ops {
156                write!(f, ", {}", op)?;
157            }
158        }
159
160        Ok(())
161    }
162}
163
164impl fmt::Debug for Instruction {
165    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
166        f.debug_struct("Instruction")
167            .field("address", &self.address)
168            .field("opcode", &self.opcode)
169            .field("op", &self.op)
170            .field("num_operands", &self.num_operands)
171            .field("operands", &self.operands())
172            .field("flags_set", &self.flags_set)
173            .finish()
174    }
175}
176
177impl Instruction {
178    /// Returns the instruction address
179    ///
180    /// # Example
181    /// ```
182    /// use bad64::decode;
183    /// // nop - "\x1f\x20\x03\xd5"
184    /// let decoded = decode(0xd503201f, 0x1000).unwrap();
185    /// assert_eq!(decoded.address(), 0x1000);
186    /// ```
187    pub fn address(&self) -> u64 {
188        self.address
189    }
190
191    /// Returns the instruction opcode
192    ///
193    /// # Example
194    /// ```
195    /// use bad64::decode;
196    /// // nop - "\x1f\x20\x03\xd5"
197    /// let decoded = decode(0xd503201f, 0x1000).unwrap();
198    /// assert_eq!(decoded.opcode(), 0xd503201f);
199    /// ```
200    pub fn opcode(&self) -> u32 {
201        self.opcode
202    }
203
204    /// Returns the instruction operation
205    ///
206    /// # Example
207    /// ```
208    /// use bad64::{decode, Op};
209    /// // nop - "\x1f\x20\x03\xd5"
210    /// let decoded = decode(0xd503201f, 0x1000).unwrap();
211    /// assert_eq!(decoded.op(), Op::NOP);
212    // ```
213    pub fn op(&self) -> Op {
214        self.op
215    }
216
217    /// Returns a slice of Operands
218    ///
219    /// # Example
220    /// ```
221    /// use bad64::{decode, Operand, Reg};
222    ///
223    /// // eor x0, x1, x2  - "\x20\x00\x02\xca"
224    /// let decoded = decode(0xca020020, 0x1000).unwrap();
225    ///
226    /// let mut ops = decoded.operands();
227    ///
228    /// assert_eq!(ops.len(), 3);
229    /// assert_eq!(ops[0], Operand::Reg { reg: Reg::X0, arrspec: None });
230    /// assert_eq!(ops[1], Operand::Reg { reg: Reg::X1, arrspec: None });
231    /// assert_eq!(ops[2], Operand::Reg { reg: Reg::X2, arrspec: None });
232    /// ```
233    pub fn operands(&self) -> &[Operand] {
234        &self.operands[..self.num_operands]
235    }
236
237    /// Returns if the instruction updates the flags
238    ///
239    /// # Example
240    /// ```
241    /// use bad64::{decode, FlagEffect};
242    ///
243    /// // cmp x0, #0x41 - "\x1f\x04\x01\xf1"
244    /// let decoded = decode(0xf101041f, 0x1000).unwrap();
245    /// assert_eq!(decoded.flags_set(), Some(FlagEffect::Integer));
246    ///
247    /// // nop - "\x1f\x20\x03\xd5"
248    /// let decoded = decode(0xd503201f, 0x1000).unwrap();
249    /// assert_eq!(decoded.flags_set(), None);
250    /// ```
251    pub fn flags_set(&self) -> Option<FlagEffect> {
252        self.flags_set
253    }
254}
/// Decoding error types
///
/// Every variant carries the address of the instruction that could not be
/// decoded.
#[derive(Clone, Copy, Debug, Hash, Eq, PartialEq)]
#[repr(i32)]
pub enum DecodeError {
    /// The decoder returned `DECODE_STATUS_RESERVED`.
    Reserved(u64),
    /// The decoder returned `DECODE_STATUS_UNMATCHED`.
    Unmatched(u64),
    /// The decoder returned `DECODE_STATUS_UNALLOCATED`.
    Unallocated(u64),
    /// The decoder returned `DECODE_STATUS_UNDEFINED`.
    Undefined(u64),
    /// The decoder returned `DECODE_STATUS_END_OF_INSTRUCTION`.
    EndOfInstruction(u64),
    /// The decoder returned `DECODE_STATUS_LOST`.
    Lost(u64),
    /// The decoder returned `DECODE_STATUS_UNREACHABLE`.
    Unreachable(u64),
    /// Fewer than four bytes were available for the instruction.
    Short(u64),
    /// The decoder returned `DECODE_STATUS_ERROR_OPERANDS`.
    ErrorOperands(u64),
}
269
270impl DecodeError {
271    fn new(code: i32, address: u64) -> Self {
272        match code {
273            DECODE_STATUS_RESERVED => Self::Reserved(address),
274            DECODE_STATUS_UNMATCHED => Self::Unmatched(address),
275            DECODE_STATUS_UNALLOCATED => Self::Unallocated(address),
276            DECODE_STATUS_UNDEFINED => Self::Undefined(address),
277            DECODE_STATUS_END_OF_INSTRUCTION => Self::EndOfInstruction(address),
278            DECODE_STATUS_LOST => Self::Lost(address),
279            DECODE_STATUS_UNREACHABLE => Self::Unreachable(address),
280            DECODE_STATUS_ERROR_OPERANDS => Self::ErrorOperands(address),
281            _ => panic!("unknown decode error code"),
282        }
283    }
284
285    pub fn address(&self) -> u64 {
286        match self {
287            Self::Reserved(a) => *a,
288            Self::Unmatched(a) => *a,
289            Self::Unallocated(a) => *a,
290            Self::Undefined(a) => *a,
291            Self::EndOfInstruction(a) => *a,
292            Self::Lost(a) => *a,
293            Self::Unreachable(a) => *a,
294            Self::Short(a) => *a,
295            Self::ErrorOperands(a) => *a,
296        }
297    }
298}
299
300impl fmt::Display for DecodeError {
301    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
302        match self {
303            DecodeError::Reserved(x) => write!(f, "Reserved: {:#x}", x),
304            DecodeError::Unmatched(x) => write!(f, "Unmatched: {:#x}", x),
305            DecodeError::Unallocated(x) => write!(f, "Unallocated: {:#x}", x),
306            DecodeError::Undefined(x) => write!(f, "Undefined: {:#x}", x),
307            DecodeError::EndOfInstruction(x) => write!(f, "EndOfInstruction: {:#x}", x),
308            DecodeError::Lost(x) => write!(f, "Lost: {:#x}", x),
309            DecodeError::Unreachable(x) => write!(f, "Unreachable: {:#x}", x),
310            DecodeError::Short(x) => write!(f, "Short: {:#x}", x),
311            DecodeError::ErrorOperands(x) => write!(f, "ErrorOperands: {:#x}", x),
312        }
313    }
314}
315
316#[cfg(feature = "std")]
317impl std::error::Error for DecodeError {}
318
319/// Decode a single instruction
320///
321/// # Arguments
322///
323/// * `ins` - A little endian u32 of code to be decoded
324/// * `address` - Location of code in memory
325///
326/// # Examples
327/// ```
328/// use bad64::{decode, Op};
329///
330/// // NOTE: little endian
331/// let decoded = decode(0xd503201f, 0x1000).unwrap();
332///
333/// assert_eq!(decoded.operands().len(), 0);
334/// assert_eq!(decoded.operands(), &[]);
335/// assert_eq!(decoded.op(), Op::NOP);
336/// assert_eq!(decoded.op().mnem(), "nop");
337/// assert_eq!(decoded.address(), 0x1000);
338/// ```
339pub fn decode(ins: u32, address: u64) -> Result<Instruction, DecodeError> {
340    let (r, decoded) = unsafe {
341        let mut decoded: bad64_sys::Instruction = core::mem::zeroed();
342        let r = aarch64_decompose(ins, &mut decoded, address);
343        (r, decoded)
344    };
345
346    match r {
347        0 => {
348            assert_ne!(decoded.operation, Operation_ARM64_ERROR);
349
350            let op = Op::from_u32(decoded.operation as u32).unwrap();
351            let mut operands: [Operand; MAX_OPERANDS as usize] =
352                [Operand::Label(Imm::Unsigned(0)); MAX_OPERANDS as usize];
353            let mut num_operands = 0;
354
355            for (n, operand) in decoded.operands.iter().enumerate() {
356                match Operand::try_from(operand) {
357                    Ok(o) => {
358                        operands[n] = o;
359                        num_operands += 1;
360                    }
361                    Err(_) => break,
362                }
363            }
364
365            let flags_set = FlagEffect::try_from(&decoded).ok();
366
367            Ok(Instruction {
368                address,
369                opcode: decoded.insword,
370                op,
371                num_operands,
372                operands,
373                flags_set,
374            })
375        }
376        _ => Err(DecodeError::new(r, address)),
377    }
378}
379
380/// Disassemble byte slice
381///
382/// # Arguments
383///
384/// * `code` - u8 slice to zero or more instructions
385/// * `address` - Location of code in memory
386///
387/// # Examples
388/// ```
389/// use bad64::{disasm, Op};
390///
391/// let mut decoded_iter = disasm(b"\x1f\x20\x03\xd5\x1f\x20\x03\xd5", 0x1000);
392///
393/// let decoded1 = decoded_iter.next().unwrap().unwrap();
394///
395/// assert_eq!(decoded1.address(), 0x1000);
396/// assert_eq!(decoded1.operands().len(), 0);
397/// assert_eq!(decoded1.op(), Op::NOP);
398/// assert_eq!(decoded1.op().mnem(), "nop");
399///
400/// let decoded2 = decoded_iter.next().unwrap().unwrap();
401///
402/// assert_eq!(decoded2.address(), 0x1004);
403/// assert_eq!(decoded2.operands().len(), 0);
404/// assert_eq!(decoded2.op(), Op::NOP);
405/// assert_eq!(decoded2.op().mnem(), "nop");
406///
407/// assert_eq!(decoded_iter.next(), None);
408/// ```
409pub fn disasm(
410    code: &[u8],
411    address: u64,
412) -> impl Iterator<Item = Result<Instruction, DecodeError>> + '_ {
413    (address..)
414        .step_by(4)
415        .zip(code.chunks(4))
416        .map(|(addr, bytes)| match bytes.try_into() {
417            Ok(v) => {
418                let vv = u32::from_le_bytes(v);
419
420                decode(vv, addr)
421            }
422            Err(_) => Err(DecodeError::Short(addr)),
423        })
424}