1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
use capstone::{prelude::*, Insn};
use nix::errno::Errno;
use std::fmt;
use crate::diag::{Error, Result};
/// A disassembled instruction with human-friendly fields.
///
/// Stores the instruction address, mnemonic and operand text as owned values
/// so callers can format or inspect them without depending on capstone types.
///
/// The type derives `Debug`, `Clone`, `PartialEq` and `Eq` so instances can be
/// logged, duplicated and compared directly in assertions.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Instruction {
    /// Address at which the instruction starts.
    address: u64,
    /// Mnemonic text (for example `callq`); empty when none was available.
    mnemonic: String,
    /// Operand text; empty when the instruction has no operand string.
    operands: String,
}
impl Instruction {
    /// Create an `Instruction` from a capstone `Insn`.
    ///
    /// Copies the address, mnemonic and operand string out of `insn` so the
    /// result owns its data and does not borrow from capstone.
    ///
    /// # Arguments
    ///
    /// * `insn` - A reference to the capstone `Insn` to convert.
    ///
    /// # Returns
    ///
    /// An owned `Instruction`. A missing mnemonic or operand string is stored
    /// as the empty string.
    #[must_use]
    pub fn new(insn: &Insn) -> Self {
        Self {
            address: insn.address(),
            mnemonic: insn.mnemonic().unwrap_or("").to_string(),
            operands: insn.op_str().unwrap_or("").to_string(),
        }
    }

    /// Return the instruction address.
    ///
    /// # Returns
    ///
    /// The address stored when this instruction was created.
    #[must_use]
    pub fn address(&self) -> u64 {
        self.address
    }

    /// Return true when the mnemonic represents a call instruction.
    ///
    /// # Returns
    ///
    /// `true` when the mnemonic starts with `call` (for example `call`,
    /// `callq`) or with `lcall` (far call, for example `lcallq`), otherwise
    /// `false`.
    ///
    /// A prefix match is used instead of a substring match because several
    /// unrelated mnemonics merely contain the text "call" (`syscall`,
    /// `vmcall`, `vmmcall`) and must not be reported as calls.
    #[must_use]
    pub fn is_call(&self) -> bool {
        let mnemonic = self.mnemonic.as_str();
        mnemonic.starts_with("call") || mnemonic.starts_with("lcall")
    }
}
/// Renders an instruction as `<address in 0x-prefixed hex>: <mnemonic>`,
/// followed by a tab character and the operand text.
impl fmt::Display for Instruction {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Pull the three fields out once so the format call reads top-down.
        let Self {
            address,
            mnemonic,
            operands,
        } = self;
        f.write_fmt(format_args!("{:#x}: {}\t{}", address, mnemonic, operands))
    }
}
/// A light wrapper around capstone used to disassemble buffers.
///
/// Owns the `Capstone` handle that this type's methods use to decode
/// machine-code bytes into `Instruction` values.
pub struct Parser {
/// Disassembler handle; built in `Parser::new` and borrowed by each decode
/// call.
capstone: Capstone,
}
impl Parser {
/// Create a new instruction `Parser` for the host architecture (`x86_64`).
///
/// # Errors
///
/// Returns an error if the underlying capstone builder fails.
pub fn new() -> Result<Self> {
Ok(Self {
capstone: Capstone::new()
.x86()
.mode(arch::x86::ArchMode::Mode64)
.syntax(arch::x86::ArchSyntax::Att)
.detail(true)
.build()?,
})
}
/// Disassemble a single instruction from `opcode` at address `addr`.
///
/// # Arguments
///
/// * `opcode` - Bytes containing at least one instruction.
/// * `addr` - Virtual address corresponding to the start of `opcode`.
///
/// # Errors
///
/// Returns an error when disassembly fails or no instruction could be
/// decoded.
pub fn get_instruction_from(
&self,
opcode: &[u8],
addr: u64,
) -> Result<Instruction> {
let instructions = self.capstone.disasm_count(opcode, addr, 1)?;
Ok(Instruction::new(
instructions
.iter()
.next()
.ok_or_else(|| Error::from(Errno::ENOEXEC))?,
))
}
/// Disassemble all instructions present in `code`, returning a vector of
/// `Instruction` instances.
///
/// # Arguments
///
/// * `code` - Buffer containing machine code.
/// * `addr` - Base virtual address of `code`.
///
/// # Errors
///
/// Returns an error if the disassembly operation fails.
pub fn get_all_instructions_from(
&self,
code: &[u8],
addr: u64,
) -> Result<Vec<Instruction>> {
let instructions = self.capstone.disasm_all(code, addr)?;
Ok(instructions.iter().map(Instruction::new).collect())
}
}
#[cfg(test)]
mod tests {
    use super::*;

    /// Decoding a single relative call yields the expected address,
    /// mnemonic and resolved target operand.
    #[test]
    fn test_parser_get_instruction_from() {
        let parser = Parser::new().expect("Failed to create parser");
        // e8 05 00 00 00 is `call rel32` with displacement 5; the target is
        // 0x1000 + 5 (instruction length) + 5 (displacement) = 0x100a.
        let opcode: [u8; 5] = [0xe8, 0x05, 0x00, 0x00, 0x00];
        let addr: u64 = 0x1000;
        let instruction = parser
            .get_instruction_from(&opcode, addr)
            .expect("Failed to get instruction");
        assert_eq!(instruction.address(), addr);
        assert!(instruction.is_call());
        assert_eq!(instruction.mnemonic, "callq");
        assert_eq!(instruction.operands, "0x100a");
    }

    /// `is_call` accepts a call mnemonic and rejects an unrelated one.
    #[test]
    fn test_instruction_is_call() {
        let call = Instruction {
            address: 0x1000,
            mnemonic: "callq".into(),
            operands: "0x100a".into(),
        };
        assert!(call.is_call());

        let mov = Instruction {
            address: 0x1005,
            mnemonic: "mov".into(),
            operands: "rax, rbx".into(),
        };
        assert!(!mov.is_call());
    }

    /// `Display` prints the hex address, a colon, the mnemonic, a tab and
    /// the operands; the full string is pinned rather than fragments.
    #[test]
    fn test_instruction_fmt() {
        let inst = Instruction {
            address: 0x1000,
            mnemonic: "mov".into(),
            operands: "rax, rbx".into(),
        };
        assert_eq!(inst.to_string(), "0x1000: mov\trax, rbx");
    }
}