1use capstone::arch::x86::X86OperandType;
2use capstone::arch::ArchOperand;
3use capstone::{self, Arch, Capstone, Insn, InsnDetail, InsnGroupType, Mode};
4use std::collections::HashMap;
5use std::convert::TryInto;
6
7use crate::print::{self, PrintState};
8use crate::Result;
9use parser::{Address, Architecture, File, FunctionDetails, Range, Register};
10
11#[derive(Debug)]
12pub(crate) struct Code<'code> {
13 arch: Arch,
14 mode: Mode,
15 regions: Vec<Region<'code>>,
16 relocations: HashMap<u64, &'code str>,
17 plts: HashMap<u64, &'code str>,
18}
19
/// A contiguous run of bytes and the address at which its first byte lives.
#[derive(Debug)]
struct Region<'code> {
    /// Address of `code[0]`.
    address: u64,
    /// The bytes themselves, borrowed from the file.
    code: &'code [u8],
}
25
/// A call found while disassembling a range of code.
#[derive(Debug)]
pub(crate) struct Call {
    /// Address of the call instruction.
    pub from: u64,
    /// Address being called.
    pub to: u64,
}
31
32impl<'code> Code<'code> {
33 pub(crate) fn new(file: &File<'code>) -> Option<Self> {
34 let (arch, mode) = match file.machine() {
35 Architecture::I386 => (Arch::X86, Mode::Mode32),
36 Architecture::X86_64 => (Arch::X86, Mode::Mode64),
37 _ => return None,
38 };
39
40 let mut regions = Vec::new();
41 for segment in file.segments() {
44 regions.push(Region {
45 address: segment.address,
46 code: segment.bytes,
47 });
48 }
49
50 let mut relocations = HashMap::new();
52 for relocation in file.relocations() {
53 relocations.insert(relocation.address(), relocation.symbol());
54 }
55 let mut plts = HashMap::new();
56 find_plts(&mut plts, &relocations, file, arch, mode);
57
58 Some(Code {
59 arch,
60 mode,
61 regions,
62 relocations,
63 plts,
64 })
65 }
66
67 pub(crate) fn relocation(&self, address: u64) -> Option<&'code str> {
68 self.relocations.get(&address).copied()
69 }
70
71 pub(crate) fn plt(&self, address: u64) -> Option<&'code str> {
72 self.plts.get(&address).copied()
73 }
74
75 pub(crate) fn calls(&self, range: Range) -> Vec<Call> {
76 calls(self, range).unwrap_or_default()
77 }
78
79 pub(crate) fn disassembler(&self) -> Option<Disassembler> {
80 Disassembler::new(self.arch, self.mode)
81 }
82
83 fn range(&self, range: Range) -> Option<&'code [u8]> {
84 for region in &self.regions {
85 if range.begin >= region.address
86 && range.end <= region.address + region.code.len() as u64
87 {
88 let begin = (range.begin - region.address) as usize;
89 let len = (range.end - range.begin) as usize;
90 return Some(®ion.code[begin..][..len]);
91 }
92 }
93 None
94 }
95
96 fn read_mem(&self, address: u64, size: u64) -> Option<u64> {
97 let range = self.range(Range {
98 begin: address,
99 end: address + size,
100 })?;
101 match size {
102 4 => Some(u32::from_le_bytes(range.try_into().unwrap()) as u64),
103 8 => Some(u64::from_le_bytes(range.try_into().unwrap())),
104 _ => None,
105 }
106 }
107}
108
109fn find_plts<'data>(
110 plts: &mut HashMap<u64, &'data str>,
111 relocations: &HashMap<u64, &'data str>,
112 file: &File<'data>,
113 arch: Arch,
114 mode: Mode,
115) -> Option<()> {
116 let mut cs = Capstone::new_raw(arch, mode, capstone::NO_EXTRA_MODE, None).ok()?;
117 cs.set_detail(true).ok()?;
118 for section in file.sections() {
119 if let (Some(name), Some(address)) = (section.name(), section.address()) {
120 if name.starts_with(".plt") {
121 if let Some(bytes) = file.segment_bytes(address) {
122 let insns = cs.disasm_all(bytes, address.begin).ok()?;
123 for insn in insns.iter() {
124 let detail = cs.insn_detail(insn).ok()?;
125 let arch_detail = detail.arch_detail();
126 for op in arch_detail.operands() {
127 if let Some((_offset, target, _size)) = is_ip_offset(insn, &op) {
128 if let Some(symbol) = relocations.get(&target) {
129 plts.insert(insn.address() & !0xf, symbol);
131 }
132 }
133 }
134 }
135 }
136 }
137 }
138 }
139 Some(())
140}
141
142fn calls(code: &Code, range: Range) -> Option<Vec<Call>> {
143 let bytes = code.range(range)?;
144 let mut cs = Capstone::new_raw(code.arch, code.mode, capstone::NO_EXTRA_MODE, None).ok()?;
145 cs.set_detail(true).ok()?;
146 let insns = cs.disasm_all(bytes, range.begin).ok()?;
147 Some(insns.iter().filter_map(|x| call(code, &cs, x)).collect())
148}
149
150fn call(code: &Code, cs: &Capstone, insn: &Insn) -> Option<Call> {
151 match code.arch {
152 Arch::X86 => call_x86(code, cs, insn),
153 _ => None,
154 }
155}
156
157fn call_x86(code: &Code, cs: &Capstone, insn: &Insn) -> Option<Call> {
158 let detail = cs.insn_detail(insn).ok()?;
159 if !is_call(&detail) {
160 return None;
161 }
162 let arch_detail = detail.arch_detail();
163 for op in arch_detail.operands() {
164 if let Some(imm) = is_imm(&op) {
165 return Some(Call {
166 from: insn.address(),
167 to: imm,
168 });
169 } else if let Some((_offset, address, size)) = is_ip_offset(insn, &op) {
170 if let Some(value) = code.read_mem(address, size) {
172 return Some(Call {
173 from: insn.address(),
174 to: value,
175 });
176 }
177 }
178 }
179 None
180}
181
182pub(crate) struct Disassembler {
183 cs: capstone::Capstone,
184}
185
186impl Disassembler {
187 pub(crate) fn new(arch: Arch, mode: Mode) -> Option<Disassembler> {
188 let mut cs = Capstone::new_raw(arch, mode, capstone::NO_EXTRA_MODE, None).ok()?;
189 cs.set_detail(true).ok()?;
190 Some(Disassembler { cs })
191 }
192
193 pub(crate) fn instructions<'a>(
194 &'a self,
195 code: &Code<'a>,
196 range: Range,
197 ) -> Option<Instructions<'a>> {
198 code.range(range)
199 .and_then(|code| self.cs.disasm_all(code, range.begin).ok())
200 .map(|instructions| Instructions { instructions })
201 }
202}
203
204pub(crate) struct Instructions<'a> {
205 instructions: capstone::Instructions<'a>,
206}
207
208impl<'a> Instructions<'a> {
209 pub(crate) fn iter(&'a self) -> InstructionIterator<'a> {
210 let instructions = self.instructions.iter();
211 InstructionIterator { instructions }
212 }
213}
214
215pub(crate) struct InstructionIterator<'a> {
216 instructions: std::slice::Iter<'a, capstone::Insn<'a>>,
217}
218
219impl<'a> Iterator for InstructionIterator<'a> {
220 type Item = Instruction<'a>;
221
222 fn next(&mut self) -> Option<Self::Item> {
223 self.instructions.next().map(|insn| Instruction { insn })
224 }
225}
226
227pub(crate) struct Instruction<'a> {
228 insn: &'a capstone::Insn<'a>,
229}
230
231impl<'a> Instruction<'a> {
232 pub(crate) fn address(&self) -> Address {
233 Address::new(self.insn.address())
234 }
235
236 pub(crate) fn print(
237 &self,
238 state: &mut PrintState,
239 code: &Code,
240 d: &Disassembler,
241 f: &FunctionDetails,
242 range: Range,
243 ) -> Result<()> {
244 let detail = match d.cs.insn_detail(self.insn) {
245 Ok(detail) => detail,
246 Err(_) => return Ok(()),
247 };
248 let arch_detail = detail.arch_detail();
249
250 let address = self.insn.address() - range.begin;
251 if let Some(mnemonic) = self.insn.mnemonic() {
252 state.instruction(Some(address), mnemonic, |w, _hash| {
253 if let Some(op_str) = self.insn.op_str().filter(|s| !s.is_empty()) {
254 let mut ops = arch_detail.operands().into_iter();
255 let mut first = true;
256 for op_str in op_str.split(", ") {
257 if first {
258 write!(w, " ")?;
259 first = false;
260 } else {
261 write!(w, ", ")?;
262 }
263 if let Some(op) = ops.next() {
264 if let Some(imm) = is_imm(&op) {
265 if is_jump(&detail) && range.contains(imm) {
266 write!(w, "+{:x}", imm - range.begin)?;
267 continue;
268 }
269 }
270 } else {
271 debug!("operand count mismatch {:x}", self.insn.address());
272 }
273 write!(w, "{}", op_str)?
274 }
275 }
276 Ok(())
277 })?;
278 } else {
279 state.instruction(Some(address), ".byte", |w, _hash| {
280 for b in self.insn.bytes() {
281 write!(w, "{:02x} ", b)?;
282 }
283 Ok(())
284 })?;
285 }
286
287 let mut first = true;
288 for op in arch_detail.operands() {
289 let address = if first {
290 first = false;
293 self.insn.address() + self.insn.bytes().len() as u64
294 } else {
295 self.insn.address()
296 };
297 if let Some(imm) = is_imm(&op) {
298 if is_jump(&detail) && range.contains(imm) {
299 continue;
300 }
301 if imm == 0 {
303 continue;
304 }
305 if let Some(function) = state.hash().functions_by_address.get(&imm) {
307 state.instruction(None, "", |w, _hash| {
308 write!(w, "0x{:x} = ", imm)?;
309 print::function::print_ref(function, w)
310 })?;
311 } else if let Some(symbol) = code.plt(imm) {
312 state.instruction(None, "", |w, _hash| {
313 write!(w, "0x{:x} = {}@plt", imm, symbol)?;
315 Ok(())
316 })?;
317 } else if let Some(symbol) = code.relocation(imm) {
318 state.instruction(None, "", |w, _hash| {
319 write!(w, "[0x{:x}] = {}", imm, symbol)?;
321 Ok(())
322 })?;
323 }
324 }
325 if let Some(reg) = is_reg(&op) {
326 for parameter in f.parameters() {
327 for (range, register) in parameter.registers() {
328 if reg == register && range.contains(address) {
329 state.instruction(None, "", |w, hash| {
330 print::register::print(register, w, hash)?;
331 write!(w, " = ")?;
332 print::parameter::print_decl(parameter, w, hash)
333 })?;
334 }
335 }
336 }
337 for variable in f.variables() {
338 for (range, register) in variable.registers() {
339 if reg == register && range.contains(address) {
340 state.instruction(None, "", |w, hash| {
341 print::register::print(register, w, hash)?;
342 write!(w, " = ")?;
343 print::local_variable::print_decl(variable, w, hash)
344 })?;
345 }
346 }
347 }
348 }
349 if let Some((reg, ofs)) = is_reg_offset(&op) {
350 for parameter in f.parameters() {
351 let size = parameter.byte_size(state.hash()).unwrap_or(0) as i64;
352 for (range, register, offset) in parameter.register_offsets() {
353 if reg == register
354 && ofs >= offset
355 && ofs < offset + size
356 && range.contains(address)
357 {
358 state.instruction(None, "", |w, hash| {
359 write!(w, "[")?;
360 print::register::print(register, w, hash)?;
361 if offset < 0 {
362 write!(w, " - 0x{:x}", -offset)?;
363 } else if offset > 0 {
364 write!(w, " + 0x{:x}", offset)?;
365 }
366 write!(w, "] = ")?;
367 print::parameter::print_decl(parameter, w, hash)
369 })?;
370 }
371 }
372 }
373 for variable in f.variables() {
374 let size = variable.byte_size(state.hash()).unwrap_or(0) as i64;
375 for (range, register, offset) in variable.register_offsets() {
376 if reg == register
377 && ofs >= offset
378 && ofs < offset + size
379 && range.contains(address)
380 {
381 state.instruction(None, "", |w, hash| {
382 write!(w, "[")?;
383 print::register::print(register, w, hash)?;
384 if offset < 0 {
385 write!(w, " - 0x{:x}", -offset)?;
386 } else if offset > 0 {
387 write!(w, " + 0x{:x}", offset)?;
388 }
389 write!(w, "] = ")?;
390 print::local_variable::print_decl(variable, w, hash)
392 })?;
393 }
394 }
395 }
396 }
397 if let Some((offset, address, size)) = is_ip_offset(self.insn, &op) {
398 if let Some(function) = state.hash().functions_by_address.get(&address) {
400 state.instruction(None, "", |w, _hash| {
401 write!(w, "ip + 0x{:x} = ", offset)?;
402 print::function::print_ref(function, w)?;
403 Ok(())
404 })?;
405 } else if let Some(variable) = state.hash().variables_by_address.get(&address) {
406 state.instruction(None, "", |w, _hash| {
407 write!(w, "ip + 0x{:x} = ", offset)?;
408 print::variable::print_ref(variable, w)?;
409 Ok(())
410 })?;
411 } else if let Some(symbol) = code.relocation(address) {
412 state.instruction(None, "", |w, _hash| {
413 write!(w, "[ip + 0x{:x}] = {}", offset, symbol)?;
414 Ok(())
415 })?;
416 } else if let Some(value) = code.read_mem(address, size) {
417 state.instruction(None, "", |w, hash| {
418 write!(w, "[ip + 0x{:x}] = 0x{:x}", offset, value)?;
419 if let Some(function) = hash.functions_by_address.get(&value) {
420 write!(w, " = ")?;
421 print::function::print_ref(function, w)?;
422 }
423 Ok(())
424 })?;
425 }
426 }
427 }
429
430 Ok(())
431 }
432}
433
434fn is_call(detail: &InsnDetail) -> bool {
435 detail
436 .groups()
437 .iter()
438 .any(|group| group.0 as u32 == InsnGroupType::CS_GRP_CALL)
439}
440
441fn is_jump(detail: &InsnDetail) -> bool {
442 detail
443 .groups()
444 .iter()
445 .any(|group| group.0 as u32 == InsnGroupType::CS_GRP_JUMP)
446}
447
448fn is_imm(op: &ArchOperand) -> Option<u64> {
449 if let ArchOperand::X86Operand(op) = op {
450 if let X86OperandType::Imm(imm) = op.op_type {
451 return Some(imm as u64);
452 }
453 }
454 None
455}
456
457fn is_reg(op: &ArchOperand) -> Option<Register> {
458 if let ArchOperand::X86Operand(op) = op {
459 if let X86OperandType::Reg(reg) = op.op_type {
460 return convert_reg(reg);
461 }
462 if let X86OperandType::Mem(op) = op.op_type {
463 return convert_reg(op.base());
464 }
466 }
467 None
468}
469
470fn is_reg_offset(op: &ArchOperand) -> Option<(Register, i64)> {
471 if let ArchOperand::X86Operand(op) = op {
472 if let X86OperandType::Mem(op) = op.op_type {
473 return convert_reg(op.base()).map(|reg| (reg, op.disp()));
474 }
475 }
476 None
477}
478
479fn is_ip_offset(insn: &Insn, op: &ArchOperand) -> Option<(i64, u64, u64)> {
481 if let ArchOperand::X86Operand(op) = op {
482 if let X86OperandType::Mem(op) = op.op_type {
483 use capstone::arch::x86::X86Reg;
484 let reg = op.base().0 as u32;
485 let size = if reg == X86Reg::X86_REG_RIP {
486 8
487 } else if reg == X86Reg::X86_REG_EIP {
488 4
489 } else {
490 return None;
491 };
492 let offset = op.disp();
493 let address = (insn.address() + insn.bytes().len() as u64).wrapping_add(offset as u64);
494 return Some((offset, address, size));
495 }
496 }
497 None
498}
499
500fn convert_reg(reg: capstone::RegId) -> Option<Register> {
501 use capstone::arch::x86::X86Reg::*;
502 match reg.0 as u32 {
505 X86_REG_RAX | X86_REG_EAX | X86_REG_AX | X86_REG_AH | X86_REG_AL => Some(Register(0)),
506 X86_REG_RDX | X86_REG_EDX | X86_REG_DX | X86_REG_DH | X86_REG_DL => Some(Register(1)),
507 X86_REG_RCX | X86_REG_ECX | X86_REG_CX | X86_REG_CH | X86_REG_CL => Some(Register(2)),
508 X86_REG_RBX | X86_REG_EBX | X86_REG_BX | X86_REG_BH | X86_REG_BL => Some(Register(3)),
509 X86_REG_RSI | X86_REG_ESI | X86_REG_SI | X86_REG_SIL => Some(Register(4)),
510 X86_REG_RDI | X86_REG_EDI | X86_REG_DI | X86_REG_DIL => Some(Register(5)),
511 X86_REG_RBP | X86_REG_EBP | X86_REG_BP | X86_REG_BPL => Some(Register(6)),
512 X86_REG_RSP | X86_REG_ESP | X86_REG_SP | X86_REG_SPL => Some(Register(7)),
513
514 X86_REG_R8 | X86_REG_R8D | X86_REG_R8W | X86_REG_R8B => Some(Register(8)),
515 X86_REG_R9 | X86_REG_R9D | X86_REG_R9W | X86_REG_R9B => Some(Register(9)),
516 X86_REG_R10 | X86_REG_R10D | X86_REG_R10W | X86_REG_R10B => Some(Register(10)),
517 X86_REG_R11 | X86_REG_R11D | X86_REG_R11W | X86_REG_R11B => Some(Register(11)),
518 X86_REG_R12 | X86_REG_R12D | X86_REG_R12W | X86_REG_R12B => Some(Register(12)),
519 X86_REG_R13 | X86_REG_R13D | X86_REG_R13W | X86_REG_R13B => Some(Register(13)),
520 X86_REG_R14 | X86_REG_R14D | X86_REG_R14W | X86_REG_R14B => Some(Register(14)),
521 X86_REG_R15 | X86_REG_R15D | X86_REG_R15W | X86_REG_R15B => Some(Register(15)),
522
523 X86_REG_XMM0 | X86_REG_YMM0 => Some(Register(17)),
524 X86_REG_XMM1 | X86_REG_YMM1 => Some(Register(18)),
525 X86_REG_XMM2 | X86_REG_YMM2 => Some(Register(19)),
526 X86_REG_XMM3 | X86_REG_YMM3 => Some(Register(20)),
527 X86_REG_XMM4 | X86_REG_YMM4 => Some(Register(21)),
528 X86_REG_XMM5 | X86_REG_YMM5 => Some(Register(22)),
529 X86_REG_XMM6 | X86_REG_YMM6 => Some(Register(23)),
530 X86_REG_XMM7 | X86_REG_YMM7 => Some(Register(24)),
531
532 X86_REG_XMM8 | X86_REG_YMM8 => Some(Register(25)),
533 X86_REG_XMM9 | X86_REG_YMM9 => Some(Register(26)),
534 X86_REG_XMM10 | X86_REG_YMM10 => Some(Register(27)),
535 X86_REG_XMM11 | X86_REG_YMM11 => Some(Register(28)),
536 X86_REG_XMM12 | X86_REG_YMM12 => Some(Register(29)),
537 X86_REG_XMM13 | X86_REG_YMM13 => Some(Register(30)),
538 X86_REG_XMM14 | X86_REG_YMM14 => Some(Register(31)),
539 X86_REG_XMM15 | X86_REG_YMM15 => Some(Register(32)),
540
541 X86_REG_INVALID | X86_REG_RIP | X86_REG_EIP => None,
543
544 _ => {
545 debug!("Unsupported x86 register {}", reg.0);
546 None
547 }
548 }
549}