ddbug/
code.rs

1use capstone::arch::x86::X86OperandType;
2use capstone::arch::ArchOperand;
3use capstone::{self, Arch, Capstone, Insn, InsnDetail, InsnGroupType, Mode};
4use std::collections::HashMap;
5use std::convert::TryInto;
6
7use crate::print::{self, PrintState};
8use crate::Result;
9use parser::{Address, Architecture, File, FunctionDetails, Range, Register};
10
11#[derive(Debug)]
12pub(crate) struct Code<'code> {
13    arch: Arch,
14    mode: Mode,
15    regions: Vec<Region<'code>>,
16    relocations: HashMap<u64, &'code str>,
17    plts: HashMap<u64, &'code str>,
18}
19
/// A contiguous run of bytes together with the address of its first byte.
#[derive(Debug)]
struct Region<'code> {
    /// Address of `code[0]`.
    address: u64,
    /// The bytes located at `address`.
    code: &'code [u8],
}
25
/// A call edge found while disassembling a range of code.
#[derive(Debug)]
pub(crate) struct Call {
    /// Address of the call instruction.
    pub from: u64,
    /// Address being called.
    pub to: u64,
}
31
32impl<'code> Code<'code> {
33    pub(crate) fn new(file: &File<'code>) -> Option<Self> {
34        let (arch, mode) = match file.machine() {
35            Architecture::I386 => (Arch::X86, Mode::Mode32),
36            Architecture::X86_64 => (Arch::X86, Mode::Mode64),
37            _ => return None,
38        };
39
40        let mut regions = Vec::new();
41        // TODO: handle object files (no segments)
42        // TODO: handle relocations
43        for segment in file.segments() {
44            regions.push(Region {
45                address: segment.address,
46                code: segment.bytes,
47            });
48        }
49
50        // Create symbols for relocations and PLT entries.
51        let mut relocations = HashMap::new();
52        for relocation in file.relocations() {
53            relocations.insert(relocation.address(), relocation.symbol());
54        }
55        let mut plts = HashMap::new();
56        find_plts(&mut plts, &relocations, file, arch, mode);
57
58        Some(Code {
59            arch,
60            mode,
61            regions,
62            relocations,
63            plts,
64        })
65    }
66
67    pub(crate) fn relocation(&self, address: u64) -> Option<&'code str> {
68        self.relocations.get(&address).copied()
69    }
70
71    pub(crate) fn plt(&self, address: u64) -> Option<&'code str> {
72        self.plts.get(&address).copied()
73    }
74
75    pub(crate) fn calls(&self, range: Range) -> Vec<Call> {
76        calls(self, range).unwrap_or_default()
77    }
78
79    pub(crate) fn disassembler(&self) -> Option<Disassembler> {
80        Disassembler::new(self.arch, self.mode)
81    }
82
83    fn range(&self, range: Range) -> Option<&'code [u8]> {
84        for region in &self.regions {
85            if range.begin >= region.address
86                && range.end <= region.address + region.code.len() as u64
87            {
88                let begin = (range.begin - region.address) as usize;
89                let len = (range.end - range.begin) as usize;
90                return Some(&region.code[begin..][..len]);
91            }
92        }
93        None
94    }
95
96    fn read_mem(&self, address: u64, size: u64) -> Option<u64> {
97        let range = self.range(Range {
98            begin: address,
99            end: address + size,
100        })?;
101        match size {
102            4 => Some(u32::from_le_bytes(range.try_into().unwrap()) as u64),
103            8 => Some(u64::from_le_bytes(range.try_into().unwrap())),
104            _ => None,
105        }
106    }
107}
108
109fn find_plts<'data>(
110    plts: &mut HashMap<u64, &'data str>,
111    relocations: &HashMap<u64, &'data str>,
112    file: &File<'data>,
113    arch: Arch,
114    mode: Mode,
115) -> Option<()> {
116    let mut cs = Capstone::new_raw(arch, mode, capstone::NO_EXTRA_MODE, None).ok()?;
117    cs.set_detail(true).ok()?;
118    for section in file.sections() {
119        if let (Some(name), Some(address)) = (section.name(), section.address()) {
120            if name.starts_with(".plt") {
121                if let Some(bytes) = file.segment_bytes(address) {
122                    let insns = cs.disasm_all(bytes, address.begin).ok()?;
123                    for insn in insns.iter() {
124                        let detail = cs.insn_detail(insn).ok()?;
125                        let arch_detail = detail.arch_detail();
126                        for op in arch_detail.operands() {
127                            if let Some((_offset, target, _size)) = is_ip_offset(insn, &op) {
128                                if let Some(symbol) = relocations.get(&target) {
129                                    // HACK: assume PLT is aligned to 16 bytes
130                                    plts.insert(insn.address() & !0xf, symbol);
131                                }
132                            }
133                        }
134                    }
135                }
136            }
137        }
138    }
139    Some(())
140}
141
142fn calls(code: &Code, range: Range) -> Option<Vec<Call>> {
143    let bytes = code.range(range)?;
144    let mut cs = Capstone::new_raw(code.arch, code.mode, capstone::NO_EXTRA_MODE, None).ok()?;
145    cs.set_detail(true).ok()?;
146    let insns = cs.disasm_all(bytes, range.begin).ok()?;
147    Some(insns.iter().filter_map(|x| call(code, &cs, x)).collect())
148}
149
150fn call(code: &Code, cs: &Capstone, insn: &Insn) -> Option<Call> {
151    match code.arch {
152        Arch::X86 => call_x86(code, cs, insn),
153        _ => None,
154    }
155}
156
157fn call_x86(code: &Code, cs: &Capstone, insn: &Insn) -> Option<Call> {
158    let detail = cs.insn_detail(insn).ok()?;
159    if !is_call(&detail) {
160        return None;
161    }
162    let arch_detail = detail.arch_detail();
163    for op in arch_detail.operands() {
164        if let Some(imm) = is_imm(&op) {
165            return Some(Call {
166                from: insn.address(),
167                to: imm,
168            });
169        } else if let Some((_offset, address, size)) = is_ip_offset(insn, &op) {
170            // TODO: handle `lea rax, [rip + offset]; call rax`
171            if let Some(value) = code.read_mem(address, size) {
172                return Some(Call {
173                    from: insn.address(),
174                    to: value,
175                });
176            }
177        }
178    }
179    None
180}
181
182pub(crate) struct Disassembler {
183    cs: capstone::Capstone,
184}
185
186impl Disassembler {
187    pub(crate) fn new(arch: Arch, mode: Mode) -> Option<Disassembler> {
188        let mut cs = Capstone::new_raw(arch, mode, capstone::NO_EXTRA_MODE, None).ok()?;
189        cs.set_detail(true).ok()?;
190        Some(Disassembler { cs })
191    }
192
193    pub(crate) fn instructions<'a>(
194        &'a self,
195        code: &Code<'a>,
196        range: Range,
197    ) -> Option<Instructions<'a>> {
198        code.range(range)
199            .and_then(|code| self.cs.disasm_all(code, range.begin).ok())
200            .map(|instructions| Instructions { instructions })
201    }
202}
203
204pub(crate) struct Instructions<'a> {
205    instructions: capstone::Instructions<'a>,
206}
207
208impl<'a> Instructions<'a> {
209    pub(crate) fn iter(&'a self) -> InstructionIterator<'a> {
210        let instructions = self.instructions.iter();
211        InstructionIterator { instructions }
212    }
213}
214
215pub(crate) struct InstructionIterator<'a> {
216    instructions: std::slice::Iter<'a, capstone::Insn<'a>>,
217}
218
219impl<'a> Iterator for InstructionIterator<'a> {
220    type Item = Instruction<'a>;
221
222    fn next(&mut self) -> Option<Self::Item> {
223        self.instructions.next().map(|insn| Instruction { insn })
224    }
225}
226
227pub(crate) struct Instruction<'a> {
228    insn: &'a capstone::Insn<'a>,
229}
230
231impl<'a> Instruction<'a> {
232    pub(crate) fn address(&self) -> Address {
233        Address::new(self.insn.address())
234    }
235
236    pub(crate) fn print(
237        &self,
238        state: &mut PrintState,
239        code: &Code,
240        d: &Disassembler,
241        f: &FunctionDetails,
242        range: Range,
243    ) -> Result<()> {
244        let detail = match d.cs.insn_detail(self.insn) {
245            Ok(detail) => detail,
246            Err(_) => return Ok(()),
247        };
248        let arch_detail = detail.arch_detail();
249
250        let address = self.insn.address() - range.begin;
251        if let Some(mnemonic) = self.insn.mnemonic() {
252            state.instruction(Some(address), mnemonic, |w, _hash| {
253                if let Some(op_str) = self.insn.op_str().filter(|s| !s.is_empty()) {
254                    let mut ops = arch_detail.operands().into_iter();
255                    let mut first = true;
256                    for op_str in op_str.split(", ") {
257                        if first {
258                            write!(w, " ")?;
259                            first = false;
260                        } else {
261                            write!(w, ", ")?;
262                        }
263                        if let Some(op) = ops.next() {
264                            if let Some(imm) = is_imm(&op) {
265                                if is_jump(&detail) && range.contains(imm) {
266                                    write!(w, "+{:x}", imm - range.begin)?;
267                                    continue;
268                                }
269                            }
270                        } else {
271                            debug!("operand count mismatch {:x}", self.insn.address());
272                        }
273                        write!(w, "{}", op_str)?
274                    }
275                }
276                Ok(())
277            })?;
278        } else {
279            state.instruction(Some(address), ".byte", |w, _hash| {
280                for b in self.insn.bytes() {
281                    write!(w, "{:02x} ", b)?;
282                }
283                Ok(())
284            })?;
285        }
286
287        let mut first = true;
288        for op in arch_detail.operands() {
289            let address = if first {
290                // HACK: assume only first operand is modified, so calculate it after the instruction
291                // TODO: use cs_regs_access
292                first = false;
293                self.insn.address() + self.insn.bytes().len() as u64
294            } else {
295                self.insn.address()
296            };
297            if let Some(imm) = is_imm(&op) {
298                if is_jump(&detail) && range.contains(imm) {
299                    continue;
300                }
301                // TODO: handle relocations
302                if imm == 0 {
303                    continue;
304                }
305                // TODO: lookup variables too
306                if let Some(function) = state.hash().functions_by_address.get(&imm) {
307                    state.instruction(None, "", |w, _hash| {
308                        write!(w, "0x{:x} = ", imm)?;
309                        print::function::print_ref(function, w)
310                    })?;
311                } else if let Some(symbol) = code.plt(imm) {
312                    state.instruction(None, "", |w, _hash| {
313                        // TODO: link to symbol
314                        write!(w, "0x{:x} = {}@plt", imm, symbol)?;
315                        Ok(())
316                    })?;
317                } else if let Some(symbol) = code.relocation(imm) {
318                    state.instruction(None, "", |w, _hash| {
319                        // TODO: link to symbol
320                        write!(w, "[0x{:x}] = {}", imm, symbol)?;
321                        Ok(())
322                    })?;
323                }
324            }
325            if let Some(reg) = is_reg(&op) {
326                for parameter in f.parameters() {
327                    for (range, register) in parameter.registers() {
328                        if reg == register && range.contains(address) {
329                            state.instruction(None, "", |w, hash| {
330                                print::register::print(register, w, hash)?;
331                                write!(w, " = ")?;
332                                print::parameter::print_decl(parameter, w, hash)
333                            })?;
334                        }
335                    }
336                }
337                for variable in f.variables() {
338                    for (range, register) in variable.registers() {
339                        if reg == register && range.contains(address) {
340                            state.instruction(None, "", |w, hash| {
341                                print::register::print(register, w, hash)?;
342                                write!(w, " = ")?;
343                                print::local_variable::print_decl(variable, w, hash)
344                            })?;
345                        }
346                    }
347                }
348            }
349            if let Some((reg, ofs)) = is_reg_offset(&op) {
350                for parameter in f.parameters() {
351                    let size = parameter.byte_size(state.hash()).unwrap_or(0) as i64;
352                    for (range, register, offset) in parameter.register_offsets() {
353                        if reg == register
354                            && ofs >= offset
355                            && ofs < offset + size
356                            && range.contains(address)
357                        {
358                            state.instruction(None, "", |w, hash| {
359                                write!(w, "[")?;
360                                print::register::print(register, w, hash)?;
361                                if offset < 0 {
362                                    write!(w, " - 0x{:x}", -offset)?;
363                                } else if offset > 0 {
364                                    write!(w, " + 0x{:x}", offset)?;
365                                }
366                                write!(w, "] = ")?;
367                                // FIXME: print members if ofs != offset || reg.size() < size
368                                print::parameter::print_decl(parameter, w, hash)
369                            })?;
370                        }
371                    }
372                }
373                for variable in f.variables() {
374                    let size = variable.byte_size(state.hash()).unwrap_or(0) as i64;
375                    for (range, register, offset) in variable.register_offsets() {
376                        if reg == register
377                            && ofs >= offset
378                            && ofs < offset + size
379                            && range.contains(address)
380                        {
381                            state.instruction(None, "", |w, hash| {
382                                write!(w, "[")?;
383                                print::register::print(register, w, hash)?;
384                                if offset < 0 {
385                                    write!(w, " - 0x{:x}", -offset)?;
386                                } else if offset > 0 {
387                                    write!(w, " + 0x{:x}", offset)?;
388                                }
389                                write!(w, "] = ")?;
390                                // FIXME: print members if ofs != offset || reg.size() < size
391                                print::local_variable::print_decl(variable, w, hash)
392                            })?;
393                        }
394                    }
395                }
396            }
397            if let Some((offset, address, size)) = is_ip_offset(self.insn, &op) {
398                // TODO: show original register name
399                if let Some(function) = state.hash().functions_by_address.get(&address) {
400                    state.instruction(None, "", |w, _hash| {
401                        write!(w, "ip + 0x{:x} = ", offset)?;
402                        print::function::print_ref(function, w)?;
403                        Ok(())
404                    })?;
405                } else if let Some(variable) = state.hash().variables_by_address.get(&address) {
406                    state.instruction(None, "", |w, _hash| {
407                        write!(w, "ip + 0x{:x} = ", offset)?;
408                        print::variable::print_ref(variable, w)?;
409                        Ok(())
410                    })?;
411                } else if let Some(symbol) = code.relocation(address) {
412                    state.instruction(None, "", |w, _hash| {
413                        write!(w, "[ip + 0x{:x}] = {}", offset, symbol)?;
414                        Ok(())
415                    })?;
416                } else if let Some(value) = code.read_mem(address, size) {
417                    state.instruction(None, "", |w, hash| {
418                        write!(w, "[ip + 0x{:x}] = 0x{:x}", offset, value)?;
419                        if let Some(function) = hash.functions_by_address.get(&value) {
420                            write!(w, " = ")?;
421                            print::function::print_ref(function, w)?;
422                        }
423                        Ok(())
424                    })?;
425                }
426            }
427            // TODO: keep track of pointer types, and lookup X86OperandType::Mem offsets
428        }
429
430        Ok(())
431    }
432}
433
434fn is_call(detail: &InsnDetail) -> bool {
435    detail
436        .groups()
437        .iter()
438        .any(|group| group.0 as u32 == InsnGroupType::CS_GRP_CALL)
439}
440
441fn is_jump(detail: &InsnDetail) -> bool {
442    detail
443        .groups()
444        .iter()
445        .any(|group| group.0 as u32 == InsnGroupType::CS_GRP_JUMP)
446}
447
448fn is_imm(op: &ArchOperand) -> Option<u64> {
449    if let ArchOperand::X86Operand(op) = op {
450        if let X86OperandType::Imm(imm) = op.op_type {
451            return Some(imm as u64);
452        }
453    }
454    None
455}
456
457fn is_reg(op: &ArchOperand) -> Option<Register> {
458    if let ArchOperand::X86Operand(op) = op {
459        if let X86OperandType::Reg(reg) = op.op_type {
460            return convert_reg(reg);
461        }
462        if let X86OperandType::Mem(op) = op.op_type {
463            return convert_reg(op.base());
464            // TODO: op.index()?
465        }
466    }
467    None
468}
469
470fn is_reg_offset(op: &ArchOperand) -> Option<(Register, i64)> {
471    if let ArchOperand::X86Operand(op) = op {
472        if let X86OperandType::Mem(op) = op.op_type {
473            return convert_reg(op.base()).map(|reg| (reg, op.disp()));
474        }
475    }
476    None
477}
478
479// Option<(offset, address, size)>
480fn is_ip_offset(insn: &Insn, op: &ArchOperand) -> Option<(i64, u64, u64)> {
481    if let ArchOperand::X86Operand(op) = op {
482        if let X86OperandType::Mem(op) = op.op_type {
483            use capstone::arch::x86::X86Reg;
484            let reg = op.base().0 as u32;
485            let size = if reg == X86Reg::X86_REG_RIP {
486                8
487            } else if reg == X86Reg::X86_REG_EIP {
488                4
489            } else {
490                return None;
491            };
492            let offset = op.disp();
493            let address = (insn.address() + insn.bytes().len() as u64).wrapping_add(offset as u64);
494            return Some((offset, address, size));
495        }
496    }
497    None
498}
499
500fn convert_reg(reg: capstone::RegId) -> Option<Register> {
501    use capstone::arch::x86::X86Reg::*;
502    // FIXME: mapping from capstone to dwarf registers should live elsewhere
503    // FIXME: keep track of register width?
504    match reg.0 as u32 {
505        X86_REG_RAX | X86_REG_EAX | X86_REG_AX | X86_REG_AH | X86_REG_AL => Some(Register(0)),
506        X86_REG_RDX | X86_REG_EDX | X86_REG_DX | X86_REG_DH | X86_REG_DL => Some(Register(1)),
507        X86_REG_RCX | X86_REG_ECX | X86_REG_CX | X86_REG_CH | X86_REG_CL => Some(Register(2)),
508        X86_REG_RBX | X86_REG_EBX | X86_REG_BX | X86_REG_BH | X86_REG_BL => Some(Register(3)),
509        X86_REG_RSI | X86_REG_ESI | X86_REG_SI | X86_REG_SIL => Some(Register(4)),
510        X86_REG_RDI | X86_REG_EDI | X86_REG_DI | X86_REG_DIL => Some(Register(5)),
511        X86_REG_RBP | X86_REG_EBP | X86_REG_BP | X86_REG_BPL => Some(Register(6)),
512        X86_REG_RSP | X86_REG_ESP | X86_REG_SP | X86_REG_SPL => Some(Register(7)),
513
514        X86_REG_R8 | X86_REG_R8D | X86_REG_R8W | X86_REG_R8B => Some(Register(8)),
515        X86_REG_R9 | X86_REG_R9D | X86_REG_R9W | X86_REG_R9B => Some(Register(9)),
516        X86_REG_R10 | X86_REG_R10D | X86_REG_R10W | X86_REG_R10B => Some(Register(10)),
517        X86_REG_R11 | X86_REG_R11D | X86_REG_R11W | X86_REG_R11B => Some(Register(11)),
518        X86_REG_R12 | X86_REG_R12D | X86_REG_R12W | X86_REG_R12B => Some(Register(12)),
519        X86_REG_R13 | X86_REG_R13D | X86_REG_R13W | X86_REG_R13B => Some(Register(13)),
520        X86_REG_R14 | X86_REG_R14D | X86_REG_R14W | X86_REG_R14B => Some(Register(14)),
521        X86_REG_R15 | X86_REG_R15D | X86_REG_R15W | X86_REG_R15B => Some(Register(15)),
522
523        X86_REG_XMM0 | X86_REG_YMM0 => Some(Register(17)),
524        X86_REG_XMM1 | X86_REG_YMM1 => Some(Register(18)),
525        X86_REG_XMM2 | X86_REG_YMM2 => Some(Register(19)),
526        X86_REG_XMM3 | X86_REG_YMM3 => Some(Register(20)),
527        X86_REG_XMM4 | X86_REG_YMM4 => Some(Register(21)),
528        X86_REG_XMM5 | X86_REG_YMM5 => Some(Register(22)),
529        X86_REG_XMM6 | X86_REG_YMM6 => Some(Register(23)),
530        X86_REG_XMM7 | X86_REG_YMM7 => Some(Register(24)),
531
532        X86_REG_XMM8 | X86_REG_YMM8 => Some(Register(25)),
533        X86_REG_XMM9 | X86_REG_YMM9 => Some(Register(26)),
534        X86_REG_XMM10 | X86_REG_YMM10 => Some(Register(27)),
535        X86_REG_XMM11 | X86_REG_YMM11 => Some(Register(28)),
536        X86_REG_XMM12 | X86_REG_YMM12 => Some(Register(29)),
537        X86_REG_XMM13 | X86_REG_YMM13 => Some(Register(30)),
538        X86_REG_XMM14 | X86_REG_YMM14 => Some(Register(31)),
539        X86_REG_XMM15 | X86_REG_YMM15 => Some(Register(32)),
540
541        // Don't need RIP/EIP, there's never variables/parameters there.
542        X86_REG_INVALID | X86_REG_RIP | X86_REG_EIP => None,
543
544        _ => {
545            debug!("Unsupported x86 register {}", reg.0);
546            None
547        }
548    }
549}