Skip to main content

ckb_debugger/
machine_analyzer.rs

1use ckb_vm::cost_model::estimate_cycles;
2use ckb_vm::decoder::{Decoder, build_decoder};
3use ckb_vm::instructions::instruction_length;
4use ckb_vm::machine::VERSION0;
5use ckb_vm::registers::{A0, SP};
6use ckb_vm::{
7    Bytes, CoreMachine, DefaultCoreMachine, DefaultMachine, Error, FlatMemory, ISA_MOP, Machine, Register,
8    SupportMachine, WXorXMemory,
9};
10use std::borrow::Cow;
11use std::cell::RefCell;
12use std::collections::HashMap;
13use std::io::{BufRead, Write};
14use std::rc::Rc;
15
16type Addr2LineEndianReader = addr2line::gimli::EndianReader<addr2line::gimli::RunTimeEndian, Rc<[u8]>>;
17type Addr2LineContext = addr2line::Context<Addr2LineEndianReader>;
18type Addr2LineFrameIter<'a> = addr2line::FrameIter<'a, Addr2LineEndianReader>;
19
20fn sprint_fun(frame_iter: &mut Addr2LineFrameIter) -> String {
21    let mut s = String::from("??");
22    loop {
23        if let Some(data) = frame_iter.next().unwrap() {
24            if let Some(function) = data.function {
25                s = String::from(addr2line::demangle_auto(Cow::from(function.raw_name().unwrap()), function.language));
26                continue;
27            }
28            continue;
29        }
30        break;
31    }
32    s
33}
34
35fn goblin_fun(elf: &goblin::elf::Elf) -> HashMap<u64, String> {
36    let mut map = HashMap::new();
37    for sym in &elf.syms {
38        if !sym.is_function() {
39            continue;
40        }
41        if let Some(Ok(r)) = elf.strtab.get(sym.st_name) {
42            map.insert(sym.st_value, r.to_string());
43        }
44    }
45    map
46}
47
48fn goblin_get_sym(elf: &goblin::elf::Elf, sym: &str) -> u64 {
49    for e in &elf.syms {
50        if let Some(Ok(r)) = elf.strtab.get(e.st_name) {
51            if r == sym {
52                return e.st_value;
53            }
54        }
55    }
56    return 0;
57}
58
59struct TrieNode {
60    addr: u64,
61    link: u64,
62    pc: u64,
63    parent: Option<Rc<RefCell<TrieNode>>>,
64    childs: Vec<Rc<RefCell<TrieNode>>>,
65    cycles: u64,
66    regs: [[u64; 32]; 2],
67}
68
69impl TrieNode {
70    fn root() -> Self {
71        Self { addr: 0, link: 0, pc: 0, parent: None, childs: vec![], cycles: 0, regs: [[0; 32]; 2] }
72    }
73}
74
75#[derive(Clone, Debug)]
76pub struct Tags {
77    addr: u64,
78    file: String,
79    line: u32,
80    func: String,
81}
82
83impl Tags {
84    fn new(addr: u64) -> Self {
85        Tags { addr, file: String::from("??"), line: 0xffffffff, func: String::from("??") }
86    }
87
88    pub fn func(&self) -> String {
89        if self.func != "??" { self.func.clone() } else { format!("func_0x{:x}", self.addr) }
90    }
91
92    pub fn simple(&self) -> String {
93        format!("{}:{}", self.file, self.func())
94    }
95
96    pub fn detail(&self) -> String {
97        if self.line == 0xffffffff {
98            format!("{}:??:{}", self.file, self.func)
99        } else {
100            format!("{}:{}:{}", self.file, self.line, self.func)
101        }
102    }
103}
104
105pub struct MachineFlamegraph {
106    addrctx: Addr2LineContext,
107    trie_root: Rc<RefCell<TrieNode>>,
108    trie_node: Rc<RefCell<TrieNode>>,
109    cache_tag: HashMap<u64, Tags>,
110    cache_fun: HashMap<u64, String>,
111}
112
113impl MachineFlamegraph {
114    pub fn new(program: &Bytes) -> Result<Self, Box<dyn std::error::Error>> {
115        let object = addr2line::object::File::parse(program.as_ref())?;
116        let ctx = addr2line::Context::new(&object)?;
117        let trie_root = Rc::new(RefCell::new(TrieNode::root()));
118        let elf = goblin::elf::Elf::parse(&program)?;
119        trie_root.borrow_mut().addr = elf.entry;
120        Ok(Self {
121            addrctx: ctx,
122            trie_root: trie_root.clone(),
123            trie_node: trie_root,
124            cache_tag: HashMap::new(),
125            cache_fun: goblin_fun(&elf),
126        })
127    }
128
129    pub fn reset(&mut self, program: &Bytes) -> Result<(), Box<dyn std::error::Error>> {
130        let object = addr2line::object::File::parse(program.as_ref())?;
131        let ctx = addr2line::Context::new(&object)?;
132        let trie_root = Rc::new(RefCell::new(TrieNode::root()));
133        let elf = goblin::elf::Elf::parse(&program)?;
134        trie_root.borrow_mut().addr = elf.entry;
135        self.addrctx = ctx;
136        self.trie_root = trie_root.clone();
137        self.trie_node = trie_root;
138        self.cache_tag = HashMap::new();
139        self.cache_fun = goblin_fun(&elf);
140        Ok(())
141    }
142
143    pub fn get_tag(&mut self, addr: u64) -> Tags {
144        if let Some(data) = self.cache_tag.get(&addr) {
145            return data.clone();
146        }
147        let mut tag = Tags::new(addr);
148        let loc = self.addrctx.find_location(addr).unwrap();
149        if let Some(loc) = loc {
150            tag.file = loc.file.unwrap().to_string();
151            if let Some(line) = loc.line {
152                tag.line = line;
153            }
154        }
155        let mut frame_iter = self.addrctx.find_frames(addr).skip_all_loads().unwrap();
156        tag.func = sprint_fun(&mut frame_iter);
157        self.cache_tag.insert(addr, tag.clone());
158        tag
159    }
160
161    fn display_flamegraph_rec(&mut self, prefix: &str, node: Rc<RefCell<TrieNode>>, writer: &mut impl std::io::Write) {
162        let prefix_name = format!("{}{}", prefix, self.get_tag(node.borrow().addr).simple());
163        writer.write_all(format!("{} {}\n", prefix_name, node.borrow().cycles).as_bytes()).unwrap();
164        for e in &node.borrow().childs {
165            self.display_flamegraph_rec(format!("{}; ", prefix_name).as_str(), e.clone(), writer);
166        }
167        writer.flush().unwrap();
168    }
169
170    pub fn display_flamegraph(&mut self, writer: &mut impl std::io::Write) {
171        self.display_flamegraph_rec("", self.trie_root.clone(), writer);
172    }
173
174    pub fn display_stacktrace(&mut self, prefix: &str, writer: &mut impl std::io::Write) {
175        let mut frame = self.trie_node.clone();
176        let mut stack = vec![self.get_tag(frame.borrow().pc).detail()];
177        loop {
178            stack.push(self.get_tag(frame.borrow().link).detail());
179            let parent = frame.borrow().parent.clone();
180            if let Some(p) = parent {
181                frame = p.clone();
182            } else {
183                break;
184            }
185        }
186        stack.reverse();
187        for i in &stack {
188            writer.write_all(format!("{}{}\n", prefix, i).as_bytes()).unwrap();
189        }
190        writer.flush().unwrap();
191    }
192
193    pub fn step(
194        &mut self,
195        decoder: &mut Decoder,
196        machine: &mut DefaultMachine<DefaultCoreMachine<u64, WXorXMemory<FlatMemory<u64>>>>,
197    ) -> Result<(), Error> {
198        let pc = machine.pc().to_u64();
199        let inst = decoder.decode(machine.memory_mut(), pc)?;
200        let opcode = ckb_vm::instructions::extract_opcode(inst);
201        let cycles = estimate_cycles(inst);
202        self.trie_node.borrow_mut().cycles += cycles;
203        self.trie_node.borrow_mut().pc = pc;
204
205        let call = |s: &mut Self, addr: u64, link: u64| {
206            let mut regs = [[0; 32]; 2];
207            for i in 0..32 {
208                regs[0][i] = machine.registers()[i].to_u64();
209            }
210            let chd = Rc::new(RefCell::new(TrieNode {
211                addr: addr,
212                link: link,
213                pc: pc,
214                parent: Some(s.trie_node.clone()),
215                childs: vec![],
216                cycles: 0,
217                regs: regs,
218            }));
219            s.trie_node.borrow_mut().childs.push(chd.clone());
220            s.trie_node = chd;
221        };
222
223        let jump = |s: &mut Self, addr: u64| {
224            let mut f = s.trie_node.clone();
225            loop {
226                if f.borrow().link == addr {
227                    for i in 0..32 {
228                        s.trie_node.borrow_mut().regs[1][i] = machine.registers()[i].to_u64();
229                    }
230                    if let Some(p) = f.borrow().parent.clone() {
231                        s.trie_node = p.clone();
232                    } else {
233                        unimplemented!();
234                    }
235                    break;
236                }
237                let p = f.borrow().parent.clone();
238                if let Some(p) = p {
239                    f = p.clone();
240                } else {
241                    break;
242                }
243            }
244        };
245
246        if opcode == ckb_vm::instructions::insts::OP_JAL {
247            let inst_length = instruction_length(inst) as u64;
248            let inst = ckb_vm::instructions::Utype(inst);
249            let addr = pc.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
250            let link = pc + inst_length;
251            if self.cache_fun.contains_key(&addr) {
252                call(self, addr, link);
253                return Ok(());
254            }
255            jump(self, addr);
256            return Ok(());
257        };
258        if opcode == ckb_vm::instructions::insts::OP_JALR_VERSION0 {
259            let inst_length = instruction_length(inst) as u64;
260            let inst = ckb_vm::instructions::Itype(inst);
261            let base = machine.registers()[inst.rs1()].to_u64();
262            let addr = base.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
263            let link = pc + inst_length;
264            if self.cache_fun.contains_key(&addr) {
265                call(self, addr, link);
266                return Ok(());
267            }
268            jump(self, addr);
269            return Ok(());
270        };
271        if opcode == ckb_vm::instructions::insts::OP_JALR_VERSION1 {
272            let inst_length = instruction_length(inst) as u64;
273            let inst = ckb_vm::instructions::Itype(inst);
274            let base = machine.registers()[inst.rs1()].to_u64();
275            let addr = base.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
276            let link = pc + inst_length;
277            if self.cache_fun.contains_key(&addr) {
278                call(self, addr, link);
279                return Ok(());
280            }
281            jump(self, addr);
282            return Ok(());
283        };
284        if opcode == ckb_vm::instructions::insts::OP_FAR_JUMP_ABS {
285            let inst_length = instruction_length(inst) as u64;
286            let inst = ckb_vm::instructions::Utype(inst);
287            let addr = (inst.immediate_s() as u64) & 0xfffffffffffffffe;
288            let link = pc + inst_length;
289            if self.cache_fun.contains_key(&addr) {
290                call(self, addr, link);
291                return Ok(());
292            }
293            jump(self, addr);
294            return Ok(());
295        }
296        if opcode == ckb_vm::instructions::insts::OP_FAR_JUMP_REL {
297            let inst_length = instruction_length(inst) as u64;
298            let inst = ckb_vm::instructions::Utype(inst);
299            let addr = pc.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
300            let link = pc + inst_length;
301            if self.cache_fun.contains_key(&addr) {
302                call(self, addr, link);
303                return Ok(());
304            }
305            jump(self, addr);
306            return Ok(());
307        }
308        return Ok(());
309    }
310}
311
312pub struct MachineOverlap {
313    sbrk_addr: u64,
314    sbrk_heap: u64,
315}
316
317impl MachineOverlap {
318    pub fn new(program: &Bytes) -> Result<Self, Box<dyn std::error::Error>> {
319        let elf = goblin::elf::Elf::parse(&program)?;
320        Ok(Self { sbrk_addr: goblin_get_sym(&elf, "_sbrk"), sbrk_heap: goblin_get_sym(&elf, "_end") })
321    }
322
323    pub fn step(
324        &mut self,
325        decoder: &mut Decoder,
326        machine: &mut DefaultMachine<DefaultCoreMachine<u64, WXorXMemory<FlatMemory<u64>>>>,
327        flamegraph: &MachineFlamegraph,
328    ) -> Result<(), Error> {
329        let pc = machine.pc().to_u64();
330        let sp = machine.registers()[SP].to_u64();
331        if sp < self.sbrk_heap {
332            return Err(Error::External(format!("Heap and stack overlapping sp={} heap={}", sp, self.sbrk_heap)));
333        }
334        let inst = decoder.decode(machine.memory_mut(), pc)?;
335        let opcode = ckb_vm::instructions::extract_opcode(inst);
336        let addr = match opcode {
337            ckb_vm::instructions::insts::OP_JAL => {
338                let inst = ckb_vm::instructions::Utype(inst);
339                let addr = pc.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
340                addr
341            }
342            ckb_vm::instructions::insts::OP_JALR_VERSION0 => {
343                let inst = ckb_vm::instructions::Itype(inst);
344                let base = machine.registers()[inst.rs1()].to_u64();
345                let addr = base.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
346                addr
347            }
348            ckb_vm::instructions::insts::OP_JALR_VERSION1 => {
349                let inst = ckb_vm::instructions::Itype(inst);
350                let base = machine.registers()[inst.rs1()].to_u64();
351                let addr = base.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
352                addr
353            }
354            ckb_vm::instructions::insts::OP_FAR_JUMP_ABS => {
355                let inst = ckb_vm::instructions::Utype(inst);
356                let addr = (inst.immediate_s() as u64) & 0xfffffffffffffffe;
357                addr
358            }
359            ckb_vm::instructions::insts::OP_FAR_JUMP_REL => {
360                let inst = ckb_vm::instructions::Utype(inst);
361                let addr = pc.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
362                addr
363            }
364            _ => return Ok(()),
365        };
366
367        let mut f = flamegraph.trie_node.clone();
368        loop {
369            if f.borrow().link == addr {
370                if flamegraph.trie_node.borrow().addr == self.sbrk_addr {
371                    // https://github.com/nervosnetwork/riscv-newlib/blob/newlib-4.1.0-fork/libgloss/riscv/sys_sbrk.c#L49
372                    // Note incr could be negative.
373                    self.sbrk_heap = flamegraph.trie_node.borrow().regs[0][A0].wrapping_add(machine.registers()[A0]);
374                }
375                break;
376            }
377            let p = f.borrow().parent.clone();
378            if let Some(p) = p {
379                f = p.clone();
380            } else {
381                break;
382            }
383        }
384
385        return Ok(());
386    }
387}
388
389pub struct MachineStepLog {
390    file: Option<std::fs::File>,
391    name: String,
392}
393
394impl MachineStepLog {
395    pub fn new(filename: &str) -> Self {
396        Self { file: None, name: filename.to_string() }
397    }
398
399    pub fn step(
400        &mut self,
401        machine: &mut DefaultMachine<DefaultCoreMachine<u64, WXorXMemory<FlatMemory<u64>>>>,
402    ) -> Result<(), Error> {
403        match self.file {
404            Some(ref mut data) => {
405                data.write_all(format!("{}", machine).as_bytes())?;
406            }
407            None => {
408                let mut data = std::fs::File::create(&self.name).unwrap();
409                data.write_all(format!("{}", machine).as_bytes())?;
410                self.file = Some(data)
411            }
412        }
413        Ok(())
414    }
415}
416
417pub struct MachineCoverage {
418    addrctx: Addr2LineContext,
419    pc_dict: HashMap<u64, u8>,
420    results: HashMap<String, Vec<u8>>,
421}
422
423impl MachineCoverage {
424    pub fn new(program: &Bytes) -> Result<Self, Box<dyn std::error::Error>> {
425        let object = addr2line::object::File::parse(program.as_ref())?;
426        let ctx = addr2line::Context::new(&object)?;
427        Ok(Self { addrctx: ctx, pc_dict: HashMap::new(), results: HashMap::new() })
428    }
429
430    pub fn step(
431        &mut self,
432        machine: &mut DefaultMachine<DefaultCoreMachine<u64, WXorXMemory<FlatMemory<u64>>>>,
433    ) -> Result<(), Error> {
434        let pc = machine.pc().to_u64();
435        if self.pc_dict.get(&pc).map_or(0, |v| *v) != 0 {
436            return Ok(());
437        }
438        self.pc_dict.insert(pc, 1);
439
440        let location = self.addrctx.find_location(pc).unwrap();
441        if location.is_none() {
442            return Ok(());
443        }
444        let location = location.unwrap();
445        let file = location.file.unwrap().to_string();
446        let line = location.line;
447        if line.is_none() {
448            return Ok(());
449        }
450        let line = line.unwrap() as usize;
451        assert!(line > 0);
452        if !self.results.contains_key(&file) {
453            self.results.insert(file.clone(), vec![]);
454        }
455        let list = self.results.get_mut(&file).unwrap();
456        if line > list.len() {
457            list.resize(line, 0);
458        }
459        list[line - 1] = 1;
460        return Ok(());
461    }
462}
463
464impl MachineCoverage {
465    pub fn display_lcov(&mut self, writer: &mut impl std::io::Write) -> Result<(), Box<dyn std::error::Error>> {
466        // The lcov file format is a simple, text-based format used to store code coverage data generated by tools like
467        // gcov. It's typically used to represent line-level and function-level coverage, and branch coverage in some
468        // cases.
469        //
470        // Here's a breakdown of the format:
471        //
472        // TN: Test Name. Identifies the test case being reported.
473        // SF: Source File. Identifies the path of the source file.
474        // FN: Function Name. Represents a function.
475        // FNDA: Function Data. Indicates which functions are called.
476        // FNF: Function Found. Total number of functions found in the file.
477        // FNH: Function Hit. Total number of functions executed.
478        // DA: Data/Execution Count. Represents line numbers and their execution counts.
479        // LH: Lines Hit. The total number of lines with a non-zero execution count.
480        // LF: Lines Found. The total number of instrumented lines.
481        // BRDA: Branch Data. Specifies the branch numbers and their execution counts.
482        // BRF: Branch Found. The total number of branches.
483        // BRH: Branch Hit. The total number of branches executed.
484        // end_of_record: Marks the end of a file section.
485        for (name, list) in &self.results {
486            if !std::fs::exists(name).unwrap() {
487                continue;
488            }
489            writeln!(writer, "SF:{}", name)?;
490            for (i, hit) in list.iter().enumerate() {
491                writeln!(writer, "DA:{},{}", i + 1, *hit)?;
492            }
493            let lh = list.iter().filter(|&e| *e != 0).count();
494            let lf = std::io::BufReader::new(std::fs::File::open(name)?).lines().count();
495            for i in list.len()..lf {
496                writeln!(writer, "DA:{},{}", i + 1, 0)?;
497            }
498            writeln!(writer, "LH:{}", lf)?;
499            writeln!(writer, "LF:{}", lh)?;
500            writeln!(writer, "end_of_record")?;
501        }
502        writer.flush()?;
503        Ok(())
504    }
505}
506
507pub struct MachineAnalyzer {
508    pub enable_coverage: u8,
509    pub enable_flamegraph: u8,
510    pub enable_overlap: u8,
511    pub enable_steplog: u8,
512    pub machine: DefaultMachine<DefaultCoreMachine<u64, WXorXMemory<FlatMemory<u64>>>>,
513    pub coverage: MachineCoverage,
514    pub flamegraph: MachineFlamegraph,
515    pub overlap: MachineOverlap,
516    pub steplog: MachineStepLog,
517}
518
519impl CoreMachine for MachineAnalyzer {
520    type REG = u64;
521    type MEM = WXorXMemory<FlatMemory<u64>>;
522
523    fn pc(&self) -> &Self::REG {
524        &self.machine.pc()
525    }
526
527    fn update_pc(&mut self, pc: Self::REG) {
528        self.machine.update_pc(pc)
529    }
530
531    fn commit_pc(&mut self) {
532        self.machine.commit_pc()
533    }
534
535    fn memory(&self) -> &Self::MEM {
536        self.machine.memory()
537    }
538
539    fn memory_mut(&mut self) -> &mut Self::MEM {
540        self.machine.memory_mut()
541    }
542
543    fn registers(&self) -> &[Self::REG] {
544        self.machine.registers()
545    }
546
547    fn set_register(&mut self, idx: usize, value: Self::REG) {
548        self.machine.set_register(idx, value)
549    }
550
551    fn isa(&self) -> u8 {
552        self.machine.isa()
553    }
554
555    fn version(&self) -> u32 {
556        self.machine.version()
557    }
558}
559
560impl Machine for MachineAnalyzer {
561    fn ecall(&mut self) -> Result<(), Error> {
562        self.machine.ecall()
563    }
564
565    fn ebreak(&mut self) -> Result<(), Error> {
566        self.machine.ebreak()
567    }
568}
569
570impl std::fmt::Display for MachineAnalyzer {
571    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
572        self.machine.fmt(f)
573    }
574}
575
576impl MachineAnalyzer {
577    pub fn new(
578        machine: DefaultMachine<DefaultCoreMachine<u64, WXorXMemory<FlatMemory<u64>>>>,
579        coverage: MachineCoverage,
580        flamegraph: MachineFlamegraph,
581        overlap: MachineOverlap,
582        steplog: MachineStepLog,
583    ) -> Self {
584        Self {
585            enable_coverage: 0,
586            enable_flamegraph: 0,
587            enable_overlap: 0,
588            enable_steplog: 0,
589            machine,
590            coverage,
591            flamegraph,
592            overlap,
593            steplog,
594        }
595    }
596
597    pub fn run(&mut self) -> Result<i8, Error> {
598        if self.isa() & ISA_MOP != 0 && self.version() == VERSION0 {
599            return Err(Error::InvalidVersion);
600        }
601        let mut decoder = build_decoder::<u64>(self.isa(), self.version());
602        self.machine.set_running(true);
603        while self.machine.running() {
604            if self.machine.reset_signal() {
605                decoder.reset_instructions_cache();
606                self.flamegraph = MachineFlamegraph::new(&self.machine.code()).unwrap();
607            }
608            if self.enable_coverage > 0 {
609                self.coverage.step(&mut self.machine)?;
610            }
611            if self.enable_flamegraph > 0 {
612                self.flamegraph.step(&mut decoder, &mut self.machine)?;
613            }
614            if self.enable_flamegraph > 0 && self.enable_overlap > 0 {
615                self.overlap.step(&mut decoder, &mut self.machine, &self.flamegraph)?;
616            }
617            if self.enable_steplog > 0 {
618                self.steplog.step(&mut self.machine)?;
619            }
620            self.machine.step(&mut decoder)?;
621        }
622        Ok(self.machine.exit_code())
623    }
624}