ckb_debugger/
machine_analyzer.rs

1use crate::machine_assign::MachineAssign;
2use ckb_traits::{CellDataProvider, ExtensionProvider, HeaderProvider};
3use ckb_vm::cost_model::estimate_cycles;
4use ckb_vm::decoder::{Decoder, build_decoder};
5use ckb_vm::instructions::instruction_length;
6use ckb_vm::machine::VERSION0;
7use ckb_vm::registers::{A0, SP};
8use ckb_vm::{Bytes, CoreMachine, Error, ISA_MOP, Machine, Register, SupportMachine};
9use std::borrow::Cow;
10use std::cell::RefCell;
11use std::collections::HashMap;
12use std::io::BufRead;
13use std::rc::Rc;
14
/// Reader type used for debug-info lookups: a gimli `EndianReader` with run-time endianness over
/// reference-counted bytes.
type Addr2LineEndianReader = addr2line::gimli::EndianReader<addr2line::gimli::RunTimeEndian, Rc<[u8]>>;
/// addr2line lookup context specialised to [`Addr2LineEndianReader`].
type Addr2LineContext = addr2line::Context<Addr2LineEndianReader>;
/// Frame iterator produced by the context's `find_frames` lookup.
type Addr2LineFrameIter<'a> = addr2line::FrameIter<'a, Addr2LineEndianReader>;
18
19fn sprint_fun(frame_iter: &mut Addr2LineFrameIter) -> String {
20    let mut s = String::from("??");
21    loop {
22        if let Some(data) = frame_iter.next().unwrap() {
23            if let Some(function) = data.function {
24                s = String::from(addr2line::demangle_auto(Cow::from(function.raw_name().unwrap()), function.language));
25                continue;
26            }
27            continue;
28        }
29        break;
30    }
31    s
32}
33
34fn goblin_fun(elf: &goblin::elf::Elf) -> HashMap<u64, String> {
35    let mut map = HashMap::new();
36    for sym in &elf.syms {
37        if !sym.is_function() {
38            continue;
39        }
40        if let Some(Ok(r)) = elf.strtab.get(sym.st_name) {
41            map.insert(sym.st_value, r.to_string());
42        }
43    }
44    map
45}
46
47fn goblin_get_sym(elf: &goblin::elf::Elf, sym: &str) -> u64 {
48    for e in &elf.syms {
49        if let Some(Ok(r)) = elf.strtab.get(e.st_name) {
50            if r == sym {
51                return e.st_value;
52            }
53        }
54    }
55    return 0;
56}
57
/// One node of the call tree built by the profiler; each node represents a function invocation.
struct TrieNode {
    /// Address of the function this node stands for.
    addr: u64,
    /// Return address recorded when the function was entered.
    link: u64,
    /// Program counter most recently recorded for this node.
    pc: u64,
    /// Calling node, or `None` for the root.
    parent: Option<Rc<RefCell<TrieNode>>>,
    /// Nodes for calls made from this invocation, in call order.
    childs: Vec<Rc<RefCell<TrieNode>>>,
    /// Cycles attributed directly to this node.
    cycles: u64,
    /// Register snapshots: index 0 is taken on entry, index 1 on return.
    regs: [[u64; 32]; 2],
}

impl TrieNode {
    /// Creates an empty root node: all numeric fields zero, no parent and no children.
    fn root() -> Self {
        TrieNode {
            addr: 0,
            link: 0,
            pc: 0,
            parent: None,
            childs: Vec::new(),
            cycles: 0,
            regs: [[0u64; 32]; 2],
        }
    }
}
73
/// Source-level description of a code address: file, line and function name.
///
/// Unknown parts are stored as `"??"` (file, function) or `u32::MAX` (line).
#[derive(Clone, Debug)]
pub struct Tags {
    addr: u64,
    file: String,
    line: u32,
    func: String,
}

impl Tags {
    /// Creates a tag for `addr` with every source-level field marked unknown.
    fn new(addr: u64) -> Self {
        Self { addr, file: "??".to_string(), line: u32::MAX, func: "??".to_string() }
    }

    /// Returns the function name, or a synthetic `func_0x<addr>` name when it is unknown.
    pub fn func(&self) -> String {
        match self.func.as_str() {
            "??" => format!("func_0x{:x}", self.addr),
            known => known.to_string(),
        }
    }

    /// Returns `<file>:<function>`, using the synthetic name for unknown functions.
    pub fn simple(&self) -> String {
        let function = self.func();
        format!("{}:{}", self.file, function)
    }

    /// Returns `<file>:<line>:<function>`; an unknown line is printed as `??` and the raw
    /// (possibly `??`) function name is used.
    pub fn detail(&self) -> String {
        match self.line {
            u32::MAX => format!("{}:??:{}", self.file, self.func),
            line => format!("{}:{}:{}", self.file, line, self.func),
        }
    }
}
103
104pub struct MachineProfile {
105    addrctx: Addr2LineContext,
106    trie_root: Rc<RefCell<TrieNode>>,
107    trie_node: Rc<RefCell<TrieNode>>,
108    cache_tag: HashMap<u64, Tags>,
109    cache_fun: HashMap<u64, String>,
110}
111
112impl MachineProfile {
113    pub fn new(program: &Bytes) -> Result<Self, Box<dyn std::error::Error>> {
114        let object = addr2line::object::File::parse(program.as_ref())?;
115        let ctx = addr2line::Context::new(&object)?;
116        let trie_root = Rc::new(RefCell::new(TrieNode::root()));
117        let elf = goblin::elf::Elf::parse(&program)?;
118        trie_root.borrow_mut().addr = elf.entry;
119        Ok(Self {
120            addrctx: ctx,
121            trie_root: trie_root.clone(),
122            trie_node: trie_root,
123            cache_tag: HashMap::new(),
124            cache_fun: goblin_fun(&elf),
125        })
126    }
127
128    pub fn reset(&mut self, program: &Bytes) -> Result<(), Box<dyn std::error::Error>> {
129        let object = addr2line::object::File::parse(program.as_ref())?;
130        let ctx = addr2line::Context::new(&object)?;
131        let trie_root = Rc::new(RefCell::new(TrieNode::root()));
132        let elf = goblin::elf::Elf::parse(&program)?;
133        trie_root.borrow_mut().addr = elf.entry;
134        self.addrctx = ctx;
135        self.trie_root = trie_root.clone();
136        self.trie_node = trie_root;
137        self.cache_tag = HashMap::new();
138        self.cache_fun = goblin_fun(&elf);
139        Ok(())
140    }
141
142    pub fn get_tag(&mut self, addr: u64) -> Tags {
143        if let Some(data) = self.cache_tag.get(&addr) {
144            return data.clone();
145        }
146        let mut tag = Tags::new(addr);
147        let loc = self.addrctx.find_location(addr).unwrap();
148        if let Some(loc) = loc {
149            tag.file = loc.file.unwrap().to_string();
150            if let Some(line) = loc.line {
151                tag.line = line;
152            }
153        }
154        let mut frame_iter = self.addrctx.find_frames(addr).skip_all_loads().unwrap();
155        tag.func = sprint_fun(&mut frame_iter);
156        self.cache_tag.insert(addr, tag.clone());
157        tag
158    }
159
160    fn display_flamegraph_rec(&mut self, prefix: &str, node: Rc<RefCell<TrieNode>>, writer: &mut impl std::io::Write) {
161        let prefix_name = format!("{}{}", prefix, self.get_tag(node.borrow().addr).simple());
162        writer.write_all(format!("{} {}\n", prefix_name, node.borrow().cycles).as_bytes()).unwrap();
163        for e in &node.borrow().childs {
164            self.display_flamegraph_rec(format!("{}; ", prefix_name).as_str(), e.clone(), writer);
165        }
166        writer.flush().unwrap();
167    }
168
169    pub fn display_flamegraph(&mut self, writer: &mut impl std::io::Write) {
170        self.display_flamegraph_rec("", self.trie_root.clone(), writer);
171    }
172
173    pub fn display_stacktrace(&mut self, prefix: &str, writer: &mut impl std::io::Write) {
174        let mut frame = self.trie_node.clone();
175        let mut stack = vec![self.get_tag(frame.borrow().pc).detail()];
176        loop {
177            stack.push(self.get_tag(frame.borrow().link).detail());
178            let parent = frame.borrow().parent.clone();
179            if let Some(p) = parent {
180                frame = p.clone();
181            } else {
182                break;
183            }
184        }
185        stack.reverse();
186        for i in &stack {
187            writer.write_all(format!("{}{}\n", prefix, i).as_bytes()).unwrap();
188        }
189        writer.flush().unwrap();
190    }
191
192    pub fn step<DL>(&mut self, decoder: &mut Decoder, machine: &mut MachineAssign<DL>) -> Result<(), Error>
193    where
194        DL: CellDataProvider + HeaderProvider + ExtensionProvider + Send + Sync + Clone + 'static,
195    {
196        let pc = machine.pc().to_u64();
197        let inst = decoder.decode(machine.memory_mut(), pc)?;
198        let opcode = ckb_vm::instructions::extract_opcode(inst);
199        let cycles = estimate_cycles(inst);
200        self.trie_node.borrow_mut().cycles += cycles;
201        self.trie_node.borrow_mut().pc = pc;
202
203        let call = |s: &mut Self, addr: u64, link: u64| {
204            let mut regs = [[0; 32]; 2];
205            for i in 0..32 {
206                regs[0][i] = machine.registers()[i].to_u64();
207            }
208            let chd = Rc::new(RefCell::new(TrieNode {
209                addr: addr,
210                link: link,
211                pc: pc,
212                parent: Some(s.trie_node.clone()),
213                childs: vec![],
214                cycles: 0,
215                regs: regs,
216            }));
217            s.trie_node.borrow_mut().childs.push(chd.clone());
218            s.trie_node = chd;
219        };
220
221        let jump = |s: &mut Self, addr: u64| {
222            let mut f = s.trie_node.clone();
223            loop {
224                if f.borrow().link == addr {
225                    for i in 0..32 {
226                        s.trie_node.borrow_mut().regs[1][i] = machine.registers()[i].to_u64();
227                    }
228                    if let Some(p) = f.borrow().parent.clone() {
229                        s.trie_node = p.clone();
230                    } else {
231                        unimplemented!();
232                    }
233                    break;
234                }
235                let p = f.borrow().parent.clone();
236                if let Some(p) = p {
237                    f = p.clone();
238                } else {
239                    break;
240                }
241            }
242        };
243
244        if opcode == ckb_vm::instructions::insts::OP_JAL {
245            let inst_length = instruction_length(inst) as u64;
246            let inst = ckb_vm::instructions::Utype(inst);
247            let addr = pc.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
248            let link = pc + inst_length;
249            if self.cache_fun.contains_key(&addr) {
250                call(self, addr, link);
251                return Ok(());
252            }
253            jump(self, addr);
254            return Ok(());
255        };
256        if opcode == ckb_vm::instructions::insts::OP_JALR_VERSION0 {
257            let inst_length = instruction_length(inst) as u64;
258            let inst = ckb_vm::instructions::Itype(inst);
259            let base = machine.registers()[inst.rs1()].to_u64();
260            let addr = base.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
261            let link = pc + inst_length;
262            if self.cache_fun.contains_key(&addr) {
263                call(self, addr, link);
264                return Ok(());
265            }
266            jump(self, addr);
267            return Ok(());
268        };
269        if opcode == ckb_vm::instructions::insts::OP_JALR_VERSION1 {
270            let inst_length = instruction_length(inst) as u64;
271            let inst = ckb_vm::instructions::Itype(inst);
272            let base = machine.registers()[inst.rs1()].to_u64();
273            let addr = base.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
274            let link = pc + inst_length;
275            if self.cache_fun.contains_key(&addr) {
276                call(self, addr, link);
277                return Ok(());
278            }
279            jump(self, addr);
280            return Ok(());
281        };
282        if opcode == ckb_vm::instructions::insts::OP_FAR_JUMP_ABS {
283            let inst_length = instruction_length(inst) as u64;
284            let inst = ckb_vm::instructions::Utype(inst);
285            let addr = (inst.immediate_s() as u64) & 0xfffffffffffffffe;
286            let link = pc + inst_length;
287            if self.cache_fun.contains_key(&addr) {
288                call(self, addr, link);
289                return Ok(());
290            }
291            jump(self, addr);
292            return Ok(());
293        }
294        if opcode == ckb_vm::instructions::insts::OP_FAR_JUMP_REL {
295            let inst_length = instruction_length(inst) as u64;
296            let inst = ckb_vm::instructions::Utype(inst);
297            let addr = pc.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
298            let link = pc + inst_length;
299            if self.cache_fun.contains_key(&addr) {
300                call(self, addr, link);
301                return Ok(());
302            }
303            jump(self, addr);
304            return Ok(());
305        }
306        return Ok(());
307    }
308}
309
310pub struct MachineOverlap {
311    sbrk_addr: u64,
312    sbrk_heap: u64,
313}
314
315impl MachineOverlap {
316    pub fn new(program: &Bytes) -> Result<Self, Box<dyn std::error::Error>> {
317        let elf = goblin::elf::Elf::parse(&program)?;
318        Ok(Self { sbrk_addr: goblin_get_sym(&elf, "_sbrk"), sbrk_heap: goblin_get_sym(&elf, "_end") })
319    }
320
321    pub fn step<DL>(
322        &mut self,
323        decoder: &mut Decoder,
324        machine: &mut MachineAssign<DL>,
325        profile: &MachineProfile,
326    ) -> Result<(), Error>
327    where
328        DL: CellDataProvider + HeaderProvider + ExtensionProvider + Send + Sync + Clone + 'static,
329    {
330        let pc = machine.pc().to_u64();
331        let sp = machine.registers()[SP].to_u64();
332        if sp < self.sbrk_heap {
333            return Err(Error::External(format!("Heap and stack overlapping sp={} heap={}", sp, self.sbrk_heap)));
334        }
335        let inst = decoder.decode(machine.memory_mut(), pc)?;
336        let opcode = ckb_vm::instructions::extract_opcode(inst);
337        let addr = match opcode {
338            ckb_vm::instructions::insts::OP_JAL => {
339                let inst = ckb_vm::instructions::Utype(inst);
340                let addr = pc.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
341                addr
342            }
343            ckb_vm::instructions::insts::OP_JALR_VERSION0 => {
344                let inst = ckb_vm::instructions::Itype(inst);
345                let base = machine.registers()[inst.rs1()].to_u64();
346                let addr = base.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
347                addr
348            }
349            ckb_vm::instructions::insts::OP_JALR_VERSION1 => {
350                let inst = ckb_vm::instructions::Itype(inst);
351                let base = machine.registers()[inst.rs1()].to_u64();
352                let addr = base.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
353                addr
354            }
355            ckb_vm::instructions::insts::OP_FAR_JUMP_ABS => {
356                let inst = ckb_vm::instructions::Utype(inst);
357                let addr = (inst.immediate_s() as u64) & 0xfffffffffffffffe;
358                addr
359            }
360            ckb_vm::instructions::insts::OP_FAR_JUMP_REL => {
361                let inst = ckb_vm::instructions::Utype(inst);
362                let addr = pc.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
363                addr
364            }
365            _ => return Ok(()),
366        };
367
368        let mut f = profile.trie_node.clone();
369        loop {
370            if f.borrow().link == addr {
371                if profile.trie_node.borrow().addr == self.sbrk_addr {
372                    // https://github.com/nervosnetwork/riscv-newlib/blob/newlib-4.1.0-fork/libgloss/riscv/sys_sbrk.c#L49
373                    // Note incr could be negative.
374                    self.sbrk_heap = profile.trie_node.borrow().regs[0][A0].wrapping_add(machine.registers()[A0]);
375                }
376                break;
377            }
378            let p = f.borrow().parent.clone();
379            if let Some(p) = p {
380                f = p.clone();
381            } else {
382                break;
383            }
384        }
385
386        return Ok(());
387    }
388}
389
390pub struct MachineStepLog {}
391
392impl MachineStepLog {
393    pub fn new() -> Self {
394        Self {}
395    }
396
397    pub fn step<DL>(&mut self, machine: &mut MachineAssign<DL>) -> Result<(), Error>
398    where
399        DL: CellDataProvider + HeaderProvider + ExtensionProvider + Send + Sync + Clone + 'static,
400    {
401        println!("{}", machine);
402        Ok(())
403    }
404}
405
406pub struct MachineCoverage {
407    addrctx: Addr2LineContext,
408    pc_dict: HashMap<u64, u8>,
409    results: HashMap<String, Vec<u8>>,
410}
411
412impl MachineCoverage {
413    pub fn new(program: &Bytes) -> Result<Self, Box<dyn std::error::Error>> {
414        let object = addr2line::object::File::parse(program.as_ref())?;
415        let ctx = addr2line::Context::new(&object)?;
416        Ok(Self { addrctx: ctx, pc_dict: HashMap::new(), results: HashMap::new() })
417    }
418
419    pub fn step<DL>(&mut self, machine: &mut MachineAssign<DL>) -> Result<(), Error>
420    where
421        DL: CellDataProvider + HeaderProvider + ExtensionProvider + Send + Sync + Clone + 'static,
422    {
423        let pc = machine.pc().to_u64();
424        if self.pc_dict.get(&pc).map_or(0, |v| *v) != 0 {
425            return Ok(());
426        }
427        self.pc_dict.insert(pc, 1);
428
429        let location = self.addrctx.find_location(pc).unwrap();
430        if location.is_none() {
431            return Ok(());
432        }
433        let location = location.unwrap();
434        let file = location.file.unwrap().to_string();
435        let line = location.line;
436        if line.is_none() {
437            return Ok(());
438        }
439        let line = line.unwrap() as usize;
440        assert!(line > 0);
441        if !self.results.contains_key(&file) {
442            self.results.insert(file.clone(), vec![]);
443        }
444        let list = self.results.get_mut(&file).unwrap();
445        if line > list.len() {
446            list.resize(line, 0);
447        }
448        list[line - 1] = 1;
449        return Ok(());
450    }
451}
452
453impl MachineCoverage {
454    pub fn display_lcov(&mut self, writer: &mut impl std::io::Write) -> Result<(), Box<dyn std::error::Error>> {
455        // The lcov file format is a simple, text-based format used to store code coverage data generated by tools like
456        // gcov. It's typically used to represent line-level and function-level coverage, and branch coverage in some
457        // cases.
458        //
459        // Here's a breakdown of the format:
460        //
461        // TN: Test Name. Identifies the test case being reported.
462        // SF: Source File. Identifies the path of the source file.
463        // FN: Function Name. Represents a function.
464        // FNDA: Function Data. Indicates which functions are called.
465        // FNF: Function Found. Total number of functions found in the file.
466        // FNH: Function Hit. Total number of functions executed.
467        // DA: Data/Execution Count. Represents line numbers and their execution counts.
468        // LH: Lines Hit. The total number of lines with a non-zero execution count.
469        // LF: Lines Found. The total number of instrumented lines.
470        // BRDA: Branch Data. Specifies the branch numbers and their execution counts.
471        // BRF: Branch Found. The total number of branches.
472        // BRH: Branch Hit. The total number of branches executed.
473        // end_of_record: Marks the end of a file section.
474        for (name, list) in &self.results {
475            if !std::fs::exists(name).unwrap() {
476                continue;
477            }
478            writeln!(writer, "SF:{}", name)?;
479            for (i, hit) in list.iter().enumerate() {
480                writeln!(writer, "DA:{},{}", i + 1, *hit)?;
481            }
482            let lh = list.iter().filter(|&e| *e != 0).count();
483            let lf = std::io::BufReader::new(std::fs::File::open(name)?).lines().count();
484            for i in list.len()..lf {
485                writeln!(writer, "DA:{},{}", i + 1, 0)?;
486            }
487            writeln!(writer, "LH:{}", lf)?;
488            writeln!(writer, "LF:{}", lh)?;
489            writeln!(writer, "end_of_record")?;
490        }
491        writer.flush()?;
492        Ok(())
493    }
494}
495
/// A machine wrapper that runs optional analyzers before every executed instruction.
///
/// Each `enable_*` field switches the matching analyzer on when it is greater than zero.
pub struct MachineAnalyzer<DL>
where
    DL: CellDataProvider + HeaderProvider + ExtensionProvider + Send + Sync + Clone + 'static,
{
    /// Enables the heap/stack overlap check; it only runs when profiling is enabled too.
    pub enable_overlap: u8,
    /// Enables call-tree profiling.
    pub enable_profile: u8,
    /// Enables printing the machine state on every step.
    pub enable_steplog: u8,
    /// Enables line-coverage recording.
    pub enable_coverage: u8,
    /// The wrapped machine that actually executes instructions.
    pub machine: MachineAssign<DL>,
    /// Call-tree profiler state.
    pub profile: MachineProfile,
    /// Heap/stack overlap checker state.
    pub overlap: MachineOverlap,
    /// Step logger.
    pub steplog: MachineStepLog,
    /// Line-coverage recorder state.
    pub coverage: MachineCoverage,
}
510
511impl<DL> CoreMachine for MachineAnalyzer<DL>
512where
513    DL: CellDataProvider + HeaderProvider + ExtensionProvider + Send + Sync + Clone + 'static,
514{
515    type REG = u64;
516    type MEM = <MachineAssign<DL> as CoreMachine>::MEM;
517
518    fn pc(&self) -> &Self::REG {
519        &self.machine.pc()
520    }
521
522    fn update_pc(&mut self, pc: Self::REG) {
523        self.machine.update_pc(pc)
524    }
525
526    fn commit_pc(&mut self) {
527        self.machine.commit_pc()
528    }
529
530    fn memory(&self) -> &Self::MEM {
531        self.machine.memory()
532    }
533
534    fn memory_mut(&mut self) -> &mut Self::MEM {
535        self.machine.memory_mut()
536    }
537
538    fn registers(&self) -> &[Self::REG] {
539        self.machine.registers()
540    }
541
542    fn set_register(&mut self, idx: usize, value: Self::REG) {
543        self.machine.set_register(idx, value)
544    }
545
546    fn isa(&self) -> u8 {
547        self.machine.isa()
548    }
549
550    fn version(&self) -> u32 {
551        self.machine.version()
552    }
553}
554
/// `Machine` implementation that forwards environment calls and breakpoints to the wrapped machine.
impl<DL> Machine for MachineAnalyzer<DL>
where
    DL: CellDataProvider + HeaderProvider + ExtensionProvider + Send + Sync + Clone + 'static,
{
    /// Delegates `ecall` handling to the wrapped machine and returns its result.
    fn ecall(&mut self) -> Result<(), Error> {
        self.machine.ecall()
    }

    /// Delegates `ebreak` handling to the wrapped machine and returns its result.
    fn ebreak(&mut self) -> Result<(), Error> {
        self.machine.ebreak()
    }
}
567
/// Formats the analyzer exactly as the wrapped machine formats itself.
impl<DL> std::fmt::Display for MachineAnalyzer<DL>
where
    DL: CellDataProvider + HeaderProvider + ExtensionProvider + Send + Sync + Clone + 'static,
{
    /// Delegates to the wrapped machine's `fmt`, passing the formatter through unchanged.
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        self.machine.fmt(f)
    }
}
576
577impl<DL> MachineAnalyzer<DL>
578where
579    DL: CellDataProvider + HeaderProvider + ExtensionProvider + Send + Sync + Clone + 'static,
580{
581    pub fn new(
582        machine: MachineAssign<DL>,
583        profile: MachineProfile,
584        overlap: MachineOverlap,
585        steplog: MachineStepLog,
586        coverage: MachineCoverage,
587    ) -> Self {
588        Self {
589            enable_overlap: 0,
590            enable_profile: 1,
591            enable_steplog: 0,
592            enable_coverage: 0,
593            machine,
594            profile,
595            overlap,
596            steplog,
597            coverage,
598        }
599    }
600
601    pub fn run(&mut self) -> Result<i8, Error> {
602        if self.isa() & ISA_MOP != 0 && self.version() == VERSION0 {
603            return Err(Error::InvalidVersion);
604        }
605        let mut decoder = build_decoder::<u64>(self.isa(), self.version());
606        self.machine.set_running(true);
607        while self.machine.running() {
608            if self.machine.reset_signal() {
609                decoder.reset_instructions_cache();
610                self.profile = MachineProfile::new(&self.machine.code()).unwrap();
611            }
612            if self.enable_profile > 0 && self.enable_overlap > 0 {
613                self.overlap.step(&mut decoder, &mut self.machine, &self.profile)?;
614            }
615            if self.enable_profile > 0 {
616                self.profile.step(&mut decoder, &mut self.machine)?;
617            }
618            if self.enable_steplog > 0 {
619                self.steplog.step(&mut self.machine)?;
620            }
621            if self.enable_coverage > 0 {
622                self.coverage.step(&mut self.machine)?;
623            }
624            self.machine.step(&mut decoder)?;
625        }
626        Ok(self.machine.exit_code())
627    }
628}