ckb_vm_pprof/
lib.rs

1use std::borrow::Cow;
2use std::cell::RefCell;
3use std::collections::HashMap;
4use std::rc::Rc;
5
6use ckb_vm::decoder::{Decoder, build_decoder};
7use ckb_vm::instructions::instruction_length;
8use ckb_vm::machine::{DefaultMachine, DefaultMachineBuilder, VERSION0};
9use ckb_vm::memory::Memory;
10use ckb_vm::registers::{A0, SP};
11use ckb_vm::{
12    Bytes, CoreMachine, DefaultCoreMachine, Error, ISA_MOP, Machine, Register, SparseMemory, SupportMachine, Syscalls,
13    WXorXMemory, cost_model,
14};
15pub use cost_model::estimate_cycles;
16
/// gimli reader used for all debug-info lookups: a reference-counted byte
/// slice (`Rc<[u8]>`) whose endianness is selected at run time.
type Addr2LineEndianReader = addr2line::gimli::EndianReader<addr2line::gimli::RunTimeEndian, Rc<[u8]>>;
/// addr2line lookup context specialised to [`Addr2LineEndianReader`].
type Addr2LineContext = addr2line::Context<Addr2LineEndianReader>;
/// Frame iterator produced by an [`Addr2LineContext`] frame lookup.
type Addr2LineFrameIter<'a> = addr2line::FrameIter<'a, Addr2LineEndianReader>;
20
21fn sprint_fun(frame_iter: &mut Addr2LineFrameIter) -> String {
22    let mut s = String::from("??");
23    loop {
24        if let Some(data) = frame_iter.next().unwrap() {
25            if let Some(function) = data.function {
26                s = String::from(addr2line::demangle_auto(Cow::from(function.raw_name().unwrap()), function.language));
27                continue;
28            }
29            continue;
30        }
31        break;
32    }
33    s
34}
35
36fn goblin_fun(elf: &goblin::elf::Elf) -> HashMap<u64, String> {
37    let mut map = HashMap::new();
38    for sym in &elf.syms {
39        if !sym.is_function() {
40            continue;
41        }
42        if let Some(Ok(r)) = elf.strtab.get(sym.st_name) {
43            map.insert(sym.st_value, r.to_string());
44        }
45    }
46    map
47}
48
49fn goblin_get_sym(elf: &goblin::elf::Elf, sym: &str) -> u64 {
50    for e in &elf.syms {
51        if let Some(Ok(r)) = elf.strtab.get(e.st_name) {
52            if r == sym {
53                return e.st_value;
54            }
55        }
56    }
57    return 0;
58}
59
/// One call frame in the call trie recorded while profiling.
struct TrieNode {
    /// Address of the function this frame belongs to.
    addr: u64,
    /// Address execution resumes at once this frame is left.
    link: u64,
    /// Most recent program counter recorded for this frame.
    pc: u64,
    /// Calling frame; `None` for the root.
    parent: Option<Rc<RefCell<TrieNode>>>,
    /// Frames called from this one, in call order.
    childs: Vec<Rc<RefCell<TrieNode>>>,
    /// Cycles accumulated while this frame was the current one.
    cycles: u64,
    /// Register snapshots: index 0 taken on entry, index 1 taken on exit.
    regs: [[u64; 32]; 2],
}

impl TrieNode {
    /// Creates the parentless root frame with every field zeroed.
    fn root() -> Self {
        Self {
            addr: 0,
            link: 0,
            pc: 0,
            parent: None,
            childs: Vec::new(),
            cycles: 0,
            regs: [[0; 32]; 2],
        }
    }
}
75
/// Source-level description of a code address: file, line and function.
#[derive(Clone, Debug)]
pub struct Tags {
    /// The address being described.
    addr: u64,
    /// Source file, or `"??"` when unknown.
    file: String,
    /// Source line, or `0xffffffff` when unknown.
    line: u32,
    /// Function name, or `"??"` when unknown.
    func: String,
}

impl Tags {
    /// Placeholder text for an unknown file or function.
    const UNKNOWN: &'static str = "??";
    /// Sentinel line number meaning "no line information".
    const NO_LINE: u32 = u32::MAX;

    /// Creates a tag for `addr` with every source detail marked unknown.
    fn new(addr: u64) -> Self {
        Tags {
            addr,
            file: Self::UNKNOWN.to_string(),
            line: Self::NO_LINE,
            func: Self::UNKNOWN.to_string(),
        }
    }

    /// Function name, falling back to `func_0x<addr>` when it is unknown.
    pub fn func(&self) -> String {
        if self.func == Self::UNKNOWN {
            return format!("func_0x{:x}", self.addr);
        }
        self.func.clone()
    }

    /// Short `file:function` label, using the [`Tags::func`] fallback name.
    pub fn simple(&self) -> String {
        let function = self.func();
        format!("{}:{}", self.file, function)
    }

    /// Full `file:line:function` label; an unknown line is printed as `??`
    /// and the function name is printed as stored (no fallback).
    pub fn detail(&self) -> String {
        let line = match self.line {
            Self::NO_LINE => Self::UNKNOWN.to_string(),
            known => known.to_string(),
        };
        format!("{}:{}:{}", self.file, line, self.func)
    }
}
105
/// Profiling state for one program: symbol lookup data plus the call trie
/// that is extended as instructions are stepped.
pub struct Profile {
    /// addr2line context built from the program, used to resolve addresses.
    addrctx: Addr2LineContext,
    /// Root of the call trie; its address is the ELF entry point.
    trie_root: Rc<RefCell<TrieNode>>,
    /// Frame currently being executed (cycles are charged to this node).
    trie_node: Rc<RefCell<TrieNode>>,
    /// Memoized results of `get_tag`, keyed by address.
    cache_tag: HashMap<u64, Tags>,
    /// Function symbols from the ELF symbol table: address -> name.
    cache_fun: HashMap<u64, String>,
    /// Value of the `_sbrk` symbol, or 0 when the program has none.
    sbrk_addr: u64,
    /// Current heap end: starts at the `_end` symbol (0 if absent) and is
    /// recomputed whenever a frame for `_sbrk` is left.
    sbrk_heap: u64,
    /// When true, the stack-pointer-below-heap-end check in `step` is skipped.
    disable_overlapping_detection: bool,
}
116
117impl Profile {
118    pub fn new(program: &Bytes) -> Result<Self, Box<dyn std::error::Error>> {
119        let object = addr2line::object::File::parse(program.as_ref())?;
120        let ctx = addr2line::Context::new(&object)?;
121        let trie_root = Rc::new(RefCell::new(TrieNode::root()));
122        let elf = goblin::elf::Elf::parse(&program)?;
123        trie_root.borrow_mut().addr = elf.entry;
124        Ok(Self {
125            addrctx: ctx,
126            trie_root: trie_root.clone(),
127            trie_node: trie_root,
128            cache_tag: HashMap::new(),
129            cache_fun: goblin_fun(&elf),
130            sbrk_addr: goblin_get_sym(&elf, "_sbrk"),
131            sbrk_heap: goblin_get_sym(&elf, "_end"),
132            disable_overlapping_detection: false,
133        })
134    }
135
136    pub fn set_disable_overlapping_detection(mut self, disable_detection: bool) -> Self {
137        self.disable_overlapping_detection = disable_detection;
138        self
139    }
140
141    pub fn get_tag(&mut self, addr: u64) -> Tags {
142        if let Some(data) = self.cache_tag.get(&addr) {
143            return data.clone();
144        }
145        let mut tag = Tags::new(addr);
146        let loc = self.addrctx.find_location(addr).unwrap();
147        if let Some(loc) = loc {
148            tag.file = loc.file.as_ref().unwrap().to_string();
149            if let Some(line) = loc.line {
150                tag.line = line;
151            }
152        }
153        let mut frame_iter = self.addrctx.find_frames(addr).skip_all_loads().unwrap();
154        tag.func = sprint_fun(&mut frame_iter);
155        self.cache_tag.insert(addr, tag.clone());
156        tag
157    }
158
159    fn display_flamegraph_rec(&mut self, prefix: &str, node: Rc<RefCell<TrieNode>>, writer: &mut impl std::io::Write) {
160        let prefix_name = format!("{}{}", prefix, self.get_tag(node.borrow().addr).simple());
161        writer.write_all(format!("{} {}\n", prefix_name, node.borrow().cycles).as_bytes()).unwrap();
162        for e in &node.borrow().childs {
163            self.display_flamegraph_rec(format!("{}; ", prefix_name).as_str(), e.clone(), writer);
164        }
165        writer.flush().unwrap();
166    }
167
168    pub fn display_flamegraph(&mut self, writer: &mut impl std::io::Write) {
169        self.display_flamegraph_rec("", self.trie_root.clone(), writer);
170    }
171
172    pub fn display_stacktrace(&mut self, prefix: &str, writer: &mut impl std::io::Write) {
173        let mut frame = self.trie_node.clone();
174        let mut stack = vec![self.get_tag(frame.borrow().pc).detail()];
175        loop {
176            stack.push(self.get_tag(frame.borrow().link).detail());
177            let parent = frame.borrow().parent.clone();
178            if let Some(p) = parent {
179                frame = p.clone();
180            } else {
181                break;
182            }
183        }
184        stack.reverse();
185        for i in &stack {
186            writer.write_all(format!("{}{}\n", prefix, i).as_bytes()).unwrap();
187        }
188        writer.flush().unwrap();
189    }
190
191    fn step<R: Register, M: Memory<REG = R>, Inner: SupportMachine<REG = R, MEM = M>>(
192        &mut self,
193        machine: &mut DefaultMachine<Inner>,
194        decoder: &mut Decoder,
195    ) -> Result<(), Error> {
196        let pc = machine.pc().to_u64();
197        if !self.disable_overlapping_detection {
198            let sp = machine.registers()[SP].to_u64();
199            if sp < self.sbrk_heap {
200                return Err(Error::External(format!("Heap and stack overlapping sp={} heap={}", sp, self.sbrk_heap)));
201            }
202        }
203        let inst = decoder.decode(machine.memory_mut(), pc)?;
204        let opcode = ckb_vm::instructions::extract_opcode(inst);
205        let cycles = machine.instruction_cycle_func()(inst);
206        self.trie_node.borrow_mut().cycles += cycles;
207        self.trie_node.borrow_mut().pc = pc;
208
209        let call = |s: &mut Self, addr: u64, link: u64| {
210            let mut regs = [[0; 32]; 2];
211            for i in 0..32 {
212                regs[0][i] = machine.registers()[i].to_u64();
213            }
214            let chd = Rc::new(RefCell::new(TrieNode {
215                addr: addr,
216                link: link,
217                pc: pc,
218                parent: Some(s.trie_node.clone()),
219                childs: vec![],
220                cycles: 0,
221                regs: regs,
222            }));
223            s.trie_node.borrow_mut().childs.push(chd.clone());
224            s.trie_node = chd;
225        };
226
227        let sbrk_or_skip = |s: &mut Self| {
228            if s.trie_node.borrow().addr == s.sbrk_addr {
229                // https://github.com/nervosnetwork/riscv-newlib/blob/newlib-4.1.0-fork/libgloss/riscv/sys_sbrk.c#L49
230                // Note incr could be negative.
231                s.sbrk_heap = s.trie_node.borrow().regs[0][A0].wrapping_add(s.trie_node.borrow().regs[1][A0]);
232            }
233        };
234
235        let quit_or_skip = |s: &mut Self, addr: u64| {
236            let mut f = s.trie_node.clone();
237            loop {
238                if f.borrow().link == addr {
239                    for i in 0..32 {
240                        s.trie_node.borrow_mut().regs[1][i] = machine.registers()[i].to_u64();
241                    }
242                    sbrk_or_skip(s);
243                    if let Some(p) = f.borrow().parent.clone() {
244                        s.trie_node = p.clone();
245                    } else {
246                        unimplemented!();
247                    }
248                    break;
249                }
250                let p = f.borrow().parent.clone();
251                if let Some(p) = p {
252                    f = p.clone();
253                } else {
254                    break;
255                }
256            }
257        };
258
259        if opcode == ckb_vm::instructions::insts::OP_JAL {
260            let inst_length = instruction_length(inst) as u64;
261            let inst = ckb_vm::instructions::Utype(inst);
262            let addr = pc.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
263            let link = pc + inst_length;
264            if self.cache_fun.contains_key(&addr) {
265                call(self, addr, link);
266                return Ok(());
267            }
268            quit_or_skip(self, addr);
269            return Ok(());
270        };
271        if opcode == ckb_vm::instructions::insts::OP_JALR_VERSION0
272            || opcode == ckb_vm::instructions::insts::OP_JALR_VERSION1
273        {
274            let inst_length = instruction_length(inst) as u64;
275            let inst = ckb_vm::instructions::Itype(inst);
276            let base = machine.registers()[inst.rs1()].to_u64();
277            let addr = base.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
278            let link = pc + inst_length;
279            if self.cache_fun.contains_key(&addr) {
280                call(self, addr, link);
281                return Ok(());
282            }
283            quit_or_skip(self, addr);
284            return Ok(());
285        };
286        if opcode == ckb_vm::instructions::insts::OP_FAR_JUMP_ABS {
287            let inst_length = instruction_length(inst) as u64;
288            let inst = ckb_vm::instructions::Utype(inst);
289            let addr = (inst.immediate_s() as u64) & 0xfffffffffffffffe;
290            let link = pc + inst_length;
291            if self.cache_fun.contains_key(&addr) {
292                call(self, addr, link);
293                return Ok(());
294            }
295            quit_or_skip(self, addr);
296            return Ok(());
297        }
298        if opcode == ckb_vm::instructions::insts::OP_FAR_JUMP_REL {
299            let inst_length = instruction_length(inst) as u64;
300            let inst = ckb_vm::instructions::Utype(inst);
301            let addr = pc.wrapping_add(inst.immediate_s() as u64) & 0xfffffffffffffffe;
302            let link = pc + inst_length;
303            if self.cache_fun.contains_key(&addr) {
304                call(self, addr, link);
305                return Ok(());
306            }
307            quit_or_skip(self, addr);
308            return Ok(());
309        }
310        return Ok(());
311    }
312}
313
/// A `DefaultMachine` paired with the [`Profile`] that observes it.
pub struct PProfMachine<Inner> {
    /// The wrapped virtual machine that actually executes the program.
    pub machine: DefaultMachine<Inner>,
    /// Profiling state that is stepped before every executed instruction.
    pub profile: Profile,
}
318
319impl<R: Register, M: Memory<REG = R>, Inner: SupportMachine<REG = R, MEM = M>> CoreMachine for PProfMachine<Inner> {
320    type REG = <Inner as CoreMachine>::REG;
321    type MEM = <Inner as CoreMachine>::MEM;
322
323    fn pc(&self) -> &Self::REG {
324        &self.machine.pc()
325    }
326
327    fn update_pc(&mut self, pc: Self::REG) {
328        self.machine.update_pc(pc)
329    }
330
331    fn commit_pc(&mut self) {
332        self.machine.commit_pc()
333    }
334
335    fn memory(&self) -> &Self::MEM {
336        self.machine.memory()
337    }
338
339    fn memory_mut(&mut self) -> &mut Self::MEM {
340        self.machine.memory_mut()
341    }
342
343    fn registers(&self) -> &[Self::REG] {
344        self.machine.registers()
345    }
346
347    fn set_register(&mut self, idx: usize, value: Self::REG) {
348        self.machine.set_register(idx, value)
349    }
350
351    fn isa(&self) -> u8 {
352        self.machine.isa()
353    }
354
355    fn version(&self) -> u32 {
356        self.machine.version()
357    }
358}
359
360impl<R: Register, M: Memory<REG = R>, Inner: SupportMachine<REG = R, MEM = M>> Machine for PProfMachine<Inner> {
361    fn ecall(&mut self) -> Result<(), Error> {
362        self.machine.ecall()
363    }
364
365    fn ebreak(&mut self) -> Result<(), Error> {
366        self.machine.ebreak()
367    }
368}
369
370impl<R: Register, M: Memory<REG = R>, Inner: SupportMachine<REG = R, MEM = M>> PProfMachine<Inner> {
371    pub fn new(machine: DefaultMachine<Inner>, profile: Profile) -> Self {
372        Self { machine, profile }
373    }
374
375    pub fn load_program(&mut self, program: &Bytes, args: &[Bytes]) -> Result<u64, Error> {
376        self.machine.load_program(program, args.iter().map(|e| Ok(e.clone())))
377    }
378
379    pub fn run(&mut self) -> Result<i8, Error> {
380        if self.isa() & ISA_MOP != 0 && self.version() == VERSION0 {
381            return Err(Error::InvalidVersion);
382        }
383        let mut decoder = build_decoder::<Inner::REG>(self.isa(), self.version());
384        self.machine.set_running(true);
385        while self.machine.running() {
386            if self.machine.reset_signal() {
387                decoder.reset_instructions_cache();
388                self.profile = Profile::new(&self.machine.code()).unwrap();
389            }
390            self.profile.step(&mut self.machine, &mut decoder)?;
391            self.machine.step(&mut decoder)?;
392        }
393        Ok(self.machine.exit_code())
394    }
395}
396
397pub fn quick_start(
398    syscalls: Vec<Box<(dyn Syscalls<DefaultCoreMachine<u64, WXorXMemory<SparseMemory<u64>>>>)>>,
399    fl_bin: &str,
400    fl_arg: Vec<&str>,
401    output_filename: &str,
402) -> Result<(i8, u64), Error> {
403    let code_data = std::fs::read(fl_bin)?;
404    let code = Bytes::from(code_data);
405
406    let isa = ckb_vm::ISA_IMC | ckb_vm::ISA_A | ckb_vm::ISA_B | ckb_vm::ISA_MOP;
407    let default_core_machine = ckb_vm::DefaultCoreMachine::<
408        u64,
409        ckb_vm::memory::wxorx::WXorXMemory<ckb_vm::memory::sparse::SparseMemory<u64>>,
410    >::new(isa, ckb_vm::machine::VERSION2, 1 << 32);
411    let mut builder =
412        DefaultMachineBuilder::new(default_core_machine).instruction_cycle_func(Box::new(cost_model::estimate_cycles));
413    builder = syscalls.into_iter().fold(builder, |builder, syscall| builder.syscall(syscall));
414    let default_machine = builder.build();
415    let profile = Profile::new(&code).unwrap();
416    let mut machine = PProfMachine::new(default_machine, profile);
417    let mut args = vec![fl_bin.to_string().into()];
418    args.append(&mut fl_arg.iter().map(|x| Bytes::from(x.to_string())).collect());
419    machine.load_program(&code, &args).unwrap();
420    let result = machine.run();
421
422    if let Err(err) = result {
423        machine.profile.display_stacktrace("", &mut std::io::stdout());
424        return Err(err);
425    }
426
427    if output_filename == "-" {
428        machine.profile.display_flamegraph(&mut std::io::stdout());
429    } else {
430        let mut output = std::fs::File::create(&output_filename).expect("can't create file");
431        machine.profile.display_flamegraph(&mut output);
432    }
433
434    Ok((0, machine.machine.cycles()))
435}