ckb_vm/instructions/
mod.rs

1mod common;
2mod execute;
3mod register;
4mod utils;
5
6pub mod a;
7pub mod ast;
8pub mod b;
9pub mod i;
10pub mod m;
11pub mod rvc;
12pub mod tagged;
13
14pub use self::register::Register;
15use super::Error;
16pub use ckb_vm_definitions::{
17    instructions::{
18        self as insts, instruction_opcode_name, Instruction, InstructionOpcode,
19        MAXIMUM_BASIC_BLOCK_END_OPCODE, MINIMAL_BASIC_BLOCK_END_OPCODE, MINIMAL_OPCODE,
20    },
21    registers::REGISTER_ABI_NAMES,
22};
23use core::fmt;
24pub use execute::{
25    execute, execute_instruction, execute_with_thread, handle_invalid_op, Thread, ThreadFactory,
26};
27
/// Index of a register; the `Display` impls in this module use it to index
/// `REGISTER_ABI_NAMES`.
pub type RegisterIndex = usize;
/// Immediate operand viewed as a signed 32-bit value.
pub type SImmediate = i32;
/// Immediate operand viewed as an unsigned 32-bit value.
pub type UImmediate = u32;
31
32#[inline(always)]
33pub fn extract_opcode(i: Instruction) -> InstructionOpcode {
34    (((i >> 8) & 0xff00) | (i & 0x00ff)) as u16
35}
36
37pub type InstructionFactory = fn(instruction_bits: u32, version: u32) -> Option<Instruction>;
38
39// Blank instructions need no register indices nor immediates, they only have opcode
40// and module bit set.
41pub fn blank_instruction(op: InstructionOpcode) -> Instruction {
42    (op as u64 >> 8 << 16) | (op as u64 & 0xff)
43}
44
45#[derive(Debug, Clone, Copy, PartialEq)]
46pub struct Rtype(pub Instruction);
47
48impl Rtype {
49    pub fn new(
50        op: InstructionOpcode,
51        rd: RegisterIndex,
52        rs1: RegisterIndex,
53        rs2: RegisterIndex,
54    ) -> Self {
55        Rtype(
56            (u64::from(op) >> 8 << 16)
57                | u64::from(op as u8)
58                | (u64::from(rd as u8) << 8)
59                | (u64::from(rs1 as u8) << 32)
60                | (u64::from(rs2 as u8) << 40),
61        )
62    }
63
64    pub fn op(self) -> InstructionOpcode {
65        ((self.0 >> 16 << 8) | (self.0 & 0xFF)) as InstructionOpcode
66    }
67
68    pub fn rd(self) -> RegisterIndex {
69        (self.0 >> 8) as u8 as RegisterIndex
70    }
71
72    pub fn rs1(self) -> RegisterIndex {
73        (self.0 >> 32) as u8 as RegisterIndex
74    }
75
76    pub fn rs2(self) -> RegisterIndex {
77        (self.0 >> 40) as u8 as RegisterIndex
78    }
79}
80
81impl fmt::Display for Rtype {
82    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
83        write!(
84            f,
85            "{} {},{},{}",
86            instruction_opcode_name(self.op()).to_lowercase(),
87            REGISTER_ABI_NAMES[self.rd()],
88            REGISTER_ABI_NAMES[self.rs1()],
89            REGISTER_ABI_NAMES[self.rs2()]
90        )
91    }
92}
93
94#[derive(Debug, Clone, Copy, PartialEq)]
95pub struct Itype(pub Instruction);
96
97impl Itype {
98    pub fn new_u(
99        op: InstructionOpcode,
100        rd: RegisterIndex,
101        rs1: RegisterIndex,
102        immediate_u: UImmediate,
103    ) -> Self {
104        Itype(
105            (u64::from(op) >> 8 << 16) |
106            u64::from(op as u8) |
107              (u64::from(rd as u8) << 8) |
108              (u64::from(rs1 as u8) << 32) |
109              // Per RISC-V spec, I-type uses 12 bits at most, so it's perfectly
110              // fine we store them in 3-byte location.
111              (u64::from(immediate_u) << 40),
112        )
113    }
114
115    pub fn new_s(
116        op: InstructionOpcode,
117        rd: RegisterIndex,
118        rs1: RegisterIndex,
119        immediate_s: SImmediate,
120    ) -> Self {
121        Self::new_u(op, rd, rs1, immediate_s as UImmediate)
122    }
123
124    pub fn op(self) -> InstructionOpcode {
125        ((self.0 >> 16 << 8) | (self.0 & 0xFF)) as InstructionOpcode
126    }
127
128    pub fn rd(self) -> RegisterIndex {
129        (self.0 >> 8) as u8 as RegisterIndex
130    }
131
132    pub fn rs1(self) -> RegisterIndex {
133        (self.0 >> 32) as u8 as RegisterIndex
134    }
135
136    pub fn immediate_u(self) -> UImmediate {
137        self.immediate_s() as UImmediate
138    }
139
140    pub fn immediate_s(self) -> SImmediate {
141        ((self.0 as i64) >> 40) as SImmediate
142    }
143}
144
145impl fmt::Display for Itype {
146    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
147        // TODO: there are 2 simplifications here:
148        // 1. It will print `addi a1,s0,-64` as `addi a1,-64(s0)`, and also print
149        // `ld ra,88(sp)` as `ld ra,88(sp)`
150        // 2. It will always use signed immediate numbers.
151        // It is debatable if we should do a per-instruction pattern match to show
152        // more patterns.
153        write!(
154            f,
155            "{} {},{}({})",
156            instruction_opcode_name(self.op()).to_lowercase(),
157            REGISTER_ABI_NAMES[self.rd()],
158            self.immediate_s(),
159            REGISTER_ABI_NAMES[self.rs1()]
160        )
161    }
162}
163
164#[derive(Debug, Clone, Copy, PartialEq)]
165pub struct Stype(pub Instruction);
166
167impl Stype {
168    pub fn new_u(
169        op: InstructionOpcode,
170        immediate_u: UImmediate,
171        rs1: RegisterIndex,
172        rs2: RegisterIndex,
173    ) -> Self {
174        Stype(
175            (u64::from(op) >> 8 << 16) |
176            u64::from(op as u8) |
177              (u64::from(rs2 as u8) << 8) |
178              (u64::from(rs1 as u8) << 32) |
179              // Per RISC-V spec, S/B type uses 13 bits at most, so it's perfectly
180              // fine we store them in 3-byte location.
181              (u64::from(immediate_u) << 40),
182        )
183    }
184
185    pub fn new_s(
186        op: InstructionOpcode,
187        immediate_s: SImmediate,
188        rs1: RegisterIndex,
189        rs2: RegisterIndex,
190    ) -> Self {
191        Self::new_u(op, immediate_s as UImmediate, rs1, rs2)
192    }
193
194    pub fn op(self) -> InstructionOpcode {
195        ((self.0 >> 16 << 8) | (self.0 & 0xFF)) as InstructionOpcode
196    }
197
198    pub fn rs1(self) -> RegisterIndex {
199        (self.0 >> 32) as u8 as RegisterIndex
200    }
201
202    pub fn rs2(self) -> RegisterIndex {
203        (self.0 >> 8) as u8 as RegisterIndex
204    }
205
206    pub fn immediate_u(self) -> UImmediate {
207        self.immediate_s() as UImmediate
208    }
209
210    pub fn immediate_s(self) -> SImmediate {
211        ((self.0 as i64) >> 40) as SImmediate
212    }
213}
214
215impl fmt::Display for Stype {
216    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
217        match self.op() {
218            // Branches are in fact of B-type, we reuse S-type in CKB-VM
219            // since they share the same constructs after decoding, but
220            // they have different encoding rules in texts.
221            insts::OP_BEQ
222            | insts::OP_BNE
223            | insts::OP_BLT
224            | insts::OP_BGE
225            | insts::OP_BLTU
226            | insts::OP_BGEU => write!(
227                f,
228                "{} {},{},{}",
229                instruction_opcode_name(self.op()).to_lowercase(),
230                REGISTER_ABI_NAMES[self.rs1()],
231                REGISTER_ABI_NAMES[self.rs2()],
232                self.immediate_s()
233            ),
234            _ => write!(
235                f,
236                "{} {},{}({})",
237                instruction_opcode_name(self.op()).to_lowercase(),
238                REGISTER_ABI_NAMES[self.rs2()],
239                self.immediate_s(),
240                REGISTER_ABI_NAMES[self.rs1()]
241            ),
242        }
243    }
244}
245
246#[derive(Debug, Clone, Copy, PartialEq)]
247pub struct Utype(pub Instruction);
248
249impl Utype {
250    pub fn new(op: InstructionOpcode, rd: RegisterIndex, immediate_u: UImmediate) -> Self {
251        Utype(
252            (u64::from(op) >> 8 << 16)
253                | u64::from(op as u8)
254                | (u64::from(rd as u8) << 8)
255                | (u64::from(immediate_u) << 32),
256        )
257    }
258
259    pub fn new_s(op: InstructionOpcode, rd: RegisterIndex, immediate_s: SImmediate) -> Self {
260        Self::new(op, rd, immediate_s as UImmediate)
261    }
262
263    pub fn op(self) -> InstructionOpcode {
264        ((self.0 >> 16 << 8) | (self.0 & 0xFF)) as InstructionOpcode
265    }
266
267    pub fn rd(self) -> RegisterIndex {
268        (self.0 >> 8) as u8 as RegisterIndex
269    }
270
271    pub fn immediate_u(self) -> UImmediate {
272        self.immediate_s() as UImmediate
273    }
274
275    pub fn immediate_s(self) -> SImmediate {
276        ((self.0 as i64) >> 32) as SImmediate
277    }
278}
279
280impl fmt::Display for Utype {
281    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
282        write!(
283            f,
284            "{} {},{}",
285            instruction_opcode_name(self.op()).to_lowercase(),
286            REGISTER_ABI_NAMES[self.rd()],
287            self.immediate_s()
288        )
289    }
290}
291
292#[derive(Debug, Clone, Copy, PartialEq)]
293pub struct R4type(pub Instruction);
294
295impl R4type {
296    pub fn new(
297        op: InstructionOpcode,
298        rd: RegisterIndex,
299        rs1: RegisterIndex,
300        rs2: RegisterIndex,
301        rs3: RegisterIndex,
302    ) -> Self {
303        R4type(
304            (u64::from(op) >> 8 << 16)
305                | u64::from(op as u8)
306                | (u64::from(rd as u8) << 8)
307                | (u64::from(rs1 as u8) << 32)
308                | (u64::from(rs2 as u8) << 40)
309                | (u64::from(rs3 as u8) << 48),
310        )
311    }
312
313    pub fn op(self) -> InstructionOpcode {
314        ((self.0 >> 16 << 8) | (self.0 & 0xFF)) as InstructionOpcode
315    }
316
317    pub fn rd(self) -> RegisterIndex {
318        (self.0 >> 8) as u8 as RegisterIndex
319    }
320
321    pub fn rs1(self) -> RegisterIndex {
322        (self.0 >> 32) as u8 as RegisterIndex
323    }
324
325    pub fn rs2(self) -> RegisterIndex {
326        (self.0 >> 40) as u8 as RegisterIndex
327    }
328
329    pub fn rs3(self) -> RegisterIndex {
330        (self.0 >> 48) as u8 as RegisterIndex
331    }
332}
333
334impl fmt::Display for R4type {
335    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
336        write!(
337            f,
338            "{} {},{},{},{}",
339            instruction_opcode_name(self.op()).to_lowercase(),
340            REGISTER_ABI_NAMES[self.rd()],
341            REGISTER_ABI_NAMES[self.rs1()],
342            REGISTER_ABI_NAMES[self.rs2()],
343            REGISTER_ABI_NAMES[self.rs3()]
344        )
345    }
346}
347
348#[derive(Debug, Clone, Copy, PartialEq)]
349pub struct R5type(pub Instruction);
350
351impl R5type {
352    pub fn new(
353        op: InstructionOpcode,
354        rd: RegisterIndex,
355        rs1: RegisterIndex,
356        rs2: RegisterIndex,
357        rs3: RegisterIndex,
358        rs4: RegisterIndex,
359    ) -> Self {
360        R5type(
361            ((op as u64) >> 8 << 16)
362                | (op as u8 as u64)
363                | ((rd as u8 as u64) << 8)
364                | ((rs1 as u8 as u64) << 32)
365                | ((rs2 as u8 as u64) << 40)
366                | ((rs3 as u8 as u64) << 48)
367                | ((rs4 as u8 as u64) << 56),
368        )
369    }
370
371    pub fn op(self) -> InstructionOpcode {
372        ((self.0 >> 16 << 8) | (self.0 & 0xFF)) as InstructionOpcode
373    }
374
375    pub fn rd(self) -> RegisterIndex {
376        (self.0 >> 8) as u8 as RegisterIndex
377    }
378
379    pub fn rs1(self) -> RegisterIndex {
380        (self.0 >> 32) as u8 as RegisterIndex
381    }
382
383    pub fn rs2(self) -> RegisterIndex {
384        (self.0 >> 40) as u8 as RegisterIndex
385    }
386
387    pub fn rs3(self) -> RegisterIndex {
388        (self.0 >> 48) as u8 as RegisterIndex
389    }
390
391    pub fn rs4(self) -> RegisterIndex {
392        (self.0 >> 56) as u8 as RegisterIndex
393    }
394}
395
396impl fmt::Display for R5type {
397    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
398        write!(
399            f,
400            "{} {},{},{},{},{}",
401            instruction_opcode_name(self.op()).to_lowercase(),
402            REGISTER_ABI_NAMES[self.rd()],
403            REGISTER_ABI_NAMES[self.rs1()],
404            REGISTER_ABI_NAMES[self.rs2()],
405            REGISTER_ABI_NAMES[self.rs3()],
406            REGISTER_ABI_NAMES[self.rs4()]
407        )
408    }
409}
410
411pub fn is_slowpath_instruction(i: Instruction) -> bool {
412    (i as u8 as u16) < MINIMAL_OPCODE
413}
414
415pub fn is_basic_block_end_instruction(i: Instruction) -> bool {
416    let opcode = extract_opcode(i);
417    (MINIMAL_BASIC_BLOCK_END_OPCODE..=MAXIMUM_BASIC_BLOCK_END_OPCODE).contains(&opcode)
418        || is_slowpath_instruction(i)
419}
420
/// Marks an instruction as 2 bytes long by setting bit 24 of `i`.
///
/// The remaining bits are passed through unchanged.
#[inline(always)]
pub fn set_instruction_length_2(i: u64) -> u64 {
    // The length field counts 2-byte units starting at bit 24.
    const LENGTH_2_FLAG: u64 = 1 << 24;
    i | LENGTH_2_FLAG
}
425
/// Marks an instruction as 4 bytes long by setting bit 25 of `i`.
///
/// The remaining bits are passed through unchanged.
#[inline(always)]
pub fn set_instruction_length_4(i: u64) -> u64 {
    // The length field counts 2-byte units starting at bit 24.
    const LENGTH_4_FLAG: u64 = 2 << 24;
    i | LENGTH_4_FLAG
}
430
/// Records an instruction length of `n` bytes in `i`.
///
/// The length is stored as `n / 2` in the bits starting at bit 24 and is
/// OR-ed into whatever those bits already hold. Debug builds assert that `n`
/// is even and at most 30.
#[inline(always)]
pub fn set_instruction_length_n(i: u64, n: u8) -> u64 {
    debug_assert!(n % 2 == 0);
    debug_assert!(n <= 30);
    let half_words = u64::from(n & 0x1f) >> 1;
    i | (half_words << 24)
}
437
438#[inline(always)]
439pub fn instruction_length(i: Instruction) -> u8 {
440    (((i >> 24) & 0x0f) << 1) as u8
441}
442
#[cfg(test)]
mod tests {
    use super::i::factory;
    use super::*;
    use ckb_vm_definitions::{for_each_inst1, instructions::MAXIMUM_OPCODE};
    use std::cmp::{max, min};
    use std::mem::size_of;

    /// Pins the storage width of an opcode. Despite the test's name, the
    /// assertion expects two bytes.
    #[test]
    fn test_instruction_op_should_fit_in_byte() {
        let opcode_width = size_of::<InstructionOpcode>();
        assert_eq!(2, opcode_width);
    }

    /// Checks both textual forms produced by `Stype`'s `Display` impl.
    #[test]
    fn test_stype_display() {
        // Store form: these bits encode `sd a5,568(sp)`.
        let sd_bits = 0x22f13c23;
        let sd = Stype(factory::<u64>(sd_bits, u32::MAX).expect("decoding"));
        assert_eq!("sd a5,568(sp)", sd.to_string());

        // Branch form: these bits encode `beq a0,a5,1012e`; the target is
        // printed as the relative offset.
        let beq_bits = 0xf4f500e3;
        let beq = Stype(factory::<u64>(beq_bits, u32::MAX).expect("decoding"));
        assert_eq!("beq a0,a5,-192", beq.to_string());
    }

    // Callback for `for_each_inst1!`: folds an opcode into a running minimum.
    macro_rules! update_min_opcode {
        ($name:ident, $real_name:ident, $opcode:expr, $acc:ident) => {
            $acc = min($opcode, $acc);
        };
    }

    /// No opcode visited by `for_each_inst1!` may be below `MINIMAL_OPCODE`.
    #[test]
    fn test_minimal_opcode_is_minimal() {
        let mut smallest = MINIMAL_OPCODE;
        for_each_inst1!(update_min_opcode, smallest);
        assert_eq!(MINIMAL_OPCODE, smallest);
    }

    // Callback for `for_each_inst1!`: folds an opcode into a running maximum.
    macro_rules! update_max_opcode {
        ($name:ident, $real_name:ident, $opcode:expr, $acc:ident) => {
            $acc = max($opcode, $acc);
        };
    }

    /// No opcode visited by `for_each_inst1!` may exceed `MAXIMUM_OPCODE`.
    #[test]
    fn test_maximal_opcode_is_maximal() {
        let mut largest = MAXIMUM_OPCODE;
        for_each_inst1!(update_max_opcode, largest);
        assert_eq!(MAXIMUM_OPCODE, largest);
    }

    /// Every opcode that `is_basic_block_end_instruction` accepts must lie
    /// within the declared basic-block-end opcode range.
    #[test]
    fn test_basic_block_end_opcode_is_in_range() {
        for o in MINIMAL_OPCODE..=MAXIMUM_OPCODE {
            if !is_basic_block_end_instruction(blank_instruction(o)) {
                continue;
            }
            assert!(
                o >= MINIMAL_BASIC_BLOCK_END_OPCODE,
                "Opcode {} ({}) is smaller than minimal basic block end opcode!",
                o,
                instruction_opcode_name(o)
            );
            assert!(
                o <= MAXIMUM_BASIC_BLOCK_END_OPCODE,
                "Opcode {} ({}) is bigger than maximum basic block end opcode!",
                o,
                instruction_opcode_name(o)
            );
        }
    }

    // Callback for `for_each_inst1!`: asserts that an opcode is exactly one
    // above the previously seen one, then remembers it.
    macro_rules! test_opcode_with_last {
        ($name:ident, $real_name:ident, $opcode:expr, $previous:ident) => {
            assert_eq!(
                $previous + 1,
                $opcode,
                "Opcode {} ({}) does not follow last opcode!",
                stringify!($real_name),
                $opcode
            );
            $previous = $opcode;
        };
    }

    /// Opcodes must be numbered consecutively from `MINIMAL_OPCODE` through
    /// `MAXIMUM_OPCODE`.
    #[test]
    fn test_opcodes_are_defined_seqentially() {
        let mut previous = MINIMAL_OPCODE - 1;
        for_each_inst1!(test_opcode_with_last, previous);
        assert_eq!(previous, MAXIMUM_OPCODE);
    }
}