solana_rbpf/
assembler.rs

1#![allow(clippy::arithmetic_side_effects)]
2// Copyright 2017 Rich Lane <lanerl@gmail.com>
3//
4// Licensed under the Apache License, Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0> or
5// the MIT license <http://opensource.org/licenses/MIT>, at your option. This file may not be
6// copied, modified, or distributed except according to those terms.
7
8//! This module translates eBPF assembly language to binary.
9
10use self::InstructionType::{
11    AluBinary, AluUnary, CallImm, CallReg, Endian, JumpConditional, JumpUnconditional, LoadAbs,
12    LoadDwImm, LoadInd, LoadReg, NoOperand, StoreImm, StoreReg, Syscall,
13};
14use crate::{
15    asm_parser::{
16        parse,
17        Operand::{Integer, Label, Memory, Register},
18        Statement,
19    },
20    ebpf::{self, Insn},
21    elf::Executable,
22    program::{BuiltinProgram, FunctionRegistry, SBPFVersion},
23    vm::ContextObject,
24};
25use std::collections::HashMap;
26
27#[cfg(not(feature = "shuttle-test"))]
28use std::sync::Arc;
29
30#[cfg(feature = "shuttle-test")]
31use shuttle::sync::Arc;
32
/// Operand layout of an assembler mnemonic.
///
/// The assembler pairs each mnemonic with one of these variants and then
/// matches the variant against the parsed operand list to decide how the
/// instruction is encoded.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
enum InstructionType {
    /// Destination register followed by a source register or an immediate.
    AluBinary,
    /// A single destination register.
    AluUnary,
    /// Destination register and a 64-bit immediate; emitted as two slots.
    LoadDwImm,
    /// A single immediate.
    LoadAbs,
    /// Source register and an immediate.
    LoadInd,
    /// Destination register and a memory operand (register + offset).
    LoadReg,
    /// Memory operand (register + offset) and an immediate.
    StoreImm,
    /// Memory operand (register + offset) and a source register.
    StoreReg,
    /// A jump target given as an integer offset or a label.
    JumpUnconditional,
    /// Destination register, a register or immediate to compare against, and
    /// a jump target given as an integer offset or a label.
    JumpConditional,
    /// A label naming the syscall; its hash becomes the immediate.
    Syscall,
    /// A call target given as an integer offset or a label.
    CallImm,
    /// A single register holding the call target.
    CallReg,
    /// A single destination register; the payload is the operand width in
    /// bits (16, 32 or 64) and is emitted as the immediate.
    Endian(i64),
    /// No operands at all.
    NoOperand,
}
51
52fn make_instruction_map() -> HashMap<String, (InstructionType, u8)> {
53    let mut result = HashMap::new();
54
55    let alu_binary_ops = [
56        ("add", ebpf::BPF_ADD),
57        ("sub", ebpf::BPF_SUB),
58        ("mul", ebpf::BPF_MUL),
59        ("div", ebpf::BPF_DIV),
60        ("or", ebpf::BPF_OR),
61        ("and", ebpf::BPF_AND),
62        ("lsh", ebpf::BPF_LSH),
63        ("rsh", ebpf::BPF_RSH),
64        ("mod", ebpf::BPF_MOD),
65        ("xor", ebpf::BPF_XOR),
66        ("mov", ebpf::BPF_MOV),
67        ("arsh", ebpf::BPF_ARSH),
68        ("hor", ebpf::BPF_HOR),
69    ];
70
71    let mem_sizes = [
72        ("w", ebpf::BPF_W),
73        ("h", ebpf::BPF_H),
74        ("b", ebpf::BPF_B),
75        ("dw", ebpf::BPF_DW),
76    ];
77
78    let jump_conditions = [
79        ("jeq", ebpf::BPF_JEQ),
80        ("jgt", ebpf::BPF_JGT),
81        ("jge", ebpf::BPF_JGE),
82        ("jlt", ebpf::BPF_JLT),
83        ("jle", ebpf::BPF_JLE),
84        ("jset", ebpf::BPF_JSET),
85        ("jne", ebpf::BPF_JNE),
86        ("jsgt", ebpf::BPF_JSGT),
87        ("jsge", ebpf::BPF_JSGE),
88        ("jslt", ebpf::BPF_JSLT),
89        ("jsle", ebpf::BPF_JSLE),
90    ];
91
92    {
93        let mut entry = |name: &str, inst_type: InstructionType, opc: u8| {
94            result.insert(name.to_string(), (inst_type, opc))
95        };
96
97        // Miscellaneous.
98        entry("exit", NoOperand, ebpf::EXIT);
99        entry("ja", JumpUnconditional, ebpf::JA);
100        entry("syscall", Syscall, ebpf::CALL_IMM);
101        entry("call", CallImm, ebpf::CALL_IMM);
102        entry("callx", CallReg, ebpf::CALL_REG);
103        entry("lddw", LoadDwImm, ebpf::LD_DW_IMM);
104
105        // AluUnary.
106        entry("neg", AluUnary, ebpf::NEG64);
107        entry("neg32", AluUnary, ebpf::NEG32);
108        entry("neg64", AluUnary, ebpf::NEG64);
109
110        // AluBinary.
111        for &(name, opc) in &alu_binary_ops {
112            entry(name, AluBinary, ebpf::BPF_ALU64 | opc);
113            entry(&format!("{name}32"), AluBinary, ebpf::BPF_ALU | opc);
114            entry(&format!("{name}64"), AluBinary, ebpf::BPF_ALU64 | opc);
115        }
116
117        // Product Quotient Remainder.
118        entry(
119            "lmul",
120            AluBinary,
121            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_LMUL,
122        );
123        entry(
124            "lmul64",
125            AluBinary,
126            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_LMUL,
127        );
128        entry("lmul32", AluBinary, ebpf::BPF_PQR | ebpf::BPF_LMUL);
129        entry(
130            "uhmul",
131            AluBinary,
132            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_UHMUL,
133        );
134        entry(
135            "uhmul64",
136            AluBinary,
137            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_UHMUL,
138        );
139        entry("uhmul32", AluBinary, ebpf::BPF_PQR | ebpf::BPF_UHMUL);
140        entry(
141            "shmul",
142            AluBinary,
143            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_SHMUL,
144        );
145        entry(
146            "shmul64",
147            AluBinary,
148            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_SHMUL,
149        );
150        entry("shmul32", AluBinary, ebpf::BPF_PQR | ebpf::BPF_SHMUL);
151        entry(
152            "udiv",
153            AluBinary,
154            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_UDIV,
155        );
156        entry(
157            "udiv64",
158            AluBinary,
159            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_UDIV,
160        );
161        entry("udiv32", AluBinary, ebpf::BPF_PQR | ebpf::BPF_UDIV);
162        entry(
163            "urem",
164            AluBinary,
165            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_UREM,
166        );
167        entry(
168            "urem64",
169            AluBinary,
170            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_UREM,
171        );
172        entry("urem32", AluBinary, ebpf::BPF_PQR | ebpf::BPF_UREM);
173        entry(
174            "sdiv",
175            AluBinary,
176            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_SDIV,
177        );
178        entry(
179            "sdiv64",
180            AluBinary,
181            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_SDIV,
182        );
183        entry("sdiv32", AluBinary, ebpf::BPF_PQR | ebpf::BPF_SDIV);
184        entry(
185            "srem",
186            AluBinary,
187            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_SREM,
188        );
189        entry(
190            "srem64",
191            AluBinary,
192            ebpf::BPF_PQR | ebpf::BPF_B | ebpf::BPF_SREM,
193        );
194        entry("srem32", AluBinary, ebpf::BPF_PQR | ebpf::BPF_SREM);
195
196        // LoadAbs, LoadInd, LoadReg, StoreImm, and StoreReg.
197        for &(suffix, size) in &mem_sizes {
198            entry(
199                &format!("ldabs{suffix}"),
200                LoadAbs,
201                ebpf::BPF_ABS | ebpf::BPF_LD | size,
202            );
203            entry(
204                &format!("ldind{suffix}"),
205                LoadInd,
206                ebpf::BPF_IND | ebpf::BPF_LD | size,
207            );
208            entry(
209                &format!("ldx{suffix}"),
210                LoadReg,
211                ebpf::BPF_MEM | ebpf::BPF_LDX | size,
212            );
213            entry(
214                &format!("st{suffix}"),
215                StoreImm,
216                ebpf::BPF_MEM | ebpf::BPF_ST | size,
217            );
218            entry(
219                &format!("stx{suffix}"),
220                StoreReg,
221                ebpf::BPF_MEM | ebpf::BPF_STX | size,
222            );
223        }
224
225        // JumpConditional.
226        for &(name, condition) in &jump_conditions {
227            entry(name, JumpConditional, ebpf::BPF_JMP | condition);
228        }
229
230        // Endian.
231        for &size in &[16, 32, 64] {
232            entry(&format!("be{size}"), Endian(size), ebpf::BE);
233            entry(&format!("le{size}"), Endian(size), ebpf::LE);
234        }
235    }
236
237    result
238}
239
240fn insn(opc: u8, dst: i64, src: i64, off: i64, imm: i64) -> Result<Insn, String> {
241    if !(0..16).contains(&dst) {
242        return Err(format!("Invalid destination register {dst}"));
243    }
244    if !(0..16).contains(&src) {
245        return Err(format!("Invalid source register {src}"));
246    }
247    if off < i16::MIN as i64 || off > i16::MAX as i64 {
248        return Err(format!("Invalid offset {off}"));
249    }
250    if imm < i32::MIN as i64 || imm > i32::MAX as i64 {
251        return Err(format!("Invalid immediate {imm}"));
252    }
253    Ok(Insn {
254        ptr: 0,
255        opc,
256        dst: dst as u8,
257        src: src as u8,
258        off: off as i16,
259        imm,
260    })
261}
262
/// Converts a label reference into a jump offset relative to the instruction
/// that follows `insn_ptr`.
///
/// Returns `target - insn_ptr - 1` for a label present in `labels`, or a
/// `"Label not found ..."` error when the label is unknown.
fn resolve_label(
    insn_ptr: usize,
    labels: &HashMap<&str, usize>,
    label: &str,
) -> Result<i64, String> {
    match labels.get(label) {
        Some(&target_pc) => Ok(target_pc as i64 - insn_ptr as i64 - 1),
        None => Err(format!("Label not found {label}")),
    }
}
273
274/// Parse assembly source and translate to binary.
275///
276/// # Examples
277///
278/// ```
279/// use solana_rbpf::{assembler::assemble, program::BuiltinProgram, vm::{Config, TestContextObject}};
280/// let executable = assemble::<TestContextObject>(
281///    "add64 r1, 0x605
282///     mov64 r2, 0x32
283///     mov64 r1, r0
284///     be16 r0
285///     neg64 r2
286///     exit",
287///     std::sync::Arc::new(BuiltinProgram::new_mock()),
288/// ).unwrap();
289/// let program = executable.get_text_bytes().1;
290/// println!("{:?}", program);
291/// # assert_eq!(program,
292/// #            &[0x07, 0x01, 0x00, 0x00, 0x05, 0x06, 0x00, 0x00,
293/// #              0xb7, 0x02, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
294/// #              0xbf, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
295/// #              0xdc, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
296/// #              0x87, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
297/// #              0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]);
298/// ```
299///
300/// This will produce the following output:
301///
302/// ```test
303/// [0x07, 0x01, 0x00, 0x00, 0x05, 0x06, 0x00, 0x00,
304///  0xb7, 0x02, 0x00, 0x00, 0x32, 0x00, 0x00, 0x00,
305///  0xbf, 0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
306///  0xdc, 0x00, 0x00, 0x00, 0x10, 0x00, 0x00, 0x00,
307///  0x87, 0x02, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
308///  0x95, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
309/// ```
310pub fn assemble<C: ContextObject>(
311    src: &str,
312    loader: Arc<BuiltinProgram<C>>,
313) -> Result<Executable<C>, String> {
314    let sbpf_version = if loader.get_config().enable_sbpf_v2 {
315        SBPFVersion::V2
316    } else {
317        SBPFVersion::V1
318    };
319
320    let statements = parse(src)?;
321    let instruction_map = make_instruction_map();
322    let mut insn_ptr = 0;
323    let mut function_registry = FunctionRegistry::default();
324    let mut labels = HashMap::new();
325    labels.insert("entrypoint", 0);
326    for statement in statements.iter() {
327        match statement {
328            Statement::Label { name } => {
329                if name.starts_with("function_") || name == "entrypoint" {
330                    function_registry
331                        .register_function(insn_ptr as u32, name.as_bytes(), insn_ptr)
332                        .map_err(|_| format!("Label hash collision {name}"))?;
333                }
334                labels.insert(name.as_str(), insn_ptr);
335            }
336            Statement::Directive { name, operands } =>
337            {
338                #[allow(clippy::single_match)]
339                match (name.as_str(), operands.as_slice()) {
340                    ("fill", [Integer(repeat), Integer(_value)]) => {
341                        insn_ptr += *repeat as usize;
342                    }
343                    _ => {}
344                }
345            }
346            Statement::Instruction { name, .. } => {
347                insn_ptr += if name == "lddw" { 2 } else { 1 };
348            }
349        }
350    }
351    insn_ptr = 0;
352    let mut instructions: Vec<Insn> = Vec::new();
353    for statement in statements.iter() {
354        match statement {
355            Statement::Label { .. } => {}
356            Statement::Directive { name, operands } =>
357            {
358                #[allow(clippy::single_match)]
359                match (name.as_str(), operands.as_slice()) {
360                    ("fill", [Integer(repeat), Integer(value)]) => {
361                        for _ in 0..*repeat {
362                            instructions.push(Insn {
363                                ptr: insn_ptr,
364                                opc: *value as u8,
365                                dst: (*value >> 8) as u8 & 0xF,
366                                src: (*value >> 12) as u8 & 0xF,
367                                off: (*value >> 16) as u16 as i16,
368                                imm: (*value >> 32) as u32 as i64,
369                            });
370                            insn_ptr += 1;
371                        }
372                    }
373                    _ => return Err(format!("Invalid directive {name:?}")),
374                }
375            }
376            Statement::Instruction { name, operands } => {
377                let name = name.as_str();
378                match instruction_map.get(name) {
379                    Some(&(inst_type, opc)) => {
380                        let mut insn = match (inst_type, operands.as_slice()) {
381                            (AluBinary, [Register(dst), Register(src)]) => {
382                                insn(opc | ebpf::BPF_X, *dst, *src, 0, 0)
383                            }
384                            (AluBinary, [Register(dst), Integer(imm)]) => {
385                                insn(opc | ebpf::BPF_K, *dst, 0, 0, *imm)
386                            }
387                            (AluUnary, [Register(dst)]) => insn(opc, *dst, 0, 0, 0),
388                            (LoadAbs, [Integer(imm)]) => insn(opc, 0, 0, 0, *imm),
389                            (LoadInd, [Register(src), Integer(imm)]) => insn(opc, 0, *src, 0, *imm),
390                            (LoadReg, [Register(dst), Memory(src, off)])
391                            | (StoreReg, [Memory(dst, off), Register(src)]) => {
392                                insn(opc, *dst, *src, *off, 0)
393                            }
394                            (StoreImm, [Memory(dst, off), Integer(imm)]) => {
395                                insn(opc, *dst, 0, *off, *imm)
396                            }
397                            (NoOperand, []) => insn(opc, 0, 0, 0, 0),
398                            (JumpUnconditional, [Integer(off)]) => insn(opc, 0, 0, *off, 0),
399                            (JumpConditional, [Register(dst), Register(src), Integer(off)]) => {
400                                insn(opc | ebpf::BPF_X, *dst, *src, *off, 0)
401                            }
402                            (JumpConditional, [Register(dst), Integer(imm), Integer(off)]) => {
403                                insn(opc | ebpf::BPF_K, *dst, 0, *off, *imm)
404                            }
405                            (JumpUnconditional, [Label(label)]) => {
406                                insn(opc, 0, 0, resolve_label(insn_ptr, &labels, label)?, 0)
407                            }
408                            (CallImm, [Integer(imm)]) => {
409                                let target_pc = *imm + insn_ptr as i64 + 1;
410                                let label = format!("function_{}", target_pc as usize);
411                                function_registry
412                                    .register_function(
413                                        target_pc as u32,
414                                        label.as_bytes(),
415                                        target_pc as usize,
416                                    )
417                                    .map_err(|_| format!("Label hash collision {name}"))?;
418                                insn(opc, 0, 1, 0, target_pc)
419                            }
420                            (CallReg, [Register(dst)]) => {
421                                if sbpf_version.callx_uses_src_reg() {
422                                    insn(opc, 0, *dst, 0, 0)
423                                } else {
424                                    insn(opc, 0, 0, 0, *dst)
425                                }
426                            }
427                            (JumpConditional, [Register(dst), Register(src), Label(label)]) => {
428                                insn(
429                                    opc | ebpf::BPF_X,
430                                    *dst,
431                                    *src,
432                                    resolve_label(insn_ptr, &labels, label)?,
433                                    0,
434                                )
435                            }
436                            (JumpConditional, [Register(dst), Integer(imm), Label(label)]) => insn(
437                                opc | ebpf::BPF_K,
438                                *dst,
439                                0,
440                                resolve_label(insn_ptr, &labels, label)?,
441                                *imm,
442                            ),
443                            (Syscall, [Label(label)]) => insn(
444                                opc,
445                                0,
446                                0,
447                                0,
448                                ebpf::hash_symbol_name(label.as_bytes()) as i32 as i64,
449                            ),
450                            (CallImm, [Label(label)]) => {
451                                let label: &str = label;
452                                let target_pc = *labels
453                                    .get(label)
454                                    .ok_or_else(|| format!("Label not found {label}"))?;
455                                insn(opc, 0, 1, 0, target_pc as i64)
456                            }
457                            (Endian(size), [Register(dst)]) => insn(opc, *dst, 0, 0, size),
458                            (LoadDwImm, [Register(dst), Integer(imm)]) => {
459                                insn(opc, *dst, 0, 0, (*imm << 32) >> 32)
460                            }
461                            _ => Err(format!("Unexpected operands: {operands:?}")),
462                        }?;
463                        insn.ptr = insn_ptr;
464                        instructions.push(insn);
465                        insn_ptr += 1;
466                        if let LoadDwImm = inst_type {
467                            if let Integer(imm) = operands[1] {
468                                instructions.push(Insn {
469                                    ptr: insn_ptr,
470                                    imm: imm >> 32,
471                                    ..Insn::default()
472                                });
473                                insn_ptr += 1;
474                            }
475                        }
476                    }
477                    None => return Err(format!("Invalid instruction {name:?}")),
478                }
479            }
480        }
481    }
482    let program = instructions
483        .iter()
484        .flat_map(|insn| insn.to_vec())
485        .collect::<Vec<_>>();
486    Executable::<C>::from_text_bytes(&program, loader, sbpf_version, function_registry)
487        .map_err(|err| format!("Executable constructor {err:?}"))
488}