// solana_sbpf/jit.rs
1//! Just-in-time compiler (Linux x86, macOS x86)
2
3// Derived from uBPF <https://github.com/iovisor/ubpf>
4// Copyright 2015 Big Switch Networks, Inc
5//      (uBPF: JIT algorithm, originally in C)
6// Copyright 2016 6WIND S.A. <quentin.monnet@6wind.com>
7//      (Translation to Rust, MetaBuff addition)
8// Copyright 2020 Solana Maintainers <maintainers@solana.com>
9//
10// Licensed under the Apache License, Version 2.0 <http://www.apache.org/licenses/LICENSE-2.0> or
11// the MIT license <http://opensource.org/licenses/MIT>, at your option. This file may not be
12// copied, modified, or distributed except according to those terms.
13
14#![allow(clippy::arithmetic_side_effects)]
15
16#[cfg(not(feature = "shuttle-test"))]
17use rand::{thread_rng, Rng};
18#[cfg(feature = "shuttle-test")]
19use shuttle::rand::{thread_rng, Rng};
20
21use rand::{
22    distributions::{Distribution, Uniform},
23    rngs::SmallRng,
24    SeedableRng,
25};
26use std::{fmt::Debug, mem, ptr};
27
28use crate::{
29    ebpf::{self, FIRST_SCRATCH_REG, FRAME_PTR_REG, INSN_SIZE, SCRATCH_REGS},
30    elf::Executable,
31    error::{EbpfError, ProgramResult},
32    memory_management::{
33        allocate_pages, free_pages, get_system_page_size, protect_pages, round_to_page_size,
34    },
35    memory_region::MemoryMapping,
36    program::BuiltinFunction,
37    vm::{get_runtime_environment_key, Config, ContextObject, EbpfVm, RuntimeEnvironmentSlot},
38    x86::{
39        FenceType, X86IndirectAccess, X86Instruction,
40        X86Register::{self, *},
41        ARGUMENT_REGISTERS, CALLEE_SAVED_REGISTERS, CALLER_SAVED_REGISTERS,
42    },
43};
44
/// The maximum machine code length in bytes of a program with no guest instructions
/// (i.e. the fixed budget for prologue, subroutines and epilogue)
pub const MAX_EMPTY_PROGRAM_MACHINE_CODE_LENGTH: usize = 4096;
/// The maximum machine code length in bytes a single guest instruction can expand to
pub const MAX_MACHINE_CODE_LENGTH_PER_INSTRUCTION: usize = 110;
/// The maximum machine code length in bytes of an instruction meter checkpoint
pub const MACHINE_CODE_PER_INSTRUCTION_METER_CHECKPOINT: usize = 24;
/// The maximum machine code length of the randomized padding emitted before the guest code
pub const MAX_START_PADDING_LENGTH: usize = 256;
53
/// The program compiled to native host machinecode
///
/// Both sections are page-aligned views into a single contiguous allocation
/// (see `JitProgram::new` and `Drop for JitProgram`).
pub struct JitProgram {
    /// OS page size in bytes and the alignment of the sections
    page_size: usize,
    /// Byte offset in the text_section for each BPF instruction
    pc_section: &'static mut [u32],
    /// The x86 machinecode
    text_section: &'static mut [u8],
}
63
impl JitProgram {
    /// Allocates one contiguous page-aligned mapping and splits it into the
    /// pc lookup table (`pc_section`) followed by the machine code (`text_section`).
    ///
    /// `pc` is the number of guest instruction slots, `code_size` the
    /// (over-)estimated machine code size in bytes; both are rounded up to
    /// whole pages.
    fn new(pc: usize, code_size: usize) -> Result<Self, EbpfError> {
        let page_size = get_system_page_size();
        let pc_loc_table_size = round_to_page_size(pc * std::mem::size_of::<u32>(), page_size);
        let over_allocated_code_size = round_to_page_size(code_size, page_size);
        unsafe {
            // SAFETY: `raw` is a fresh mapping large enough for both sections,
            // so the two slices below are disjoint and in bounds.
            let raw = allocate_pages(pc_loc_table_size + over_allocated_code_size)?;
            Ok(Self {
                page_size,
                pc_section: std::slice::from_raw_parts_mut(raw.cast::<u32>(), pc),
                text_section: std::slice::from_raw_parts_mut(
                    raw.add(pc_loc_table_size),
                    over_allocated_code_size,
                ),
            })
        }
    }

    /// Finalizes the compilation output:
    /// - pads the used machine code up to the next page boundary with `int3` (0xCC),
    /// - returns the unused over-allocated tail pages to the OS,
    /// - shrinks `text_section` to the actually used length,
    /// - makes the lookup table read-only and the text section executable.
    fn seal(&mut self, text_section_usage: usize) -> Result<(), EbpfError> {
        // A page size of zero means nothing was allocated — presumably a
        // placeholder program; TODO confirm against construction sites.
        if self.page_size == 0 {
            return Ok(());
        }
        let raw = self.pc_section.as_ptr() as *mut u8;
        let pc_loc_table_size =
            round_to_page_size(std::mem::size_of_val(self.pc_section), self.page_size);
        let over_allocated_code_size = round_to_page_size(self.text_section.len(), self.page_size);
        let code_size = round_to_page_size(text_section_usage, self.page_size);
        unsafe {
            // Fill with debugger traps
            std::ptr::write_bytes(
                raw.add(pc_loc_table_size).add(text_section_usage),
                0xcc,
                code_size - text_section_usage,
            );
            // Free the over-allocated pages beyond the last page actually used
            if over_allocated_code_size > code_size {
                free_pages(
                    raw.add(pc_loc_table_size).add(code_size),
                    over_allocated_code_size - code_size,
                )?;
            }
            self.text_section =
                std::slice::from_raw_parts_mut(raw.add(pc_loc_table_size), text_section_usage);
            protect_pages(
                self.pc_section.as_mut_ptr().cast::<u8>(),
                pc_loc_table_size,
                false,
            )?;
            protect_pages(self.text_section.as_mut_ptr(), code_size, true)?;
        }
        Ok(())
    }

    /// Transfers control to the compiled machine code,
    /// starting execution at the guest pc given in `registers[11]`.
    ///
    /// Saves RBX / RBP manually (they cannot be asm operands in current rustc),
    /// publishes the host stack pointer into the VM, loads the guest registers
    /// from the `registers` array and calls the entrypoint.
    pub(crate) fn invoke<C: ContextObject>(
        &self,
        _config: &Config,
        vm: &mut EbpfVm<C>,
        registers: [u64; 12],
    ) {
        unsafe {
            // The compiled code addresses the VM through a pointer displaced by
            // get_runtime_environment_key() (see vm module).
            let runtime_environment = std::ptr::addr_of_mut!(*vm)
                .cast::<u64>()
                .offset(get_runtime_environment_key() as isize);
            // Per the instruction meter scheme (see the explanation further down
            // in this file) the entry target pc is added to the meter up front.
            let instruction_meter =
                (vm.previous_instruction_meter as i64).wrapping_add(registers[11] as i64);
            // Look up the host code offset of the entry pc; the top bit of the
            // pc_section entry is masked off here — presumably used as a flag
            // elsewhere, TODO confirm.
            let entrypoint = &self.text_section
                [self.pc_section[registers[11] as usize] as usize & (i32::MAX as u32 as usize)]
                as *const u8;
            // Attributes cannot be applied to a single macro argument, so this
            // wrapper emits the asm! with the $guarded line included only when
            // $feature is NOT enabled.
            macro_rules! stmt_expr_attribute_asm {
                ($($prologue:literal,)+ cfg(not(feature = $feature:literal)), $guarded:tt, $($epilogue:tt)+) => {
                    #[cfg(feature = $feature)]
                    std::arch::asm!($($prologue,)+ $($epilogue)+);
                    #[cfg(not(feature = $feature))]
                    std::arch::asm!($($prologue,)+ $guarded, $($epilogue)+);
                }
            }
            stmt_expr_attribute_asm!(
                // RBP and RBX must be saved and restored manually in the current version of rustc and llvm.
                "push rbx",
                "push rbp",
                "mov [{host_stack_pointer}], rsp",
                "add QWORD PTR [{host_stack_pointer}], -8",
                // RBP is zeroed out in order not to compromise the runtime environment (RDI) encryption.
                cfg(not(feature = "jit-enable-host-stack-frames")),
                "xor rbp, rbp",
                // Stash the entrypoint (RAX) below the stack pointer, then load
                // guest registers r0..r10 and the pc from the registers array.
                "mov [rsp-8], rax",
                "mov rax, [r11 + 0x00]",
                "mov rsi, [r11 + 0x08]",
                "mov rdx, [r11 + 0x10]",
                "mov rcx, [r11 + 0x18]",
                "mov r8,  [r11 + 0x20]",
                "mov r9,  [r11 + 0x28]",
                "mov rbx, [r11 + 0x30]",
                "mov r12, [r11 + 0x38]",
                "mov r13, [r11 + 0x40]",
                "mov r14, [r11 + 0x48]",
                "mov r15, [r11 + 0x50]",
                "mov r11, [r11 + 0x58]",
                "call [rsp-8]",
                "pop rbp",
                "pop rbx",
                host_stack_pointer = in(reg) &mut vm.host_stack_pointer,
                inlateout("rdi") runtime_environment => _,
                inlateout("r10") instruction_meter => _,
                inlateout("rax") entrypoint => _,
                inlateout("r11") &registers => _,
                lateout("rsi") _, lateout("rdx") _, lateout("rcx") _, lateout("r8") _,
                lateout("r9") _, lateout("r12") _, lateout("r13") _, lateout("r14") _, lateout("r15") _,
                // lateout("rbp") _, lateout("rbx") _,
            );
        }
    }

    /// The length of the host machinecode in bytes
    pub fn machine_code_length(&self) -> usize {
        self.text_section.len()
    }

    /// The total memory used in bytes rounded up to page boundaries
    pub fn mem_size(&self) -> usize {
        let pc_loc_table_size =
            round_to_page_size(std::mem::size_of_val(self.pc_section), self.page_size);
        let code_size = round_to_page_size(self.text_section.len(), self.page_size);
        pc_loc_table_size + code_size
    }
}
189
190impl Drop for JitProgram {
191    fn drop(&mut self) {
192        let pc_loc_table_size =
193            round_to_page_size(std::mem::size_of_val(self.pc_section), self.page_size);
194        let code_size = round_to_page_size(self.text_section.len(), self.page_size);
195        if pc_loc_table_size + code_size > 0 {
196            unsafe {
197                let _ = free_pages(
198                    self.pc_section.as_ptr() as *mut u8,
199                    pc_loc_table_size + code_size,
200                );
201            }
202        }
203    }
204}
205
206impl Debug for JitProgram {
207    fn fmt(&self, fmt: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
208        fmt.write_fmt(format_args!("JitProgram {:?}", self as *const _))
209    }
210}
211
212impl PartialEq for JitProgram {
213    fn eq(&self, other: &Self) -> bool {
214        std::ptr::eq(self as *const _, other as *const _)
215    }
216}
217
// Indices of the subroutines ("anchors") emitted ahead of the guest code.
// Used to define subroutines and then call them
// See JitCompiler::set_anchor() and JitCompiler::relative_to_anchor()
const ANCHOR_TRACE: usize = 0;
const ANCHOR_THROW_EXCEEDED_MAX_INSTRUCTIONS: usize = 1;
const ANCHOR_EPILOGUE: usize = 2;
const ANCHOR_THROW_EXCEPTION_UNCHECKED: usize = 3;
const ANCHOR_EXIT: usize = 4;
const ANCHOR_THROW_EXCEPTION: usize = 5;
const ANCHOR_CALL_DEPTH_EXCEEDED: usize = 6;
const ANCHOR_CALL_REG_OUTSIDE_TEXT_SEGMENT: usize = 7;
const ANCHOR_DIV_BY_ZERO: usize = 8;
const ANCHOR_DIV_OVERFLOW: usize = 9;
const ANCHOR_CALL_REG_UNSUPPORTED_INSTRUCTION: usize = 10;
const ANCHOR_CALL_UNSUPPORTED_INSTRUCTION: usize = 11;
const ANCHOR_EXTERNAL_FUNCTION_CALL: usize = 12;
const ANCHOR_INTERNAL_FUNCTION_CALL_PROLOGUE: usize = 13;
const ANCHOR_INTERNAL_FUNCTION_CALL_REG: usize = 14;
// NOTE(review): indices 15..=20 and 22..=33 are not named here — presumably
// derived anchors (e.g. per-operand-size variants) allocated relative to the
// bases above by the emit code; TODO confirm against emit_subroutines().
const ANCHOR_TRANSLATE_MEMORY_ADDRESS: usize = 21;
const ANCHOR_COUNT: usize = 34; // Update me when adding or removing anchors
237
/// Mapping of the guest BPF registers r0..=r10 to host x86 registers
const REGISTER_MAP: [X86Register; 11] = [
    CALLER_SAVED_REGISTERS[0], // RAX
    ARGUMENT_REGISTERS[1],     // RSI
    ARGUMENT_REGISTERS[2],     // RDX
    ARGUMENT_REGISTERS[3],     // RCX
    ARGUMENT_REGISTERS[4],     // R8
    ARGUMENT_REGISTERS[5],     // R9
    CALLEE_SAVED_REGISTERS[1], // RBX
    CALLEE_SAVED_REGISTERS[2], // R12
    CALLEE_SAVED_REGISTERS[3], // R13
    CALLEE_SAVED_REGISTERS[4], // R14
    CALLEE_SAVED_REGISTERS[5], // R15
];

/// RDI: Used together with slot_in_vm()
const REGISTER_PTR_TO_VM: X86Register = ARGUMENT_REGISTERS[0];
/// R10: Program counter limit
const REGISTER_INSTRUCTION_METER: X86Register = CALLER_SAVED_REGISTERS[7];
/// R11: Scratch register
const REGISTER_SCRATCH: X86Register = CALLER_SAVED_REGISTERS[8];
258
/// Bit width of an instruction operand
///
/// The discriminant equals the width in bits.
#[derive(Copy, Clone, Debug)]
pub enum OperandSize {
    /// Empty
    S0 = 0,
    /// 8 bit
    S8 = 8,
    /// 16 bit
    S16 = 16,
    /// 32 bit
    S32 = 32,
    /// 64 bit
    S64 = 64,
}
273
/// An operand value resolved at JIT time (for memory accesses and calls).
///
/// NOTE(review): the trailing `bool` in most variants looks like a
/// "user-provided / needs sanitization" flag — TODO confirm at the emit sites.
enum Value {
    /// Value lives in the given host register
    Register(X86Register),
    /// Value is loaded from memory at register + i32 offset
    RegisterIndirect(X86Register, i32, bool),
    /// Value is register + 32 bit constant
    RegisterPlusConstant32(X86Register, i32, bool),
    /// Value is register + 64 bit constant
    RegisterPlusConstant64(X86Register, i64, bool),
    /// Value is a 64 bit constant
    Constant64(i64, bool),
}
281
/// Pairs an index (presumably the argument register slot — TODO confirm)
/// with the value to place there before a call.
struct Argument {
    index: usize,
    value: Value,
}
286
/// A jump in the text section whose displacement still needs back-patching
/// once the target instruction's host offset is known.
#[derive(Debug)]
struct Jump {
    // Host address of the displacement to patch — TODO confirm exact encoding
    location: *const u8,
    // Guest pc of the jump target
    target_pc: usize,
}
292
293/* Explanation of the Instruction Meter
294
295    The instruction meter serves two purposes: First, measure how many BPF instructions are
296    executed (profiling) and second, limit this number by stopping the program with an exception
297    once a given threshold is reached (validation). One approach would be to increment and
298    validate the instruction meter before each instruction. However, this would heavily impact
299    performance. Thus, we only profile and validate the instruction meter at branches.
300
301    For this, we implicitly sum up all the instructions between two branches.
302    It is easy to know the end of such a slice of instructions, but how do we know where it
303    started? There could be multiple ways to jump onto a path which all lead to the same final
304    branch. This is, where the integral technique comes in. The program is basically a sequence
305    of instructions with the x-axis being the program counter (short "pc"). The cost function is
306    a constant function which returns one for every point on the x axis. Now, the instruction
307    meter needs to calculate the definite integral of the cost function between the start and the
308    end of the current slice of instructions. For that we need the indefinite integral of the cost
309    function. Fortunately, the derivative of the pc is the cost function (it increases by one for
310    every instruction), thus the pc is an antiderivative of the cost function and a valid
311    indefinite integral. So, to calculate a definite integral of the cost function, we just need
312    to subtract the start pc from the end pc of the slice. This difference can then be subtracted
313    from the remaining instruction counter until it goes below zero at which point it reaches
314    the instruction meter limit. Ok, but how do we know the start of the slice at the end?
315
316    The trick is: We do not need to know. As subtraction and addition are associative operations,
317    we can reorder them, even beyond the current branch. Thus, we can simply account for the
318    amount the start will subtract at the next branch by already adding that to the remaining
319    instruction counter at the current branch. So, every branch just subtracts its current pc
320    (the end of the slice) and adds the target pc (the start of the next slice) to the remaining
321    instruction counter. This way, no branch needs to know the pc of the last branch explicitly.
322    Another way to think about this trick is as follows: The remaining instruction counter now
323    measures what the maximum pc is, that we can reach with the remaining budget after the last
324    branch.
325
326    One problem are conditional branches. There are basically two ways to handle them: Either,
327    only do the profiling if the branch is taken, which requires two jumps (one for the profiling
328    and one to get to the target pc). Or, always profile it as if the jump to the target pc was
329    taken, but then behind the conditional branch, undo the profiling (as it was not taken). We
330    use the second method and the undo profiling is the same as the normal profiling, just with
331    reversed plus and minus signs.
332
333    Another special case to keep in mind are return instructions. They would require us to know
334    the return address (target pc), but in the JIT we already converted that to be a host address.
335    Of course, one could also save the BPF return address on the stack, but an even simpler
336    solution exists: Just count as if you were jumping to a specific target pc before the exit,
337    and then after returning use the undo profiling. The trick is, that the undo profiling now
338    has the current pc which is the BPF return address. The virtual target pc we count towards
339    and undo again can be anything, so we just set it to zero.
340*/
341
/// Temporary object which stores the compilation context
pub struct JitCompiler<'a, C: ContextObject> {
    /// The compilation output being filled in
    result: JitProgram,
    /// Jumps whose relative displacements still need back-patching
    text_section_jumps: Vec<Jump>,
    /// Host addresses of the emitted subroutines, indexed by the ANCHOR_* constants
    anchors: [*const u8; ANCHOR_COUNT],
    /// Write cursor into result.text_section
    offset_in_text_section: usize,
    /// The executable being compiled
    executable: &'a Executable<C>,
    /// Guest text section bytes
    program: &'a [u8],
    /// Guest address of the start of the text section
    program_vm_addr: u64,
    config: &'a Config,
    /// Guest pc of the instruction currently being compiled
    pc: usize,
    /// Last guest pc at which the instruction meter was validated
    last_instruction_meter_validation_pc: usize,
    /// Countdown until the next noop insertion (u32::MAX disables insertion)
    next_noop_insertion: u32,
    /// Distribution the noop insertion countdown is drawn from
    noop_range: Uniform<u32>,
    /// Displacement applied when addressing the runtime environment
    runtime_environment_key: i32,
    /// Random key — presumably used to sanitize immediate values, TODO confirm
    immediate_value_key: i64,
    /// RNG driving the machine code diversification (padding, noops, keys)
    diversification_rng: SmallRng,
    /// Whether a stopwatch measurement is currently running
    stopwatch_is_active: bool,
}
361
362#[rustfmt::skip]
363impl<'a, C: ContextObject> JitCompiler<'a, C> {
    /// Constructs a new compiler and allocates memory for the compilation output
    ///
    /// Counts the guest instructions, derives an upper bound for the machine
    /// code size from the MAX_* constants, and seeds the diversification RNG.
    pub fn new(executable: &'a Executable<C>) -> Result<Self, EbpfError> {
        let config = executable.get_config();
        let (program_vm_addr, program) = executable.get_text_bytes();

        // Scan through program to find actual number of instructions
        // (with lddw enabled, LD_DW_IMM occupies two instruction slots)
        let mut pc = 0;
        if !executable.get_sbpf_version().disable_lddw() {
            while (pc + 1) * ebpf::INSN_SIZE <= program.len() {
                let insn = ebpf::get_insn_unchecked(program, pc);
                pc += match insn.opc {
                    ebpf::LD_DW_IMM => 2,
                    _ => 1,
                };
            }
        } else {
            pc = program.len() / ebpf::INSN_SIZE;
        }

        // Worst-case output size: fixed prologue/subroutine budget, start
        // padding, plus the per-instruction maximum for every guest instruction
        let mut code_length_estimate = MAX_EMPTY_PROGRAM_MACHINE_CODE_LENGTH + MAX_START_PADDING_LENGTH + MAX_MACHINE_CODE_LENGTH_PER_INSTRUCTION * pc;
        if config.noop_instruction_rate != 0 {
            code_length_estimate += code_length_estimate / config.noop_instruction_rate as usize;
        }
        // checked_div: a checkpoint distance of zero adds no checkpoint budget
        if let Some(q) = pc.checked_div(config.instruction_meter_checkpoint_distance) {
            code_length_estimate += q * MACHINE_CODE_PER_INSTRUCTION_METER_CHECKPOINT;
        }
        // Relative jump destinations limit the maximum output size
        debug_assert!(code_length_estimate < (i32::MAX as usize));

        let runtime_environment_key = get_runtime_environment_key();
        // Failure to seed the RNG is surfaced as JitNotCompiled
        let mut diversification_rng = SmallRng::from_rng(thread_rng()).map_err(|_| EbpfError::JitNotCompiled)?;
        let immediate_value_key = diversification_rng.gen::<i64>();

        Ok(Self {
            result: JitProgram::new(pc, code_length_estimate)?,
            text_section_jumps: vec![],
            anchors: [std::ptr::null(); ANCHOR_COUNT],
            offset_in_text_section: 0,
            executable,
            program_vm_addr,
            program,
            config,
            pc: 0,
            last_instruction_meter_validation_pc: 0,
            // A noop_instruction_rate of zero disables noop insertion entirely
            next_noop_insertion: if config.noop_instruction_rate == 0 { u32::MAX } else { diversification_rng.gen_range(0..config.noop_instruction_rate * 2) },
            noop_range: Uniform::new_inclusive(0, config.noop_instruction_rate * 2),
            runtime_environment_key,
            immediate_value_key,
            diversification_rng,
            stopwatch_is_active: false,
        })
    }
416
417    /// Compiles the given executable, consuming the compiler
418    pub fn compile(mut self) -> Result<JitProgram, EbpfError> {
419        // Randomized padding at the start before random intervals begin
420        if self.config.noop_instruction_rate != 0 {
421            for _ in 0..self.diversification_rng.gen_range(0..MAX_START_PADDING_LENGTH) {
422                // X86Instruction::noop().emit(self)?;
423                self.emit::<u8>(0x90);
424            }
425        }
426
427        self.emit_subroutines();
428
429        while self.pc * ebpf::INSN_SIZE < self.program.len() {
430            if self.offset_in_text_section + MAX_MACHINE_CODE_LENGTH_PER_INSTRUCTION * 2 >= self.result.text_section.len() {
431                return Err(EbpfError::ExhaustedTextSegment(self.pc));
432            }
433            let mut insn = ebpf::get_insn_unchecked(self.program, self.pc);
434            self.result.pc_section[self.pc] = self.offset_in_text_section as u32;
435
436            // Regular instruction meter checkpoints to prevent long linear runs from exceeding their budget
437            if self.last_instruction_meter_validation_pc + self.config.instruction_meter_checkpoint_distance <= self.pc {
438                self.emit_validate_instruction_count(Some(self.pc));
439            }
440
441            if self.config.enable_register_tracing {
442                self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, self.pc as i64));
443                self.emit_ins(X86Instruction::call_immediate(self.relative_to_anchor(ANCHOR_TRACE, 5)));
444                self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, 0));
445            }
446
447            let dst = REGISTER_MAP[insn.dst as usize];
448            let src = REGISTER_MAP[insn.src as usize];
449            let target_pc = (self.pc as isize + insn.off as isize + 1) as usize;
450
451            match insn.opc {
452                ebpf::LD_DW_IMM if !self.executable.get_sbpf_version().disable_lddw() => {
453                    self.emit_validate_and_profile_instruction_count(self.pc + 2);
454                    self.pc += 1;
455                    self.result.pc_section[self.pc] = unsafe { self.anchors[ANCHOR_CALL_UNSUPPORTED_INSTRUCTION].offset_from(self.result.text_section.as_ptr()) as u32 };
456                    ebpf::augment_lddw_unchecked(self.program, &mut insn);
457                    if self.should_sanitize_constant(insn.imm) {
458                        self.emit_sanitized_load_immediate(dst, insn.imm);
459                    } else {
460                        self.emit_ins(X86Instruction::load_immediate(dst, insn.imm));
461                    }
462                },
463
464                // BPF_LDX class
465                ebpf::LD_B_REG  if !self.executable.get_sbpf_version().move_memory_instruction_classes() => {
466                    self.emit_address_translation(Some(dst), Value::RegisterPlusConstant64(src, insn.off as i64, true), 1, None);
467                },
468                ebpf::LD_H_REG  if !self.executable.get_sbpf_version().move_memory_instruction_classes() => {
469                    self.emit_address_translation(Some(dst), Value::RegisterPlusConstant64(src, insn.off as i64, true), 2, None);
470                },
471                ebpf::LD_W_REG  if !self.executable.get_sbpf_version().move_memory_instruction_classes() => {
472                    self.emit_address_translation(Some(dst), Value::RegisterPlusConstant64(src, insn.off as i64, true), 4, None);
473                },
474                ebpf::LD_DW_REG if !self.executable.get_sbpf_version().move_memory_instruction_classes() => {
475                    self.emit_address_translation(Some(dst), Value::RegisterPlusConstant64(src, insn.off as i64, true), 8, None);
476                },
477
478                // BPF_ST class
479                ebpf::ST_B_IMM  if !self.executable.get_sbpf_version().move_memory_instruction_classes() => {
480                    self.emit_address_translation(None, Value::RegisterPlusConstant64(dst, insn.off as i64, true), 1, Some(Value::Constant64(insn.imm, true)));
481                },
482                ebpf::ST_H_IMM  if !self.executable.get_sbpf_version().move_memory_instruction_classes() => {
483                    self.emit_address_translation(None, Value::RegisterPlusConstant64(dst, insn.off as i64, true), 2, Some(Value::Constant64(insn.imm, true)));
484                },
485                ebpf::ST_W_IMM  if !self.executable.get_sbpf_version().move_memory_instruction_classes() => {
486                    self.emit_address_translation(None, Value::RegisterPlusConstant64(dst, insn.off as i64, true), 4, Some(Value::Constant64(insn.imm, true)));
487                },
488                ebpf::ST_DW_IMM if !self.executable.get_sbpf_version().move_memory_instruction_classes() => {
489                    self.emit_address_translation(None, Value::RegisterPlusConstant64(dst, insn.off as i64, true), 8, Some(Value::Constant64(insn.imm, true)));
490                },
491
492                // BPF_STX class
493                ebpf::ST_B_REG  if !self.executable.get_sbpf_version().move_memory_instruction_classes() => {
494                    self.emit_address_translation(None, Value::RegisterPlusConstant64(dst, insn.off as i64, true), 1, Some(Value::Register(src)));
495                },
496                ebpf::ST_H_REG  if !self.executable.get_sbpf_version().move_memory_instruction_classes() => {
497                    self.emit_address_translation(None, Value::RegisterPlusConstant64(dst, insn.off as i64, true), 2, Some(Value::Register(src)));
498                },
499                ebpf::ST_W_REG  if !self.executable.get_sbpf_version().move_memory_instruction_classes() => {
500                    self.emit_address_translation(None, Value::RegisterPlusConstant64(dst, insn.off as i64, true), 4, Some(Value::Register(src)));
501                },
502                ebpf::ST_DW_REG if !self.executable.get_sbpf_version().move_memory_instruction_classes() => {
503                    self.emit_address_translation(None, Value::RegisterPlusConstant64(dst, insn.off as i64, true), 8, Some(Value::Register(src)));
504                },
505
506                // BPF_ALU32_LOAD class
507                ebpf::ADD32_IMM  => {
508                    self.emit_sanitized_alu(OperandSize::S32, 0x01, 0, dst, insn.imm);
509                    if !self.executable.get_sbpf_version().explicit_sign_extension_of_results() {
510                        self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x63, dst, dst, None)); // sign extend i32 to i64
511                    }
512                },
513                ebpf::ADD32_REG  => {
514                    self.emit_ins(X86Instruction::alu(OperandSize::S32, 0x01, src, dst, None));
515                    if !self.executable.get_sbpf_version().explicit_sign_extension_of_results() {
516                        self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x63, dst, dst, None)); // sign extend i32 to i64
517                    }
518                },
519                ebpf::SUB32_IMM  => {
520                    if self.executable.get_sbpf_version().swap_sub_reg_imm_operands() {
521                        self.emit_ins(X86Instruction::alu_immediate(OperandSize::S32, 0xf7, 3, dst, 0, None));
522                        if insn.imm != 0 {
523                            self.emit_sanitized_alu(OperandSize::S32, 0x01, 0, dst, insn.imm);
524                        }
525                    } else {
526                        self.emit_sanitized_alu(OperandSize::S32, 0x29, 5, dst, insn.imm);
527                    }
528                    if !self.executable.get_sbpf_version().explicit_sign_extension_of_results() {
529                        self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x63, dst, dst, None)); // sign extend i32 to i64
530                    }
531                },
532                ebpf::SUB32_REG  => {
533                    self.emit_ins(X86Instruction::alu(OperandSize::S32, 0x29, src, dst, None));
534                    if !self.executable.get_sbpf_version().explicit_sign_extension_of_results() {
535                        self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x63, dst, dst, None)); // sign extend i32 to i64
536                    }
537                },
538                ebpf::MUL32_IMM if !self.executable.get_sbpf_version().enable_pqr() => {
539                    if self.should_sanitize_constant(insn.imm) {
540                        self.emit_sanitized_load_immediate(REGISTER_SCRATCH, insn.imm);
541                    } else {
542                        self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, insn.imm));
543                    }
544                    self.emit_ins(X86Instruction::alu_escaped(OperandSize::S32, 1, 0xaf, dst, REGISTER_SCRATCH, None));
545                    if !self.executable.get_sbpf_version().explicit_sign_extension_of_results() {
546                        self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x63, dst, dst, None)); // sign extend i32 to i64
547                    }
548                },
549                ebpf::DIV32_IMM | ebpf::MOD32_IMM if !self.executable.get_sbpf_version().enable_pqr() =>
550                    self.emit_product_quotient_remainder(
551                        OperandSize::S32,
552                        (insn.opc & ebpf::BPF_ALU_OP_MASK) == ebpf::BPF_MOD,
553                        (insn.opc & ebpf::BPF_ALU_OP_MASK) != ebpf::BPF_MUL,
554                        (insn.opc & ebpf::BPF_ALU_OP_MASK) == ebpf::BPF_MUL,
555                        dst, dst, Some(insn.imm),
556                    ),
557                ebpf::LD_1B_REG  if self.executable.get_sbpf_version().move_memory_instruction_classes() => {
558                    self.emit_address_translation(Some(dst), Value::RegisterPlusConstant64(src, insn.off as i64, true), 1, None);
559                },
560                ebpf::MUL32_REG if !self.executable.get_sbpf_version().enable_pqr() => {
561                    self.emit_ins(X86Instruction::alu_escaped(OperandSize::S32, 1, 0xaf, dst, src, None));
562                    if !self.executable.get_sbpf_version().explicit_sign_extension_of_results() {
563                        self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x63, dst, dst, None)); // sign extend i32 to i64
564                    }
565                },
566                ebpf::DIV32_REG | ebpf::MOD32_REG if !self.executable.get_sbpf_version().enable_pqr() =>
567                    self.emit_product_quotient_remainder(
568                        OperandSize::S32,
569                        (insn.opc & ebpf::BPF_ALU_OP_MASK) == ebpf::BPF_MOD,
570                        (insn.opc & ebpf::BPF_ALU_OP_MASK) != ebpf::BPF_MUL,
571                        (insn.opc & ebpf::BPF_ALU_OP_MASK) == ebpf::BPF_MUL,
572                        src, dst, None,
573                    ),
574                ebpf::LD_2B_REG  if self.executable.get_sbpf_version().move_memory_instruction_classes() => {
575                    self.emit_address_translation(Some(dst), Value::RegisterPlusConstant64(src, insn.off as i64, true), 2, None);
576                },
577                ebpf::OR32_IMM   => self.emit_sanitized_alu(OperandSize::S32, 0x09, 1, dst, insn.imm),
578                ebpf::OR32_REG   => self.emit_ins(X86Instruction::alu(OperandSize::S32, 0x09, src, dst, None)),
579                ebpf::AND32_IMM  => self.emit_sanitized_alu(OperandSize::S32, 0x21, 4, dst, insn.imm),
580                ebpf::AND32_REG  => self.emit_ins(X86Instruction::alu(OperandSize::S32, 0x21, src, dst, None)),
581                ebpf::LSH32_IMM  => self.emit_shift(OperandSize::S32, 4, REGISTER_SCRATCH, dst, Some(insn.imm)),
582                ebpf::LSH32_REG  => self.emit_shift(OperandSize::S32, 4, src, dst, None),
583                ebpf::RSH32_IMM  => self.emit_shift(OperandSize::S32, 5, REGISTER_SCRATCH, dst, Some(insn.imm)),
584                ebpf::RSH32_REG  => self.emit_shift(OperandSize::S32, 5, src, dst, None),
585                ebpf::NEG32      if !self.executable.get_sbpf_version().disable_neg() => self.emit_ins(X86Instruction::alu_immediate(OperandSize::S32, 0xf7, 3, dst, 0, None)),
586                ebpf::LD_4B_REG  if self.executable.get_sbpf_version().move_memory_instruction_classes() => {
587                    self.emit_address_translation(Some(dst), Value::RegisterPlusConstant64(src, insn.off as i64, true), 4, None);
588                },
589                ebpf::LD_8B_REG  if self.executable.get_sbpf_version().move_memory_instruction_classes() => {
590                    self.emit_address_translation(Some(dst), Value::RegisterPlusConstant64(src, insn.off as i64, true), 8, None);
591                },
592                ebpf::XOR32_IMM  => self.emit_sanitized_alu(OperandSize::S32, 0x31, 6, dst, insn.imm),
593                ebpf::XOR32_REG  => self.emit_ins(X86Instruction::alu(OperandSize::S32, 0x31, src, dst, None)),
594                ebpf::MOV32_IMM  => {
595                    if self.should_sanitize_constant(insn.imm) {
596                        self.emit_sanitized_load_immediate(dst, insn.imm as u32 as u64 as i64);
597                    } else {
598                        self.emit_ins(X86Instruction::load_immediate(dst, insn.imm as u32 as u64 as i64));
599                    }
600                }
601                ebpf::MOV32_REG  => {
602                    if self.executable.get_sbpf_version().explicit_sign_extension_of_results() {
603                        self.emit_ins(X86Instruction::mov_with_sign_extension(OperandSize::S64, src, dst));
604                    } else {
605                        self.emit_ins(X86Instruction::mov(OperandSize::S32, src, dst));
606                    }
607                }
608                ebpf::ARSH32_IMM => self.emit_shift(OperandSize::S32, 7, REGISTER_SCRATCH, dst, Some(insn.imm)),
609                ebpf::ARSH32_REG => self.emit_shift(OperandSize::S32, 7, src, dst, None),
610                ebpf::LE if !self.executable.get_sbpf_version().disable_le() => {
611                    match insn.imm {
612                        16 => {
613                            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S32, 0x81, 4, dst, 0xffff, None)); // Mask to 16 bit
614                        }
615                        32 => {
616                            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S32, 0x81, 4, dst, -1, None)); // Mask to 32 bit
617                        }
618                        64 => {}
619                        _ => {
620                            return Err(EbpfError::InvalidInstruction);
621                        }
622                    }
623                },
624                ebpf::BE         => {
625                    match insn.imm {
626                        16 => {
627                            self.emit_ins(X86Instruction::bswap(OperandSize::S16, dst));
628                            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S32, 0x81, 4, dst, 0xffff, None)); // Mask to 16 bit
629                        }
630                        32 => self.emit_ins(X86Instruction::bswap(OperandSize::S32, dst)),
631                        64 => self.emit_ins(X86Instruction::bswap(OperandSize::S64, dst)),
632                        _ => {
633                            return Err(EbpfError::InvalidInstruction);
634                        }
635                    }
636                },
637
638                // BPF_ALU64_STORE class
639                ebpf::ADD64_IMM  => self.emit_sanitized_alu(OperandSize::S64, 0x01, 0, dst, insn.imm),
640                ebpf::ADD64_REG  => self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x01, src, dst, None)),
641                ebpf::SUB64_IMM  => {
642                    if self.executable.get_sbpf_version().swap_sub_reg_imm_operands() {
643                        self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0xf7, 3, dst, 0, None));
644                        if insn.imm != 0 {
645                            self.emit_sanitized_alu(OperandSize::S64, 0x01, 0, dst, insn.imm);
646                        }
647                    } else {
648                        self.emit_sanitized_alu(OperandSize::S64, 0x29, 5, dst, insn.imm);
649                    }
650                }
651                ebpf::SUB64_REG  => self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x29, src, dst, None)),
652                ebpf::MUL64_IMM if !self.executable.get_sbpf_version().enable_pqr() => {
653                    if self.should_sanitize_constant(insn.imm) {
654                        self.emit_sanitized_load_immediate(REGISTER_SCRATCH, insn.imm);
655                    } else {
656                        self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, insn.imm));
657                    }
658                    self.emit_ins(X86Instruction::alu_escaped(OperandSize::S64, 1, 0xaf, dst, REGISTER_SCRATCH, None));
659                },
660                ebpf::DIV64_IMM | ebpf::MOD64_IMM if !self.executable.get_sbpf_version().enable_pqr() =>
661                    self.emit_product_quotient_remainder(
662                        OperandSize::S64,
663                        (insn.opc & ebpf::BPF_ALU_OP_MASK) == ebpf::BPF_MOD,
664                        (insn.opc & ebpf::BPF_ALU_OP_MASK) != ebpf::BPF_MUL,
665                        (insn.opc & ebpf::BPF_ALU_OP_MASK) == ebpf::BPF_MUL,
666                        dst, dst, Some(insn.imm),
667                    ),
668                ebpf::ST_1B_IMM  if self.executable.get_sbpf_version().move_memory_instruction_classes() => {
669                    self.emit_address_translation(None, Value::RegisterPlusConstant64(dst, insn.off as i64, true), 1, Some(Value::Constant64(insn.imm, true)));
670                },
671                ebpf::ST_2B_IMM  if self.executable.get_sbpf_version().move_memory_instruction_classes() => {
672                    self.emit_address_translation(None, Value::RegisterPlusConstant64(dst, insn.off as i64, true), 2, Some(Value::Constant64(insn.imm, true)));
673                },
674                ebpf::MUL64_REG if !self.executable.get_sbpf_version().enable_pqr() => {
675                    self.emit_ins(X86Instruction::alu_escaped(OperandSize::S64, 1, 0xaf, dst, src, None));
676                },
677                ebpf::DIV64_REG | ebpf::MOD64_REG if !self.executable.get_sbpf_version().enable_pqr() =>
678                    self.emit_product_quotient_remainder(
679                        OperandSize::S64,
680                        (insn.opc & ebpf::BPF_ALU_OP_MASK) == ebpf::BPF_MOD,
681                        (insn.opc & ebpf::BPF_ALU_OP_MASK) != ebpf::BPF_MUL,
682                        (insn.opc & ebpf::BPF_ALU_OP_MASK) == ebpf::BPF_MUL,
683                        src, dst, None,
684                    ),
685                ebpf::ST_1B_REG  if self.executable.get_sbpf_version().move_memory_instruction_classes() => {
686                    self.emit_address_translation(None, Value::RegisterPlusConstant64(dst, insn.off as i64, true), 1, Some(Value::Register(src)));
687                },
688                ebpf::ST_2B_REG  if self.executable.get_sbpf_version().move_memory_instruction_classes() => {
689                    self.emit_address_translation(None, Value::RegisterPlusConstant64(dst, insn.off as i64, true), 2, Some(Value::Register(src)));
690                },
691                ebpf::OR64_IMM   => self.emit_sanitized_alu(OperandSize::S64, 0x09, 1, dst, insn.imm),
692                ebpf::OR64_REG   => self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x09, src, dst, None)),
693                ebpf::AND64_IMM  => self.emit_sanitized_alu(OperandSize::S64, 0x21, 4, dst, insn.imm),
694                ebpf::AND64_REG  => self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x21, src, dst, None)),
695                ebpf::LSH64_IMM  => self.emit_shift(OperandSize::S64, 4, REGISTER_SCRATCH, dst, Some(insn.imm)),
696                ebpf::LSH64_REG  => self.emit_shift(OperandSize::S64, 4, src, dst, None),
697                ebpf::RSH64_IMM  => self.emit_shift(OperandSize::S64, 5, REGISTER_SCRATCH, dst, Some(insn.imm)),
698                ebpf::RSH64_REG  => self.emit_shift(OperandSize::S64, 5, src, dst, None),
699                ebpf::ST_4B_IMM  if self.executable.get_sbpf_version().move_memory_instruction_classes() => {
700                    self.emit_address_translation(None, Value::RegisterPlusConstant64(dst, insn.off as i64, true), 4, Some(Value::Constant64(insn.imm, true)));
701                },
702                ebpf::NEG64      if !self.executable.get_sbpf_version().disable_neg() => self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0xf7, 3, dst, 0, None)),
703                ebpf::ST_4B_REG  if self.executable.get_sbpf_version().move_memory_instruction_classes() => {
704                    self.emit_address_translation(None, Value::RegisterPlusConstant64(dst, insn.off as i64, true), 4, Some(Value::Register(src)));
705                },
706                ebpf::ST_8B_IMM  if self.executable.get_sbpf_version().move_memory_instruction_classes() => {
707                    self.emit_address_translation(None, Value::RegisterPlusConstant64(dst, insn.off as i64, true), 8, Some(Value::Constant64(insn.imm, true)));
708                },
709                ebpf::ST_8B_REG  if self.executable.get_sbpf_version().move_memory_instruction_classes() => {
710                    self.emit_address_translation(None, Value::RegisterPlusConstant64(dst, insn.off as i64, true), 8, Some(Value::Register(src)));
711                },
712                ebpf::XOR64_IMM  => self.emit_sanitized_alu(OperandSize::S64, 0x31, 6, dst, insn.imm),
713                ebpf::XOR64_REG  => self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x31, src, dst, None)),
714                ebpf::MOV64_IMM  => {
715                    if self.should_sanitize_constant(insn.imm) {
716                        self.emit_sanitized_load_immediate(dst, insn.imm);
717                    } else {
718                        self.emit_ins(X86Instruction::load_immediate(dst, insn.imm));
719                    }
720                }
721                ebpf::MOV64_REG  => self.emit_ins(X86Instruction::mov(OperandSize::S64, src, dst)),
722                ebpf::ARSH64_IMM => self.emit_shift(OperandSize::S64, 7, REGISTER_SCRATCH, dst, Some(insn.imm)),
723                ebpf::ARSH64_REG => self.emit_shift(OperandSize::S64, 7, src, dst, None),
724                ebpf::HOR64_IMM if self.executable.get_sbpf_version().disable_lddw() => {
725                    self.emit_sanitized_alu(OperandSize::S64, 0x09, 1, dst, (insn.imm as u64).wrapping_shl(32) as i64);
726                }
727
728                // BPF_PQR class
729                ebpf::LMUL32_IMM | ebpf::LMUL64_IMM | ebpf::UHMUL64_IMM | ebpf::SHMUL64_IMM |
730                ebpf::UDIV32_IMM | ebpf::UDIV64_IMM | ebpf::UREM32_IMM | ebpf::UREM64_IMM |
731                ebpf::SDIV32_IMM | ebpf::SDIV64_IMM | ebpf::SREM32_IMM | ebpf::SREM64_IMM
732                if self.executable.get_sbpf_version().enable_pqr() => {
733                    let signed = insn.opc & (1 << 7) != 0;
734                    let mut imm = insn.imm;
735                    if !signed {
736                        imm &= u32::MAX as i64;
737                    }
738                    self.emit_product_quotient_remainder(
739                        if insn.opc & (1 << 4) != 0 { OperandSize::S64 } else { OperandSize::S32 },
740                        insn.opc & (1 << 5) != 0,
741                        insn.opc & (1 << 6) != 0,
742                        signed,
743                        dst, dst, Some(imm),
744                    )
745                }
746                ebpf::LMUL32_REG | ebpf::LMUL64_REG | ebpf::UHMUL64_REG | ebpf::SHMUL64_REG |
747                ebpf::UDIV32_REG | ebpf::UDIV64_REG | ebpf::UREM32_REG | ebpf::UREM64_REG |
748                ebpf::SDIV32_REG | ebpf::SDIV64_REG | ebpf::SREM32_REG | ebpf::SREM64_REG
749                if self.executable.get_sbpf_version().enable_pqr() =>
750                    self.emit_product_quotient_remainder(
751                        if insn.opc & (1 << 4) != 0 { OperandSize::S64 } else { OperandSize::S32 },
752                        insn.opc & (1 << 5) != 0,
753                        insn.opc & (1 << 6) != 0,
754                        insn.opc & (1 << 7) != 0,
755                        src, dst, None,
756                    ),
757
758                // BPF_JMP32 class
759                ebpf::JEQ32_IMM   if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_imm(OperandSize::S32, 0x84, false, insn.imm, dst, target_pc),
760                ebpf::JEQ32_REG   if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_reg(OperandSize::S32, 0x84, false, src, dst, target_pc),
761                ebpf::JGT32_IMM   if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_imm(OperandSize::S32, 0x87, false, insn.imm, dst, target_pc),
762                ebpf::JGT32_REG   if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_reg(OperandSize::S32, 0x87, false, src, dst, target_pc),
763                ebpf::JGE32_IMM   if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_imm(OperandSize::S32, 0x83, false, insn.imm, dst, target_pc),
764                ebpf::JGE32_REG   if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_reg(OperandSize::S32, 0x83, false, src, dst, target_pc),
765                ebpf::JLT32_IMM   if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_imm(OperandSize::S32, 0x82, false, insn.imm, dst, target_pc),
766                ebpf::JLT32_REG   if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_reg(OperandSize::S32, 0x82, false, src, dst, target_pc),
767                ebpf::JLE32_IMM   if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_imm(OperandSize::S32, 0x86, false, insn.imm, dst, target_pc),
768                ebpf::JLE32_REG   if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_reg(OperandSize::S32, 0x86, false, src, dst, target_pc),
769                ebpf::JSET32_IMM  if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_imm(OperandSize::S32, 0x85, true, insn.imm, dst, target_pc),
770                ebpf::JSET32_REG  if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_reg(OperandSize::S32, 0x85, true, src, dst, target_pc),
771                ebpf::JNE32_IMM   if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_imm(OperandSize::S32, 0x85, false, insn.imm, dst, target_pc),
772                ebpf::JNE32_REG   if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_reg(OperandSize::S32, 0x85, false, src, dst, target_pc),
773                ebpf::JSGT32_IMM  if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_imm(OperandSize::S32, 0x8f, false, insn.imm, dst, target_pc),
774                ebpf::JSGT32_REG  if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_reg(OperandSize::S32, 0x8f, false, src, dst, target_pc),
775                ebpf::JSGE32_IMM  if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_imm(OperandSize::S32, 0x8d, false, insn.imm, dst, target_pc),
776                ebpf::JSGE32_REG  if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_reg(OperandSize::S32, 0x8d, false, src, dst, target_pc),
777                ebpf::JSLT32_IMM  if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_imm(OperandSize::S32, 0x8c, false, insn.imm, dst, target_pc),
778                ebpf::JSLT32_REG  if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_reg(OperandSize::S32, 0x8c, false, src, dst, target_pc),
779                ebpf::JSLE32_IMM  if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_imm(OperandSize::S32, 0x8e, false, insn.imm, dst, target_pc),
780                ebpf::JSLE32_REG  if self.executable.get_sbpf_version().enable_jmp32() => self.emit_conditional_branch_reg(OperandSize::S32, 0x8e, false, src, dst, target_pc),
781
782                // BPF_JMP64 class
783                ebpf::JA         => {
784                    self.emit_validate_and_profile_instruction_count(target_pc);
785                    let jump_offset = self.relative_to_target_pc(target_pc, 5);
786                    self.emit_ins(X86Instruction::jump_immediate(jump_offset));
787                },
788                ebpf::JEQ64_IMM    => self.emit_conditional_branch_imm(OperandSize::S64, 0x84, false, insn.imm, dst, target_pc),
789                ebpf::JEQ64_REG    => self.emit_conditional_branch_reg(OperandSize::S64, 0x84, false, src, dst, target_pc),
790                ebpf::JGT64_IMM    => self.emit_conditional_branch_imm(OperandSize::S64, 0x87, false, insn.imm, dst, target_pc),
791                ebpf::JGT64_REG    => self.emit_conditional_branch_reg(OperandSize::S64, 0x87, false, src, dst, target_pc),
792                ebpf::JGE64_IMM    => self.emit_conditional_branch_imm(OperandSize::S64, 0x83, false, insn.imm, dst, target_pc),
793                ebpf::JGE64_REG    => self.emit_conditional_branch_reg(OperandSize::S64, 0x83, false, src, dst, target_pc),
794                ebpf::JLT64_IMM    => self.emit_conditional_branch_imm(OperandSize::S64, 0x82, false, insn.imm, dst, target_pc),
795                ebpf::JLT64_REG    => self.emit_conditional_branch_reg(OperandSize::S64, 0x82, false, src, dst, target_pc),
796                ebpf::JLE64_IMM    => self.emit_conditional_branch_imm(OperandSize::S64, 0x86, false, insn.imm, dst, target_pc),
797                ebpf::JLE64_REG    => self.emit_conditional_branch_reg(OperandSize::S64, 0x86, false, src, dst, target_pc),
798                ebpf::JSET64_IMM   => self.emit_conditional_branch_imm(OperandSize::S64, 0x85, true, insn.imm, dst, target_pc),
799                ebpf::JSET64_REG   => self.emit_conditional_branch_reg(OperandSize::S64, 0x85, true, src, dst, target_pc),
800                ebpf::JNE64_IMM    => self.emit_conditional_branch_imm(OperandSize::S64, 0x85, false, insn.imm, dst, target_pc),
801                ebpf::JNE64_REG    => self.emit_conditional_branch_reg(OperandSize::S64, 0x85, false, src, dst, target_pc),
802                ebpf::JSGT64_IMM   => self.emit_conditional_branch_imm(OperandSize::S64, 0x8f, false, insn.imm, dst, target_pc),
803                ebpf::JSGT64_REG   => self.emit_conditional_branch_reg(OperandSize::S64, 0x8f, false, src, dst, target_pc),
804                ebpf::JSGE64_IMM   => self.emit_conditional_branch_imm(OperandSize::S64, 0x8d, false, insn.imm, dst, target_pc),
805                ebpf::JSGE64_REG   => self.emit_conditional_branch_reg(OperandSize::S64, 0x8d, false, src, dst, target_pc),
806                ebpf::JSLT64_IMM   => self.emit_conditional_branch_imm(OperandSize::S64, 0x8c, false, insn.imm, dst, target_pc),
807                ebpf::JSLT64_REG   => self.emit_conditional_branch_reg(OperandSize::S64, 0x8c, false, src, dst, target_pc),
808                ebpf::JSLE64_IMM   => self.emit_conditional_branch_imm(OperandSize::S64, 0x8e, false, insn.imm, dst, target_pc),
809                ebpf::JSLE64_REG   => self.emit_conditional_branch_reg(OperandSize::S64, 0x8e, false, src, dst, target_pc),
810                ebpf::CALL_IMM     => {
811                    let mut resolved = false;
812                    // External syscall
813                    if !self.executable.get_sbpf_version().static_syscalls() || insn.src == 0 {
814                        if let Some((_, (_, callback))) = self.executable.get_loader().get_function_registry().lookup_by_key(insn.imm as u32) {
815                            callback(&mut self);
816                            resolved = true;
817                        }
818                    }
819                    // Internal call
820                    if self.executable.get_sbpf_version().static_syscalls() {
821                        let target_pc = (self.pc as i64).saturating_add(insn.imm).saturating_add(1);
822                        if ebpf::is_pc_in_program(self.program, target_pc as usize) && insn.src == 1 {
823                            self.emit_internal_call(Value::Constant64(target_pc, true));
824                            resolved = true;
825                        }
826                    } else if let Some((_function_name, target_pc)) =
827                        self.executable
828                            .get_function_registry()
829                            .lookup_by_key(insn.imm as u32) {
830                        self.emit_internal_call(Value::Constant64(target_pc as i64, true));
831                        resolved = true;
832                    }
833                    if !resolved {
834                        self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, self.pc as i64));
835                        self.emit_ins(X86Instruction::jump_immediate(self.relative_to_anchor(ANCHOR_CALL_UNSUPPORTED_INSTRUCTION, 5)));
836                    }
837                },
838                ebpf::CALL_REG  => {
839                    let target_pc = if self.executable.get_sbpf_version().callx_uses_src_reg() {
840                        src
841                    } else if self.executable.get_sbpf_version().callx_uses_dst_reg() {
842                        dst
843                    } else {
844                        REGISTER_MAP[insn.imm as usize]
845                    };
846                    self.emit_internal_call(Value::Register(target_pc));
847                },
848                ebpf::EXIT      => {
849                    self.emit_validate_and_profile_instruction_count(0);
850
851                    let call_depth_access = X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::CallDepth));
852                    // If env.call_depth == 0, we've reached the exit instruction of the entry point
853                    self.emit_ins(X86Instruction::cmp_immediate(OperandSize::S32, REGISTER_PTR_TO_VM, 0, Some(call_depth_access)));
854                    // we're done
855                    self.emit_ins(X86Instruction::conditional_jump_immediate(0x84, self.relative_to_anchor(ANCHOR_EXIT, 6)));
856
857                    // else decrement and update env.call_depth
858                    self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 5, REGISTER_PTR_TO_VM, 1, Some(call_depth_access))); // env.call_depth -= 1;
859
860                    // and return
861                    self.emit_ins(X86Instruction::return_near());
862                },
863
864                _               => return Err(EbpfError::UnsupportedInstruction),
865            }
866
867            self.pc += 1;
868        }
869
870        // Bumper in case there was no final exit
871        if self.offset_in_text_section + MAX_MACHINE_CODE_LENGTH_PER_INSTRUCTION * 2 >= self.result.text_section.len() {
872            return Err(EbpfError::ExhaustedTextSegment(self.pc));
873        }
874        self.emit_validate_and_profile_instruction_count(self.pc + 1);
875        self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, self.pc as i64)); // Save pc
876        self.emit_set_exception_kind(EbpfError::ExecutionOverrun);
877        self.emit_ins(X86Instruction::jump_immediate(self.relative_to_anchor(ANCHOR_THROW_EXCEPTION, 5)));
878
879        self.resolve_jumps();
880        self.result.seal(self.offset_in_text_section)?;
881        Ok(self.result)
882    }
883
884    fn should_sanitize_constant(&self, value: i64) -> bool {
885        if !self.config.sanitize_user_provided_values {
886            return false;
887        }
888
889        match value as u64 {
890            0xFFFF
891            | 0xFFFFFF
892            | 0xFFFFFFFF
893            | 0xFFFFFFFFFF
894            | 0xFFFFFFFFFFFF
895            | 0xFFFFFFFFFFFFFF
896            | 0xFFFFFFFFFFFFFFFF => false,
897            v if v <= 0xFF => false,
898            v if !v <= 0xFF => false,
899            _ => true
900        }
901    }
902
903    fn slot_in_vm(&self, slot: RuntimeEnvironmentSlot) -> i32 {
904        slot as i32 - 8 * self.runtime_environment_key
905    }
906
907    pub(crate) fn emit<T>(&mut self, data: T) {
908        unsafe {
909            let ptr = self.result.text_section.as_ptr().add(self.offset_in_text_section);
910            #[allow(clippy::cast_ptr_alignment)]
911            ptr::write_unaligned(ptr as *mut T, data as T);
912        }
913        self.offset_in_text_section += mem::size_of::<T>();
914    }
915
916    pub(crate) fn emit_variable_length(&mut self, size: OperandSize, data: u64) {
917        match size {
918            OperandSize::S0 => {},
919            OperandSize::S8 => self.emit::<u8>(data as u8),
920            OperandSize::S16 => self.emit::<u16>(data as u16),
921            OperandSize::S32 => self.emit::<u32>(data as u32),
922            OperandSize::S64 => self.emit::<u64>(data),
923        }
924    }
925
926    // This function helps the optimizer to inline the machinecode emission while avoiding stack allocations
927    #[inline(always)]
928    fn emit_ins(&mut self, instruction: X86Instruction) {
929        instruction.emit(self);
930        if self.next_noop_insertion == 0 {
931            self.next_noop_insertion = self.noop_range.sample(&mut self.diversification_rng);
932            // X86Instruction::noop().emit(self)?;
933            self.emit::<u8>(0x90);
934        } else {
935            self.next_noop_insertion -= 1;
936        }
937    }
938
    /// Loads the 64-bit constant `value` into `destination` while blinding it
    /// with `immediate_value_key`, so that user-controlled byte patterns never
    /// appear verbatim in the generated machine code.
    ///
    /// Four strategies, from cheapest to most general, depending on the shape
    /// of `value` and on whether a spare register is available.
    fn emit_sanitized_load_immediate(&mut self, destination: X86Register, value: i64) {
        let lower_key = self.immediate_value_key as i32 as i64;
        if value >= i32::MIN as i64 && value <= i32::MAX as i64 {
            // Fits in 32 bits: store (value - key) and re-add the key at runtime.
            self.emit_ins(X86Instruction::load_immediate(destination, value.wrapping_sub(lower_key)));
            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 0, destination, lower_key, None)); // wrapping_add(lower_key)
        } else if value as u64 & u32::MAX as u64 == 0 {
            // Lower half is zero: blind the upper half, then shift it into place.
            self.emit_ins(X86Instruction::load_immediate(destination, value.rotate_right(32).wrapping_sub(lower_key)));
            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 0, destination, lower_key, None)); // wrapping_add(lower_key)
            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0xc1, 4, destination, 32, None)); // shift_left(32)
        } else if destination != REGISTER_SCRATCH {
            // General case with a spare register: materialize the full 64-bit
            // key in REGISTER_SCRATCH and add it to the blinded value.
            self.emit_ins(X86Instruction::load_immediate(destination, value.wrapping_sub(self.immediate_value_key)));
            self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, self.immediate_value_key));
            self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x01, REGISTER_SCRATCH, destination, None)); // wrapping_add(immediate_value_key)
        } else {
            // Destination IS the scratch register: undo the blinding in two
            // 32-bit halves via a rotation, needing no spare register.
            let upper_key = (self.immediate_value_key >> 32) as i32 as i64;
            self.emit_ins(X86Instruction::load_immediate(destination, value.wrapping_sub(lower_key).rotate_right(32).wrapping_sub(upper_key)));
            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 0, destination, upper_key, None)); // wrapping_add(upper_key)
            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0xc1, 1, destination, 32, None)); // rotate_right(32)
            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 0, destination, lower_key, None)); // wrapping_add(lower_key)
        }
    }
960
961    fn emit_sanitized_alu(&mut self, size: OperandSize, opcode: u8, opcode_extension: u8, destination: X86Register, immediate: i64) {
962        if self.should_sanitize_constant(immediate) {
963            self.emit_sanitized_load_immediate(REGISTER_SCRATCH, immediate);
964            self.emit_ins(X86Instruction::alu(size, opcode, REGISTER_SCRATCH, destination, None));
965        } else if immediate >= i32::MIN as i64 && immediate <= i32::MAX as i64 {
966            self.emit_ins(X86Instruction::alu_immediate(size, 0x81, opcode_extension, destination, immediate, None));
967        } else {
968            self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, immediate));
969            self.emit_ins(X86Instruction::alu(size, opcode, REGISTER_SCRATCH, destination, None));
970        }
971    }
972
    /// Emits an `rdtsc`-based stopwatch probe.
    ///
    /// With `begin == true` the current cycle count is subtracted from the
    /// stopwatch numerator slot; with `begin == false` it is added back and
    /// the denominator slot is incremented, so across a begin/end pair the
    /// numerator accumulates elapsed cycles and the denominator counts
    /// completed measurements. RAX/RDX are preserved via push/pop.
    #[allow(dead_code)]
    fn emit_stopwatch(&mut self, begin: bool) {
        self.stopwatch_is_active = true;
        self.emit_ins(X86Instruction::push(RDX, None));
        self.emit_ins(X86Instruction::push(RAX, None));
        // lfence / rdtsc / lfence serializes the timestamp read against
        // surrounding loads.
        self.emit_ins(X86Instruction::fence(FenceType::Load)); // lfence
        self.emit_ins(X86Instruction::cycle_count()); // rdtsc
        self.emit_ins(X86Instruction::fence(FenceType::Load)); // lfence
        // Fold the two 32-bit halves of the timestamp into RAX.
        self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0xc1, 4, RDX, 32, None)); // RDX <<= 32;
        self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x09, RDX, RAX, None)); // RAX |= RDX;
        if begin {
            self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x29, RAX, REGISTER_PTR_TO_VM, Some(X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::StopwatchNumerator))))); // *numerator -= RAX;
        } else {
            self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x01, RAX, REGISTER_PTR_TO_VM, Some(X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::StopwatchNumerator))))); // *numerator += RAX;
            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 0, REGISTER_PTR_TO_VM, 1, Some(X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::StopwatchDenominator))))); // *denominator += 1;
        }
        self.emit_ins(X86Instruction::pop(RAX));
        self.emit_ins(X86Instruction::pop(RDX));
    }
992
    /// Emits a guard that jumps to the `ExceededMaxInstructions` anchor when
    /// the program has exhausted its instruction budget.
    ///
    /// When `pc` is `Some`, it is first materialized (blinded) into
    /// `REGISTER_SCRATCH` and remembered as the last validated pc; when
    /// `None`, the pc already present in `REGISTER_SCRATCH` is reused.
    /// No-op when the instruction meter is disabled.
    fn emit_validate_instruction_count(&mut self, pc: Option<usize>) {
        if !self.config.enable_instruction_meter {
            return;
        }
        // Update `MACHINE_CODE_PER_INSTRUCTION_METER_CHECKPOINT` if you change the code generation here
        if let Some(pc) = pc {
            self.last_instruction_meter_validation_pc = pc;
            self.emit_sanitized_load_immediate(REGISTER_SCRATCH, pc as i64);
        }
        // Compare the meter against pc and branch to the exception anchor on
        // exhaustion. NOTE(review): 0x86 is JBE, which reads as "throw when
        // instruction_meter <= pc" given a cmp(meter, pc) operand order; the
        // original comment here said ">= pc" — confirm against the operand
        // convention of `X86Instruction::cmp` in x86.rs.
        self.emit_ins(X86Instruction::cmp(OperandSize::S64, REGISTER_SCRATCH, REGISTER_INSTRUCTION_METER, None));
        self.emit_ins(X86Instruction::conditional_jump_immediate(0x86, self.relative_to_anchor(ANCHOR_THROW_EXCEEDED_MAX_INSTRUCTIONS, 6)));
    }
1006
1007    fn emit_profile_instruction_count(&mut self, target_pc: usize) {
1008        if !self.config.enable_instruction_meter {
1009            return;
1010        }
1011        self.emit_sanitized_alu(OperandSize::S64, 0x01, 0, REGISTER_INSTRUCTION_METER, target_pc as i64 - self.pc as i64 - 1); // instruction_meter += target_pc - (self.pc + 1);
1012    }
1013
1014    fn emit_undo_profile_instruction_count(&mut self, target_pc: usize) {
1015        if self.config.enable_instruction_meter {
1016            self.emit_sanitized_alu(OperandSize::S64, 0x01, 0, REGISTER_INSTRUCTION_METER, self.pc as i64 + 1 - target_pc as i64); // instruction_meter += (self.pc + 1) - target_pc;
1017        }
1018    }
1019
    /// Convenience wrapper: validate the meter at the current pc, then adjust
    /// the meter for a transfer to `target_pc`.
    fn emit_validate_and_profile_instruction_count(&mut self, target_pc: usize) {
        self.emit_validate_instruction_count(Some(self.pc));
        self.emit_profile_instruction_count(target_pc);
    }
1024
    /// Emits a call from JITed code into host (Rust) code, following the
    /// System V x86-64 calling convention.
    ///
    /// All caller-saved registers are spilled around the call, except the one
    /// designated as `result_reg` (excluding it from the restore keeps the
    /// returned value intact). Each `Argument` is routed into the ABI argument
    /// register selected by its `index`, or pushed on the stack when the index
    /// exceeds the available argument registers. The callee's return value is
    /// taken from RAX and moved into `result_reg`, if one was given.
    fn emit_rust_call(&mut self, target: Value, arguments: &[Argument], result_reg: Option<X86Register>) {
        let mut saved_registers = CALLER_SAVED_REGISTERS.to_vec();
        // Exclude the result register from save/restore, otherwise the restore
        // below would clobber the returned value
        if let Some(reg) = result_reg {
            if let Some(dst) = saved_registers.iter().position(|x| *x == reg) {
                saved_registers.remove(dst);
            }
        }

        // Save registers on stack
        for reg in saved_registers.iter() {
            self.emit_ins(X86Instruction::push(*reg, None));
        }

        let stack_arguments = arguments.len().saturating_sub(ARGUMENT_REGISTERS.len()) as i64;
        if stack_arguments % 2 != 0 {
            // If we're going to pass an odd number of stack args we need to pad
            // to preserve alignment
            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 5, RSP, 8, None)); // RSP -= 8;
        }

        // Pass arguments
        for argument in arguments {
            let is_stack_argument = argument.index >= ARGUMENT_REGISTERS.len();
            let dst = if is_stack_argument {
                RSP // Never used
            } else {
                ARGUMENT_REGISTERS[argument.index]
            };
            match argument.value {
                Value::Register(reg) => {
                    if is_stack_argument {
                        self.emit_ins(X86Instruction::push(reg, None));
                    } else if reg != dst {
                        self.emit_ins(X86Instruction::mov(OperandSize::S64, reg, dst));
                    }
                },
                Value::RegisterIndirect(reg, offset, user_provided) => {
                    debug_assert!(!user_provided);
                    if is_stack_argument {
                        debug_assert!(reg != RSP);
                        self.emit_ins(X86Instruction::push(reg, Some(X86IndirectAccess::Offset(offset))));
                    } else if reg == RSP {
                        // RSP-relative loads must account for pushes emitted above,
                        // hence the OffsetIndexShift addressing form
                        self.emit_ins(X86Instruction::load(OperandSize::S64, RSP, dst, X86IndirectAccess::OffsetIndexShift(offset, RSP, 0)));
                    } else {
                        self.emit_ins(X86Instruction::load(OperandSize::S64, reg, dst, X86IndirectAccess::Offset(offset)));
                    }
                },
                Value::RegisterPlusConstant32(reg, offset, user_provided) => {
                    debug_assert!(!user_provided);
                    if is_stack_argument {
                        // Push the base register, then add the offset to the pushed slot in place
                        self.emit_ins(X86Instruction::push(reg, None));
                        self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 0, RSP, offset as i64, Some(X86IndirectAccess::OffsetIndexShift(0, RSP, 0))));
                    } else if reg == RSP {
                        self.emit_ins(X86Instruction::lea(OperandSize::S64, RSP, dst, Some(X86IndirectAccess::OffsetIndexShift(offset, RSP, 0))));
                    } else {
                        self.emit_ins(X86Instruction::lea(OperandSize::S64, reg, dst, Some(X86IndirectAccess::Offset(offset))));
                    }
                },
                Value::RegisterPlusConstant64(reg, offset, user_provided) => {
                    debug_assert!(!user_provided);
                    if is_stack_argument {
                        self.emit_ins(X86Instruction::push(reg, None));
                        self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 0, RSP, offset, Some(X86IndirectAccess::OffsetIndexShift(0, RSP, 0))));
                    } else {
                        // 64 bit offsets do not fit in an ALU immediate: materialize
                        // the offset first, then add the base register
                        self.emit_ins(X86Instruction::load_immediate(dst, offset));
                        self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x01, reg, dst, None));
                    }
                },
                Value::Constant64(value, user_provided) => {
                    debug_assert!(!user_provided && !is_stack_argument);
                    self.emit_ins(X86Instruction::load_immediate(dst, value));
                },
            }
        }

        match target {
            Value::Register(reg) => {
                self.emit_ins(X86Instruction::call_reg(reg, None));
            },
            Value::Constant64(value, user_provided) => {
                debug_assert!(!user_provided);
                // Load the absolute target address and call through RAX
                self.emit_ins(X86Instruction::load_immediate(RAX, value));
                self.emit_ins(X86Instruction::call_reg(RAX, None));
            },
            _ => {
                #[cfg(debug_assertions)]
                unreachable!();
            }
        }

        // Save returned value in result register
        if let Some(reg) = result_reg {
            self.emit_ins(X86Instruction::mov(OperandSize::S64, RAX, reg));
        }

        // Restore registers from stack
        // First drop the stack arguments (including the alignment padding, if any)
        // in a single RSP adjustment
        self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 0, RSP,
            if stack_arguments % 2 != 0 { stack_arguments + 1 } else { stack_arguments } * 8, None));

        for reg in saved_registers.iter().rev() {
            self.emit_ins(X86Instruction::pop(*reg));
        }
    }
1128
    /// Emits a call to another function inside the guest program.
    ///
    /// `dst` is either a statically known target pc (`Value::Constant64`,
    /// immediate call) or a guest register holding the target address
    /// (`Value::Register`, callx dispatched through
    /// `ANCHOR_INTERNAL_FUNCTION_CALL_REG`). The shared prologue anchor saves
    /// the scratch registers and the caller's frame pointer; the matching
    /// restore code is emitted at the end of this method.
    fn emit_internal_call(&mut self, dst: Value) {
        // Store PC in case the bounds check fails
        self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, self.pc as i64));
        self.last_instruction_meter_validation_pc = self.pc;
        self.emit_ins(X86Instruction::call_immediate(self.relative_to_anchor(ANCHOR_INTERNAL_FUNCTION_CALL_PROLOGUE, 5)));

        match dst {
            Value::Register(reg) => {
                // REGISTER_SCRATCH contains self.pc, and we must store it for proper error handling.
                // We can discard the value if callx succeeds, so we are not incrementing the stack pointer (RSP).
                self.emit_ins(X86Instruction::store(OperandSize::S64, REGISTER_SCRATCH, RSP, X86IndirectAccess::OffsetIndexShift(-24, RSP, 0)));
                // Move guest_target_address into REGISTER_SCRATCH
                self.emit_ins(X86Instruction::mov(OperandSize::S64, reg, REGISTER_SCRATCH));
                self.emit_ins(X86Instruction::call_immediate(self.relative_to_anchor(ANCHOR_INTERNAL_FUNCTION_CALL_REG, 5)));
            },
            Value::Constant64(target_pc, user_provided) => {
                debug_assert!(user_provided);
                self.emit_profile_instruction_count(target_pc as usize);
                // User-provided constants may need to be sanitized before being
                // embedded in the emitted machine code
                if user_provided && self.should_sanitize_constant(target_pc) {
                    self.emit_sanitized_load_immediate(REGISTER_SCRATCH, target_pc);
                } else {
                    self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, target_pc));
                }
                let jump_offset = self.relative_to_target_pc(target_pc as usize, 5);
                self.emit_ins(X86Instruction::call_immediate(jump_offset));
            },
            _ => {
                #[cfg(debug_assertions)]
                unreachable!();
            }
        }

        // Undo the profiling adjustment on the return path; with a target_pc of 0
        // this adds `self.pc + 1` back to the instruction meter
        self.emit_undo_profile_instruction_count(0);

        // Restore the previous frame pointer
        self.emit_ins(X86Instruction::pop(REGISTER_MAP[FRAME_PTR_REG]));
        for reg in REGISTER_MAP.iter().skip(FIRST_SCRATCH_REG).take(SCRATCH_REGS).rev() {
            self.emit_ins(X86Instruction::pop(*reg));
        }
    }
1169
    /// Emits a syscall handler invocation
    ///
    /// Validates and charges the instruction meter, loads the host function
    /// pointer into REGISTER_SCRATCH (where the `ANCHOR_EXTERNAL_FUNCTION_CALL`
    /// routine expects it), dispatches through that anchor, and finally undoes
    /// the profiling adjustment on return.
    pub fn emit_external_call(&mut self, function: BuiltinFunction<C>) {
        self.emit_validate_and_profile_instruction_count(0);
        self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, function as usize as i64));
        self.emit_ins(X86Instruction::call_immediate(self.relative_to_anchor(ANCHOR_EXTERNAL_FUNCTION_CALL, 5)));
        self.emit_undo_profile_instruction_count(0);
    }
1177
    /// Emits a guest memory access of `len` bytes, including guest-to-host
    /// address translation when enabled in the config.
    ///
    /// Exactly one of `dst` and `value` must be given (enforced by the
    /// debug_assert below): `dst` receives the loaded value (load), `value` is
    /// what gets written (store). `vm_addr` must be a
    /// `Value::RegisterPlusConstant64` describing the guest address.
    fn emit_address_translation(&mut self, dst: Option<X86Register>, vm_addr: Value, len: u64, value: Option<Value>) {
        debug_assert_ne!(dst.is_some(), value.is_some());

        // Stack slot below RSP used to hand the value to store over to the
        // translation anchor (or to spill REGISTER_MAP[0] in the untranslated path)
        let stack_slot_of_value_to_store = X86IndirectAccess::OffsetIndexShift(-96, RSP, 0);
        match value {
            Some(Value::Register(reg)) => {
                self.emit_ins(X86Instruction::store(OperandSize::S64, reg, RSP, stack_slot_of_value_to_store));
            }
            Some(Value::Constant64(constant, user_provided)) => {
                debug_assert!(user_provided);
                // First half of emit_sanitized_load_immediate(stack_slot_of_value_to_store, constant)
                let lower_key = self.immediate_value_key as i32 as i64;
                self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, constant.wrapping_sub(lower_key)));
                self.emit_ins(X86Instruction::store(OperandSize::S64, REGISTER_SCRATCH, RSP, stack_slot_of_value_to_store));
            }
            _ => {}
        }

        // REGISTER_SCRATCH = base register + constant offset (the guest address)
        match vm_addr {
            Value::RegisterPlusConstant64(reg, constant, user_provided) => {
                if user_provided && self.should_sanitize_constant(constant) {
                    self.emit_sanitized_load_immediate(REGISTER_SCRATCH, constant);
                } else {
                    self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, constant));
                }
                self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x01, reg, REGISTER_SCRATCH, None));
            },
            _ => {
                #[cfg(debug_assertions)]
                unreachable!();
            },
        }

        if self.config.enable_address_translation {
            // Select the anchor variant: +0 for loads, +4 for stores of a register,
            // +8 for stores of a constant; plus log2(len) for the access width
            let anchor_base = match value {
                Some(Value::Register(_reg)) => 4,
                Some(Value::Constant64(_constant, _user_provided)) => 8,
                _ => 0,
            };
            let anchor = ANCHOR_TRANSLATE_MEMORY_ADDRESS + anchor_base + len.trailing_zeros() as usize;
            // store self.pc in the first stack slot of the anchor
            self.emit_ins(X86Instruction::store_immediate(OperandSize::S64, RSP, X86IndirectAccess::OffsetIndexShift(-16, RSP, 0), self.pc as i64));
            self.emit_ins(X86Instruction::call_immediate(self.relative_to_anchor(anchor, 5)));
            if let Some(dst) = dst {
                // The anchor leaves the loaded value in REGISTER_SCRATCH
                self.emit_ins(X86Instruction::mov(OperandSize::S64, REGISTER_SCRATCH, dst));
            }
        } else if let Some(dst) = dst {
            // Translation disabled: dereference the guest address in
            // REGISTER_SCRATCH directly
            match len {
                1 => self.emit_ins(X86Instruction::load(OperandSize::S8, REGISTER_SCRATCH, dst, X86IndirectAccess::Offset(0))),
                2 => self.emit_ins(X86Instruction::load(OperandSize::S16, REGISTER_SCRATCH, dst, X86IndirectAccess::Offset(0))),
                4 => self.emit_ins(X86Instruction::load(OperandSize::S32, REGISTER_SCRATCH, dst, X86IndirectAccess::Offset(0))),
                8 => self.emit_ins(X86Instruction::load(OperandSize::S64, REGISTER_SCRATCH, dst, X86IndirectAccess::Offset(0))),
                _ => unreachable!(),
            }
        } else {
            // Translation disabled, store path: REGISTER_MAP[0] temporarily holds
            // the value to store, swapped through the stack slot prepared above
            self.emit_ins(X86Instruction::xchg(OperandSize::S64, RSP, REGISTER_MAP[0], Some(stack_slot_of_value_to_store))); // Save REGISTER_MAP[0] and retrieve value to store
            match len {
                1 => self.emit_ins(X86Instruction::store(OperandSize::S8, REGISTER_MAP[0], REGISTER_SCRATCH, X86IndirectAccess::Offset(0))),
                2 => self.emit_ins(X86Instruction::store(OperandSize::S16, REGISTER_MAP[0], REGISTER_SCRATCH, X86IndirectAccess::Offset(0))),
                4 => self.emit_ins(X86Instruction::store(OperandSize::S32, REGISTER_MAP[0], REGISTER_SCRATCH, X86IndirectAccess::Offset(0))),
                8 => self.emit_ins(X86Instruction::store(OperandSize::S64, REGISTER_MAP[0], REGISTER_SCRATCH, X86IndirectAccess::Offset(0))),
                _ => unreachable!(),
            }
            self.emit_ins(X86Instruction::xchg(OperandSize::S64, RSP, REGISTER_MAP[0], Some(stack_slot_of_value_to_store))); // Restore REGISTER_MAP[0]
        }
    }
1244
1245    fn emit_conditional_branch_reg(&mut self, size: OperandSize, op: u8, bitwise: bool, first_operand: X86Register, second_operand: X86Register, target_pc: usize) {
1246        self.emit_validate_and_profile_instruction_count(target_pc);
1247        if bitwise { // Logical
1248            self.emit_ins(X86Instruction::test(size, first_operand, second_operand, None));
1249        } else { // Arithmetic
1250            self.emit_ins(X86Instruction::cmp(size, first_operand, second_operand, None));
1251        }
1252        let jump_offset = self.relative_to_target_pc(target_pc, 6);
1253        self.emit_ins(X86Instruction::conditional_jump_immediate(op, jump_offset));
1254        self.emit_undo_profile_instruction_count(target_pc);
1255    }
1256
1257    fn emit_conditional_branch_imm(&mut self, size: OperandSize, op: u8, bitwise: bool, immediate: i64, second_operand: X86Register, target_pc: usize) {
1258        self.emit_validate_and_profile_instruction_count(target_pc);
1259        if self.should_sanitize_constant(immediate) {
1260            self.emit_sanitized_load_immediate(REGISTER_SCRATCH, immediate);
1261            if bitwise { // Logical
1262                self.emit_ins(X86Instruction::test(size, REGISTER_SCRATCH, second_operand, None));
1263            } else { // Arithmetic
1264                self.emit_ins(X86Instruction::cmp(size, REGISTER_SCRATCH, second_operand, None));
1265            }
1266        } else if bitwise { // Logical
1267            self.emit_ins(X86Instruction::test_immediate(size, second_operand, immediate, None));
1268        } else { // Arithmetic
1269            self.emit_ins(X86Instruction::cmp_immediate(size, second_operand, immediate, None));
1270        }
1271        let jump_offset = self.relative_to_target_pc(target_pc, 6);
1272        self.emit_ins(X86Instruction::conditional_jump_immediate(op, jump_offset));
1273        self.emit_undo_profile_instruction_count(target_pc);
1274    }
1275
    /// Emits a shift operation; the kind of shift is selected via `opcode_extension`.
    ///
    /// With `immediate` present the shift amount is encoded directly in the
    /// instruction (opcode 0xc1). Otherwise the variable shift amount must be
    /// in CL (opcode 0xd3), so RCX is shuffled into place and restored around
    /// the operation.
    fn emit_shift(&mut self, size: OperandSize, opcode_extension: u8, source: X86Register, destination: X86Register, immediate: Option<i64>) {
        if let Some(immediate) = immediate {
            self.emit_ins(X86Instruction::alu_immediate(size, 0xc1, opcode_extension, destination, immediate, None));
            return;
        }
        if let OperandSize::S32 = size {
            self.emit_ins(X86Instruction::mov(OperandSize::S32, destination, destination)); // Truncate to 32 bit
        }
        if source == RCX {
            // Shift amount is already in RCX (CL)
            self.emit_ins(X86Instruction::alu_immediate(size, 0xd3, opcode_extension, destination, 0, None));
        } else if destination == RCX {
            // destination occupies RCX: swap it with `source` so the shift amount
            // lands in RCX, shift `source` (now holding the destination value),
            // move the result back into RCX, and restore `source`
            self.emit_ins(X86Instruction::push(source, None));
            self.emit_ins(X86Instruction::xchg(OperandSize::S64, source, RCX, None));
            self.emit_ins(X86Instruction::alu_immediate(size, 0xd3, opcode_extension, source, 0, None));
            self.emit_ins(X86Instruction::mov(OperandSize::S64, source, RCX));
            self.emit_ins(X86Instruction::pop(source));
        } else {
            // Spill RCX, load the shift amount into it, shift, then restore RCX
            self.emit_ins(X86Instruction::push(RCX, None));
            self.emit_ins(X86Instruction::mov(OperandSize::S64, source, RCX));
            self.emit_ins(X86Instruction::alu_immediate(size, 0xd3, opcode_extension, destination, 0, None));
            self.emit_ins(X86Instruction::pop(RCX));
        }
    }
1299
    /// Emits multiplication, division or remainder (LMUL / UHMUL / SHMUL /
    /// UDIV / SDIV / UREM / SREM), selected by `division`, `signed` and `alt_dst`
    /// per the table below.
    ///
    /// x86 MUL/IMUL/DIV/IDIV use the fixed registers RAX and RDX, so those are
    /// spilled and restored around the operation whenever `dst` is not one of
    /// them. Division additionally emits guards against division by zero and
    /// (for signed division) against the MIN / -1 overflow case.
    #[allow(clippy::too_many_arguments)]
    fn emit_product_quotient_remainder(
        &mut self,
        size: OperandSize,
        alt_dst: bool,
        division: bool,
        signed: bool,
        src: X86Register,
        dst: X86Register,
        imm: Option<i64>,
    ) {
        //         LMUL UHMUL SHMUL UDIV SDIV UREM SREM
        // ALU     F7/4 F7/4  F7/5  F7/6 F7/7 F7/6 F7/7
        // src-in  REGISTER_SCRATCH  REGISTER_SCRATCH   REGISTER_SCRATCH   REGISTER_SCRATCH  REGISTER_SCRATCH  REGISTER_SCRATCH  REGISTER_SCRATCH
        // dst-in  RAX  RAX   RAX   RAX  RAX  RAX  RAX
        // dst-out RAX  RDX   RDX   RAX  RAX  RDX  RDX

        if division {
            // Prevent division by zero
            if imm.is_none() {
                self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, self.pc as i64)); // Save pc
                self.emit_ins(X86Instruction::test(size, src, src, None)); // src == 0
                self.emit_ins(X86Instruction::conditional_jump_immediate(0x84, self.relative_to_anchor(ANCHOR_DIV_BY_ZERO, 6)));
            }

            // Signed division overflows with MIN / -1.
            // If we have an immediate and it's not -1, we can skip the following check.
            if signed && imm.unwrap_or(-1) == -1 {
                self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, if let OperandSize::S64 = size { i64::MIN } else { i32::MIN as i64 }));
                self.emit_ins(X86Instruction::cmp(size, dst, REGISTER_SCRATCH, None)); // dst == MIN

                if imm.is_none() {
                    // The exception case is: dst == MIN && src == -1
                    // Via De Morgan's law becomes: !(dst != MIN || src != -1)
                    // Also, we know that src != 0 in here, so we can use it to set REGISTER_SCRATCH to something not zero
                    self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, 0)); // No XOR here because we need to keep the status flags
                    self.emit_ins(X86Instruction::cmov(size, 0x45, src, REGISTER_SCRATCH)); // if dst != MIN { REGISTER_SCRATCH = src; }
                    self.emit_ins(X86Instruction::cmp_immediate(size, src, -1, None)); // src == -1
                    self.emit_ins(X86Instruction::cmov(size, 0x45, src, REGISTER_SCRATCH)); // if src != -1 { REGISTER_SCRATCH = src; }
                    self.emit_ins(X86Instruction::test(size, REGISTER_SCRATCH, REGISTER_SCRATCH, None)); // REGISTER_SCRATCH == 0
                }

                // MIN / -1, raise EbpfError::DivideOverflow
                self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, self.pc as i64));
                self.emit_ins(X86Instruction::conditional_jump_immediate(0x84, self.relative_to_anchor(ANCHOR_DIV_OVERFLOW, 6)));
            }
        }

        // Materialize the second operand in REGISTER_SCRATCH (see src-in row above)
        if let Some(imm) = imm {
            if self.should_sanitize_constant(imm) {
                self.emit_sanitized_load_immediate(REGISTER_SCRATCH, imm);
            } else {
                self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, imm));
            }
        } else {
            self.emit_ins(X86Instruction::mov(OperandSize::S64, src, REGISTER_SCRATCH));
        }
        // Spill RAX / RDX and stage the first operand in RAX (see dst-in row above)
        if dst != RAX {
            self.emit_ins(X86Instruction::push(RAX, None));
            self.emit_ins(X86Instruction::mov(OperandSize::S64, dst, RAX));
        }
        if dst != RDX {
            self.emit_ins(X86Instruction::push(RDX, None));
        }
        if division {
            // Prepare RDX: sign-extend RAX into RDX:RAX for signed division,
            // zero RDX for unsigned division
            if signed {
                self.emit_ins(X86Instruction::sign_extend_rax_rdx(size));
            } else {
                self.emit_ins(X86Instruction::alu(size, 0x31, RDX, RDX, None)); // RDX = 0
            }
        }

        // F7 opcode extension derived from the flags: /4 mul, /5 imul, /6 div, /7 idiv
        self.emit_ins(X86Instruction::alu_immediate(size, 0xf7, 0x4 | ((division as u8) << 1) | signed as u8, REGISTER_SCRATCH, 0, None));

        // Move the result (RAX or RDX, see dst-out row above) into dst and
        // restore the spilled registers
        if dst != RDX {
            if alt_dst {
                self.emit_ins(X86Instruction::mov(OperandSize::S64, RDX, dst));
            }
            self.emit_ins(X86Instruction::pop(RDX));
        }
        if dst != RAX {
            if !alt_dst {
                self.emit_ins(X86Instruction::mov(OperandSize::S64, RAX, dst));
            }
            self.emit_ins(X86Instruction::pop(RAX));
        }
        if let OperandSize::S32 = size {
            if signed && !self.executable.get_sbpf_version().explicit_sign_extension_of_results() {
                self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x63, dst, dst, None)); // sign extend i32 to i64
            }
        }
    }
1392
    /// Emits code writing `ProgramResult::Err(err)` into the VM's ProgramResult slot.
    ///
    /// Clobbers REGISTER_MAP[0], which holds the address of the result slot.
    fn emit_set_exception_kind(&mut self, err: EbpfError) {
        // Reinterprets the first 8 bytes of `err` as its variant tag.
        // NOTE(review): this assumes EbpfError's discriminant is a u64 at offset 0 —
        // confirm the enum's layout guarantees this (e.g. via a fixed repr).
        let err_kind = unsafe { *std::ptr::addr_of!(err).cast::<u64>() };
        let err_discriminant = ProgramResult::Err(err).discriminant();
        self.emit_ins(X86Instruction::lea(OperandSize::S64, REGISTER_PTR_TO_VM, REGISTER_MAP[0], Some(X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::ProgramResult)))));
        self.emit_ins(X86Instruction::store_immediate(OperandSize::S64, REGISTER_MAP[0], X86IndirectAccess::Offset(0), err_discriminant as i64)); // result.discriminant = err_discriminant;
        self.emit_ins(X86Instruction::store_immediate(OperandSize::S64, REGISTER_MAP[0], X86IndirectAccess::Offset(std::mem::size_of::<u64>() as i32), err_kind as i64)); // err.kind = err_kind;
    }
1400
1401    fn emit_result_is_err(&mut self, destination: X86Register) {
1402        let ok = ProgramResult::Ok(0);
1403        let ok_discriminant = ok.discriminant();
1404        self.emit_ins(X86Instruction::lea(OperandSize::S64, REGISTER_PTR_TO_VM, destination, Some(X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::ProgramResult)))));
1405        self.emit_ins(X86Instruction::cmp_immediate(OperandSize::S64, destination, ok_discriminant as i64, Some(X86IndirectAccess::Offset(0))));
1406    }
1407
1408    fn emit_subroutines(&mut self) {
1409        // Routine for instruction tracing
1410        if self.config.enable_register_tracing {
1411            self.set_anchor(ANCHOR_TRACE);
1412            // Save registers on stack
1413            self.emit_ins(X86Instruction::push(REGISTER_SCRATCH, None));
1414            for reg in REGISTER_MAP.iter().rev() {
1415                self.emit_ins(X86Instruction::push(*reg, None));
1416            }
1417            self.emit_ins(X86Instruction::mov(OperandSize::S64, RSP, REGISTER_MAP[0]));
1418            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 0, RSP, - 8 * 3, None)); // RSP -= 8 * 3;
1419            self.emit_rust_call(Value::Constant64(Vec::<crate::static_analysis::RegisterTraceEntry>::push as *const u8 as i64, false), &[
1420                Argument { index: 1, value: Value::Register(REGISTER_MAP[0]) }, // registers
1421                Argument { index: 0, value: Value::RegisterPlusConstant32(REGISTER_PTR_TO_VM, self.slot_in_vm(RuntimeEnvironmentSlot::RegisterTrace), false) },
1422            ], None);
1423            // Pop stack and return
1424            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 0, RSP, 8 * 3, None)); // RSP += 8 * 3;
1425            self.emit_ins(X86Instruction::pop(REGISTER_MAP[0]));
1426            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 0, RSP, 8 * (REGISTER_MAP.len() - 1) as i64, None)); // RSP += 8 * (REGISTER_MAP.len() - 1);
1427            self.emit_ins(X86Instruction::pop(REGISTER_SCRATCH));
1428            self.emit_ins(X86Instruction::return_near());
1429        }
1430
1431        // Epilogue
1432        self.set_anchor(ANCHOR_EPILOGUE);
1433        if self.config.enable_instruction_meter {
1434            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 5, REGISTER_INSTRUCTION_METER, 1, None)); // REGISTER_INSTRUCTION_METER -= 1;
1435            self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x29, REGISTER_SCRATCH, REGISTER_INSTRUCTION_METER, None)); // REGISTER_INSTRUCTION_METER -= pc;
1436            // *DueInsnCount = *PreviousInstructionMeter - REGISTER_INSTRUCTION_METER;
1437            self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x2B, REGISTER_INSTRUCTION_METER, REGISTER_PTR_TO_VM, Some(X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::PreviousInstructionMeter))))); // REGISTER_INSTRUCTION_METER -= *PreviousInstructionMeter;
1438            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0xf7, 3, REGISTER_INSTRUCTION_METER, 0, None)); // REGISTER_INSTRUCTION_METER = -REGISTER_INSTRUCTION_METER;
1439            self.emit_ins(X86Instruction::store(OperandSize::S64, REGISTER_INSTRUCTION_METER, REGISTER_PTR_TO_VM, X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::DueInsnCount)))); // *DueInsnCount = REGISTER_INSTRUCTION_METER;
1440        }
1441        // Print stop watch value
1442        fn stopwatch_result(numerator: u64, denominator: u64) {
1443            println!("Stop watch: {} / {} = {}", numerator, denominator, if denominator == 0 { 0.0 } else { numerator as f64 / denominator as f64 });
1444        }
1445        if self.stopwatch_is_active {
1446            self.emit_rust_call(Value::Constant64(stopwatch_result as *const u8 as i64, false), &[
1447                Argument { index: 1, value: Value::RegisterIndirect(REGISTER_PTR_TO_VM, self.slot_in_vm(RuntimeEnvironmentSlot::StopwatchDenominator), false) },
1448                Argument { index: 0, value: Value::RegisterIndirect(REGISTER_PTR_TO_VM, self.slot_in_vm(RuntimeEnvironmentSlot::StopwatchNumerator), false) },
1449            ], None);
1450        }
1451        // Restore stack pointer in case we did not exit gracefully
1452        self.emit_ins(X86Instruction::load(OperandSize::S64, REGISTER_PTR_TO_VM, RSP, X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::HostStackPointer))));
1453        self.emit_ins(X86Instruction::return_near());
1454
1455        // Handler for EbpfError::ExceededMaxInstructions
1456        self.set_anchor(ANCHOR_THROW_EXCEEDED_MAX_INSTRUCTIONS);
1457        self.emit_set_exception_kind(EbpfError::ExceededMaxInstructions);
1458        self.emit_ins(X86Instruction::mov(OperandSize::S64, REGISTER_INSTRUCTION_METER, REGISTER_SCRATCH)); // REGISTER_SCRATCH = REGISTER_INSTRUCTION_METER;
1459        // Fall through
1460
1461        // Epilogue for errors
1462        self.set_anchor(ANCHOR_THROW_EXCEPTION_UNCHECKED);
1463        self.emit_ins(X86Instruction::store(OperandSize::S64, REGISTER_SCRATCH, REGISTER_PTR_TO_VM, X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::Registers) + 11 * std::mem::size_of::<u64>() as i32))); // registers[11] = pc;
1464        self.emit_ins(X86Instruction::jump_immediate(self.relative_to_anchor(ANCHOR_EPILOGUE, 5)));
1465
1466        // Quit gracefully
1467        self.set_anchor(ANCHOR_EXIT);
1468        if self.config.enable_instruction_meter {
1469            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 0, REGISTER_INSTRUCTION_METER, 1, None)); // REGISTER_INSTRUCTION_METER += 1;
1470        }
1471        self.emit_ins(X86Instruction::lea(OperandSize::S64, REGISTER_PTR_TO_VM, REGISTER_SCRATCH, Some(X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::ProgramResult)))));
1472        self.emit_ins(X86Instruction::store(OperandSize::S64, REGISTER_MAP[0], REGISTER_SCRATCH, X86IndirectAccess::Offset(std::mem::size_of::<u64>() as i32))); // result.return_value = R0;
1473        self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x31, REGISTER_SCRATCH, REGISTER_SCRATCH, None)); // REGISTER_SCRATCH ^= REGISTER_SCRATCH; // REGISTER_SCRATCH = 0;
1474        self.emit_ins(X86Instruction::jump_immediate(self.relative_to_anchor(ANCHOR_EPILOGUE, 5)));
1475
1476        // Handler for exceptions which report their pc
1477        self.set_anchor(ANCHOR_THROW_EXCEPTION);
1478        // Validate that we did not reach the instruction meter limit before the exception occured
1479        self.emit_validate_instruction_count(None);
1480        self.emit_ins(X86Instruction::jump_immediate(self.relative_to_anchor(ANCHOR_THROW_EXCEPTION_UNCHECKED, 5)));
1481
1482        // Handler for EbpfError::CallDepthExceeded
1483        self.set_anchor(ANCHOR_CALL_DEPTH_EXCEEDED);
1484        self.emit_set_exception_kind(EbpfError::CallDepthExceeded);
1485        self.emit_ins(X86Instruction::jump_immediate(self.relative_to_anchor(ANCHOR_THROW_EXCEPTION, 5)));
1486
1487        // Handler for EbpfError::CallOutsideTextSegment
1488        self.set_anchor(ANCHOR_CALL_REG_OUTSIDE_TEXT_SEGMENT);
1489        self.emit_set_exception_kind(EbpfError::CallOutsideTextSegment);
1490        self.emit_ins(X86Instruction::load(OperandSize::S64, RSP, REGISTER_SCRATCH, X86IndirectAccess::OffsetIndexShift(-8, RSP, 0)));
1491        self.emit_ins(X86Instruction::jump_immediate(self.relative_to_anchor(ANCHOR_THROW_EXCEPTION, 5)));
1492
1493        // Handler for EbpfError::DivideByZero
1494        self.set_anchor(ANCHOR_DIV_BY_ZERO);
1495        self.emit_set_exception_kind(EbpfError::DivideByZero);
1496        self.emit_ins(X86Instruction::jump_immediate(self.relative_to_anchor(ANCHOR_THROW_EXCEPTION, 5)));
1497
1498        // Handler for EbpfError::DivideOverflow
1499        self.set_anchor(ANCHOR_DIV_OVERFLOW);
1500        self.emit_set_exception_kind(EbpfError::DivideOverflow);
1501        self.emit_ins(X86Instruction::jump_immediate(self.relative_to_anchor(ANCHOR_THROW_EXCEPTION, 5)));
1502
1503        // See `ANCHOR_INTERNAL_FUNCTION_CALL_REG` for more details.
1504        self.set_anchor(ANCHOR_CALL_REG_UNSUPPORTED_INSTRUCTION);
1505        self.emit_ins(X86Instruction::load(OperandSize::S64, RSP, REGISTER_SCRATCH, X86IndirectAccess::OffsetIndexShift(-8, RSP, 0))); // Retrieve the current program counter from the stack
1506        self.emit_ins(X86Instruction::pop(REGISTER_MAP[0])); // Restore the clobbered REGISTER_MAP[0]
1507        // Fall through
1508
1509        // Handler for EbpfError::UnsupportedInstruction
1510        self.set_anchor(ANCHOR_CALL_UNSUPPORTED_INSTRUCTION);
1511        if self.config.enable_register_tracing {
1512            self.emit_ins(X86Instruction::call_immediate(self.relative_to_anchor(ANCHOR_TRACE, 5)));
1513        }
1514        self.emit_set_exception_kind(EbpfError::UnsupportedInstruction);
1515        self.emit_ins(X86Instruction::jump_immediate(self.relative_to_anchor(ANCHOR_THROW_EXCEPTION, 5)));
1516
1517        // Routine for external functions
1518        self.set_anchor(ANCHOR_EXTERNAL_FUNCTION_CALL);
1519        self.emit_ins(X86Instruction::push_immediate(OperandSize::S64, -1)); // Used as PC value in error case, acts as stack padding otherwise
1520        if self.config.enable_instruction_meter {
1521            self.emit_ins(X86Instruction::store(OperandSize::S64, REGISTER_INSTRUCTION_METER, REGISTER_PTR_TO_VM, X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::DueInsnCount)))); // *DueInsnCount = REGISTER_INSTRUCTION_METER;
1522        }
1523        self.emit_rust_call(Value::Register(REGISTER_SCRATCH), &[
1524            Argument { index: 5, value: Value::Register(ARGUMENT_REGISTERS[5]) },
1525            Argument { index: 4, value: Value::Register(ARGUMENT_REGISTERS[4]) },
1526            Argument { index: 3, value: Value::Register(ARGUMENT_REGISTERS[3]) },
1527            Argument { index: 2, value: Value::Register(ARGUMENT_REGISTERS[2]) },
1528            Argument { index: 1, value: Value::Register(ARGUMENT_REGISTERS[1]) },
1529            Argument { index: 0, value: Value::Register(REGISTER_PTR_TO_VM) },
1530        ], None);
1531        if self.config.enable_instruction_meter {
1532            self.emit_ins(X86Instruction::load(OperandSize::S64, REGISTER_PTR_TO_VM, REGISTER_INSTRUCTION_METER, X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::PreviousInstructionMeter)))); // REGISTER_INSTRUCTION_METER = *PreviousInstructionMeter;
1533        }
1534
1535        // Test if result indicates that an error occured
1536        self.emit_result_is_err(REGISTER_SCRATCH);
1537        self.emit_ins(X86Instruction::pop(REGISTER_SCRATCH));
1538        self.emit_ins(X86Instruction::conditional_jump_immediate(0x85, self.relative_to_anchor(ANCHOR_EPILOGUE, 6)));
1539        // Store Ok value in result register
1540        self.emit_ins(X86Instruction::lea(OperandSize::S64, REGISTER_PTR_TO_VM, REGISTER_SCRATCH, Some(X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::ProgramResult)))));
1541        self.emit_ins(X86Instruction::load(OperandSize::S64, REGISTER_SCRATCH, REGISTER_MAP[0], X86IndirectAccess::Offset(8)));
1542        self.emit_ins(X86Instruction::return_near());
1543
1544        // Routine for prologue of emit_internal_call()
1545        self.set_anchor(ANCHOR_INTERNAL_FUNCTION_CALL_PROLOGUE);
1546        self.emit_validate_instruction_count(None);
1547        self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 5, RSP, 8 * (SCRATCH_REGS + 1) as i64, None)); // alloca
1548        self.emit_ins(X86Instruction::store(OperandSize::S64, REGISTER_SCRATCH, RSP, X86IndirectAccess::OffsetIndexShift(0, RSP, 0))); // Save original REGISTER_SCRATCH
1549        self.emit_ins(X86Instruction::load(OperandSize::S64, RSP, REGISTER_SCRATCH, X86IndirectAccess::OffsetIndexShift(8 * (SCRATCH_REGS + 1) as i32, RSP, 0))); // Load return address
1550        for (i, reg) in REGISTER_MAP.iter().skip(FIRST_SCRATCH_REG).take(SCRATCH_REGS).enumerate() {
1551            self.emit_ins(X86Instruction::store(OperandSize::S64, *reg, RSP, X86IndirectAccess::OffsetIndexShift(8 * (SCRATCH_REGS - i + 1) as i32, RSP, 0))); // Push SCRATCH_REG
1552        }
1553        // Push the caller's frame pointer. The code to restore it is emitted at the end of emit_internal_call().
1554        self.emit_ins(X86Instruction::store(OperandSize::S64, REGISTER_MAP[FRAME_PTR_REG], RSP, X86IndirectAccess::OffsetIndexShift(8, RSP, 0)));
1555        self.emit_ins(X86Instruction::xchg(OperandSize::S64, REGISTER_SCRATCH, RSP, Some(X86IndirectAccess::OffsetIndexShift(0, RSP, 0)))); // Push return address and restore original REGISTER_SCRATCH
1556        // Increase env.call_depth
1557        let call_depth_access = X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::CallDepth));
1558        self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 0, REGISTER_PTR_TO_VM, 1, Some(call_depth_access))); // env.call_depth += 1;
1559        // If env.call_depth == self.config.max_call_depth, throw CallDepthExceeded
1560        self.emit_ins(X86Instruction::cmp_immediate(OperandSize::S32, REGISTER_PTR_TO_VM, self.config.max_call_depth as i64, Some(call_depth_access)));
1561        self.emit_ins(X86Instruction::conditional_jump_immediate(0x83, self.relative_to_anchor(ANCHOR_CALL_DEPTH_EXCEEDED, 6)));
1562        // Setup the frame pointer for the new frame. What we do depends on whether we're using dynamic or fixed frames.
1563        if !self.executable.get_sbpf_version().manual_stack_frame_bump() {
1564            // With fixed frames we start the new frame at the next fixed offset
1565            let num_frames = if self.executable.get_sbpf_version().stack_frame_gaps()
1566                && self.config.enable_stack_frame_gaps {
1567                2
1568            } else {
1569                1
1570            };
1571            let stack_frame_size = self.config.stack_frame_size as i64 * num_frames;
1572            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 0, REGISTER_MAP[FRAME_PTR_REG], stack_frame_size, None)); // REGISTER_MAP[FRAME_PTR_REG] += stack_frame_size;
1573        }
1574        self.emit_ins(X86Instruction::return_near());
1575
1576        // Routine for emit_internal_call(Value::Register())
1577        // Inputs: Guest current pc in X86IndirectAccess::OffsetIndexShift(-16, RSP, 0), Guest target address in REGISTER_SCRATCH
1578        // Outputs: Guest current pc in X86IndirectAccess::OffsetIndexShift(-16, RSP, 0), Guest target pc in REGISTER_SCRATCH, Host target address in RIP
1579        self.set_anchor(ANCHOR_INTERNAL_FUNCTION_CALL_REG);
1580        self.emit_ins(X86Instruction::push(REGISTER_MAP[0], None));
1581        // Calculate offset relative to program_vm_addr
1582        self.emit_ins(X86Instruction::load_immediate(REGISTER_MAP[0], self.program_vm_addr as i64));
1583        self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x29, REGISTER_MAP[0], REGISTER_SCRATCH, None)); // guest_target_pc = guest_target_address - self.program_vm_addr;
1584        // Force alignment of guest_target_pc
1585        self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 4, REGISTER_SCRATCH, !(INSN_SIZE as i64 - 1), None)); // guest_target_pc &= !(INSN_SIZE - 1);
1586        // Bound check
1587        // if(guest_target_pc >= number_of_instructions * INSN_SIZE) throw CALL_OUTSIDE_TEXT_SEGMENT;
1588        let number_of_instructions = self.result.pc_section.len();
1589        self.emit_ins(X86Instruction::cmp_immediate(OperandSize::S64, REGISTER_SCRATCH, (number_of_instructions * INSN_SIZE) as i64, None)); // guest_target_pc.cmp(number_of_instructions * INSN_SIZE)
1590        self.emit_ins(X86Instruction::conditional_jump_immediate(0x83, self.relative_to_anchor(ANCHOR_CALL_REG_OUTSIDE_TEXT_SEGMENT, 6)));
1591        // Calculate the guest_target_pc (dst / INSN_SIZE) to update REGISTER_INSTRUCTION_METER
1592        // and as target_pc for potential ANCHOR_CALL_REG_UNSUPPORTED_INSTRUCTION
1593        let shift_amount = INSN_SIZE.trailing_zeros();
1594        debug_assert_eq!(INSN_SIZE, 1 << shift_amount);
1595        self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0xc1, 5, REGISTER_SCRATCH, shift_amount as i64, None)); // guest_target_pc /= INSN_SIZE;
1596        // Load host_target_address offset from self.result.pc_section
1597        self.emit_ins(X86Instruction::load_immediate(REGISTER_MAP[0], self.result.pc_section.as_ptr() as i64)); // host_target_address = self.result.pc_section;
1598        self.emit_ins(X86Instruction::load(OperandSize::S32, REGISTER_MAP[0], REGISTER_MAP[0], X86IndirectAccess::OffsetIndexShift(0, REGISTER_SCRATCH, 2))); // host_target_address = self.result.pc_section[guest_target_pc];
1599        // Check destination is valid
1600        self.emit_ins(X86Instruction::test_immediate(OperandSize::S32, REGISTER_MAP[0], 1 << 31, None)); // host_target_address & (1 << 31)
1601        self.emit_ins(X86Instruction::conditional_jump_immediate(0x85, self.relative_to_anchor(ANCHOR_CALL_REG_UNSUPPORTED_INSTRUCTION, 6))); // If host_target_address & (1 << 31) != 0, throw UnsupportedInstruction
1602        self.emit_ins(X86Instruction::alu_immediate(OperandSize::S32, 0x81, 4, REGISTER_MAP[0], i32::MAX as i64, None)); // host_target_address &= (1 << 31) - 1;
1603        // A version of `self.emit_profile_instruction_count(None);` which reads self.pc from the stack
1604        self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x2b, REGISTER_INSTRUCTION_METER, RSP, Some(X86IndirectAccess::OffsetIndexShift(-8, RSP, 0)))); // instruction_meter -= guest_current_pc;
1605        self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 5, REGISTER_INSTRUCTION_METER, 1, None)); // instruction_meter -= 1;
1606        self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x01, REGISTER_SCRATCH, REGISTER_INSTRUCTION_METER, None)); // instruction_meter += guest_target_pc;
1607        // Offset host_target_address by self.result.text_section
1608        self.emit_ins(X86Instruction::mov_mmx(OperandSize::S64, REGISTER_SCRATCH, MM0));
1609        self.emit_ins(X86Instruction::load_immediate(REGISTER_SCRATCH, self.result.text_section.as_ptr() as i64)); // REGISTER_SCRATCH = self.result.text_section;
1610        self.emit_ins(X86Instruction::alu(OperandSize::S64, 0x01, REGISTER_SCRATCH, REGISTER_MAP[0], None)); // host_target_address += self.result.text_section;
1611        self.emit_ins(X86Instruction::mov_mmx(OperandSize::S64, MM0, REGISTER_SCRATCH));
1612        // Restore the clobbered REGISTER_MAP[0]
1613        self.emit_ins(X86Instruction::xchg(OperandSize::S64, REGISTER_MAP[0], RSP, Some(X86IndirectAccess::OffsetIndexShift(0, RSP, 0)))); // Swap REGISTER_MAP[0] and host_target_address
1614        self.emit_ins(X86Instruction::return_near()); // Tail call to host_target_address
1615
1616        // Translates a vm memory address to a host memory address
1617        let lower_key = self.immediate_value_key as i32 as i64;
1618        for (anchor_base, len) in &[
1619            (0, 1i32), (0, 2i32), (0, 4i32), (0, 8i32),
1620            (4, 1i32), (4, 2i32), (4, 4i32), (4, 8i32),
1621            (8, 1i32), (8, 2i32), (8, 4i32), (8, 8i32),
1622        ] {
1623            let target_offset = *anchor_base + len.trailing_zeros() as usize;
1624            self.set_anchor(ANCHOR_TRANSLATE_MEMORY_ADDRESS + target_offset);
1625            // skip over the pc slot pushed by the caller, we'll pop it before returning
1626            self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 5, RSP, 8, None)); // RSP -= 8
1627            // call MemoryMapping::(load|store) storing the result in RuntimeEnvironmentSlot::ProgramResult
1628            if *anchor_base == 0 { // AccessType::Load
1629                let load = match len {
1630                    1 => MemoryMapping::load::<u8> as *const u8 as i64,
1631                    2 => MemoryMapping::load::<u16> as *const u8 as i64,
1632                    4 => MemoryMapping::load::<u32> as *const u8 as i64,
1633                    8 => MemoryMapping::load::<u64> as *const u8 as i64,
1634                    _ => unreachable!()
1635                };
1636                self.emit_rust_call(Value::Constant64(load, false), &[
1637                    Argument { index: 2, value: Value::Register(REGISTER_SCRATCH) }, // Specify first as the src register could be overwritten by other arguments
1638                    Argument { index: 3, value: Value::Constant64(0, false) }, // self.pc is set later
1639                    Argument { index: 1, value: Value::RegisterPlusConstant32(REGISTER_PTR_TO_VM, self.slot_in_vm(RuntimeEnvironmentSlot::MemoryMapping), false) },
1640                    Argument { index: 0, value: Value::RegisterPlusConstant32(REGISTER_PTR_TO_VM, self.slot_in_vm(RuntimeEnvironmentSlot::ProgramResult), false) },
1641                ], None);
1642            } else { // AccessType::Store
1643                if *anchor_base == 8 {
1644                    // Second half of emit_sanitized_load_immediate(stack_slot_of_value_to_store, constant)
1645                    self.emit_ins(X86Instruction::alu_immediate(OperandSize::S64, 0x81, 0, RSP, lower_key, Some(X86IndirectAccess::OffsetIndexShift(-80, RSP, 0))));
1646                }
1647                let store = match len {
1648                    1 => MemoryMapping::store::<u8> as *const u8 as i64,
1649                    2 => MemoryMapping::store::<u16> as *const u8 as i64,
1650                    4 => MemoryMapping::store::<u32> as *const u8 as i64,
1651                    8 => MemoryMapping::store::<u64> as *const u8 as i64,
1652                    _ => unreachable!()
1653                };
1654                self.emit_rust_call(Value::Constant64(store, false), &[
1655                    Argument { index: 3, value: Value::Register(REGISTER_SCRATCH) }, // Specify first as the src register could be overwritten by other arguments
1656                    Argument { index: 2, value: Value::RegisterIndirect(RSP, -8, false) },
1657                    Argument { index: 4, value: Value::Constant64(0, false) }, // self.pc is set later
1658                    Argument { index: 1, value: Value::RegisterPlusConstant32(REGISTER_PTR_TO_VM, self.slot_in_vm(RuntimeEnvironmentSlot::MemoryMapping), false) },
1659                    Argument { index: 0, value: Value::RegisterPlusConstant32(REGISTER_PTR_TO_VM, self.slot_in_vm(RuntimeEnvironmentSlot::ProgramResult), false) },
1660                ], None);
1661            }
1662
1663            // Throw error if the result indicates one
1664            self.emit_result_is_err(REGISTER_SCRATCH);
1665            self.emit_ins(X86Instruction::pop(REGISTER_SCRATCH)); // REGISTER_SCRATCH = pc
1666            self.emit_ins(X86Instruction::conditional_jump_immediate(0x85, self.relative_to_anchor(ANCHOR_THROW_EXCEPTION, 6)));
1667
1668            if *anchor_base == 0 { // AccessType::Load
1669                // unwrap() the result into REGISTER_SCRATCH
1670                self.emit_ins(X86Instruction::load(OperandSize::S64, REGISTER_PTR_TO_VM, REGISTER_SCRATCH, X86IndirectAccess::Offset(self.slot_in_vm(RuntimeEnvironmentSlot::ProgramResult) + std::mem::size_of::<u64>() as i32)));
1671            }
1672
1673            self.emit_ins(X86Instruction::return_near());
1674        }
1675    }
1676
1677    fn set_anchor(&mut self, anchor: usize) {
1678        self.anchors[anchor] = unsafe { self.result.text_section.as_ptr().add(self.offset_in_text_section) };
1679    }
1680
1681    // instruction_length = 5 (Unconditional jump / call)
1682    // instruction_length = 6 (Conditional jump)
1683    fn relative_to_anchor(&self, anchor: usize, instruction_length: usize) -> i32 {
1684        let instruction_end = unsafe { self.result.text_section.as_ptr().add(self.offset_in_text_section).add(instruction_length) };
1685        let destination = self.anchors[anchor];
1686        debug_assert!(!destination.is_null());
1687        (unsafe { destination.offset_from(instruction_end) } as i32) // Relative jump
1688    }
1689
1690    fn relative_to_target_pc(&mut self, target_pc: usize, instruction_length: usize) -> i32 {
1691        let instruction_end = unsafe { self.result.text_section.as_ptr().add(self.offset_in_text_section).add(instruction_length) };
1692        let destination = if self.result.pc_section[target_pc] != 0 {
1693            // Backward jump
1694            &self.result.text_section[self.result.pc_section[target_pc] as usize & (i32::MAX as u32 as usize)] as *const u8
1695        } else {
1696            // Forward jump, needs relocation
1697            self.text_section_jumps.push(Jump { location: unsafe { instruction_end.sub(4) }, target_pc });
1698            return 0;
1699        };
1700        debug_assert!(!destination.is_null());
1701        (unsafe { destination.offset_from(instruction_end) } as i32) // Relative jump
1702    }
1703
1704    fn resolve_jumps(&mut self) {
1705        // Relocate forward jumps
1706        for jump in &self.text_section_jumps {
1707            let destination = &self.result.text_section[self.result.pc_section[jump.target_pc] as usize & (i32::MAX as u32 as usize)] as *const u8;
1708            let offset_value =
1709                unsafe { destination.offset_from(jump.location) } as i32 // Relative jump
1710                - mem::size_of::<i32>() as i32; // Jump from end of instruction
1711            unsafe { ptr::write_unaligned(jump.location as *mut i32, offset_value); }
1712        }
1713    }
1714}