#![allow(clippy::fn_to_numeric_cast)]
use crate::{
do_load_imm_var, do_opt_imm_var, EcallHandler, JitContext, RiscOperand, RiscRegister,
TraceChunkHeader, TraceCollector,
};
use dynasmrt::{
dynasm,
x64::{Assembler, Rq},
AssemblyOffset, DynamicLabel, DynasmApi, DynasmLabelApi,
};
use hashbrown::HashMap;
use std::{
mem::offset_of,
ops::{Deref, DerefMut},
};
mod instruction_impl;
#[cfg(test)]
mod tests;
mod transpiler;
// Fixed x86-64 general-purpose register assignments used by the emitted code.
// Each constant is a register id that is passed to dynasm's `Rq(..)` operand.

/// Scratch register used by the emitters for intermediate values.
const TEMP_A: u8 = Rq::RBX as u8;
/// Second scratch register used by the emitters for intermediate values.
const TEMP_B: u8 = Rq::RBP as u8;
/// Live count of traced memory reads, cached from `TraceChunkHeader::num_mem_reads`.
const NUM_MEM_READS: u8 = Rq::R8 as u8;
/// Address at which the next traced memory entry is written (see `hoist_trace_pointers`).
const TAIL_START: u8 = Rq::R9 as u8;
/// Pointer loaded from `JitContext::memory`.
const MEMORY_PTR: u8 = Rq::R10 as u8;
/// Pointer to the `JitContext` (copied from `rdi` in the prologue).
const CONTEXT: u8 = Rq::R12 as u8;
/// Pointer loaded from `JitContext::jump_table`.
const JUMP_TABLE: u8 = Rq::R13 as u8;
/// Pointer loaded from `JitContext::trace_buf`.
const TRACE_BUF: u8 = Rq::R14 as u8;
/// Cached copy of `JitContext::global_clk`.
const GLOBAL_CLK: u8 = Rq::RSI as u8;
/// Cached copy of `JitContext::clk`; `call_extern_fn_raw` temporarily reuses it to hold the
/// saved `rsp` around an external call.
const CLOCK_OR_SAVED_STACK_PTR: u8 = Rq::R15 as u8;
// Byte offsets used as memory-operand displacements in the emitted code.

/// Offset of `JitContext::pc`.
const PC_OFFSET: i32 = offset_of!(JitContext, pc) as i32;
/// Offset of `JitContext::clk`.
const CLK_OFFSET: i32 = offset_of!(JitContext, clk) as i32;
/// Offset of `JitContext::global_clk`.
const GLOBAL_CLK_OFFSET: i32 = offset_of!(JitContext, global_clk) as i32;
/// Offset of `JitContext::memory`.
const MEMORY_PTR_OFFSET: i32 = offset_of!(JitContext, memory) as i32;
/// Offset of `JitContext::registers`.
const REGISTERS_OFFSET: i32 = offset_of!(JitContext, registers) as i32;
/// Offset of `TraceChunkHeader::num_mem_reads` within the trace buffer.
const NUM_MEM_READS_OFFSET: i32 = offset_of!(TraceChunkHeader, num_mem_reads) as i32;
/// Size of `TraceChunkHeader`; the traced memory entries start right after the header.
const TAIL_START_OFFSET: i32 = std::mem::size_of::<TraceChunkHeader>() as i32;
/// Where a RISC register is kept while the emitted code is running.
#[derive(Copy, Clone, Debug, PartialEq, Eq)]
enum Location {
/// No backing storage: loads produce 0 and stores are discarded.
Zero,
/// Held in an XMM register: `(xmm register index, 64-bit lane selector)`. The lane is the
/// immediate handed to `pextrq` / `pinsrq`.
Xmm(u8, i8),
/// Held in the general-purpose register with this id.
Gpr(u8),
}
/// Storage location of every RISC register, indexed by register number.
///
/// Register 0 is the zero register, register 10 lives in `r11`, and the other thirty are
/// spread over both 64-bit lanes of `xmm0`..`xmm14`. No lane is assigned twice.
const REG_LOOKUP: [Location; 32] = [
    Location::Zero,               // 0
    Location::Xmm(0, 0),          // 1
    Location::Xmm(1, 0),          // 2
    Location::Xmm(0, 1),          // 3
    Location::Xmm(1, 1),          // 4
    Location::Xmm(2, 0),          // 5
    Location::Xmm(3, 0),          // 6
    Location::Xmm(4, 0),          // 7
    Location::Xmm(5, 0),          // 8
    Location::Xmm(6, 0),          // 9
    Location::Gpr(Rq::R11 as u8), // 10
    Location::Xmm(7, 0),          // 11
    Location::Xmm(8, 0),          // 12
    Location::Xmm(9, 0),          // 13
    Location::Xmm(10, 0),         // 14
    Location::Xmm(11, 0),         // 15
    Location::Xmm(12, 0),         // 16
    Location::Xmm(13, 0),         // 17
    Location::Xmm(14, 0),         // 18
    Location::Xmm(2, 1),          // 19
    Location::Xmm(3, 1),          // 20
    Location::Xmm(4, 1),          // 21
    Location::Xmm(5, 1),          // 22
    Location::Xmm(6, 1),          // 23
    Location::Xmm(7, 1),          // 24
    Location::Xmm(8, 1),          // 25
    Location::Xmm(9, 1),          // 26
    Location::Xmm(10, 1),         // 27
    Location::Xmm(11, 1),         // 28
    Location::Xmm(12, 1),         // 29
    Location::Xmm(13, 1),         // 30
    Location::Xmm(14, 1),         // 31
];
/// x86-64 code generator: wraps a dynasm `Assembler` together with the bookkeeping needed
/// while emitting code for a program.
pub struct TranspilerBackend {
/// Underlying assembler; also reachable through `Deref` / `DerefMut`.
inner: Assembler,
/// Assembly offsets indexed by instruction index; `epilogue` uses them to bind `labels`.
jump_table: Vec<usize>,
// NOTE(review): not referenced in this part of the file; presumably the guest memory size.
memory_size: usize,
/// Trace limit, compared against the memory-read counter. Zero disables tracing.
max_trace_size: u64,
/// `epilogue` panics when this is still `false`.
has_instructions: bool,
/// Base PC: the instruction index of a PC is `(pc - pc_base) / 4`.
pc_base: u64,
// NOTE(review): not referenced in this part of the file.
pc_start: u64,
// NOTE(review): not referenced in this part of the file.
ecall_handler: EcallHandler,
// NOTE(review): not referenced in this part of the file.
control_flow_instruction_inserted: bool,
// NOTE(review): not referenced in this part of the file.
instruction_started: bool,
/// When set, `end_branch` emits the trace-size early-exit check.
may_early_exit: bool,
/// Set to `true` by `end_branch`.
branch_generated: bool,
/// Amount added to the cached clock by `bump_clk`.
clk_bump: u64,
/// PC used by `exit_if_trace_exceeds`, which stores `pc_current + 4` on early exit.
pc_current: u64,
/// RISC registers whose last store was a known immediate, with that value.
reg_values: HashMap<RiscRegister, u64>,
/// Dynamic labels handed out by `label_for_pc`, keyed by instruction index.
labels: HashMap<usize, DynamicLabel>,
/// Number of 4-byte instruction slots; upper bound for `label_for_pc`.
program_size: usize,
}
impl TraceCollector for TranspilerBackend {
/// Emit code that copies every RISC register into the trace buffer.
///
/// Register `r` is written to the 8-byte slot at `TRACE_BUF + r * 8`; the zero register is
/// recorded as a literal 0.
fn trace_registers(&mut self) {
    for &reg in RiscRegister::all_registers().iter() {
        // Displacement of this register's slot from the start of the trace buffer.
        let slot = (reg as u32 * 8) as i32;
        match Self::get_xmm_index(reg) {
            Location::Gpr(gpr) => {
                dynasm!(self
                    ; .arch x64
                    ; mov QWORD [Rq(TRACE_BUF) + slot], Rq(gpr)
                );
            }
            Location::Xmm(xmm, lane) => {
                dynasm!(self
                    ; .arch x64
                    ; pextrq [Rq(TRACE_BUF) + slot], Rx(xmm), lane
                );
            }
            Location::Zero => {
                dynasm!(self
                    ; .arch x64
                    ; mov QWORD [Rq(TRACE_BUF) + slot], 0
                );
            }
        }
    }
}
/// Emit code that records the memory entry addressed by `rs1 + imm` in the trace tail.
///
/// The emitted code does nothing when `JitContext::is_unconstrained` equals 1. Otherwise it
/// copies the 16-byte entry at `MEMORY_PTR + 2 * ((rs1 + imm) & !7)` to `[TAIL_START]`,
/// overwrites the first 8 bytes of that entry with `clk + 1`, increments the read counter
/// and advances the tail by 16 bytes. Clobbers `TEMP_A`, `rcx`, `rdx` and `xmm15`.
fn trace_mem_value(&mut self, rs1: RiscRegister, imm: u64) {
const IS_UNCONSTRAINED_OFFSET: i32 = offset_of!(JitContext, is_unconstrained) as i32;
// TEMP_A = value of rs1 (the base address).
self.emit_risc_operand_load(rs1.into(), TEMP_A);
dynasm! {
self;
.arch x64;
// Skip the whole sequence when the context is in unconstrained mode.
mov rcx, QWORD [Rq(CONTEXT) + IS_UNCONSTRAINED_OFFSET];
cmp rcx, 1;
je >done
}
// TEMP_A += imm. NOTE(review): exact encoding is chosen by `do_opt_imm_var!` — see the macro.
do_opt_imm_var!(self, add, TEMP_A, imm);
dynasm! {
self;
.arch x64;
// Round the address down to an 8-byte boundary.
and Rq(TEMP_A), -8;
// Entries are read 16 bytes at a time, so the entry address is base + 2 * address.
lea Rq(TEMP_A), [Rq(MEMORY_PTR) + Rq(TEMP_A) * 2];
// Snapshot the 16-byte entry into the trace tail (taken before the store below).
movdqu xmm15, [Rq(TEMP_A)];
movdqu [Rq(TAIL_START)], xmm15;
// Write clk + 1 into the first 8 bytes of the entry.
// NOTE(review): presumably the entry's clock field — confirm against the entry layout.
mov rdx, Rq(CLOCK_OR_SAVED_STACK_PTR);
add rdx, 1;
mov [Rq(TEMP_A)], rdx;
// One more traced read; advance the tail past the entry just written.
add Rq(NUM_MEM_READS), 1;
add Rq(TAIL_START), 16;
done:
}
}
/// Emit code that stores the context's current PC into `TraceChunkHeader::pc_start`.
///
/// Clobbers `TEMP_A`.
fn trace_pc_start(&mut self) {
    const PC_START_FIELD: i32 = offset_of!(TraceChunkHeader, pc_start) as i32;
    let scratch = TEMP_A;
    self.load_pc_into_register(scratch);
    dynasm!(self
        ; .arch x64
        ; mov [Rq(TRACE_BUF) + PC_START_FIELD], Rq(scratch)
    );
}
/// Emit code that copies `JitContext::clk` into `TraceChunkHeader::clk_start`.
///
/// The value is read from the context in memory, not from the cached clock register.
/// Clobbers `TEMP_A`.
fn trace_clk_start(&mut self) {
    const CLK_START_FIELD: i32 = offset_of!(TraceChunkHeader, clk_start) as i32;
    dynasm!(self
        ; .arch x64
        ; mov Rq(TEMP_A), QWORD [Rq(CONTEXT) + CLK_OFFSET]
        ; mov [Rq(TRACE_BUF) + CLK_START_FIELD], Rq(TEMP_A)
    );
}
/// Emit code that copies `JitContext::clk` and `JitContext::global_clk` into the
/// `clk_end` / `global_clk_end` fields of the trace header.
///
/// Both values are read from the context in memory, so the cached clocks must have been
/// written back first. Clobbers `TEMP_A` and `TEMP_B`.
fn trace_clk_end(&mut self) {
    const CLK_END_FIELD: i32 = offset_of!(TraceChunkHeader, clk_end) as i32;
    const GLOBAL_CLK_END_FIELD: i32 = offset_of!(TraceChunkHeader, global_clk_end) as i32;
    dynasm!(self
        ; .arch x64
        ; mov Rq(TEMP_A), [Rq(CONTEXT) + CLK_OFFSET]
        ; mov [Rq(TRACE_BUF) + CLK_END_FIELD], Rq(TEMP_A)
        ; mov Rq(TEMP_B), [Rq(CONTEXT) + GLOBAL_CLK_OFFSET]
        ; mov [Rq(TRACE_BUF) + GLOBAL_CLK_END_FIELD], Rq(TEMP_B)
    );
}
}
impl TranspilerBackend {
/// Returns `true` when tracing is enabled, i.e. when `max_trace_size` is non-zero.
fn tracing(&self) -> bool {
    self.max_trace_size != 0
}
/// Emit an early-exit check on the number of traced memory reads.
///
/// Emits nothing when tracing is disabled. Otherwise the emitted code compares the live
/// memory-read counter with `max_trace_size`; once the counter has reached the limit it
/// stores `pc_current + 4` as the context PC and jumps to the global `exit` label.
///
/// Clobbers `TEMP_B`, and `TEMP_A` on the exit path.
fn exit_if_trace_exceeds(&mut self, max_trace_size: u64) {
    if !self.tracing() {
        return;
    }
    let threshold_mem_reads = max_trace_size;
    do_load_imm_var!(self, TEMP_B, threshold_mem_reads);
    dynasm! {
        self;
        .arch x64;
        // Use the named register constant rather than a literal `r8`, so this check keeps
        // following the counter if the register assignment ever changes.
        cmp Rq(NUM_MEM_READS), Rq(TEMP_B);
        // Below the limit: continue with the code that follows.
        jb >done
    }
    // Limit reached: record where execution should resume, then leave the JIT function.
    self.update_pc(TEMP_A, self.pc_current + 4);
    dynasm! {
        self;
        .arch x64;
        jmp ->exit;
        done:
    }
}
/// Emit the function prologue.
///
/// Saves the registers the generated code repurposes, caches the context, jump-table and
/// trace-buffer pointers, loads the RISC registers and the memory pointer, records the
/// chunk's start state when tracing, and finally dispatches to the code for the context PC.
fn prologue(&mut self) {
let jump_table_offset = offset_of!(JitContext, jump_table) as i32;
let trace_buf_offset = offset_of!(JitContext, trace_buf) as i32;
dynasm! {
self;
.arch x64;
// Saved here; `epilogue` pops them in the reverse order.
push Rq(TEMP_A);
push Rq(TEMP_B);
push Rq(CONTEXT);
push Rq(JUMP_TABLE);
push Rq(TRACE_BUF);
push Rq(CLOCK_OR_SAVED_STACK_PTR);
// On entry `rdi` is used as the pointer to the `JitContext`.
mov Rq(JUMP_TABLE), [rdi + jump_table_offset];
mov Rq(TRACE_BUF), [rdi + trace_buf_offset];
mov Rq(CONTEXT), rdi
};
self.load_registers_from_context();
self.load_memory_ptr();
if self.tracing() {
// Header fields first (these use the scratch registers), then cache the trace
// counter and tail pointer in registers.
self.trace_pc_start();
self.trace_clk_start();
self.trace_registers();
self.hoist_trace_pointers();
}
self.hoist_clock();
self.jump_to_pc();
}
/// Emit the shared exit path and bind all pending dynamic labels.
///
/// The global `exit` label is defined here. The exit code writes the cached clocks (and, when
/// tracing, the read counter and end clocks) back to memory, stores the RISC registers into
/// the context, restores the registers pushed by `prologue` and returns.
///
/// # Panics
///
/// Panics if no instructions were emitted, or if a dynamic label cannot be defined.
fn epilogue(&mut self) {
if !self.has_instructions {
panic!(
"No instructions were emitted,
cannot finalize as this will break assumptions made in the jump table."
);
}
dynasm! {
self;
.arch x64;
->exit:
}
// The clocks are written back first: `trace_clk_end` reads them from the context.
self.write_back_clock();
if self.tracing() {
self.write_back_trace_pointers();
self.trace_clk_end();
}
self.save_registers_to_context();
dynasm! {
self;
.arch x64;
// Reverse of the push order in `prologue`.
pop Rq(CLOCK_OR_SAVED_STACK_PTR);
pop Rq(TRACE_BUF);
pop Rq(JUMP_TABLE);
pop Rq(CONTEXT);
pop Rq(TEMP_B);
pop Rq(TEMP_A);
ret
};
// Bind every label handed out by `label_for_pc` to the offset recorded for its
// instruction index.
for (index, label) in &self.labels {
self.inner
.labels_mut()
.define_dynamic(*label, AssemblyOffset(self.jump_table[*index]))
.expect("define dynamic label");
}
}
fn save_registers_to_context(&mut self) {
for reg in RiscRegister::all_registers().iter() {
let value_byte_offset = *reg as u32 * 8;
match Self::get_xmm_index(*reg) {
Location::Zero => {
dynasm! {
self;
.arch x64;
mov QWORD [Rq(CONTEXT) + REGISTERS_OFFSET + value_byte_offset as i32], 0
};
}
Location::Xmm(xmm_index, xmm_offset) => {
dynasm! {
self;
.arch x64;
pextrq [Rq(CONTEXT) + REGISTERS_OFFSET + value_byte_offset as i32], Rx(xmm_index), xmm_offset
};
}
Location::Gpr(gpr_index) => {
dynasm! {
self;
.arch x64;
mov QWORD [Rq(CONTEXT) + REGISTERS_OFFSET + value_byte_offset as i32], Rq(gpr_index)
};
}
}
}
}
fn load_registers_from_context(&mut self) {
for reg in RiscRegister::all_registers().iter() {
let value_byte_offset = *reg as u32 * 8;
match Self::get_xmm_index(*reg) {
Location::Zero => (),
Location::Xmm(xmm_index, xmm_offset) => {
dynasm! {
self;
.arch x64;
pinsrq Rx(xmm_index), [Rq(CONTEXT) + REGISTERS_OFFSET + value_byte_offset as i32], xmm_offset
};
}
Location::Gpr(gpr_index) => {
dynasm! {
self;
.arch x64;
mov Rq(gpr_index), QWORD [Rq(CONTEXT) + REGISTERS_OFFSET + value_byte_offset as i32]
};
}
}
}
}
/// Emit code that materialises `op` in the general-purpose register `dst`.
///
/// Immediates are moved in directly. Registers are read from their assigned location: the
/// zero register yields 0, XMM lane 0 uses `movq`, other lanes use `pextrq`, and a GPR-backed
/// register is copied unless it already is `dst`.
fn emit_risc_operand_load(&mut self, op: RiscOperand, dst: u8) {
    let reg = match op {
        RiscOperand::Immediate(imm) => {
            dynasm!(self
                ; .arch x64
                ; mov Rq(dst), imm
            );
            return;
        }
        RiscOperand::Register(reg) => reg,
    };

    match Self::get_xmm_index(reg) {
        // Already in the requested register: nothing to emit.
        Location::Gpr(gpr) if gpr == dst => {}
        Location::Gpr(gpr) => {
            dynasm!(self
                ; .arch x64
                ; mov Rq(dst), Rq(gpr)
            );
        }
        Location::Zero => {
            dynasm!(self
                ; .arch x64
                ; mov Rq(dst), 0_i32
            );
        }
        Location::Xmm(xmm, 0) => {
            dynasm!(self
                ; .arch x64
                ; movq Rq(dst), Rx(xmm)
            );
        }
        Location::Xmm(xmm, lane) => {
            dynasm!(self
                ; .arch x64
                ; pextrq Rq(dst), Rx(xmm), lane
            );
        }
    }
}
/// Emit code that writes a value into the RISC register `dst`.
///
/// With `imm == None` the value is taken from the host register `src_or_temp`. With
/// `imm == Some(v)` the constant `v` is stored; for XMM-backed registers `src_or_temp` is
/// used as a temporary to hold it. Stores to the zero register emit nothing.
///
/// Also keeps `reg_values` in sync: a constant store records the value, any other store
/// forgets it.
#[inline]
fn emit_risc_register_store(&mut self, src_or_temp: u8, imm: Option<u64>, dst: RiscRegister) {
    let location = Self::get_xmm_index(dst);
    if matches!(location, Location::Zero) {
        return;
    }

    // Track (or forget) the statically known value before emitting anything.
    match imm {
        Some(value) => {
            self.reg_values.insert(dst, value);
        }
        None => {
            self.reg_values.remove(&dst);
        }
    }

    match location {
        Location::Zero => {}
        Location::Xmm(xmm, lane) => {
            if let Some(value) = imm {
                do_load_imm_var!(self, src_or_temp, value);
            }
            dynasm!(self
                ; .arch x64
                ; pinsrq Rx(xmm), Rq(src_or_temp), lane
            );
        }
        Location::Gpr(gpr) => match imm {
            Some(value) => {
                do_load_imm_var!(self, gpr, value);
            }
            None => {
                if gpr != src_or_temp {
                    dynasm!(self
                        ; .arch x64
                        ; mov Rq(gpr), Rq(src_or_temp)
                    );
                }
            }
        },
    }
}
/// Look up the host location assigned to `reg` in `REG_LOOKUP`.
///
/// Despite the name, the result may also be a general-purpose register or the zero location.
#[inline]
const fn get_xmm_index(reg: RiscRegister) -> Location {
    let slot = reg as usize;
    REG_LOOKUP[slot]
}
/// Emit a call to the native function at address `fn_ptr`.
///
/// All cached state (RISC registers, trace read counter, clocks) is written back to memory
/// before the call and reloaded afterwards. This routine does not load any argument
/// registers; NOTE(review): callers presumably set those up themselves — confirm at call sites.
/// Clobbers `rax`.
#[inline]
fn call_extern_fn_raw(&mut self, fn_ptr: usize) {
self.save_registers_to_context();
if self.tracing() {
self.write_back_trace_pointers();
}
// Must happen before the clock register is reused for the stack pointer below.
self.write_back_clock();
dynasm! {
self;
.arch x64;
// Keep the original stack pointer so it can be restored after the call.
mov Rq(CLOCK_OR_SAVED_STACK_PTR), rsp;
// Move rsp down by 8, then subtract (rsp & 15) so that rsp is a multiple of 16.
lea rsp, [rsp - 8]; mov rax, rsp; and rax, 15; sub rsp, rax;
mov rax, QWORD fn_ptr as _;
call rax;
mov rsp, Rq(CLOCK_OR_SAVED_STACK_PTR)
}
// Reload everything that was spilled above.
if self.tracing() {
self.hoist_trace_pointers();
}
self.hoist_clock();
self.load_memory_ptr();
self.load_registers_from_context();
}
/// Emit code that loads `JitContext::pc` into the general-purpose register `dst`.
#[inline]
fn load_pc_into_register(&mut self, dst: u8) {
    dynasm!(self
        ; .arch x64
        ; mov Rq(dst), QWORD [Rq(CONTEXT) + PC_OFFSET]
    );
}
/// Emit code that reloads `MEMORY_PTR` from `JitContext::memory`.
#[inline]
fn load_memory_ptr(&mut self) {
    dynasm!(self
        ; .arch x64
        ; mov Rq(MEMORY_PTR), QWORD [Rq(CONTEXT) + MEMORY_PTR_OFFSET]
    );
}
/// Emit code that caches the trace state in registers.
///
/// Loads `NUM_MEM_READS` from the trace header and sets
/// `TAIL_START = TRACE_BUF + TAIL_START_OFFSET + NUM_MEM_READS * 16`. Clobbers `TEMP_B`.
#[inline]
fn hoist_trace_pointers(&mut self) {
    dynasm!(self
        ; .arch x64
        ; mov Rq(NUM_MEM_READS), QWORD [Rq(TRACE_BUF) + NUM_MEM_READS_OFFSET]
        // The factor of 16 is split as * 8 here and * 2 in the next instruction.
        ; lea Rq(TEMP_B), [Rq(NUM_MEM_READS) * 8]
        ; lea Rq(TAIL_START), [Rq(TRACE_BUF) + Rq(TEMP_B) * 2 + TAIL_START_OFFSET]
    );
}
/// Emit code that stores the cached read counter back into
/// `TraceChunkHeader::num_mem_reads`. The tail pointer is not stored.
#[inline]
fn write_back_trace_pointers(&mut self) {
    dynasm!(self
        ; .arch x64
        ; mov QWORD [Rq(TRACE_BUF) + NUM_MEM_READS_OFFSET], Rq(NUM_MEM_READS)
    );
}
/// Emit code that loads `JitContext::clk` and `JitContext::global_clk` into their cache
/// registers.
#[inline]
fn hoist_clock(&mut self) {
    dynasm!(self
        ; .arch x64
        ; mov Rq(CLOCK_OR_SAVED_STACK_PTR), QWORD [Rq(CONTEXT) + CLK_OFFSET]
        ; mov Rq(GLOBAL_CLK), QWORD [Rq(CONTEXT) + GLOBAL_CLK_OFFSET]
    );
}
/// Emit code that stores the cached clock registers back into `JitContext::clk` and
/// `JitContext::global_clk`.
#[inline]
fn write_back_clock(&mut self) {
    dynasm!(self
        ; .arch x64
        ; mov QWORD [Rq(CONTEXT) + CLK_OFFSET], Rq(CLOCK_OR_SAVED_STACK_PTR)
        ; mov QWORD [Rq(CONTEXT) + GLOBAL_CLK_OFFSET], Rq(GLOBAL_CLK)
    );
}
/// Test-only helper: emit code that adds `amt` to `JitContext::pc` in memory.
#[inline]
#[cfg(test)]
fn bump_pc(&mut self, amt: u32) {
    let step = amt as i32;
    dynasm!(self
        ; .arch x64
        ; add QWORD [Rq(CONTEXT) + PC_OFFSET], step
    );
}
/// Emit code that sets `JitContext::pc` to the constant `pc`.
///
/// `temp_reg` is overwritten with `pc` in the process.
#[inline]
fn update_pc(&mut self, temp_reg: u8, pc: u64) {
    do_load_imm_var!(self, temp_reg, pc);
    dynasm!(self
        ; .arch x64
        ; mov QWORD [Rq(CONTEXT) + PC_OFFSET], Rq(temp_reg)
    );
}
/// Return the dynamic label for the instruction at `target`, creating it on first use.
///
/// Returns `None` when `target` lies outside
/// `[pc_base, pc_base + program_size * 4)` or is not a multiple of 4. Labels are cached per
/// instruction index and are bound to code offsets later, in `epilogue`.
#[inline]
fn label_for_pc(&mut self, target: u64) -> Option<DynamicLabel> {
    let in_program =
        target >= self.pc_base && target < self.pc_base + self.program_size as u64 * 4;
    if !(in_program && target.is_multiple_of(4)) {
        return None;
    }

    let slot = (target - self.pc_base) as usize / 4;
    // Borrow the assembler separately so the label map can be borrowed at the same time.
    let assembler = &mut self.inner;
    let label = *self.labels.entry(slot).or_insert_with(|| assembler.new_dynamic_label());
    Some(label)
}
#[inline]
fn jump_to_pc(&mut self) {
self.load_pc_into_register(TEMP_A);
let pc_base = self.pc_base as i32;
dynasm! {
self;
.arch x64;
cmp Rq(TEMP_A), 1;
je ->exit;
sub Rq(TEMP_A), pc_base;
shr Rq(TEMP_A), 2;
mov Rq(TEMP_B), QWORD [Rq(JUMP_TABLE) + Rq(TEMP_A) * 8];
jmp Rq(TEMP_B)
}
}
/// Emit code that advances the cached clocks.
///
/// The clock register grows by `clk_bump`. The global clock grows by
/// `is_unconstrained ^ 1`, which is 1 when the flag is 0 and 0 when the flag is 1.
/// Clobbers `TEMP_A`.
fn bump_clk(&mut self) {
    const IS_UNCONSTRAINED_FIELD: i32 = offset_of!(JitContext, is_unconstrained) as i32;
    let step = self.clk_bump as i32;
    dynasm!(self
        ; .arch x64
        ; add Rq(CLOCK_OR_SAVED_STACK_PTR), step
        ; mov Rq(TEMP_A), QWORD [Rq(CONTEXT) + IS_UNCONSTRAINED_FIELD]
        ; xor Rq(TEMP_A), 1
        ; add Rq(GLOBAL_CLK), Rq(TEMP_A)
    );
}
/// Emit the transfer of control that ends a branch.
///
/// Marks that a branch was generated and, when early exit is allowed, emits the trace-size
/// check first. If `jump_target` is a PC that `label_for_pc` can resolve, a direct jump to
/// its label is emitted; in every other case the jump goes through the jump table using the
/// PC stored in the context.
fn end_branch(&mut self, jump_target: Option<u64>) {
    self.branch_generated = true;
    if self.may_early_exit {
        let limit = self.max_trace_size;
        self.exit_if_trace_exceeds(limit);
    }

    let direct = match jump_target {
        Some(target_pc) => self.label_for_pc(target_pc),
        None => None,
    };
    match direct {
        Some(label) => {
            dynasm!(self
                ; .arch x64
                ; jmp =>label
            );
        }
        None => self.jump_to_pc(),
    }
}
}
/// Exposes the wrapped assembler, which lets `dynasm!` be invoked directly on the backend.
impl Deref for TranspilerBackend {
    type Target = Assembler;

    fn deref(&self) -> &Assembler {
        &self.inner
    }
}
impl DerefMut for TranspilerBackend {
fn deref_mut(&mut self) -> &mut Self::Target {
&mut self.inner
}
}
// Refuse to build when the target does not enable SSE: RISC registers are kept in XMM
// registers by this backend.
// NOTE(review): the emitted `pinsrq` / `pextrq` are SSE4.1 instructions, which this gate does
// not check for — confirm that the host CPU requirement is enforced elsewhere.
#[cfg(not(target_feature = "sse"))]
compile_error!("SSE is required for the x86 backend");
/// Dummy ecall handler callable from emitted code.
///
/// Logs the ecall code found in register 5. Code 0 sets the PC to 0; any other code advances
/// the PC by 4. The clock is always advanced by 256 and the return value is always 0.
///
/// NOTE(review): `jump_to_pc` exits on `pc == 1`, while this handler writes `pc = 0` for
/// code 0 — confirm which sentinel the callers of this handler expect.
extern "C" fn ecallk(ctx: *mut JitContext) -> u64 {
    // SAFETY: relies on the caller passing a valid, exclusively borrowed pointer to a live
    // `JitContext`; nothing here checks for null.
    let ctx = unsafe { &mut *ctx };
    let code = ctx.registers[5];
    eprintln!("dummy ecall handler called with code: 0x{:x}", code);
    ctx.pc = if code == 0 { 0 } else { ctx.pc + 4 };
    ctx.clk += 256;
    0
}