#[macro_use]
mod macros;
use crate::assembler::{Assembler, Jumper};
use crate::config::Config;
use crate::generator::{FuncletType, Generator};
use crate::symbol::Loc;
use crate::utils::{align_stack, reg, Reg};
use anyhow::Result;
fn hi(x: u32) -> u32 {
if x & 0x0800 != 0 {
(x >> 12) + 1
} else {
x >> 12
}
}
fn lo(x: u32) -> u32 {
x & 0x0fff
}
pub struct RiscV {
a: Assembler,
}
impl RiscV {
const zero: u8 = 0;
const ra: u8 = 1;
const sp: u8 = 2;
const t0: u8 = 5;
const t1: u8 = 6;
const t2: u8 = 7;
const a0: u8 = 10;
const a1: u8 = 11;
const a2: u8 = 12;
const a3: u8 = 13;
const ft0: u8 = 0;
const ft1: u8 = 1;
const ft2: u8 = 2;
const ft3: u8 = 3;
const ft4: u8 = 4;
const ft5: u8 = 5;
const ft6: u8 = 6;
const ft7: u8 = 7;
const fs0: u8 = 8;
const fs1: u8 = 9;
const fa0: u8 = 10;
const fa1: u8 = 11;
const fa2: u8 = 12;
const fa3: u8 = 13;
const fa4: u8 = 14;
const fa5: u8 = 15;
const fa6: u8 = 16;
const fa7: u8 = 17;
const fs2: u8 = 18;
const fs3: u8 = 19;
const fs4: u8 = 20;
const fs5: u8 = 21;
const fs6: u8 = 22;
const fs7: u8 = 23;
const fs8: u8 = 24;
const fs9: u8 = 25;
const fs10: u8 = 26;
const fs11: u8 = 27;
const ft8: u8 = 28;
const ft9: u8 = 29;
const ft10: u8 = 30;
const ft11: u8 = 31;
}
const FMAP: [u8; 30] = [
RiscV::fa2,
RiscV::fa3,
RiscV::fa4,
RiscV::fa5,
RiscV::fa6,
RiscV::fa7,
RiscV::ft0,
RiscV::ft1,
RiscV::ft2,
RiscV::ft3,
RiscV::ft4,
RiscV::ft5,
RiscV::ft6,
RiscV::ft7,
RiscV::ft8,
RiscV::ft9, RiscV::ft10, RiscV::ft11, RiscV::fs0,
RiscV::fs1,
RiscV::fs2,
RiscV::fs3,
RiscV::fs4,
RiscV::fs5,
RiscV::fs6,
RiscV::fs7,
RiscV::fs8,
RiscV::fs9,
RiscV::fs10,
RiscV::fs11,
];
fn Ï•(r: Reg) -> u8 {
match r {
Reg::Ret | Reg::Left => RiscV::fa0,
Reg::Temp | Reg::Right => RiscV::fa1,
Reg::Gen(dst) => FMAP[dst as usize],
Reg::Static(..) => panic!("passing static registers to codegen"),
}
}
const MEM: u8 = RiscV::fs0; const STATES: u8 = RiscV::fs1; const IDX: u8 = RiscV::fs2; const PARAMS: u8 = RiscV::fs3;
const STACK: u8 = RiscV::sp;
impl RiscV {
pub fn new(_config: Config) -> RiscV {
RiscV {
a: Assembler::new(),
}
}
fn reg_size(&self) -> u32 {
8
}
fn append_quad(&mut self, u: u64) {
self.a.append_quad(u);
}
fn apply_jumps(&mut self) {
self.a.apply_jumps();
}
fn jump(&mut self, label: &str, code: u32, f: Jumper) {
self.a.jump(label, code, f)
}
fn emit(&mut self, w: u32) {
self.a.append_word(w);
}
fn load_float(&mut self, d: u8, base: u8, offset: u32) {
if offset < 2048 {
self.emit(rvv! {fld f(d), x(base), offset});
} else {
self.li(Self::t0, offset as i32);
self.emit(rvv! {add x(Self::t0), x(Self::t0), x(base)});
self.emit(rvv! {fld f(d), x(Self::t0), 0});
}
}
fn save_float(&mut self, d: u8, base: u8, offset: u32) {
if offset < 2048 {
self.emit(rvv! {fsd f(d), x(base), offset});
} else {
self.li(Self::t0, offset as i32);
self.emit(rvv! {add x(Self::t0), x(Self::t0), x(base)});
self.emit(rvv! {fsd f(d), x(Self::t0), 0});
}
}
fn load_complex(&mut self, xd: Reg, yd: Reg, base: u8, offset: u32) {
if offset < 2040 {
self.emit(rvv! {fld f(Ï•(xd)), x(base), offset});
self.emit(rvv! {fld f(Ï•(yd)), x(base), offset + 8});
} else {
self.li(Self::t0, offset as i32);
self.emit(rvv! {add x(Self::t0), x(Self::t0), x(base)});
self.emit(rvv! {fld f(Ï•(xd)), x(Self::t0), 0});
self.emit(rvv! {fld f(Ï•(yd)), x(Self::t0), 8});
}
}
fn save_complex(&mut self, xs: Reg, ys: Reg, base: u8, offset: u32) {
if offset < 2040 {
self.emit(rvv! {fsd f(Ï•(xs)), x(base), offset});
self.emit(rvv! {fsd f(Ï•(ys)), x(base), offset + 8});
} else {
self.li(Self::t0, offset as i32);
self.emit(rvv! {add x(Self::t0), x(Self::t0), x(base)});
self.emit(rvv! {fsd f(Ï•(xs)), x(Self::t0), 0});
self.emit(rvv! {fsd f(Ï•(ys)), x(Self::t0), 8});
}
}
fn load_int(&mut self, d: u8, base: u8, offset: u32) {
if offset < 2048 {
self.emit(rvv! {ld x(d), x(base), offset});
} else {
self.li(Self::t0, offset as i32);
self.emit(rvv! {add x(Self::t0), x(Self::t0), x(base)});
self.emit(rvv! {ld x(d), x(Self::t0), 0});
}
}
fn li(&mut self, dst: u8, val: i32) {
if !(-2048..2048).contains(&val) {
self.emit(rvv! {lui x(dst), hi(val as u32)});
self.emit(rvv! {addi x(dst), x(dst), lo(val as u32)});
} else {
self.emit(rvv! {addi x(dst), x(Self::zero), lo(val as u32)});
}
}
fn sub_stack(&mut self, size: u32) {
if size < 2048 {
self.emit(rvv! {addi x(Self::sp), x(Self::sp), -(size as i32)});
} else {
self.li(Self::t0, -(size as i32));
self.emit(rvv! {add x(Self::sp), x(Self::sp), x(Self::t0)});
}
}
fn add_stack(&mut self, size: u32) {
if size < 2048 {
self.emit(rvv! {addi x(Self::sp), x(Self::sp), size});
} else {
self.li(Self::t0, size as i32);
self.emit(rvv! {add x(Self::sp), x(Self::sp), x(Self::t0)});
}
}
}
impl Generator for RiscV {
fn bytes(&mut self) -> Vec<u8> {
self.a.bytes()
}
fn three_address(&self) -> bool {
true
}
fn count_shadows(&self) -> u8 {
14
}
fn support_funclet(&self) -> FuncletType {
FuncletType::None
}
fn seal(&mut self) {
self.apply_jumps();
}
fn align(&mut self) {}
fn set_label(&mut self, label: &str) {
self.a.set_label(label);
}
fn branch(&mut self, label: &str) {
self.jump(label, 0, |offset, _| rvv! {j offset});
}
fn branch_if(&mut self, cond: Reg, label: &str, is_else: bool) {
self.emit(rvv! {fmv.x.d x(Self::t0), f(Ï•(cond))});
if is_else {
self.emit(rvv! {bne x(Self::t0), x(Self::zero), 8});
} else {
self.emit(rvv! {beq x(Self::t0), x(Self::zero), 8});
}
self.jump(label, 0, |offset, _| rvv! {j offset});
}
fn fuse_load_math(&mut self) {}
fn fmov(&mut self, dst: Reg, s1: Reg) {
if dst == s1 {
return;
}
self.emit(rvv! {fmv.d f(Ï•(dst)), f(Ï•(s1))});
}
fn fxchg(&mut self, _s1: Reg, _s2: Reg) {
todo!();
}
fn load_const(&mut self, dst: Reg, idx: u32) {
let label = format!("_const_{}_", idx);
self.jump(
label.as_str(),
0,
|offset, _| rvv! {auipc x(Self::a0), hi(offset as u32)},
);
self.jump(
label.as_str(),
Ï•(dst) as u32,
|offset, r| rvv! {fld f(r), x(Self::a0), lo((offset + 4) as u32)},
);
}
fn load_mem(&mut self, dst: Reg, idx: u32) {
self.load_float(Ï•(dst), MEM, 8 * idx);
}
fn save_mem(&mut self, dst: Reg, idx: u32) {
self.save_float(Ï•(dst), MEM, 8 * idx);
}
fn save_mem_result(&mut self, idx: u32) {
self.save_float(Self::fa0, MEM, 8 * idx);
}
fn load_param(&mut self, dst: Reg, idx: u32) {
self.load_float(Ï•(dst), PARAMS, 8 * idx);
}
fn load_stack(&mut self, dst: Reg, idx: u32) {
self.load_float(Ï•(dst), Self::sp, 8 * idx);
}
fn save_stack(&mut self, dst: Reg, idx: u32) {
self.save_float(Ï•(dst), Self::sp, 8 * idx);
}
fn load_mem_complex(&mut self, xd: Reg, yd: Reg, idx: u32) {
self.load_complex(xd, yd, MEM, 8 * idx);
}
fn save_mem_complex(&mut self, xs: Reg, ys: Reg, idx: u32) {
self.save_complex(xs, ys, MEM, 8 * idx);
}
fn load_param_complex(&mut self, xd: Reg, yd: Reg, idx: u32) {
self.load_complex(xd, yd, PARAMS, 8 * idx);
}
fn load_stack_complex(&mut self, xd: Reg, yd: Reg, idx: u32) {
self.load_complex(xd, yd, STACK, 8 * idx);
}
fn save_stack_complex(&mut self, xs: Reg, ys: Reg, idx: u32) {
self.save_complex(xs, ys, STACK, 8 * idx);
}
fn save_stack_result(&mut self, idx: u32) {
self.save_float(Self::fa0, Self::sp, 8 * idx);
}
fn neg(&mut self, dst: Reg, s1: Reg) {
self.emit(rvv! {fneg.d f(Ï•(dst)), f(Ï•(s1))});
}
fn abs(&mut self, dst: Reg, s1: Reg) {
self.emit(rvv! {fabs.d f(Ï•(dst)), f(Ï•(s1))});
}
fn root(&mut self, dst: Reg, s1: Reg) {
self.emit(rvv! {fsqrt.d f(Ï•(dst)), f(Ï•(s1))});
}
fn real_root(&mut self, dst: Reg, s1: Reg) {
self.root(dst, s1);
}
fn recip(&mut self, dst: Reg, s1: Reg) {
self.emit(rvv! {addi x(Self::t0), x(Self::zero), 1});
self.emit(rvv! {fcvt.d.w f(Self::fa0), x(Self::t0)});
self.emit(rvv! {fdiv.d f(Ï•(dst)), f(Self::fa0), f(Ï•(s1))});
}
fn half(&mut self, dst: Reg, s1: Reg) {
self.emit(rvv! {addi x(Self::t0), x(Self::zero), 2});
self.emit(rvv! {fcvt.d.w f(Self::fa0), x(Self::t0)});
self.emit(rvv! {fdiv.d f(Ï•(dst)), f(Ï•(s1)), f(Self::fa0)});
}
fn round(&mut self, dst: Reg, s1: Reg) {
self.emit(rvv! {fcvt.l.d x(Self::t0), f(Ï•(s1)), 0});
self.emit(rvv! {fcvt.d.l f(Ï•(dst)), x(Self::t0)});
}
fn trunc(&mut self, dst: Reg, s1: Reg) {
self.emit(rvv! {fcvt.l.d x(Self::t0), f(Ï•(s1)), 1});
self.emit(rvv! {fcvt.d.l f(Ï•(dst)), x(Self::t0)});
}
fn floor(&mut self, dst: Reg, s1: Reg) {
self.emit(rvv! {fcvt.l.d x(Self::t0), f(Ï•(s1)), 2});
self.emit(rvv! {fcvt.d.l f(Ï•(dst)), x(Self::t0)});
}
fn ceiling(&mut self, dst: Reg, s1: Reg) {
self.emit(rvv! {fcvt.l.d x(Self::t0), f(Ï•(s1)), 3});
self.emit(rvv! {fcvt.d.l f(Ï•(dst)), x(Self::t0)});
}
fn frac(&mut self, dst: Reg, s1: Reg) {
self.floor(Reg::Temp, s1);
self.minus(dst, s1, Reg::Temp);
}
fn plus(&mut self, dst: Reg, s1: Reg, s2: Reg) {
self.emit(rvv! {fadd.d f(Ï•(dst)), f(Ï•(s1)), f(Ï•(s2))});
}
fn minus(&mut self, dst: Reg, s1: Reg, s2: Reg) {
self.emit(rvv! {fsub.d f(Ï•(dst)), f(Ï•(s1)), f(Ï•(s2))});
}
fn times(&mut self, dst: Reg, s1: Reg, s2: Reg) {
self.emit(rvv! {fmul.d f(Ï•(dst)), f(Ï•(s1)), f(Ï•(s2))});
}
fn divide(&mut self, dst: Reg, s1: Reg, s2: Reg) {
self.emit(rvv! {fdiv.d f(Ï•(dst)), f(Ï•(s1)), f(Ï•(s2))});
}
fn times_complex(&mut self, xd: Reg, yd: Reg, x1: Reg, y1: Reg, x2: Reg, y2: Reg) -> bool {
let xt = Self::ft10;
let yt = Self::ft11;
self.emit(rvv! {fmul.d f(xt), f(Ï•(y1)), f(Ï•(y2))});
self.emit(rvv! {fmsub.d f(xt), f(Ï•(x1)), f(Ï•(x2)), f(xt)});
self.emit(rvv! {fmul.d f(yt), f(Ï•(x1)), f(Ï•(y2))});
self.emit(rvv! {fmadd.d f(Ï•(yd)), f(Ï•(x2)), f(Ï•(y1)), f(yt)});
self.emit(rvv! {fmv.d f(Ï•(xd)), f(xt)});
true
}
fn divide_complex(&mut self, xd: Reg, yd: Reg, x1: Reg, y1: Reg, x2: Reg, y2: Reg) -> bool {
let t = Self::ft9;
let xt = Self::ft10;
let yt = Self::ft11;
self.emit(rvv! {fmul.d f(xt), f(Ï•(x1)), f(Ï•(x2))});
self.emit(rvv! {fmadd.d f(xt), f(Ï•(y1)), f(Ï•(y2)), f(xt)});
self.emit(rvv! {fmul.d f(yt), f(Ï•(x1)), f(Ï•(y2))});
self.emit(rvv! {fmsub.d f(yt), f(Ï•(x2)), f(Ï•(y1)), f(yt)});
self.emit(rvv! {fmul.d f(t), f(Ï•(x2)), f(Ï•(x2))});
self.emit(rvv! {fmadd.d f(t), f(Ï•(y2)), f(Ï•(y2)), f(t)});
self.emit(rvv! {fdiv.d f(Ï•(xd)), f(xt), f(t)});
self.emit(rvv! {fdiv.d f(Ï•(yd)), f(yt), f(t)});
true
}
fn support_times2(&self) -> bool {
false
}
fn times2_loc(&mut self, _d1: Reg, _s1: Reg, _l1: Loc, _d2: Reg, _s2: Reg, _l2: Loc) {
unreachable!()
}
fn real(&mut self, dst: Reg, s1: Reg) {
self.fmov(dst, s1);
}
fn imaginary(&mut self, dst: Reg, _s1: Reg) {
self.xor(dst, dst, dst);
}
fn conjugate(&mut self, dst: Reg, s1: Reg) {
self.fmov(dst, s1);
}
fn complex(&mut self, dst: Reg, s1: Reg, _s2: Reg) {
self.fmov(dst, s1);
}
fn gt(&mut self, dst: Reg, s1: Reg, s2: Reg) {
self.emit(rvv! {fgt.d x(Self::t0), f(Ï•(s1)), f(Ï•(s2))});
self.emit(rvv! {neg x(Self::t0), x(Self::t0)});
self.emit(rvv! {fmv.d.x f(Ï•(dst)), x(Self::t0)});
}
fn geq(&mut self, dst: Reg, s1: Reg, s2: Reg) {
self.emit(rvv! {fge.d x(Self::t0), f(Ï•(s1)), f(Ï•(s2))});
self.emit(rvv! {neg x(Self::t0), x(Self::t0)});
self.emit(rvv! {fmv.d.x f(Ï•(dst)), x(Self::t0)});
}
fn lt(&mut self, dst: Reg, s1: Reg, s2: Reg) {
self.emit(rvv! {flt.d x(Self::t0), f(Ï•(s1)), f(Ï•(s2))});
self.emit(rvv! {neg x(Self::t0), x(Self::t0)});
self.emit(rvv! {fmv.d.x f(Ï•(dst)), x(Self::t0)});
}
fn leq(&mut self, dst: Reg, s1: Reg, s2: Reg) {
self.emit(rvv! {fle.d x(Self::t0), f(Ï•(s1)), f(Ï•(s2))});
self.emit(rvv! {neg x(Self::t0), x(Self::t0)});
self.emit(rvv! {fmv.d.x f(Ï•(dst)), x(Self::t0)});
}
fn eq(&mut self, dst: Reg, s1: Reg, s2: Reg) {
self.emit(rvv! {feq.d x(Self::t0), f(Ï•(s1)), f(Ï•(s2))});
self.emit(rvv! {neg x(Self::t0), x(Self::t0)});
self.emit(rvv! {fmv.d.x f(Ï•(dst)), x(Self::t0)});
}
fn neq(&mut self, dst: Reg, s1: Reg, s2: Reg) {
self.emit(rvv! {feq.d x(Self::t0), f(Ï•(s1)), f(Ï•(s2))});
self.emit(rvv! {neg x(Self::t0), x(Self::t0)});
self.emit(rvv! {not x(Self::t0), x(Self::t0)});
self.emit(rvv! {fmv.d.x f(Ï•(dst)), x(Self::t0)});
}
fn and(&mut self, dst: Reg, s1: Reg, s2: Reg) {
self.emit(rvv! {fmv.x.d x(Self::t0), f(Ï•(s1))});
self.emit(rvv! {fmv.x.d x(Self::t1), f(Ï•(s2))});
self.emit(rvv! {and x(Self::t0), x(Self::t0), x(Self::t1)});
self.emit(rvv! {fmv.d.x f(Ï•(dst)), x(Self::t0)});
}
fn andnot(&mut self, dst: Reg, s1: Reg, s2: Reg) {
self.emit(rvv! {fmv.x.d x(Self::t0), f(Ï•(s1))});
self.emit(rvv! {not x(Self::t0), x(Self::t0)});
self.emit(rvv! {fmv.x.d x(Self::t1), f(Ï•(s2))});
self.emit(rvv! {and x(Self::t0), x(Self::t0), x(Self::t1)});
self.emit(rvv! {fmv.d.x f(Ï•(dst)), x(Self::t0)});
}
fn or(&mut self, dst: Reg, s1: Reg, s2: Reg) {
self.emit(rvv! {fmv.x.d x(Self::t0), f(Ï•(s1))});
self.emit(rvv! {fmv.x.d x(Self::t1), f(Ï•(s2))});
self.emit(rvv! {or x(Self::t0), x(Self::t0), x(Self::t1)});
self.emit(rvv! {fmv.d.x f(Ï•(dst)), x(Self::t0)});
}
fn xor(&mut self, dst: Reg, s1: Reg, s2: Reg) {
if s1 == s2 {
self.emit(rvv! {fmv.d.x f(Ï•(dst)), x(Self::zero)});
} else {
self.emit(rvv! {fmv.x.d x(Self::t0), f(Ï•(s1))});
self.emit(rvv! {fmv.x.d x(Self::t1), f(Ï•(s2))});
self.emit(rvv! {xor x(Self::t0), x(Self::t0), x(Self::t1)});
self.emit(rvv! {fmv.d.x f(Ï•(dst)), x(Self::t0)});
}
}
fn not(&mut self, dst: Reg, s1: Reg) {
self.emit(rvv! {fmv.x.d x(Self::t0), f(Ï•(s1))});
self.emit(rvv! {not x(Self::t0), x(Self::t0)});
self.emit(rvv! {fmv.d.x f(Ï•(dst)), x(Self::t0)});
}
fn fused_mul_add(&mut self, dst: Reg, s1: Reg, s2: Reg, s3: Reg) {
self.emit(rvv! {fmadd.d f(Ï•(dst)), f(Ï•(s1)), f(Ï•(s2)), f(Ï•(s3))});
}
fn fused_mul_sub(&mut self, dst: Reg, s1: Reg, s2: Reg, s3: Reg) {
self.emit(rvv! {fmsub.d f(Ï•(dst)), f(Ï•(s1)), f(Ï•(s2)), f(Ï•(s3))});
}
fn fused_neg_mul_add(&mut self, dst: Reg, s1: Reg, s2: Reg, s3: Reg) {
self.emit(rvv! {fnmsub.d f(Ï•(dst)), f(Ï•(s1)), f(Ï•(s2)), f(Ï•(s3))});
}
fn fused_neg_mul_sub(&mut self, dst: Reg, s1: Reg, s2: Reg, s3: Reg) {
self.emit(rvv! {fnmadd.d f(Ï•(dst)), f(Ï•(s1)), f(Ï•(s2)), f(Ï•(s3))});
}
fn add_consts(&mut self, consts: &[f64]) {
for (idx, val) in consts.iter().enumerate() {
let label = format!("_const_{}_", idx);
self.set_label(label.as_str());
self.append_quad((*val).to_bits());
}
}
fn add_func(&mut self, f: &str, p: crate::code::Func) {
let label = format!("_func_{}_", f);
self.set_label(label.as_str());
self.append_quad(p.func_ptr());
}
fn call(&mut self, op: &str, _num_args: usize) -> Result<()> {
let label = format!("_func_{}_", op);
self.jump(
label.as_str(),
0,
|offset, _| rvv! {auipc x(Self::a1), hi(offset as u32)},
);
self.jump(
label.as_str(),
0,
|offset, _| rvv! {ld x(Self::a1), x(Self::a1), lo((offset + 4) as u32)},
);
self.emit(rvv! {jalr x(Self::ra), x(Self::a1), 0});
Ok(())
}
fn call_complex(&mut self, op: &str, num_args: usize) -> Result<()> {
self.emit(rvv! {mv x(Self::a0), x(Self::sp)});
if num_args == 2 {
self.save_stack(Reg::Gen(0), 0);
self.save_stack(Reg::Gen(1), 1);
}
self.call(op, num_args)?;
self.load_stack(Reg::Ret, 0);
self.load_stack(Reg::Temp, 1);
Ok(())
}
fn call_funclet(&mut self, _label: &str) {
todo!();
}
fn ret(&mut self) {
self.emit(rvv! {ret});
}
fn ifelse(&mut self, dst: Reg, true_val: Reg, false_val: Reg, idx: u32) {
self.load_int(Self::t0, Self::sp, 8 * idx);
self.emit(rvv! {fmv.x.d x(Self::t1), f(Ï•(true_val))});
self.emit(rvv! {fmv.x.d x(Self::t2), f(Ï•(false_val))});
self.emit(rvv! {and x(Self::t1), x(Self::t1), x(Self::t0)});
self.emit(rvv! {not x(Self::t0), x(Self::t0)});
self.emit(rvv! {and x(Self::t2), x(Self::t2), x(Self::t0)});
self.emit(rvv! {or x(Self::t0), x(Self::t1), x(Self::t2)});
self.emit(rvv! {fmv.d.x f(Ï•(dst)), x(Self::t0)});
}
fn prologue_fast(&mut self, cap: usize, count_states: usize, count_obs: usize) {
self.sub_stack(16);
self.emit(rvv! {sd x(Self::ra), x(Self::sp), 0});
self.emit(rvv! {sd x(MEM), x(Self::sp), 8});
let size = align_stack((count_states + count_obs) as u32 * self.reg_size());
self.sub_stack(size);
self.emit(rvv! {mv x(MEM), x(Self::sp)});
let stack_size = align_stack(cap as u32 * self.reg_size());
self.sub_stack(stack_size);
self.emit(rvv! {mv x(MEM), x(Self::sp)});
for i in 0..count_states {
self.emit(rvv! {fsd f(Self::fa0 + i as u8), x(MEM), 8 * i});
}
}
fn epilogue_fast(&mut self, cap: usize, count_states: usize, count_obs: usize, idx_ret: i32) {
self.emit(rvv! {fld f(Self::fa0), x(MEM), 8*idx_ret});
let total_size = align_stack(cap as u32 * self.reg_size())
+ align_stack((count_states + count_obs) as u32 * self.reg_size());
self.add_stack(total_size);
self.emit(rvv! {ld x(Self::ra), x(Self::sp), 0});
self.emit(rvv! {ld x(MEM), x(Self::sp), 8});
self.add_stack(16);
self.emit(rvv! {ret});
}
fn prologue_indirect(
&mut self,
cap: usize,
count_states: usize,
count_obs: usize,
_count_params: usize,
) {
self.sub_stack(64);
self.emit(rvv! {sd x(Self::ra), x(Self::sp), 0});
self.emit(rvv! {sd x(MEM), x(Self::sp), 8});
self.emit(rvv! {sd x(STATES), x(Self::sp), 16});
self.emit(rvv! {sd x(IDX), x(Self::sp), 24});
self.emit(rvv! {sd x(PARAMS), x(Self::sp), 32});
self.emit(rvv! {mv x(MEM), x(Self::a0)});
self.emit(rvv! {mv x(STATES), x(Self::a1)});
self.emit(rvv! {mv x(IDX), x(Self::a2)});
self.emit(rvv! {mv x(PARAMS), x(Self::a3)});
self.jump(
"@main",
0,
|offset, _| rvv! {beq x(STATES), x(Self::zero), offset},
);
let size = align_stack((count_states + count_obs) as u32 * self.reg_size());
self.sub_stack(size);
self.emit(rvv! {mv x(MEM), x(Self::sp)});
self.emit(rvv! {slli x(IDX), x(IDX), 3});
if count_states > 16 {
self.emit(rvv! {mv x(Self::t1), x(MEM)});
self.li(Self::t2, count_states as i32);
self.set_label("@pro");
self.prologue_inner();
self.emit(rvv! {addi x(Self::t2), x(Self::t2), -1});
self.jump("@pro", 0, |offset, _| {
rvv! {bne x(Self::t2), x(Self::zero), offset}
});
} else if count_states > 0 {
self.emit(rvv! {mv x(Self::t1), x(MEM)});
for _ in 0..count_states {
self.prologue_inner();
}
}
self.set_label("@main");
let stack_size = align_stack(cap as u32 * self.reg_size());
self.sub_stack(stack_size);
}
fn epilogue_indirect(
&mut self,
cap: usize,
count_states: usize,
count_obs: usize,
_count_params: usize,
) {
let stack_size = align_stack(cap as u32 * self.reg_size());
self.add_stack(stack_size);
self.jump("@done", 0, |offset, _| {
rvv! {beq x(STATES), x(Self::zero), offset}
});
if count_obs > 16 {
self.li(Self::t1, 8 * count_states as i32);
self.emit(rvv! {add x(Self::t1), x(Self::t1), x(MEM)});
self.li(Self::t2, count_obs as i32);
self.set_label("@epi");
self.epilogue_inner();
self.emit(rvv! {addi x(Self::t2), x(Self::t2), -1});
self.jump("@epi", 0, |offset, _| {
rvv! {bne x(Self::t2), x(Self::zero), offset}
});
} else if count_obs > 0 {
self.li(Self::t1, 8 * count_states as i32);
self.emit(rvv! {add x(Self::t1), x(Self::t1), x(MEM)});
for _ in 0..count_obs {
self.epilogue_inner();
}
}
let size = align_stack((count_states + count_obs) as u32 * self.reg_size());
self.add_stack(size);
self.set_label("@done");
self.emit(rvv! {ld x(Self::ra), x(Self::sp), 0});
self.emit(rvv! {ld x(MEM), x(Self::sp), 8});
self.emit(rvv! {ld x(STATES), x(Self::sp), 16});
self.emit(rvv! {ld x(IDX), x(Self::sp), 24});
self.emit(rvv! {ld x(PARAMS), x(Self::sp), 32});
self.add_stack(64);
self.emit(rvv! {ret});
}
fn save_used_registers(&mut self, used: &[u8]) {
for r in used {
if *r >= 18 {
self.save_stack(reg(*r), *r as u32 - 14);
}
}
}
fn load_used_registers(&mut self, used: &[u8]) {
for r in used {
if *r >= 18 {
self.load_stack(reg(*r), *r as u32 - 14);
}
}
}
}
impl RiscV {
fn prologue_inner(&mut self) {
self.emit(rvv! {ld x(Self::t0), x(STATES), 0});
self.emit(rvv! {add x(Self::t0), x(Self::t0), x(IDX)});
self.emit(rvv! {fld f(Self::fa0), x(Self::t0), 0});
self.emit(rvv! {fsd f(Self::fa0), x(Self::t1), 0});
self.emit(rvv! {addi x(STATES), x(STATES), 2*8});
self.emit(rvv! {addi x(Self::t1), x(Self::t1), 8});
}
fn epilogue_inner(&mut self) {
self.emit(rvv! {ld x(Self::t0), x(STATES), 0});
self.emit(rvv! {add x(Self::t0), x(Self::t0), x(IDX)});
self.emit(rvv! {fld f(Self::fa0), x(Self::t1), 0});
self.emit(rvv! {fsd f(Self::fa0), x(Self::t0), 0});
self.emit(rvv! {addi x(STATES), x(STATES), 2*8});
self.emit(rvv! {addi x(Self::t1), x(Self::t1), 8});
}
}