namespace asm_code {
// Packs four 2-bit lane selectors into a single immediate value.
// lanes[i] must be in 0..3 (checked by assert) and is stored in bits
// [2*i+1 : 2*i] of the result, which is the field layout of the VPERMQ imm8
// operand that the function name refers to.
// Returns the packed value formatted by to_hex().
string vpermq_mask(array<int, 4> lanes) {
    int imm=0;
    int shift=0;
    for (int selector : lanes) {
        assert(selector>=0 && selector<4);
        imm|=selector << shift;
        shift+=2; // each selector occupies two bits
    }
    return to_hex(imm);
}
// Builds a blend immediate from four 0/1 selectors (checked by assert).
// Each selector controls a pair of adjacent mask bits: selector i equal to 1
// sets bits 2*i and 2*i+1, selector 0 leaves them clear. With VPBLENDD's
// one-bit-per-dword mask this selects whole 64-bit lanes.
// Returns the mask formatted by to_hex().
string vpblendd_mask_4(array<int, 4> lanes) {
    int imm=0;
    for (int idx=0;idx<4;++idx) {
        int selector=lanes[idx];
        assert(selector>=0 && selector<2);
        if (selector==1) {
            // 3 == 0b11: both dword bits of this 64-bit lane
            imm|=3 << (2*idx);
        }
    }
    return to_hex(imm);
}
// Builds a blend immediate from eight 0/1 selectors (checked by assert).
// Selector i equal to 1 sets bit i of the result; selector 0 leaves it clear,
// i.e. one mask bit per element as in the VPBLENDD imm8 operand.
// Returns the mask formatted by to_hex().
string vpblendd_mask_8(array<int, 8> lanes) {
    int imm=0;
    int bit=1; // mask bit belonging to the current element
    for (int selector : lanes) {
        assert(selector>=0 && selector<2);
        if (selector==1) {
            imm|=bit;
        }
        bit<<=1;
    }
    return to_hex(imm);
}
struct asm_function {
string name;
reg_alloc regs;
vector<reg_scalar> args;
vector<reg_scalar> pop_regs;
const vector<reg_scalar> all_save_regs={reg_rbp, reg_rbx, reg_r12, reg_r13, reg_r14, reg_r15};
const vector<reg_scalar> all_arg_regs={reg_rdi, reg_rsi, reg_rdx, reg_rcx, reg_r8, reg_r9};
const reg_scalar return_reg=reg_rax;
bool d_align_stack=true;
bool d_return_error_code=true;
asm_function(string t_name, int num_args=0, int num_regs=15, bool align_stack=true, bool return_error_code=true) {
EXPAND_MACROS_SCOPE;
d_align_stack=align_stack;
d_return_error_code=return_error_code;
static bool outputted_header=false;
if (!outputted_header) {
APPEND_M(str( ".intel_syntax noprefix" ));
outputted_header=true;
}
name=t_name;
#ifdef CHIAOSX
APPEND_M(str( ".global _asm_")+asmprefix+str("func_#", t_name ));
APPEND_M(str( "_asm_")+asmprefix+str("func_#:", t_name ));
#else
APPEND_M(str( ".global asm_")+asmprefix+str("func_#", t_name ));
APPEND_M(str( "asm_")+asmprefix+str("func_#:", t_name ));
#endif
assert(num_regs<=15);
regs.init();
for (int x=0;x<num_args;++x) {
reg_scalar r=all_arg_regs.at(x);
regs.get_scalar(r);
args.push_back(r);
}
int num_available_regs=15-all_save_regs.size();
for (reg_scalar s : all_save_regs) {
if (num_regs>num_available_regs) {
APPEND_M(str( "PUSH #", s.name() ));
pop_regs.push_back(s);
++num_available_regs;
} else {
regs.get_scalar(s);
}
}
assert(num_available_regs==num_regs);
if (align_stack) {
APPEND_M(str( "MOV RAX, RSP" ));
APPEND_M(str( "AND RSP, -64" )); APPEND_M(str( "SUB RSP, 64" ));
APPEND_M(str( "MOV [RSP], RAX" ));
}
}
~asm_function() {
EXPAND_MACROS_SCOPE;
if (d_return_error_code) {
APPEND_M(str( "MOV RAX, 0" ));
}
string end_label=m.alloc_label();
APPEND_M(str( "#:", end_label ));
if (d_align_stack) {
APPEND_M(str( "MOV RSP, [RSP]" ));
}
for (int x=pop_regs.size()-1;x>=0;--x) {
APPEND_M(str( "POP #", pop_regs[x].name() ));
}
APPEND_M(str( "RET" ));
while (m.next_output_error_label_id<m.next_error_label_id) {
assert(d_return_error_code);
APPEND_M(asmprefix+str( "label_error_#:", m.next_output_error_label_id ));
assert(m.next_output_error_label_id!=0);
APPEND_M(str( "MOV RAX, #", to_hex(m.next_output_error_label_id) ));
APPEND_M(str( "JMP #", end_label ));
++m.next_output_error_label_id;
}
}
};
}