use cranelift_codegen::ir::condcodes::IntCC;
use cranelift_codegen::ir::types;
use cranelift_codegen::ir::{Endianness, InstBuilder, MemFlags, Type, Value};
use cranelift_frontend::FunctionBuilder;
use super::block_args;
use crate::api::style::FillRule;
/// Emits scalar IR that turns a shifted coverage accumulator into a clamped
/// coverage value according to `fill_rule`.
///
/// * `shifted` - the accumulator after the caller's arithmetic right shift.
/// * `c255` - the upper bound applied with an unsigned minimum (`build_sweep`
///   in this file passes an `I32` constant of 255).
///
/// Returns the IR value holding the converted coverage.
///
/// * `NonZero`: `min(|shifted|, c255)`.
/// * `EvenOdd`: `|shifted|` is reduced modulo 512 and mirrored around 256
///   (`min(v, 512 - v)`), then bounded by `c255`.
pub(super) fn emit_fill_rule_convert(
    bcx: &mut FunctionBuilder,
    shifted: Value,
    c255: Value,
    fill_rule: FillRule,
) -> Value {
    // Both rules start from the magnitude of the accumulator.
    let magnitude = bcx.ins().iabs(shifted);
    match fill_rule {
        FillRule::NonZero => bcx.ins().umin(magnitude, c255),
        FillRule::EvenOdd => {
            // Keep the low nine bits, i.e. the position inside a 512-wide period.
            let wrapped = bcx.ins().band_imm(magnitude, 511);
            let period = bcx.ins().iconst(types::I32, 512);
            // Distance to the end of the period; the smaller of the two
            // distances forms a triangle wave peaking at 256.
            let mirrored = bcx.ins().isub(period, wrapped);
            let triangle = bcx.ins().umin(wrapped, mirrored);
            bcx.ins().umin(triangle, c255)
        }
    }
}
/// Vector counterpart of `emit_fill_rule_convert`: applies the same fill-rule
/// arithmetic lane-wise.
///
/// * `shifted` - vector of shifted accumulators (`build_sweep` passes an
///   `I32X4` value).
/// * `c255_vec` - per-lane upper bound applied with an unsigned minimum.
///
/// Returns the IR value holding the converted lanes.
fn emit_fill_rule_convert_simd(
    bcx: &mut FunctionBuilder,
    shifted: Value,
    c255_vec: Value,
    fill_rule: FillRule,
) -> Value {
    /// Materialises `value` as an `I32` constant and broadcasts it to `I32X4`.
    fn splat_i32x4(bcx: &mut FunctionBuilder, value: i64) -> Value {
        let scalar = bcx.ins().iconst(types::I32, value);
        bcx.ins().splat(types::I32X4, scalar)
    }

    // Both rules start from the per-lane magnitude.
    let magnitude = bcx.ins().iabs(shifted);
    match fill_rule {
        FillRule::NonZero => bcx.ins().umin(magnitude, c255_vec),
        FillRule::EvenOdd => {
            // Keep the low nine bits of every lane (position inside a 512 period).
            let low_bits_mask = splat_i32x4(bcx, 511);
            let wrapped = bcx.ins().band(magnitude, low_bits_mask);
            // Mirror around 256: min(v, 512 - v) gives a triangle wave.
            let period = splat_i32x4(bcx, 512);
            let mirrored = bcx.ins().isub(period, wrapped);
            let triangle = bcx.ins().umin(wrapped, mirrored);
            bcx.ins().umin(triangle, c255_vec)
        }
    }
}
pub(super) fn build_sweep(mut bcx: FunctionBuilder, ptr_type: Type, fill_rule: FillRule) {
let entry = bcx.create_block();
let main_loop = bcx.create_block();
let scalar_check = bcx.create_block();
let scalar_loop = bcx.create_block();
let exit = bcx.create_block();
bcx.switch_to_block(entry);
bcx.append_block_params_for_function_params(entry);
let cells = bcx.block_params(entry)[0]; let cov_buf = bcx.block_params(entry)[1]; let len = bcx.block_params(entry)[2];
let count4 = bcx.ins().ushr_imm(len, 2);
let rem = bcx.ins().band_imm(len, 3);
let zero = bcx.ins().iconst(ptr_type, 0);
let c255 = bcx.ins().iconst(types::I32, 255);
let c255_vec = bcx.ins().splat(types::I32X4, c255);
let cover_init = bcx.ins().iconst(types::I32, 0);
let zero_vec = bcx.ins().splat(types::I32X4, cover_init);
let has_main = bcx.ins().icmp(IntCC::NotEqual, count4, zero);
let args_main = block_args(&[cells, cov_buf, zero, cover_init, c255_vec, zero_vec]);
let args_scalar = block_args(&[cells, cov_buf, cover_init]);
bcx.ins()
.brif(has_main, main_loop, &args_main, scalar_check, &args_scalar);
bcx.append_block_param(main_loop, ptr_type); bcx.append_block_param(main_loop, ptr_type); bcx.append_block_param(main_loop, ptr_type); bcx.append_block_param(main_loop, types::I32); bcx.append_block_param(main_loop, types::I32X4); bcx.append_block_param(main_loop, types::I32X4); bcx.switch_to_block(main_loop);
let cells_p = bcx.block_params(main_loop)[0];
let cov_p = bcx.block_params(main_loop)[1];
let i = bcx.block_params(main_loop)[2];
let cover = bcx.block_params(main_loop)[3];
let c255_vec_loop = bcx.block_params(main_loop)[4];
let zero_vec_loop = bcx.block_params(main_loop)[5];
let c0 = bcx.ins().load(types::I32, MemFlags::new(), cells_p, 0);
let c1 = bcx.ins().load(types::I32, MemFlags::new(), cells_p, 4);
let c2 = bcx.ins().load(types::I32, MemFlags::new(), cells_p, 8);
let c3 = bcx.ins().load(types::I32, MemFlags::new(), cells_p, 12);
bcx.ins().store(MemFlags::new(), zero_vec_loop, cells_p, 0);
let cover0 = bcx.ins().iadd(cover, c0);
let cover1 = bcx.ins().iadd(cover0, c1);
let cover2 = bcx.ins().iadd(cover1, c2);
let cover3 = bcx.ins().iadd(cover2, c3);
let vec = bcx.ins().scalar_to_vector(types::I32X4, cover0);
let vec = bcx.ins().insertlane(vec, cover1, 1);
let vec = bcx.ins().insertlane(vec, cover2, 2);
let vec = bcx.ins().insertlane(vec, cover3, 3);
let shifted_vec = bcx.ins().sshr_imm(vec, 9);
let cov_vec = emit_fill_rule_convert_simd(&mut bcx, shifted_vec, c255_vec_loop, fill_rule);
let le_flags = MemFlags::new().with_endianness(Endianness::Little);
let narrow16 = bcx.ins().unarrow(cov_vec, zero_vec_loop);
let zero_i16x8 = bcx.ins().bitcast(types::I16X8, le_flags, zero_vec_loop);
let narrow8 = bcx.ins().unarrow(narrow16, zero_i16x8);
let packed_i32x4 = bcx.ins().bitcast(types::I32X4, le_flags, narrow8);
let packed = bcx.ins().extractlane(packed_i32x4, 0);
bcx.ins().store(MemFlags::new(), packed, cov_p, 0);
let sixteen = bcx.ins().iconst(ptr_type, 16);
let next_cells_p = bcx.ins().iadd(cells_p, sixteen);
let four_ptr = bcx.ins().iconst(ptr_type, 4);
let next_cov_p = bcx.ins().iadd(cov_p, four_ptr);
let one = bcx.ins().iconst(ptr_type, 1);
let next_i = bcx.ins().iadd(i, one);
let cont = bcx.ins().icmp(IntCC::UnsignedLessThan, next_i, count4);
let args_loop = block_args(&[
next_cells_p,
next_cov_p,
next_i,
cover3,
c255_vec_loop,
zero_vec_loop,
]);
let args_check = block_args(&[next_cells_p, next_cov_p, cover3]);
bcx.ins()
.brif(cont, main_loop, &args_loop, scalar_check, &args_check);
bcx.append_block_param(scalar_check, ptr_type);
bcx.append_block_param(scalar_check, ptr_type);
bcx.append_block_param(scalar_check, types::I32);
bcx.switch_to_block(scalar_check);
let cells_p = bcx.block_params(scalar_check)[0];
let cov_p = bcx.block_params(scalar_check)[1];
let cover = bcx.block_params(scalar_check)[2];
let has_rem = bcx.ins().icmp(IntCC::NotEqual, rem, zero);
let args_scalar = block_args(&[cells_p, cov_p, zero, cover]);
bcx.ins()
.brif(has_rem, scalar_loop, &args_scalar, exit, &[]);
bcx.append_block_param(scalar_loop, ptr_type);
bcx.append_block_param(scalar_loop, ptr_type);
bcx.append_block_param(scalar_loop, ptr_type);
bcx.append_block_param(scalar_loop, types::I32);
bcx.switch_to_block(scalar_loop);
let cells_p = bcx.block_params(scalar_loop)[0];
let cov_p = bcx.block_params(scalar_loop)[1];
let j = bcx.block_params(scalar_loop)[2];
let cover = bcx.block_params(scalar_loop)[3];
let cell_val = bcx.ins().load(types::I32, MemFlags::new(), cells_p, 0);
let i32_zero_s = bcx.ins().iconst(types::I32, 0);
bcx.ins().store(MemFlags::new(), i32_zero_s, cells_p, 0);
let cover = bcx.ins().iadd(cover, cell_val);
let shifted = bcx.ins().sshr_imm(cover, 9);
let clamped = emit_fill_rule_convert(&mut bcx, shifted, c255, fill_rule);
bcx.ins().istore8(MemFlags::new(), clamped, cov_p, 0);
let four_bytes = bcx.ins().iconst(ptr_type, 4);
let next_cells_p = bcx.ins().iadd(cells_p, four_bytes);
let one_byte = bcx.ins().iconst(ptr_type, 1);
let next_cov_p = bcx.ins().iadd(cov_p, one_byte);
let next_j = bcx.ins().iadd(j, one_byte);
let cont = bcx.ins().icmp(IntCC::UnsignedLessThan, next_j, rem);
let args_loop = block_args(&[next_cells_p, next_cov_p, next_j, cover]);
bcx.ins().brif(cont, scalar_loop, &args_loop, exit, &[]);
bcx.switch_to_block(exit);
bcx.ins().return_(&[]);
bcx.seal_all_blocks();
bcx.finalize();
}