use cranelift_codegen::ir::condcodes::IntCC;
use cranelift_codegen::ir::types;
use cranelift_codegen::ir::{InstBuilder, MemFlags, Type};
use cranelift_frontend::FunctionBuilder;
use super::block_args;
use super::core_pipelines::emit_src_over_ag_rb_simd;
/// Emits the Cranelift IR body of a solid-colour "source over" box fill.
///
/// The function being built takes five parameters, read from the entry block
/// in this order:
///
/// 1. `dst` – address of the first pixel of the first scanline,
/// 2. `src_solid` – packed 32-bit source colour (it is splatted into an
///    `I32X4`); the top byte is used as alpha,
/// 3. `width` – pixels per scanline,
/// 4. `height` – number of scanlines,
/// 5. `stride` – value added to the scanline address to reach the next row.
///
/// `width` and `height` are compared against `ptr_type` constants and
/// `stride` is added to the `dst` address, so all three must have type
/// `ptr_type`.
///
/// Each scanline is processed in three phases:
///
/// * `width / 16` unrolled iterations, each blending four `I32X4` vectors
///   (64 bytes),
/// * `(width % 16) / 4` iterations blending a single vector (16 bytes),
/// * `width % 4` scalar iterations blending one 32-bit pixel.
///
/// The scalar path computes, separately for the `ag` (bits 8..16 and 24..32)
/// and `rb` (bits 0..8 and 16..24) byte pairs,
/// `src + ((dst * (256 - alpha)) >> 8)`. The vector path delegates to
/// `emit_src_over_ag_rb_simd`, which is presumably the four-lane equivalent
/// (defined elsewhere; confirm there).
///
/// If `height` is zero the generated function returns without touching
/// memory. A zero `width` is handled by the per-phase guards.
///
/// The builder is consumed: all blocks are sealed and the function is
/// finalized before returning.
pub(super) fn build_src_over_box(mut bcx: FunctionBuilder, ptr_type: Type) {
    // Byte offsets of the four 16-byte vectors touched by one unrolled iteration.
    const UNROLL_OFFSETS: [i32; 4] = [0, 16, 32, 48];

    let vec_type = types::I32X4;

    let entry = bcx.create_block();
    let y_loop = bcx.create_block();
    let unroll_loop = bcx.create_block();
    let tail_check = bcx.create_block();
    let tail_loop = bcx.create_block();
    let scalar_check = bcx.create_block();
    let scalar_loop = bcx.create_block();
    let y_advance = bcx.create_block();
    let exit = bcx.create_block();

    // ---- entry: unpack arguments and compute loop-invariant values ----
    bcx.switch_to_block(entry);
    bcx.append_block_params_for_function_params(entry);
    let dst = bcx.block_params(entry)[0];
    let src_solid = bcx.block_params(entry)[1];
    let width = bcx.block_params(entry)[2];
    let height = bcx.block_params(entry)[3];
    let stride = bcx.block_params(entry)[4];

    // Split the source colour into its two interleaved byte pairs so that two
    // channels can be multiplied at once inside a single 32-bit value.
    let mask_00ff00ff = bcx.ins().iconst(types::I32, 0x00FF00FFu32 as i64);
    let src_ag = bcx.ins().ushr_imm(src_solid, 8);
    let src_ag = bcx.ins().band(src_ag, mask_00ff00ff);
    let src_rb = bcx.ins().band(src_solid, mask_00ff00ff);

    // inv_alpha = 256 - alpha, so the later `>> 8` acts as the divide.
    let src_a = bcx.ins().ushr_imm(src_solid, 24);
    let src_a = bcx.ins().band_imm(src_a, 0xFF);
    let c256 = bcx.ins().iconst(types::I32, 256);
    let inv_alpha = bcx.ins().isub(c256, src_a);

    // Vector copies of the same constants for the SIMD phases.
    let src_ag_vec = bcx.ins().splat(vec_type, src_ag);
    let src_rb_vec = bcx.ins().splat(vec_type, src_rb);
    let inv_alpha_vec = bcx.ins().splat(vec_type, inv_alpha);
    let mask_vec = bcx.ins().splat(vec_type, mask_00ff00ff);

    // Per-scanline trip counts for the three phases.
    let count16 = bcx.ins().ushr_imm(width, 4);
    let tail_quads = bcx.ins().band_imm(width, 0xF);
    let tail_quads = bcx.ins().ushr_imm(tail_quads, 2);
    let remainder = bcx.ins().band_imm(width, 3);
    let zero = bcx.ins().iconst(ptr_type, 0);

    // The row loop tests `height` only after a row has been processed, so an
    // empty box has to be rejected before the loop is entered.
    let has_rows = bcx.ins().icmp(IntCC::NotEqual, height, zero);
    bcx.ins().brif(
        has_rows,
        y_loop,
        &block_args(&[dst, zero, src_ag_vec, src_rb_vec, inv_alpha_vec, mask_vec]),
        exit,
        &[],
    );

    // ---- y_loop: (scanline address, row index, four vector constants) ----
    for ty in [ptr_type, ptr_type, vec_type, vec_type, vec_type, vec_type] {
        bcx.append_block_param(y_loop, ty);
    }
    bcx.switch_to_block(y_loop);
    let scanline_dst = bcx.block_params(y_loop)[0];
    let y_i = bcx.block_params(y_loop)[1];
    // From here on the vector constants are the ones carried by the row loop.
    let src_ag_vec = bcx.block_params(y_loop)[2];
    let src_rb_vec = bcx.block_params(y_loop)[3];
    let inv_alpha_vec = bcx.block_params(y_loop)[4];
    let mask_vec = bcx.block_params(y_loop)[5];
    let has_unroll = bcx.ins().icmp(IntCC::NotEqual, count16, zero);
    bcx.ins().brif(
        has_unroll,
        unroll_loop,
        &block_args(&[scanline_dst, zero]),
        tail_check,
        &block_args(&[scanline_dst]),
    );

    // ---- unroll_loop: (pixel address, iteration index); 16 pixels per trip ----
    bcx.append_block_param(unroll_loop, ptr_type);
    bcx.append_block_param(unroll_loop, ptr_type);
    bcx.switch_to_block(unroll_loop);
    let x_dst = bcx.block_params(unroll_loop)[0];
    let unroll_i = bcx.block_params(unroll_loop)[1];
    for offset in UNROLL_OFFSETS {
        let px = bcx.ins().load(vec_type, MemFlags::new(), x_dst, offset);
        let blended = emit_src_over_ag_rb_simd(
            &mut bcx,
            px,
            src_ag_vec,
            src_rb_vec,
            inv_alpha_vec,
            mask_vec,
        );
        bcx.ins().store(MemFlags::new(), blended, x_dst, offset);
    }
    let sixty_four = bcx.ins().iconst(ptr_type, 64);
    let next_x_dst = bcx.ins().iadd(x_dst, sixty_four);
    let one = bcx.ins().iconst(ptr_type, 1);
    let next_i = bcx.ins().iadd(unroll_i, one);
    let cont = bcx.ins().icmp(IntCC::UnsignedLessThan, next_i, count16);
    bcx.ins().brif(
        cont,
        unroll_loop,
        &block_args(&[next_x_dst, next_i]),
        tail_check,
        &block_args(&[next_x_dst]),
    );

    // ---- tail_check: (pixel address); any whole vectors left? ----
    bcx.append_block_param(tail_check, ptr_type);
    bcx.switch_to_block(tail_check);
    let x_dst = bcx.block_params(tail_check)[0];
    let has_tail = bcx.ins().icmp(IntCC::NotEqual, tail_quads, zero);
    bcx.ins().brif(
        has_tail,
        tail_loop,
        &block_args(&[x_dst, zero]),
        scalar_check,
        &block_args(&[x_dst]),
    );

    // ---- tail_loop: (pixel address, iteration index); 4 pixels per trip ----
    bcx.append_block_param(tail_loop, ptr_type);
    bcx.append_block_param(tail_loop, ptr_type);
    bcx.switch_to_block(tail_loop);
    let x_dst = bcx.block_params(tail_loop)[0];
    let tail_i = bcx.block_params(tail_loop)[1];
    let px = bcx.ins().load(vec_type, MemFlags::new(), x_dst, 0);
    let blended = emit_src_over_ag_rb_simd(
        &mut bcx,
        px,
        src_ag_vec,
        src_rb_vec,
        inv_alpha_vec,
        mask_vec,
    );
    bcx.ins().store(MemFlags::new(), blended, x_dst, 0);
    let sixteen = bcx.ins().iconst(ptr_type, 16);
    let next_x_dst = bcx.ins().iadd(x_dst, sixteen);
    let one = bcx.ins().iconst(ptr_type, 1);
    let next_ti = bcx.ins().iadd(tail_i, one);
    let cont = bcx.ins().icmp(IntCC::UnsignedLessThan, next_ti, tail_quads);
    bcx.ins().brif(
        cont,
        tail_loop,
        &block_args(&[next_x_dst, next_ti]),
        scalar_check,
        &block_args(&[next_x_dst]),
    );

    // ---- scalar_check: (pixel address); any single pixels left? ----
    bcx.append_block_param(scalar_check, ptr_type);
    bcx.switch_to_block(scalar_check);
    let x_dst = bcx.block_params(scalar_check)[0];
    let has_remainder = bcx.ins().icmp(IntCC::NotEqual, remainder, zero);
    bcx.ins().brif(
        has_remainder,
        scalar_loop,
        &block_args(&[x_dst, zero]),
        y_advance,
        &[],
    );

    // ---- scalar_loop: (pixel address, iteration index); 1 pixel per trip ----
    bcx.append_block_param(scalar_loop, ptr_type);
    bcx.append_block_param(scalar_loop, ptr_type);
    bcx.switch_to_block(scalar_loop);
    let x_dst = bcx.block_params(scalar_loop)[0];
    let scalar_i = bcx.block_params(scalar_loop)[1];
    let dst_pixel = bcx.ins().load(types::I32, MemFlags::new(), x_dst, 0);
    let dst_ag = bcx.ins().ushr_imm(dst_pixel, 8);
    let dst_ag = bcx.ins().band(dst_ag, mask_00ff00ff);
    let dst_rb = bcx.ins().band(dst_pixel, mask_00ff00ff);
    // out = src + ((dst * inv_alpha) >> 8), two channels per multiply.
    let tmp_ag = bcx.ins().imul(dst_ag, inv_alpha);
    let tmp_ag = bcx.ins().ushr_imm(tmp_ag, 8);
    let tmp_ag = bcx.ins().band(tmp_ag, mask_00ff00ff);
    let out_ag = bcx.ins().iadd(src_ag, tmp_ag);
    let tmp_rb = bcx.ins().imul(dst_rb, inv_alpha);
    let tmp_rb = bcx.ins().ushr_imm(tmp_rb, 8);
    let tmp_rb = bcx.ins().band(tmp_rb, mask_00ff00ff);
    let out_rb = bcx.ins().iadd(src_rb, tmp_rb);
    // Re-interleave the two byte pairs into one packed pixel.
    let result = bcx.ins().ishl_imm(out_ag, 8);
    let result = bcx.ins().bor(result, out_rb);
    bcx.ins().store(MemFlags::new(), result, x_dst, 0);
    let four = bcx.ins().iconst(ptr_type, 4);
    let next_x_dst = bcx.ins().iadd(x_dst, four);
    let one = bcx.ins().iconst(ptr_type, 1);
    let next_si = bcx.ins().iadd(scalar_i, one);
    let cont = bcx.ins().icmp(IntCC::UnsignedLessThan, next_si, remainder);
    bcx.ins().brif(
        cont,
        scalar_loop,
        &block_args(&[next_x_dst, next_si]),
        y_advance,
        &[],
    );

    // ---- y_advance: step to the next scanline or leave ----
    bcx.switch_to_block(y_advance);
    let next_scanline = bcx.ins().iadd(scanline_dst, stride);
    let one = bcx.ins().iconst(ptr_type, 1);
    let next_y = bcx.ins().iadd(y_i, one);
    let cont = bcx.ins().icmp(IntCC::UnsignedLessThan, next_y, height);
    bcx.ins().brif(
        cont,
        y_loop,
        &block_args(&[
            next_scanline,
            next_y,
            src_ag_vec,
            src_rb_vec,
            inv_alpha_vec,
            mask_vec,
        ]),
        exit,
        &[],
    );

    // ---- exit ----
    bcx.switch_to_block(exit);
    bcx.ins().return_(&[]);

    bcx.seal_all_blocks();
    bcx.finalize();
}
/// Emits the Cranelift IR body of a solid-colour box fill that overwrites the
/// destination (no blending).
///
/// The function being built takes five parameters, read from the entry block
/// in this order:
///
/// 1. `dst` – address of the first pixel of the first scanline,
/// 2. `src_solid` – 32-bit value stored into every pixel (it is splatted into
///    an `I32X4`),
/// 3. `width` – pixels per scanline,
/// 4. `height` – number of scanlines,
/// 5. `stride` – value added to the scanline address to reach the next row.
///
/// `width` and `height` are compared against `ptr_type` constants and
/// `stride` is added to the `dst` address, so all three must have type
/// `ptr_type`.
///
/// Each scanline is written in three phases:
///
/// * `width / 16` unrolled iterations, each storing four `I32X4` vectors
///   (64 bytes),
/// * `(width % 16) / 4` iterations storing a single vector (16 bytes),
/// * `width % 4` scalar iterations storing one 32-bit pixel.
///
/// If `height` is zero the generated function returns without touching
/// memory. A zero `width` is handled by the per-phase guards.
///
/// The builder is consumed: all blocks are sealed and the function is
/// finalized before returning.
pub(super) fn build_src_copy_box(mut bcx: FunctionBuilder, ptr_type: Type) {
    // Byte offsets of the four 16-byte vectors written by one unrolled iteration.
    const UNROLL_OFFSETS: [i32; 4] = [0, 16, 32, 48];

    let entry = bcx.create_block();
    let y_loop = bcx.create_block();
    let unroll_loop = bcx.create_block();
    let tail_check = bcx.create_block();
    let tail_loop = bcx.create_block();
    let scalar_check = bcx.create_block();
    let scalar_loop = bcx.create_block();
    let y_advance = bcx.create_block();
    let exit = bcx.create_block();

    // ---- entry: unpack arguments and compute loop-invariant values ----
    bcx.switch_to_block(entry);
    bcx.append_block_params_for_function_params(entry);
    let dst = bcx.block_params(entry)[0];
    let src_solid = bcx.block_params(entry)[1];
    let width = bcx.block_params(entry)[2];
    let height = bcx.block_params(entry)[3];
    let stride = bcx.block_params(entry)[4];

    // One vector holding four copies of the fill value.
    let src_vec = bcx.ins().splat(types::I32X4, src_solid);

    // Per-scanline trip counts for the three phases.
    let count16 = bcx.ins().ushr_imm(width, 4);
    let tail_quads = bcx.ins().band_imm(width, 0xF);
    let tail_quads = bcx.ins().ushr_imm(tail_quads, 2);
    let remainder = bcx.ins().band_imm(width, 3);
    let zero = bcx.ins().iconst(ptr_type, 0);

    // The row loop tests `height` only after a row has been written, so an
    // empty box has to be rejected before the loop is entered.
    let has_rows = bcx.ins().icmp(IntCC::NotEqual, height, zero);
    bcx.ins().brif(has_rows, y_loop, &block_args(&[dst, zero]), exit, &[]);

    // ---- y_loop: (scanline address, row index) ----
    bcx.append_block_param(y_loop, ptr_type);
    bcx.append_block_param(y_loop, ptr_type);
    bcx.switch_to_block(y_loop);
    let scanline_dst = bcx.block_params(y_loop)[0];
    let y_i = bcx.block_params(y_loop)[1];
    let has_unroll = bcx.ins().icmp(IntCC::NotEqual, count16, zero);
    bcx.ins().brif(
        has_unroll,
        unroll_loop,
        &block_args(&[scanline_dst, zero]),
        tail_check,
        &block_args(&[scanline_dst]),
    );

    // ---- unroll_loop: (pixel address, iteration index); 16 pixels per trip ----
    bcx.append_block_param(unroll_loop, ptr_type);
    bcx.append_block_param(unroll_loop, ptr_type);
    bcx.switch_to_block(unroll_loop);
    let x_dst = bcx.block_params(unroll_loop)[0];
    let unroll_i = bcx.block_params(unroll_loop)[1];
    for offset in UNROLL_OFFSETS {
        bcx.ins().store(MemFlags::new(), src_vec, x_dst, offset);
    }
    let sixty_four = bcx.ins().iconst(ptr_type, 64);
    let next_x_dst = bcx.ins().iadd(x_dst, sixty_four);
    let one = bcx.ins().iconst(ptr_type, 1);
    let next_i = bcx.ins().iadd(unroll_i, one);
    let cont = bcx.ins().icmp(IntCC::UnsignedLessThan, next_i, count16);
    bcx.ins().brif(
        cont,
        unroll_loop,
        &block_args(&[next_x_dst, next_i]),
        tail_check,
        &block_args(&[next_x_dst]),
    );

    // ---- tail_check: (pixel address); any whole vectors left? ----
    bcx.append_block_param(tail_check, ptr_type);
    bcx.switch_to_block(tail_check);
    let x_dst = bcx.block_params(tail_check)[0];
    let has_tail = bcx.ins().icmp(IntCC::NotEqual, tail_quads, zero);
    bcx.ins().brif(
        has_tail,
        tail_loop,
        &block_args(&[x_dst, zero]),
        scalar_check,
        &block_args(&[x_dst]),
    );

    // ---- tail_loop: (pixel address, iteration index); 4 pixels per trip ----
    bcx.append_block_param(tail_loop, ptr_type);
    bcx.append_block_param(tail_loop, ptr_type);
    bcx.switch_to_block(tail_loop);
    let x_dst = bcx.block_params(tail_loop)[0];
    let tail_i = bcx.block_params(tail_loop)[1];
    bcx.ins().store(MemFlags::new(), src_vec, x_dst, 0);
    let sixteen = bcx.ins().iconst(ptr_type, 16);
    let next_x_dst = bcx.ins().iadd(x_dst, sixteen);
    let one = bcx.ins().iconst(ptr_type, 1);
    let next_ti = bcx.ins().iadd(tail_i, one);
    let cont = bcx.ins().icmp(IntCC::UnsignedLessThan, next_ti, tail_quads);
    bcx.ins().brif(
        cont,
        tail_loop,
        &block_args(&[next_x_dst, next_ti]),
        scalar_check,
        &block_args(&[next_x_dst]),
    );

    // ---- scalar_check: (pixel address); any single pixels left? ----
    bcx.append_block_param(scalar_check, ptr_type);
    bcx.switch_to_block(scalar_check);
    let x_dst = bcx.block_params(scalar_check)[0];
    let has_remainder = bcx.ins().icmp(IntCC::NotEqual, remainder, zero);
    bcx.ins().brif(
        has_remainder,
        scalar_loop,
        &block_args(&[x_dst, zero]),
        y_advance,
        &[],
    );

    // ---- scalar_loop: (pixel address, iteration index); 1 pixel per trip ----
    bcx.append_block_param(scalar_loop, ptr_type);
    bcx.append_block_param(scalar_loop, ptr_type);
    bcx.switch_to_block(scalar_loop);
    let x_dst = bcx.block_params(scalar_loop)[0];
    let scalar_i = bcx.block_params(scalar_loop)[1];
    bcx.ins().store(MemFlags::new(), src_solid, x_dst, 0);
    let four = bcx.ins().iconst(ptr_type, 4);
    let next_x_dst = bcx.ins().iadd(x_dst, four);
    let one = bcx.ins().iconst(ptr_type, 1);
    let next_si = bcx.ins().iadd(scalar_i, one);
    let cont = bcx.ins().icmp(IntCC::UnsignedLessThan, next_si, remainder);
    bcx.ins().brif(
        cont,
        scalar_loop,
        &block_args(&[next_x_dst, next_si]),
        y_advance,
        &[],
    );

    // ---- y_advance: step to the next scanline or leave ----
    bcx.switch_to_block(y_advance);
    let next_scanline = bcx.ins().iadd(scanline_dst, stride);
    let one = bcx.ins().iconst(ptr_type, 1);
    let next_y = bcx.ins().iadd(y_i, one);
    let cont = bcx.ins().icmp(IntCC::UnsignedLessThan, next_y, height);
    bcx.ins().brif(
        cont,
        y_loop,
        &block_args(&[next_scanline, next_y]),
        exit,
        &[],
    );

    // ---- exit ----
    bcx.switch_to_block(exit);
    bcx.ins().return_(&[]);

    bcx.seal_all_blocks();
    bcx.finalize();
}