/*
 * SIMD_RUN_START / SIMD_RUN_END bracket the body of a SIMD test function.
 * The enclosing function must declare an assignable variable named
 * `options`, because SIMD_RUN_END reads and writes it.
 *
 * When SLJIT_CONFIG_X86 is defined and non-zero, the pair expands to a
 * `while (1) { ... }` loop whose tail breaks out once `options` equals
 * SLJIT_ENTER_USE_VEX and otherwise sets `options` to SLJIT_ENTER_USE_VEX
 * and repeats. A body entered with any other `options` value is therefore
 * executed twice: once as given and once with SLJIT_ENTER_USE_VEX.
 */
#if (defined SLJIT_CONFIG_X86 && SLJIT_CONFIG_X86)
/* Opens the loop; the closing brace comes from SIMD_RUN_END. */
#define SIMD_RUN_START \
while (1) {
/* Ends the loop body: stop after the VEX pass, otherwise schedule it. */
#define SIMD_RUN_END \
if (options == SLJIT_ENTER_USE_VEX) \
break; \
options = SLJIT_ENTER_USE_VEX; \
}
#else
/* Other configurations: both macros expand to nothing, so the body runs once. */
#define SIMD_RUN_START
#define SIMD_RUN_END
#endif
/*
 * Fills buf[0 .. length - 1] with a deterministic byte pattern.
 *
 * buf    - destination; must have room for `length` bytes.
 * start  - value written to buf[0].
 * length - number of bytes to write; a non-positive value writes nothing.
 *
 * Every following byte is the previous one plus 103 (modulo 256). When that
 * sum is 0xaa it is replaced by 0xab: the tests in this file fill their
 * buffers with 0xaa as the "untouched" marker, so the pattern must not
 * produce it by stepping.
 *
 * The earlier do/while form always wrote at least one byte and, for
 * length <= 0, kept writing beyond the buffer; the counted loop below is
 * identical for every positive length and safe otherwise.
 */
static void simd_set(sljit_u8* buf, sljit_u8 start, sljit_s32 length)
{
	sljit_s32 i;

	for (i = 0; i < length; i++) {
		buf[i] = start;
		start = (sljit_u8)(start + 103);
		/* Skip the background marker value. */
		if (start == 0xaa)
			start = 0xab;
	}
}
/*
 * Verifies that buf[0 .. length - 1] holds the pattern simd_set() produces
 * for the same `start`, and that the region is framed by the 0xaa
 * background byte (buf[-1] and buf[length]).
 *
 * buf    - first byte of the region to verify; buf[-1] and buf[length]
 *          must be readable.
 * start  - expected value of buf[0].
 * length - number of pattern bytes to verify.
 *
 * Returns 1 when both guard bytes and every pattern byte match, 0 otherwise.
 * A non-positive length verifies nothing and is reported as a failure (0)
 * before any memory is touched; the earlier do/while form read buf[length]
 * with a negative index and then scanned past the buffer in that case.
 */
static sljit_s32 check_simd_mov(sljit_u8* buf, sljit_u8 start, sljit_s32 length)
{
	sljit_s32 i;

	if (length <= 0)
		return 0;

	/* The bytes around the copied region must still be untouched. */
	if (buf[-1] != 0xaa || buf[length] != 0xaa)
		return 0;

	for (i = 0; i < length; i++) {
		if (buf[i] != start)
			return 0;

		/* Same stepping rule as simd_set(): +103, never 0xaa. */
		start = (sljit_u8)(start + 103);
		if (start == 0xaa)
			start = 0xab;
	}

	return 1;
}
/*
 * Test for sljit_emit_simd_mov.
 *
 * Builds one JIT function that takes the buffer address as its argument
 * (available as SLJIT_S0), loads vectors from pattern-filled source regions
 * and stores them to other regions of the same buffer using a variety of
 * register widths, element types, alignment hints and addressing modes.
 * After running the generated code each destination is verified with
 * check_simd_mov(). The whole body sits between SIMD_RUN_START and
 * SIMD_RUN_END, so where those macros expand to a loop it is repeated with
 * options == SLJIT_ENTER_USE_VEX.
 */
static void test_simd1(void)
{
executable_code code;
struct sljit_compiler* compiler;
sljit_s32 options = 0;
sljit_s32 i, type;
/* supported[0]: 64-bit register moves, supported[1]: 256-bit register moves. */
sljit_u8 supported[2];
sljit_u8* buf;
/* 63 spare bytes leave room for rounding buf up to a 64-byte boundary. */
sljit_u8 data[63 + 880];
/* Use a saved vector register when the target has one, otherwise fall back to VR5. */
sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5;
SLJIT_ASSERT(sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VR0) != -1
&& sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_TMP_DEST_VREG) != -1);
SLJIT_ASSERT(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS == 0
|| sljit_get_register_index(SLJIT_SIMD_REG_128, SLJIT_VS0) != -1);
if (verbose)
printf("Run test_simd1\n");
SIMD_RUN_START
/* Round the start of data[] up to the next multiple of 64. */
buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63);
/* 0xaa is the background byte; check_simd_mov() expects it around every copied region. */
for (i = 0; i < 880; i++)
buf[i] = 0xaa;
compiler = sljit_create_compiler(NULL);
FAILED(!compiler, "cannot create compiler\n");
/* Source patterns: simd_set(destination, first byte, length in bytes). */
simd_set(buf + 0, 81, 16);
simd_set(buf + 65, 213, 16);
simd_set(buf + 104, 33, 16);
simd_set(buf + 160, 140, 16);
simd_set(buf + 210, 7, 16);
simd_set(buf + 256, 239, 16);
simd_set(buf + 312, 176, 16);
simd_set(buf + 368, 88, 8);
simd_set(buf + 393, 197, 8);
simd_set(buf + 416, 58, 16);
simd_set(buf + 432, 203, 16);
simd_set(buf + 496, 105, 16);
simd_set(buf + 560, 19, 16);
simd_set(buf + 616, 202, 8);
simd_set(buf + 648, 123, 8);
simd_set(buf + 704, 85, 32);
simd_set(buf + 801, 215, 32);
sljit_emit_enter(compiler, options, SLJIT_ARGS1V(P), 2 | SLJIT_ENTER_VECTOR(6), 2 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? 2 : 0), 64);
/* Case 1: 128-bit, 8-bit elements, 128-bit aligned hint; buf + 0 -> buf + 32. */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_MEM_ALIGNED_128;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 32);
/*
 * Case 2: unaligned, base + index addressing; buf + 65 -> buf + 82.
 * R1 holds 82 >> 1 and the store passes 1 as the last argument, which
 * appears to be a shift applied to the index register (the result is
 * checked at buf + 82).
 */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 65);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 82 >> 1);
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_MEM_UNALIGNED;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM2(SLJIT_S0, SLJIT_R1), 1);
/*
 * Case 3: large displacements that cancel the +/-70001 bias put into the
 * base registers; effective addresses are buf + 104 -> buf + 136.
 */
sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 70001);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 70001);
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_MEM_ALIGNED_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_R0), 70001 + 104);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_R1), 136 - 70001);
/* Case 4: absolute addresses (SLJIT_MEM0), float 32-bit elements, via vs0; buf + 160 -> buf + 192. */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_MEM_ALIGNED_128;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM0(), (sljit_sw)(buf + 160));
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM0(), (sljit_sw)(buf + 192));
/* Case 5: +/-1001 biased bases, 16-bit aligned hint; buf + 210 -> buf + 230. */
sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 1001);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 1001);
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_MEM_ALIGNED_16;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_R0), 1001 + 210);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_R1), 230 - 1001);
/* Case 6: index registers hold offset >> 3 and are used with 3 as the last argument; buf + 256 -> buf + 288. */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 256 >> 3);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 288 >> 3);
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_MEM_ALIGNED_128;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 3);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM2(SLJIT_S0, SLJIT_R1), 3);
/* Case 7: float 64-bit elements, 64-bit aligned hint; buf + 312 -> buf + 344. */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_MEM_ALIGNED_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 312);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 344);
/*
 * Case 8: 64-bit register; buf + 368 -> buf + 384. supported[0] records
 * whether this first 64-bit move returned something other than
 * SLJIT_ERR_UNSUPPORTED; all 64-bit checks below depend on it.
 */
type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_MEM_ALIGNED_64;
supported[0] = sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 368) != SLJIT_ERR_UNSUPPORTED;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 384);
/* Case 9: 64-bit register, unaligned, base + index; buf + 393 -> buf + 402. */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 393);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 402);
type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_MEM_UNALIGNED;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM2(SLJIT_S0, SLJIT_R1), 0);
/*
 * Case 10: VR4 and VR5 are loaded from adjacent patterns and VR4 is
 * stored to buf + 464. The check below expects VR4's pattern (58)
 * unless SLJIT_SIMD_REGS_ARE_PAIRS is reported, in which case it
 * expects the pattern that was loaded into VR5 (203).
 */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16 | SLJIT_SIMD_MEM_ALIGNED_128;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 416);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 432);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 464);
/*
 * Case 11: register-to-register move in LOAD form. VR4 is first filled
 * from an untouched (0xaa) area, then overwritten from VR3, and stored
 * to buf + 528 where VR3's pattern (105) is expected.
 */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 496);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 480);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_VR3, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 528);
/*
 * Case 12: register-to-register move in STORE form. VR0 is first filled
 * from an untouched area, VR2 is stored into it, and VR0 is written to
 * buf + 592 where VR2's pattern (19) is expected.
 */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 560);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 544);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_VR0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 592);
/* Case 13: same as case 11 with 64-bit registers; result at buf + 632. */
type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_8;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 616);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 608);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_VR5, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 632);
/* Case 14: same as case 12 with 64-bit registers and vs0 as the target; result at buf + 664. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 648);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 640);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, vs0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 664);
/*
 * Case 15: 256-bit register. supported[1] records whether the first
 * 256-bit move returned something other than SLJIT_ERR_UNSUPPORTED.
 * The value goes VR2 -> vs0 and is stored through R1 + S1 with 1 as
 * the last argument (S1 = 384, result checked at buf + 768).
 */
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_MEM_ALIGNED_256;
supported[1] = sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 704) != SLJIT_ERR_UNSUPPORTED;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32, SLJIT_VR2, vs0, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_S0, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 384);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM2(SLJIT_R1, SLJIT_S1), 1);
/*
 * Case 16: 256-bit round trip through the local (stack) area. The
 * pattern at buf + 801 is stored at MEM1(SP) + 0, then reloaded through
 * R1, which is set by sljit_get_local_base() with offset 128 and used
 * with displacement -128 (presumably the same slot), and finally
 * written to buf + 834.
 */
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_16;
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 801 - 32);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_R0), 32);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_SP), 0);
sljit_get_local_base(compiler, SLJIT_R1, 0, 128);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_R1), -128);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_16 | SLJIT_SIMD_MEM_ALIGNED_16;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM0(), (sljit_sw)(buf + 834));
sljit_emit_return_void(compiler);
/* Generate the machine code, release the compiler, run the code on buf, then free the code. */
code.code = sljit_generate_code(compiler, 0, NULL);
CHECK(compiler);
sljit_free_compiler(compiler);
code.func1((sljit_sw)buf);
sljit_free_code(code.code, NULL);
/* Each destination must hold its source pattern and be framed by 0xaa. */
FAILED(!check_simd_mov(buf + 32, 81, 16), "test_simd1 case 1 failed\n");
FAILED(!check_simd_mov(buf + 82, 213, 16), "test_simd1 case 2 failed\n");
FAILED(!check_simd_mov(buf + 136, 33, 16), "test_simd1 case 3 failed\n");
FAILED(!check_simd_mov(buf + 192, 140, 16), "test_simd1 case 4 failed\n");
FAILED(!check_simd_mov(buf + 230, 7, 16), "test_simd1 case 5 failed\n");
FAILED(!check_simd_mov(buf + 288, 239, 16), "test_simd1 case 6 failed\n");
FAILED(!check_simd_mov(buf + 344, 176, 16), "test_simd1 case 7 failed\n");
/* 64-bit results are only verified when the 64-bit move was accepted. */
if (supported[0]) {
SLJIT_ASSERT(sljit_get_register_index(SLJIT_SIMD_REG_64, SLJIT_VR0) != -1
&& sljit_get_register_index(SLJIT_SIMD_REG_64, SLJIT_TMP_DEST_VREG) != -1);
FAILED(!check_simd_mov(buf + 384, 88, 8), "test_simd1 case 8 failed\n");
FAILED(!check_simd_mov(buf + 402, 197, 8), "test_simd1 case 9 failed\n");
}
FAILED(!check_simd_mov(buf + 464, sljit_has_cpu_feature(SLJIT_SIMD_REGS_ARE_PAIRS) ? 203 : 58, 16), "test_simd1 case 10 failed\n");
FAILED(!check_simd_mov(buf + 528, 105, 16), "test_simd1 case 11 failed\n");
FAILED(!check_simd_mov(buf + 592, 19, 16), "test_simd1 case 12 failed\n");
if (supported[0]) {
FAILED(!check_simd_mov(buf + 632, 202, 8), "test_simd1 case 13 failed\n");
FAILED(!check_simd_mov(buf + 664, 123, 8), "test_simd1 case 14 failed\n");
}
/* 256-bit results are only verified when the 256-bit move was accepted. */
if (supported[1]) {
SLJIT_ASSERT(sljit_get_register_index(SLJIT_SIMD_REG_256, SLJIT_VR0) != -1
&& sljit_get_register_index(SLJIT_SIMD_REG_256, SLJIT_TMP_DEST_VREG) != -1);
FAILED(!check_simd_mov(buf + 768, 85, 32), "test_simd1 case 15 failed\n");
FAILED(!check_simd_mov(buf + 834, 215, 32), "test_simd1 case 16 failed\n");
}
SIMD_RUN_END
successful_tests++;
}
/*
 * Verifies the result of a lane-by-lane copy in which every second lane of
 * a vector was written and the lanes in between were left at the 0xaa
 * background byte.
 *
 * buf       - first byte of the vector image to verify.
 * length    - size of the vector image in bytes.
 * elem_size - size of one lane in bytes.
 * is_odd    - non-zero: each lane pair is (untouched, data);
 *             zero:     each lane pair is (data, untouched).
 *
 * Expected data: the bytes inside one data lane ascend by one. The first
 * data lane starts at 180 + length - elem_size when is_odd is set and
 * elem_size lower otherwise; each following data lane starts
 * 2 * elem_size lower than the previous one. (180 is the first byte of the
 * ascending source pattern test_simd2 writes at the start of its buffer.)
 *
 * Returns 1 when all (length / elem_size) / 2 lane pairs match, 0 otherwise.
 * A non-positive length or elem_size, or a vector too short to hold one
 * lane pair, verifies nothing and is reported as a failure (0); the earlier
 * do/while form divided by elem_size unchecked and scanned past the buffer
 * when the pair count was 0.
 */
static sljit_s32 check_simd_lane_mov(sljit_u8* buf, sljit_s32 length, sljit_s32 elem_size, sljit_s32 is_odd)
{
	sljit_s32 count;
	sljit_s32 value;
	sljit_s32 i;

	if (elem_size <= 0 || length <= 0)
		return 0;

	count = (length / elem_size) >> 1;
	if (count == 0)
		return 0;

	value = 180 + length - elem_size;
	if (!is_odd)
		value -= elem_size;

	for (; count > 0; count--) {
		if (is_odd) {
			/* Leading lane of the pair must be untouched. */
			for (i = 0; i < elem_size; i++)
				if (*buf++ != 0xaa)
					return 0;
		}

		/* Data lane: consecutive ascending bytes. */
		for (i = 0; i < elem_size; i++)
			if (*buf++ != value++)
				return 0;

		if (!is_odd) {
			/* Trailing lane of the pair must be untouched. */
			for (i = 0; i < elem_size; i++)
				if (*buf++ != 0xaa)
					return 0;
		}

		/* Undo the elem_size just consumed and step two lanes down. */
		value -= 3 * elem_size;
	}

	return 1;
}
static void test_simd2(void)
{
executable_code code;
struct sljit_compiler* compiler;
sljit_s32 options = 0;
sljit_s32 i, type;
sljit_u8 supported[1];
sljit_u8* buf;
sljit_u8 data[63 + 576];
sljit_f64 tmp[1];
sljit_u32 f32_result = 0;
sljit_sw result[6];
sljit_s32 result32[5];
sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5;
if (verbose)
printf("Run test_simd2\n");
SIMD_RUN_START
buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63);
for (i = 0; i < 64; i++)
buf[i] = (sljit_u8)(180 + i);
for (i = 64; i < 576; i++)
buf[i] = 0xaa;
for (i = 0; i < 6; i++)
result[i] = 0;
for (i = 0; i < 5; i++)
result32[i] = 0;
compiler = sljit_create_compiler(NULL);
FAILED(!compiler, "cannot create compiler\n");
sljit_emit_enter(compiler, options, SLJIT_ARGS1V(P), 4 | SLJIT_ENTER_FLOAT(5) | SLJIT_ENTER_VECTOR(6), 4 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? 2 : 0), 16);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)tmp - 100000);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)tmp + 1000);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 100000 / 2);
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, 14, SLJIT_R2, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, 0, SLJIT_R2, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, 12, SLJIT_MEM1(SLJIT_SP), 0);
sljit_get_local_base(compiler, SLJIT_R2, 0, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, 2, SLJIT_MEM1(SLJIT_R2), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, 10, SLJIT_MEM0(), (sljit_sw)tmp);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, 4, SLJIT_MEM0(), (sljit_sw)tmp);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, 8, SLJIT_R3, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, 6, SLJIT_R3, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, 6, SLJIT_MEM1(SLJIT_R0), 100000);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, 8, SLJIT_MEM1(SLJIT_R1), -1000);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, 4, SLJIT_MEM2(SLJIT_R0, SLJIT_S1), 1);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, 10, SLJIT_MEM1(SLJIT_R1), -1000);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, 2, SLJIT_MEM1(SLJIT_R1), -1000);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, 12, SLJIT_MEM2(SLJIT_R0, SLJIT_S1), 1);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, 0, SLJIT_S2, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, 14, SLJIT_S2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 128);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, 1, SLJIT_R2, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR5, 15, SLJIT_IMM, 181 + 0xffff00);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 183 + 0xffff00);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR5, 13, SLJIT_R2, 0);
for (i = 5; i < 16; i += 2) {
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | SLJIT_32 | type, SLJIT_VR0, i, SLJIT_R2, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | SLJIT_32 | type, SLJIT_VR5, 16 - i, SLJIT_R2, 0);
}
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 144);
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, 6, SLJIT_R2, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, 0, SLJIT_R2, 0);
sljit_get_local_base(compiler, SLJIT_R2, 0, 4);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, 4, SLJIT_MEM1(SLJIT_R2), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, 2, SLJIT_MEM1(SLJIT_SP), 4);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, 2, SLJIT_MEM0(), (sljit_sw)tmp);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, 4, SLJIT_MEM0(), (sljit_sw)tmp);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, 0, SLJIT_S3, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, 6, SLJIT_S3, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 160);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, 7, SLJIT_MEM1(SLJIT_R0), 100000);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, 1, SLJIT_MEM1(SLJIT_R1), -1000);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, 5, SLJIT_MEM2(SLJIT_R0, SLJIT_S1), 1);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, 3, SLJIT_MEM1(SLJIT_R1), -1000);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, 3, SLJIT_MEM1(SLJIT_R1), -1000);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, 5, SLJIT_MEM2(SLJIT_R0, SLJIT_S1), 1);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, 1, SLJIT_S2, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, 7, SLJIT_S2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 176);
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, 2, SLJIT_R2, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, 0, SLJIT_R2, 0);
sljit_get_local_base(compiler, SLJIT_R2, 0, 8);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, 0, SLJIT_MEM1(SLJIT_R2), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, 2, SLJIT_MEM1(SLJIT_SP), 8);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 192);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, 3, SLJIT_S3, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, 1, SLJIT_S3, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, 1, SLJIT_MEM1(SLJIT_R0), 100000);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, 3, SLJIT_MEM1(SLJIT_R1), -1000);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 208);
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, 0, SLJIT_R2, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR5, 0, SLJIT_R2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 224);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, 1, SLJIT_MEM1(SLJIT_R1), -1000);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, 1, SLJIT_MEM2(SLJIT_R0, SLJIT_S1), 1);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 240);
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_32;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_VR3, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, 2, SLJIT_FR1, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, 0, SLJIT_FR1, 0);
sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM0(), (sljit_sw)&f32_result, SLJIT_FR1, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, 0, SLJIT_FR0, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, 2, SLJIT_FR0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 256);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, 3, SLJIT_FR2, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, 1, SLJIT_FR2, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, 1, SLJIT_FR4, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, 3, SLJIT_FR4, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 272);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, 2, SLJIT_MEM1(SLJIT_SP), 4);
sljit_get_local_base(compiler, SLJIT_R2, 0, 4);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, 0, SLJIT_MEM1(SLJIT_R2), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, 0, SLJIT_MEM1(SLJIT_R0), 100000);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, 2, SLJIT_MEM1(SLJIT_R1), -1000);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 288);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, 3, SLJIT_MEM1(SLJIT_R1), -1000);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, 1, SLJIT_MEM2(SLJIT_R0, SLJIT_S1), 1);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, 1, SLJIT_MEM0(), (sljit_sw)tmp);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, 3, SLJIT_MEM0(), (sljit_sw)tmp);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 304);
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, 0, SLJIT_FR4, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, 0, SLJIT_FR4, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 320);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, 1, SLJIT_FR2, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, 1, SLJIT_FR2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 336);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 64);
sljit_get_local_base(compiler, SLJIT_R2, 0, 8);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, 0, SLJIT_MEM1(SLJIT_R2), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, 0, SLJIT_MEM1(SLJIT_SP), 8);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 352);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, 1, SLJIT_MEM1(SLJIT_R0), 100000);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, 1, SLJIT_MEM1(SLJIT_R1), -1000);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 368);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (sljit_sw)result);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | SLJIT_SIMD_REG_128, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 64);
type = SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8;
sljit_emit_simd_lane_mov(compiler, type, SLJIT_VR1, 6, SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S3, 0, SLJIT_IMM, -1);
sljit_emit_simd_lane_mov(compiler, type | SLJIT_SIMD_LANE_SIGNED, SLJIT_VR1, 13, SLJIT_S3, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_sw), SLJIT_S3, 0);
type = SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16;
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R3, 0, SLJIT_IMM, -1);
sljit_emit_simd_lane_mov(compiler, type, SLJIT_VR1, 5, SLJIT_R3, 0);
sljit_emit_simd_lane_mov(compiler, type | SLJIT_SIMD_LANE_SIGNED, SLJIT_VR1, 7, SLJIT_R1, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_sw) * 2, SLJIT_R3, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_sw) * 3, SLJIT_R1, 0);
type = SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32;
sljit_emit_simd_lane_mov(compiler, type, SLJIT_VR1, 2, SLJIT_S3, 0);
sljit_emit_simd_lane_mov(compiler, type | SLJIT_SIMD_LANE_SIGNED, SLJIT_VR1, 3, SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_sw) * 4, SLJIT_S3, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_sw) * 5, SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (sljit_sw)result32);
type = SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8 | SLJIT_32;
sljit_emit_simd_lane_mov(compiler, type, SLJIT_VR1, 0, SLJIT_R3, 0);
sljit_emit_simd_lane_mov(compiler, type | SLJIT_SIMD_LANE_SIGNED, SLJIT_VR1, 3, SLJIT_S2, 0);
sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R2), 0, SLJIT_R3, 0);
sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_s32), SLJIT_S2, 0);
type = SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16 | SLJIT_32;
sljit_emit_simd_lane_mov(compiler, type, SLJIT_VR1, 0, SLJIT_R1, 0);
sljit_emit_simd_lane_mov(compiler, type | SLJIT_SIMD_LANE_SIGNED, SLJIT_VR1, 3, SLJIT_S3, 0);
sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_s32) * 2, SLJIT_R1, 0);
sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_s32) * 3, SLJIT_S3, 0);
type = SLJIT_SIMD_STORE | SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_32;
sljit_emit_simd_lane_mov(compiler, type | SLJIT_SIMD_LANE_SIGNED, SLJIT_VR1, 0, SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_R2), sizeof(sljit_s32) * 4, SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, (sljit_sw)tmp - 100000);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)tmp + 1000);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_8;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 64);
supported[0] = sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, 30, SLJIT_MEM1(SLJIT_R1), -1000) != SLJIT_ERR_UNSUPPORTED;
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, 0, SLJIT_MEM1(SLJIT_R1), -1000);
for (i = 2; i < 32; i += 2) {
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, 30 - i, SLJIT_R2, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, i, SLJIT_R2, 0);
}
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 384);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_16;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, 1, SLJIT_MEM1(SLJIT_SP), 8);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, 15, SLJIT_MEM1(SLJIT_SP), 8);
for (i = 3; i < 16; i += 2) {
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, i, SLJIT_R2, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, 16 - i, SLJIT_R2, 0);
}
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 416);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, 6, SLJIT_MEM1(SLJIT_R0), 100000);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, 0, SLJIT_MEM0(), (sljit_sw)tmp);
for (i = 2; i < 8; i += 2) {
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, 6 - i, SLJIT_S1, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, i, SLJIT_S1, 0);
}
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 448);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, -1000);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, 1, SLJIT_MEM1(SLJIT_R0), 100000);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, 3, SLJIT_MEM2(SLJIT_R1, SLJIT_S1), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, 3, SLJIT_S1, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, 1, SLJIT_S1, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 480);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_FLOAT;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, 1, SLJIT_MEM1(SLJIT_SP), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, 7, SLJIT_MEM1(SLJIT_SP), 0);
for (i = 3; i < 8; i += 2) {
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, i, SLJIT_FR2, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, 8 - i, SLJIT_FR2, 0);
}
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 512);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, 2, SLJIT_MEM0(), (sljit_sw)tmp);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, 0, SLJIT_MEM1(SLJIT_R0), 100000);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, 0, SLJIT_FR0, 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, 2, SLJIT_FR0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 544);
sljit_emit_return_void(compiler);
code.code = sljit_generate_code(compiler, 0, NULL);
CHECK(compiler);
sljit_free_compiler(compiler);
code.func1((sljit_sw)buf);
sljit_free_code(code.code, NULL);
FAILED(!check_simd_lane_mov(buf + 128, 16, 1, 0), "test_simd2 case 1 failed\n");
FAILED(!check_simd_lane_mov(buf + 144, 16, 1, 1), "test_simd2 case 2 failed\n");
FAILED(!check_simd_lane_mov(buf + 160, 16, 2, 0), "test_simd2 case 3 failed\n");
FAILED(!check_simd_lane_mov(buf + 176, 16, 2, 1), "test_simd2 case 4 failed\n");
FAILED(!check_simd_lane_mov(buf + 192, 16, 4, 0), "test_simd2 case 5 failed\n");
FAILED(!check_simd_lane_mov(buf + 208, 16, 4, 1), "test_simd2 case 6 failed\n");
#if IS_64BIT
FAILED(!check_simd_lane_mov(buf + 224, 16, 8, 0), "test_simd2 case 7 failed\n");
FAILED(!check_simd_lane_mov(buf + 240, 16, 8, 1), "test_simd2 case 8 failed\n");
#endif
FAILED(!check_simd_lane_mov(buf + 256, 16, 4, 0), "test_simd2 case 9 failed\n");
FAILED(!check_simd_lane_mov(buf + 272, 16, 4, 1), "test_simd2 case 10 failed\n");
FAILED(!check_simd_lane_mov(buf + 288, 16, 4, 0), "test_simd2 case 11 failed\n");
FAILED(!check_simd_lane_mov(buf + 304, 16, 4, 1), "test_simd2 case 12 failed\n");
FAILED(f32_result != LITTLE_BIG(0xbfbebdbc, 0xbcbdbebf), "test_simd2 case 13 failed\n");
FAILED(!check_simd_lane_mov(buf + 320, 16, 8, 0), "test_simd2 case 14 failed\n");
FAILED(!check_simd_lane_mov(buf + 336, 16, 8, 1), "test_simd2 case 15 failed\n");
FAILED(!check_simd_lane_mov(buf + 352, 16, 8, 0), "test_simd2 case 16 failed\n");
FAILED(!check_simd_lane_mov(buf + 368, 16, 8, 1), "test_simd2 case 17 failed\n");
FAILED(result[0] != 186, "test_simd2 case 18 failed\n");
FAILED(result[1] != -63, "test_simd2 case 19 failed\n");
FAILED(result[2] != LITTLE_BIG(49086, 48831), "test_simd2 case 20 failed\n");
FAILED(result[3] != LITTLE_BIG(-15422, -15677), "test_simd2 case 21 failed\n");
FAILED(result[4] != LITTLE_BIG(WCONST(3216948668, -1078018628), WCONST(3166551743, -1128415553)), "test_simd2 case 22 failed\n");
FAILED(result[5] != LITTLE_BIG(-1010646592, -1061043517), "test_simd2 case 23 failed\n");
FAILED(result32[0] != 180, "test_simd2 case 24 failed\n");
FAILED(result32[1] != -73, "test_simd2 case 25 failed\n");
FAILED(result32[2] != LITTLE_BIG(46516, 46261), "test_simd2 case 26 failed\n");
FAILED(result32[3] != LITTLE_BIG(-17478, -17733), "test_simd2 case 27 failed\n");
FAILED(result32[4] != LITTLE_BIG(-1212762700, -1263159625), "test_simd2 case 28 failed\n");
if (supported[0]) {
FAILED(!check_simd_lane_mov(buf + 384, 32, 1, 0), "test_simd2 case 29 failed\n");
FAILED(!check_simd_lane_mov(buf + 416, 32, 2, 1), "test_simd2 case 30 failed\n");
FAILED(!check_simd_lane_mov(buf + 448, 32, 4, 0), "test_simd2 case 31 failed\n");
#if IS_64BIT
FAILED(!check_simd_lane_mov(buf + 480, 32, 8, 1), "test_simd2 case 32 failed\n");
#endif
FAILED(!check_simd_lane_mov(buf + 512, 32, 4, 1), "test_simd2 case 33 failed\n");
FAILED(!check_simd_lane_mov(buf + 544, 32, 8, 0), "test_simd2 case 34 failed\n");
}
SIMD_RUN_END
successful_tests++;
}
/* Verifies that buf holds (length / elem_size) consecutive copies of one
   element. An element is elem_size bytes whose values are value, value + 1,
   ..., value + elem_size - 1 in increasing address order.
   Returns 1 when every byte matches, 0 on the first mismatch.
   At least one element is always inspected (do/while), so callers must pass
   length >= elem_size > 0. */
static sljit_s32 check_simd_replicate(sljit_u8* buf, sljit_s32 length, sljit_s32 elem_size, sljit_s32 value)
{
	sljit_s32 remaining = length / elem_size;
	sljit_s32 offset;

	do {
		/* Compare one element byte by byte against the expected ramp. */
		for (offset = 0; offset < elem_size; offset++) {
			if (buf[offset] != value + offset)
				return 0;
		}

		/* Step to the next copy of the element. */
		buf += elem_size;
	} while (--remaining != 0);

	return 1;
}
/* Verifies that buf holds (length / 4) consecutive copies of a 4 byte
   pattern. The pattern bytes are taken from value starting with its least
   significant byte, i.e. byte k of each group must equal
   (value >> (8 * k)) & 0xff.
   Returns 1 when every byte matches, 0 on the first mismatch.
   At least one group is always inspected (do/while), so length must be
   at least 4. */
static sljit_s32 check_simd_replicate_u32(sljit_u8* buf, sljit_s32 length, sljit_u32 value)
{
	sljit_s32 groups_left = length / 4;
	sljit_s32 shift;

	do {
		/* Walk the four bytes of the pattern, lowest byte first. */
		for (shift = 0; shift < 32; shift += 8) {
			if (*buf++ != ((value >> shift) & 0xff))
				return 0;
		}
	} while (--groups_left != 0);

	return 1;
}
/* Test sljit_emit_simd_replicate.
   Generates one function that replicates scalar values (taken from integer
   registers, float registers, immediates and several memory addressing
   forms) into vector registers, stores every vector into buf, and then
   verifies the stored bytes with check_simd_replicate /
   check_simd_replicate_u32. */
static void test_simd3(void)
{
executable_code code;
struct sljit_compiler* compiler;
sljit_s32 options = 0;
sljit_s32 i, type;
sljit_u8 supported[1];
sljit_u8* buf;
sljit_u8 data[63 + 768];
sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5;
if (verbose)
printf("Run test_simd3\n");
/* On x86 this macro opens a loop which runs the body a second time with
   options set to SLJIT_ENTER_USE_VEX; elsewhere it expands to nothing. */
SIMD_RUN_START
/* Round data up to the next 64 byte boundary (hence the 63 spare bytes). */
buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63);
/* Bytes 0..31 are the source values (200..231); the rest is 0xaa filler. */
for (i = 0; i < 32; i++)
buf[i] = (sljit_u8)(200 + i);
for (i = 32; i < 768; i++)
buf[i] = 0xaa;
compiler = sljit_create_compiler(NULL);
FAILED(!compiler, "cannot create compiler\n");
/* The generated function receives buf as its only argument (see the
   code.func1 call below); SLJIT_S0 is used as the base of buf throughout. */
sljit_emit_enter(compiler, options, SLJIT_ARGS1V(P), 4 | SLJIT_ENTER_FLOAT(6) | SLJIT_ENTER_VECTOR(6), 4 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? 2 : 0), 16);
/* 128 bit, 8 bit elements (cases 1-4). Most destinations are first loaded
   from offset 32, which still holds 0xaa filler - presumably so that stale
   register contents cannot satisfy the check. */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 32);
/* Case 1: register source; only the low byte (78) is expected in the result. */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0xffff00 + 78);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR0, SLJIT_R2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 48);
/* Case 2: immediate source; only the low byte (253) is expected. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR3, SLJIT_IMM, 0xffff00 + 253);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 64);
/* Case 3: source is a byte (42) written to the local stack area at SP + 3. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM1(SLJIT_SP), 3, SLJIT_IMM, 42);
sljit_emit_simd_replicate(compiler, type, vs0, SLJIT_MEM1(SLJIT_SP), 3);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 80);
/* Case 4: base + index addressing, reads buf[15] (215). */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 15);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR5, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 96);
/* 128 bit, 16 bit elements (cases 5-7). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16;
/* Case 5: register source loaded from buf + 24. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_op1(compiler, SLJIT_MOV_S16, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_S0), 24);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR3, SLJIT_R1, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 112);
/* Case 6: absolute address source (buf + 10). */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_replicate(compiler, type, vs0, SLJIT_MEM0(), (sljit_sw)(buf + 10));
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 128);
/* Case 7: base register biased by +10020 and a -10000 displacement,
   i.e. the effective address is buf + 20. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 10000 + 20);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR0, SLJIT_MEM1(SLJIT_R0), -10000);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 144);
/* 128 bit, 32 bit elements (cases 8-10). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32;
/* Case 8: register source loaded from buf + 28. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_op1(compiler, SLJIT_MOV_S32, SLJIT_S3, 0, SLJIT_MEM1(SLJIT_S0), 28);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR1, SLJIT_S3, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 160);
/* Case 9: source copied from buf + 12 to the stack at SP + 4. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_op1(compiler, SLJIT_MOV_U32, SLJIT_MEM1(SLJIT_SP), 4, SLJIT_MEM1(SLJIT_S0), 12);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR5, SLJIT_MEM1(SLJIT_SP), 4);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 176);
/* Case 10: base register biased by -(100000 - 24) and a +100000
   displacement, i.e. the effective address is buf + 24. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R2, 0, SLJIT_S0, 0, SLJIT_IMM, 100000 - 24);
sljit_emit_simd_replicate(compiler, type, vs0, SLJIT_MEM1(SLJIT_R2), 100000);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 192);
/* 128 bit, 64 bit elements (cases 11-13). The code is always emitted, but
   the results are only checked when IS_64BIT is set. */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64;
/* Case 11: register source loaded from buf + 8. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_S0), 8);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR2, SLJIT_S1, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 208);
/* Case 12: base + (index << 3) with index 3; the check expects the
   bytes starting at buf + 24. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 3);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR0, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 3);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 224);
/* Case 13: absolute address source (buf). */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_replicate(compiler, type, vs0, SLJIT_MEM0(), (sljit_sw)buf);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 240);
/* 128 bit, 32 bit float elements (cases 14-16). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_32;
/* Case 14: float register source loaded from buf + 4. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 4);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR1, SLJIT_FR2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 256);
/* Case 15: source FR4 and destination VR4 use the same register number
   (presumably the same physical register - TODO confirm). */
sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S0), 20);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR4, SLJIT_FR4, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 272);
/* Case 16: source copied from buf + 12 to the stack at SP + 4. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_MEM1(SLJIT_SP), 4, SLJIT_MEM1(SLJIT_S0), 12);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR0, SLJIT_MEM1(SLJIT_SP), 4);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 288);
/* 128 bit, 64 bit float elements (cases 17-19). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_64;
/* Case 17: float register source loaded from buf + 16. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 16);
sljit_emit_simd_replicate(compiler, type, vs0, SLJIT_FR0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 304);
/* Case 18: source FR5 and destination VR5 use the same register number. */
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR5, SLJIT_FR5, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 320);
/* Case 19: biased base with a -10000 displacement, effective address buf + 8. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S0, 0, SLJIT_IMM, 10000 + 8);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR1, SLJIT_MEM1(SLJIT_R2), -10000);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 336);
/* Cases 20-31: immediate sources with assorted bit patterns. The checks
   expect only the low bits matching the element size to be replicated. */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32;
/* Case 20: WCONST presumably selects the first value on 64 bit targets and
   the second on 32 bit ones; either way 0x123456 is expected per element. */
sljit_emit_simd_replicate(compiler, type, SLJIT_VR0, SLJIT_IMM, WCONST(0xff00123456, 0x123456));
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 352);
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16;
/* Case 21: low 16 bits are all zero. */
sljit_emit_simd_replicate(compiler, type, SLJIT_VR1, SLJIT_IMM, 0xff0000);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 368);
/* Case 22: low 16 bits are all ones. */
sljit_emit_simd_replicate(compiler, type, SLJIT_VR2, SLJIT_IMM, 0x1ffff);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 384);
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_64;
/* Case 23: float type with a zero immediate; an all zero vector is expected. */
sljit_emit_simd_replicate(compiler, type, SLJIT_VR3, SLJIT_IMM, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 400);
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16;
/* Cases 24-25: 16 bit values with one zero byte / one 0xff byte. */
sljit_emit_simd_replicate(compiler, type, vs0, SLJIT_IMM, 0xff0034);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 416);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR5, SLJIT_IMM, 0xff45ff);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 432);
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32;
/* Cases 26-31: 32 bit values where a single byte differs from the
   surrounding 0x00 / 0xff bytes. */
sljit_emit_simd_replicate(compiler, type, SLJIT_VR0, SLJIT_IMM, 0xb3);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 448);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR1, SLJIT_IMM, (sljit_sw)0xffff46ff);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 464);
sljit_emit_simd_replicate(compiler, type, vs0, SLJIT_IMM, 0x4c0000);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 480);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR3, SLJIT_IMM, 0x71ffffff);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 496);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR4, SLJIT_IMM, 0x9eff);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 512);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR5, SLJIT_IMM, (sljit_sw)0xff070000);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 528);
/* 256 bit registers (cases 32-38). supported[0] records whether the first
   256 bit replicate was accepted; the 256 bit results are only checked
   when it was. */
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_8;
supported[0] = sljit_emit_simd_replicate(compiler, type, SLJIT_VR2, SLJIT_IMM, 0xffff00 + 181) != SLJIT_ERR_UNSUPPORTED;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 544);
sljit_emit_simd_replicate(compiler, type, vs0, SLJIT_IMM, 0xffff00);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 576);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_16;
/* Case 34: register source loaded from buf + 30. */
sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_R1, 0, SLJIT_MEM1(SLJIT_S0), 30);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR1, SLJIT_R1, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 608);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32;
/* Case 35: memory source at buf + 4. */
sljit_emit_simd_replicate(compiler, type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 4);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 640);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_64;
/* Case 36: base + (index << 2) with index 4; the check expects the bytes
   starting at buf + 16. Only checked when IS_64BIT is set. */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_S0, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 4);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR0, SLJIT_MEM2(SLJIT_R1, SLJIT_S1), 2);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 672);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_FLOAT;
/* Case 37: float register source loaded from buf + 20. */
sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 20);
sljit_emit_simd_replicate(compiler, type, SLJIT_VR1, SLJIT_FR2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 704);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT;
/* Case 38: absolute address source (buf + 8). */
sljit_emit_simd_replicate(compiler, type, vs0, SLJIT_MEM0(), (sljit_sw)(buf + 8));
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 736);
sljit_emit_return_void(compiler);
code.code = sljit_generate_code(compiler, 0, NULL);
CHECK(compiler);
sljit_free_compiler(compiler);
/* Run the generated function on buf, then release it before checking. */
code.func1((sljit_sw)buf);
sljit_free_code(code.code, NULL);
/* The last argument of check_simd_replicate is the first byte of the
   expected element, i.e. 200 + the buf offset the source was read from
   (or the low byte of the immediate). */
FAILED(!check_simd_replicate(buf + 48, 16, 1, 78), "test_simd3 case 1 failed\n");
FAILED(!check_simd_replicate(buf + 64, 16, 1, 253), "test_simd3 case 2 failed\n");
FAILED(!check_simd_replicate(buf + 80, 16, 1, 42), "test_simd3 case 3 failed\n");
FAILED(!check_simd_replicate(buf + 96, 16, 1, 215), "test_simd3 case 4 failed\n");
FAILED(!check_simd_replicate(buf + 112, 16, 2, 224), "test_simd3 case 5 failed\n");
FAILED(!check_simd_replicate(buf + 128, 16, 2, 210), "test_simd3 case 6 failed\n");
FAILED(!check_simd_replicate(buf + 144, 16, 2, 220), "test_simd3 case 7 failed\n");
FAILED(!check_simd_replicate(buf + 160, 16, 4, 228), "test_simd3 case 8 failed\n");
FAILED(!check_simd_replicate(buf + 176, 16, 4, 212), "test_simd3 case 9 failed\n");
FAILED(!check_simd_replicate(buf + 192, 16, 4, 224), "test_simd3 case 10 failed\n");
#if IS_64BIT
FAILED(!check_simd_replicate(buf + 208, 16, 8, 208), "test_simd3 case 11 failed\n");
FAILED(!check_simd_replicate(buf + 224, 16, 8, 224), "test_simd3 case 12 failed\n");
FAILED(!check_simd_replicate(buf + 240, 16, 8, 200), "test_simd3 case 13 failed\n");
#endif
FAILED(!check_simd_replicate(buf + 256, 16, 4, 204), "test_simd3 case 14 failed\n");
FAILED(!check_simd_replicate(buf + 272, 16, 4, 220), "test_simd3 case 15 failed\n");
FAILED(!check_simd_replicate(buf + 288, 16, 4, 212), "test_simd3 case 16 failed\n");
FAILED(!check_simd_replicate(buf + 304, 16, 8, 216), "test_simd3 case 17 failed\n");
FAILED(!check_simd_replicate(buf + 320, 16, 8, 200), "test_simd3 case 18 failed\n");
FAILED(!check_simd_replicate(buf + 336, 16, 8, 208), "test_simd3 case 19 failed\n");
/* Immediate cases: check_simd_replicate_u32 compares bytes starting with
   the least significant byte of its argument. LITTLE_BIG presumably picks
   the first value on little endian and the second on big endian targets. */
FAILED(!check_simd_replicate_u32(buf + 352, 16, LITTLE_BIG(0x123456, 0x56341200)), "test_simd3 case 20 failed\n");
FAILED(!check_simd_replicate_u32(buf + 368, 16, 0), "test_simd3 case 21 failed\n");
FAILED(!check_simd_replicate_u32(buf + 384, 16, 0xffffffff), "test_simd3 case 22 failed\n");
FAILED(!check_simd_replicate_u32(buf + 400, 16, 0), "test_simd3 case 23 failed\n");
FAILED(!check_simd_replicate_u32(buf + 416, 16, LITTLE_BIG(0x340034, 0x34003400)), "test_simd3 case 24 failed\n");
FAILED(!check_simd_replicate_u32(buf + 432, 16, LITTLE_BIG(0x45ff45ff, 0xff45ff45)), "test_simd3 case 25 failed\n");
FAILED(!check_simd_replicate_u32(buf + 448, 16, LITTLE_BIG(0xb3, 0xb3000000)), "test_simd3 case 26 failed\n");
FAILED(!check_simd_replicate_u32(buf + 464, 16, LITTLE_BIG(0xffff46ff, 0xff46ffff)), "test_simd3 case 27 failed\n");
FAILED(!check_simd_replicate_u32(buf + 480, 16, LITTLE_BIG(0x4c0000, 0x4c00)), "test_simd3 case 28 failed\n");
FAILED(!check_simd_replicate_u32(buf + 496, 16, LITTLE_BIG(0x71ffffff, 0xffffff71)), "test_simd3 case 29 failed\n");
FAILED(!check_simd_replicate_u32(buf + 512, 16, LITTLE_BIG(0x9eff, 0xff9e0000)), "test_simd3 case 30 failed\n");
FAILED(!check_simd_replicate_u32(buf + 528, 16, LITTLE_BIG(0xff070000, 0x07ff)), "test_simd3 case 31 failed\n");
/* 256 bit results are only meaningful when the operation was supported. */
if (supported[0]) {
FAILED(!check_simd_replicate(buf + 544, 32, 1, 181), "test_simd3 case 32 failed\n");
FAILED(!check_simd_replicate(buf + 576, 32, 1, 0), "test_simd3 case 33 failed\n");
FAILED(!check_simd_replicate(buf + 608, 32, 2, 230), "test_simd3 case 34 failed\n");
FAILED(!check_simd_replicate(buf + 640, 32, 4, 204), "test_simd3 case 35 failed\n");
#if IS_64BIT
FAILED(!check_simd_replicate(buf + 672, 32, 8, 216), "test_simd3 case 36 failed\n");
#endif
FAILED(!check_simd_replicate(buf + 704, 32, 4, 220), "test_simd3 case 37 failed\n");
FAILED(!check_simd_replicate(buf + 736, 32, 8, 208), "test_simd3 case 38 failed\n");
}
SIMD_RUN_END
successful_tests++;
}
/* Test sljit_emit_simd_lane_replicate.
   Generates one function that loads vectors from the start of buf,
   replicates a selected lane of a source vector register into a destination
   vector register (same or different register), stores the results into
   buf, and then verifies the stored bytes with check_simd_replicate. */
static void test_simd4(void)
{
executable_code code;
struct sljit_compiler* compiler;
sljit_s32 options = 0;
sljit_s32 i, type;
sljit_u8 supported[1];
sljit_u8* buf;
sljit_u8 data[63 + 992];
sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5;
if (verbose)
printf("Run test_simd4\n");
/* On x86 this macro opens a loop which runs the body a second time with
   options set to SLJIT_ENTER_USE_VEX; elsewhere it expands to nothing. */
SIMD_RUN_START
/* Round data up to the next 64 byte boundary (hence the 63 spare bytes). */
buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63);
/* Bytes 0..31 are the source vectors (100..131); the rest is 0xaa filler. */
for (i = 0; i < 32; i++)
buf[i] = (sljit_u8)(100 + i);
for (i = 32; i < 992; i++)
buf[i] = 0xaa;
compiler = sljit_create_compiler(NULL);
FAILED(!compiler, "cannot create compiler\n");
/* The generated function receives buf as its only argument (see the
   code.func1 call below); SLJIT_S0 is used as the base of buf throughout. */
sljit_emit_enter(compiler, options, SLJIT_ARGS1V(P), 4 | SLJIT_ENTER_VECTOR(6), 4 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? 2 : 0), 16);
/* 128 bit, 8 bit elements (cases 1-6). When source and destination
   differ, the destination is first loaded from offset 32 (0xaa filler). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8;
/* Case 1: in place, lane 0. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR0, SLJIT_VR0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 48);
/* Case 2: different registers, lane 12 of the vector at buf + 16. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 16);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR2, SLJIT_VR1, 12);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 64);
/* Case 3: different registers, lane 6 of the vector at buf + 0. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR3, SLJIT_VR5, 6);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 80);
/* Case 4: in place, lane 9 of the vector at buf + 16. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 16);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR3, SLJIT_VR3, 9);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 96);
/* Case 5: destination vs0, lane 10. Case 6 stores the source register
   afterwards; the check expects it to be unchanged. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_lane_replicate(compiler, type, vs0, SLJIT_VR0, 10);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 112);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 128);
/* 128 bit, 16 bit elements (cases 7-10). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16;
/* Case 7: in place, lane 0. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR0, SLJIT_VR0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 144);
/* Case 8: in place, lane 3 of the vector at buf + 16. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 16);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR1, SLJIT_VR1, 3);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 160);
/* Case 9: different registers, lane 5; case 10 re-stores the source. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 16);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR0, SLJIT_VR4, 5);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 176);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 192);
/* 128 bit, 32 bit elements (cases 11-14). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32;
/* Case 11: in place, lane 0. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR3, SLJIT_VR3, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 208);
/* Case 12: in place, lane 2 of the vector at buf + 16. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 16);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR1, SLJIT_VR1, 2);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 224);
/* Case 13: different registers, lane 3; case 14 re-stores the source. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR5, SLJIT_VR2, 3);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 240);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 256);
/* 128 bit, 64 bit elements (cases 15-18). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64;
/* Case 15: in place, lane 0 of the vector at buf + 16. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 16);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR1, SLJIT_VR1, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 272);
/* Case 16: in place, lane 1. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR0, SLJIT_VR0, 1);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 288);
/* Case 17: destination vs0, lane 1; case 18 re-stores the source. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 16);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_lane_replicate(compiler, type, vs0, SLJIT_VR3, 1);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 304);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 320);
/* 128 bit, 32 bit float elements (cases 19-22). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_32;
/* Case 19: in place, lane 0. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR2, SLJIT_VR2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 336);
/* Case 20: in place, lane 3 of the vector at buf + 16. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 16);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR0, SLJIT_VR0, 3);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 352);
/* Case 21: different registers, lane 1; case 22 re-stores the source. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR5, SLJIT_VR0, 1);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 368);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 384);
/* 128 bit, 64 bit float elements (cases 23-26). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_64;
/* Case 23: in place, lane 0 of the vector at buf + 16. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 16);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR4, SLJIT_VR4, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 400);
/* Case 24: in place, lane 1. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR1, SLJIT_VR1, 1);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 416);
/* Case 25: different registers, lane 1; case 26 re-stores the source. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 16);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR0, SLJIT_VR2, 1);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 432);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 448);
/* 256 bit registers (cases 27-42). supported[0] records whether the first
   256 bit lane replicate was accepted; the 256 bit results are only
   checked when it was. Each 256 bit source is the 32 bytes at buf + 0. */
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_8;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 0);
supported[0] = sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR2, SLJIT_VR2, 0) != SLJIT_ERR_UNSUPPORTED;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 480);
/* Cases 28-30: lanes 13, 6 and 28 from the same source; the last
   replicate overwrites the source register itself. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR0, SLJIT_VR4, 13);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 512);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR1, SLJIT_VR4, 6);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 544);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR4, SLJIT_VR4, 28);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 576);
/* Cases 31-33: 16 bit elements, lanes 0, 2 and 13 (last one in place). */
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_16;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR2, SLJIT_VR1, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 608);
sljit_emit_simd_lane_replicate(compiler, type, vs0, SLJIT_VR1, 2);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 640);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR1, SLJIT_VR1, 13);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 672);
/* Cases 34-35: 32 bit elements, lanes 0 and 5 (last one in place). */
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR0, SLJIT_VR5, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 704);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR5, SLJIT_VR5, 5);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 736);
/* Cases 36-37: 64 bit elements, lanes 0 and 1 (last one in place). */
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_replicate(compiler, type, vs0, SLJIT_VR0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 768);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR0, SLJIT_VR0, 1);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 800);
/* Cases 38-40: 32 bit float elements, lanes 0, 1 and 4 (last one in place). */
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_FLOAT;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR2, SLJIT_VR1, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 832);
sljit_emit_simd_lane_replicate(compiler, type, vs0, SLJIT_VR1, 1);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 864);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR1, SLJIT_VR1, 4);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 896);
/* Cases 41-42: 64 bit float elements, lanes 0 and 2 (last one in place). */
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_replicate(compiler, type, SLJIT_VR1, vs0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 928);
sljit_emit_simd_lane_replicate(compiler, type, vs0, vs0, 2);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 960);
sljit_emit_return_void(compiler);
code.code = sljit_generate_code(compiler, 0, NULL);
CHECK(compiler);
sljit_free_compiler(compiler);
/* Run the generated function on buf, then release it before checking. */
code.func1((sljit_sw)buf);
sljit_free_code(code.code, NULL);
/* Expected first byte of each element is
   100 + source offset in buf + lane index * element size.
   Checks with elem_size 16 verify a source register that was stored after
   being used as the source of a replicate (it must hold its loaded value). */
FAILED(!check_simd_replicate(buf + 48, 16, 1, 100), "test_simd4 case 1 failed\n");
FAILED(!check_simd_replicate(buf + 64, 16, 1, 128), "test_simd4 case 2 failed\n");
FAILED(!check_simd_replicate(buf + 80, 16, 1, 106), "test_simd4 case 3 failed\n");
FAILED(!check_simd_replicate(buf + 96, 16, 1, 125), "test_simd4 case 4 failed\n");
FAILED(!check_simd_replicate(buf + 112, 16, 1, 110), "test_simd4 case 5 failed\n");
FAILED(!check_simd_replicate(buf + 128, 16, 16, 100), "test_simd4 case 6 failed\n");
FAILED(!check_simd_replicate(buf + 144, 16, 2, 100), "test_simd4 case 7 failed\n");
FAILED(!check_simd_replicate(buf + 160, 16, 2, 122), "test_simd4 case 8 failed\n");
FAILED(!check_simd_replicate(buf + 176, 16, 2, 126), "test_simd4 case 9 failed\n");
FAILED(!check_simd_replicate(buf + 192, 16, 16, 116), "test_simd4 case 10 failed\n");
FAILED(!check_simd_replicate(buf + 208, 16, 4, 100), "test_simd4 case 11 failed\n");
FAILED(!check_simd_replicate(buf + 224, 16, 4, 124), "test_simd4 case 12 failed\n");
FAILED(!check_simd_replicate(buf + 240, 16, 4, 112), "test_simd4 case 13 failed\n");
FAILED(!check_simd_replicate(buf + 256, 16, 16, 100), "test_simd4 case 14 failed\n");
FAILED(!check_simd_replicate(buf + 272, 16, 8, 116), "test_simd4 case 15 failed\n");
FAILED(!check_simd_replicate(buf + 288, 16, 8, 108), "test_simd4 case 16 failed\n");
FAILED(!check_simd_replicate(buf + 304, 16, 8, 124), "test_simd4 case 17 failed\n");
FAILED(!check_simd_replicate(buf + 320, 16, 16, 116), "test_simd4 case 18 failed\n");
FAILED(!check_simd_replicate(buf + 336, 16, 4, 100), "test_simd4 case 19 failed\n");
FAILED(!check_simd_replicate(buf + 352, 16, 4, 128), "test_simd4 case 20 failed\n");
FAILED(!check_simd_replicate(buf + 368, 16, 4, 104), "test_simd4 case 21 failed\n");
FAILED(!check_simd_replicate(buf + 384, 16, 16, 100), "test_simd4 case 22 failed\n");
FAILED(!check_simd_replicate(buf + 400, 16, 8, 116), "test_simd4 case 23 failed\n");
FAILED(!check_simd_replicate(buf + 416, 16, 8, 108), "test_simd4 case 24 failed\n");
FAILED(!check_simd_replicate(buf + 432, 16, 8, 124), "test_simd4 case 25 failed\n");
FAILED(!check_simd_replicate(buf + 448, 16, 16, 116), "test_simd4 case 26 failed\n");
/* 256 bit results are only meaningful when the operation was supported. */
if (supported[0]) {
FAILED(!check_simd_replicate(buf + 480, 32, 1, 100), "test_simd4 case 27 failed\n");
FAILED(!check_simd_replicate(buf + 512, 32, 1, 113), "test_simd4 case 28 failed\n");
FAILED(!check_simd_replicate(buf + 544, 32, 1, 106), "test_simd4 case 29 failed\n");
FAILED(!check_simd_replicate(buf + 576, 32, 1, 128), "test_simd4 case 30 failed\n");
FAILED(!check_simd_replicate(buf + 608, 32, 2, 100), "test_simd4 case 31 failed\n");
FAILED(!check_simd_replicate(buf + 640, 32, 2, 104), "test_simd4 case 32 failed\n");
FAILED(!check_simd_replicate(buf + 672, 32, 2, 126), "test_simd4 case 33 failed\n");
FAILED(!check_simd_replicate(buf + 704, 32, 4, 100), "test_simd4 case 34 failed\n");
FAILED(!check_simd_replicate(buf + 736, 32, 4, 120), "test_simd4 case 35 failed\n");
FAILED(!check_simd_replicate(buf + 768, 32, 8, 100), "test_simd4 case 36 failed\n");
FAILED(!check_simd_replicate(buf + 800, 32, 8, 108), "test_simd4 case 37 failed\n");
FAILED(!check_simd_replicate(buf + 832, 32, 4, 100), "test_simd4 case 38 failed\n");
FAILED(!check_simd_replicate(buf + 864, 32, 4, 104), "test_simd4 case 39 failed\n");
FAILED(!check_simd_replicate(buf + 896, 32, 4, 116), "test_simd4 case 40 failed\n");
FAILED(!check_simd_replicate(buf + 928, 32, 8, 100), "test_simd4 case 41 failed\n");
FAILED(!check_simd_replicate(buf + 960, 32, 8, 116), "test_simd4 case 42 failed\n");
}
SIMD_RUN_END
successful_tests++;
}
/* Checks the byte image of a vector produced by a zeroing lane move.
   buf points to length bytes. The first start bytes must be zero, the
   next elem_size bytes must hold value, value + 1, ... and the bytes
   that follow, up to length, must be zero again.
   Returns 1 when the layout matches, 0 otherwise. */
static sljit_s32 check_simd_lane_mov_zero(sljit_u8* buf, sljit_s32 length, sljit_s32 elem_size, sljit_s32 start, sljit_s32 value)
{
	sljit_u8* cursor = buf;
	sljit_s32 count;

	/* Bytes in front of the lane. */
	for (count = start; count > 0; count--) {
		if (*cursor != 0)
			return 0;
		cursor++;
	}

	/* The lane itself: consecutive byte values starting at value. */
	for (count = elem_size; count > 0; count--) {
		if (*cursor != value)
			return 0;
		cursor++;
		value++;
	}

	/* Bytes behind the lane. */
	count = start + elem_size;
	while (count < length) {
		if (*cursor != 0)
			return 0;
		cursor++;
		count++;
	}

	return 1;
}
/* Tests sljit_emit_simd_lane_mov() combined with SLJIT_SIMD_LANE_ZERO.
   The generated function receives the buffer address as its only argument.
   Each case (usually after loading a vector from the start of the buffer)
   performs a lane move with SLJIT_SIMD_LANE_ZERO from a register, an
   immediate or a memory operand, and stores the vector into its own output
   slot. Every slot is then validated by check_simd_lane_mov_zero(): the
   selected lane must hold the source value and all other bytes must be 0. */
static void test_simd5(void)
{
executable_code code;
struct sljit_compiler* compiler;
sljit_s32 options = 0;
sljit_s32 i, type;
/* supported[0] records whether the first 256 bit lane move was accepted. */
sljit_u8 supported[1];
sljit_u8* buf;
/* 63 spare bytes allow buf to be rounded up to a 64 byte boundary. */
sljit_u8 data[63 + 672];
/* A saved vector register when the target has one, SLJIT_VR5 otherwise. */
sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5;
if (verbose)
printf("Run test_simd5\n");
/* On x86 the code up to SIMD_RUN_END is executed twice; the second pass
   uses options == SLJIT_ENTER_USE_VEX (see the macro definitions). */
SIMD_RUN_START
buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63);
/* Input pattern: bytes 100..163. The output area is pre-filled with 0xaa. */
for (i = 0; i < 64; i++)
buf[i] = (sljit_u8)(100 + i);
for (i = 64; i < 672; i++)
buf[i] = 0xaa;
compiler = sljit_create_compiler(NULL);
FAILED(!compiler, "cannot create compiler\n");
sljit_emit_enter(compiler, options, SLJIT_ARGS1V(P), 4 | SLJIT_ENTER_FLOAT(6) | SLJIT_ENTER_VECTOR(6), 4 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? 2 : 0), 16);
/* R0 and R1 hold the buffer address displaced by a large distance; they
   serve below as bases of memory operands with large offsets. */
sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 100000);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 10000);
/* 128 bit vector, 8 bit integer lanes: results at offsets 64, 80 and 96. */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0xffff00 + 85);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR0, 0, SLJIT_R2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, vs0, 0, SLJIT_IMM, 0xffff00 + 18);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 80);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 0);
/* The byte 170 is written to the stack and used as a memory source. */
sljit_emit_op1(compiler, SLJIT_MOV_U8, SLJIT_MEM1(SLJIT_SP), 10, SLJIT_IMM, 170);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | SLJIT_32 | type, SLJIT_VR5, 5, SLJIT_MEM1(SLJIT_SP), 10);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 96);
/* 128 bit vector, 16 bit integer lanes: results at offsets 112, 128 and 144. */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, LITTLE_BIG(0x789a6d6c, 0x789a6c6d));
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR1, 0, SLJIT_S2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 112);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | SLJIT_32 | type, SLJIT_VR4, 0, SLJIT_IMM, LITTLE_BIG(0xff8382, 0xff8283));
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 128);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 0);
/* R0 + 100004 addresses buf + 4. */
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, vs0, 3, SLJIT_MEM1(SLJIT_R0), 100004);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 144);
/* 128 bit vector, 32 bit integer lanes: results at offsets 160 to 208. */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_S0), 4);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | SLJIT_32 | type, SLJIT_VR2, 0, SLJIT_R2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 160);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR5, 0, SLJIT_IMM, LITTLE_BIG(0x29282726, 0x26272829));
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 176);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 0);
/* Indexed source: S0 + (3 << 2), i.e. buf + 12. */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 3);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | SLJIT_32 | type, SLJIT_VR1, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R2), 2);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 192);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 0);
/* R1 - 10000 + 8 addresses buf + 8. */
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR3, 3, SLJIT_MEM1(SLJIT_R1), -10000 + 8);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 208);
/* 128 bit vector, 64 bit integer lanes: results at offsets 224 to 272.
   These results are only validated when IS_64BIT is set (see below). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, vs0, 0, SLJIT_S2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 224);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR4, 0,
SLJIT_IMM, LITTLE_BIG(WCONST(0xe3e2e1e0dfdedddc, 0), WCONST(0xdcdddedfe0e1e2e3, 0)));
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 240);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 8);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR3, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R2), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 256);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR1, 1, SLJIT_MEM1(SLJIT_R0), 100000);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 272);
/* 128 bit vector, 32 bit float lanes: results at offsets 288 to 336. */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_32;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR0, 0, SLJIT_MEM1(SLJIT_S0), 12);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR0, 0, SLJIT_FR0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 288);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR5, 0, SLJIT_MEM1(SLJIT_S0), 4);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR2, 0, SLJIT_FR5, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 304);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 0);
/* Indexed source: S0 + (1 << 3), i.e. buf + 8. */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 1);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR1, 0, SLJIT_MEM2(SLJIT_S0, SLJIT_R2), 3);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 320);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR4, 1, SLJIT_FR4, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 336);
/* 128 bit vector, 64 bit float lanes: results at offsets 352 to 400. */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 8);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR2, 0, SLJIT_FR2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 352);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR4, 0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR3, 0, SLJIT_FR4, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 368);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 0);
/* Absolute address source. */
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR1, 0, SLJIT_MEM0(), (sljit_sw)(buf + 8));
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 384);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR2, 1, SLJIT_FR2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 400);
/* 256 bit vectors: results at offsets 416 to 640. The return value of the
   first 256 bit lane move decides whether these results are validated. */
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_8;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 215);
supported[0] = sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR0, 0, SLJIT_R2, 0) != SLJIT_ERR_UNSUPPORTED;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 416);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, vs0, 17, SLJIT_IMM, 78);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 448);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_16;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 0xff3433);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR1, 4, SLJIT_S1, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 480);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR5, 5, SLJIT_MEM1(SLJIT_S0), 60);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 512);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR4, 3, SLJIT_MEM0(), (sljit_sw)buf + 32);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 544);
/* The two 256 bit float32 cases do not load the destination vector first. */
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_FLOAT;
sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR2, 0, SLJIT_MEM1(SLJIT_S0), 48);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR2, 3, SLJIT_FR2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 576);
sljit_emit_fop1(compiler, SLJIT_MOV_F32, SLJIT_FR3, 0, SLJIT_MEM1(SLJIT_S0), 8);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR3, 6, SLJIT_FR3, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 608);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
/* The 8 bytes at buf + 40 are copied to the stack and used as the source. */
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_SP), 8, SLJIT_MEM1(SLJIT_S0), 40);
sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LANE_ZERO | type, SLJIT_VR0, 3, SLJIT_MEM1(SLJIT_SP), 8);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 640);
sljit_emit_return_void(compiler);
code.code = sljit_generate_code(compiler, 0, NULL);
CHECK(compiler);
sljit_free_compiler(compiler);
/* Run the generated code on the prepared buffer, then release it. */
code.func1((sljit_sw)buf);
sljit_free_code(code.code, NULL);
/* Checker arguments: output slot, vector size in bytes, element size in
   bytes, byte offset of the lane, first expected byte of the lane. */
FAILED(!check_simd_lane_mov_zero(buf + 64, 16, 1, 0, 85), "test_simd5 case 1 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 80, 16, 1, 0, 18), "test_simd5 case 2 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 96, 16, 1, 5, 170), "test_simd5 case 3 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 112, 16, 2, 0, 108), "test_simd5 case 4 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 128, 16, 2, 0, 130), "test_simd5 case 5 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 144, 16, 2, 6, 104), "test_simd5 case 6 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 160, 16, 4, 0, 104), "test_simd5 case 7 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 176, 16, 4, 0, 38), "test_simd5 case 8 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 192, 16, 4, 0, 112), "test_simd5 case 9 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 208, 16, 4, 12, 108), "test_simd5 case 10 failed\n");
/* 64 bit integer lane results are only validated when IS_64BIT is set. */
#if IS_64BIT
FAILED(!check_simd_lane_mov_zero(buf + 224, 16, 8, 0, 100), "test_simd5 case 11 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 240, 16, 8, 0, 220), "test_simd5 case 12 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 256, 16, 8, 0, 108), "test_simd5 case 13 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 272, 16, 8, 8, 100), "test_simd5 case 14 failed\n");
#endif
FAILED(!check_simd_lane_mov_zero(buf + 288, 16, 4, 0, 112), "test_simd5 case 15 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 304, 16, 4, 0, 104), "test_simd5 case 16 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 320, 16, 4, 0, 108), "test_simd5 case 17 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 336, 16, 4, 4, 100), "test_simd5 case 18 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 352, 16, 8, 0, 108), "test_simd5 case 19 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 368, 16, 8, 0, 100), "test_simd5 case 20 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 384, 16, 8, 0, 108), "test_simd5 case 21 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 400, 16, 8, 8, 100), "test_simd5 case 22 failed\n");
/* 256 bit results are validated only if the operation was supported. */
if (supported[0]) {
FAILED(!check_simd_lane_mov_zero(buf + 416, 32, 1, 0, 215), "test_simd5 case 23 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 448, 32, 1, 17, 78), "test_simd5 case 24 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 480, 32, 2, 8, 51), "test_simd5 case 25 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 512, 32, 4, 20, 160), "test_simd5 case 26 failed\n");
#if IS_64BIT
FAILED(!check_simd_lane_mov_zero(buf + 544, 32, 8, 24, 132), "test_simd5 case 27 failed\n");
#endif
FAILED(!check_simd_lane_mov_zero(buf + 576, 32, 4, 12, 148), "test_simd5 case 28 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 608, 32, 4, 24, 108), "test_simd5 case 29 failed\n");
FAILED(!check_simd_lane_mov_zero(buf + 640, 32, 8, 24, 140), "test_simd5 case 30 failed\n");
}
SIMD_RUN_END
successful_tests++;
}
/* Fills length bytes at buf with consecutive values starting at data.
   elem_size selects the storage width: 1 stores bytes, 2 stores 16 bit
   values, and any other size stores either a 32 bit integer or, when
   is_float is non-zero, a 32 bit float. The pointer always advances by
   elem_size, and at least one element is written. */
static void init_simd_extend(sljit_u8* buf, sljit_s32 length, sljit_s32 elem_size, sljit_s32 is_float, sljit_s32 data)
{
	sljit_u8* const limit = buf + length;
	sljit_u8* dst = buf;
	sljit_s32 next = data;

	do {
		switch (elem_size) {
		case 1:
			*dst = (sljit_u8)next;
			break;
		case 2:
			*(sljit_u16*)dst = (sljit_u16)next;
			break;
		default:
			if (is_float)
				*(sljit_f32*)dst = (sljit_f32)next;
			else
				*(sljit_u32*)dst = (sljit_u32)next;
			break;
		}

		dst += elem_size;
		next++;
	} while (dst < limit);
}
/* Validates length bytes of zero-extended elements at buf.
   The elements are expected to be consecutive values masked by mask;
   the first value is minus half the element count (-(length >> 2),
   -(length >> 3) or -(length >> 4) for 2, 4 and 8 byte elements).
   Any elem_size other than 2 or 4 is checked as two 32 bit halves:
   the low half holds the masked value and the high half must be zero.
   Returns 1 on success, 0 on the first mismatch. */
static sljit_s32 check_simd_extend_unsigned(sljit_u8* buf, sljit_s32 length, sljit_s32 elem_size, sljit_u32 mask)
{
#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
	const sljit_s32 low_offs = 0, high_offs = 4;
#else
	const sljit_s32 low_offs = 4, high_offs = 0;
#endif
	sljit_u8* const limit = buf + length;
	sljit_u8* src = buf;
	sljit_s32 expected;

	switch (elem_size) {
	case 2:
		expected = -(length >> 2);
		break;
	case 4:
		expected = -(length >> 3);
		break;
	default:
		expected = -(length >> 4);
		break;
	}

	do {
		switch (elem_size) {
		case 2:
			if (*(sljit_u16*)src != ((sljit_u16)expected & mask))
				return 0;
			break;
		case 4:
			if (*(sljit_u32*)src != ((sljit_u32)expected & mask))
				return 0;
			break;
		default:
			/* Low half first, then the high half (must be zero). */
			if (*(sljit_u32*)(src + low_offs) != ((sljit_u32)expected & mask)
					|| *(sljit_u32*)(src + high_offs) != 0)
				return 0;
			break;
		}

		src += elem_size;
		expected++;
	} while (src < limit);

	return 1;
}
/* Validates length bytes of sign-extended (or float-widened) elements.
   For 2 and 4 byte elements the expected values are consecutive signed
   integers starting at -(length >> 2) and -(length >> 3) respectively;
   is_float is not consulted for these sizes. For any other size:
   - integer (is_float == 0): values start at -(length >> 4) and are
     checked as two 32 bit halves, where the high half must equal the
     sign of the value (value >> 31);
   - float: 64 bit floating point values starting at 1000.
   Returns 1 on success, 0 on the first mismatch. */
static sljit_s32 check_simd_extend_signed(sljit_u8* buf, sljit_s32 length, sljit_s32 elem_size, sljit_s32 is_float)
{
#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
	const sljit_s32 low_offs = 0, high_offs = 4;
#else
	const sljit_s32 low_offs = 4, high_offs = 0;
#endif
	sljit_u8* const limit = buf + length;
	sljit_u8* src = buf;
	sljit_s32 expected;

	switch (elem_size) {
	case 2:
		expected = -(length >> 2);
		break;
	case 4:
		expected = -(length >> 3);
		break;
	default:
		expected = is_float ? 1000 : -(length >> 4);
		break;
	}

	do {
		switch (elem_size) {
		case 2:
			if (*(sljit_s16*)src != expected)
				return 0;
			break;
		case 4:
			if (*(sljit_s32*)src != expected)
				return 0;
			break;
		default:
			if (is_float) {
				if (*(sljit_f64*)src != (sljit_f64)expected)
					return 0;
				break;
			}

			/* Low half carries the value, high half its sign extension. */
			if (*(sljit_s32*)(src + low_offs) != expected)
				return 0;
			if (*(sljit_s32*)(src + high_offs) != (expected >> 31))
				return 0;
			break;
		}

		src += elem_size;
		expected++;
	} while (src < limit);

	return 1;
}
/* Tests sljit_emit_simd_extend(): widening of packed elements to a larger
   element size, unsigned by default and signed with
   SLJIT_SIMD_EXTEND_SIGNED, plus 32 bit to 64 bit float widening. Sources
   are vector registers and several memory addressing forms. The generated
   function receives the buffer address as its only argument; every result
   is stored into its own output slot and validated afterwards by
   check_simd_extend_unsigned() or check_simd_extend_signed(). */
static void test_simd6(void)
{
executable_code code;
struct sljit_compiler* compiler;
sljit_s32 options = 0;
sljit_s32 i, type;
/* supported[0]: 64 bit vector register load accepted (gates cases 28-33).
   supported[1]: 256 bit extend accepted (gates cases 34-41). */
sljit_u8 supported[2];
sljit_u8* buf;
/* 63 spare bytes allow buf to be rounded up to a 64 byte boundary. */
sljit_u8 data[63 + 1088];
/* A saved vector register when the target has one, SLJIT_VR5 otherwise. */
sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5;
if (verbose)
printf("Run test_simd6\n");
/* On x86 the code up to SIMD_RUN_END is executed twice; the second pass
   uses options == SLJIT_ENTER_USE_VEX (see the macro definitions). */
SIMD_RUN_START
buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63);
for (i = 0; i < 1088; i++)
buf[i] = 0xaa;
/* Source data: runs of consecutive integers that start at minus half the
   element count, and float runs that start at 1000. Slots at 0..96 hold
   16 bytes, 128..224 hold 8 bytes, 256/288 hold 4 bytes, 320 holds 2. */
init_simd_extend(buf + 0, 16, 1, 0, -8);
init_simd_extend(buf + 32, 16, 2, 0, -4);
init_simd_extend(buf + 64, 16, 4, 0, -2);
init_simd_extend(buf + 96, 16, 4, 1, 1000);
init_simd_extend(buf + 128, 8, 1, 0, -4);
init_simd_extend(buf + 160, 8, 2, 0, -2);
init_simd_extend(buf + 192, 8, 4, 0, -1);
init_simd_extend(buf + 224, 8, 4, 1, 1000);
init_simd_extend(buf + 256, 4, 1, 0, -2);
init_simd_extend(buf + 288, 4, 2, 0, -1);
init_simd_extend(buf + 320, 2, 1, 0, -1);
compiler = sljit_create_compiler(NULL);
FAILED(!compiler, "cannot create compiler\n");
sljit_emit_enter(compiler, options, SLJIT_ARGS1V(P), 4 | SLJIT_ENTER_VECTOR(6), 4 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? 2 : 0), 32);
/* 128 bit, 8 -> 16 bit: source at 128, results at 352..400. */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_EXTEND_16;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 128);
sljit_emit_simd_extend(compiler, type, SLJIT_VR2, SLJIT_VR0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 352);
sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_VR1, SLJIT_VR0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 368);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R1, 0, SLJIT_S0, 0, SLJIT_IMM, 128);
sljit_emit_simd_extend(compiler, type, SLJIT_VR0, SLJIT_MEM1(SLJIT_R1), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 384);
/* The source is copied to the stack and extended from there. */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 128);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_SP), 0);
sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, vs0, SLJIT_MEM1(SLJIT_SP), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 400);
/* 128 bit, 16 -> 32 bit: source at 160, results at 416..464. */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16 | SLJIT_SIMD_EXTEND_32;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 160);
/* Source and destination are the same register here. */
sljit_emit_simd_extend(compiler, type, SLJIT_VR4, SLJIT_VR4, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 416);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 160);
sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_VR0, SLJIT_VR4, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 432);
/* Indexed source: S0 + (40 << 2), i.e. buf + 160. */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 40);
sljit_emit_simd_extend(compiler, type, SLJIT_VR1, SLJIT_MEM2(SLJIT_S0, SLJIT_R2), 2);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 448);
sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, vs0, SLJIT_MEM0(), (sljit_sw)(buf + 160));
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 464);
/* 128 bit, 32 -> 64 bit: source at 192, results at 480..528. */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_EXTEND_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 192);
sljit_emit_simd_extend(compiler, type, SLJIT_VR0, SLJIT_VR2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 480);
sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_VR3, SLJIT_VR2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 496);
/* Large positive and negative displacements that both resolve to buf + 192. */
sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 10000 - 192);
sljit_emit_simd_extend(compiler, type, SLJIT_VR2, SLJIT_MEM1(SLJIT_R0), 10000);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 512);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_S0, 0, SLJIT_IMM, 100000 + 192);
sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, vs0, SLJIT_MEM1(SLJIT_R0), -100000);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 528);
/* 128 bit, float 32 -> 64 bit: source at 224, results at 544..576. */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_EXTEND_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 224);
sljit_emit_simd_extend(compiler, type, vs0, vs0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 544);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 224);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_SP), 0);
sljit_emit_simd_extend(compiler, type, SLJIT_VR3, SLJIT_MEM1(SLJIT_SP), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 560);
sljit_emit_simd_extend(compiler, type, SLJIT_VR5, SLJIT_VR1, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR5, SLJIT_MEM1(SLJIT_S0), 576);
/* 128 bit, 8 -> 32 bit: source at 256, results at 592..640. */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_EXTEND_32;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 256);
sljit_emit_simd_extend(compiler, type, SLJIT_VR0, SLJIT_VR2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 592);
sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_VR2, SLJIT_VR2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 608);
sljit_emit_simd_extend(compiler, type, vs0, SLJIT_MEM1(SLJIT_S0), 256);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 624);
sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_VR4, SLJIT_MEM0(), (sljit_sw)(buf + 256));
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 640);
/* 128 bit, 8 -> 64 bit: source at 320, results at 656..704. */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_EXTEND_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 320);
sljit_emit_simd_extend(compiler, type, SLJIT_VR0, vs0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 656);
sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_VR0, vs0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 672);
/* The two source bytes are copied to the stack and extended from there. */
sljit_emit_op1(compiler, SLJIT_MOV_U16, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_MEM1(SLJIT_S0), 320);
sljit_emit_simd_extend(compiler, type, SLJIT_VR3, SLJIT_MEM1(SLJIT_SP), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 688);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, 320);
sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_VR3, SLJIT_MEM2(SLJIT_S0, SLJIT_S2), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 704);
/* 128 bit, 16 -> 64 bit: source at 288, results at 720..768. */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16 | SLJIT_SIMD_EXTEND_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 288);
sljit_emit_simd_extend(compiler, type, SLJIT_VR2, SLJIT_VR0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 720);
sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_VR0, SLJIT_VR0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 736);
/* Large positive and negative displacements that both resolve to buf + 288. */
sljit_emit_op2(compiler, SLJIT_SUB, SLJIT_R2, 0, SLJIT_S0, 0, SLJIT_IMM, 100000 - 288);
sljit_emit_simd_extend(compiler, type, vs0, SLJIT_MEM1(SLJIT_R2), 100000);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 752);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S2, 0, SLJIT_S0, 0, SLJIT_IMM, 10000 + 288);
sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_VR1, SLJIT_MEM1(SLJIT_S2), -10000);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 768);
/* 64 bit vectors: results at 784..824 (8 bytes each). The return value of
   the first 64 bit load decides whether these results are validated. */
type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_EXTEND_16;
supported[0] = sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 256) != SLJIT_ERR_UNSUPPORTED;
sljit_emit_simd_extend(compiler, type, vs0, SLJIT_VR1, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 784);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S0, 0, SLJIT_IMM, 256);
sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_VR2, SLJIT_MEM1(SLJIT_R2), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 792);
type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_EXTEND_32;
sljit_emit_simd_extend(compiler, type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 320);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 800);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 320);
sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_VR2, vs0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 808);
type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_16 | SLJIT_SIMD_EXTEND_32;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 288);
sljit_emit_simd_extend(compiler, type, SLJIT_VR2, SLJIT_VR1, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 816);
/* Here the offset (288) is in the base register and S0 is the index. */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S1, 0, SLJIT_IMM, 288);
sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, vs0, SLJIT_MEM2(SLJIT_S1, SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 824);
/* 256 bit vectors: results at 832..1056 (32 bytes each). The return value
   of the first 256 bit extend decides whether these results are validated. */
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_EXTEND_16;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 0);
supported[1] = sljit_emit_simd_extend(compiler, type, SLJIT_VR4, SLJIT_VR1, 0) != SLJIT_ERR_UNSUPPORTED;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 832);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_EXTEND_32;
sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 128);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 864);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_EXTEND_64;
sljit_emit_simd_extend(compiler, type, vs0, SLJIT_MEM0(), (sljit_sw)(buf + 256));
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 896);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_16 | SLJIT_SIMD_EXTEND_32;
/* Indexed source: R1 + (16 << 1), i.e. buf + 32. */
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_S0, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 16);
sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_VR0, SLJIT_MEM2(SLJIT_R1, SLJIT_R2), 1);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 928);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_16 | SLJIT_SIMD_EXTEND_64;
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_S1, 0, SLJIT_S0, 0, SLJIT_IMM, 100000 + 160);
sljit_emit_simd_extend(compiler, type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S1), -100000);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 960);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_EXTEND_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_extend(compiler, type | SLJIT_SIMD_EXTEND_SIGNED, SLJIT_VR0, vs0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 992);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_EXTEND_64 | SLJIT_SIMD_FLOAT;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 96);
sljit_emit_simd_extend(compiler, type, SLJIT_VR2, SLJIT_VR2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 1024);
sljit_emit_simd_extend(compiler, type, SLJIT_VR4, SLJIT_MEM0(), (sljit_sw)(buf + 96));
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 1056);
sljit_emit_return_void(compiler);
code.code = sljit_generate_code(compiler, 0, NULL);
CHECK(compiler);
sljit_free_compiler(compiler);
/* Run the generated code on the prepared buffer, then release it. */
code.func1((sljit_sw)buf);
sljit_free_code(code.code, NULL);
/* Unsigned checker arguments: slot, result size in bytes, result element
   size, mask of the source element width. Signed checker arguments: slot,
   result size in bytes, result element size, is_float. */
FAILED(!check_simd_extend_unsigned(buf + 352, 16, 2, 0xff), "test_simd6 case 1 failed\n");
FAILED(!check_simd_extend_signed(buf + 368, 16, 2, 0), "test_simd6 case 2 failed\n");
FAILED(!check_simd_extend_unsigned(buf + 384, 16, 2, 0xff), "test_simd6 case 3 failed\n");
FAILED(!check_simd_extend_signed(buf + 400, 16, 2, 0), "test_simd6 case 4 failed\n");
FAILED(!check_simd_extend_unsigned(buf + 416, 16, 4, 0xffff), "test_simd6 case 5 failed\n");
FAILED(!check_simd_extend_signed(buf + 432, 16, 4, 0), "test_simd6 case 6 failed\n");
FAILED(!check_simd_extend_unsigned(buf + 448, 16, 4, 0xffff), "test_simd6 case 7 failed\n");
FAILED(!check_simd_extend_signed(buf + 464, 16, 4, 0), "test_simd6 case 8 failed\n");
FAILED(!check_simd_extend_unsigned(buf + 480, 16, 8, 0xffffffff), "test_simd6 case 9 failed\n");
FAILED(!check_simd_extend_signed(buf + 496, 16, 8, 0), "test_simd6 case 10 failed\n");
FAILED(!check_simd_extend_unsigned(buf + 512, 16, 8, 0xffffffff), "test_simd6 case 11 failed\n");
FAILED(!check_simd_extend_signed(buf + 528, 16, 8, 0), "test_simd6 case 12 failed\n");
FAILED(!check_simd_extend_signed(buf + 544, 16, 8, 1), "test_simd6 case 13 failed\n");
FAILED(!check_simd_extend_signed(buf + 560, 16, 8, 1), "test_simd6 case 14 failed\n");
FAILED(!check_simd_extend_signed(buf + 576, 16, 8, 1), "test_simd6 case 15 failed\n");
FAILED(!check_simd_extend_unsigned(buf + 592, 16, 4, 0xff), "test_simd6 case 16 failed\n");
FAILED(!check_simd_extend_signed(buf + 608, 16, 4, 0), "test_simd6 case 17 failed\n");
FAILED(!check_simd_extend_unsigned(buf + 624, 16, 4, 0xff), "test_simd6 case 18 failed\n");
FAILED(!check_simd_extend_signed(buf + 640, 16, 4, 0), "test_simd6 case 19 failed\n");
FAILED(!check_simd_extend_unsigned(buf + 656, 16, 8, 0xff), "test_simd6 case 20 failed\n");
FAILED(!check_simd_extend_signed(buf + 672, 16, 8, 0), "test_simd6 case 21 failed\n");
FAILED(!check_simd_extend_unsigned(buf + 688, 16, 8, 0xff), "test_simd6 case 22 failed\n");
FAILED(!check_simd_extend_signed(buf + 704, 16, 8, 0), "test_simd6 case 23 failed\n");
FAILED(!check_simd_extend_unsigned(buf + 720, 16, 8, 0xffff), "test_simd6 case 24 failed\n");
FAILED(!check_simd_extend_signed(buf + 736, 16, 8, 0), "test_simd6 case 25 failed\n");
FAILED(!check_simd_extend_unsigned(buf + 752, 16, 8, 0xffff), "test_simd6 case 26 failed\n");
FAILED(!check_simd_extend_signed(buf + 768, 16, 8, 0), "test_simd6 case 27 failed\n");
/* 64 bit vector results. */
if (supported[0]) {
FAILED(!check_simd_extend_unsigned(buf + 784, 8, 2, 0xff), "test_simd6 case 28 failed\n");
FAILED(!check_simd_extend_signed(buf + 792, 8, 2, 0), "test_simd6 case 29 failed\n");
FAILED(!check_simd_extend_unsigned(buf + 800, 8, 4, 0xff), "test_simd6 case 30 failed\n");
FAILED(!check_simd_extend_signed(buf + 808, 8, 4, 0), "test_simd6 case 31 failed\n");
FAILED(!check_simd_extend_unsigned(buf + 816, 8, 4, 0xffff), "test_simd6 case 32 failed\n");
FAILED(!check_simd_extend_signed(buf + 824, 8, 4, 0), "test_simd6 case 33 failed\n");
}
/* 256 bit vector results. */
if (supported[1]) {
FAILED(!check_simd_extend_unsigned(buf + 832, 32, 2, 0xff), "test_simd6 case 34 failed\n");
FAILED(!check_simd_extend_signed(buf + 864, 32, 4, 0), "test_simd6 case 35 failed\n");
FAILED(!check_simd_extend_unsigned(buf + 896, 32, 8, 0xff), "test_simd6 case 36 failed\n");
FAILED(!check_simd_extend_signed(buf + 928, 32, 4, 0), "test_simd6 case 37 failed\n");
FAILED(!check_simd_extend_unsigned(buf + 960, 32, 8, 0xffff), "test_simd6 case 38 failed\n");
FAILED(!check_simd_extend_signed(buf + 992, 32, 8, 0), "test_simd6 case 39 failed\n");
FAILED(!check_simd_extend_signed(buf + 1024, 32, 8, 1), "test_simd6 case 40 failed\n");
FAILED(!check_simd_extend_signed(buf + 1056, 32, 8, 1), "test_simd6 case 41 failed\n");
}
SIMD_RUN_END
successful_tests++;
}
/*
 * Fills the region [buf, buf + length) with elements of elem_size bytes whose
 * most significant bit is taken from successive bits of `data` (bit 0 first)
 * and whose remaining bits are all set.
 *
 * buf       - start of the region to fill
 * length    - size of the region in bytes
 * elem_size - 1, 2 or 4; any other value is handled as an 8 byte element
 * data      - sign bits, one per element, consumed from the lowest bit upwards
 *
 * On little endian targets bit 0 goes to the element at the lowest address,
 * otherwise to the element at the highest address.
 *
 * Only whole elements that fit into the region are written. The previous
 * implementation stepped a pointer below `buf` to terminate the big endian
 * loop (undefined behaviour when `buf` is the first byte of an array) and
 * always wrote at least one element, even for an empty region.
 */
static void init_simd_sign(sljit_u8* buf, sljit_s32 length, sljit_s32 elem_size, sljit_u32 data)
{
	sljit_s32 offset;
	sljit_u32 sign;
	sljit_u8* elem;

	for (offset = 0; offset + elem_size <= length; offset += elem_size) {
#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
		elem = buf + offset;
#else
		/* Walk downwards from the last element without ever leaving the region. */
		elem = buf + (length - elem_size - offset);
#endif
		sign = data & 0x1;

		if (elem_size == 1)
			*elem = (sljit_u8)((sign << 7) + 0x7f);
		else if (elem_size == 2)
			*(sljit_u16*)elem = (sljit_u16)((sign << 15) + 0x7fff);
		else if (elem_size == 4)
			*(sljit_u32*)elem = (sljit_u32)((sign << 31) + 0x7fffffff);
		else {
			/* 8 byte element written as two 32 bit halves; the sign bit
			   belongs to the most significant half. */
#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
			*(sljit_u32*)elem = 0xffffffff;
			*(sljit_u32*)(elem + 4) = (sljit_u32)((sign << 31) + 0x7fffffff);
#else
			*(sljit_u32*)(elem + 4) = 0xffffffff;
			*(sljit_u32*)elem = (sljit_u32)((sign << 31) + 0x7fffffff);
#endif
		}

		data >>= 1;
	}
}
/*
 * test_simd7: checks sljit_emit_simd_sign on 128 bit registers and, when the
 * target accepts them, on 64 bit and 256 bit registers.
 *
 * Every source vector is produced by init_simd_sign() from a bit pattern and
 * the final checks expect exactly that pattern back as an integer. Word sized
 * results are collected in resw[], results produced with SLJIT_32 in res32[].
 * The generated function is called as func3(buf, resw, res32); the emitted
 * code addresses these through S0, S1 and S2.
 *
 * Where SIMD_RUN_START / SIMD_RUN_END expand to a loop (x86), the whole body
 * runs a second time with options set to SLJIT_ENTER_USE_VEX.
 */
static void test_simd7(void)
{
executable_code code;
struct sljit_compiler* compiler;
sljit_s32 options = 0;
sljit_s32 i, type;
/* supported[0]: 64 bit register forms, supported[1]: 256 bit register forms. */
sljit_u8 supported[2];
sljit_u8* buf;
/* 63 spare bytes so that a 64 byte aligned window of 320 bytes always fits. */
sljit_u8 data[63 + 320];
sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? SLJIT_VS0 : SLJIT_VR5;
sljit_uw resw[10];
sljit_u32 res32[7];
if (verbose)
printf("Run test_simd7\n");
SIMD_RUN_START
/* Round the start of data up to the next multiple of 64. */
buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63);
/* Preset every result slot to all ones so untouched slots stay recognisable
   (res32[3] is expected to remain untouched, see case 7). */
for (i = 0; i < 10; i++)
resw[i] = (sljit_uw)-1;
for (i = 0; i < 7; i++)
res32[i] = (sljit_u32)-1;
/* Inputs for the 128 bit register cases. */
init_simd_sign(buf + 0, 16, 1, 0x8fa3);
init_simd_sign(buf + 16, 16, 1, 0x34d5);
init_simd_sign(buf + 32, 16, 2, 0xa6);
init_simd_sign(buf + 48, 16, 2, 0x5e);
init_simd_sign(buf + 64, 16, 4, 0xd);
init_simd_sign(buf + 80, 16, 4, 0x5);
init_simd_sign(buf + 96, 16, 8, 0x2);
init_simd_sign(buf + 112, 16, 8, 0x1);
/* Bytes 128..135 become 0x00 and 136..143 become 0xff; the memset calls
   overwrite everything the preceding init_simd_sign call stored. */
init_simd_sign(buf + 128, 16, 1, 0x0);
memset(buf + 128, 0, 8);
memset(buf + 136, 255, 8);
/* Inputs for the 64 bit register cases. */
init_simd_sign(buf + 144, 8, 1, 0x45);
init_simd_sign(buf + 152, 8, 2, 0x9);
init_simd_sign(buf + 160, 8, 4, 0x1);
/* Inputs for the 256 bit register cases. */
init_simd_sign(buf + 192, 32, 1, 0x51e83b71);
init_simd_sign(buf + 224, 32, 2, 0xc90d);
init_simd_sign(buf + 256, 32, 4, 0xa5);
init_simd_sign(buf + 288, 32, 8, 0x9);
compiler = sljit_create_compiler(NULL);
FAILED(!compiler, "cannot create compiler\n");
/* 16 bytes of local space are requested; the stack slot at SP + 4 is used below. */
sljit_emit_enter(compiler, options, SLJIT_ARGS3V(P, P, P), 4 | SLJIT_ENTER_VECTOR(6), 4 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? 2 : 0), 16);
/* 8 bit elements: result in a register, then copied to resw[0] (case 1). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_R0, 0);
/* 8 bit elements: result stored straight to memory, resw[1] (case 2). */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 16);
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_uw));
/* 16 bit elements: via R2 into resw[2] (case 3). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_R2, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_uw), SLJIT_R2, 0);
/* 16 bit elements with SLJIT_32: via the stack slot at SP + 4 into res32[0] (case 4). */
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 48);
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type | SLJIT_32, SLJIT_VR4, SLJIT_MEM1(SLJIT_SP), 4);
sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S2), 0, SLJIT_MEM1(SLJIT_SP), 4);
/* 32 bit elements with SLJIT_32: via R1 into res32[1] (case 5). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type | SLJIT_32, vs0, SLJIT_R1, 0);
sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S2), sizeof(sljit_u32), SLJIT_R1, 0);
/* 32 bit float elements, two register addressing with R1 = 4; the result is
   checked in res32[2] (case 6). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_FLOAT;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 80);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 4);
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type | SLJIT_32, SLJIT_VR1, SLJIT_MEM2(SLJIT_S2, SLJIT_R1), 1);
/* 64 bit float elements: via S3 into resw[3] (case 8). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 96);
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_S3, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_uw), SLJIT_S3, 0);
/* 64 bit elements: stored through the absolute address of resw[4] (case 9). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 112);
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM0(), (sljit_sw)(resw + 4));
/* The 0x00 / 0xff vector prepared with memset: resw[5] (case 10). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 128);
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_uw));
/* 64 bit registers. The first load doubles as the capability probe; the
   results (resw[6], res32[4], resw[7]) are only checked when it is accepted. */
type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_8;
supported[0] = sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 144) != SLJIT_ERR_UNSUPPORTED;
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_R0, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 6 * sizeof(sljit_uw), SLJIT_R0, 0);
type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_16;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 152);
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type | SLJIT_32, SLJIT_VR0, SLJIT_MEM1(SLJIT_S2), 4 * sizeof(sljit_u32));
type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_32;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 160);
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S1), 7 * sizeof(sljit_uw));
/* 256 bit registers, probed the same way; results land in res32[5], resw[8],
   resw[9] and res32[6]. */
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_8;
supported[1] = sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 192) != SLJIT_ERR_UNSUPPORTED;
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type | SLJIT_32, SLJIT_VR2, SLJIT_R2, 0);
sljit_emit_op1(compiler, SLJIT_MOV32, SLJIT_MEM1(SLJIT_S2), 5 * sizeof(sljit_u32), SLJIT_R2, 0);
/* Two register addressing based on a copy of S1 with R1 = sizeof(sljit_uw);
   the result is checked in resw[8] (case 15). */
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_16;
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_S1, 0);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, sizeof(sljit_uw));
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 224);
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM2(SLJIT_R2, SLJIT_R1), 3);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 256);
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S1), 9 * sizeof(sljit_uw));
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 288);
sljit_emit_simd_sign(compiler, SLJIT_SIMD_STORE | type | SLJIT_32, SLJIT_VR0, SLJIT_MEM0(), (sljit_sw)(res32 + 6));
sljit_emit_return_void(compiler);
code.code = sljit_generate_code(compiler, 0, NULL);
CHECK(compiler);
sljit_free_compiler(compiler);
code.func3((sljit_sw)buf, (sljit_sw)resw, (sljit_sw)res32);
sljit_free_code(code.code, NULL);
/* Each expected value equals the pattern handed to init_simd_sign for that input. */
FAILED(resw[0] != 0x8fa3, "test_simd7 case 1 failed\n");
FAILED(resw[1] != 0x34d5, "test_simd7 case 2 failed\n");
FAILED(resw[2] != 0xa6, "test_simd7 case 3 failed\n");
FAILED(res32[0] != 0x5e, "test_simd7 case 4 failed\n");
FAILED(res32[1] != 0xd, "test_simd7 case 5 failed\n");
FAILED(res32[2] != 0x5, "test_simd7 case 6 failed\n");
/* No emitted instruction targets res32[3]; it must still hold its preset value. */
FAILED(res32[3] != (sljit_u32)-1, "test_simd7 case 7 failed\n");
FAILED(resw[3] != 0x2, "test_simd7 case 8 failed\n");
FAILED(resw[4] != 0x1, "test_simd7 case 9 failed\n");
FAILED(resw[5] != LITTLE_BIG(0xff00, 0xff), "test_simd7 case 10 failed\n");
if (supported[0]) {
FAILED(resw[6] != 0x45, "test_simd7 case 11 failed\n");
FAILED(res32[4] != 0x9, "test_simd7 case 12 failed\n");
FAILED(resw[7] != 0x1, "test_simd7 case 13 failed\n");
}
if (supported[1]) {
FAILED(res32[5] != 0x51e83b71, "test_simd7 case 14 failed\n");
FAILED(resw[8] != 0xc90d, "test_simd7 case 15 failed\n");
FAILED(resw[9] != 0xa5, "test_simd7 case 16 failed\n");
FAILED(res32[6] != 0x9, "test_simd7 case 17 failed\n");
}
SIMD_RUN_END
successful_tests++;
}
/*
 * Stores `data` into every 32 bit word of the region that starts at `buf`
 * and spans `length` bytes.
 */
static void init_simd_u32(sljit_u8* buf, sljit_s32 length, sljit_u32 data)
{
	sljit_u32* const limit = (sljit_u32*)(buf + length);
	sljit_u32* word;

	for (word = (sljit_u32*)buf; word < limit; word++)
		*word = data;
}
/*
 * Verifies that every 32 bit word of the region that starts at `buf` and
 * spans `length` bytes equals `data`.
 * Returns 1 when all words match, 0 at the first mismatch.
 */
static sljit_s32 check_simd_u32(sljit_u8* buf, sljit_s32 length, sljit_u32 data)
{
	sljit_u32* const limit = (sljit_u32*)(buf + length);
	sljit_u32* word;

	for (word = (sljit_u32*)buf; word < limit; word++) {
		if (*word != data)
			return 0;
	}

	return 1;
}
/*
 * test_simd8: checks sljit_emit_simd_op2 with the AND, OR and XOR operations
 * on register operands, for 128 bit registers and, when accepted by the
 * target, 64 bit and 256 bit registers.
 *
 * Both inputs are constant 32 bit patterns: buf[0..31] holds 0x00ff00ff and
 * buf[32..63] holds 0x0000ffff. Independent of the element type selected, the
 * results are therefore 0x000000ff (AND), 0x00ffffff (OR) and 0x00ffff00 (XOR)
 * in every 32 bit word, which is what the final checks compare against.
 *
 * Where SIMD_RUN_START / SIMD_RUN_END expand to a loop (x86), the whole body
 * runs a second time with options set to SLJIT_ENTER_USE_VEX.
 */
static void test_simd8(void)
{
executable_code code;
struct sljit_compiler* compiler;
sljit_s32 options = 0;
sljit_s32 i, type;
/* supported[0]: 64 bit register forms, supported[1]: 256 bit register forms. */
sljit_u8 supported[2];
sljit_u8* buf;
/* 63 spare bytes so that a 64 byte aligned window of 1024 bytes always fits. */
sljit_u8 data[63 + 1024];
sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? SLJIT_VS1 : SLJIT_VR5;
if (verbose)
printf("Run test_simd8\n");
SIMD_RUN_START
/* Round the start of data up to the next multiple of 64. */
buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63);
for (i = 0; i < 1024; i++)
buf[i] = 0xaa;
init_simd_u32(buf, 32, 0x00ff00ff);
init_simd_u32(buf + 32, 32, 0x0000ffff);
compiler = sljit_create_compiler(NULL);
FAILED(!compiler, "cannot create compiler\n");
/* NOTE(review): the entry is declared with SLJIT_ARGS3V(P, P, P) although the
   code is invoked through func1 below and only S0 is read - presumably a
   leftover from test_simd7; confirm whether a one argument signature was meant. */
sljit_emit_enter(compiler, options, SLJIT_ARGS3V(P, P, P), 4 | SLJIT_ENTER_VECTOR(6), 4 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? 2 : 0), 16);
/* AND, destination equals the first source -> buf + 64 (case 1). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | type, SLJIT_VR0, SLJIT_VR0, SLJIT_VR2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 64);
/* OR, destination equals the second source -> buf + 80 (case 2). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_FLOAT;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_OR | type, SLJIT_VR2, SLJIT_VR0, SLJIT_VR2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 80);
/* XOR, three distinct registers -> buf + 96 (case 3). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_XOR | type, SLJIT_VR4, vs0, SLJIT_VR2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 96);
/* AND on 64 bit float elements -> buf + 112 (case 4). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | type, SLJIT_VR1, SLJIT_VR2, SLJIT_VR0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 112);
/* OR with the 128 bit element size -> buf + 128 (case 5). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_128;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_OR | type, vs0, SLJIT_VR0, vs0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 128);
/* XOR on 32 bit float elements -> buf + 144 (case 6). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_FLOAT;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_XOR | type, SLJIT_VR2, SLJIT_VR4, SLJIT_VR0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 144);
/* 64 bit registers; the first load doubles as the capability probe and the
   results at buf + 160 / 168 / 176 are only checked when it is accepted. */
type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_32;
supported[0] = sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0) != SLJIT_ERR_UNSUPPORTED;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | type, SLJIT_VR4, SLJIT_VR0, SLJIT_VR4, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 160);
type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_64 | SLJIT_SIMD_FLOAT;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_OR | type, SLJIT_VR0, SLJIT_VR2, vs0, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 168);
type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_XOR | type, vs0, SLJIT_VR0, SLJIT_VR2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 176);
/* 256 bit registers; here the probe is an op2 call carrying SLJIT_SIMD_TEST,
   and the results at buf + 192 / 224 / 256 are only checked when it is accepted. */
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_8;
supported[1] = sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | type | SLJIT_SIMD_TEST, SLJIT_VR0, SLJIT_VR0, SLJIT_VR2, 0) != SLJIT_ERR_UNSUPPORTED;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | type, SLJIT_VR0, SLJIT_VR0, SLJIT_VR2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 192);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_256;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_OR | type, vs0, SLJIT_VR0, SLJIT_VR2, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 224);
type = SLJIT_SIMD_REG_256 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_FLOAT;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_XOR | type, SLJIT_VR3, SLJIT_VR1, SLJIT_VR3, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 256);
sljit_emit_return_void(compiler);
code.code = sljit_generate_code(compiler, 0, NULL);
CHECK(compiler);
sljit_free_compiler(compiler);
code.func1((sljit_sw)buf);
sljit_free_code(code.code, NULL);
/* AND -> 0x000000ff, OR -> 0x00ffffff, XOR -> 0x00ffff00 in every word. */
FAILED(!check_simd_u32(buf + 64, 16, 0x000000ff), "test_simd8 case 1 failed\n");
FAILED(!check_simd_u32(buf + 80, 16, 0x00ffffff), "test_simd8 case 2 failed\n");
FAILED(!check_simd_u32(buf + 96, 16, 0x00ffff00), "test_simd8 case 3 failed\n");
FAILED(!check_simd_u32(buf + 112, 16, 0x000000ff), "test_simd8 case 4 failed\n");
FAILED(!check_simd_u32(buf + 128, 16, 0x00ffffff), "test_simd8 case 5 failed\n");
FAILED(!check_simd_u32(buf + 144, 16, 0x00ffff00), "test_simd8 case 6 failed\n");
if (supported[0]) {
FAILED(!check_simd_u32(buf + 160, 8, 0x000000ff), "test_simd8 case 7 failed\n");
FAILED(!check_simd_u32(buf + 168, 8, 0x00ffffff), "test_simd8 case 8 failed\n");
FAILED(!check_simd_u32(buf + 176, 8, 0x00ffff00), "test_simd8 case 9 failed\n");
}
if (supported[1]) {
FAILED(!check_simd_u32(buf + 192, 32, 0x000000ff), "test_simd8 case 10 failed\n");
FAILED(!check_simd_u32(buf + 224, 32, 0x00ffffff), "test_simd8 case 11 failed\n");
FAILED(!check_simd_u32(buf + 256, 32, 0x00ffff00), "test_simd8 case 12 failed\n");
}
SIMD_RUN_END
successful_tests++;
}
/*
 * test_simd9: SIMD moves and lane moves that use SLJIT_TMP_DEST_REG,
 * SLJIT_TMP_MEM_REG and SLJIT_TMP_DEST_VREG as operands, combined with very
 * large address displacements (the WCONST values).
 *
 * Besides the moved data, the test verifies that the value held in the
 * temporary integer register involved in each sequence is unchanged after
 * the SIMD instruction. The generated function is called as func2(buf, wbuf):
 * vector data lives in buf (S0), integer results are collected in wbuf (S1).
 *
 * Where SIMD_RUN_START / SIMD_RUN_END expand to a loop (x86), the whole body
 * runs a second time with options set to SLJIT_ENTER_USE_VEX.
 *
 * Every check reports its own case number; previously the wbuf[2] and
 * wbuf[3] checks both reported "case 5", which made a failure ambiguous.
 */
static void test_simd9(void)
{
	executable_code code;
	struct sljit_compiler* compiler;
	sljit_s32 options = 0;
	sljit_s32 i, type;
	sljit_u8* buf;
	/* 63 spare bytes so that a 64 byte aligned window of 512 bytes always fits. */
	sljit_u8 data[63 + 512];
	sljit_sw wbuf[16];

	if (verbose)
		printf("Run test_simd9\n");

	SIMD_RUN_START

	/* Round the start of data up to the next multiple of 64. */
	buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63);

	for (i = 0; i < 512; i++)
		buf[i] = 0xaa;

	for (i = 0; i < 16; i++)
		wbuf[i] = -1;

	simd_set(buf, 53, 16);
	simd_set(buf + 16, 106, 16);
	simd_set(buf + 32, 216, 16);
	simd_set(buf + 48, 97, 16);
	/* 32 bit source value for the lane load from memory below. */
	*(sljit_s32*)(wbuf + 6) = 0x345678ab;

	compiler = sljit_create_compiler(NULL);
	FAILED(!compiler, "cannot create compiler\n");

	sljit_emit_enter(compiler, options, SLJIT_ARGS2V(P, P), 4 | SLJIT_ENTER_VECTOR(6), 4, 16);

	/* Copy buf[0..15] to buf[80..95] through SLJIT_TMP_DEST_VREG. Base
	   registers and displacements cancel out to the real addresses; the
	   constant in SLJIT_TMP_DEST_REG is saved to wbuf[0] afterwards. */
	type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_MEM_ALIGNED_128;
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, WCONST(0x1ca0ca0ca0ca0ca0, 0x1ca0ca0c));
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, (sljit_sw)(buf) + WCONST(0x49b29b29b29b29b2, 0x49b29b29));
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (sljit_sw)(buf + 80) - WCONST(0x2835835835835835, 0x28358358));
	sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_TMP_DEST_VREG, SLJIT_MEM1(SLJIT_S2), -WCONST(0x49b29b29b29b29b2, 0x49b29b29));
	sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_TMP_DEST_VREG, SLJIT_MEM1(SLJIT_R2), WCONST(0x2835835835835835, 0x28358358));
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 0, SLJIT_TMP_DEST_REG, 0);

	/* Copy buf[16..31] to buf[112..127] with SLJIT_TMP_MEM_REG as the base
	   register; its value is saved after each access (wbuf[1], wbuf[2]). */
	type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16 | SLJIT_SIMD_MEM_ALIGNED_128;
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_MEM_REG, 0, SLJIT_IMM, (sljit_sw)(buf + 16) + WCONST(0x4d63d63d63d63d63, 0x4d63d63d));
	sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_TMP_DEST_VREG, SLJIT_MEM1(SLJIT_TMP_MEM_REG), -WCONST(0x4d63d63d63d63d63, 0x4d63d63d));
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), sizeof(sljit_sw), SLJIT_TMP_MEM_REG, 0);
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_MEM_REG, 0, SLJIT_IMM, (sljit_sw)(buf + 112) - WCONST(0x739f39f39f39f39f, 0x739f39f3));
	sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_TMP_DEST_VREG, SLJIT_MEM1(SLJIT_TMP_MEM_REG), WCONST(0x739f39f39f39f39f, 0x739f39f3));
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 2 * sizeof(sljit_sw), SLJIT_TMP_MEM_REG, 0);

	/* 16 bit lane 2 of buf[32..47] extracted into SLJIT_TMP_DEST_REG (wbuf[3]). */
	type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_16;
	sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 32);
	sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, 2, SLJIT_TMP_DEST_REG, 0);
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 3 * sizeof(sljit_sw), SLJIT_TMP_DEST_REG, 0);

	/* Insert 0xcc1234 into 16 bit lane 4 and read the lane back: only the low
	   16 bits are expected (wbuf[4]); SLJIT_TMP_DEST_REG must keep its
	   constant (wbuf[5]). */
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, WCONST(0x3b59b59b59b59b59, 0x3b59b59b));
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, 0xcc1234);
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_S2, 0, SLJIT_IMM, 0);
	sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, 4, SLJIT_R2, 0);
	sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, 4, SLJIT_S2, 0);
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 4 * sizeof(sljit_sw), SLJIT_S2, 0);
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 5 * sizeof(sljit_sw), SLJIT_TMP_DEST_REG, 0);

	/* 32 bit lane 3: loaded from wbuf + 6 and stored to wbuf + 7, both
	   addressed through SLJIT_TMP_MEM_REG with a large displacement. */
	type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32;
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_MEM_REG, 0, SLJIT_IMM, (sljit_sw)(wbuf + 6) - WCONST(0x4261261261261261, 0x42612612));
	sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 32);
	sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, 3, SLJIT_MEM1(SLJIT_TMP_MEM_REG), WCONST(0x4261261261261261, 0x42612612));
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_MEM_REG, 0, SLJIT_IMM, (sljit_sw)(wbuf + 7) + WCONST(0x57d37d37d37d37d3, 0x57d37d37));
	sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, 3, SLJIT_MEM1(SLJIT_TMP_MEM_REG), -WCONST(0x57d37d37d37d37d3, 0x57d37d37));

	/* 32 bit lane 1: loaded from SLJIT_TMP_DEST_REG and stored to wbuf + 8. */
	type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32;
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, 0x5763a80);
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, (sljit_sw)(buf + 16) - WCONST(0x69a09a09a09a09a0, 0x69a09a09));
	sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_R1), WCONST(0x69a09a09a09a09a0, 0x69a09a09));
	sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, 1, SLJIT_TMP_DEST_REG, 0);
	sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR3, 1, SLJIT_MEM1(SLJIT_S1), 8 * sizeof(sljit_sw));

	/* 8 bit lane load from memory with a large displacement; only the
	   constant kept in SLJIT_TMP_DEST_REG is checked afterwards (wbuf[9]). */
	type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8;
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_TMP_DEST_REG, 0, SLJIT_IMM, WCONST(0x4e7ce7ce7ce7ce7c, 0x4e7ce7ce));
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R2, 0, SLJIT_IMM, (sljit_sw)(wbuf + 6) + WCONST(0x20ab0ab0ab0ab0ab, 0x20ab0ab0));
	sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM0(), (sljit_sw)buf);
	sljit_emit_simd_lane_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, 1, SLJIT_MEM1(SLJIT_R2), -WCONST(0x20ab0ab0ab0ab0ab, 0x20ab0ab0));
	sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_MEM1(SLJIT_S1), 9 * sizeof(sljit_sw), SLJIT_TMP_DEST_REG, 0);

	sljit_emit_return_void(compiler);

	code.code = sljit_generate_code(compiler, 0, NULL);
	CHECK(compiler);
	sljit_free_compiler(compiler);

	code.func2((sljit_sw)buf, (sljit_sw)wbuf);
	sljit_free_code(code.code, NULL);

	FAILED(!check_simd_mov(buf + 80, 53, 16), "test_simd9 case 1 failed\n");
	FAILED(wbuf[0] != WCONST(0x1ca0ca0ca0ca0ca0, 0x1ca0ca0c), "test_simd9 case 2 failed\n");
	FAILED(wbuf[1] != (sljit_sw)(buf + 16) + WCONST(0x4d63d63d63d63d63, 0x4d63d63d), "test_simd9 case 3 failed\n");
	FAILED(!check_simd_mov(buf + 112, 106, 16), "test_simd9 case 4 failed\n");
	FAILED(wbuf[2] != (sljit_sw)(buf + 112) - WCONST(0x739f39f39f39f39f, 0x739f39f3), "test_simd9 case 5 failed\n");
	FAILED(wbuf[3] != LITTLE_BIG(0xdb74, 0x74db), "test_simd9 case 6 failed\n");
	FAILED(wbuf[4] != 0x1234, "test_simd9 case 7 failed\n");
	FAILED(wbuf[5] != WCONST(0x3b59b59b59b59b59, 0x3b59b59b), "test_simd9 case 8 failed\n");
	FAILED(*(sljit_s32*)(wbuf + 7) != 0x345678ab, "test_simd9 case 9 failed\n");
	FAILED(*(sljit_s32*)(wbuf + 8) != 0x5763a80, "test_simd9 case 10 failed\n");
	FAILED(wbuf[9] != WCONST(0x4e7ce7ce7ce7ce7c, 0x4e7ce7ce), "test_simd9 case 11 failed\n");

	SIMD_RUN_END

	successful_tests++;
}
/*
 * test_simd10: checks sljit_emit_simd_op2 when the second source operand is a
 * memory operand (base + offset, base + index, stack relative, and base with
 * a very large displacement), including the SHUFFLE operation.
 *
 * Inputs: buf[0..31] = 0x00ff00ff words and buf[32..63] = 0x0000ffff words for
 * the logical operations; buf[64..95] and buf[128..159] hold constant words
 * used as shuffle operands; buf[96..127] and buf[160..191] stay 0xaa except
 * for one distinctive 32 bit word each. Results are written to buf + 192
 * onwards and compared after the call.
 *
 * Where SIMD_RUN_START / SIMD_RUN_END expand to a loop (x86), the whole body
 * runs a second time with options set to SLJIT_ENTER_USE_VEX.
 */
static void test_simd10(void)
{
executable_code code;
struct sljit_compiler* compiler;
sljit_s32 options = 0;
sljit_s32 i, type;
/* supported[0]: SHUFFLE on 64 bit registers. */
sljit_u8 supported[1];
sljit_u8* buf;
/* 63 spare bytes so that a 64 byte aligned window of 288 bytes always fits. */
sljit_u8 data[63 + 288];
sljit_s32 vs0 = SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? SLJIT_VS1 : SLJIT_VR5;
if (verbose)
printf("Run test_simd10\n");
SIMD_RUN_START
/* Round the start of data up to the next multiple of 64. */
buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63);
for (i = 0; i < 288; i++)
buf[i] = 0xaa;
init_simd_u32(buf, 32, 0x00ff00ff);
init_simd_u32(buf + 32, 32, 0x0000ffff);
init_simd_u32(buf + 64, 32, LITTLE_BIG(0x04050607, 0x07060504));
*(sljit_u32*)(buf + 96 + 4) = LITTLE_BIG(0x12345678, 0x78563412);
init_simd_u32(buf + 128, 32, LITTLE_BIG(0x080b090a, 0x0a090b08));
*(sljit_u32*)(buf + 160 + 8) = LITTLE_BIG(0x11223344, 0x44332211);
compiler = sljit_create_compiler(NULL);
FAILED(!compiler, "cannot create compiler\n");
/* 32 bytes of local space; SP + 16 .. SP + 31 is used as a vector sized slot.
   NOTE(review): the entry is declared with SLJIT_ARGS2V(P, P) although the
   code is invoked through func1 below and only S0 carries an argument -
   confirm whether a one argument signature was meant. */
sljit_emit_enter(compiler, options, SLJIT_ARGS2V(P, P), 4 | SLJIT_ENTER_VECTOR(6), 4 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_SAVED_VECTOR_REGISTERS > 0 ? 2 : 0), 32);
/* AND with a base + offset memory operand -> buf + 192 (case 1). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_MEM_ALIGNED_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_AND | type, SLJIT_VR0, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 192);
/* XOR with a base + index memory operand (R1 = 32 >> 2, last argument 2)
   -> buf + 208 (case 2). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_32 | SLJIT_SIMD_MEM_ALIGNED_128;
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R1, 0, SLJIT_IMM, 32 >> 2);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_XOR | type, SLJIT_VR4, SLJIT_VR2, SLJIT_MEM2(SLJIT_S0, SLJIT_R1), 2);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 208);
/* OR with a stack relative memory operand: buf[0..15] is first spilled to
   SP + 16 -> buf + 224 (case 3). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_FLOAT | SLJIT_SIMD_ELEM_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_SP), 16);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR3, SLJIT_MEM1(SLJIT_S0), 32);
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_OR | type, vs0, SLJIT_VR3, SLJIT_MEM1(SLJIT_SP), 16);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, vs0, SLJIT_MEM1(SLJIT_S0), 240 - 16);
/* SHUFFLE with register operands only: buf[96..111] and buf[64..79]
   -> buf + 240 (case 4). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 64);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 96);
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_SHUFFLE | type, SLJIT_VR1, SLJIT_VR2, SLJIT_VR1, 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR1, SLJIT_MEM1(SLJIT_S0), 240);
/* SHUFFLE with a memory operand reached through a very large displacement:
   R2 = S0 + 128 + constant, operand = [R2 - constant] -> buf + 256 (case 5). */
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_MEM_ALIGNED_128;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 160);
sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R2, 0, SLJIT_S0, 0, SLJIT_IMM, WCONST(0xcba2ba2ba2ba2ba2, 0xcba2ba2b) + 128);
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_SHUFFLE | type, SLJIT_VR4, SLJIT_VR4, SLJIT_MEM1(SLJIT_R2), -WCONST(0xcba2ba2ba2ba2ba2, 0xcba2ba2b));
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR4, SLJIT_MEM1(SLJIT_S0), 256);
/* SHUFFLE on 64 bit registers; the op2 call doubles as the capability probe
   and cases 6 and 7 are only checked when it is accepted. */
type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_8;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 96);
supported[0] = sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_SHUFFLE | type, SLJIT_VR2, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 64) != SLJIT_ERR_UNSUPPORTED;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR2, SLJIT_MEM1(SLJIT_S0), 272);
type = SLJIT_SIMD_REG_64 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_MEM_ALIGNED_64;
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, vs0, SLJIT_MEM1(SLJIT_S0), 96);
sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, SLJIT_IMM, 64);
sljit_emit_simd_op2(compiler, SLJIT_SIMD_OP2_SHUFFLE | type, SLJIT_VR0, vs0, SLJIT_MEM2(SLJIT_S0, SLJIT_R0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR0, SLJIT_MEM1(SLJIT_S0), 280);
sljit_emit_return_void(compiler);
code.code = sljit_generate_code(compiler, 0, NULL);
CHECK(compiler);
sljit_free_compiler(compiler);
code.func1((sljit_sw)buf);
sljit_free_code(code.code, NULL);
/* Cases 1-3: AND / XOR / OR of 0x00ff00ff and 0x0000ffff. */
FAILED(!check_simd_u32(buf + 192, 16, 0x000000ff), "test_simd10 case 1 failed\n");
FAILED(!check_simd_u32(buf + 208, 16, 0x00ffff00), "test_simd10 case 2 failed\n");
FAILED(!check_simd_u32(buf + 224, 16, 0x00ffffff), "test_simd10 case 3 failed\n");
/* Cases 4-7: every result word is a byte permutation of the distinctive
   word placed in the shuffled vector. */
FAILED(!check_simd_u32(buf + 240, 16, LITTLE_BIG(0x78563412, 0x12345678)), "test_simd10 case 4 failed\n");
FAILED(!check_simd_u32(buf + 256, 16, LITTLE_BIG(0x44113322, 0x22331144)), "test_simd10 case 5 failed\n");
if (supported[0]) {
FAILED(!check_simd_u32(buf + 272, 8, LITTLE_BIG(0x78563412, 0x12345678)), "test_simd10 case 6 failed\n");
FAILED(!check_simd_u32(buf + 280, 8, LITTLE_BIG(0x78563412, 0x12345678)), "test_simd10 case 7 failed\n");
}
SIMD_RUN_END
successful_tests++;
}
/*
 * test_simd11: uses every vector register (SLJIT_VR(0) up to
 * SLJIT_NUMBER_OF_VECTOR_REGISTERS - 1) together with every saved float
 * register in one function.
 *
 * Each vector register loads the 16 byte pattern at buf[0..15] and stores it
 * into its own 32 byte slot at buf + (i + 1) * 32. Each saved float register
 * loads the 1.0 stored at buf + 16; all of them are summed into SLJIT_FS0 and
 * the sum is written back to buf + 16, so it must equal the number of saved
 * float registers.
 *
 * The body is executed for run = 0 and run = 1; the second run additionally
 * calls sljit_set_context with the same arguments right after sljit_emit_enter.
 * Where SIMD_RUN_START / SIMD_RUN_END expand to a loop (x86), everything is
 * repeated with options set to SLJIT_ENTER_USE_VEX.
 */
static void test_simd11(void)
{
executable_code code;
struct sljit_compiler* compiler;
sljit_s32 options = 0;
sljit_s32 i, type, run;
sljit_u8* buf;
/* 63 alignment bytes + 32 input bytes + one 32 byte output slot per vector register. */
sljit_u8 data[63 + 32 + (32 * SLJIT_NUMBER_OF_VECTOR_REGISTERS)];
if (verbose)
printf("Run test_simd11\n");
SIMD_RUN_START
for (run = 0; run < 2; run++) {
/* Round the start of data up to the next multiple of 64. */
buf = (sljit_u8*)(((sljit_sw)data + (sljit_sw)63) & ~(sljit_sw)63);
/* Bytes 0..23 are initialised separately below, so the fill starts at 24. */
for (i = 24; i < 32 + (32 * SLJIT_NUMBER_OF_VECTOR_REGISTERS); i++)
buf[i] = 0xaa;
simd_set(buf, 87, 16);
*(sljit_f64*)(buf + 16) = 1.0;
compiler = sljit_create_compiler(NULL);
FAILED(!compiler, "cannot create compiler\n");
/* NOTE(review): the entry is declared with SLJIT_ARGS2V(P, P) although the
   code is invoked through func1 below and only S0 carries an argument -
   confirm whether a one argument signature was meant. */
sljit_emit_enter(compiler, options, SLJIT_ARGS2V(P, P), 4 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_VECTOR_REGISTERS), 4 | SLJIT_ENTER_FLOAT(SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS), 0);
if (run == 1)
sljit_set_context(compiler, options, SLJIT_ARGS2V(P, P), 4 | SLJIT_ENTER_VECTOR(SLJIT_NUMBER_OF_VECTOR_REGISTERS), 4 | SLJIT_ENTER_FLOAT(SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS), 0);
type = SLJIT_SIMD_REG_128 | SLJIT_SIMD_ELEM_8 | SLJIT_SIMD_MEM_ALIGNED_128;
/* Round trip the pattern through every vector register. */
for (i = 0; i < SLJIT_NUMBER_OF_VECTOR_REGISTERS; i++) {
sljit_emit_simd_mov(compiler, SLJIT_SIMD_LOAD | type, SLJIT_VR(i), SLJIT_MEM1(SLJIT_S0), 0);
sljit_emit_simd_mov(compiler, SLJIT_SIMD_STORE | type, SLJIT_VR(i), SLJIT_MEM1(SLJIT_S0), (i + 1) * 32);
}
if (SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0) {
/* Load 1.0 into every saved float register ... */
for (i = 0; i < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS; i++) {
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_FS(i), 0, SLJIT_MEM1(SLJIT_S0), 16);
}
/* ... add registers 1..N-1 onto SLJIT_FS0 ... */
for (i = 1; i < SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS; i++) {
sljit_emit_fop2(compiler, SLJIT_ADD_F64, SLJIT_FS0, 0, SLJIT_FS0, 0, SLJIT_FS(i), 0);
}
/* ... and write the sum over the original 1.0. */
sljit_emit_fop1(compiler, SLJIT_MOV_F64, SLJIT_MEM1(SLJIT_S0), 16, SLJIT_FS0, 0);
}
sljit_emit_return_void(compiler);
code.code = sljit_generate_code(compiler, 0, NULL);
CHECK(compiler);
sljit_free_compiler(compiler);
code.func1((sljit_sw)buf);
sljit_free_code(code.code, NULL);
/* check_simd_mov also requires the bytes directly before and after each
   16 byte copy to still be 0xaa. */
for (i = 0; i < SLJIT_NUMBER_OF_VECTOR_REGISTERS; i++) {
FAILED(!check_simd_mov(buf + ((i + 1) * 32), 87, 16), "test_simd11 case 1 failed\n");
}
if (SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS > 0) {
/* The value was written as sljit_f64 above and is read back here as double. */
FAILED(*(double*)(buf + 16) != SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS, "test_simd11 case 2 failed\n");
}
}
SIMD_RUN_END
successful_tests++;
}
#undef SIMD_RUN_START
#undef SIMD_RUN_END