//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: UNKNOWN
// Cuda compilation tools, release 13.2, V13.2.78
// Based on NVVM 7.0.1
//
.version 9.2
.target sm_75
.address_size 64
// .globl j2k_htj2k_encode_codeblock
// _ZZ31 j2k_htj2k_encode_codeblockE9block_max has been demoted
// _ZZ31 j2k_htj2k_encode_codeblockE13cleanup_e_val has been demoted
// _ZZ31 j2k_htj2k_encode_codeblockE14cleanup_cx_val has been demoted
// _ZZ32 j2k_htj2k_encode_codeblocksE9block_max has been demoted
// _ZZ32 j2k_htj2k_encode_codeblocksE13cleanup_e_val has been demoted
// _ZZ32 j2k_htj2k_encode_codeblocksE14cleanup_cx_val has been demoted
// _ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE9block_max has been demoted
// _ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE13cleanup_e_val has been demoted
// _ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE14cleanup_cx_val has been demoted
// _ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE9block_max has been demoted
// _ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val has been demoted
// _ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val has been demoted
// _ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E9block_max has been demoted
// _ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val has been demoted
// _ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val has been demoted
// _ZZ32 j2k_htj2k_packetize_cleanupE11shared_code has been demoted
// _ZZ32 j2k_htj2k_packetize_cleanupE17shared_header_len has been demoted
// _ZZ32 j2k_htj2k_packetize_cleanupE15shared_body_len has been demoted
.visible .entry j2k_htj2k_encode_codeblock(
.param .u64 j2k_htj2k_encode_codeblock_param_0,
.param .u64 j2k_htj2k_encode_codeblock_param_1,
.param .u64 j2k_htj2k_encode_codeblock_param_2,
.param .u64 j2k_htj2k_encode_codeblock_param_3,
.param .u64 j2k_htj2k_encode_codeblock_param_4,
.param .u64 j2k_htj2k_encode_codeblock_param_5,
.param .u64 j2k_htj2k_encode_codeblock_param_6
)
{
.local .align 2 .b8 __local_depot0[1026];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<1636>;
.reg .b16 %rs<766>;
.reg .b32 %r<6486>;
.reg .b64 %rd<640>;
// demoted variable
.shared .align 4 .b8 _ZZ31 j2k_htj2k_encode_codeblockE9block_max[512];
// demoted variable
.shared .align 1 .b8 _ZZ31 j2k_htj2k_encode_codeblockE13cleanup_e_val[513];
// demoted variable
.shared .align 1 .b8 _ZZ31 j2k_htj2k_encode_codeblockE14cleanup_cx_val[513];
mov.u64 %SPL, __local_depot0;
ld.param.u64 %rd61, [ j2k_htj2k_encode_codeblock_param_0];
ld.param.u64 %rd55, [ j2k_htj2k_encode_codeblock_param_1];
ld.param.u64 %rd56, [ j2k_htj2k_encode_codeblock_param_2];
ld.param.u64 %rd57, [ j2k_htj2k_encode_codeblock_param_3];
ld.param.u64 %rd59, [ j2k_htj2k_encode_codeblock_param_5];
ld.param.u64 %rd60, [ j2k_htj2k_encode_codeblock_param_6];
cvta.to.global.u64 %rd1, %rd55;
cvta.to.global.u64 %rd2, %rd61;
mov.u32 %r2352, %ctaid.x;
setp.ne.s32 %p8, %r2352, 0;
@%p8 bra $L__BB0_1254;
cvta.to.global.u64 %rd62, %rd56;
ld.global.u32 %r1, [%rd62+8];
ld.global.u32 %r2, [%rd62+12];
ld.global.u32 %r3, [%rd62+16];
ld.global.u32 %r4, [%rd62+20];
ld.global.u32 %r5, [%rd62];
setp.eq.s32 %p9, %r5, 0;
ld.global.u32 %r6, [%rd62+4];
setp.eq.s32 %p10, %r6, 0;
or.pred %p11, %p9, %p10;
@%p11 bra $L__BB0_14;
bra.uni $L__BB0_2;
$L__BB0_14:
mov.u32 %r5172, 0;
bra.uni $L__BB0_15;
$L__BB0_2:
mov.u32 %r7, %tid.x;
mul.lo.s32 %r8, %r6, %r5;
setp.eq.s32 %p12, %r1, %r5;
@%p12 bra $L__BB0_6;
bra.uni $L__BB0_3;
$L__BB0_6:
setp.ge.u32 %p15, %r7, %r8;
mov.u32 %r5170, 0;
@%p15 bra $L__BB0_9;
mov.u32 %r5170, 0;
mov.u32 %r15, %ntid.x;
mov.u32 %r5168, %r7;
$L__BB0_8:
mul.wide.u32 %rd65, %r5168, 4;
add.s64 %rd66, %rd2, %rd65;
ld.global.u32 %r2361, [%rd66];
abs.s32 %r2362, %r2361;
max.u32 %r5170, %r5170, %r2362;
add.s32 %r5168, %r5168, %r15;
setp.lt.u32 %p16, %r5168, %r8;
@%p16 bra $L__BB0_8;
bra.uni $L__BB0_9;
$L__BB0_3:
setp.ge.u32 %p13, %r7, %r8;
mov.u32 %r5170, 0;
@%p13 bra $L__BB0_9;
sub.s32 %r9, %r1, %r5;
mov.u32 %r5170, 0;
mov.u32 %r10, %ntid.x;
mov.u32 %r5166, %r7;
$L__BB0_5:
div.u32 %r2355, %r5166, %r5;
mad.lo.s32 %r2356, %r9, %r2355, %r5166;
mul.wide.u32 %rd63, %r2356, 4;
add.s64 %rd64, %rd2, %rd63;
ld.global.u32 %r2357, [%rd64];
abs.s32 %r2358, %r2357;
max.u32 %r5170, %r5170, %r2358;
add.s32 %r5166, %r5166, %r10;
setp.lt.u32 %p14, %r5166, %r8;
@%p14 bra $L__BB0_5;
$L__BB0_9:
shl.b32 %r2363, %r7, 2;
mov.u32 %r2364, _ZZ31 j2k_htj2k_encode_codeblockE9block_max;
add.s32 %r21, %r2364, %r2363;
st.shared.u32 [%r21], %r5170;
bar.sync 0;
mov.u32 %r2365, %ntid.x;
shr.u32 %r5171, %r2365, 1;
setp.eq.s32 %p17, %r5171, 0;
@%p17 bra $L__BB0_13;
$L__BB0_10:
setp.ge.u32 %p18, %r7, %r5171;
@%p18 bra $L__BB0_12;
ld.shared.u32 %r2366, [%r21];
shl.b32 %r2367, %r5171, 2;
add.s32 %r2368, %r21, %r2367;
ld.shared.u32 %r2369, [%r2368];
setp.gt.u32 %p19, %r2366, %r2369;
add.s32 %r2370, %r5171, %r7;
selp.b32 %r2371, %r7, %r2370, %p19;
shl.b32 %r2372, %r2371, 2;
add.s32 %r2374, %r2364, %r2372;
ld.shared.u32 %r2375, [%r2374];
st.shared.u32 [%r21], %r2375;
$L__BB0_12:
bar.sync 0;
shr.u32 %r5171, %r5171, 1;
setp.ne.s32 %p20, %r5171, 0;
@%p20 bra $L__BB0_10;
$L__BB0_13:
ld.shared.u32 %r5172, [_ZZ31 j2k_htj2k_encode_codeblockE9block_max];
$L__BB0_15:
mov.u32 %r2377, %tid.x;
setp.ne.s32 %p21, %r2377, 0;
@%p21 bra $L__BB0_1254;
mov.u32 %r2378, 0;
cvta.to.global.u64 %rd3, %rd60;
mov.u32 %r2379, 1;
st.global.u32 [%rd3], %r2379;
st.global.u32 [%rd3+4], %r2378;
st.global.u32 [%rd3+8], %r2378;
st.global.u32 [%rd3+12], %r2378;
st.global.u32 [%rd3+16], %r2378;
st.global.u32 [%rd3+20], %r2378;
st.global.u32 [%rd3+24], %r2378;
st.global.u32 [%rd3+28], %r2378;
add.s32 %r2380, %r5, -1;
setp.ge.u32 %p23, %r2380, %r1;
or.pred %p24, %p10, %p23;
setp.gt.u32 %p25, %r5, 1024;
or.pred %p26, %p25, %p24;
@%p26 bra $L__BB0_1253;
cvt.u16.u32 %rs271, %r5;
mov.u16 %rs272, 4096;
div.u16 %rs273, %rs272, %rs271;
cvt.u32.u16 %r2381, %rs273;
setp.gt.u32 %p27, %r6, %r2381;
add.s32 %r2382, %r2, -1;
setp.gt.u32 %p28, %r2382, 29;
or.pred %p29, %p28, %p27;
setp.lt.u32 %p30, %r3, 20549;
or.pred %p31, %p30, %p29;
@%p31 bra $L__BB0_1253;
bra.uni $L__BB0_18;
$L__BB0_1253:
mov.u32 %r5102, 2;
st.global.u32 [%rd3], %r5102;
st.global.u32 [%rd3+4], %r2379;
st.global.u32 [%rd3+8], %r2378;
st.global.u32 [%rd3+12], %r2378;
st.global.u32 [%rd3+16], %r2378;
st.global.u32 [%rd3+20], %r2378;
st.global.u32 [%rd3+24], %r2378;
st.global.u32 [%rd3+28], %r2378;
$L__BB0_1254:
ret;
$L__BB0_18:
add.s32 %r2383, %r4, -1;
setp.gt.u32 %p32, %r2383, 163;
@%p32 bra $L__BB0_1252;
bra.uni $L__BB0_19;
$L__BB0_1252:
mov.u32 %r5099, 2;
st.global.u32 [%rd3], %r5099;
mov.u32 %r5100, 5;
st.global.u32 [%rd3+4], %r5100;
mov.u32 %r5101, 0;
st.global.u32 [%rd3+8], %r5101;
st.global.u32 [%rd3+12], %r5101;
st.global.u32 [%rd3+16], %r5101;
st.global.u32 [%rd3+20], %r5101;
st.global.u32 [%rd3+24], %r5101;
st.global.u32 [%rd3+28], %r5101;
bra.uni $L__BB0_1254;
$L__BB0_19:
setp.gt.u32 %p33, %r4, 3;
@%p33 bra $L__BB0_1251;
bra.uni $L__BB0_20;
$L__BB0_1251:
mov.u32 %r5096, 2;
st.global.u32 [%rd3], %r5096;
mov.u32 %r5097, 5;
st.global.u32 [%rd3+4], %r5097;
mov.u32 %r5098, 0;
st.global.u32 [%rd3+8], %r5098;
st.global.u32 [%rd3+12], %r5098;
st.global.u32 [%rd3+16], %r5098;
st.global.u32 [%rd3+20], %r5098;
st.global.u32 [%rd3+24], %r5098;
st.global.u32 [%rd3+28], %r5098;
bra.uni $L__BB0_1254;
$L__BB0_20:
setp.eq.s32 %p34, %r5172, 0;
@%p34 bra $L__BB0_1250;
clz.b32 %r2384, %r5172;
mov.u32 %r2385, 32;
sub.s32 %r2386, %r2385, %r2384;
setp.gt.u32 %p35, %r2386, %r2;
@%p35 bra $L__BB0_1249;
bra.uni $L__BB0_22;
$L__BB0_1249:
mov.u32 %r5092, 1;
st.global.u32 [%rd3], %r5092;
mov.u32 %r5093, 2;
st.global.u32 [%rd3+4], %r5093;
mov.u32 %r5094, 0;
st.global.u32 [%rd3+8], %r5094;
st.global.u32 [%rd3+12], %r5094;
st.global.u32 [%rd3+16], %r5094;
st.global.u32 [%rd3+20], %r5094;
st.global.u32 [%rd3+24], %r5094;
st.global.u32 [%rd3+28], %r5094;
bra.uni $L__BB0_1254;
$L__BB0_1250:
mov.u32 %r5095, 0;
st.global.u32 [%rd3], %r5095;
st.global.u32 [%rd3+4], %r5095;
st.global.u32 [%rd3+8], %r5095;
st.global.u32 [%rd3+12], %r5095;
st.global.u32 [%rd3+16], %r2;
st.global.u32 [%rd3+20], %r5095;
st.global.u32 [%rd3+24], %r5095;
st.global.u32 [%rd3+28], %r5095;
bra.uni $L__BB0_1254;
$L__BB0_22:
setp.gt.u32 %p36, %r4, 1;
setp.lt.u32 %p37, %r2, %r4;
and.pred %p38, %p36, %p37;
@%p38 bra $L__BB0_1248;
bra.uni $L__BB0_23;
$L__BB0_1248:
mov.u32 %r5089, 2;
st.global.u32 [%rd3], %r5089;
mov.u32 %r5090, 5;
st.global.u32 [%rd3+4], %r5090;
mov.u32 %r5091, 0;
st.global.u32 [%rd3+8], %r5091;
st.global.u32 [%rd3+12], %r5091;
st.global.u32 [%rd3+16], %r5091;
st.global.u32 [%rd3+20], %r5091;
st.global.u32 [%rd3+24], %r5091;
st.global.u32 [%rd3+28], %r5091;
bra.uni $L__BB0_1254;
$L__BB0_23:
mov.u32 %r5180, 0;
setp.eq.s32 %p39, %r4, 2;
@%p39 bra $L__BB0_34;
setp.ne.s32 %p40, %r4, 3;
@%p40 bra $L__BB0_42;
@%p9 bra $L__BB0_42;
mov.u32 %r2390, 0;
mov.u32 %r5173, %r2390;
mov.u32 %r5180, %r2390;
$L__BB0_27:
mul.lo.s32 %r29, %r5173, %r1;
mov.u32 %r5175, %r2390;
$L__BB0_28:
add.s32 %r2392, %r5175, %r29;
mul.wide.u32 %rd67, %r2392, 4;
add.s64 %rd68, %rd2, %rd67;
ld.global.u32 %r2393, [%rd68];
abs.s32 %r32, %r2393;
setp.eq.s32 %p42, %r32, 0;
@%p42 bra $L__BB0_31;
setp.eq.s32 %p43, %r32, 3;
@%p43 bra $L__BB0_31;
add.s32 %r5180, %r5180, 1;
and.b32 %r2394, %r32, 1;
setp.eq.b32 %p44, %r2394, 1;
not.pred %p45, %p44;
setp.lt.u32 %p46, %r32, 5;
or.pred %p47, %p46, %p45;
@%p47 bra $L__BB0_33;
$L__BB0_31:
add.s32 %r5175, %r5175, 1;
setp.lt.u32 %p48, %r5175, %r5;
@%p48 bra $L__BB0_28;
add.s32 %r5173, %r5173, 1;
setp.lt.u32 %p49, %r5173, %r6;
@%p49 bra $L__BB0_27;
bra.uni $L__BB0_42;
$L__BB0_34:
@%p9 bra $L__BB0_42;
mov.u32 %r2399, 0;
mov.u32 %r5178, %r2399;
$L__BB0_36:
mul.lo.s32 %r38, %r5178, %r1;
mov.u32 %r5179, %r2399;
$L__BB0_37:
add.s32 %r2401, %r5179, %r38;
mul.wide.u32 %rd69, %r2401, 4;
add.s64 %rd70, %rd2, %rd69;
ld.global.u32 %r2402, [%rd70];
abs.s32 %r40, %r2402;
setp.eq.s32 %p51, %r40, 0;
@%p51 bra $L__BB0_40;
setp.gt.u32 %p52, %r40, 2;
and.b32 %r2403, %r40, 1;
setp.eq.b32 %p53, %r2403, 1;
and.pred %p54, %p52, %p53;
@%p54 bra $L__BB0_40;
bra.uni $L__BB0_39;
$L__BB0_40:
add.s32 %r5179, %r5179, 1;
setp.lt.u32 %p55, %r5179, %r5;
@%p55 bra $L__BB0_37;
add.s32 %r5178, %r5178, 1;
setp.lt.u32 %p56, %r5178, %r6;
mov.u32 %r5180, 0;
@%p56 bra $L__BB0_36;
$L__BB0_42:
sub.s32 %r44, %r2, %r4;
mov.u32 %r2409, 30;
sub.s32 %r45, %r2409, %r44;
mov.u16 %rs274, 255;
st.global.u8 [%rd1+20548], %rs274;
add.s32 %r2410, %r5, 1;
shr.u32 %r2411, %r2410, 1;
add.s32 %r2412, %r2411, 2;
min.u32 %r46, %r2412, 513;
mov.u32 %r2413, -3;
sub.s32 %r2414, %r2413, %r2411;
max.u32 %r2415, %r2414, -514;
mov.u32 %r2416, -2;
sub.s32 %r2417, %r2416, %r2415;
and.b32 %r5185, %r46, 3;
setp.lt.u32 %p57, %r2417, 3;
mov.u32 %r5183, 0;
@%p57 bra $L__BB0_45;
sub.s32 %r5182, %r46, %r5185;
mov.u32 %r5183, 0;
$L__BB0_44:
mov.u32 %r2419, _ZZ31 j2k_htj2k_encode_codeblockE13cleanup_e_val;
add.s32 %r2420, %r2419, %r5183;
mov.u16 %rs275, 0;
st.shared.u8 [%r2420], %rs275;
mov.u32 %r2421, _ZZ31 j2k_htj2k_encode_codeblockE14cleanup_cx_val;
add.s32 %r2422, %r2421, %r5183;
st.shared.u8 [%r2422], %rs275;
st.shared.u8 [%r2420+1], %rs275;
st.shared.u8 [%r2422+1], %rs275;
st.shared.u8 [%r2420+2], %rs275;
st.shared.u8 [%r2422+2], %rs275;
st.shared.u8 [%r2420+3], %rs275;
st.shared.u8 [%r2422+3], %rs275;
add.s32 %r5183, %r5183, 4;
add.s32 %r5182, %r5182, -4;
setp.ne.s32 %p58, %r5182, 0;
@%p58 bra $L__BB0_44;
$L__BB0_45:
setp.eq.s32 %p59, %r5185, 0;
@%p59 bra $L__BB0_48;
mov.u32 %r2423, _ZZ31 j2k_htj2k_encode_codeblockE13cleanup_e_val;
mov.u32 %r2425, _ZZ31 j2k_htj2k_encode_codeblockE14cleanup_cx_val;
$L__BB0_47:
.pragma "nounroll";
add.s32 %r2424, %r2423, %r5183;
mov.u16 %rs276, 0;
st.shared.u8 [%r2424], %rs276;
add.s32 %r2426, %r2425, %r5183;
st.shared.u8 [%r2426], %rs276;
add.s32 %r5183, %r5183, 1;
add.s32 %r5185, %r5185, -1;
setp.ne.s32 %p60, %r5185, 0;
@%p60 bra $L__BB0_47;
$L__BB0_48:
mov.u32 %r5480, 0;
mov.u32 %r5277, 8;
mov.u32 %r5481, 1;
mov.u32 %r5718, 4;
mov.u16 %rs688, 15;
mov.u16 %rs705, 0;
mov.u32 %r5482, %r5480;
mov.u32 %r5483, %r5480;
mov.u32 %r5271, %r5480;
mov.u32 %r5716, %r5480;
mov.u32 %r5717, %r5481;
mov.u32 %r5719, %r5481;
mov.u32 %r5933, %r5480;
mov.u32 %r5904, %r5480;
mov.u32 %r5905, %r5480;
mov.u32 %r5906, %r5277;
mov.u32 %r5907, %r5480;
@%p9 bra $L__BB0_417;
mov.u32 %r2459, 0;
mov.u32 %r5719, 1;
mov.u16 %rs705, 0;
mov.u32 %r5906, 8;
mov.u16 %rs688, 15;
mov.u32 %r5718, 4;
cvta.to.global.u64 %rd79, %rd57;
cvta.to.global.u64 %rd108, %rd59;
mov.u32 %r5186, %r2459;
mov.u32 %r5187, %r2459;
mov.u32 %r5736, %r2459;
mov.u32 %r5907, %r2459;
mov.u32 %r5905, %r2459;
mov.u32 %r5904, %r2459;
mov.u32 %r5933, %r2459;
mov.u32 %r5717, %r5719;
mov.u32 %r5716, %r2459;
mov.u32 %r5271, %r2459;
mov.u32 %r5277, %r5906;
mov.u32 %r5483, %r2459;
mov.u32 %r5482, %r2459;
mov.u32 %r5481, %r5719;
mov.u32 %r5480, %r2459;
bra.uni $L__BB0_50;
$L__BB0_39:
mov.u32 %r2404, 2;
st.global.u32 [%rd3], %r2404;
mov.u32 %r2405, 6;
st.global.u32 [%rd3+4], %r2405;
mov.u32 %r2406, 0;
st.global.u32 [%rd3+8], %r2406;
st.global.u32 [%rd3+12], %r2406;
st.global.u32 [%rd3+16], %r2406;
st.global.u32 [%rd3+20], %r2406;
st.global.u32 [%rd3+24], %r2406;
st.global.u32 [%rd3+28], %r2406;
bra.uni $L__BB0_1254;
$L__BB0_33:
mov.u32 %r2395, 2;
st.global.u32 [%rd3], %r2395;
mov.u32 %r2396, 6;
st.global.u32 [%rd3+4], %r2396;
mov.u32 %r2397, 0;
st.global.u32 [%rd3+8], %r2397;
st.global.u32 [%rd3+12], %r2397;
st.global.u32 [%rd3+16], %r2397;
st.global.u32 [%rd3+20], %r2397;
st.global.u32 [%rd3+24], %r2397;
st.global.u32 [%rd3+28], %r2397;
bra.uni $L__BB0_1254;
$L__BB0_255:
shl.b16 %rs600, %rs705, 1;
or.b16 %rs646, %rs600, 1;
setp.gt.u32 %p291, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5476, 1;
@%p291 bra $L__BB0_257;
shl.b16 %rs602, %rs705, 1;
or.b16 %rs601, %rs602, 1;
and.b16 %rs364, %rs601, 255;
st.global.u8 [%rd6], %rs601;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p292, %rs364, 255;
selp.b32 %r5277, 7, 8, %p292;
mov.u16 %rs646, 0;
mov.u32 %r5476, %r5480;
bra.uni $L__BB0_257;
$L__BB0_50:
mul.wide.u32 %rd71, %r5187, 4;
add.s64 %rd72, %rd2, %rd71;
ld.global.u32 %r76, [%rd72];
setp.eq.s32 %p62, %r76, 0;
mov.u32 %r5204, %r2459;
@%p62 bra $L__BB0_52;
and.b32 %r2461, %r76, -2147483648;
abs.s32 %r2462, %r76;
mov.u32 %r2463, 31;
sub.s32 %r2464, %r2463, %r2;
shl.b32 %r2465, %r2462, %r2464;
or.b32 %r5204, %r2465, %r2461;
$L__BB0_52:
shl.b32 %r2469, %r5204, 1;
shr.u32 %r2470, %r2469, %r45;
and.b32 %r79, %r2470, -2;
setp.eq.s32 %p63, %r79, 0;
mov.u32 %r5205, 0;
mov.u32 %r5206, %r5205;
mov.u32 %r5212, %r5205;
@%p63 bra $L__BB0_54;
add.s32 %r2472, %r79, -1;
clz.b32 %r2473, %r2472;
mov.u32 %r2474, 32;
sub.s32 %r5205, %r2474, %r2473;
shr.u32 %r2475, %r5204, 31;
add.s32 %r2476, %r2475, %r79;
add.s32 %r5206, %r2476, -2;
mov.u32 %r5212, 1;
$L__BB0_54:
mov.u32 %r5208, 0;
setp.lt.u32 %p64, %r6, 2;
@%p64 bra $L__BB0_57;
mov.u32 %r5208, 0;
add.s32 %r2479, %r5187, %r1;
mul.wide.u32 %rd73, %r2479, 4;
add.s64 %rd74, %rd2, %rd73;
ld.global.u32 %r85, [%rd74];
setp.eq.s32 %p65, %r85, 0;
@%p65 bra $L__BB0_57;
and.b32 %r2480, %r85, -2147483648;
abs.s32 %r2481, %r85;
mov.u32 %r2482, 31;
sub.s32 %r2483, %r2482, %r2;
shl.b32 %r2484, %r2481, %r2483;
or.b32 %r5208, %r2484, %r2480;
$L__BB0_57:
shl.b32 %r2487, %r5208, 1;
shr.u32 %r2488, %r2487, %r45;
and.b32 %r88, %r2488, -2;
setp.eq.s32 %p66, %r88, 0;
mov.u32 %r5223, 0;
mov.u32 %r5209, %r5223;
mov.u32 %r5210, %r5223;
mov.u32 %r5227, %r5205;
@%p66 bra $L__BB0_59;
or.b32 %r5212, %r5212, 2;
add.s32 %r2489, %r88, -1;
clz.b32 %r2490, %r2489;
mov.u32 %r2491, 32;
sub.s32 %r5209, %r2491, %r2490;
max.s32 %r5227, %r5205, %r5209;
shr.u32 %r2492, %r5208, 31;
add.s32 %r2493, %r2492, %r88;
add.s32 %r5210, %r2493, -2;
$L__BB0_59:
add.s32 %r5229, %r5187, 1;
add.s32 %r2498, %r5186, 1;
setp.ge.u32 %p67, %r2498, %r5;
mov.u32 %r5224, %r5223;
mov.u32 %r5225, %r5223;
mov.u32 %r5226, %r5223;
@%p67 bra $L__BB0_70;
mul.wide.u32 %rd75, %r5229, 4;
add.s64 %rd76, %rd2, %rd75;
ld.global.u32 %r98, [%rd76];
setp.eq.s32 %p68, %r98, 0;
mov.u32 %r5224, 0;
mov.u32 %r5213, %r5224;
@%p68 bra $L__BB0_62;
and.b32 %r2500, %r98, -2147483648;
abs.s32 %r2501, %r98;
mov.u32 %r2502, 31;
sub.s32 %r2503, %r2502, %r2;
shl.b32 %r2504, %r2501, %r2503;
or.b32 %r5213, %r2504, %r2500;
$L__BB0_62:
shl.b32 %r2507, %r5213, 1;
shr.u32 %r2508, %r2507, %r45;
and.b32 %r101, %r2508, -2;
setp.eq.s32 %p69, %r101, 0;
mov.u32 %r5226, %r5224;
@%p69 bra $L__BB0_64;
or.b32 %r5212, %r5212, 4;
add.s32 %r2509, %r101, -1;
clz.b32 %r2510, %r2509;
mov.u32 %r2511, 32;
sub.s32 %r5224, %r2511, %r2510;
max.s32 %r5227, %r5227, %r5224;
shr.u32 %r2512, %r5213, 31;
add.s32 %r2513, %r2512, %r101;
add.s32 %r5226, %r2513, -2;
$L__BB0_64:
mov.u32 %r5223, 0;
mov.u32 %r5218, %r5223;
@%p64 bra $L__BB0_67;
add.s32 %r5118, %r5187, 1;
add.s32 %r2516, %r5118, %r1;
mul.wide.u32 %rd77, %r2516, 4;
add.s64 %rd78, %rd2, %rd77;
ld.global.u32 %r110, [%rd78];
setp.eq.s32 %p71, %r110, 0;
@%p71 bra $L__BB0_67;
and.b32 %r2517, %r110, -2147483648;
abs.s32 %r2518, %r110;
mov.u32 %r2519, 31;
sub.s32 %r2520, %r2519, %r2;
shl.b32 %r2521, %r2518, %r2520;
or.b32 %r5218, %r2521, %r2517;
$L__BB0_67:
shl.b32 %r2524, %r5218, 1;
shr.u32 %r2525, %r2524, %r45;
and.b32 %r113, %r2525, -2;
setp.eq.s32 %p72, %r113, 0;
mov.u32 %r5225, %r5223;
@%p72 bra $L__BB0_69;
or.b32 %r5212, %r5212, 8;
add.s32 %r2526, %r113, -1;
clz.b32 %r2527, %r2526;
mov.u32 %r2528, 32;
sub.s32 %r5223, %r2528, %r2527;
max.s32 %r5227, %r5227, %r5223;
shr.u32 %r2529, %r5218, 31;
add.s32 %r2530, %r2529, %r113;
add.s32 %r5225, %r2530, -2;
$L__BB0_69:
add.s32 %r5229, %r5187, 2;
$L__BB0_70:
mov.u32 %r5187, %r5229;
add.s32 %r2532, %r5227, -1;
setp.lt.s32 %p73, %r5227, 2;
setp.gt.s32 %p74, %r5227, 1;
selp.b32 %r130, %r2532, 0, %p74;
mov.u32 %r5230, 0;
@%p73 bra $L__BB0_72;
setp.eq.s32 %p75, %r5205, %r5227;
selp.u32 %r2533, 1, 0, %p75;
setp.eq.s32 %p76, %r5209, %r5227;
selp.u32 %r2534, -1, 0, %p76;
bfi.b32 %r2535, %r2534, %r2533, 1, 1;
setp.eq.s32 %p77, %r5224, %r5227;
selp.u16 %rs281, 1, 0, %p77;
mul.wide.u16 %r2536, %rs281, 4;
or.b32 %r2537, %r2535, %r2536;
setp.eq.s32 %p78, %r5223, %r5227;
selp.u16 %rs282, 1, 0, %p78;
mul.wide.u16 %r2538, %rs282, 8;
or.b32 %r5230, %r2537, %r2538;
$L__BB0_72:
shr.u32 %r2539, %r5186, 1;
mov.u32 %r2540, _ZZ31 j2k_htj2k_encode_codeblockE13cleanup_e_val;
add.s32 %r2541, %r2540, %r2539;
ld.shared.u8 %rs283, [%r2541];
cvt.u32.u16 %r2542, %rs283;
and.b32 %r2543, %r2542, 255;
and.b32 %r2544, %r5209, 255;
setp.lt.u32 %p79, %r2544, %r2543;
cvt.u16.u32 %rs284, %r5209;
selp.b16 %rs285, %rs283, %rs284, %p79;
st.shared.u8 [%r2541], %rs285;
st.shared.u8 [%r2541+1], %r5223;
mov.u32 %r2545, _ZZ31 j2k_htj2k_encode_codeblockE14cleanup_cx_val;
add.s32 %r2546, %r2545, %r2539;
and.b32 %r133, %r5212, 2;
cvt.u16.u32 %rs286, %r133;
shr.u16 %rs287, %rs286, 1;
ld.shared.u8 %rs288, [%r2546];
or.b16 %rs289, %rs288, %rs287;
st.shared.u8 [%r2546], %rs289;
and.b32 %r134, %r5212, 8;
shr.u32 %r135, %r134, 3;
st.shared.u8 [%r2546+1], %r135;
shl.b32 %r2547, %r5212, 4;
shl.b32 %r2548, %r5736, 8;
or.b32 %r2549, %r2547, %r2548;
or.b32 %r2550, %r2549, %r5230;
mul.wide.u32 %rd80, %r2550, 2;
add.s64 %rd81, %rd79, %rd80;
ld.global.u16 %rs3, [%rd81];
shr.u16 %rs290, %rs3, 4;
and.b16 %rs4, %rs290, 7;
setp.eq.s16 %p80, %rs4, 0;
mov.u32 %r5242, %r5716;
@%p80 bra $L__BB0_79;
cvt.u32.u16 %r5231, %rs4;
shr.u16 %rs291, %rs3, 8;
cvt.u32.u16 %r5232, %rs291;
$L__BB0_74:
mov.u16 %rs5, %rs688;
mov.u32 %r138, %r5231;
setp.gt.u32 %p81, %r5719, 2879;
mov.u32 %r5242, 1;
@%p81 bra $L__BB0_79;
mov.u32 %r2552, 8;
sub.s32 %r2553, %r2552, %r5717;
sub.s32 %r2554, %r2553, %r5718;
min.u32 %r2555, %r2554, %r138;
setp.eq.s32 %p82, %r2555, 32;
mov.u32 %r2556, -1;
shl.b32 %r2557, %r2556, %r2555;
not.b32 %r2558, %r2557;
selp.b32 %r2559, -1, %r2558, %p82;
and.b32 %r2560, %r2559, %r5232;
shl.b32 %r2561, %r2560, %r5718;
cvt.u16.u32 %rs292, %r2561;
or.b16 %rs688, %rs5, %rs292;
add.s32 %r5718, %r2555, %r5718;
sub.s32 %r5231, %r138, %r2555;
shr.u32 %r5232, %r5232, %r2555;
setp.gt.u32 %p83, %r2554, %r138;
@%p83 bra $L__BB0_78;
setp.ne.s32 %p84, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs293, %rs688, 255;
setp.ne.s16 %p85, %rs293, 127;
and.pred %p86, %p84, %p85;
@%p86 bra $L__BB0_78;
cvt.u16.u32 %rs588, %r2561;
or.b16 %rs587, %rs5, %rs588;
mov.u32 %r2564, 20548;
sub.s32 %r2565, %r2564, %r5719;
cvt.u64.u32 %rd82, %r2565;
add.s64 %rd83, %rd1, %rd82;
st.global.u8 [%rd83], %rs587;
add.s32 %r5719, %r5719, 1;
setp.gt.u16 %p87, %rs293, 143;
selp.u32 %r5717, 1, 0, %p87;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_78:
setp.ne.s32 %p88, %r5231, 0;
mov.u32 %r5242, %r5716;
@%p88 bra $L__BB0_74;
$L__BB0_79:
setp.ne.s32 %p89, %r5736, 0;
@%p89 bra $L__BB0_127;
setp.eq.s32 %p90, %r5212, 0;
add.s32 %r2566, %r5271, 17477;
cvt.u64.u32 %rd84, %r2566;
add.s64 %rd4, %rd1, %rd84;
@%p90 bra $L__BB0_119;
shl.b16 %rs619, %rs705, 1;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p91, %r5277, 0;
mov.u32 %r5276, %r5480;
@%p91 bra $L__BB0_84;
bra.uni $L__BB0_82;
$L__BB0_84:
setp.lt.u32 %p93, %r5482, 3;
mov.u32 %r5246, 0;
@%p93 bra $L__BB0_87;
setp.lt.u32 %p94, %r5482, 6;
mov.u32 %r5246, 1;
@%p94 bra $L__BB0_87;
setp.lt.u32 %p95, %r5482, 9;
setp.eq.s32 %p96, %r5482, 11;
selp.b32 %r2572, 4, 5, %p96;
setp.lt.u32 %p97, %r5482, 11;
selp.b32 %r2573, 3, %r2572, %p97;
selp.b32 %r5246, 2, %r2573, %p95;
$L__BB0_87:
setp.eq.s32 %p98, %r5246, 0;
@%p98 bra $L__BB0_115;
and.b32 %r163, %r5246, 3;
setp.eq.s32 %p99, %r163, 0;
mov.u32 %r5256, %r5246;
mov.u32 %r5259, %r5276;
@%p99 bra $L__BB0_100;
add.s32 %r5119, %r5246, -1;
mov.u32 %r2575, 1;
shl.b32 %r2576, %r2575, %r5119;
and.b32 %r2577, %r2576, %r5483;
setp.ne.s32 %p100, %r2577, 0;
selp.u32 %r2578, 1, 0, %p100;
cvt.u32.u16 %r2579, %rs619;
bfi.b32 %r2580, %r2579, %r2578, 1, 8;
cvt.u16.u32 %rs619, %r2580;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p101, %r5277, 0;
mov.u32 %r5259, %r5276;
@%p101 bra $L__BB0_92;
setp.gt.u32 %p102, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5259, %r2575;
@%p102 bra $L__BB0_92;
add.s32 %r2584, %r5271, 17477;
cvt.u64.u32 %rd85, %r2584;
add.s64 %rd86, %rd1, %rd85;
st.global.u8 [%rd86], %rs619;
add.s32 %r5271, %r5271, 1;
mov.u32 %r5277, 8;
mov.u16 %rs619, 0;
mov.u32 %r5259, %r5276;
$L__BB0_92:
and.b32 %r5121, %r5246, 3;
add.s32 %r5256, %r5246, -1;
setp.eq.s32 %p103, %r5121, 1;
mov.u32 %r5276, %r5259;
@%p103 bra $L__BB0_100;
add.s32 %r5256, %r5246, -2;
mov.u32 %r2585, 1;
shl.b32 %r2586, %r2585, %r5256;
and.b32 %r2587, %r2586, %r5483;
setp.ne.s32 %p104, %r2587, 0;
selp.u32 %r2588, 1, 0, %p104;
cvt.u32.u16 %r2589, %rs619;
bfi.b32 %r2590, %r2589, %r2588, 1, 8;
cvt.u16.u32 %rs619, %r2590;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p105, %r5277, 0;
mov.u32 %r5250, %r5259;
@%p105 bra $L__BB0_96;
setp.gt.u32 %p106, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5250, %r2585;
@%p106 bra $L__BB0_96;
add.s32 %r2593, %r5271, 17477;
cvt.u64.u32 %rd87, %r2593;
add.s64 %rd88, %rd1, %rd87;
and.b16 %rs300, %rs619, 255;
st.global.u8 [%rd88], %rs619;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p107, %rs300, 255;
selp.b32 %r5277, 7, 8, %p107;
mov.u16 %rs619, 0;
mov.u32 %r5250, %r5259;
$L__BB0_96:
and.b32 %r5122, %r5246, 3;
setp.eq.s32 %p108, %r5122, 2;
mov.u32 %r5276, %r5250;
mov.u32 %r5259, %r5250;
@%p108 bra $L__BB0_100;
add.s32 %r5256, %r5246, -3;
mov.u32 %r2594, 1;
shl.b32 %r2595, %r2594, %r5256;
and.b32 %r2596, %r2595, %r5483;
setp.ne.s32 %p109, %r2596, 0;
selp.u32 %r2597, 1, 0, %p109;
cvt.u32.u16 %r2598, %rs619;
bfi.b32 %r2599, %r2598, %r2597, 1, 8;
cvt.u16.u32 %rs619, %r2599;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p110, %r5277, 0;
mov.u32 %r5276, %r5250;
mov.u32 %r5259, %r5250;
@%p110 bra $L__BB0_100;
add.s32 %r5256, %r5246, -3;
setp.gt.u32 %p111, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5276, %r2594;
mov.u32 %r5259, %r2594;
@%p111 bra $L__BB0_100;
add.s32 %r5256, %r5246, -3;
add.s32 %r2604, %r5271, 17477;
cvt.u64.u32 %rd89, %r2604;
add.s64 %rd90, %rd1, %rd89;
and.b16 %rs303, %rs619, 255;
st.global.u8 [%rd90], %rs619;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p112, %rs303, 255;
selp.b32 %r5277, 7, 8, %p112;
mov.u16 %rs619, 0;
mov.u32 %r5276, %r5250;
mov.u32 %r5259, %r5250;
$L__BB0_100:
add.s32 %r5123, %r5246, -1;
setp.lt.u32 %p113, %r5123, 3;
@%p113 bra $L__BB0_115;
mov.u32 %r5276, %r5259;
$L__BB0_102:
add.s32 %r2605, %r5256, -1;
mov.u32 %r2606, 1;
shl.b32 %r2607, %r2606, %r2605;
and.b32 %r2608, %r2607, %r5483;
setp.ne.s32 %p114, %r2608, 0;
selp.u32 %r2609, 1, 0, %p114;
cvt.u32.u16 %r2610, %rs619;
bfi.b32 %r5265, %r2610, %r2609, 1, 8;
add.s32 %r5266, %r5277, -1;
setp.ne.s32 %p115, %r5266, 0;
mov.u32 %r5264, %r5276;
@%p115 bra $L__BB0_105;
setp.gt.u32 %p116, %r5271, 191;
mov.u32 %r5266, 0;
mov.u32 %r5264, %r2606;
@%p116 bra $L__BB0_105;
cvt.u16.u32 %rs304, %r5265;
and.b16 %rs305, %rs304, 255;
add.s32 %r2614, %r5271, 17477;
cvt.u64.u32 %rd91, %r2614;
add.s64 %rd92, %rd1, %rd91;
st.global.u8 [%rd92], %rs304;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p117, %rs305, 255;
selp.b32 %r5266, 7, 8, %p117;
mov.u32 %r5265, 0;
mov.u32 %r5264, %r5276;
$L__BB0_105:
add.s32 %r2615, %r5256, -2;
shl.b32 %r2617, %r2606, %r2615;
and.b32 %r2618, %r2617, %r5483;
setp.ne.s32 %p118, %r2618, 0;
and.b32 %r2619, %r5265, 127;
selp.u32 %r2620, 1, 0, %p118;
bfi.b32 %r5269, %r2619, %r2620, 1, 7;
add.s32 %r5270, %r5266, -1;
setp.ne.s32 %p119, %r5270, 0;
mov.u32 %r5268, %r5264;
@%p119 bra $L__BB0_108;
setp.gt.u32 %p120, %r5271, 191;
mov.u32 %r5270, 0;
mov.u32 %r5268, 1;
@%p120 bra $L__BB0_108;
cvt.u16.u32 %rs306, %r5269;
and.b16 %rs307, %rs306, 255;
add.s32 %r2624, %r5271, 17477;
cvt.u64.u32 %rd93, %r2624;
add.s64 %rd94, %rd1, %rd93;
st.global.u8 [%rd94], %rs306;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p121, %rs307, 255;
selp.b32 %r5270, 7, 8, %p121;
mov.u32 %r5269, 0;
mov.u32 %r5268, %r5264;
$L__BB0_108:
add.s32 %r2625, %r5256, -3;
mov.u32 %r2626, 1;
shl.b32 %r2627, %r2626, %r2625;
and.b32 %r2628, %r2627, %r5483;
setp.ne.s32 %p122, %r2628, 0;
and.b32 %r2629, %r5269, 127;
selp.u32 %r2630, 1, 0, %p122;
bfi.b32 %r5273, %r2629, %r2630, 1, 7;
add.s32 %r5274, %r5270, -1;
setp.ne.s32 %p123, %r5274, 0;
mov.u32 %r5272, %r5268;
@%p123 bra $L__BB0_111;
setp.gt.u32 %p124, %r5271, 191;
mov.u32 %r5274, 0;
mov.u32 %r5272, %r2626;
@%p124 bra $L__BB0_111;
cvt.u16.u32 %rs308, %r5273;
and.b16 %rs309, %rs308, 255;
add.s32 %r2634, %r5271, 17477;
cvt.u64.u32 %rd95, %r2634;
add.s64 %rd96, %rd1, %rd95;
st.global.u8 [%rd96], %rs308;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p125, %rs309, 255;
selp.b32 %r5274, 7, 8, %p125;
mov.u32 %r5273, 0;
mov.u32 %r5272, %r5268;
$L__BB0_111:
add.s32 %r5256, %r5256, -4;
shl.b32 %r2636, %r2626, %r5256;
and.b32 %r2637, %r2636, %r5483;
setp.ne.s32 %p126, %r2637, 0;
and.b32 %r2638, %r5273, 127;
selp.u32 %r2639, 1, 0, %p126;
bfi.b32 %r2640, %r2638, %r2639, 1, 15;
cvt.u16.u32 %rs619, %r2640;
add.s32 %r5277, %r5274, -1;
setp.ne.s32 %p127, %r5277, 0;
mov.u32 %r5276, %r5272;
@%p127 bra $L__BB0_114;
setp.gt.u32 %p128, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5276, 1;
@%p128 bra $L__BB0_114;
add.s32 %r2643, %r5271, 17477;
cvt.u64.u32 %rd97, %r2643;
add.s64 %rd98, %rd1, %rd97;
and.b16 %rs311, %rs619, 255;
st.global.u8 [%rd98], %rs619;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p129, %rs311, 255;
selp.b32 %r5277, 7, 8, %p129;
mov.u16 %rs619, 0;
mov.u32 %r5276, %r5272;
$L__BB0_114:
setp.ne.s32 %p130, %r5256, 0;
@%p130 bra $L__BB0_102;
$L__BB0_115:
add.s32 %r2645, %r5482, -1;
setp.eq.s32 %p131, %r5482, 0;
mov.u32 %r5483, 0;
selp.b32 %r5482, 0, %r2645, %p131;
setp.lt.u32 %p132, %r5482, 3;
mov.u32 %r5282, %r5483;
@%p132 bra $L__BB0_118;
setp.lt.u32 %p133, %r5482, 6;
mov.u32 %r5282, 1;
@%p133 bra $L__BB0_118;
setp.lt.u32 %p134, %r5482, 9;
setp.eq.s32 %p135, %r5482, 11;
selp.b32 %r2647, 4, 5, %p135;
setp.lt.u32 %p136, %r5482, 11;
selp.b32 %r2648, 3, %r2647, %p136;
selp.b32 %r5282, 2, %r2648, %p134;
$L__BB0_118:
mov.u32 %r2650, 1;
shl.b32 %r5481, %r2650, %r5282;
mov.u32 %r5480, %r5276;
mov.u16 %rs705, %rs619;
bra.uni $L__BB0_127;
$L__BB0_119:
add.s32 %r5483, %r5483, 1;
setp.lt.u32 %p137, %r5483, %r5481;
@%p137 bra $L__BB0_127;
shl.b16 %rs312, %rs705, 1;
or.b16 %rs622, %rs312, 1;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p138, %r5277, 0;
mov.u32 %r5283, %r5480;
@%p138 bra $L__BB0_123;
shl.b16 %rs592, %rs705, 1;
or.b16 %rs622, %rs592, 1;
setp.gt.u32 %p139, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5283, 1;
@%p139 bra $L__BB0_123;
shl.b16 %rs594, %rs705, 1;
or.b16 %rs593, %rs594, 1;
and.b16 %rs314, %rs593, 255;
st.global.u8 [%rd4], %rs593;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p140, %rs314, 255;
selp.b32 %r5277, 7, 8, %p140;
mov.u16 %rs622, 0;
mov.u32 %r5283, %r5480;
$L__BB0_123:
add.s32 %r2654, %r5482, 1;
min.u32 %r236, %r2654, 12;
setp.lt.u32 %p141, %r236, 3;
mov.u32 %r5483, 0;
mov.u32 %r5286, %r5483;
@%p141 bra $L__BB0_126;
add.s32 %r5127, %r5482, 1;
min.u32 %r5126, %r5127, 12;
setp.lt.u32 %p142, %r5126, 6;
mov.u32 %r5286, 1;
@%p142 bra $L__BB0_126;
add.s32 %r5129, %r5482, 1;
min.u32 %r5128, %r5129, 12;
setp.lt.u32 %p143, %r5128, 9;
setp.eq.s32 %p144, %r5128, 11;
selp.b32 %r2656, 4, 5, %p144;
setp.lt.u32 %p145, %r5128, 11;
selp.b32 %r2657, 3, %r2656, %p145;
selp.b32 %r5286, 2, %r2657, %p143;
$L__BB0_126:
add.s32 %r5131, %r5482, 1;
min.u32 %r5482, %r5131, 12;
mov.u32 %r2659, 1;
shl.b32 %r5481, %r2659, %r5286;
mov.u32 %r5480, %r5283;
mov.u16 %rs705, %rs622;
$L__BB0_127:
max.s32 %r246, %r5227, 1;
and.b16 %rs315, %rs3, 15;
cvt.u32.u16 %r247, %rs315;
and.b32 %r2660, %r5212, 1;
setp.eq.b32 %p146, %r2660, 1;
mov.pred %p147, 0;
xor.pred %p148, %p146, %p147;
not.pred %p149, %p148;
mov.u32 %r5303, %r5933;
@%p149 bra $L__BB0_134;
and.b32 %r2661, %r247, 1;
sub.s32 %r5293, %r246, %r2661;
setp.eq.s32 %p150, %r5293, 0;
mov.u32 %r5303, %r5933;
@%p150 bra $L__BB0_134;
mov.u32 %r2662, -1;
shl.b32 %r2663, %r2662, %r5293;
not.b32 %r2664, %r2663;
and.b32 %r5294, %r5206, %r2664;
$L__BB0_130:
setp.gt.u32 %p151, %r5907, 17476;
mov.u32 %r5303, 1;
@%p151 bra $L__BB0_134;
sub.s32 %r2666, %r5906, %r5905;
min.u32 %r2667, %r2666, %r5293;
setp.eq.s32 %p152, %r2667, 32;
mov.u32 %r2668, -1;
shl.b32 %r2669, %r2668, %r2667;
not.b32 %r2670, %r2669;
selp.b32 %r2671, -1, %r2670, %p152;
and.b32 %r2672, %r2671, %r5294;
shl.b32 %r2673, %r2672, %r5905;
or.b32 %r5904, %r2673, %r5904;
add.s32 %r5905, %r2667, %r5905;
shr.u32 %r5294, %r5294, %r2667;
sub.s32 %r5293, %r5293, %r2667;
setp.lt.u32 %p153, %r5905, %r5906;
@%p153 bra $L__BB0_133;
cvt.u64.u32 %rd99, %r5907;
add.s64 %rd100, %rd1, %rd99;
st.global.u8 [%rd100], %r5904;
add.s32 %r5907, %r5907, 1;
setp.eq.s32 %p154, %r5904, 255;
selp.b32 %r5906, 7, 8, %p154;
mov.u32 %r5904, 0;
mov.u32 %r5905, %r5904;
$L__BB0_133:
setp.ne.s32 %p155, %r5293, 0;
mov.u32 %r5303, %r5933;
@%p155 bra $L__BB0_130;
$L__BB0_134:
and.b32 %r5105, %r5212, 2;
setp.eq.s32 %p156, %r5105, 0;
mov.u32 %r5318, %r5303;
@%p156 bra $L__BB0_141;
shr.u32 %r2676, %r247, 1;
and.b32 %r2677, %r2676, 1;
sub.s32 %r5308, %r246, %r2677;
setp.eq.s32 %p157, %r5308, 0;
mov.u32 %r5318, %r5303;
@%p157 bra $L__BB0_141;
mov.u32 %r2678, -1;
shl.b32 %r2679, %r2678, %r5308;
not.b32 %r2680, %r2679;
and.b32 %r5309, %r5210, %r2680;
$L__BB0_137:
setp.gt.u32 %p158, %r5907, 17476;
mov.u32 %r5318, 1;
@%p158 bra $L__BB0_141;
sub.s32 %r2682, %r5906, %r5905;
min.u32 %r2683, %r2682, %r5308;
setp.eq.s32 %p159, %r2683, 32;
mov.u32 %r2684, -1;
shl.b32 %r2685, %r2684, %r2683;
not.b32 %r2686, %r2685;
selp.b32 %r2687, -1, %r2686, %p159;
and.b32 %r2688, %r2687, %r5309;
shl.b32 %r2689, %r2688, %r5905;
or.b32 %r5904, %r2689, %r5904;
add.s32 %r5905, %r2683, %r5905;
shr.u32 %r5309, %r5309, %r2683;
sub.s32 %r5308, %r5308, %r2683;
setp.lt.u32 %p160, %r5905, %r5906;
@%p160 bra $L__BB0_140;
cvt.u64.u32 %rd101, %r5907;
add.s64 %rd102, %rd1, %rd101;
st.global.u8 [%rd102], %r5904;
add.s32 %r5907, %r5907, 1;
setp.eq.s32 %p161, %r5904, 255;
selp.b32 %r5906, 7, 8, %p161;
mov.u32 %r5904, 0;
mov.u32 %r5905, %r5904;
$L__BB0_140:
setp.ne.s32 %p162, %r5308, 0;
mov.u32 %r5318, %r5303;
@%p162 bra $L__BB0_137;
// Emit the bit field of the sample flagged by mask 4 of %r5212 into the
// forward byte stream at %rd1 (index %r5907, accumulator %r5904, bits used
// %r5905, bits available %r5906). Status leaves in %r5333: the incoming
// %r5318, or 1 once the byte index has passed 17476.
$L__BB0_141:
and.b32 %r2692, %r5212, 4;
setp.eq.s32 %p163, %r2692, 0;
mov.u32 %r5333, %r5318;
// Flag bit clear: nothing to emit for this sample.
@%p163 bra $L__BB0_148;
// %r5323 = number of bits to emit = %r246 - (bit 2 of %r247).
shr.u32 %r2693, %r247, 2;
and.b32 %r2694, %r2693, 1;
sub.s32 %r5323, %r246, %r2694;
setp.eq.s32 %p164, %r5323, 0;
mov.u32 %r5333, %r5318;
@%p164 bra $L__BB0_148;
// %r5324 = %r5226 restricted to its low %r5323 bits.
mov.u32 %r2695, -1;
shl.b32 %r2696, %r2695, %r5323;
not.b32 %r2697, %r2696;
and.b32 %r5324, %r5226, %r2697;
$L__BB0_144:
// Give up with status 1 if the byte index is already past 17476.
setp.gt.u32 %p165, %r5907, 17476;
mov.u32 %r5333, 1;
@%p165 bra $L__BB0_148;
// n (%r2700) = min(free bits in the current byte, bits still pending).
sub.s32 %r2699, %r5906, %r5905;
min.u32 %r2700, %r2699, %r5323;
// Mask of the n low bits; n == 32 is special-cased to all ones.
setp.eq.s32 %p166, %r2700, 32;
mov.u32 %r2701, -1;
shl.b32 %r2702, %r2701, %r2700;
not.b32 %r2703, %r2702;
selp.b32 %r2704, -1, %r2703, %p166;
// Merge the next n bits into the accumulator above the bits already there.
and.b32 %r2705, %r2704, %r5324;
shl.b32 %r2706, %r2705, %r5905;
or.b32 %r5904, %r2706, %r5904;
add.s32 %r5905, %r2700, %r5905;
shr.u32 %r5324, %r5324, %r2700;
sub.s32 %r5323, %r5323, %r2700;
// Byte not yet full: skip the store.
setp.lt.u32 %p167, %r5905, %r5906;
@%p167 bra $L__BB0_147;
// Store the low byte of the accumulator at %rd1[%r5907] and advance.
cvt.u64.u32 %rd103, %r5907;
add.s64 %rd104, %rd1, %rd103;
st.global.u8 [%rd104], %r5904;
add.s32 %r5907, %r5907, 1;
// After an accumulator value of 255 the next byte offers only 7 bits, else 8.
setp.eq.s32 %p168, %r5904, 255;
selp.b32 %r5906, 7, 8, %p168;
mov.u32 %r5904, 0;
mov.u32 %r5905, %r5904;
$L__BB0_147:
// Repeat until every pending bit has been consumed.
setp.ne.s32 %p169, %r5323, 0;
mov.u32 %r5333, %r5318;
@%p169 bra $L__BB0_144;
// Emit the bit field of the sample flagged by mask 8 of %r5212 into the
// forward byte stream at %rd1 (index %r5907, accumulator %r5904, bits used
// %r5905, bits available %r5906). Status leaves in %r5933: the incoming
// %r5333, or 1 once the byte index has passed 17476.
$L__BB0_148:
and.b32 %r5106, %r5212, 8;
setp.eq.s32 %p170, %r5106, 0;
mov.u32 %r5933, %r5333;
// Flag bit clear: nothing to emit for this sample.
@%p170 bra $L__BB0_155;
// %r5338 = number of bits to emit = %r246 - (%r247 >> 3); unlike the lower
// bit positions no "& 1" is applied to the shifted value here.
shr.u32 %r2709, %r247, 3;
sub.s32 %r5338, %r246, %r2709;
setp.eq.s32 %p171, %r5338, 0;
mov.u32 %r5933, %r5333;
@%p171 bra $L__BB0_155;
// %r5339 = %r5225 restricted to its low %r5338 bits.
mov.u32 %r2710, -1;
shl.b32 %r2711, %r2710, %r5338;
not.b32 %r2712, %r2711;
and.b32 %r5339, %r5225, %r2712;
$L__BB0_151:
// Give up with status 1 if the byte index is already past 17476.
setp.gt.u32 %p172, %r5907, 17476;
mov.u32 %r5933, 1;
@%p172 bra $L__BB0_155;
// n (%r2715) = min(free bits in the current byte, bits still pending).
sub.s32 %r2714, %r5906, %r5905;
min.u32 %r2715, %r2714, %r5338;
// Mask of the n low bits; n == 32 is special-cased to all ones.
setp.eq.s32 %p173, %r2715, 32;
mov.u32 %r2716, -1;
shl.b32 %r2717, %r2716, %r2715;
not.b32 %r2718, %r2717;
selp.b32 %r2719, -1, %r2718, %p173;
// Merge the next n bits into the accumulator above the bits already there.
and.b32 %r2720, %r2719, %r5339;
shl.b32 %r2721, %r2720, %r5905;
or.b32 %r5904, %r2721, %r5904;
add.s32 %r5905, %r2715, %r5905;
shr.u32 %r5339, %r5339, %r2715;
sub.s32 %r5338, %r5338, %r2715;
// Byte not yet full: skip the store.
setp.lt.u32 %p174, %r5905, %r5906;
@%p174 bra $L__BB0_154;
// Store the low byte of the accumulator at %rd1[%r5907] and advance.
cvt.u64.u32 %rd105, %r5907;
add.s64 %rd106, %rd1, %rd105;
st.global.u8 [%rd106], %r5904;
add.s32 %r5907, %r5907, 1;
// After an accumulator value of 255 the next byte offers only 7 bits, else 8.
setp.eq.s32 %p175, %r5904, 255;
selp.b32 %r5906, 7, 8, %p175;
mov.u32 %r5904, 0;
mov.u32 %r5905, %r5904;
$L__BB0_154:
// Repeat until every pending bit has been consumed.
setp.ne.s32 %p176, %r5338, 0;
mov.u32 %r5933, %r5333;
@%p176 bra $L__BB0_151;
// Dispatch: if %r5186 + 2 is still below %r5 (unsigned) continue with the
// sample-processing path at $L__BB0_184, otherwise take $L__BB0_156.
// %r340 = %r130 * 6 is computed on both paths; in the visible code it is
// consumed as a byte offset at $L__BB0_156.
$L__BB0_155:
add.s32 %r2724, %r5186, 2;
setp.lt.u32 %p177, %r2724, %r5;
mul.lo.s32 %r340, %r130, 6;
@%p177 bra $L__BB0_184;
bra.uni $L__BB0_156;
// Load up to four 32-bit words from the global buffer at %rd2 and classify
// each one. Word indices: %r5187, %r5187 + %r1, %r5187 + 1, %r5187 + 1 + %r1.
// The "+ %r1" loads are skipped when %p64 is set; the "+ 1" pair is skipped
// when %r5186 + 3 >= %r5.
// For every word x:
//   v = 0 if x == 0, else (|x| << (31 - %r2)) | (x & 0x80000000)
//   t = ((v << 1) >> %r45) & ~1
//   if t != 0: set the word's flag bit in %r5405 (1, 2, 4, 8),
//              e = 32 - clz(t - 1), field = (v >> 31) + t - 2,
//              and fold e into the running maximum %r5420.
// Results: e in %r5398 / %r5402 / %r5417 / %r5416, field in
// %r5399 / %r5403 / %r5419 / %r5418. %r5422 is the next word index
// (%r5187 + 1, or %r5187 + 2 if the second pair was processed).
$L__BB0_184:
// --- word 0: index %r5187 ---
mul.wide.u32 %rd119, %r5187, 4;
add.s64 %rd120, %rd2, %rd119;
ld.global.u32 %r413, [%rd120];
setp.eq.s32 %p214, %r413, 0;
mov.u32 %r5398, 0;
mov.u32 %r5397, %r5398;
@%p214 bra $L__BB0_186;
// Keep the sign in bit 31 and left-align the magnitude by (31 - %r2).
and.b32 %r2796, %r413, -2147483648;
abs.s32 %r2797, %r413;
mov.u32 %r2798, 31;
sub.s32 %r2799, %r2798, %r2;
shl.b32 %r2800, %r2797, %r2799;
or.b32 %r5397, %r2800, %r2796;
$L__BB0_186:
// Drop the sign bit, shift down by %r45 and clear bit 0.
shl.b32 %r2804, %r5397, 1;
shr.u32 %r2805, %r2804, %r45;
and.b32 %r416, %r2805, -2;
setp.eq.s32 %p215, %r416, 0;
mov.u32 %r5399, %r5398;
mov.u32 %r5405, %r5398;
@%p215 bra $L__BB0_188;
add.s32 %r2807, %r416, -1;
clz.b32 %r2808, %r2807;
mov.u32 %r2809, 32;
sub.s32 %r5398, %r2809, %r2808;
shr.u32 %r2810, %r5397, 31;
add.s32 %r2811, %r2810, %r416;
add.s32 %r5399, %r2811, -2;
// Word 0 is non-zero after scaling: flag mask starts at 1.
mov.u32 %r5405, 1;
$L__BB0_188:
// --- word 1: index %r5187 + %r1, treated as zero when %p64 is set ---
mov.u32 %r5402, 0;
mov.u32 %r5401, %r5402;
@%p64 bra $L__BB0_191;
add.s32 %r2814, %r5187, %r1;
mul.wide.u32 %rd121, %r2814, 4;
add.s64 %rd122, %rd2, %rd121;
ld.global.u32 %r422, [%rd122];
setp.eq.s32 %p217, %r422, 0;
@%p217 bra $L__BB0_191;
and.b32 %r2815, %r422, -2147483648;
abs.s32 %r2816, %r422;
mov.u32 %r2817, 31;
sub.s32 %r2818, %r2817, %r2;
shl.b32 %r2819, %r2816, %r2818;
or.b32 %r5401, %r2819, %r2815;
$L__BB0_191:
shl.b32 %r2822, %r5401, 1;
shr.u32 %r2823, %r2822, %r45;
and.b32 %r425, %r2823, -2;
setp.eq.s32 %p218, %r425, 0;
mov.u32 %r5403, %r5402;
// Running maximum starts as word 0's e.
mov.u32 %r5420, %r5398;
@%p218 bra $L__BB0_193;
or.b32 %r5405, %r5405, 2;
add.s32 %r2824, %r425, -1;
clz.b32 %r2825, %r2824;
mov.u32 %r2826, 32;
sub.s32 %r5402, %r2826, %r2825;
max.s32 %r5420, %r5398, %r5402;
shr.u32 %r2827, %r5401, 31;
add.s32 %r2828, %r2827, %r425;
add.s32 %r5403, %r2828, -2;
$L__BB0_193:
// Default next index is %r5187 + 1; words 2 and 3 default to zero results.
add.s32 %r5422, %r5187, 1;
add.s32 %r2833, %r5186, 3;
setp.ge.u32 %p219, %r2833, %r5;
mov.u32 %r5423, 0;
mov.u32 %r5416, %r5423;
mov.u32 %r5417, %r5423;
mov.u32 %r5418, %r5423;
mov.u32 %r5419, %r5423;
// Skip the second pair when %r5186 + 3 is not below %r5.
@%p219 bra $L__BB0_204;
// --- word 2: index %r5187 + 1 ---
add.s32 %r5132, %r5187, 1;
mul.wide.u32 %rd123, %r5132, 4;
add.s64 %rd124, %rd2, %rd123;
ld.global.u32 %r435, [%rd124];
setp.eq.s32 %p220, %r435, 0;
mov.u32 %r5417, 0;
mov.u32 %r5406, %r5417;
@%p220 bra $L__BB0_196;
and.b32 %r2835, %r435, -2147483648;
abs.s32 %r2836, %r435;
mov.u32 %r2837, 31;
sub.s32 %r2838, %r2837, %r2;
shl.b32 %r2839, %r2836, %r2838;
or.b32 %r5406, %r2839, %r2835;
$L__BB0_196:
shl.b32 %r2842, %r5406, 1;
shr.u32 %r2843, %r2842, %r45;
and.b32 %r438, %r2843, -2;
setp.eq.s32 %p221, %r438, 0;
mov.u32 %r5419, %r5417;
@%p221 bra $L__BB0_198;
or.b32 %r5405, %r5405, 4;
add.s32 %r2844, %r438, -1;
clz.b32 %r2845, %r2844;
mov.u32 %r2846, 32;
sub.s32 %r5417, %r2846, %r2845;
max.s32 %r5420, %r5420, %r5417;
shr.u32 %r2847, %r5406, 31;
add.s32 %r2848, %r2847, %r438;
add.s32 %r5419, %r2848, -2;
$L__BB0_198:
// --- word 3: index %r5187 + 1 + %r1, treated as zero when %p64 is set ---
mov.u32 %r5416, 0;
mov.u32 %r5411, %r5416;
@%p64 bra $L__BB0_201;
add.s32 %r5133, %r5187, 1;
add.s32 %r2851, %r5133, %r1;
mul.wide.u32 %rd125, %r2851, 4;
add.s64 %rd126, %rd2, %rd125;
ld.global.u32 %r447, [%rd126];
setp.eq.s32 %p223, %r447, 0;
@%p223 bra $L__BB0_201;
and.b32 %r2852, %r447, -2147483648;
abs.s32 %r2853, %r447;
mov.u32 %r2854, 31;
sub.s32 %r2855, %r2854, %r2;
shl.b32 %r2856, %r2853, %r2855;
or.b32 %r5411, %r2856, %r2852;
$L__BB0_201:
shl.b32 %r2859, %r5411, 1;
shr.u32 %r2860, %r2859, %r45;
and.b32 %r450, %r2860, -2;
setp.eq.s32 %p224, %r450, 0;
mov.u32 %r5418, %r5416;
@%p224 bra $L__BB0_203;
or.b32 %r5405, %r5405, 8;
add.s32 %r2861, %r450, -1;
clz.b32 %r2862, %r2861;
mov.u32 %r2863, 32;
sub.s32 %r5416, %r2863, %r2862;
max.s32 %r5420, %r5420, %r5416;
shr.u32 %r2864, %r5411, 31;
add.s32 %r2865, %r2864, %r450;
add.s32 %r5418, %r2865, -2;
$L__BB0_203:
// Both pairs consumed: next index is %r5187 + 2.
add.s32 %r5422, %r5187, 2;
// Commit the advanced word index and derive three values from the results
// gathered for the current group of up to four words:
//   %r467  = (%r5212 >> 1) | (%r5212 & 1)
//   %r468  = %r5420 - 1 when %r5420 > 1, else 0
//   %r5423 = 4-bit mask, bit k set when word k's e equals the maximum
//            %r5420; only built when %r5420 >= 2, otherwise it keeps the
//            value it had on entry to this block.
$L__BB0_204:
mov.u32 %r5187, %r5422;
and.b32 %r5117, %r5212, 1;
shr.u32 %r2868, %r5212, 1;
or.b32 %r467, %r2868, %r5117;
add.s32 %r2869, %r5420, -1;
setp.lt.s32 %p225, %r5420, 2;
setp.gt.s32 %p226, %r5420, 1;
selp.b32 %r468, %r2869, 0, %p226;
@%p225 bra $L__BB0_206;
// bit 0: word 0's e equals the maximum.
setp.eq.s32 %p227, %r5398, %r5420;
selp.u32 %r2870, 1, 0, %p227;
// bit 1: word 1; bfi copies one bit of %r2871 into bit position 1.
setp.eq.s32 %p228, %r5402, %r5420;
selp.u32 %r2871, -1, 0, %p228;
bfi.b32 %r2872, %r2871, %r2870, 1, 1;
// bit 2: word 2.
setp.eq.s32 %p229, %r5417, %r5420;
selp.u16 %rs335, 1, 0, %p229;
mul.wide.u16 %r2873, %rs335, 4;
or.b32 %r2874, %r2872, %r2873;
// bit 3: word 3.
setp.eq.s32 %p230, %r5416, %r5420;
selp.u16 %rs336, 1, 0, %p230;
mul.wide.u16 %r2875, %rs336, 8;
or.b32 %r5423, %r2874, %r2875;
// 1) Record per-column context in the shared arrays cleanup_e_val and
//    cleanup_cx_val at byte offset (%r5186 >> 1), slots +1 and +2.
// 2) Look up a 16-bit entry in the global table at %rd79 using index
//    (%r5405 << 4) | (%r467 << 8) | %r5423.
// 3) Emit the entry's code bits (entry bits 8..15, length = entry bits 4..6)
//    into the backward byte stream, which stores at %rd1 + (20548 - %r5719).
// Status leaves in %r5435: %r5242, or 1 once %r5719 has passed 2879.
// NOTE(review): presumably this is the VLC stream of the HTJ2K cleanup
// pass - confirm against the CUDA source.
$L__BB0_206:
shr.u32 %r5116, %r5186, 1;
mov.u32 %r5115, _ZZ31 j2k_htj2k_encode_codeblockE14cleanup_cx_val;
add.s32 %r5114, %r5115, %r5116;
and.b32 %r5113, %r5212, 8;
shr.u32 %r5112, %r5113, 3;
mov.u32 %r5111, _ZZ31 j2k_htj2k_encode_codeblockE13cleanup_e_val;
add.s32 %r5110, %r5111, %r5116;
// e_val[+1] = whichever of %r5223 / %r5402 has the larger low byte.
and.b32 %r2876, %r5402, 255;
and.b32 %r2877, %r5223, 255;
setp.lt.u32 %p231, %r2876, %r2877;
cvt.u16.u32 %rs337, %r5223;
cvt.u16.u32 %rs338, %r5402;
selp.b16 %rs339, %rs337, %rs338, %p231;
st.shared.u8 [%r5110+1], %rs339;
// e_val[+2] = low byte of %r5416.
st.shared.u8 [%r5110+2], %r5416;
// cx_val[+1] = (bit 3 of %r5212) | (bit 1 of %r5405).
and.b32 %r471, %r5405, 2;
shr.u32 %r2881, %r471, 1;
or.b32 %r2882, %r5112, %r2881;
st.shared.u8 [%r5114+1], %r2882;
// cx_val[+2] = bit 3 of %r5405.
and.b32 %r472, %r5405, 8;
shr.u32 %r2885, %r472, 3;
st.shared.u8 [%r5114+2], %r2885;
// Table lookup: 2-byte entries addressed from %rd79.
shl.b32 %r2886, %r5405, 4;
shl.b32 %r2887, %r467, 8;
or.b32 %r2888, %r2886, %r2887;
or.b32 %r2889, %r2888, %r5423;
mul.wide.u32 %rd128, %r2889, 2;
add.s64 %rd129, %rd79, %rd128;
ld.global.u16 %rs47, [%rd129];
// Length field = (entry >> 4) & 7; zero length means nothing to emit.
shr.u16 %rs340, %rs47, 4;
and.b16 %rs48, %rs340, 7;
setp.eq.s16 %p232, %rs48, 0;
mov.u32 %r5435, %r5242;
@%p232 bra $L__BB0_213;
// %r5424 = bits pending, %r5425 = code bits (entry >> 8).
cvt.u32.u16 %r5424, %rs48;
shr.u16 %rs341, %rs47, 8;
cvt.u32.u16 %r5425, %rs341;
$L__BB0_208:
// Backward writer state: %rs688 accumulator, %r5718 bits used,
// %r5717 one-bit reservation flag, %r5719 bytes written so far.
mov.u16 %rs49, %rs688;
mov.u32 %r475, %r5424;
// Give up with status 1 if the byte count is already past 2879.
setp.gt.u32 %p233, %r5719, 2879;
mov.u32 %r5435, 1;
@%p233 bra $L__BB0_213;
// Free bits = 8 - %r5717 - %r5718; n = min(free bits, bits pending).
mov.u32 %r2891, 8;
sub.s32 %r2892, %r2891, %r5717;
sub.s32 %r2893, %r2892, %r5718;
min.u32 %r2894, %r2893, %r475;
// Mask of the n low bits; n == 32 is special-cased to all ones.
setp.eq.s32 %p234, %r2894, 32;
mov.u32 %r2895, -1;
shl.b32 %r2896, %r2895, %r2894;
not.b32 %r2897, %r2896;
selp.b32 %r2898, -1, %r2897, %p234;
and.b32 %r2899, %r2898, %r5425;
shl.b32 %r2900, %r2899, %r5718;
cvt.u16.u32 %rs342, %r2900;
or.b16 %rs688, %rs49, %rs342;
add.s32 %r5718, %r2894, %r5718;
sub.s32 %r5424, %r475, %r2894;
shr.u32 %r5425, %r5425, %r2894;
// More room than bits pending: byte is not full yet.
setp.gt.u32 %p235, %r2893, %r475;
@%p235 bra $L__BB0_212;
// Byte full under the current reservation. If the reservation flag was set
// and the byte is not 127, just clear the flag and keep filling; otherwise
// store the byte.
setp.ne.s32 %p236, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs343, %rs688, 255;
setp.ne.s16 %p237, %rs343, 127;
and.pred %p238, %p236, %p237;
@%p238 bra $L__BB0_212;
cvt.u16.u32 %rs596, %r2900;
or.b16 %rs595, %rs49, %rs596;
// Store address walks downward from offset 20548 as %r5719 grows.
mov.u32 %r2903, 20548;
sub.s32 %r2904, %r2903, %r5719;
cvt.u64.u32 %rd130, %r2904;
add.s64 %rd131, %rd1, %rd130;
st.global.u8 [%rd131], %rs595;
add.s32 %r5719, %r5719, 1;
// A stored byte above 143 sets the reservation flag for the next byte.
setp.gt.u16 %p239, %rs343, 143;
selp.u32 %r5717, 1, 0, %p239;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_212:
// Repeat until every pending bit has been consumed.
setp.ne.s32 %p240, %r5424, 0;
mov.u32 %r5435, %r5242;
@%p240 bra $L__BB0_208;
// Only when %r467 == 0: update the third bit stream, stored at
// %rd1 + 17477 + %r5271 (byte count %r5271, limit 191; bit accumulator
// %rs705, bits left in the current byte %r5277).
// If %r5405 == 0 the run-counter path at $L__BB0_253 is taken. Otherwise a
// 0 bit is appended here and the state %r5482 is mapped to a bit count
// %r5439 for the emission that follows.
// NOTE(review): this looks like the MEL adaptive run-length coder of HTJ2K
// (state table 0,0,0,1,1,1,2,2,2,3,3,4,5) - confirm against the CUDA source.
$L__BB0_213:
setp.ne.s32 %p241, %r467, 0;
@%p241 bra $L__BB0_261;
setp.eq.s32 %p242, %r5405, 0;
// %rd6 = address of the next byte of this stream.
add.s32 %r2905, %r5271, 17477;
cvt.u64.u32 %rd132, %r2905;
add.s64 %rd6, %rd1, %rd132;
@%p242 bra $L__BB0_253;
// Append a 0 bit: shift the accumulator left and use up one bit slot.
shl.b16 %rs643, %rs705, 1;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p243, %r5277, 0;
mov.u32 %r5469, %r5480;
@%p243 bra $L__BB0_218;
// Byte complete. With more than 191 bytes already written, report status 1
// in %r5469 instead of storing.
shl.b16 %rs643, %rs705, 1;
setp.gt.u32 %p244, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5469, 1;
@%p244 bra $L__BB0_218;
// Store the byte and start a fresh one with 8 free bits.
shl.b16 %rs598, %rs705, 1;
st.global.u8 [%rd6], %rs598;
add.s32 %r5271, %r5271, 1;
mov.u32 %r5277, 8;
mov.u16 %rs643, 0;
mov.u32 %r5469, %r5480;
$L__BB0_218:
// %r5439 = bit count for state %r5482:
//   <3 -> 0, <6 -> 1, <9 -> 2, <11 -> 3, ==11 -> 4, otherwise 5.
setp.lt.u32 %p245, %r5482, 3;
mov.u32 %r5439, 0;
@%p245 bra $L__BB0_221;
setp.lt.u32 %p246, %r5482, 6;
mov.u32 %r5439, 1;
@%p246 bra $L__BB0_221;
setp.lt.u32 %p247, %r5482, 9;
setp.eq.s32 %p248, %r5482, 11;
selp.b32 %r2911, 4, 5, %p248;
setp.lt.u32 %p249, %r5482, 11;
selp.b32 %r2912, 3, %r2911, %p249;
selp.b32 %r5439, 2, %r2912, %p247;
// Emit the low %r5439 bits of %r5483, most significant first, into the bit
// accumulator %rs643. This part handles the (%r5439 & 3) leading bits one
// at a time; the remaining multiple of four is handled by the unrolled loop
// at $L__BB0_236. Each bit is appended as (acc << 1) | bit; when the byte's
// bit budget %r5277 reaches zero the byte is stored at
// %rd1 + 17477 + %r5271, or status 1 is raised if %r5271 > 191.
// %r5449 tracks how many bits remain; %r5452 / %r5469 carry the status.
$L__BB0_221:
setp.eq.s32 %p250, %r5439, 0;
// Zero-length field: nothing to emit.
@%p250 bra $L__BB0_249;
and.b32 %r500, %r5439, 3;
setp.eq.s32 %p251, %r500, 0;
mov.u32 %r5449, %r5439;
mov.u32 %r5452, %r5469;
@%p251 bra $L__BB0_234;
// --- first leading bit: bit (%r5439 - 1) of %r5483 ---
add.s32 %r5136, %r5439, -1;
mov.u32 %r2914, 1;
shl.b32 %r2915, %r2914, %r5136;
and.b32 %r2916, %r2915, %r5483;
setp.ne.s32 %p252, %r2916, 0;
selp.u32 %r2917, 1, 0, %p252;
cvt.u32.u16 %r2918, %rs643;
// bfi places the old accumulator at bit 1 above the new bit.
bfi.b32 %r2919, %r2918, %r2917, 1, 8;
cvt.u16.u32 %rs643, %r2919;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p253, %r5277, 0;
mov.u32 %r5452, %r5469;
@%p253 bra $L__BB0_226;
setp.gt.u32 %p254, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5452, %r2914;
@%p254 bra $L__BB0_226;
add.s32 %r2923, %r5271, 17477;
cvt.u64.u32 %rd133, %r2923;
add.s64 %rd134, %rd1, %rd133;
st.global.u8 [%rd134], %rs643;
add.s32 %r5271, %r5271, 1;
// This flush always grants 8 bits to the next byte (no 255 test here).
mov.u32 %r5277, 8;
mov.u16 %rs643, 0;
mov.u32 %r5452, %r5469;
$L__BB0_226:
and.b32 %r5138, %r5439, 3;
add.s32 %r5449, %r5439, -1;
setp.eq.s32 %p255, %r5138, 1;
mov.u32 %r5469, %r5452;
@%p255 bra $L__BB0_234;
// --- second leading bit: bit (%r5439 - 2) of %r5483 ---
add.s32 %r5449, %r5439, -2;
mov.u32 %r2924, 1;
shl.b32 %r2925, %r2924, %r5449;
and.b32 %r2926, %r2925, %r5483;
setp.ne.s32 %p256, %r2926, 0;
selp.u32 %r2927, 1, 0, %p256;
cvt.u32.u16 %r2928, %rs643;
bfi.b32 %r2929, %r2928, %r2927, 1, 8;
cvt.u16.u32 %rs643, %r2929;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p257, %r5277, 0;
mov.u32 %r5443, %r5452;
@%p257 bra $L__BB0_230;
setp.gt.u32 %p258, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5443, %r2924;
@%p258 bra $L__BB0_230;
add.s32 %r2932, %r5271, 17477;
cvt.u64.u32 %rd135, %r2932;
add.s64 %rd136, %rd1, %rd135;
and.b16 %rs350, %rs643, 255;
st.global.u8 [%rd136], %rs643;
add.s32 %r5271, %r5271, 1;
// A stored byte of 255 leaves 7 bits for the next byte, otherwise 8.
setp.eq.s16 %p259, %rs350, 255;
selp.b32 %r5277, 7, 8, %p259;
mov.u16 %rs643, 0;
mov.u32 %r5443, %r5452;
$L__BB0_230:
and.b32 %r5139, %r5439, 3;
setp.eq.s32 %p260, %r5139, 2;
mov.u32 %r5469, %r5443;
mov.u32 %r5452, %r5443;
@%p260 bra $L__BB0_234;
// --- third leading bit: bit (%r5439 - 3) of %r5483 ---
add.s32 %r5449, %r5439, -3;
mov.u32 %r2933, 1;
shl.b32 %r2934, %r2933, %r5449;
and.b32 %r2935, %r2934, %r5483;
setp.ne.s32 %p261, %r2935, 0;
selp.u32 %r2936, 1, 0, %p261;
cvt.u32.u16 %r2937, %rs643;
bfi.b32 %r2938, %r2937, %r2936, 1, 8;
cvt.u16.u32 %rs643, %r2938;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p262, %r5277, 0;
mov.u32 %r5469, %r5443;
mov.u32 %r5452, %r5443;
@%p262 bra $L__BB0_234;
add.s32 %r5449, %r5439, -3;
setp.gt.u32 %p263, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5469, %r2933;
mov.u32 %r5452, %r2933;
@%p263 bra $L__BB0_234;
add.s32 %r5449, %r5439, -3;
add.s32 %r2943, %r5271, 17477;
cvt.u64.u32 %rd137, %r2943;
add.s64 %rd138, %rd1, %rd137;
and.b16 %rs353, %rs643, 255;
st.global.u8 [%rd138], %rs643;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p264, %rs353, 255;
selp.b32 %r5277, 7, 8, %p264;
mov.u16 %rs643, 0;
mov.u32 %r5469, %r5443;
mov.u32 %r5452, %r5443;
$L__BB0_234:
// Fewer than four bits in total (%r5439 in 1..3): the field is complete.
add.s32 %r5140, %r5439, -1;
setp.lt.u32 %p265, %r5140, 3;
@%p265 bra $L__BB0_249;
mov.u32 %r5469, %r5452;
// Four-times unrolled continuation of the MSB-first bit emission: appends
// bits (%r5449 - 1) down to (%r5449 - 4) of %r5483, then decrements %r5449
// by 4 and repeats while it is non-zero. Between the unrolled steps the
// accumulator travels through %r5458 -> %r5462 -> %r5466 -> %rs643, the
// per-byte bit budget through %r5459 -> %r5463 -> %r5467 -> %r5277, and the
// status through %r5457 -> %r5461 -> %r5465 -> %r5469. Each step flushes a
// completed byte to %rd1 + 17477 + %r5271 (7 bits next if the byte was 255,
// else 8) or raises status 1 when %r5271 > 191.
$L__BB0_236:
// --- step 1: bit (%r5449 - 1) ---
add.s32 %r2944, %r5449, -1;
mov.u32 %r2945, 1;
shl.b32 %r2946, %r2945, %r2944;
and.b32 %r2947, %r2946, %r5483;
setp.ne.s32 %p266, %r2947, 0;
selp.u32 %r2948, 1, 0, %p266;
cvt.u32.u16 %r2949, %rs643;
bfi.b32 %r5458, %r2949, %r2948, 1, 8;
add.s32 %r5459, %r5277, -1;
setp.ne.s32 %p267, %r5459, 0;
mov.u32 %r5457, %r5469;
@%p267 bra $L__BB0_239;
setp.gt.u32 %p268, %r5271, 191;
mov.u32 %r5459, 0;
mov.u32 %r5457, %r2945;
@%p268 bra $L__BB0_239;
cvt.u16.u32 %rs354, %r5458;
and.b16 %rs355, %rs354, 255;
add.s32 %r2953, %r5271, 17477;
cvt.u64.u32 %rd139, %r2953;
add.s64 %rd140, %rd1, %rd139;
st.global.u8 [%rd140], %rs354;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p269, %rs355, 255;
selp.b32 %r5459, 7, 8, %p269;
mov.u32 %r5458, 0;
mov.u32 %r5457, %r5469;
$L__BB0_239:
// --- step 2: bit (%r5449 - 2); only the low 7 accumulator bits are kept ---
add.s32 %r2954, %r5449, -2;
shl.b32 %r2956, %r2945, %r2954;
and.b32 %r2957, %r2956, %r5483;
setp.ne.s32 %p270, %r2957, 0;
and.b32 %r2958, %r5458, 127;
selp.u32 %r2959, 1, 0, %p270;
bfi.b32 %r5462, %r2958, %r2959, 1, 7;
add.s32 %r5463, %r5459, -1;
setp.ne.s32 %p271, %r5463, 0;
mov.u32 %r5461, %r5457;
@%p271 bra $L__BB0_242;
setp.gt.u32 %p272, %r5271, 191;
mov.u32 %r5463, 0;
mov.u32 %r5461, 1;
@%p272 bra $L__BB0_242;
cvt.u16.u32 %rs356, %r5462;
and.b16 %rs357, %rs356, 255;
add.s32 %r2963, %r5271, 17477;
cvt.u64.u32 %rd141, %r2963;
add.s64 %rd142, %rd1, %rd141;
st.global.u8 [%rd142], %rs356;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p273, %rs357, 255;
selp.b32 %r5463, 7, 8, %p273;
mov.u32 %r5462, 0;
mov.u32 %r5461, %r5457;
$L__BB0_242:
// --- step 3: bit (%r5449 - 3) ---
add.s32 %r2964, %r5449, -3;
mov.u32 %r2965, 1;
shl.b32 %r2966, %r2965, %r2964;
and.b32 %r2967, %r2966, %r5483;
setp.ne.s32 %p274, %r2967, 0;
and.b32 %r2968, %r5462, 127;
selp.u32 %r2969, 1, 0, %p274;
bfi.b32 %r5466, %r2968, %r2969, 1, 7;
add.s32 %r5467, %r5463, -1;
setp.ne.s32 %p275, %r5467, 0;
mov.u32 %r5465, %r5461;
@%p275 bra $L__BB0_245;
setp.gt.u32 %p276, %r5271, 191;
mov.u32 %r5467, 0;
mov.u32 %r5465, %r2965;
@%p276 bra $L__BB0_245;
cvt.u16.u32 %rs358, %r5466;
and.b16 %rs359, %rs358, 255;
add.s32 %r2973, %r5271, 17477;
cvt.u64.u32 %rd143, %r2973;
add.s64 %rd144, %rd1, %rd143;
st.global.u8 [%rd144], %rs358;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p277, %rs359, 255;
selp.b32 %r5467, 7, 8, %p277;
mov.u32 %r5466, 0;
mov.u32 %r5465, %r5461;
$L__BB0_245:
// --- step 4: bit (%r5449 - 4); also commits the decremented %r5449 ---
add.s32 %r5449, %r5449, -4;
shl.b32 %r2975, %r2965, %r5449;
and.b32 %r2976, %r2975, %r5483;
setp.ne.s32 %p278, %r2976, 0;
and.b32 %r2977, %r5466, 127;
selp.u32 %r2978, 1, 0, %p278;
bfi.b32 %r2979, %r2977, %r2978, 1, 15;
cvt.u16.u32 %rs643, %r2979;
add.s32 %r5277, %r5467, -1;
setp.ne.s32 %p279, %r5277, 0;
mov.u32 %r5469, %r5465;
@%p279 bra $L__BB0_248;
setp.gt.u32 %p280, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5469, 1;
@%p280 bra $L__BB0_248;
add.s32 %r2982, %r5271, 17477;
cvt.u64.u32 %rd145, %r2982;
add.s64 %rd146, %rd1, %rd145;
and.b16 %rs361, %rs643, 255;
st.global.u8 [%rd146], %rs643;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p281, %rs361, 255;
selp.b32 %r5277, 7, 8, %p281;
mov.u16 %rs643, 0;
mov.u32 %r5469, %r5465;
$L__BB0_248:
// Loop while bits remain.
setp.ne.s32 %p282, %r5449, 0;
@%p282 bra $L__BB0_236;
// After the bit field has been emitted: step the state %r5482 down by one
// (saturating at 0), clear the run counter %r5483, and recompute the run
// threshold %r5481 = 1 << f(%r5482), with f: <3 -> 0, <6 -> 1, <9 -> 2,
// <11 -> 3, ==11 -> 4, otherwise 5. The working status and accumulator are
// then committed back to %r5480 and %rs705.
$L__BB0_249:
add.s32 %r2984, %r5482, -1;
setp.eq.s32 %p283, %r5482, 0;
mov.u32 %r5483, 0;
selp.b32 %r5482, 0, %r2984, %p283;
setp.lt.u32 %p284, %r5482, 3;
mov.u32 %r5475, %r5483;
@%p284 bra $L__BB0_252;
setp.lt.u32 %p285, %r5482, 6;
mov.u32 %r5475, 1;
@%p285 bra $L__BB0_252;
setp.lt.u32 %p286, %r5482, 9;
setp.eq.s32 %p287, %r5482, 11;
selp.b32 %r2986, 4, 5, %p287;
setp.lt.u32 %p288, %r5482, 11;
selp.b32 %r2987, 3, %r2986, %p288;
selp.b32 %r5475, 2, %r2987, %p286;
$L__BB0_252:
mov.u32 %r2989, 1;
shl.b32 %r5481, %r2989, %r5475;
mov.u32 %r5480, %r5469;
mov.u16 %rs705, %rs643;
bra.uni $L__BB0_261;
// Taken when %r5186 + 2 is not below %r5. Reads (value, length) byte pairs
// from the global table at %rd108 and emits them into the backward byte
// stream (stores at %rd1 + (20548 - %r5719)):
//   pair A: value [%rd108 + %r340],     length [%rd108 + %r340 + 1]
//   pair C: value [%rd108 + %r340 + 2], length [%rd108 + %r340 + 3]
//   pair B: value [%rd108 + 0],         length [%rd108 + 1]
//   pair D: value [%rd108 + 2],         length [%rd108 + 3]
// This block loads all lengths/values up front and emits pair A; the
// following blocks emit B, C and D in that order.
// Status leaves in %r5364: %r5242, or 1 once %r5719 has passed 2879.
$L__BB0_156:
cvt.u64.u32 %rd107, %r340;
add.s64 %rd5, %rd108, %rd107;
add.s32 %r2725, %r340, 2;
cvt.u64.u32 %rd109, %r2725;
add.s64 %rd110, %rd108, %rd109;
ld.global.u8 %rs25, [%rd5+1];
ld.global.u8 %rs26, [%rd110];
ld.global.u8 %rs27, [%rd110+1];
ld.global.u8 %rs28, [%rd108];
ld.global.u8 %rs29, [%rd108+1];
ld.global.u8 %rs30, [%rd108+2];
ld.global.u8 %rs31, [%rd108+3];
// Pair A with zero length: nothing to emit.
setp.eq.s16 %p178, %rs25, 0;
mov.u32 %r5364, %r5242;
@%p178 bra $L__BB0_163;
// %r5354 = value bits, %r5353 = bits pending.
ld.global.u8 %r5354, [%rd5];
cvt.u32.u16 %r5353, %rs25;
$L__BB0_158:
mov.u16 %rs32, %rs688;
mov.u32 %r343, %r5353;
// Give up with status 1 if the byte count is already past 2879.
setp.gt.u32 %p179, %r5719, 2879;
mov.u32 %r5364, 1;
@%p179 bra $L__BB0_163;
// Free bits = 8 - %r5717 - %r5718; n = min(free bits, bits pending).
mov.u32 %r2727, 8;
sub.s32 %r2728, %r2727, %r5717;
sub.s32 %r2729, %r2728, %r5718;
min.u32 %r2730, %r2729, %r343;
setp.eq.s32 %p180, %r2730, 32;
mov.u32 %r2731, -1;
shl.b32 %r2732, %r2731, %r2730;
not.b32 %r2733, %r2732;
selp.b32 %r2734, -1, %r2733, %p180;
and.b32 %r2735, %r2734, %r5354;
shl.b32 %r2736, %r2735, %r5718;
cvt.u16.u32 %rs316, %r2736;
or.b16 %rs688, %rs32, %rs316;
add.s32 %r5718, %r2730, %r5718;
sub.s32 %r5353, %r343, %r2730;
shr.u32 %r5354, %r5354, %r2730;
// More room than bits pending: byte is not full yet.
setp.gt.u32 %p181, %r2729, %r343;
@%p181 bra $L__BB0_162;
// Byte full under the current reservation. If the reservation flag %r5717
// was set and the byte is not 127, clear the flag and keep filling;
// otherwise store the byte.
setp.ne.s32 %p182, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs317, %rs688, 255;
setp.ne.s16 %p183, %rs317, 127;
and.pred %p184, %p182, %p183;
@%p184 bra $L__BB0_162;
cvt.u16.u32 %rs604, %r2736;
or.b16 %rs603, %rs32, %rs604;
mov.u32 %r2739, 20548;
sub.s32 %r2740, %r2739, %r5719;
cvt.u64.u32 %rd111, %r2740;
add.s64 %rd112, %rd1, %rd111;
st.global.u8 [%rd112], %rs603;
add.s32 %r5719, %r5719, 1;
// A stored byte above 143 sets the reservation flag for the next byte.
setp.gt.u16 %p185, %rs317, 143;
selp.u32 %r5717, 1, 0, %p185;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_162:
setp.ne.s32 %p186, %r5353, 0;
mov.u32 %r5364, %r5242;
@%p186 bra $L__BB0_158;
// Emit the (value %rs28, length %rs29) pair into the backward byte stream
// (stores at %rd1 + (20548 - %r5719); accumulator %rs688, bits used %r5718,
// reservation flag %r5717). Status leaves in %r5376: the incoming %r5364,
// or 1 once %r5719 has passed 2879.
$L__BB0_163:
setp.eq.s16 %p187, %rs29, 0;
mov.u32 %r5376, %r5364;
// Zero length: nothing to emit.
@%p187 bra $L__BB0_170;
cvt.u32.u16 %r2741, %rs28;
and.b32 %r5366, %r2741, 255;
cvt.u32.u16 %r2742, %rs29;
and.b32 %r5365, %r2742, 255;
$L__BB0_165:
mov.u32 %r362, %r5365;
setp.gt.u32 %p188, %r5719, 2879;
mov.u32 %r5376, 1;
@%p188 bra $L__BB0_170;
// Free bits = 8 - %r5717 - %r5718; n = min(free bits, bits pending).
mov.u32 %r2744, 8;
sub.s32 %r2745, %r2744, %r5717;
sub.s32 %r2746, %r2745, %r5718;
min.u32 %r2747, %r2746, %r362;
setp.eq.s32 %p189, %r2747, 32;
mov.u32 %r2748, -1;
shl.b32 %r2749, %r2748, %r2747;
not.b32 %r2750, %r2749;
selp.b32 %r2751, -1, %r2750, %p189;
and.b32 %r2752, %r2751, %r5366;
shl.b32 %r2753, %r2752, %r5718;
cvt.u16.u32 %rs321, %r2753;
or.b16 %rs688, %rs688, %rs321;
add.s32 %r5718, %r2747, %r5718;
sub.s32 %r5365, %r362, %r2747;
shr.u32 %r5366, %r5366, %r2747;
// More room than bits pending: byte is not full yet.
setp.gt.u32 %p190, %r2746, %r362;
@%p190 bra $L__BB0_169;
// Byte full: if the reservation flag was set and the byte is not 127, clear
// the flag and keep filling; otherwise store the byte.
setp.ne.s32 %p191, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs322, %rs688, 255;
setp.ne.s16 %p192, %rs322, 127;
and.pred %p193, %p191, %p192;
@%p193 bra $L__BB0_169;
mov.u32 %r2756, 20548;
sub.s32 %r2757, %r2756, %r5719;
cvt.u64.u32 %rd113, %r2757;
add.s64 %rd114, %rd1, %rd113;
st.global.u8 [%rd114], %rs688;
add.s32 %r5719, %r5719, 1;
// A stored byte above 143 sets the reservation flag for the next byte.
setp.gt.u16 %p194, %rs322, 143;
selp.u32 %r5717, 1, 0, %p194;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_169:
setp.ne.s32 %p195, %r5365, 0;
mov.u32 %r5376, %r5364;
@%p195 bra $L__BB0_165;
// Emit the (value %rs26, length %rs27) pair into the backward byte stream
// (stores at %rd1 + (20548 - %r5719); accumulator %rs688, bits used %r5718,
// reservation flag %r5717). Status leaves in %r5388: the incoming %r5376,
// or 1 once %r5719 has passed 2879.
$L__BB0_170:
setp.eq.s16 %p196, %rs27, 0;
mov.u32 %r5388, %r5376;
// Zero length: nothing to emit.
@%p196 bra $L__BB0_177;
cvt.u32.u16 %r2758, %rs27;
and.b32 %r5377, %r2758, 255;
cvt.u32.u16 %r2759, %rs26;
and.b32 %r5378, %r2759, 255;
$L__BB0_172:
mov.u32 %r381, %r5377;
setp.gt.u32 %p197, %r5719, 2879;
mov.u32 %r5388, 1;
@%p197 bra $L__BB0_177;
// Free bits = 8 - %r5717 - %r5718; n = min(free bits, bits pending).
mov.u32 %r2761, 8;
sub.s32 %r2762, %r2761, %r5717;
sub.s32 %r2763, %r2762, %r5718;
min.u32 %r2764, %r2763, %r381;
setp.eq.s32 %p198, %r2764, 32;
mov.u32 %r2765, -1;
shl.b32 %r2766, %r2765, %r2764;
not.b32 %r2767, %r2766;
selp.b32 %r2768, -1, %r2767, %p198;
and.b32 %r2769, %r2768, %r5378;
shl.b32 %r2770, %r2769, %r5718;
cvt.u16.u32 %rs326, %r2770;
or.b16 %rs688, %rs688, %rs326;
add.s32 %r5718, %r2764, %r5718;
sub.s32 %r5377, %r381, %r2764;
shr.u32 %r5378, %r5378, %r2764;
// More room than bits pending: byte is not full yet.
setp.gt.u32 %p199, %r2763, %r381;
@%p199 bra $L__BB0_176;
// Byte full: if the reservation flag was set and the byte is not 127, clear
// the flag and keep filling; otherwise store the byte.
setp.ne.s32 %p200, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs327, %rs688, 255;
setp.ne.s16 %p201, %rs327, 127;
and.pred %p202, %p200, %p201;
@%p202 bra $L__BB0_176;
mov.u32 %r2773, 20548;
sub.s32 %r2774, %r2773, %r5719;
cvt.u64.u32 %rd115, %r2774;
add.s64 %rd116, %rd1, %rd115;
st.global.u8 [%rd116], %rs688;
add.s32 %r5719, %r5719, 1;
// A stored byte above 143 sets the reservation flag for the next byte.
setp.gt.u16 %p203, %rs327, 143;
selp.u32 %r5717, 1, 0, %p203;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_176:
setp.ne.s32 %p204, %r5377, 0;
mov.u32 %r5388, %r5376;
@%p204 bra $L__BB0_172;
// Emit the (value %rs30, length %rs31) pair into the backward byte stream
// (stores at %rd1 + (20548 - %r5719)), then continue at $L__BB0_416 with
// %r5736 = 0. Status leaves in %r5716: the incoming %r5388, or 1 once
// %r5719 has passed 2879.
$L__BB0_177:
setp.eq.s16 %p205, %rs31, 0;
mov.u32 %r5736, 0;
mov.u32 %r5716, %r5388;
// Zero length: nothing to emit.
@%p205 bra $L__BB0_416;
cvt.u32.u16 %r2776, %rs30;
and.b32 %r5390, %r2776, 255;
cvt.u32.u16 %r2777, %rs31;
and.b32 %r5389, %r2777, 255;
$L__BB0_179:
mov.u32 %r400, %r5389;
mov.u32 %r5736, 0;
setp.gt.u32 %p206, %r5719, 2879;
mov.u32 %r5716, 1;
@%p206 bra $L__BB0_416;
// Free bits = 8 - %r5717 - %r5718; n = min(free bits, bits pending).
mov.u32 %r2780, 8;
sub.s32 %r2781, %r2780, %r5717;
sub.s32 %r2782, %r2781, %r5718;
min.u32 %r2783, %r2782, %r400;
setp.eq.s32 %p207, %r2783, 32;
mov.u32 %r2784, -1;
shl.b32 %r2785, %r2784, %r2783;
not.b32 %r2786, %r2785;
selp.b32 %r2787, -1, %r2786, %p207;
and.b32 %r2788, %r2787, %r5390;
shl.b32 %r2789, %r2788, %r5718;
cvt.u16.u32 %rs331, %r2789;
or.b16 %rs688, %rs688, %rs331;
add.s32 %r5718, %r2783, %r5718;
sub.s32 %r5389, %r400, %r2783;
shr.u32 %r5390, %r5390, %r2783;
// More room than bits pending: byte is not full yet.
setp.gt.u32 %p208, %r2782, %r400;
@%p208 bra $L__BB0_183;
// Byte full: if the reservation flag was set and the byte is not 127, clear
// the flag and keep filling; otherwise store the byte.
setp.ne.s32 %p209, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs332, %rs688, 255;
setp.ne.s16 %p210, %rs332, 127;
and.pred %p211, %p209, %p210;
@%p211 bra $L__BB0_183;
mov.u32 %r2792, 20548;
sub.s32 %r2793, %r2792, %r5719;
cvt.u64.u32 %rd117, %r2793;
add.s64 %rd118, %rd1, %rd117;
st.global.u8 [%rd118], %rs688;
add.s32 %r5719, %r5719, 1;
// A stored byte above 143 sets the reservation flag for the next byte.
setp.gt.u16 %p212, %rs332, 143;
selp.u32 %r5717, 1, 0, %p212;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_183:
mov.u32 %r5736, 0;
// All bits consumed: leave; otherwise go round again.
setp.eq.s32 %p213, %r5389, 0;
mov.u32 %r5716, %r5388;
@%p213 bra $L__BB0_416;
bra.uni $L__BB0_179;
// Out-of-line byte flush whose entry point lies elsewhere in the kernel.
// The pending byte is %rs705 << 1. With more than 191 bytes already written
// (%r5271) it is not stored and status 1 is returned in %r5276; otherwise
// it is stored at %rd4, the byte count advances, the bit budget %r5277 is
// reset to 8 and the status is taken from %r5480. Both paths continue at
// $L__BB0_84 with the accumulator in %rs619.
$L__BB0_82:
shl.b16 %rs619, %rs705, 1;
setp.gt.u32 %p92, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5276, 1;
@%p92 bra $L__BB0_84;
shl.b16 %rs590, %rs705, 1;
st.global.u8 [%rd4], %rs590;
add.s32 %r5271, %r5271, 1;
mov.u32 %r5277, 8;
mov.u16 %rs619, 0;
mov.u32 %r5276, %r5480;
bra.uni $L__BB0_84;
// Run-counter path (taken when %r5405 == 0): increment the run counter
// %r5483. While it stays below the threshold %r5481 nothing is written.
// When the threshold is reached a 1 bit is appended to the accumulator;
// a completed byte is flushed at $L__BB0_255. The state %r5482 is then
// stepped up by one (capped at 12), the counter is cleared and the
// threshold is recomputed as 1 << f(state), with f: <3 -> 0, <6 -> 1,
// <9 -> 2, <11 -> 3, ==11 -> 4, otherwise 5.
$L__BB0_253:
add.s32 %r5483, %r5483, 1;
setp.lt.u32 %p289, %r5483, %r5481;
@%p289 bra $L__BB0_261;
// Append a 1 bit and use up one bit slot of the current byte.
shl.b16 %rs362, %rs705, 1;
or.b16 %rs646, %rs362, 1;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p290, %r5277, 0;
mov.u32 %r5476, %r5480;
@%p290 bra $L__BB0_257;
bra.uni $L__BB0_255;
$L__BB0_257:
// %r573 = min(%r5482 + 1, 12); the same value is recomputed below.
add.s32 %r2993, %r5482, 1;
min.u32 %r573, %r2993, 12;
setp.lt.u32 %p293, %r573, 3;
mov.u32 %r5483, 0;
mov.u32 %r5479, %r5483;
@%p293 bra $L__BB0_260;
add.s32 %r5144, %r5482, 1;
min.u32 %r5143, %r5144, 12;
setp.lt.u32 %p294, %r5143, 6;
mov.u32 %r5479, 1;
@%p294 bra $L__BB0_260;
add.s32 %r5146, %r5482, 1;
min.u32 %r5145, %r5146, 12;
setp.lt.u32 %p295, %r5145, 9;
setp.eq.s32 %p296, %r5145, 11;
selp.b32 %r2995, 4, 5, %p296;
setp.lt.u32 %p297, %r5145, 11;
selp.b32 %r2996, 3, %r2995, %p297;
selp.b32 %r5479, 2, %r2996, %p295;
$L__BB0_260:
// Commit the new state, threshold, status and accumulator.
add.s32 %r5148, %r5482, 1;
min.u32 %r5482, %r5148, 12;
mov.u32 %r2998, 1;
shl.b32 %r5481, %r2998, %r5479;
mov.u32 %r5480, %r5476;
mov.u16 %rs705, %rs646;
// Start of the forward-stream emission for the current group of words.
// %r583 = max(%r5420, 1) is the base bit count and %r584 = low nibble of the
// table entry %rs47 supplies one reduction bit per word. This block handles
// word 0 (flag mask 1 of %r5405): it emits the low
// (%r583 - bit 0 of %r584) bits of %r5399 into the stream at %rd1
// (index %r5907, accumulator %r5904, bits used %r5905, bits available
// %r5906). Status leaves in %r5496: the incoming %r5933, or 1 once the byte
// index has passed 17476.
$L__BB0_261:
max.s32 %r583, %r5420, 1;
and.b16 %rs365, %rs47, 15;
cvt.u32.u16 %r584, %rs365;
and.b32 %r585, %r5405, 1;
setp.eq.s32 %p298, %r585, 0;
mov.u32 %r5496, %r5933;
// Flag bit clear: nothing to emit for this word.
@%p298 bra $L__BB0_268;
and.b32 %r2999, %r584, 1;
sub.s32 %r5486, %r583, %r2999;
setp.eq.s32 %p299, %r5486, 0;
mov.u32 %r5496, %r5933;
@%p299 bra $L__BB0_268;
// %r5487 = %r5399 restricted to its low %r5486 bits.
mov.u32 %r3000, -1;
shl.b32 %r3001, %r3000, %r5486;
not.b32 %r3002, %r3001;
and.b32 %r5487, %r5399, %r3002;
$L__BB0_264:
// Give up with status 1 if the byte index is already past 17476.
setp.gt.u32 %p300, %r5907, 17476;
mov.u32 %r5496, 1;
@%p300 bra $L__BB0_268;
// n (%r3005) = min(free bits in the current byte, bits still pending).
sub.s32 %r3004, %r5906, %r5905;
min.u32 %r3005, %r3004, %r5486;
setp.eq.s32 %p301, %r3005, 32;
mov.u32 %r3006, -1;
shl.b32 %r3007, %r3006, %r3005;
not.b32 %r3008, %r3007;
selp.b32 %r3009, -1, %r3008, %p301;
and.b32 %r3010, %r3009, %r5487;
shl.b32 %r3011, %r3010, %r5905;
or.b32 %r5904, %r3011, %r5904;
add.s32 %r5905, %r3005, %r5905;
shr.u32 %r5487, %r5487, %r3005;
sub.s32 %r5486, %r5486, %r3005;
// Byte not yet full: skip the store.
setp.lt.u32 %p302, %r5905, %r5906;
@%p302 bra $L__BB0_267;
cvt.u64.u32 %rd147, %r5907;
add.s64 %rd148, %rd1, %rd147;
st.global.u8 [%rd148], %r5904;
add.s32 %r5907, %r5907, 1;
// After an accumulator value of 255 the next byte offers only 7 bits, else 8.
setp.eq.s32 %p303, %r5904, 255;
selp.b32 %r5906, 7, 8, %p303;
mov.u32 %r5904, 0;
mov.u32 %r5905, %r5904;
$L__BB0_267:
setp.ne.s32 %p304, %r5486, 0;
mov.u32 %r5496, %r5933;
@%p304 bra $L__BB0_264;
// Forward-stream emission for word 1 (flag mask 2 of %r5405): emits the low
// (%r583 - bit 1 of %r584) bits of %r5403 into the stream at %rd1 (index
// %r5907, accumulator %r5904, bits used %r5905, bits available %r5906).
// Status leaves in %r5511: the incoming %r5496, or 1 once the byte index
// has passed 17476.
$L__BB0_268:
and.b32 %r5134, %r5405, 2;
setp.eq.s32 %p305, %r5134, 0;
mov.u32 %r5511, %r5496;
// Flag bit clear: nothing to emit for this word.
@%p305 bra $L__BB0_275;
shr.u32 %r3014, %r584, 1;
and.b32 %r3015, %r3014, 1;
sub.s32 %r5501, %r583, %r3015;
setp.eq.s32 %p306, %r5501, 0;
mov.u32 %r5511, %r5496;
@%p306 bra $L__BB0_275;
// %r5502 = %r5403 restricted to its low %r5501 bits.
mov.u32 %r3016, -1;
shl.b32 %r3017, %r3016, %r5501;
not.b32 %r3018, %r3017;
and.b32 %r5502, %r5403, %r3018;
$L__BB0_271:
// Give up with status 1 if the byte index is already past 17476.
setp.gt.u32 %p307, %r5907, 17476;
mov.u32 %r5511, 1;
@%p307 bra $L__BB0_275;
// n (%r3021) = min(free bits in the current byte, bits still pending).
sub.s32 %r3020, %r5906, %r5905;
min.u32 %r3021, %r3020, %r5501;
setp.eq.s32 %p308, %r3021, 32;
mov.u32 %r3022, -1;
shl.b32 %r3023, %r3022, %r3021;
not.b32 %r3024, %r3023;
selp.b32 %r3025, -1, %r3024, %p308;
and.b32 %r3026, %r3025, %r5502;
shl.b32 %r3027, %r3026, %r5905;
or.b32 %r5904, %r3027, %r5904;
add.s32 %r5905, %r3021, %r5905;
shr.u32 %r5502, %r5502, %r3021;
sub.s32 %r5501, %r5501, %r3021;
// Byte not yet full: skip the store.
setp.lt.u32 %p309, %r5905, %r5906;
@%p309 bra $L__BB0_274;
cvt.u64.u32 %rd149, %r5907;
add.s64 %rd150, %rd1, %rd149;
st.global.u8 [%rd150], %r5904;
add.s32 %r5907, %r5907, 1;
// After an accumulator value of 255 the next byte offers only 7 bits, else 8.
setp.eq.s32 %p310, %r5904, 255;
selp.b32 %r5906, 7, 8, %p310;
mov.u32 %r5904, 0;
mov.u32 %r5905, %r5904;
$L__BB0_274:
setp.ne.s32 %p311, %r5501, 0;
mov.u32 %r5511, %r5496;
@%p311 bra $L__BB0_271;
// Forward-stream emission for word 2 (flag mask 4 of %r5405): emits the low
// (%r583 - bit 2 of %r584) bits of %r5419 into the stream at %rd1 (index
// %r5907, accumulator %r5904, bits used %r5905, bits available %r5906).
// Status leaves in %r5526: the incoming %r5511, or 1 once the byte index
// has passed 17476.
$L__BB0_275:
and.b32 %r3030, %r5405, 4;
setp.eq.s32 %p312, %r3030, 0;
mov.u32 %r5526, %r5511;
// Flag bit clear: nothing to emit for this word.
@%p312 bra $L__BB0_282;
shr.u32 %r3031, %r584, 2;
and.b32 %r3032, %r3031, 1;
sub.s32 %r5516, %r583, %r3032;
setp.eq.s32 %p313, %r5516, 0;
mov.u32 %r5526, %r5511;
@%p313 bra $L__BB0_282;
// %r5517 = %r5419 restricted to its low %r5516 bits.
mov.u32 %r3033, -1;
shl.b32 %r3034, %r3033, %r5516;
not.b32 %r3035, %r3034;
and.b32 %r5517, %r5419, %r3035;
$L__BB0_278:
// Give up with status 1 if the byte index is already past 17476.
setp.gt.u32 %p314, %r5907, 17476;
mov.u32 %r5526, 1;
@%p314 bra $L__BB0_282;
// n (%r3038) = min(free bits in the current byte, bits still pending).
sub.s32 %r3037, %r5906, %r5905;
min.u32 %r3038, %r3037, %r5516;
setp.eq.s32 %p315, %r3038, 32;
mov.u32 %r3039, -1;
shl.b32 %r3040, %r3039, %r3038;
not.b32 %r3041, %r3040;
selp.b32 %r3042, -1, %r3041, %p315;
and.b32 %r3043, %r3042, %r5517;
shl.b32 %r3044, %r3043, %r5905;
or.b32 %r5904, %r3044, %r5904;
add.s32 %r5905, %r3038, %r5905;
shr.u32 %r5517, %r5517, %r3038;
sub.s32 %r5516, %r5516, %r3038;
// Byte not yet full: skip the store.
setp.lt.u32 %p316, %r5905, %r5906;
@%p316 bra $L__BB0_281;
cvt.u64.u32 %rd151, %r5907;
add.s64 %rd152, %rd1, %rd151;
st.global.u8 [%rd152], %r5904;
add.s32 %r5907, %r5907, 1;
// After an accumulator value of 255 the next byte offers only 7 bits, else 8.
setp.eq.s32 %p317, %r5904, 255;
selp.b32 %r5906, 7, 8, %p317;
mov.u32 %r5904, 0;
mov.u32 %r5905, %r5904;
$L__BB0_281:
setp.ne.s32 %p318, %r5516, 0;
mov.u32 %r5526, %r5511;
@%p318 bra $L__BB0_278;
// Forward-stream emission for word 3 (flag mask 8 of %r5405): emits the low
// (%r583 - (%r584 >> 3)) bits of %r5418 into the stream at %rd1 (index
// %r5907, accumulator %r5904, bits used %r5905, bits available %r5906).
// %r584 is a 4-bit value, so the shift leaves a single bit. Status leaves
// in %r5933: the incoming %r5526, or 1 once the byte index has passed 17476.
$L__BB0_282:
and.b32 %r5135, %r5405, 8;
setp.eq.s32 %p319, %r5135, 0;
mov.u32 %r5933, %r5526;
// Flag bit clear: nothing to emit for this word.
@%p319 bra $L__BB0_289;
shr.u32 %r3047, %r584, 3;
sub.s32 %r5531, %r583, %r3047;
setp.eq.s32 %p320, %r5531, 0;
mov.u32 %r5933, %r5526;
@%p320 bra $L__BB0_289;
// %r5532 = %r5418 restricted to its low %r5531 bits.
mov.u32 %r3048, -1;
shl.b32 %r3049, %r3048, %r5531;
not.b32 %r3050, %r3049;
and.b32 %r5532, %r5418, %r3050;
$L__BB0_285:
// Give up with status 1 if the byte index is already past 17476.
setp.gt.u32 %p321, %r5907, 17476;
mov.u32 %r5933, 1;
@%p321 bra $L__BB0_289;
// n (%r3053) = min(free bits in the current byte, bits still pending).
sub.s32 %r3052, %r5906, %r5905;
min.u32 %r3053, %r3052, %r5531;
setp.eq.s32 %p322, %r3053, 32;
mov.u32 %r3054, -1;
shl.b32 %r3055, %r3054, %r3053;
not.b32 %r3056, %r3055;
selp.b32 %r3057, -1, %r3056, %p322;
and.b32 %r3058, %r3057, %r5532;
shl.b32 %r3059, %r3058, %r5905;
or.b32 %r5904, %r3059, %r5904;
add.s32 %r5905, %r3053, %r5905;
shr.u32 %r5532, %r5532, %r3053;
sub.s32 %r5531, %r5531, %r3053;
// Byte not yet full: skip the store.
setp.lt.u32 %p323, %r5905, %r5906;
@%p323 bra $L__BB0_288;
cvt.u64.u32 %rd153, %r5907;
add.s64 %rd154, %rd1, %rd153;
st.global.u8 [%rd154], %r5904;
add.s32 %r5907, %r5907, 1;
// After an accumulator value of 255 the next byte offers only 7 bits, else 8.
setp.eq.s32 %p324, %r5904, 255;
selp.b32 %r5906, 7, 8, %p324;
mov.u32 %r5904, 0;
mov.u32 %r5905, %r5904;
$L__BB0_288:
setp.ne.s32 %p325, %r5531, 0;
mov.u32 %r5933, %r5526;
@%p325 bra $L__BB0_285;
// Second update of the third bit stream, skipped entirely when either
// %r468 or %r130 is below 1. Otherwise m = min(%r130, %r468) selects the
// path: m < 3 takes the run-counter path at $L__BB0_329, m >= 3 takes the
// 0-bit path at $L__BB0_291. %rd7 is the address of the stream's next byte
// (%rd1 + 17477 + %r5271) for either path.
$L__BB0_289:
setp.lt.s32 %p326, %r468, 1;
setp.lt.s32 %p327, %r130, 1;
or.pred %p328, %p327, %p326;
@%p328 bra $L__BB0_337;
min.s32 %r3062, %r130, %r468;
setp.lt.s32 %p329, %r3062, 3;
add.s32 %r3063, %r5271, 17477;
cvt.u64.u32 %rd155, %r3063;
add.s64 %rd7, %rd1, %rd155;
@%p329 bra $L__BB0_329;
bra.uni $L__BB0_291;
// Run-counter path: increment the run counter %r5483; while it stays below
// the threshold %r5481 nothing is written. When the threshold is reached a
// 1 bit is appended to the accumulator %rs705, a completed byte is stored
// at %rd7 (or status 1 is raised in %r5586 if %r5271 > 191), the state
// %r5482 is stepped up by one (capped at 12), the counter is cleared and
// the threshold becomes 1 << f(state), with f: <3 -> 0, <6 -> 1, <9 -> 2,
// <11 -> 3, ==11 -> 4, otherwise 5.
$L__BB0_329:
add.s32 %r5483, %r5483, 1;
setp.lt.u32 %p376, %r5483, %r5481;
@%p376 bra $L__BB0_337;
// Append a 1 bit and use up one bit slot of the current byte.
shl.b16 %rs382, %rs705, 1;
or.b16 %rs705, %rs382, 1;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p377, %r5277, 0;
mov.u32 %r5586, %r5480;
@%p377 bra $L__BB0_333;
setp.gt.u32 %p378, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5586, 1;
@%p378 bra $L__BB0_333;
and.b16 %rs384, %rs705, 255;
st.global.u8 [%rd7], %rs705;
add.s32 %r5271, %r5271, 1;
// A stored byte of 255 leaves 7 bits for the next byte, otherwise 8.
setp.eq.s16 %p379, %rs384, 255;
selp.b32 %r5277, 7, 8, %p379;
mov.u16 %rs705, 0;
mov.u32 %r5586, %r5480;
$L__BB0_333:
add.s32 %r3151, %r5482, 1;
min.u32 %r5482, %r3151, 12;
setp.lt.u32 %p380, %r5482, 3;
mov.u32 %r5483, 0;
mov.u32 %r5589, %r5483;
@%p380 bra $L__BB0_336;
setp.lt.u32 %p381, %r5482, 6;
mov.u32 %r5589, 1;
@%p381 bra $L__BB0_336;
setp.lt.u32 %p382, %r5482, 9;
setp.eq.s32 %p383, %r5482, 11;
selp.b32 %r3153, 4, 5, %p383;
setp.lt.u32 %p384, %r5482, 11;
selp.b32 %r3154, 3, %r3153, %p384;
selp.b32 %r5589, 2, %r3154, %p382;
$L__BB0_336:
mov.u32 %r3156, 1;
shl.b32 %r5481, %r3156, %r5589;
mov.u32 %r5480, %r5586;
bra.uni $L__BB0_337;
// 0-bit path: append a 0 bit to the accumulator %rs705 (shift left by one),
// flushing a completed byte to %rd7 or raising status 1 in %r5579 when
// %r5271 > 191. Then map the state %r5482 to the bit count %r5549 used by
// the emission that follows: <3 -> 0, <6 -> 1, <9 -> 2, <11 -> 3,
// ==11 -> 4, otherwise 5.
$L__BB0_291:
shl.b16 %rs705, %rs705, 1;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p330, %r5277, 0;
mov.u32 %r5579, %r5480;
@%p330 bra $L__BB0_294;
setp.gt.u32 %p331, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5579, 1;
@%p331 bra $L__BB0_294;
// Store the byte and start a fresh one with 8 free bits.
st.global.u8 [%rd7], %rs705;
add.s32 %r5271, %r5271, 1;
mov.u32 %r5277, 8;
mov.u16 %rs705, 0;
mov.u32 %r5579, %r5480;
$L__BB0_294:
setp.lt.u32 %p332, %r5482, 3;
mov.u32 %r5549, 0;
@%p332 bra $L__BB0_297;
setp.lt.u32 %p333, %r5482, 6;
mov.u32 %r5549, 1;
@%p333 bra $L__BB0_297;
setp.lt.u32 %p334, %r5482, 9;
setp.eq.s32 %p335, %r5482, 11;
selp.b32 %r3069, 4, 5, %p335;
setp.lt.u32 %p336, %r5482, 11;
selp.b32 %r3070, 3, %r3069, %p336;
selp.b32 %r5549, 2, %r3070, %p334;
// Emit the low %r5549 bits of %r5483, most significant first, into the bit
// accumulator %rs705. This part handles the (%r5549 & 3) leading bits one
// at a time; the remaining multiple of four is handled by the loop at
// $L__BB0_312. Each bit is appended as (acc << 1) | bit; when the byte's
// bit budget %r5277 reaches zero the byte is stored at
// %rd1 + 17477 + %r5271, or status 1 is raised if %r5271 > 191.
// %r5559 tracks how many bits remain; %r5562 / %r5579 carry the status.
$L__BB0_297:
setp.eq.s32 %p337, %r5549, 0;
// Zero-length field: nothing to emit.
@%p337 bra $L__BB0_325;
and.b32 %r686, %r5549, 3;
setp.eq.s32 %p338, %r686, 0;
mov.u32 %r5559, %r5549;
mov.u32 %r5562, %r5579;
@%p338 bra $L__BB0_310;
// --- first leading bit: bit (%r5549 - 1) of %r5483 ---
add.s32 %r5157, %r5549, -1;
mov.u32 %r3072, 1;
shl.b32 %r3073, %r3072, %r5157;
and.b32 %r3074, %r3073, %r5483;
setp.ne.s32 %p339, %r3074, 0;
selp.u32 %r3075, 1, 0, %p339;
cvt.u32.u16 %r3076, %rs705;
// bfi places the old accumulator at bit 1 above the new bit.
bfi.b32 %r3077, %r3076, %r3075, 1, 8;
cvt.u16.u32 %rs705, %r3077;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p340, %r5277, 0;
mov.u32 %r5562, %r5579;
@%p340 bra $L__BB0_302;
setp.gt.u32 %p341, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5562, %r3072;
@%p341 bra $L__BB0_302;
add.s32 %r3081, %r5271, 17477;
cvt.u64.u32 %rd156, %r3081;
add.s64 %rd157, %rd1, %rd156;
st.global.u8 [%rd157], %rs705;
add.s32 %r5271, %r5271, 1;
// This flush always grants 8 bits to the next byte (no 255 test here).
mov.u32 %r5277, 8;
mov.u16 %rs705, 0;
mov.u32 %r5562, %r5579;
$L__BB0_302:
and.b32 %r5159, %r5549, 3;
add.s32 %r5559, %r5549, -1;
setp.eq.s32 %p342, %r5159, 1;
mov.u32 %r5579, %r5562;
@%p342 bra $L__BB0_310;
// --- second leading bit: bit (%r5549 - 2) of %r5483 ---
add.s32 %r5559, %r5549, -2;
mov.u32 %r3082, 1;
shl.b32 %r3083, %r3082, %r5559;
and.b32 %r3084, %r3083, %r5483;
setp.ne.s32 %p343, %r3084, 0;
selp.u32 %r3085, 1, 0, %p343;
cvt.u32.u16 %r3086, %rs705;
bfi.b32 %r3087, %r3086, %r3085, 1, 8;
cvt.u16.u32 %rs705, %r3087;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p344, %r5277, 0;
mov.u32 %r5553, %r5562;
@%p344 bra $L__BB0_306;
setp.gt.u32 %p345, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5553, %r3082;
@%p345 bra $L__BB0_306;
add.s32 %r3090, %r5271, 17477;
cvt.u64.u32 %rd158, %r3090;
add.s64 %rd159, %rd1, %rd158;
and.b16 %rs370, %rs705, 255;
st.global.u8 [%rd159], %rs705;
add.s32 %r5271, %r5271, 1;
// A stored byte of 255 leaves 7 bits for the next byte, otherwise 8.
setp.eq.s16 %p346, %rs370, 255;
selp.b32 %r5277, 7, 8, %p346;
mov.u16 %rs705, 0;
mov.u32 %r5553, %r5562;
$L__BB0_306:
and.b32 %r5160, %r5549, 3;
setp.eq.s32 %p347, %r5160, 2;
mov.u32 %r5579, %r5553;
mov.u32 %r5562, %r5553;
@%p347 bra $L__BB0_310;
// --- third leading bit: bit (%r5549 - 3) of %r5483 ---
add.s32 %r5559, %r5549, -3;
mov.u32 %r3091, 1;
shl.b32 %r3092, %r3091, %r5559;
and.b32 %r3093, %r3092, %r5483;
setp.ne.s32 %p348, %r3093, 0;
selp.u32 %r3094, 1, 0, %p348;
cvt.u32.u16 %r3095, %rs705;
bfi.b32 %r3096, %r3095, %r3094, 1, 8;
cvt.u16.u32 %rs705, %r3096;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p349, %r5277, 0;
mov.u32 %r5579, %r5553;
mov.u32 %r5562, %r5553;
@%p349 bra $L__BB0_310;
add.s32 %r5559, %r5549, -3;
setp.gt.u32 %p350, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5579, %r3091;
mov.u32 %r5562, %r3091;
@%p350 bra $L__BB0_310;
add.s32 %r5559, %r5549, -3;
add.s32 %r3101, %r5271, 17477;
cvt.u64.u32 %rd160, %r3101;
add.s64 %rd161, %rd1, %rd160;
and.b16 %rs373, %rs705, 255;
st.global.u8 [%rd161], %rs705;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p351, %rs373, 255;
selp.b32 %r5277, 7, 8, %p351;
mov.u16 %rs705, 0;
mov.u32 %r5579, %r5553;
mov.u32 %r5562, %r5553;
$L__BB0_310:
// Fewer than four bits in total (%r5549 in 1..3): the field is complete.
add.s32 %r5161, %r5549, -1;
setp.lt.u32 %p352, %r5161, 3;
@%p352 bra $L__BB0_325;
mov.u32 %r5579, %r5562;
// Main bit-emission loop, unrolled four times. Each trip appends bits
// (%r5559-1), (%r5559-2), (%r5559-3) and (%r5559-4) of %r5483, in that order,
// to the MSB-first byte accumulator, then lowers %r5559 by 4 and repeats until
// %r5559 reaches 0. The code before the loop peels (%r5549 & 3) bits and skips
// the loop when %r5549 - 1 < 3.
//
// Loop-carried state (renamed between the four steps by the compiler):
//   accumulator     : %rs705 -> %r5568 -> %r5572 -> %r5576 -> %rs705
//   free bit slots  : %r5277 -> %r5569 -> %r5573 -> %r5577 -> %r5277
//   status          : %r5579 -> %r5567 -> %r5571 -> %r5575 -> %r5579
//   bytes stored    : %r5271
//
// Every step does the same thing:
//   acc = (acc << 1) | bit, built with bfi (insert acc at bit 1 of the 0/1 bit);
//   decrement the free-slot count; if it reaches 0 the byte is flushed to
//   %rd1 + %r5271 + 17477, unless %r5271 > 191, in which case the store is
//   skipped, the slot count stays 0 and the status becomes 1.
//   After a flush the next byte gets 7 slots if the stored byte was 0xFF,
//   otherwise 8.
// NOTE(review): the 0xFF -> 7-bit rule and the 192-byte cap look like the HTJ2K
// MEL stream writer -- confirm against the CUDA source.
$L__BB0_312:
// ---- step 1: bit (%r5559 - 1) ----
add.s32 %r3102, %r5559, -1;
mov.u32 %r3103, 1;
shl.b32 %r3104, %r3103, %r3102;
and.b32 %r3105, %r3104, %r5483;
setp.ne.s32 %p353, %r3105, 0;
selp.u32 %r3106, 1, 0, %p353;
cvt.u32.u16 %r3107, %rs705;
bfi.b32 %r5568, %r3107, %r3106, 1, 8;
add.s32 %r5569, %r5277, -1;
setp.ne.s32 %p354, %r5569, 0;
mov.u32 %r5567, %r5579;
@%p354 bra $L__BB0_315;
setp.gt.u32 %p355, %r5271, 191;
mov.u32 %r5569, 0;
mov.u32 %r5567, %r3103;
@%p355 bra $L__BB0_315;
cvt.u16.u32 %rs374, %r5568;
and.b16 %rs375, %rs374, 255;
add.s32 %r3111, %r5271, 17477;
cvt.u64.u32 %rd162, %r3111;
add.s64 %rd163, %rd1, %rd162;
st.global.u8 [%rd163], %rs374;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p356, %rs375, 255;
selp.b32 %r5569, 7, 8, %p356;
mov.u32 %r5568, 0;
mov.u32 %r5567, %r5579;
$L__BB0_315:
// ---- step 2: bit (%r5559 - 2); %r3103 still holds the constant 1 ----
add.s32 %r3112, %r5559, -2;
shl.b32 %r3114, %r3103, %r3112;
and.b32 %r3115, %r3114, %r5483;
setp.ne.s32 %p357, %r3115, 0;
and.b32 %r3116, %r5568, 127;
selp.u32 %r3117, 1, 0, %p357;
bfi.b32 %r5572, %r3116, %r3117, 1, 7;
add.s32 %r5573, %r5569, -1;
setp.ne.s32 %p358, %r5573, 0;
mov.u32 %r5571, %r5567;
@%p358 bra $L__BB0_318;
setp.gt.u32 %p359, %r5271, 191;
mov.u32 %r5573, 0;
mov.u32 %r5571, 1;
@%p359 bra $L__BB0_318;
cvt.u16.u32 %rs376, %r5572;
and.b16 %rs377, %rs376, 255;
add.s32 %r3121, %r5271, 17477;
cvt.u64.u32 %rd164, %r3121;
add.s64 %rd165, %rd1, %rd164;
st.global.u8 [%rd165], %rs376;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p360, %rs377, 255;
selp.b32 %r5573, 7, 8, %p360;
mov.u32 %r5572, 0;
mov.u32 %r5571, %r5567;
$L__BB0_318:
// ---- step 3: bit (%r5559 - 3) ----
add.s32 %r3122, %r5559, -3;
mov.u32 %r3123, 1;
shl.b32 %r3124, %r3123, %r3122;
and.b32 %r3125, %r3124, %r5483;
setp.ne.s32 %p361, %r3125, 0;
and.b32 %r3126, %r5572, 127;
selp.u32 %r3127, 1, 0, %p361;
bfi.b32 %r5576, %r3126, %r3127, 1, 7;
add.s32 %r5577, %r5573, -1;
setp.ne.s32 %p362, %r5577, 0;
mov.u32 %r5575, %r5571;
@%p362 bra $L__BB0_321;
setp.gt.u32 %p363, %r5271, 191;
mov.u32 %r5577, 0;
mov.u32 %r5575, %r3123;
@%p363 bra $L__BB0_321;
cvt.u16.u32 %rs378, %r5576;
and.b16 %rs379, %rs378, 255;
add.s32 %r3131, %r5271, 17477;
cvt.u64.u32 %rd166, %r3131;
add.s64 %rd167, %rd1, %rd166;
st.global.u8 [%rd167], %rs378;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p364, %rs379, 255;
selp.b32 %r5577, 7, 8, %p364;
mov.u32 %r5576, 0;
mov.u32 %r5575, %r5571;
$L__BB0_321:
// ---- step 4: bit (%r5559 - 4); this also advances the loop counter ----
add.s32 %r5559, %r5559, -4;
shl.b32 %r3133, %r3123, %r5559;
and.b32 %r3134, %r3133, %r5483;
setp.ne.s32 %p365, %r3134, 0;
and.b32 %r3135, %r5576, 127;
selp.u32 %r3136, 1, 0, %p365;
bfi.b32 %r3137, %r3135, %r3136, 1, 15;
cvt.u16.u32 %rs705, %r3137;
add.s32 %r5277, %r5577, -1;
setp.ne.s32 %p366, %r5277, 0;
mov.u32 %r5579, %r5575;
@%p366 bra $L__BB0_324;
setp.gt.u32 %p367, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5579, 1;
@%p367 bra $L__BB0_324;
add.s32 %r3140, %r5271, 17477;
cvt.u64.u32 %rd168, %r3140;
add.s64 %rd169, %rd1, %rd168;
and.b16 %rs381, %rs705, 255;
st.global.u8 [%rd169], %rs705;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p368, %rs381, 255;
selp.b32 %r5277, 7, 8, %p368;
mov.u16 %rs705, 0;
mov.u32 %r5579, %r5575;
$L__BB0_324:
// Repeat while bits remain.
setp.ne.s32 %p369, %r5559, 0;
@%p369 bra $L__BB0_312;
// State update after the bit emission:
//   %r5483 = 0
//   %r5482 = (%r5482 == 0) ? 0 : %r5482 - 1      (decrement that saturates at 0)
//   %r5585 = bucket of the new %r5482: 0..2 -> 0, 3..5 -> 1, 6..8 -> 2,
//            9..10 -> 3, 11 -> 4, larger -> 5
//   %r5481 = 1 << %r5585
//   %r5480 = %r5579 (status carried forward)
// Execution then falls through into $L__BB0_337.
// NOTE(review): consistent with a MEL state step-down (run reset, k--, new
// threshold) -- confirm against the CUDA source.
$L__BB0_325:
add.s32 %r3142, %r5482, -1;
setp.eq.s32 %p370, %r5482, 0;
mov.u32 %r5483, 0;
selp.b32 %r5482, 0, %r3142, %p370;
setp.lt.u32 %p371, %r5482, 3;
// %r5483 was just zeroed, so this seeds the bucket with 0.
mov.u32 %r5585, %r5483;
@%p371 bra $L__BB0_328;
setp.lt.u32 %p372, %r5482, 6;
mov.u32 %r5585, 1;
@%p372 bra $L__BB0_328;
setp.lt.u32 %p373, %r5482, 9;
setp.eq.s32 %p374, %r5482, 11;
selp.b32 %r3144, 4, 5, %p374;
setp.lt.u32 %p375, %r5482, 11;
selp.b32 %r3145, 3, %r3144, %p375;
selp.b32 %r5585, 2, %r3145, %p373;
$L__BB0_328:
mov.u32 %r3147, 1;
shl.b32 %r5481, %r3147, %r5585;
mov.u32 %r5480, %r5579;
// First-level dispatch on the two signed values %r130 and %r468:
//   both > 2  -> $L__BB0_387
//   otherwise -> $L__BB0_338 (which re-uses %p386 = (%r130 > 2))
$L__BB0_337:
setp.gt.s32 %p385, %r468, 2;
setp.gt.s32 %p386, %r130, 2;
and.pred %p387, %p386, %p385;
@%p387 bra $L__BB0_387;
bra.uni $L__BB0_338;
// Path taken when %r130 > 2 and %r468 > 2.
// Loads four (value byte, bit-count byte) pairs from the byte table at %rd108:
//   offsets %r340-12 / %r340-11        -> value read later into %r5673, count %rs121
//   offsets %r340-10 / %r340-9         -> value %rs122, count %rs123
//   offsets 6*%r468-12 / 6*%r468-11    -> value %rs124, count %rs125
//   offsets 6*%r468-10 / 6*%r468-9     -> value %rs126, count %rs127
// The first pair is packed by the loop below; the later blocks pack the
// (%rs124,%rs125), (%rs122,%rs123) and (%rs126,%rs127) pairs in that order.
// NOTE(review): %rd108 and %r340 are defined outside this excerpt; the table
// presumably holds prefix/suffix codewords with 6-byte rows -- confirm.
$L__BB0_387:
add.s32 %r3279, %r340, -11;
cvt.u64.u32 %rd200, %r3279;
add.s64 %rd9, %rd108, %rd200;
ld.global.u8 %rs121, [%rd9];
add.s32 %r3280, %r340, -10;
cvt.u64.u32 %rd202, %r3280;
add.s64 %rd203, %rd108, %rd202;
ld.global.u8 %rs122, [%rd203];
ld.global.u8 %rs123, [%rd203+1];
mul.lo.s32 %r3281, %r468, 6;
add.s32 %r3282, %r3281, -12;
cvt.u64.u32 %rd204, %r3282;
add.s64 %rd205, %rd108, %rd204;
ld.global.u8 %rs124, [%rd205];
ld.global.u8 %rs125, [%rd205+1];
add.s32 %r3283, %r3281, -10;
cvt.u64.u32 %rd206, %r3283;
add.s64 %rd207, %rd108, %rd206;
ld.global.u8 %rs126, [%rd207];
ld.global.u8 %rs127, [%rd207+1];
// A zero bit count means nothing to write for the first pair; the status
// %r5683 then simply carries %r5435.
setp.eq.s16 %p455, %rs121, 0;
mov.u32 %r5683, %r5435;
@%p455 bra $L__BB0_394;
ld.global.u8 %r5673, [%rd9+-1];
cvt.u32.u16 %r5672, %rs121;
// Bit-packing loop: append the low %r5672 bits of %r5673, LSB first, to the
// byte accumulator %rs688.
//   %r5718 = bits already held in the current byte
//   %r5717 = 1 when the current byte is limited to 7 bits, else 0
//   %r5719 = bytes stored so far; each byte goes to %rd1 + (20548 - %r5719),
//            so the stream is written at descending addresses
$L__BB0_389:
mov.u16 %rs128, %rs688;
mov.u32 %r895, %r5672;
// More than 2879 bytes already stored: stop with status 1.
setp.gt.u32 %p456, %r5719, 2879;
mov.u32 %r5683, 1;
@%p456 bra $L__BB0_394;
// n = min(free bits in this byte, bits remaining)
mov.u32 %r3285, 8;
sub.s32 %r3286, %r3285, %r5717;
sub.s32 %r3287, %r3286, %r5718;
min.u32 %r3288, %r3287, %r895;
// mask = low n bits set (all ones when n == 32)
setp.eq.s32 %p457, %r3288, 32;
mov.u32 %r3289, -1;
shl.b32 %r3290, %r3289, %r3288;
not.b32 %r3291, %r3290;
selp.b32 %r3292, -1, %r3291, %p457;
// Merge the n bits at position %r5718, then consume them from the source.
and.b32 %r3293, %r3292, %r5673;
shl.b32 %r3294, %r3293, %r5718;
cvt.u16.u32 %rs417, %r3294;
or.b16 %rs688, %rs128, %rs417;
add.s32 %r5718, %r3288, %r5718;
sub.s32 %r5672, %r895, %r3288;
shr.u32 %r5673, %r5673, %r3288;
// Byte not full yet: go to the loop test.
setp.gt.u32 %p458, %r3287, %r895;
@%p458 bra $L__BB0_393;
// Byte reached its current limit. If the 7-bit limit was active and the byte
// is not 0x7F, only the limit is lifted (the byte can take one more bit) and
// nothing is stored; otherwise the byte is stored.
setp.ne.s32 %p459, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs418, %rs688, 255;
setp.ne.s16 %p460, %rs418, 127;
and.pred %p461, %p459, %p460;
@%p461 bra $L__BB0_393;
// %rs607 recomputes the same value as %rs688.
cvt.u16.u32 %rs608, %r3294;
or.b16 %rs607, %rs128, %rs608;
mov.u32 %r3297, 20548;
sub.s32 %r3298, %r3297, %r5719;
cvt.u64.u32 %rd208, %r3298;
add.s64 %rd209, %rd1, %rd208;
st.global.u8 [%rd209], %rs607;
add.s32 %r5719, %r5719, 1;
// A stored byte above 0x8F limits the next byte to 7 bits.
setp.gt.u16 %p462, %rs418, 143;
selp.u32 %r5717, 1, 0, %p462;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_393:
// Loop while bits remain; on normal exit the status carries %r5435.
setp.ne.s32 %p463, %r5672, 0;
mov.u32 %r5683, %r5435;
@%p463 bra $L__BB0_389;
// Pack the second pair: the low %rs125 bits of %rs124, LSB first, into the
// byte accumulator %rs688. Skipped entirely when %rs125 is 0.
// Status: %r5695 ends as %r5683 on normal completion, or 1 if the byte limit
// (%r5719 > 2879) was hit.
//   %r5718 = bits already held in the current byte
//   %r5717 = 1 when the current byte is limited to 7 bits, else 0
//   %r5719 = bytes stored so far; bytes go to %rd1 + (20548 - %r5719)
$L__BB0_394:
setp.eq.s16 %p464, %rs125, 0;
mov.u32 %r5695, %r5683;
@%p464 bra $L__BB0_401;
// %r5685 = value bits, %r5684 = number of bits still to write.
cvt.u32.u16 %r3299, %rs124;
and.b32 %r5685, %r3299, 255;
cvt.u32.u16 %r3300, %rs125;
and.b32 %r5684, %r3300, 255;
$L__BB0_396:
mov.u32 %r914, %r5684;
setp.gt.u32 %p465, %r5719, 2879;
mov.u32 %r5695, 1;
@%p465 bra $L__BB0_401;
// n = min(free bits in this byte, bits remaining)
mov.u32 %r3302, 8;
sub.s32 %r3303, %r3302, %r5717;
sub.s32 %r3304, %r3303, %r5718;
min.u32 %r3305, %r3304, %r914;
// mask = low n bits set (all ones when n == 32)
setp.eq.s32 %p466, %r3305, 32;
mov.u32 %r3306, -1;
shl.b32 %r3307, %r3306, %r3305;
not.b32 %r3308, %r3307;
selp.b32 %r3309, -1, %r3308, %p466;
and.b32 %r3310, %r3309, %r5685;
shl.b32 %r3311, %r3310, %r5718;
cvt.u16.u32 %rs422, %r3311;
or.b16 %rs688, %rs688, %rs422;
add.s32 %r5718, %r3305, %r5718;
sub.s32 %r5684, %r914, %r3305;
shr.u32 %r5685, %r5685, %r3305;
// Byte not full yet: go to the loop test.
setp.gt.u32 %p467, %r3304, %r914;
@%p467 bra $L__BB0_400;
// Full byte: with the 7-bit limit active and a byte other than 0x7F, only the
// limit is lifted and the store is skipped; otherwise the byte is stored.
setp.ne.s32 %p468, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs423, %rs688, 255;
setp.ne.s16 %p469, %rs423, 127;
and.pred %p470, %p468, %p469;
@%p470 bra $L__BB0_400;
mov.u32 %r3314, 20548;
sub.s32 %r3315, %r3314, %r5719;
cvt.u64.u32 %rd210, %r3315;
add.s64 %rd211, %rd1, %rd210;
st.global.u8 [%rd211], %rs688;
add.s32 %r5719, %r5719, 1;
// A stored byte above 0x8F limits the next byte to 7 bits.
setp.gt.u16 %p471, %rs423, 143;
selp.u32 %r5717, 1, 0, %p471;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_400:
setp.ne.s32 %p472, %r5684, 0;
mov.u32 %r5695, %r5683;
@%p472 bra $L__BB0_396;
$L__BB0_401:
setp.eq.s16 %p473, %rs123, 0;
mov.u32 %r5707, %r5695;
@%p473 bra $L__BB0_408;
cvt.u32.u16 %r3316, %rs123;
and.b32 %r5696, %r3316, 255;
cvt.u32.u16 %r3317, %rs122;
and.b32 %r5697, %r3317, 255;
$L__BB0_403:
mov.u32 %r933, %r5696;
setp.gt.u32 %p474, %r5719, 2879;
mov.u32 %r5707, 1;
@%p474 bra $L__BB0_408;
mov.u32 %r3319, 8;
sub.s32 %r3320, %r3319, %r5717;
sub.s32 %r3321, %r3320, %r5718;
min.u32 %r3322, %r3321, %r933;
setp.eq.s32 %p475, %r3322, 32;
mov.u32 %r3323, -1;
shl.b32 %r3324, %r3323, %r3322;
not.b32 %r3325, %r3324;
selp.b32 %r3326, -1, %r3325, %p475;
and.b32 %r3327, %r3326, %r5697;
shl.b32 %r3328, %r3327, %r5718;
cvt.u16.u32 %rs427, %r3328;
or.b16 %rs688, %rs688, %rs427;
add.s32 %r5718, %r3322, %r5718;
sub.s32 %r5696, %r933, %r3322;
shr.u32 %r5697, %r5697, %r3322;
setp.gt.u32 %p476, %r3321, %r933;
@%p476 bra $L__BB0_407;
setp.ne.s32 %p477, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs428, %rs688, 255;
setp.ne.s16 %p478, %rs428, 127;
and.pred %p479, %p477, %p478;
@%p479 bra $L__BB0_407;
mov.u32 %r3331, 20548;
sub.s32 %r3332, %r3331, %r5719;
cvt.u64.u32 %rd212, %r3332;
add.s64 %rd213, %rd1, %rd212;
st.global.u8 [%rd213], %rs688;
add.s32 %r5719, %r5719, 1;
setp.gt.u16 %p480, %rs428, 143;
selp.u32 %r5717, 1, 0, %p480;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_407:
setp.ne.s32 %p481, %r5696, 0;
mov.u32 %r5707, %r5695;
@%p481 bra $L__BB0_403;
$L__BB0_408:
setp.eq.s16 %p482, %rs127, 0;
mov.u32 %r5716, %r5707;
@%p482 bra $L__BB0_415;
cvt.u32.u16 %r3333, %rs126;
and.b32 %r5709, %r3333, 255;
cvt.u32.u16 %r3334, %rs127;
and.b32 %r5708, %r3334, 255;
$L__BB0_410:
mov.u32 %r952, %r5708;
setp.gt.u32 %p483, %r5719, 2879;
mov.u32 %r5716, 1;
@%p483 bra $L__BB0_415;
mov.u32 %r3336, 8;
sub.s32 %r3337, %r3336, %r5717;
sub.s32 %r3338, %r3337, %r5718;
min.u32 %r3339, %r3338, %r952;
setp.eq.s32 %p484, %r3339, 32;
mov.u32 %r3340, -1;
shl.b32 %r3341, %r3340, %r3339;
not.b32 %r3342, %r3341;
selp.b32 %r3343, -1, %r3342, %p484;
and.b32 %r3344, %r3343, %r5709;
shl.b32 %r3345, %r3344, %r5718;
cvt.u16.u32 %rs432, %r3345;
or.b16 %rs688, %rs688, %rs432;
add.s32 %r5718, %r3339, %r5718;
sub.s32 %r5708, %r952, %r3339;
shr.u32 %r5709, %r5709, %r3339;
setp.gt.u32 %p485, %r3338, %r952;
@%p485 bra $L__BB0_414;
setp.ne.s32 %p486, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs433, %rs688, 255;
setp.ne.s16 %p487, %rs433, 127;
and.pred %p488, %p486, %p487;
@%p488 bra $L__BB0_414;
mov.u32 %r3348, 20548;
sub.s32 %r3349, %r3348, %r5719;
cvt.u64.u32 %rd214, %r3349;
add.s64 %rd215, %rd1, %rd214;
st.global.u8 [%rd215], %rs688;
add.s32 %r5719, %r5719, 1;
setp.gt.u16 %p489, %rs433, 143;
selp.u32 %r5717, 1, 0, %p489;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_414:
setp.ne.s32 %p490, %r5708, 0;
mov.u32 %r5716, %r5707;
@%p490 bra $L__BB0_410;
bra.uni $L__BB0_415;
// Second-level dispatch, reached when NOT (%r130 > 2 AND %r468 > 2).
// %p386 still holds (%r130 > 2) from the first-level test.
//   %r130 > 2 and %r468 > 0 -> $L__BB0_367
//   otherwise               -> $L__BB0_339
// %p388 = (%r468 > 0) is re-used inside $L__BB0_339.
$L__BB0_338:
setp.gt.s32 %p388, %r468, 0;
and.pred %p390, %p386, %p388;
@%p390 bra $L__BB0_367;
bra.uni $L__BB0_339;
$L__BB0_367:
cvt.u64.u32 %rd186, %r340;
add.s64 %rd188, %rd108, %rd186;
ld.global.u8 %rs107, [%rd188+1];
add.s32 %r3229, %r340, 2;
cvt.u64.u32 %rd189, %r3229;
add.s64 %rd190, %rd108, %rd189;
ld.global.u8 %rs108, [%rd190];
ld.global.u8 %rs109, [%rd190+1];
setp.eq.s16 %p429, %rs107, 0;
mov.u32 %r5651, %r5435;
@%p429 bra $L__BB0_374;
ld.global.u8 %r5641, [%rd188];
cvt.u32.u16 %r5640, %rs107;
$L__BB0_369:
mov.u32 %r843, %r5640;
setp.gt.u32 %p430, %r5719, 2879;
mov.u32 %r5651, 1;
@%p430 bra $L__BB0_374;
mov.u32 %r3231, 8;
sub.s32 %r3232, %r3231, %r5717;
sub.s32 %r3233, %r3232, %r5718;
min.u32 %r3234, %r3233, %r843;
setp.eq.s32 %p431, %r3234, 32;
mov.u32 %r3235, -1;
shl.b32 %r3236, %r3235, %r3234;
not.b32 %r3237, %r3236;
selp.b32 %r3238, -1, %r3237, %p431;
and.b32 %r3239, %r3238, %r5641;
shl.b32 %r3240, %r3239, %r5718;
cvt.u16.u32 %rs404, %r3240;
or.b16 %rs688, %rs688, %rs404;
add.s32 %r5718, %r3234, %r5718;
sub.s32 %r5640, %r843, %r3234;
shr.u32 %r5641, %r5641, %r3234;
setp.gt.u32 %p432, %r3233, %r843;
@%p432 bra $L__BB0_373;
setp.ne.s32 %p433, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs405, %rs688, 255;
setp.ne.s16 %p434, %rs405, 127;
and.pred %p435, %p433, %p434;
@%p435 bra $L__BB0_373;
mov.u32 %r3243, 20548;
sub.s32 %r3244, %r3243, %r5719;
cvt.u64.u32 %rd194, %r3244;
add.s64 %rd195, %rd1, %rd194;
st.global.u8 [%rd195], %rs688;
add.s32 %r5719, %r5719, 1;
setp.gt.u16 %p436, %rs405, 143;
selp.u32 %r5717, 1, 0, %p436;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_373:
setp.ne.s32 %p437, %r5640, 0;
mov.u32 %r5651, %r5435;
@%p437 bra $L__BB0_369;
// Write exactly one bit -- the low bit of (%r468 - 1) -- into the byte
// accumulator %rs688 using the shared bit-packing scheme.
// On the dispatch path visible in this excerpt %r468 is 1 or 2 here, so the
// written bit is 0 or 1.
// Status: %r5663 ends as %r5651 on normal completion, or 1 if the byte limit
// (%r5719 > 2879) was hit.
//   %r5718 = bits already held in the current byte
//   %r5717 = 1 when the current byte is limited to 7 bits, else 0
//   %r5719 = bytes stored so far; bytes go to %rd1 + (20548 - %r5719)
$L__BB0_374:
add.s32 %r5653, %r468, -1;
mov.u32 %r5652, 1;
$L__BB0_375:
mov.u32 %r863, %r5652;
mov.u32 %r5663, 1;
setp.gt.u32 %p438, %r5719, 2879;
@%p438 bra $L__BB0_380;
// n = min(free bits in this byte, bits remaining)
mov.u32 %r3249, 8;
sub.s32 %r3250, %r3249, %r5717;
sub.s32 %r3251, %r3250, %r5718;
min.u32 %r3252, %r3251, %r863;
// mask = low n bits set (all ones when n == 32)
setp.eq.s32 %p439, %r3252, 32;
mov.u32 %r3253, -1;
shl.b32 %r3254, %r3253, %r3252;
not.b32 %r3255, %r3254;
selp.b32 %r3256, -1, %r3255, %p439;
and.b32 %r3257, %r3256, %r5653;
shl.b32 %r3258, %r3257, %r5718;
cvt.u16.u32 %rs408, %r3258;
or.b16 %rs688, %rs688, %rs408;
add.s32 %r5718, %r3252, %r5718;
sub.s32 %r5652, %r863, %r3252;
shr.u32 %r5653, %r5653, %r3252;
// Byte not full yet: go to the loop test.
setp.gt.u32 %p440, %r3251, %r863;
@%p440 bra $L__BB0_379;
// Full byte: with the 7-bit limit active and a byte other than 0x7F, only the
// limit is lifted and the store is skipped; otherwise the byte is stored.
setp.ne.s32 %p441, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs409, %rs688, 255;
setp.ne.s16 %p442, %rs409, 127;
and.pred %p443, %p441, %p442;
@%p443 bra $L__BB0_379;
mov.u32 %r3261, 20548;
sub.s32 %r3262, %r3261, %r5719;
cvt.u64.u32 %rd196, %r3262;
add.s64 %rd197, %rd1, %rd196;
st.global.u8 [%rd197], %rs688;
add.s32 %r5719, %r5719, 1;
// A stored byte above 0x8F limits the next byte to 7 bits.
setp.gt.u16 %p444, %rs409, 143;
selp.u32 %r5717, 1, 0, %p444;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_379:
setp.ne.s32 %p445, %r5652, 0;
mov.u32 %r5663, %r5651;
@%p445 bra $L__BB0_375;
$L__BB0_380:
setp.eq.s16 %p446, %rs109, 0;
mov.u32 %r5716, %r5663;
@%p446 bra $L__BB0_415;
cvt.u32.u16 %r5165, %rs108;
and.b32 %r5665, %r5165, 255;
cvt.u32.u16 %r5152, %rs109;
and.b32 %r5664, %r5152, 255;
$L__BB0_382:
mov.u32 %r880, %r5664;
setp.gt.u32 %p447, %r5719, 2879;
mov.u32 %r5716, 1;
@%p447 bra $L__BB0_415;
mov.u32 %r3264, 8;
sub.s32 %r3265, %r3264, %r5717;
sub.s32 %r3266, %r3265, %r5718;
min.u32 %r3267, %r3266, %r880;
setp.eq.s32 %p448, %r3267, 32;
mov.u32 %r3268, -1;
shl.b32 %r3269, %r3268, %r3267;
not.b32 %r3270, %r3269;
selp.b32 %r3271, -1, %r3270, %p448;
and.b32 %r3272, %r3271, %r5665;
shl.b32 %r3273, %r3272, %r5718;
cvt.u16.u32 %rs413, %r3273;
or.b16 %rs688, %rs688, %rs413;
add.s32 %r5718, %r3267, %r5718;
sub.s32 %r5664, %r880, %r3267;
shr.u32 %r5665, %r5665, %r3267;
setp.gt.u32 %p449, %r3266, %r880;
@%p449 bra $L__BB0_386;
setp.ne.s32 %p450, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs414, %rs688, 255;
setp.ne.s16 %p451, %rs414, 127;
and.pred %p452, %p450, %p451;
@%p452 bra $L__BB0_386;
mov.u32 %r3276, 20548;
sub.s32 %r3277, %r3276, %r5719;
cvt.u64.u32 %rd198, %r3277;
add.s64 %rd199, %rd1, %rd198;
st.global.u8 [%rd199], %rs688;
add.s32 %r5719, %r5719, 1;
setp.gt.u16 %p453, %rs414, 143;
selp.u32 %r5717, 1, 0, %p453;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_386:
setp.eq.s32 %p454, %r5664, 0;
mov.u32 %r5716, %r5663;
@%p454 bra $L__BB0_415;
bra.uni $L__BB0_382;
$L__BB0_339:
setp.gt.s32 %p392, %r130, 0;
selp.b32 %r3158, %r340, 0, %p392;
cvt.u64.u32 %rd170, %r3158;
add.s64 %rd8, %rd108, %rd170;
ld.global.u8 %rs85, [%rd8+1];
add.s32 %r3159, %r3158, 2;
cvt.u64.u32 %rd172, %r3159;
add.s64 %rd173, %rd108, %rd172;
ld.global.u8 %rs86, [%rd173];
ld.global.u8 %rs87, [%rd173+1];
mul.lo.s32 %r3160, %r468, 6;
selp.b32 %r3161, %r3160, 0, %p388;
cvt.u64.u32 %rd174, %r3161;
add.s64 %rd175, %rd108, %rd174;
ld.global.u8 %rs88, [%rd175];
ld.global.u8 %rs89, [%rd175+1];
add.s32 %r3162, %r3161, 2;
cvt.u64.u32 %rd176, %r3162;
add.s64 %rd177, %rd108, %rd176;
ld.global.u8 %rs90, [%rd177];
ld.global.u8 %rs91, [%rd177+1];
setp.eq.s16 %p393, %rs85, 0;
mov.u32 %r5607, %r5435;
@%p393 bra $L__BB0_346;
ld.global.u8 %r5597, [%rd8];
cvt.u32.u16 %r5596, %rs85;
$L__BB0_341:
mov.u16 %rs92, %rs688;
mov.u32 %r771, %r5596;
setp.gt.u32 %p394, %r5719, 2879;
mov.u32 %r5607, 1;
@%p394 bra $L__BB0_346;
mov.u32 %r3164, 8;
sub.s32 %r3165, %r3164, %r5717;
sub.s32 %r3166, %r3165, %r5718;
min.u32 %r3167, %r3166, %r771;
setp.eq.s32 %p395, %r3167, 32;
mov.u32 %r3168, -1;
shl.b32 %r3169, %r3168, %r3167;
not.b32 %r3170, %r3169;
selp.b32 %r3171, -1, %r3170, %p395;
and.b32 %r3172, %r3171, %r5597;
shl.b32 %r3173, %r3172, %r5718;
cvt.u16.u32 %rs385, %r3173;
or.b16 %rs688, %rs92, %rs385;
add.s32 %r5718, %r3167, %r5718;
sub.s32 %r5596, %r771, %r3167;
shr.u32 %r5597, %r5597, %r3167;
setp.gt.u32 %p396, %r3166, %r771;
@%p396 bra $L__BB0_345;
setp.ne.s32 %p397, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs386, %rs688, 255;
setp.ne.s16 %p398, %rs386, 127;
and.pred %p399, %p397, %p398;
@%p399 bra $L__BB0_345;
cvt.u16.u32 %rs606, %r3173;
or.b16 %rs605, %rs92, %rs606;
mov.u32 %r3176, 20548;
sub.s32 %r3177, %r3176, %r5719;
cvt.u64.u32 %rd178, %r3177;
add.s64 %rd179, %rd1, %rd178;
st.global.u8 [%rd179], %rs605;
add.s32 %r5719, %r5719, 1;
setp.gt.u16 %p400, %rs386, 143;
selp.u32 %r5717, 1, 0, %p400;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_345:
setp.ne.s32 %p401, %r5596, 0;
mov.u32 %r5607, %r5435;
@%p401 bra $L__BB0_341;
$L__BB0_346:
setp.eq.s16 %p402, %rs89, 0;
mov.u32 %r5619, %r5607;
@%p402 bra $L__BB0_353;
cvt.u32.u16 %r3178, %rs88;
and.b32 %r5609, %r3178, 255;
cvt.u32.u16 %r3179, %rs89;
and.b32 %r5608, %r3179, 255;
$L__BB0_348:
mov.u32 %r790, %r5608;
setp.gt.u32 %p403, %r5719, 2879;
mov.u32 %r5619, 1;
@%p403 bra $L__BB0_353;
mov.u32 %r3181, 8;
sub.s32 %r3182, %r3181, %r5717;
sub.s32 %r3183, %r3182, %r5718;
min.u32 %r3184, %r3183, %r790;
setp.eq.s32 %p404, %r3184, 32;
mov.u32 %r3185, -1;
shl.b32 %r3186, %r3185, %r3184;
not.b32 %r3187, %r3186;
selp.b32 %r3188, -1, %r3187, %p404;
and.b32 %r3189, %r3188, %r5609;
shl.b32 %r3190, %r3189, %r5718;
cvt.u16.u32 %rs390, %r3190;
or.b16 %rs688, %rs688, %rs390;
add.s32 %r5718, %r3184, %r5718;
sub.s32 %r5608, %r790, %r3184;
shr.u32 %r5609, %r5609, %r3184;
setp.gt.u32 %p405, %r3183, %r790;
@%p405 bra $L__BB0_352;
setp.ne.s32 %p406, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs391, %rs688, 255;
setp.ne.s16 %p407, %rs391, 127;
and.pred %p408, %p406, %p407;
@%p408 bra $L__BB0_352;
mov.u32 %r3193, 20548;
sub.s32 %r3194, %r3193, %r5719;
cvt.u64.u32 %rd180, %r3194;
add.s64 %rd181, %rd1, %rd180;
st.global.u8 [%rd181], %rs688;
add.s32 %r5719, %r5719, 1;
setp.gt.u16 %p409, %rs391, 143;
selp.u32 %r5717, 1, 0, %p409;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_352:
setp.ne.s32 %p410, %r5608, 0;
mov.u32 %r5619, %r5607;
@%p410 bra $L__BB0_348;
$L__BB0_353:
setp.eq.s16 %p411, %rs87, 0;
mov.u32 %r5631, %r5619;
@%p411 bra $L__BB0_360;
cvt.u32.u16 %r3195, %rs87;
and.b32 %r5620, %r3195, 255;
cvt.u32.u16 %r3196, %rs86;
and.b32 %r5621, %r3196, 255;
$L__BB0_355:
mov.u32 %r809, %r5620;
setp.gt.u32 %p412, %r5719, 2879;
mov.u32 %r5631, 1;
@%p412 bra $L__BB0_360;
mov.u32 %r3198, 8;
sub.s32 %r3199, %r3198, %r5717;
sub.s32 %r3200, %r3199, %r5718;
min.u32 %r3201, %r3200, %r809;
setp.eq.s32 %p413, %r3201, 32;
mov.u32 %r3202, -1;
shl.b32 %r3203, %r3202, %r3201;
not.b32 %r3204, %r3203;
selp.b32 %r3205, -1, %r3204, %p413;
and.b32 %r3206, %r3205, %r5621;
shl.b32 %r3207, %r3206, %r5718;
cvt.u16.u32 %rs395, %r3207;
or.b16 %rs688, %rs688, %rs395;
add.s32 %r5718, %r3201, %r5718;
sub.s32 %r5620, %r809, %r3201;
shr.u32 %r5621, %r5621, %r3201;
setp.gt.u32 %p414, %r3200, %r809;
@%p414 bra $L__BB0_359;
setp.ne.s32 %p415, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs396, %rs688, 255;
setp.ne.s16 %p416, %rs396, 127;
and.pred %p417, %p415, %p416;
@%p417 bra $L__BB0_359;
mov.u32 %r3210, 20548;
sub.s32 %r3211, %r3210, %r5719;
cvt.u64.u32 %rd182, %r3211;
add.s64 %rd183, %rd1, %rd182;
st.global.u8 [%rd183], %rs688;
add.s32 %r5719, %r5719, 1;
setp.gt.u16 %p418, %rs396, 143;
selp.u32 %r5717, 1, 0, %p418;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_359:
setp.ne.s32 %p419, %r5620, 0;
mov.u32 %r5631, %r5619;
@%p419 bra $L__BB0_355;
$L__BB0_360:
setp.eq.s16 %p420, %rs91, 0;
mov.u32 %r5716, %r5631;
@%p420 bra $L__BB0_415;
cvt.u32.u16 %r3212, %rs90;
and.b32 %r5633, %r3212, 255;
cvt.u32.u16 %r3213, %rs91;
and.b32 %r5632, %r3213, 255;
$L__BB0_362:
mov.u32 %r828, %r5632;
setp.gt.u32 %p421, %r5719, 2879;
mov.u32 %r5716, 1;
@%p421 bra $L__BB0_415;
mov.u32 %r3215, 8;
sub.s32 %r3216, %r3215, %r5717;
sub.s32 %r3217, %r3216, %r5718;
min.u32 %r3218, %r3217, %r828;
setp.eq.s32 %p422, %r3218, 32;
mov.u32 %r3219, -1;
shl.b32 %r3220, %r3219, %r3218;
not.b32 %r3221, %r3220;
selp.b32 %r3222, -1, %r3221, %p422;
and.b32 %r3223, %r3222, %r5633;
shl.b32 %r3224, %r3223, %r5718;
cvt.u16.u32 %rs400, %r3224;
or.b16 %rs688, %rs688, %rs400;
add.s32 %r5718, %r3218, %r5718;
sub.s32 %r5632, %r828, %r3218;
shr.u32 %r5633, %r5633, %r3218;
setp.gt.u32 %p423, %r3217, %r828;
@%p423 bra $L__BB0_366;
setp.ne.s32 %p424, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs401, %rs688, 255;
setp.ne.s16 %p425, %rs401, 127;
and.pred %p426, %p424, %p425;
@%p426 bra $L__BB0_366;
mov.u32 %r3227, 20548;
sub.s32 %r3228, %r3227, %r5719;
cvt.u64.u32 %rd184, %r3228;
add.s64 %rd185, %rd1, %rd184;
st.global.u8 [%rd185], %rs688;
add.s32 %r5719, %r5719, 1;
setp.gt.u16 %p427, %rs401, 143;
selp.u32 %r5717, 1, 0, %p427;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_366:
setp.eq.s32 %p428, %r5632, 0;
mov.u32 %r5716, %r5631;
@%p428 bra $L__BB0_415;
bra.uni $L__BB0_362;
// Common join after the bit-packing paths: %r5736 = (%r5405 >> 1) | (%r5405 & 1).
$L__BB0_415:
and.b32 %r5149, %r5405, 1;
shr.u32 %r3350, %r5405, 1;
or.b32 %r5736, %r3350, %r5149;
// Loop back-edge: advance %r5186 by 4 and continue at $L__BB0_50 (outside this
// excerpt) while %r5186 < %r5 (unsigned).
$L__BB0_416:
add.s32 %r5186, %r5186, 4;
setp.lt.u32 %p491, %r5186, %r5;
@%p491 bra $L__BB0_50;
// After the loop: zero one byte of the shared array cleanup_e_val at index
//   idx = ((%r5 + 1) >> 1) + 1
// The store is skipped when idx > 512.
// NOTE(review): the 512 bound presumably matches the declared size of
// cleanup_e_val, which is not visible in this excerpt -- confirm.
$L__BB0_417:
add.s32 %r5108, %r5, 1;
shr.u32 %r5107, %r5108, 1;
add.s32 %r1003, %r5107, 1;
setp.gt.u32 %p492, %r1003, 512;
@%p492 bra $L__BB0_419;
mov.u32 %r3353, _ZZ31 j2k_htj2k_encode_codeblockE13cleanup_e_val;
add.s32 %r3354, %r3353, %r1003;
mov.u16 %rs436, 0;
st.shared.u8 [%r3354], %rs436;
// Preamble of the loop that starts at $L__BB0_421. Skipped entirely (jump to
// $L__BB0_665) when %r6 < 3.
//   %rd11   = kernel parameter 4 as a global-space pointer
//   %rd10   = kernel parameter 5 as a global-space pointer
//   %r1004  = 31 - %r2 (left-shift applied to sample magnitudes in the loop)
//   %r5752  = 2 (initial value of the loop's row counter)
// NOTE(review): %r6 looks like the row count, with rows 0-1 handled by the
// earlier loop -- confirm against the CUDA source.
$L__BB0_419:
setp.lt.u32 %p493, %r6, 3;
@%p493 bra $L__BB0_665;
ld.param.u64 %rd639, [ j2k_htj2k_encode_codeblock_param_4];
ld.param.u64 %rd638, [ j2k_htj2k_encode_codeblock_param_5];
mov.u32 %r3356, 31;
sub.s32 %r1004, %r3356, %r2;
mov.u32 %r5752, 2;
cvta.to.global.u64 %rd10, %rd638;
cvta.to.global.u64 %rd11, %rd639;
// Per-row setup. Reads and then zeroes element 0 of the shared arrays
// cleanup_e_val and cleanup_cx_val, and derives the starting state for the
// inner loop at $L__BB0_423:
//   %r5770 = max(e_val[0] before zeroing, e_val[1]) - 1
//   %r5773 = 4 * cx_val[1] + cx_val[0] before zeroing
//   %r5772 = %r5752 * %r1   (linear index used for the first sample load)
//   %r1022 = %r5752 + 1     (compared against %r6 before second-row loads)
//   %r5768, %r5769, %r5771 = 0
// When %p9 (set outside this excerpt) is true the row body is skipped via
// $L__BB0_664.
// NOTE(review): e_val / cx_val look like the previous quad-row exponent and
// context lines of an HTJ2K cleanup pass -- confirm.
$L__BB0_421:
ld.shared.u8 %rs150, [_ZZ31 j2k_htj2k_encode_codeblockE13cleanup_e_val];
mov.u16 %rs437, 0;
st.shared.u8 [_ZZ31 j2k_htj2k_encode_codeblockE13cleanup_e_val], %rs437;
ld.shared.u8 %rs151, [_ZZ31 j2k_htj2k_encode_codeblockE14cleanup_cx_val];
st.shared.u8 [_ZZ31 j2k_htj2k_encode_codeblockE14cleanup_cx_val], %rs437;
@%p9 bra $L__BB0_664;
// %r3359 is the zero constant re-used by the inner loop.
mov.u32 %r3359, 0;
ld.shared.u8 %rs438, [_ZZ31 j2k_htj2k_encode_codeblockE13cleanup_e_val+1];
ld.shared.u8 %rs439, [_ZZ31 j2k_htj2k_encode_codeblockE14cleanup_cx_val+1];
max.u16 %rs441, %rs150, %rs438;
cvt.u32.u16 %r3360, %rs441;
add.s32 %r5770, %r3360, -1;
add.s32 %r1022, %r5752, 1;
mul.lo.s32 %r5772, %r5752, %r1;
mul.wide.u16 %r3361, %rs439, 4;
cvt.u32.u16 %r3362, %rs151;
and.b32 %r3363, %r3362, 255;
add.s32 %r5773, %r3361, %r3363;
mov.u32 %r5768, %r3359;
mov.u32 %r5769, %r3359;
mov.u32 %r5771, %r3359;
// Inner-loop head: load the 32-bit sample at index %r5772 from the array at
// %rd2 and derive its per-sample values.
//   %r5789 = 0 for a zero sample, otherwise sign bit | (|x| << %r1004),
//            where %r1004 = 31 - %r2 was set before the row loop
//   v      = ((%r5789 << 1) >> %r45) & ~1          (kept in %r1049)
// If v != 0:
//   %r5790 = 32 - clz(v - 1)
//   %r5791 = v + sign - 2
//   %r5797 = 1   (bit 0 marks this sample as non-zero; later samples OR in 2, 4, 8)
// If v == 0, %r5790, %r5791 and %r5797 stay 0. %r5793 is zeroed here for the
// following sample.
// NOTE(review): these look like the HTJ2K exponent, magnitude-minus-one-with-
// sign and significance pattern (rho) of the first sample in a quad -- confirm.
$L__BB0_423:
mul.wide.u32 %rd216, %r5772, 4;
add.s64 %rd217, %rd2, %rd216;
ld.global.u32 %r1046, [%rd217];
setp.eq.s32 %p495, %r1046, 0;
// %r3359 holds 0 (set in the per-row setup).
mov.u32 %r5789, %r3359;
@%p495 bra $L__BB0_425;
// Convert two's complement to sign-magnitude, magnitude shifted up.
and.b32 %r3365, %r1046, -2147483648;
abs.s32 %r3366, %r1046;
shl.b32 %r3367, %r3366, %r1004;
or.b32 %r5789, %r3367, %r3365;
$L__BB0_425:
// Drop the sign bit, scale down by %r45, and clear bit 0.
shl.b32 %r3371, %r5789, 1;
shr.u32 %r3372, %r3371, %r45;
and.b32 %r1049, %r3372, -2;
setp.eq.s32 %p496, %r1049, 0;
mov.u32 %r5793, 0;
mov.u32 %r5790, %r5793;
mov.u32 %r5791, %r5793;
mov.u32 %r5797, %r5793;
@%p496 bra $L__BB0_427;
add.s32 %r3374, %r1049, -1;
clz.b32 %r3375, %r3374;
mov.u32 %r3376, 32;
sub.s32 %r5790, %r3376, %r3375;
shr.u32 %r3377, %r5789, 31;
add.s32 %r3378, %r3377, %r1049;
add.s32 %r5791, %r3378, -2;
mov.u32 %r5797, 1;
$L__BB0_427:
setp.ge.u32 %p497, %r1022, %r6;
@%p497 bra $L__BB0_430;
add.s32 %r3381, %r5772, %r1;
mul.wide.u32 %rd218, %r3381, 4;
add.s64 %rd219, %rd2, %rd218;
ld.global.u32 %r1055, [%rd219];
setp.eq.s32 %p498, %r1055, 0;
@%p498 bra $L__BB0_430;
and.b32 %r3382, %r1055, -2147483648;
abs.s32 %r3383, %r1055;
shl.b32 %r3384, %r3383, %r1004;
or.b32 %r5793, %r3384, %r3382;
$L__BB0_430:
shl.b32 %r3387, %r5793, 1;
shr.u32 %r3388, %r3387, %r45;
and.b32 %r1058, %r3388, -2;
setp.eq.s32 %p499, %r1058, 0;
mov.u32 %r5808, 0;
mov.u32 %r5794, %r5808;
mov.u32 %r5795, %r5808;
mov.u32 %r5812, %r5790;
@%p499 bra $L__BB0_432;
or.b32 %r5797, %r5797, 2;
add.s32 %r3389, %r1058, -1;
clz.b32 %r3390, %r3389;
mov.u32 %r3391, 32;
sub.s32 %r5794, %r3391, %r3390;
max.s32 %r5812, %r5790, %r5794;
shr.u32 %r3392, %r5793, 31;
add.s32 %r3393, %r3392, %r1058;
add.s32 %r5795, %r3393, -2;
$L__BB0_432:
add.s32 %r6102, %r5772, 1;
add.s32 %r3398, %r5768, 1;
setp.ge.u32 %p500, %r3398, %r5;
mov.u32 %r5809, %r5808;
mov.u32 %r5810, %r5808;
mov.u32 %r5811, %r5808;
@%p500 bra $L__BB0_443;
mul.wide.u32 %rd220, %r6102, 4;
add.s64 %rd221, %rd2, %rd220;
ld.global.u32 %r1068, [%rd221];
setp.eq.s32 %p501, %r1068, 0;
mov.u32 %r5809, 0;
mov.u32 %r5798, %r5809;
@%p501 bra $L__BB0_435;
and.b32 %r3400, %r1068, -2147483648;
abs.s32 %r3401, %r1068;
shl.b32 %r3402, %r3401, %r1004;
or.b32 %r5798, %r3402, %r3400;
$L__BB0_435:
shl.b32 %r3405, %r5798, 1;
shr.u32 %r3406, %r3405, %r45;
and.b32 %r1071, %r3406, -2;
setp.eq.s32 %p502, %r1071, 0;
mov.u32 %r5811, %r5809;
@%p502 bra $L__BB0_437;
or.b32 %r5797, %r5797, 4;
add.s32 %r3407, %r1071, -1;
clz.b32 %r3408, %r3407;
mov.u32 %r3409, 32;
sub.s32 %r5809, %r3409, %r3408;
max.s32 %r5812, %r5812, %r5809;
shr.u32 %r3410, %r5798, 31;
add.s32 %r3411, %r3410, %r1071;
add.s32 %r5811, %r3411, -2;
$L__BB0_437:
mov.u32 %r5808, 0;
mov.u32 %r5803, %r5808;
@%p497 bra $L__BB0_440;
add.s32 %r3414, %r6102, %r1;
mul.wide.u32 %rd222, %r3414, 4;
add.s64 %rd223, %rd2, %rd222;
ld.global.u32 %r1080, [%rd223];
setp.eq.s32 %p504, %r1080, 0;
@%p504 bra $L__BB0_440;
and.b32 %r3415, %r1080, -2147483648;
abs.s32 %r3416, %r1080;
shl.b32 %r3417, %r3416, %r1004;
or.b32 %r5803, %r3417, %r3415;
$L__BB0_440:
shl.b32 %r3420, %r5803, 1;
shr.u32 %r3421, %r3420, %r45;
and.b32 %r1083, %r3421, -2;
setp.eq.s32 %p505, %r1083, 0;
mov.u32 %r5810, %r5808;
@%p505 bra $L__BB0_442;
or.b32 %r5797, %r5797, 8;
add.s32 %r3422, %r1083, -1;
clz.b32 %r3423, %r3422;
mov.u32 %r3424, 32;
sub.s32 %r5808, %r3424, %r3423;
max.s32 %r5812, %r5812, %r5808;
shr.u32 %r3425, %r5803, 31;
add.s32 %r3426, %r3425, %r1083;
add.s32 %r5810, %r3426, -2;
$L__BB0_442:
add.s32 %r6102, %r5772, 2;
// Combine the four per-sample results of the current group.
//   multi   = %r5797 has more than one bit set      ((x - 1) & x != 0)
//   base    = (multi AND %r5770 > 1) ? %r5770 : 1   (%r3430)
//   %r1100  = max(base, %r5812)
//   %r1101  = %r1100 - base
// If %r1101 >= 1, %r5815 becomes a 4-bit mask telling which of the four
// per-sample values equal %r5812:
//   bit 0: %r5790, bit 1: %r5794, bit 2: %r5809, bit 3: %r5808
// Otherwise %r5815 stays 0.
// NOTE(review): this resembles the HTJ2K kappa / U_q / e_k computation for a
// quad -- confirm against the CUDA source.
$L__BB0_443:
add.s32 %r3428, %r5797, -1;
and.b32 %r3429, %r3428, %r5797;
setp.ne.s32 %p506, %r3429, 0;
mov.u32 %r5815, 0;
setp.gt.s32 %p507, %r5770, 1;
and.pred %p508, %p507, %p506;
selp.b32 %r3430, %r5770, 1, %p508;
max.s32 %r1100, %r3430, %r5812;
sub.s32 %r1101, %r1100, %r3430;
setp.lt.s32 %p509, %r1101, 1;
@%p509 bra $L__BB0_445;
// Build the equality mask; the bfi places the %r5794 flag at bit 1 on top of
// the %r5790 flag at bit 0.
setp.eq.s32 %p510, %r5790, %r5812;
selp.u32 %r3431, 1, 0, %p510;
setp.eq.s32 %p511, %r5794, %r5812;
selp.u32 %r3432, -1, 0, %p511;
bfi.b32 %r3433, %r3432, %r3431, 1, 1;
setp.eq.s32 %p512, %r5809, %r5812;
selp.u16 %rs442, 1, 0, %p512;
mul.wide.u16 %r3434, %rs442, 4;
or.b32 %r3435, %r3433, %r3434;
setp.eq.s32 %p513, %r5808, %r5812;
selp.u16 %rs443, 1, 0, %p513;
mul.wide.u16 %r3436, %rs443, 8;
or.b32 %r5815, %r3435, %r3436;
// Table lookup followed by bit packing.
// Index = (%r5773 << 8) | (%r5797 << 4) | %r5815 into the table of 16-bit
// entries at %rd11 (kernel parameter 4). From the entry %rs154:
//   bit count = (entry >> 4) & 7     (%rs155; 0 means nothing is written)
//   value     = entry >> 8           (%r5817)
// The low "bit count" bits of the value are then appended, LSB first, to the
// byte accumulator %rs688.
// Status: %r5827 ends as %r5716 on normal completion, or 1 if the byte limit
// (%r5719 > 2879) was hit.
//   %r5718 = bits already held in the current byte
//   %r5717 = 1 when the current byte is limited to 7 bits, else 0
//   %r5719 = bytes stored so far; bytes go to %rd1 + (20548 - %r5719)
// NOTE(review): presumably the HTJ2K CxtVLC encode table (context, rho, e_k
// mask) -- confirm against the host code that fills parameter 4.
$L__BB0_445:
shl.b32 %r3437, %r5797, 4;
shl.b32 %r3438, %r5773, 8;
or.b32 %r3439, %r3437, %r3438;
or.b32 %r3440, %r3439, %r5815;
mul.wide.u32 %rd224, %r3440, 2;
add.s64 %rd225, %rd11, %rd224;
ld.global.u16 %rs154, [%rd225];
shr.u16 %rs444, %rs154, 4;
and.b16 %rs155, %rs444, 7;
setp.eq.s16 %p514, %rs155, 0;
mov.u32 %r5827, %r5716;
@%p514 bra $L__BB0_452;
cvt.u32.u16 %r5816, %rs155;
shr.u16 %rs445, %rs154, 8;
cvt.u32.u16 %r5817, %rs445;
$L__BB0_447:
mov.u32 %r1106, %r5816;
setp.gt.u32 %p515, %r5719, 2879;
mov.u32 %r5827, 1;
@%p515 bra $L__BB0_452;
// n = min(free bits in this byte, bits remaining)
mov.u32 %r3442, 8;
sub.s32 %r3443, %r3442, %r5717;
sub.s32 %r3444, %r3443, %r5718;
min.u32 %r3445, %r3444, %r1106;
// mask = low n bits set (all ones when n == 32)
setp.eq.s32 %p516, %r3445, 32;
mov.u32 %r3446, -1;
shl.b32 %r3447, %r3446, %r3445;
not.b32 %r3448, %r3447;
selp.b32 %r3449, -1, %r3448, %p516;
and.b32 %r3450, %r3449, %r5817;
shl.b32 %r3451, %r3450, %r5718;
cvt.u16.u32 %rs446, %r3451;
or.b16 %rs688, %rs688, %rs446;
add.s32 %r5718, %r3445, %r5718;
sub.s32 %r5816, %r1106, %r3445;
shr.u32 %r5817, %r5817, %r3445;
// Byte not full yet: go to the loop test.
setp.gt.u32 %p517, %r3444, %r1106;
@%p517 bra $L__BB0_451;
// Full byte: with the 7-bit limit active and a byte other than 0x7F, only the
// limit is lifted and the store is skipped; otherwise the byte is stored.
setp.ne.s32 %p518, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs447, %rs688, 255;
setp.ne.s16 %p519, %rs447, 127;
and.pred %p520, %p518, %p519;
@%p520 bra $L__BB0_451;
mov.u32 %r3454, 20548;
sub.s32 %r3455, %r3454, %r5719;
cvt.u64.u32 %rd226, %r3455;
add.s64 %rd227, %rd1, %rd226;
st.global.u8 [%rd227], %rs688;
add.s32 %r5719, %r5719, 1;
// A stored byte above 0x8F limits the next byte to 7 bits.
setp.gt.u16 %p521, %rs447, 143;
selp.u32 %r5717, 1, 0, %p521;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_451:
setp.ne.s32 %p522, %r5816, 0;
mov.u32 %r5827, %r5716;
@%p522 bra $L__BB0_447;
$L__BB0_452:
setp.ne.s32 %p523, %r5773, 0;
@%p523 bra $L__BB0_500;
setp.eq.s32 %p524, %r5797, 0;
add.s32 %r3456, %r5271, 17477;
cvt.u64.u32 %rd228, %r3456;
add.s64 %rd12, %rd1, %rd228;
@%p524 bra $L__BB0_492;
shl.b16 %rs705, %rs705, 1;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p525, %r5277, 0;
mov.u32 %r5861, %r5480;
@%p525 bra $L__BB0_457;
setp.gt.u32 %p526, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5861, 1;
@%p526 bra $L__BB0_457;
st.global.u8 [%rd12], %rs705;
add.s32 %r5271, %r5271, 1;
mov.u32 %r5277, 8;
mov.u16 %rs705, 0;
mov.u32 %r5861, %r5480;
$L__BB0_457:
setp.lt.u32 %p527, %r5482, 3;
mov.u32 %r5831, 0;
@%p527 bra $L__BB0_460;
setp.lt.u32 %p528, %r5482, 6;
mov.u32 %r5831, 1;
@%p528 bra $L__BB0_460;
setp.lt.u32 %p529, %r5482, 9;
setp.eq.s32 %p530, %r5482, 11;
selp.b32 %r3462, 4, 5, %p530;
setp.lt.u32 %p531, %r5482, 11;
selp.b32 %r3463, 3, %r3462, %p531;
selp.b32 %r5831, 2, %r3463, %p529;
$L__BB0_460:
setp.eq.s32 %p532, %r5831, 0;
@%p532 bra $L__BB0_488;
add.s32 %r1130, %r5831, -1;
and.b32 %r1131, %r5831, 3;
setp.eq.s32 %p533, %r1131, 0;
mov.u32 %r5841, %r5831;
mov.u32 %r5844, %r5861;
@%p533 bra $L__BB0_473;
mov.u32 %r3465, 1;
shl.b32 %r3466, %r3465, %r1130;
and.b32 %r3467, %r3466, %r5483;
setp.ne.s32 %p534, %r3467, 0;
selp.u32 %r3468, 1, 0, %p534;
cvt.u32.u16 %r3469, %rs705;
bfi.b32 %r3470, %r3469, %r3468, 1, 8;
cvt.u16.u32 %rs705, %r3470;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p535, %r5277, 0;
mov.u32 %r5844, %r5861;
@%p535 bra $L__BB0_465;
setp.gt.u32 %p536, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5844, %r3465;
@%p536 bra $L__BB0_465;
add.s32 %r3474, %r5271, 17477;
cvt.u64.u32 %rd229, %r3474;
add.s64 %rd230, %rd1, %rd229;
st.global.u8 [%rd230], %rs705;
add.s32 %r5271, %r5271, 1;
mov.u32 %r5277, 8;
mov.u16 %rs705, 0;
mov.u32 %r5844, %r5861;
$L__BB0_465:
setp.eq.s32 %p537, %r1131, 1;
mov.u32 %r5861, %r5844;
mov.u32 %r5841, %r1130;
@%p537 bra $L__BB0_473;
add.s32 %r5841, %r5831, -2;
mov.u32 %r3475, 1;
shl.b32 %r3476, %r3475, %r5841;
and.b32 %r3477, %r3476, %r5483;
setp.ne.s32 %p538, %r3477, 0;
selp.u32 %r3478, 1, 0, %p538;
cvt.u32.u16 %r3479, %rs705;
bfi.b32 %r3480, %r3479, %r3478, 1, 8;
cvt.u16.u32 %rs705, %r3480;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p539, %r5277, 0;
mov.u32 %r5835, %r5844;
@%p539 bra $L__BB0_469;
setp.gt.u32 %p540, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5835, %r3475;
@%p540 bra $L__BB0_469;
add.s32 %r3483, %r5271, 17477;
cvt.u64.u32 %rd231, %r3483;
add.s64 %rd232, %rd1, %rd231;
and.b16 %rs454, %rs705, 255;
st.global.u8 [%rd232], %rs705;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p541, %rs454, 255;
selp.b32 %r5277, 7, 8, %p541;
mov.u16 %rs705, 0;
mov.u32 %r5835, %r5844;
$L__BB0_469:
setp.eq.s32 %p542, %r1131, 2;
mov.u32 %r5861, %r5835;
mov.u32 %r5844, %r5835;
@%p542 bra $L__BB0_473;
add.s32 %r5841, %r5831, -3;
mov.u32 %r3484, 1;
shl.b32 %r3485, %r3484, %r5841;
and.b32 %r3486, %r3485, %r5483;
setp.ne.s32 %p543, %r3486, 0;
selp.u32 %r3487, 1, 0, %p543;
cvt.u32.u16 %r3488, %rs705;
bfi.b32 %r3489, %r3488, %r3487, 1, 8;
cvt.u16.u32 %rs705, %r3489;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p544, %r5277, 0;
mov.u32 %r5861, %r5835;
mov.u32 %r5844, %r5835;
@%p544 bra $L__BB0_473;
setp.gt.u32 %p545, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5861, %r3484;
mov.u32 %r5844, %r3484;
@%p545 bra $L__BB0_473;
add.s32 %r3494, %r5271, 17477;
cvt.u64.u32 %rd233, %r3494;
add.s64 %rd234, %rd1, %rd233;
and.b16 %rs457, %rs705, 255;
st.global.u8 [%rd234], %rs705;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p546, %rs457, 255;
selp.b32 %r5277, 7, 8, %p546;
mov.u16 %rs705, 0;
mov.u32 %r5861, %r5835;
mov.u32 %r5844, %r5835;
$L__BB0_473:
setp.lt.u32 %p547, %r1130, 3;
@%p547 bra $L__BB0_488;
mov.u32 %r5861, %r5844;
$L__BB0_475:
add.s32 %r3495, %r5841, -1;
mov.u32 %r3496, 1;
shl.b32 %r3497, %r3496, %r3495;
and.b32 %r3498, %r3497, %r5483;
setp.ne.s32 %p548, %r3498, 0;
selp.u32 %r3499, 1, 0, %p548;
cvt.u32.u16 %r3500, %rs705;
bfi.b32 %r5850, %r3500, %r3499, 1, 8;
add.s32 %r5851, %r5277, -1;
setp.ne.s32 %p549, %r5851, 0;
mov.u32 %r5849, %r5861;
@%p549 bra $L__BB0_478;
setp.gt.u32 %p550, %r5271, 191;
mov.u32 %r5851, 0;
mov.u32 %r5849, %r3496;
@%p550 bra $L__BB0_478;
cvt.u16.u32 %rs458, %r5850;
and.b16 %rs459, %rs458, 255;
add.s32 %r3504, %r5271, 17477;
cvt.u64.u32 %rd235, %r3504;
add.s64 %rd236, %rd1, %rd235;
st.global.u8 [%rd236], %rs458;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p551, %rs459, 255;
selp.b32 %r5851, 7, 8, %p551;
mov.u32 %r5850, 0;
mov.u32 %r5849, %r5861;
$L__BB0_478:
add.s32 %r3505, %r5841, -2;
shl.b32 %r3507, %r3496, %r3505;
and.b32 %r3508, %r3507, %r5483;
setp.ne.s32 %p552, %r3508, 0;
and.b32 %r3509, %r5850, 127;
selp.u32 %r3510, 1, 0, %p552;
bfi.b32 %r5854, %r3509, %r3510, 1, 7;
add.s32 %r5855, %r5851, -1;
setp.ne.s32 %p553, %r5855, 0;
mov.u32 %r5853, %r5849;
@%p553 bra $L__BB0_481;
setp.gt.u32 %p554, %r5271, 191;
mov.u32 %r5855, 0;
mov.u32 %r5853, 1;
@%p554 bra $L__BB0_481;
cvt.u16.u32 %rs460, %r5854;
and.b16 %rs461, %rs460, 255;
add.s32 %r3514, %r5271, 17477;
cvt.u64.u32 %rd237, %r3514;
add.s64 %rd238, %rd1, %rd237;
st.global.u8 [%rd238], %rs460;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p555, %rs461, 255;
selp.b32 %r5855, 7, 8, %p555;
mov.u32 %r5854, 0;
mov.u32 %r5853, %r5849;
$L__BB0_481:
add.s32 %r3515, %r5841, -3;
mov.u32 %r3516, 1;
shl.b32 %r3517, %r3516, %r3515;
and.b32 %r3518, %r3517, %r5483;
setp.ne.s32 %p556, %r3518, 0;
and.b32 %r3519, %r5854, 127;
selp.u32 %r3520, 1, 0, %p556;
bfi.b32 %r5858, %r3519, %r3520, 1, 7;
add.s32 %r5859, %r5855, -1;
setp.ne.s32 %p557, %r5859, 0;
mov.u32 %r5857, %r5853;
@%p557 bra $L__BB0_484;
setp.gt.u32 %p558, %r5271, 191;
mov.u32 %r5859, 0;
mov.u32 %r5857, %r3516;
@%p558 bra $L__BB0_484;
cvt.u16.u32 %rs462, %r5858;
and.b16 %rs463, %rs462, 255;
add.s32 %r3524, %r5271, 17477;
cvt.u64.u32 %rd239, %r3524;
add.s64 %rd240, %rd1, %rd239;
st.global.u8 [%rd240], %rs462;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p559, %rs463, 255;
selp.b32 %r5859, 7, 8, %p559;
mov.u32 %r5858, 0;
mov.u32 %r5857, %r5853;
$L__BB0_484:
add.s32 %r5841, %r5841, -4;
shl.b32 %r3526, %r3516, %r5841;
and.b32 %r3527, %r3526, %r5483;
setp.ne.s32 %p560, %r3527, 0;
and.b32 %r3528, %r5858, 127;
selp.u32 %r3529, 1, 0, %p560;
bfi.b32 %r3530, %r3528, %r3529, 1, 15;
cvt.u16.u32 %rs705, %r3530;
add.s32 %r5277, %r5859, -1;
setp.ne.s32 %p561, %r5277, 0;
mov.u32 %r5861, %r5857;
@%p561 bra $L__BB0_487;
setp.gt.u32 %p562, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5861, 1;
@%p562 bra $L__BB0_487;
add.s32 %r3533, %r5271, 17477;
cvt.u64.u32 %rd241, %r3533;
add.s64 %rd242, %rd1, %rd241;
and.b16 %rs465, %rs705, 255;
st.global.u8 [%rd242], %rs705;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p563, %rs465, 255;
selp.b32 %r5277, 7, 8, %p563;
mov.u16 %rs705, 0;
mov.u32 %r5861, %r5857;
$L__BB0_487:
setp.ne.s32 %p564, %r5841, 0;
@%p564 bra $L__BB0_475;
$L__BB0_488:
add.s32 %r3535, %r5482, -1;
setp.eq.s32 %p565, %r5482, 0;
mov.u32 %r5483, 0;
selp.b32 %r5482, 0, %r3535, %p565;
setp.lt.u32 %p566, %r5482, 3;
mov.u32 %r5867, %r5483;
@%p566 bra $L__BB0_491;
setp.lt.u32 %p567, %r5482, 6;
mov.u32 %r5867, 1;
@%p567 bra $L__BB0_491;
setp.lt.u32 %p568, %r5482, 9;
setp.eq.s32 %p569, %r5482, 11;
selp.b32 %r3537, 4, 5, %p569;
setp.lt.u32 %p570, %r5482, 11;
selp.b32 %r3538, 3, %r3537, %p570;
selp.b32 %r5867, 2, %r3538, %p568;
$L__BB0_491:
mov.u32 %r3540, 1;
shl.b32 %r5481, %r3540, %r5867;
mov.u32 %r5480, %r5861;
bra.uni $L__BB0_500;
$L__BB0_492:
add.s32 %r5483, %r5483, 1;
setp.lt.u32 %p571, %r5483, %r5481;
@%p571 bra $L__BB0_500;
shl.b16 %rs466, %rs705, 1;
or.b16 %rs705, %rs466, 1;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p572, %r5277, 0;
mov.u32 %r5868, %r5480;
@%p572 bra $L__BB0_496;
setp.gt.u32 %p573, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5868, 1;
@%p573 bra $L__BB0_496;
and.b16 %rs468, %rs705, 255;
st.global.u8 [%rd12], %rs705;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p574, %rs468, 255;
selp.b32 %r5277, 7, 8, %p574;
mov.u16 %rs705, 0;
mov.u32 %r5868, %r5480;
$L__BB0_496:
add.s32 %r3544, %r5482, 1;
min.u32 %r5482, %r3544, 12;
setp.lt.u32 %p575, %r5482, 3;
mov.u32 %r5483, 0;
mov.u32 %r5871, %r5483;
@%p575 bra $L__BB0_499;
setp.lt.u32 %p576, %r5482, 6;
mov.u32 %r5871, 1;
@%p576 bra $L__BB0_499;
setp.lt.u32 %p577, %r5482, 9;
setp.eq.s32 %p578, %r5482, 11;
selp.b32 %r3546, 4, 5, %p578;
setp.lt.u32 %p579, %r5482, 11;
selp.b32 %r3547, 3, %r3546, %p579;
selp.b32 %r5871, 2, %r3547, %p577;
$L__BB0_499:
mov.u32 %r3549, 1;
shl.b32 %r5481, %r3549, %r5871;
mov.u32 %r5480, %r5868;
$L__BB0_500:
and.b16 %rs469, %rs154, 15;
cvt.u32.u16 %r1214, %rs469;
and.b32 %r3550, %r5797, 1;
setp.eq.b32 %p580, %r3550, 1;
mov.pred %p581, 0;
xor.pred %p582, %p580, %p581;
not.pred %p583, %p582;
mov.u32 %r5888, %r5933;
@%p583 bra $L__BB0_507;
and.b32 %r3551, %r1214, 1;
sub.s32 %r5878, %r1100, %r3551;
setp.eq.s32 %p584, %r5878, 0;
mov.u32 %r5888, %r5933;
@%p584 bra $L__BB0_507;
mov.u32 %r3552, -1;
shl.b32 %r3553, %r3552, %r5878;
not.b32 %r3554, %r3553;
and.b32 %r5879, %r5791, %r3554;
$L__BB0_503:
setp.gt.u32 %p585, %r5907, 17476;
mov.u32 %r5888, 1;
@%p585 bra $L__BB0_507;
sub.s32 %r3556, %r5906, %r5905;
min.u32 %r3557, %r3556, %r5878;
setp.eq.s32 %p586, %r3557, 32;
mov.u32 %r3558, -1;
shl.b32 %r3559, %r3558, %r3557;
not.b32 %r3560, %r3559;
selp.b32 %r3561, -1, %r3560, %p586;
and.b32 %r3562, %r3561, %r5879;
shl.b32 %r3563, %r3562, %r5905;
or.b32 %r5904, %r3563, %r5904;
add.s32 %r5905, %r3557, %r5905;
shr.u32 %r5879, %r5879, %r3557;
sub.s32 %r5878, %r5878, %r3557;
setp.lt.u32 %p587, %r5905, %r5906;
@%p587 bra $L__BB0_506;
cvt.u64.u32 %rd243, %r5907;
add.s64 %rd244, %rd1, %rd243;
st.global.u8 [%rd244], %r5904;
add.s32 %r5907, %r5907, 1;
setp.eq.s32 %p588, %r5904, 255;
selp.b32 %r5906, 7, 8, %p588;
mov.u32 %r5904, 0;
mov.u32 %r5905, %r5904;
$L__BB0_506:
setp.ne.s32 %p589, %r5878, 0;
mov.u32 %r5888, %r5933;
@%p589 bra $L__BB0_503;
$L__BB0_507:
and.b32 %r1238, %r5797, 2;
setp.eq.s32 %p590, %r1238, 0;
mov.u32 %r5903, %r5888;
@%p590 bra $L__BB0_514;
shr.u32 %r3566, %r1214, 1;
and.b32 %r3567, %r3566, 1;
sub.s32 %r5893, %r1100, %r3567;
setp.eq.s32 %p591, %r5893, 0;
mov.u32 %r5903, %r5888;
@%p591 bra $L__BB0_514;
mov.u32 %r3568, -1;
shl.b32 %r3569, %r3568, %r5893;
not.b32 %r3570, %r3569;
and.b32 %r5894, %r5795, %r3570;
$L__BB0_510:
setp.gt.u32 %p592, %r5907, 17476;
mov.u32 %r5903, 1;
@%p592 bra $L__BB0_514;
sub.s32 %r3572, %r5906, %r5905;
min.u32 %r3573, %r3572, %r5893;
setp.eq.s32 %p593, %r3573, 32;
mov.u32 %r3574, -1;
shl.b32 %r3575, %r3574, %r3573;
not.b32 %r3576, %r3575;
selp.b32 %r3577, -1, %r3576, %p593;
and.b32 %r3578, %r3577, %r5894;
shl.b32 %r3579, %r3578, %r5905;
or.b32 %r5904, %r3579, %r5904;
add.s32 %r5905, %r3573, %r5905;
shr.u32 %r5894, %r5894, %r3573;
sub.s32 %r5893, %r5893, %r3573;
setp.lt.u32 %p594, %r5905, %r5906;
@%p594 bra $L__BB0_513;
cvt.u64.u32 %rd245, %r5907;
add.s64 %rd246, %rd1, %rd245;
st.global.u8 [%rd246], %r5904;
add.s32 %r5907, %r5907, 1;
setp.eq.s32 %p595, %r5904, 255;
selp.b32 %r5906, 7, 8, %p595;
mov.u32 %r5904, 0;
mov.u32 %r5905, %r5904;
$L__BB0_513:
setp.ne.s32 %p596, %r5893, 0;
mov.u32 %r5903, %r5888;
@%p596 bra $L__BB0_510;
$L__BB0_514:
and.b32 %r1262, %r5797, 4;
setp.eq.s32 %p597, %r1262, 0;
mov.u32 %r5918, %r5903;
@%p597 bra $L__BB0_521;
shr.u32 %r3582, %r1214, 2;
and.b32 %r3583, %r3582, 1;
sub.s32 %r5908, %r1100, %r3583;
setp.eq.s32 %p598, %r5908, 0;
mov.u32 %r5918, %r5903;
@%p598 bra $L__BB0_521;
mov.u32 %r3584, -1;
shl.b32 %r3585, %r3584, %r5908;
not.b32 %r3586, %r3585;
and.b32 %r5909, %r5811, %r3586;
$L__BB0_517:
setp.gt.u32 %p599, %r5907, 17476;
mov.u32 %r5918, 1;
@%p599 bra $L__BB0_521;
sub.s32 %r3588, %r5906, %r5905;
min.u32 %r3589, %r3588, %r5908;
setp.eq.s32 %p600, %r3589, 32;
mov.u32 %r3590, -1;
shl.b32 %r3591, %r3590, %r3589;
not.b32 %r3592, %r3591;
selp.b32 %r3593, -1, %r3592, %p600;
and.b32 %r3594, %r3593, %r5909;
shl.b32 %r3595, %r3594, %r5905;
or.b32 %r5904, %r3595, %r5904;
add.s32 %r5905, %r3589, %r5905;
shr.u32 %r5909, %r5909, %r3589;
sub.s32 %r5908, %r5908, %r3589;
setp.lt.u32 %p601, %r5905, %r5906;
@%p601 bra $L__BB0_520;
cvt.u64.u32 %rd247, %r5907;
add.s64 %rd248, %rd1, %rd247;
st.global.u8 [%rd248], %r5904;
add.s32 %r5907, %r5907, 1;
setp.eq.s32 %p602, %r5904, 255;
selp.b32 %r5906, 7, 8, %p602;
mov.u32 %r5904, 0;
mov.u32 %r5905, %r5904;
$L__BB0_520:
setp.ne.s32 %p603, %r5908, 0;
mov.u32 %r5918, %r5903;
@%p603 bra $L__BB0_517;
$L__BB0_521:
and.b32 %r1286, %r5797, 8;
setp.eq.s32 %p604, %r1286, 0;
mov.u32 %r5933, %r5918;
@%p604 bra $L__BB0_528;
shr.u32 %r3598, %r1214, 3;
sub.s32 %r5923, %r1100, %r3598;
setp.eq.s32 %p605, %r5923, 0;
mov.u32 %r5933, %r5918;
@%p605 bra $L__BB0_528;
mov.u32 %r3599, -1;
shl.b32 %r3600, %r3599, %r5923;
not.b32 %r3601, %r3600;
and.b32 %r5924, %r5810, %r3601;
$L__BB0_524:
setp.gt.u32 %p606, %r5907, 17476;
mov.u32 %r5933, 1;
@%p606 bra $L__BB0_528;
sub.s32 %r3603, %r5906, %r5905;
min.u32 %r3604, %r3603, %r5923;
setp.eq.s32 %p607, %r3604, 32;
mov.u32 %r3605, -1;
shl.b32 %r3606, %r3605, %r3604;
not.b32 %r3607, %r3606;
selp.b32 %r3608, -1, %r3607, %p607;
and.b32 %r3609, %r3608, %r5924;
shl.b32 %r3610, %r3609, %r5905;
or.b32 %r5904, %r3610, %r5904;
add.s32 %r5905, %r3604, %r5905;
shr.u32 %r5924, %r5924, %r3604;
sub.s32 %r5923, %r5923, %r3604;
setp.lt.u32 %p608, %r5905, %r5906;
@%p608 bra $L__BB0_527;
cvt.u64.u32 %rd249, %r5907;
add.s64 %rd250, %rd1, %rd249;
st.global.u8 [%rd250], %r5904;
add.s32 %r5907, %r5907, 1;
setp.eq.s32 %p609, %r5904, 255;
selp.b32 %r5906, 7, 8, %p609;
mov.u32 %r5904, 0;
mov.u32 %r5905, %r5904;
$L__BB0_527:
setp.ne.s32 %p610, %r5923, 0;
mov.u32 %r5933, %r5918;
@%p610 bra $L__BB0_524;
$L__BB0_528:
mov.u32 %r3615, _ZZ31 j2k_htj2k_encode_codeblockE13cleanup_e_val;
add.s32 %r1310, %r3615, %r5771;
ld.shared.u8 %rs470, [%r1310];
mov.u32 %r5773, 0;
cvt.u32.u16 %r3616, %rs470;
and.b32 %r3617, %r3616, 255;
and.b32 %r3618, %r5794, 255;
setp.lt.u32 %p611, %r3618, %r3617;
cvt.u16.u32 %rs471, %r5794;
selp.b16 %rs472, %rs470, %rs471, %p611;
st.shared.u8 [%r1310], %rs472;
ld.shared.u8 %rs176, [%r1310+2];
ld.shared.u8 %rs473, [%r1310+1];
setp.gt.u16 %p612, %rs473, %rs176;
add.s32 %r6103, %r5771, 1;
add.s32 %r3619, %r5771, 2;
selp.b32 %r3620, %r6103, %r3619, %p612;
add.s32 %r3621, %r3615, %r3620;
ld.shared.u8 %rs177, [%r3621];
cvt.u32.u16 %r3622, %rs177;
and.b32 %r3623, %r3622, 255;
add.s32 %r5770, %r3623, -1;
cvt.u16.u32 %rs178, %r5808;
cvt.u16.u32 %rs474, %r1238;
shr.u16 %rs475, %rs474, 1;
mov.u32 %r3624, _ZZ31 j2k_htj2k_encode_codeblockE14cleanup_cx_val;
add.s32 %r1313, %r3624, %r5769;
st.shared.u8 [%r1310+1], %r5808;
ld.shared.u8 %rs476, [%r1313];
or.b16 %rs477, %rs476, %rs475;
st.shared.u8 [%r1313], %rs477;
add.s32 %r5769, %r5769, 1;
ld.shared.u8 %rs179, [%r1313+1];
ld.shared.u8 %r1315, [%r1313+2];
shr.u32 %r1316, %r1286, 3;
st.shared.u8 [%r1313+1], %r1316;
add.s32 %r3625, %r5768, 2;
setp.ge.u32 %p613, %r3625, %r5;
mov.u32 %r6107, %r5773;
@%p613 bra $L__BB0_635;
mul.wide.u32 %rd251, %r6102, 4;
add.s64 %rd252, %rd2, %rd251;
ld.global.u32 %r1317, [%rd252];
setp.eq.s32 %p614, %r1317, 0;
mov.u32 %r5939, 0;
mov.u32 %r5938, %r5939;
@%p614 bra $L__BB0_531;
and.b32 %r3627, %r1317, -2147483648;
abs.s32 %r3628, %r1317;
shl.b32 %r3629, %r3628, %r1004;
or.b32 %r5938, %r3629, %r3627;
$L__BB0_531:
shl.b32 %r3633, %r5938, 1;
shr.u32 %r3634, %r3633, %r45;
and.b32 %r1320, %r3634, -2;
setp.eq.s32 %p615, %r1320, 0;
mov.u32 %r5940, %r5939;
mov.u32 %r5946, %r5939;
@%p615 bra $L__BB0_533;
add.s32 %r3636, %r1320, -1;
clz.b32 %r3637, %r3636;
mov.u32 %r3638, 32;
sub.s32 %r5939, %r3638, %r3637;
shr.u32 %r3639, %r5938, 31;
add.s32 %r3640, %r3639, %r1320;
add.s32 %r5940, %r3640, -2;
mov.u32 %r5946, 1;
$L__BB0_533:
mov.u32 %r5943, 0;
mov.u32 %r5942, %r5943;
@%p497 bra $L__BB0_536;
add.s32 %r3643, %r6102, %r1;
mul.wide.u32 %rd253, %r3643, 4;
add.s64 %rd254, %rd2, %rd253;
ld.global.u32 %r1326, [%rd254];
setp.eq.s32 %p617, %r1326, 0;
@%p617 bra $L__BB0_536;
and.b32 %r3644, %r1326, -2147483648;
abs.s32 %r3645, %r1326;
shl.b32 %r3646, %r3645, %r1004;
or.b32 %r5942, %r3646, %r3644;
$L__BB0_536:
shl.b32 %r3649, %r5942, 1;
shr.u32 %r3650, %r3649, %r45;
and.b32 %r1329, %r3650, -2;
setp.eq.s32 %p618, %r1329, 0;
mov.u32 %r5944, %r5943;
mov.u32 %r5961, %r5939;
@%p618 bra $L__BB0_538;
or.b32 %r5946, %r5946, 2;
add.s32 %r3651, %r1329, -1;
clz.b32 %r3652, %r3651;
mov.u32 %r3653, 32;
sub.s32 %r5943, %r3653, %r3652;
max.s32 %r5961, %r5939, %r5943;
shr.u32 %r3654, %r5942, 31;
add.s32 %r3655, %r3654, %r1329;
add.s32 %r5944, %r3655, -2;
$L__BB0_538:
add.s32 %r5963, %r6102, 1;
add.s32 %r3660, %r5768, 3;
setp.ge.u32 %p619, %r3660, %r5;
mov.u32 %r5964, 0;
mov.u32 %r5957, %r5964;
mov.u32 %r5958, %r5964;
mov.u32 %r5959, %r5964;
mov.u32 %r5960, %r5964;
@%p619 bra $L__BB0_549;
mul.wide.u32 %rd255, %r5963, 4;
add.s64 %rd256, %rd2, %rd255;
ld.global.u32 %r1339, [%rd256];
setp.eq.s32 %p620, %r1339, 0;
mov.u32 %r5958, 0;
mov.u32 %r5947, %r5958;
@%p620 bra $L__BB0_541;
and.b32 %r3662, %r1339, -2147483648;
abs.s32 %r3663, %r1339;
shl.b32 %r3664, %r3663, %r1004;
or.b32 %r5947, %r3664, %r3662;
$L__BB0_541:
shl.b32 %r3667, %r5947, 1;
shr.u32 %r3668, %r3667, %r45;
and.b32 %r1342, %r3668, -2;
setp.eq.s32 %p621, %r1342, 0;
mov.u32 %r5960, %r5958;
@%p621 bra $L__BB0_543;
or.b32 %r5946, %r5946, 4;
add.s32 %r3669, %r1342, -1;
clz.b32 %r3670, %r3669;
mov.u32 %r3671, 32;
sub.s32 %r5958, %r3671, %r3670;
max.s32 %r5961, %r5961, %r5958;
shr.u32 %r3672, %r5947, 31;
add.s32 %r3673, %r3672, %r1342;
add.s32 %r5960, %r3673, -2;
$L__BB0_543:
mov.u32 %r5957, 0;
mov.u32 %r5952, %r5957;
@%p497 bra $L__BB0_546;
add.s32 %r3676, %r5963, %r1;
mul.wide.u32 %rd257, %r3676, 4;
add.s64 %rd258, %rd2, %rd257;
ld.global.u32 %r1351, [%rd258];
setp.eq.s32 %p623, %r1351, 0;
@%p623 bra $L__BB0_546;
and.b32 %r3677, %r1351, -2147483648;
abs.s32 %r3678, %r1351;
shl.b32 %r3679, %r3678, %r1004;
or.b32 %r5952, %r3679, %r3677;
$L__BB0_546:
shl.b32 %r3682, %r5952, 1;
shr.u32 %r3683, %r3682, %r45;
and.b32 %r1354, %r3683, -2;
setp.eq.s32 %p624, %r1354, 0;
mov.u32 %r5959, %r5957;
@%p624 bra $L__BB0_548;
or.b32 %r5946, %r5946, 8;
add.s32 %r3684, %r1354, -1;
clz.b32 %r3685, %r3684;
mov.u32 %r3686, 32;
sub.s32 %r5957, %r3686, %r3685;
max.s32 %r5961, %r5961, %r5957;
shr.u32 %r3687, %r5952, 31;
add.s32 %r3688, %r3687, %r1354;
add.s32 %r5959, %r3688, -2;
$L__BB0_548:
add.s32 %r5963, %r6102, 2;
$L__BB0_549:
mov.u32 %r6102, %r5963;
shr.u32 %r3690, %r1286, 2;
shr.u32 %r3691, %r1262, 1;
or.b32 %r3692, %r3690, %r3691;
cvt.u32.u16 %r3693, %rs179;
and.b32 %r3694, %r3693, 255;
shl.b32 %r3695, %r1315, 2;
add.s32 %r3696, %r3695, %r3694;
or.b32 %r1371, %r3692, %r3696;
add.s32 %r3697, %r5946, -1;
and.b32 %r3698, %r3697, %r5946;
setp.ne.s32 %p625, %r3698, 0;
setp.gt.u16 %p626, %rs177, 2;
and.pred %p627, %p626, %p625;
selp.b32 %r3699, %r5770, 1, %p627;
max.s32 %r1372, %r3699, %r5961;
sub.s32 %r6107, %r1372, %r3699;
setp.lt.s32 %p628, %r6107, 1;
@%p628 bra $L__BB0_551;
setp.eq.s32 %p629, %r5939, %r5961;
selp.u32 %r3700, 1, 0, %p629;
setp.eq.s32 %p630, %r5943, %r5961;
selp.u32 %r3701, -1, 0, %p630;
bfi.b32 %r3702, %r3701, %r3700, 1, 1;
setp.eq.s32 %p631, %r5958, %r5961;
selp.u16 %rs479, 1, 0, %p631;
mul.wide.u16 %r3703, %rs479, 4;
or.b32 %r3704, %r3702, %r3703;
setp.eq.s32 %p632, %r5957, %r5961;
selp.u16 %rs480, 1, 0, %p632;
mul.wide.u16 %r3705, %rs480, 8;
or.b32 %r5964, %r3704, %r3705;
$L__BB0_551:
shl.b32 %r3706, %r5946, 4;
shl.b32 %r3707, %r1371, 8;
or.b32 %r3708, %r3706, %r3707;
or.b32 %r3709, %r3708, %r5964;
mul.wide.u32 %rd260, %r3709, 2;
add.s64 %rd261, %rd11, %rd260;
ld.global.u16 %rs180, [%rd261];
shr.u16 %rs481, %rs180, 4;
and.b16 %rs181, %rs481, 7;
setp.eq.s16 %p633, %rs181, 0;
mov.u32 %r5976, %r5827;
@%p633 bra $L__BB0_558;
cvt.u32.u16 %r5965, %rs181;
shr.u16 %rs482, %rs180, 8;
cvt.u32.u16 %r5966, %rs482;
$L__BB0_553:
mov.u32 %r1378, %r5965;
setp.gt.u32 %p634, %r5719, 2879;
mov.u32 %r5976, 1;
@%p634 bra $L__BB0_558;
mov.u32 %r3711, 8;
sub.s32 %r3712, %r3711, %r5717;
sub.s32 %r3713, %r3712, %r5718;
min.u32 %r3714, %r3713, %r1378;
setp.eq.s32 %p635, %r3714, 32;
mov.u32 %r3715, -1;
shl.b32 %r3716, %r3715, %r3714;
not.b32 %r3717, %r3716;
selp.b32 %r3718, -1, %r3717, %p635;
and.b32 %r3719, %r3718, %r5966;
shl.b32 %r3720, %r3719, %r5718;
cvt.u16.u32 %rs483, %r3720;
or.b16 %rs688, %rs688, %rs483;
add.s32 %r5718, %r3714, %r5718;
sub.s32 %r5965, %r1378, %r3714;
shr.u32 %r5966, %r5966, %r3714;
setp.gt.u32 %p636, %r3713, %r1378;
@%p636 bra $L__BB0_557;
setp.ne.s32 %p637, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs484, %rs688, 255;
setp.ne.s16 %p638, %rs484, 127;
and.pred %p639, %p637, %p638;
@%p639 bra $L__BB0_557;
mov.u32 %r3723, 20548;
sub.s32 %r3724, %r3723, %r5719;
cvt.u64.u32 %rd262, %r3724;
add.s64 %rd263, %rd1, %rd262;
st.global.u8 [%rd263], %rs688;
add.s32 %r5719, %r5719, 1;
setp.gt.u16 %p640, %rs484, 143;
selp.u32 %r5717, 1, 0, %p640;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_557:
setp.ne.s32 %p641, %r5965, 0;
mov.u32 %r5976, %r5827;
@%p641 bra $L__BB0_553;
$L__BB0_558:
setp.ne.s32 %p642, %r1371, 0;
@%p642 bra $L__BB0_606;
setp.eq.s32 %p643, %r5946, 0;
add.s32 %r3725, %r5271, 17477;
cvt.u64.u32 %rd264, %r3725;
add.s64 %rd13, %rd1, %rd264;
@%p643 bra $L__BB0_598;
shl.b16 %rs705, %rs705, 1;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p644, %r5277, 0;
mov.u32 %r6010, %r5480;
@%p644 bra $L__BB0_563;
setp.gt.u32 %p645, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r6010, 1;
@%p645 bra $L__BB0_563;
st.global.u8 [%rd13], %rs705;
add.s32 %r5271, %r5271, 1;
mov.u32 %r5277, 8;
mov.u16 %rs705, 0;
mov.u32 %r6010, %r5480;
$L__BB0_563:
setp.lt.u32 %p646, %r5482, 3;
mov.u32 %r5980, 0;
@%p646 bra $L__BB0_566;
setp.lt.u32 %p647, %r5482, 6;
mov.u32 %r5980, 1;
@%p647 bra $L__BB0_566;
setp.lt.u32 %p648, %r5482, 9;
setp.eq.s32 %p649, %r5482, 11;
selp.b32 %r3731, 4, 5, %p649;
setp.lt.u32 %p650, %r5482, 11;
selp.b32 %r3732, 3, %r3731, %p650;
selp.b32 %r5980, 2, %r3732, %p648;
$L__BB0_566:
setp.eq.s32 %p651, %r5980, 0;
@%p651 bra $L__BB0_594;
add.s32 %r1402, %r5980, -1;
and.b32 %r1403, %r5980, 3;
setp.eq.s32 %p652, %r1403, 0;
mov.u32 %r5990, %r5980;
mov.u32 %r5993, %r6010;
@%p652 bra $L__BB0_579;
mov.u32 %r3734, 1;
shl.b32 %r3735, %r3734, %r1402;
and.b32 %r3736, %r3735, %r5483;
setp.ne.s32 %p653, %r3736, 0;
selp.u32 %r3737, 1, 0, %p653;
cvt.u32.u16 %r3738, %rs705;
bfi.b32 %r3739, %r3738, %r3737, 1, 8;
cvt.u16.u32 %rs705, %r3739;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p654, %r5277, 0;
mov.u32 %r5993, %r6010;
@%p654 bra $L__BB0_571;
setp.gt.u32 %p655, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5993, %r3734;
@%p655 bra $L__BB0_571;
add.s32 %r3743, %r5271, 17477;
cvt.u64.u32 %rd265, %r3743;
add.s64 %rd266, %rd1, %rd265;
st.global.u8 [%rd266], %rs705;
add.s32 %r5271, %r5271, 1;
mov.u32 %r5277, 8;
mov.u16 %rs705, 0;
mov.u32 %r5993, %r6010;
$L__BB0_571:
setp.eq.s32 %p656, %r1403, 1;
mov.u32 %r6010, %r5993;
mov.u32 %r5990, %r1402;
@%p656 bra $L__BB0_579;
add.s32 %r5990, %r5980, -2;
mov.u32 %r3744, 1;
shl.b32 %r3745, %r3744, %r5990;
and.b32 %r3746, %r3745, %r5483;
setp.ne.s32 %p657, %r3746, 0;
selp.u32 %r3747, 1, 0, %p657;
cvt.u32.u16 %r3748, %rs705;
bfi.b32 %r3749, %r3748, %r3747, 1, 8;
cvt.u16.u32 %rs705, %r3749;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p658, %r5277, 0;
mov.u32 %r5984, %r5993;
@%p658 bra $L__BB0_575;
setp.gt.u32 %p659, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r5984, %r3744;
@%p659 bra $L__BB0_575;
add.s32 %r3752, %r5271, 17477;
cvt.u64.u32 %rd267, %r3752;
add.s64 %rd268, %rd1, %rd267;
and.b16 %rs491, %rs705, 255;
st.global.u8 [%rd268], %rs705;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p660, %rs491, 255;
selp.b32 %r5277, 7, 8, %p660;
mov.u16 %rs705, 0;
mov.u32 %r5984, %r5993;
$L__BB0_575:
setp.eq.s32 %p661, %r1403, 2;
mov.u32 %r6010, %r5984;
mov.u32 %r5993, %r5984;
@%p661 bra $L__BB0_579;
add.s32 %r5990, %r5980, -3;
mov.u32 %r3753, 1;
shl.b32 %r3754, %r3753, %r5990;
and.b32 %r3755, %r3754, %r5483;
setp.ne.s32 %p662, %r3755, 0;
selp.u32 %r3756, 1, 0, %p662;
cvt.u32.u16 %r3757, %rs705;
bfi.b32 %r3758, %r3757, %r3756, 1, 8;
cvt.u16.u32 %rs705, %r3758;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p663, %r5277, 0;
mov.u32 %r6010, %r5984;
mov.u32 %r5993, %r5984;
@%p663 bra $L__BB0_579;
setp.gt.u32 %p664, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r6010, %r3753;
mov.u32 %r5993, %r3753;
@%p664 bra $L__BB0_579;
add.s32 %r3763, %r5271, 17477;
cvt.u64.u32 %rd269, %r3763;
add.s64 %rd270, %rd1, %rd269;
and.b16 %rs494, %rs705, 255;
st.global.u8 [%rd270], %rs705;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p665, %rs494, 255;
selp.b32 %r5277, 7, 8, %p665;
mov.u16 %rs705, 0;
mov.u32 %r6010, %r5984;
mov.u32 %r5993, %r5984;
$L__BB0_579:
setp.lt.u32 %p666, %r1402, 3;
@%p666 bra $L__BB0_594;
mov.u32 %r6010, %r5993;
$L__BB0_581:
add.s32 %r3764, %r5990, -1;
mov.u32 %r3765, 1;
shl.b32 %r3766, %r3765, %r3764;
and.b32 %r3767, %r3766, %r5483;
setp.ne.s32 %p667, %r3767, 0;
selp.u32 %r3768, 1, 0, %p667;
cvt.u32.u16 %r3769, %rs705;
bfi.b32 %r5999, %r3769, %r3768, 1, 8;
add.s32 %r6000, %r5277, -1;
setp.ne.s32 %p668, %r6000, 0;
mov.u32 %r5998, %r6010;
@%p668 bra $L__BB0_584;
setp.gt.u32 %p669, %r5271, 191;
mov.u32 %r6000, 0;
mov.u32 %r5998, %r3765;
@%p669 bra $L__BB0_584;
cvt.u16.u32 %rs495, %r5999;
and.b16 %rs496, %rs495, 255;
add.s32 %r3773, %r5271, 17477;
cvt.u64.u32 %rd271, %r3773;
add.s64 %rd272, %rd1, %rd271;
st.global.u8 [%rd272], %rs495;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p670, %rs496, 255;
selp.b32 %r6000, 7, 8, %p670;
mov.u32 %r5999, 0;
mov.u32 %r5998, %r6010;
$L__BB0_584:
add.s32 %r3774, %r5990, -2;
shl.b32 %r3776, %r3765, %r3774;
and.b32 %r3777, %r3776, %r5483;
setp.ne.s32 %p671, %r3777, 0;
and.b32 %r3778, %r5999, 127;
selp.u32 %r3779, 1, 0, %p671;
bfi.b32 %r6003, %r3778, %r3779, 1, 7;
add.s32 %r6004, %r6000, -1;
setp.ne.s32 %p672, %r6004, 0;
mov.u32 %r6002, %r5998;
@%p672 bra $L__BB0_587;
setp.gt.u32 %p673, %r5271, 191;
mov.u32 %r6004, 0;
mov.u32 %r6002, 1;
@%p673 bra $L__BB0_587;
cvt.u16.u32 %rs497, %r6003;
and.b16 %rs498, %rs497, 255;
add.s32 %r3783, %r5271, 17477;
cvt.u64.u32 %rd273, %r3783;
add.s64 %rd274, %rd1, %rd273;
st.global.u8 [%rd274], %rs497;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p674, %rs498, 255;
selp.b32 %r6004, 7, 8, %p674;
mov.u32 %r6003, 0;
mov.u32 %r6002, %r5998;
$L__BB0_587:
add.s32 %r3784, %r5990, -3;
mov.u32 %r3785, 1;
shl.b32 %r3786, %r3785, %r3784;
and.b32 %r3787, %r3786, %r5483;
setp.ne.s32 %p675, %r3787, 0;
and.b32 %r3788, %r6003, 127;
selp.u32 %r3789, 1, 0, %p675;
bfi.b32 %r6007, %r3788, %r3789, 1, 7;
add.s32 %r6008, %r6004, -1;
setp.ne.s32 %p676, %r6008, 0;
mov.u32 %r6006, %r6002;
@%p676 bra $L__BB0_590;
setp.gt.u32 %p677, %r5271, 191;
mov.u32 %r6008, 0;
mov.u32 %r6006, %r3785;
@%p677 bra $L__BB0_590;
cvt.u16.u32 %rs499, %r6007;
and.b16 %rs500, %rs499, 255;
add.s32 %r3793, %r5271, 17477;
cvt.u64.u32 %rd275, %r3793;
add.s64 %rd276, %rd1, %rd275;
st.global.u8 [%rd276], %rs499;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p678, %rs500, 255;
selp.b32 %r6008, 7, 8, %p678;
mov.u32 %r6007, 0;
mov.u32 %r6006, %r6002;
$L__BB0_590:
add.s32 %r5990, %r5990, -4;
shl.b32 %r3795, %r3785, %r5990;
and.b32 %r3796, %r3795, %r5483;
setp.ne.s32 %p679, %r3796, 0;
and.b32 %r3797, %r6007, 127;
selp.u32 %r3798, 1, 0, %p679;
bfi.b32 %r3799, %r3797, %r3798, 1, 15;
cvt.u16.u32 %rs705, %r3799;
add.s32 %r5277, %r6008, -1;
setp.ne.s32 %p680, %r5277, 0;
mov.u32 %r6010, %r6006;
@%p680 bra $L__BB0_593;
setp.gt.u32 %p681, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r6010, 1;
@%p681 bra $L__BB0_593;
add.s32 %r3802, %r5271, 17477;
cvt.u64.u32 %rd277, %r3802;
add.s64 %rd278, %rd1, %rd277;
and.b16 %rs502, %rs705, 255;
st.global.u8 [%rd278], %rs705;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p682, %rs502, 255;
selp.b32 %r5277, 7, 8, %p682;
mov.u16 %rs705, 0;
mov.u32 %r6010, %r6006;
$L__BB0_593:
setp.ne.s32 %p683, %r5990, 0;
@%p683 bra $L__BB0_581;
$L__BB0_594:
add.s32 %r3804, %r5482, -1;
setp.eq.s32 %p684, %r5482, 0;
mov.u32 %r5483, 0;
selp.b32 %r5482, 0, %r3804, %p684;
setp.lt.u32 %p685, %r5482, 3;
mov.u32 %r6016, %r5483;
@%p685 bra $L__BB0_597;
setp.lt.u32 %p686, %r5482, 6;
mov.u32 %r6016, 1;
@%p686 bra $L__BB0_597;
setp.lt.u32 %p687, %r5482, 9;
setp.eq.s32 %p688, %r5482, 11;
selp.b32 %r3806, 4, 5, %p688;
setp.lt.u32 %p689, %r5482, 11;
selp.b32 %r3807, 3, %r3806, %p689;
selp.b32 %r6016, 2, %r3807, %p687;
$L__BB0_597:
mov.u32 %r3809, 1;
shl.b32 %r5481, %r3809, %r6016;
mov.u32 %r5480, %r6010;
bra.uni $L__BB0_606;
$L__BB0_598:
add.s32 %r5483, %r5483, 1;
setp.lt.u32 %p690, %r5483, %r5481;
@%p690 bra $L__BB0_606;
shl.b16 %rs503, %rs705, 1;
or.b16 %rs705, %rs503, 1;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p691, %r5277, 0;
mov.u32 %r6017, %r5480;
@%p691 bra $L__BB0_602;
setp.gt.u32 %p692, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r6017, 1;
@%p692 bra $L__BB0_602;
and.b16 %rs505, %rs705, 255;
st.global.u8 [%rd13], %rs705;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p693, %rs505, 255;
selp.b32 %r5277, 7, 8, %p693;
mov.u16 %rs705, 0;
mov.u32 %r6017, %r5480;
$L__BB0_602:
add.s32 %r3813, %r5482, 1;
min.u32 %r5482, %r3813, 12;
setp.lt.u32 %p694, %r5482, 3;
mov.u32 %r5483, 0;
mov.u32 %r6020, %r5483;
@%p694 bra $L__BB0_605;
setp.lt.u32 %p695, %r5482, 6;
mov.u32 %r6020, 1;
@%p695 bra $L__BB0_605;
setp.lt.u32 %p696, %r5482, 9;
setp.eq.s32 %p697, %r5482, 11;
selp.b32 %r3815, 4, 5, %p697;
setp.lt.u32 %p698, %r5482, 11;
selp.b32 %r3816, 3, %r3815, %p698;
selp.b32 %r6020, 2, %r3816, %p696;
$L__BB0_605:
mov.u32 %r3818, 1;
shl.b32 %r5481, %r3818, %r6020;
mov.u32 %r5480, %r6017;
$L__BB0_606:
and.b16 %rs506, %rs180, 15;
cvt.u32.u16 %r1486, %rs506;
and.b32 %r3819, %r5946, 1;
setp.eq.b32 %p699, %r3819, 1;
mov.pred %p700, 0;
xor.pred %p701, %p699, %p700;
not.pred %p702, %p701;
mov.u32 %r6037, %r5933;
@%p702 bra $L__BB0_613;
and.b32 %r3820, %r1486, 1;
sub.s32 %r6027, %r1372, %r3820;
setp.eq.s32 %p703, %r6027, 0;
mov.u32 %r6037, %r5933;
@%p703 bra $L__BB0_613;
mov.u32 %r3821, -1;
shl.b32 %r3822, %r3821, %r6027;
not.b32 %r3823, %r3822;
and.b32 %r6028, %r5940, %r3823;
$L__BB0_609:
setp.gt.u32 %p704, %r5907, 17476;
mov.u32 %r6037, 1;
@%p704 bra $L__BB0_613;
sub.s32 %r3825, %r5906, %r5905;
min.u32 %r3826, %r3825, %r6027;
setp.eq.s32 %p705, %r3826, 32;
mov.u32 %r3827, -1;
shl.b32 %r3828, %r3827, %r3826;
not.b32 %r3829, %r3828;
selp.b32 %r3830, -1, %r3829, %p705;
and.b32 %r3831, %r3830, %r6028;
shl.b32 %r3832, %r3831, %r5905;
or.b32 %r5904, %r3832, %r5904;
add.s32 %r5905, %r3826, %r5905;
shr.u32 %r6028, %r6028, %r3826;
sub.s32 %r6027, %r6027, %r3826;
setp.lt.u32 %p706, %r5905, %r5906;
@%p706 bra $L__BB0_612;
cvt.u64.u32 %rd279, %r5907;
add.s64 %rd280, %rd1, %rd279;
st.global.u8 [%rd280], %r5904;
add.s32 %r5907, %r5907, 1;
setp.eq.s32 %p707, %r5904, 255;
selp.b32 %r5906, 7, 8, %p707;
mov.u32 %r5904, 0;
mov.u32 %r5905, %r5904;
$L__BB0_612:
setp.ne.s32 %p708, %r6027, 0;
mov.u32 %r6037, %r5933;
@%p708 bra $L__BB0_609;
$L__BB0_613:
and.b32 %r1510, %r5946, 2;
setp.eq.s32 %p709, %r1510, 0;
mov.u32 %r6052, %r6037;
@%p709 bra $L__BB0_620;
shr.u32 %r3835, %r1486, 1;
and.b32 %r3836, %r3835, 1;
sub.s32 %r6042, %r1372, %r3836;
setp.eq.s32 %p710, %r6042, 0;
mov.u32 %r6052, %r6037;
@%p710 bra $L__BB0_620;
mov.u32 %r3837, -1;
shl.b32 %r3838, %r3837, %r6042;
not.b32 %r3839, %r3838;
and.b32 %r6043, %r5944, %r3839;
$L__BB0_616:
setp.gt.u32 %p711, %r5907, 17476;
mov.u32 %r6052, 1;
@%p711 bra $L__BB0_620;
sub.s32 %r3841, %r5906, %r5905;
min.u32 %r3842, %r3841, %r6042;
setp.eq.s32 %p712, %r3842, 32;
mov.u32 %r3843, -1;
shl.b32 %r3844, %r3843, %r3842;
not.b32 %r3845, %r3844;
selp.b32 %r3846, -1, %r3845, %p712;
and.b32 %r3847, %r3846, %r6043;
shl.b32 %r3848, %r3847, %r5905;
or.b32 %r5904, %r3848, %r5904;
add.s32 %r5905, %r3842, %r5905;
shr.u32 %r6043, %r6043, %r3842;
sub.s32 %r6042, %r6042, %r3842;
setp.lt.u32 %p713, %r5905, %r5906;
@%p713 bra $L__BB0_619;
cvt.u64.u32 %rd281, %r5907;
add.s64 %rd282, %rd1, %rd281;
st.global.u8 [%rd282], %r5904;
add.s32 %r5907, %r5907, 1;
setp.eq.s32 %p714, %r5904, 255;
selp.b32 %r5906, 7, 8, %p714;
mov.u32 %r5904, 0;
mov.u32 %r5905, %r5904;
$L__BB0_619:
setp.ne.s32 %p715, %r6042, 0;
mov.u32 %r6052, %r6037;
@%p715 bra $L__BB0_616;
$L__BB0_620:
and.b32 %r1534, %r5946, 4;
setp.eq.s32 %p716, %r1534, 0;
mov.u32 %r6067, %r6052;
@%p716 bra $L__BB0_627;
shr.u32 %r3851, %r1486, 2;
and.b32 %r3852, %r3851, 1;
sub.s32 %r6057, %r1372, %r3852;
setp.eq.s32 %p717, %r6057, 0;
mov.u32 %r6067, %r6052;
@%p717 bra $L__BB0_627;
mov.u32 %r3853, -1;
shl.b32 %r3854, %r3853, %r6057;
not.b32 %r3855, %r3854;
and.b32 %r6058, %r5960, %r3855;
$L__BB0_623:
setp.gt.u32 %p718, %r5907, 17476;
mov.u32 %r6067, 1;
@%p718 bra $L__BB0_627;
sub.s32 %r3857, %r5906, %r5905;
min.u32 %r3858, %r3857, %r6057;
setp.eq.s32 %p719, %r3858, 32;
mov.u32 %r3859, -1;
shl.b32 %r3860, %r3859, %r3858;
not.b32 %r3861, %r3860;
selp.b32 %r3862, -1, %r3861, %p719;
and.b32 %r3863, %r3862, %r6058;
shl.b32 %r3864, %r3863, %r5905;
or.b32 %r5904, %r3864, %r5904;
add.s32 %r5905, %r3858, %r5905;
shr.u32 %r6058, %r6058, %r3858;
sub.s32 %r6057, %r6057, %r3858;
setp.lt.u32 %p720, %r5905, %r5906;
@%p720 bra $L__BB0_626;
cvt.u64.u32 %rd283, %r5907;
add.s64 %rd284, %rd1, %rd283;
st.global.u8 [%rd284], %r5904;
add.s32 %r5907, %r5907, 1;
setp.eq.s32 %p721, %r5904, 255;
selp.b32 %r5906, 7, 8, %p721;
mov.u32 %r5904, 0;
mov.u32 %r5905, %r5904;
$L__BB0_626:
setp.ne.s32 %p722, %r6057, 0;
mov.u32 %r6067, %r6052;
@%p722 bra $L__BB0_623;
$L__BB0_627:
and.b32 %r1558, %r5946, 8;
setp.eq.s32 %p723, %r1558, 0;
mov.u32 %r5933, %r6067;
@%p723 bra $L__BB0_634;
shr.u32 %r3867, %r1486, 3;
sub.s32 %r6072, %r1372, %r3867;
setp.eq.s32 %p724, %r6072, 0;
mov.u32 %r5933, %r6067;
@%p724 bra $L__BB0_634;
mov.u32 %r3868, -1;
shl.b32 %r3869, %r3868, %r6072;
not.b32 %r3870, %r3869;
and.b32 %r6073, %r5959, %r3870;
$L__BB0_630:
setp.gt.u32 %p725, %r5907, 17476;
mov.u32 %r5933, 1;
@%p725 bra $L__BB0_634;
sub.s32 %r3872, %r5906, %r5905;
min.u32 %r3873, %r3872, %r6072;
setp.eq.s32 %p726, %r3873, 32;
mov.u32 %r3874, -1;
shl.b32 %r3875, %r3874, %r3873;
not.b32 %r3876, %r3875;
selp.b32 %r3877, -1, %r3876, %p726;
and.b32 %r3878, %r3877, %r6073;
shl.b32 %r3879, %r3878, %r5905;
or.b32 %r5904, %r3879, %r5904;
add.s32 %r5905, %r3873, %r5905;
shr.u32 %r6073, %r6073, %r3873;
sub.s32 %r6072, %r6072, %r3873;
setp.lt.u32 %p727, %r5905, %r5906;
@%p727 bra $L__BB0_633;
cvt.u64.u32 %rd285, %r5907;
add.s64 %rd286, %rd1, %rd285;
st.global.u8 [%rd286], %r5904;
add.s32 %r5907, %r5907, 1;
setp.eq.s32 %p728, %r5904, 255;
selp.b32 %r5906, 7, 8, %p728;
mov.u32 %r5904, 0;
mov.u32 %r5905, %r5904;
$L__BB0_633:
setp.ne.s32 %p729, %r6072, 0;
mov.u32 %r5933, %r6067;
@%p729 bra $L__BB0_630;
$L__BB0_634:
and.b32 %r3882, %r5943, 255;
and.b32 %r3883, %r5808, 255;
setp.lt.u32 %p730, %r3882, %r3883;
cvt.u16.u32 %rs507, %r5943;
selp.b16 %rs508, %rs178, %rs507, %p730;
st.shared.u8 [%r1310+1], %rs508;
ld.shared.u8 %rs509, [%r1310+3];
setp.gt.u16 %p731, %rs176, %rs509;
add.s32 %r6103, %r6103, 1;
add.s32 %r3884, %r5771, 3;
selp.b32 %r3885, %r6103, %r3884, %p731;
add.s32 %r3887, %r3615, %r3885;
ld.shared.u8 %r3888, [%r3887];
add.s32 %r5770, %r3888, -1;
shr.u32 %r3889, %r1510, 1;
or.b32 %r3890, %r1316, %r3889;
st.shared.u8 [%r1310+2], %r5957;
st.shared.u8 [%r1313+1], %r3890;
ld.shared.u8 %rs510, [%r1313+3];
mul.wide.u16 %r3891, %rs510, 4;
add.s32 %r3892, %r3891, %r1315;
shr.u32 %r3893, %r1558, 3;
st.shared.u8 [%r1313+2], %r3893;
shr.u32 %r3894, %r1558, 2;
shr.u32 %r3895, %r1534, 1;
or.b32 %r3896, %r3894, %r3895;
or.b32 %r5773, %r3896, %r3892;
add.s32 %r5769, %r5769, 1;
mov.u32 %r5827, %r5976;
$L__BB0_635:
mov.u32 %r5771, %r6103;
mov.u32 %r5772, %r6102;
max.s32 %r3897, %r6107, 0;
mul.lo.s32 %r3898, %r1101, 6;
setp.gt.s32 %p732, %r1101, 0;
selp.b32 %r3899, %r3898, 0, %p732;
cvt.u64.u32 %rd287, %r3899;
add.s64 %rd14, %rd10, %rd287;
ld.global.u8 %rs204, [%rd14+1];
add.s32 %r3900, %r3899, 2;
cvt.u64.u32 %rd288, %r3900;
add.s64 %rd289, %rd10, %rd288;
ld.global.u8 %rs205, [%rd289];
ld.global.u8 %rs206, [%rd289+1];
mul.lo.s32 %r3901, %r3897, 6;
cvt.u64.u32 %rd290, %r3901;
add.s64 %rd291, %rd10, %rd290;
ld.global.u8 %rs207, [%rd291];
ld.global.u8 %rs208, [%rd291+1];
add.s32 %r3902, %r3901, 2;
cvt.u64.u32 %rd292, %r3902;
add.s64 %rd293, %rd10, %rd292;
ld.global.u8 %rs209, [%rd293];
ld.global.u8 %rs210, [%rd293+1];
setp.eq.s16 %p733, %rs204, 0;
mov.u32 %r6119, %r5827;
@%p733 bra $L__BB0_642;
ld.global.u8 %r6109, [%rd14];
cvt.u32.u16 %r6108, %rs204;
$L__BB0_637:
mov.u32 %r1609, %r6108;
setp.gt.u32 %p734, %r5719, 2879;
mov.u32 %r6119, 1;
@%p734 bra $L__BB0_642;
mov.u32 %r3904, 8;
sub.s32 %r3905, %r3904, %r5717;
sub.s32 %r3906, %r3905, %r5718;
min.u32 %r3907, %r3906, %r1609;
setp.eq.s32 %p735, %r3907, 32;
mov.u32 %r3908, -1;
shl.b32 %r3909, %r3908, %r3907;
not.b32 %r3910, %r3909;
selp.b32 %r3911, -1, %r3910, %p735;
and.b32 %r3912, %r3911, %r6109;
shl.b32 %r3913, %r3912, %r5718;
cvt.u16.u32 %rs511, %r3913;
or.b16 %rs688, %rs688, %rs511;
add.s32 %r5718, %r3907, %r5718;
sub.s32 %r6108, %r1609, %r3907;
shr.u32 %r6109, %r6109, %r3907;
setp.gt.u32 %p736, %r3906, %r1609;
@%p736 bra $L__BB0_641;
setp.ne.s32 %p737, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs512, %rs688, 255;
setp.ne.s16 %p738, %rs512, 127;
and.pred %p739, %p737, %p738;
@%p739 bra $L__BB0_641;
mov.u32 %r3916, 20548;
sub.s32 %r3917, %r3916, %r5719;
cvt.u64.u32 %rd294, %r3917;
add.s64 %rd295, %rd1, %rd294;
st.global.u8 [%rd295], %rs688;
add.s32 %r5719, %r5719, 1;
setp.gt.u16 %p740, %rs512, 143;
selp.u32 %r5717, 1, 0, %p740;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_641:
setp.ne.s32 %p741, %r6108, 0;
mov.u32 %r6119, %r5827;
@%p741 bra $L__BB0_637;
$L__BB0_642:
setp.eq.s16 %p742, %rs208, 0;
mov.u32 %r6131, %r6119;
@%p742 bra $L__BB0_649;
cvt.u32.u16 %r3918, %rs207;
and.b32 %r6121, %r3918, 255;
cvt.u32.u16 %r3919, %rs208;
and.b32 %r6120, %r3919, 255;
$L__BB0_644:
mov.u32 %r1628, %r6120;
setp.gt.u32 %p743, %r5719, 2879;
mov.u32 %r6131, 1;
@%p743 bra $L__BB0_649;
mov.u32 %r3921, 8;
sub.s32 %r3922, %r3921, %r5717;
sub.s32 %r3923, %r3922, %r5718;
min.u32 %r3924, %r3923, %r1628;
setp.eq.s32 %p744, %r3924, 32;
mov.u32 %r3925, -1;
shl.b32 %r3926, %r3925, %r3924;
not.b32 %r3927, %r3926;
selp.b32 %r3928, -1, %r3927, %p744;
and.b32 %r3929, %r3928, %r6121;
shl.b32 %r3930, %r3929, %r5718;
cvt.u16.u32 %rs516, %r3930;
or.b16 %rs688, %rs688, %rs516;
add.s32 %r5718, %r3924, %r5718;
sub.s32 %r6120, %r1628, %r3924;
shr.u32 %r6121, %r6121, %r3924;
setp.gt.u32 %p745, %r3923, %r1628;
@%p745 bra $L__BB0_648;
setp.ne.s32 %p746, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs517, %rs688, 255;
setp.ne.s16 %p747, %rs517, 127;
and.pred %p748, %p746, %p747;
@%p748 bra $L__BB0_648;
mov.u32 %r3933, 20548;
sub.s32 %r3934, %r3933, %r5719;
cvt.u64.u32 %rd296, %r3934;
add.s64 %rd297, %rd1, %rd296;
st.global.u8 [%rd297], %rs688;
add.s32 %r5719, %r5719, 1;
setp.gt.u16 %p749, %rs517, 143;
selp.u32 %r5717, 1, 0, %p749;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_648:
setp.ne.s32 %p750, %r6120, 0;
mov.u32 %r6131, %r6119;
@%p750 bra $L__BB0_644;
$L__BB0_649:
setp.eq.s16 %p751, %rs206, 0;
mov.u32 %r6143, %r6131;
@%p751 bra $L__BB0_656;
cvt.u32.u16 %r3935, %rs206;
and.b32 %r6132, %r3935, 255;
cvt.u32.u16 %r3936, %rs205;
and.b32 %r6133, %r3936, 255;
$L__BB0_651:
mov.u32 %r1647, %r6132;
setp.gt.u32 %p752, %r5719, 2879;
mov.u32 %r6143, 1;
@%p752 bra $L__BB0_656;
mov.u32 %r3938, 8;
sub.s32 %r3939, %r3938, %r5717;
sub.s32 %r3940, %r3939, %r5718;
min.u32 %r3941, %r3940, %r1647;
setp.eq.s32 %p753, %r3941, 32;
mov.u32 %r3942, -1;
shl.b32 %r3943, %r3942, %r3941;
not.b32 %r3944, %r3943;
selp.b32 %r3945, -1, %r3944, %p753;
and.b32 %r3946, %r3945, %r6133;
shl.b32 %r3947, %r3946, %r5718;
cvt.u16.u32 %rs521, %r3947;
or.b16 %rs688, %rs688, %rs521;
add.s32 %r5718, %r3941, %r5718;
sub.s32 %r6132, %r1647, %r3941;
shr.u32 %r6133, %r6133, %r3941;
setp.gt.u32 %p754, %r3940, %r1647;
@%p754 bra $L__BB0_655;
setp.ne.s32 %p755, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs522, %rs688, 255;
setp.ne.s16 %p756, %rs522, 127;
and.pred %p757, %p755, %p756;
@%p757 bra $L__BB0_655;
mov.u32 %r3950, 20548;
sub.s32 %r3951, %r3950, %r5719;
cvt.u64.u32 %rd298, %r3951;
add.s64 %rd299, %rd1, %rd298;
st.global.u8 [%rd299], %rs688;
add.s32 %r5719, %r5719, 1;
setp.gt.u16 %p758, %rs522, 143;
selp.u32 %r5717, 1, 0, %p758;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_655:
setp.ne.s32 %p759, %r6132, 0;
mov.u32 %r6143, %r6131;
@%p759 bra $L__BB0_651;
$L__BB0_656:
setp.eq.s16 %p760, %rs210, 0;
mov.u32 %r5716, %r6143;
@%p760 bra $L__BB0_663;
cvt.u32.u16 %r3952, %rs209;
and.b32 %r6145, %r3952, 255;
cvt.u32.u16 %r3953, %rs210;
and.b32 %r6144, %r3953, 255;
$L__BB0_658:
mov.u32 %r1666, %r6144;
setp.gt.u32 %p761, %r5719, 2879;
mov.u32 %r5716, 1;
@%p761 bra $L__BB0_663;
mov.u32 %r3955, 8;
sub.s32 %r3956, %r3955, %r5717;
sub.s32 %r3957, %r3956, %r5718;
min.u32 %r3958, %r3957, %r1666;
setp.eq.s32 %p762, %r3958, 32;
mov.u32 %r3959, -1;
shl.b32 %r3960, %r3959, %r3958;
not.b32 %r3961, %r3960;
selp.b32 %r3962, -1, %r3961, %p762;
and.b32 %r3963, %r3962, %r6145;
shl.b32 %r3964, %r3963, %r5718;
cvt.u16.u32 %rs526, %r3964;
or.b16 %rs688, %rs688, %rs526;
add.s32 %r5718, %r3958, %r5718;
sub.s32 %r6144, %r1666, %r3958;
shr.u32 %r6145, %r6145, %r3958;
setp.gt.u32 %p763, %r3957, %r1666;
@%p763 bra $L__BB0_662;
setp.ne.s32 %p764, %r5717, 0;
mov.u32 %r5717, 0;
and.b16 %rs527, %rs688, 255;
setp.ne.s16 %p765, %rs527, 127;
and.pred %p766, %p764, %p765;
@%p766 bra $L__BB0_662;
mov.u32 %r3967, 20548;
sub.s32 %r3968, %r3967, %r5719;
cvt.u64.u32 %rd300, %r3968;
add.s64 %rd301, %rd1, %rd300;
st.global.u8 [%rd301], %rs688;
add.s32 %r5719, %r5719, 1;
setp.gt.u16 %p767, %rs527, 143;
selp.u32 %r5717, 1, 0, %p767;
mov.u32 %r5718, 0;
mov.u16 %rs688, 0;
$L__BB0_662:
setp.ne.s32 %p768, %r6144, 0;
mov.u32 %r5716, %r6143;
@%p768 bra $L__BB0_658;
$L__BB0_663:
add.s32 %r5768, %r5768, 4;
setp.lt.u32 %p769, %r5768, %r5;
@%p769 bra $L__BB0_423;
$L__BB0_664:
add.s32 %r5752, %r5752, 2;
setp.lt.u32 %p770, %r5752, %r6;
@%p770 bra $L__BB0_421;
$L__BB0_665:
setp.eq.s32 %p771, %r5483, 0;
mov.u32 %r6183, %r5480;
@%p771 bra $L__BB0_669;
shl.b16 %rs530, %rs705, 1;
or.b16 %rs705, %rs530, 1;
add.s32 %r5277, %r5277, -1;
setp.ne.s32 %p772, %r5277, 0;
mov.u32 %r6183, %r5480;
@%p772 bra $L__BB0_669;
setp.gt.u32 %p773, %r5271, 191;
mov.u32 %r5277, 0;
mov.u32 %r6183, 1;
@%p773 bra $L__BB0_669;
add.s32 %r3971, %r5271, 17477;
cvt.u64.u32 %rd302, %r3971;
add.s64 %rd303, %rd1, %rd302;
and.b16 %rs532, %rs705, 255;
st.global.u8 [%rd303], %rs705;
add.s32 %r5271, %r5271, 1;
setp.eq.s16 %p774, %rs532, 255;
selp.b32 %r5277, 7, 8, %p774;
mov.u16 %rs705, 0;
mov.u32 %r6183, %r5480;
$L__BB0_669:
cvt.u32.u16 %r3972, %rs705;
and.b32 %r3973, %r3972, 255;
shl.b32 %r3974, %r3973, %r5277;
cvt.u16.u32 %rs233, %r3974;
mov.u32 %r3975, -1;
shl.b32 %r3976, %r3975, %r5718;
not.b32 %r3977, %r3976;
mov.u32 %r3978, 255;
and.b32 %r3979, %r3977, 255;
setp.eq.s32 %p775, %r5718, 0;
selp.b32 %r1718, 0, %r3979, %p775;
shl.b32 %r1719, %r3978, %r5277;
and.b32 %r3980, %r1719, 255;
or.b32 %r3981, %r3980, %r1718;
setp.eq.s32 %p776, %r3981, 0;
mov.u32 %r6186, %r6183;
mov.u32 %r6188, %r5716;
@%p776 bra $L__BB0_675;
or.b16 %rs234, %rs688, %rs233;
and.b16 %rs533, %rs234, 255;
xor.b16 %rs534, %rs234, %rs233;
cvt.u32.u16 %r3982, %rs534;
and.b32 %r3983, %r1719, %r3982;
and.b32 %r3984, %r3983, 255;
xor.b16 %rs535, %rs234, %rs688;
cvt.u32.u16 %r3985, %rs535;
and.b32 %r3986, %r1718, %r3985;
or.b32 %r3987, %r3984, %r3986;
setp.eq.s32 %p777, %r3987, 0;
setp.ne.s16 %p778, %rs533, 255;
and.pred %p779, %p778, %p777;
setp.gt.u32 %p780, %r5719, 1;
and.pred %p781, %p780, %p779;
add.s32 %r3988, %r5271, 17477;
cvt.u64.u32 %rd304, %r3988;
add.s64 %rd15, %rd1, %rd304;
@%p781 bra $L__BB0_673;
bra.uni $L__BB0_671;
$L__BB0_673:
setp.gt.u32 %p785, %r5271, 191;
mov.u32 %r6186, 1;
mov.u32 %r6188, %r5716;
@%p785 bra $L__BB0_675;
st.global.u8 [%rd15], %rs234;
add.s32 %r5271, %r5271, 1;
mov.u32 %r6186, %r6183;
mov.u32 %r6188, %r5716;
bra.uni $L__BB0_675;
$L__BB0_671:
setp.gt.u32 %p782, %r5271, 191;
setp.gt.u32 %p783, %r5719, 2879;
or.pred %p784, %p783, %p782;
mov.u32 %r6186, 1;
mov.u32 %r6188, %r6186;
@%p784 bra $L__BB0_675;
st.global.u8 [%rd15], %rs233;
add.s32 %r5271, %r5271, 1;
mov.u32 %r3991, 20548;
sub.s32 %r3992, %r3991, %r5719;
cvt.u64.u32 %rd305, %r3992;
add.s64 %rd306, %rd1, %rd305;
st.global.u8 [%rd306], %rs688;
add.s32 %r5719, %r5719, 1;
mov.u32 %r6186, %r6183;
mov.u32 %r6188, %r5716;
$L__BB0_675:
setp.eq.s32 %p786, %r5905, 0;
@%p786 bra $L__BB0_679;
sub.s32 %r3994, %r5906, %r5905;
mov.u32 %r3995, -1;
shl.b32 %r3996, %r3995, %r3994;
not.b32 %r3997, %r3996;
and.b32 %r3998, %r3997, 255;
shl.b32 %r3999, %r3998, %r5905;
or.b32 %r1727, %r3999, %r5904;
setp.eq.s32 %p787, %r1727, 255;
mov.u32 %r6190, %r5933;
@%p787 bra $L__BB0_681;
setp.gt.u32 %p788, %r5907, 17476;
mov.u32 %r6190, 1;
@%p788 bra $L__BB0_681;
cvt.u64.u32 %rd307, %r5907;
add.s64 %rd308, %rd1, %rd307;
st.global.u8 [%rd308], %r1727;
add.s32 %r5907, %r5907, 1;
mov.u32 %r6190, %r5933;
bra.uni $L__BB0_681;
$L__BB0_679:
setp.ne.s32 %p789, %r5906, 7;
mov.u32 %r6190, %r5933;
@%p789 bra $L__BB0_681;
setp.eq.s32 %p790, %r5907, 0;
add.s32 %r4001, %r5907, -1;
selp.b32 %r5907, 0, %r4001, %p790;
mov.u32 %r6190, %r5933;
$L__BB0_681:
or.b32 %r4002, %r6188, %r6186;
or.b32 %r4003, %r4002, %r6190;
setp.eq.s32 %p791, %r4003, 0;
@%p791 bra $L__BB0_683;
mov.u32 %r4004, 1;
st.global.u32 [%rd3], %r4004;
mov.u32 %r4005, 3;
st.global.u32 [%rd3+4], %r4005;
mov.u32 %r4006, 0;
st.global.u32 [%rd3+8], %r4006;
st.global.u32 [%rd3+12], %r4006;
st.global.u32 [%rd3+16], %r4006;
st.global.u32 [%rd3+20], %r4006;
st.global.u32 [%rd3+24], %r4006;
st.global.u32 [%rd3+28], %r4006;
bra.uni $L__BB0_1254;
$L__BB0_683:
setp.eq.s32 %p1629, %r4, 2;
add.s32 %r1732, %r5907, %r5271;
add.s32 %r1733, %r1732, %r5719;
add.u64 %rd16, %SPL, 0;
mov.u32 %r6310, 1;
mov.u32 %r6308, 0;
mov.u32 %r6309, %r6308;
@%p1629 bra $L__BB0_932;
setp.ne.s32 %p793, %r4, 3;
@%p793 bra $L__BB0_931;
add.s32 %r4011, %r5, 3;
shr.u32 %r4012, %r4011, 2;
add.s32 %r4013, %r4012, 8;
setp.gt.u32 %p795, %r4013, 513;
mov.pred %p794, -1;
mov.u32 %r6307, 0;
mov.pred %p1632, %p794;
@%p795 bra $L__BB0_928;
mov.u16 %rs747, 0;
st.local.u16 [%rd16], %rs747;
st.local.u16 [%rd16+2], %rs747;
st.local.u16 [%rd16+4], %rs747;
st.local.u16 [%rd16+6], %rs747;
st.local.u16 [%rd16+8], %rs747;
st.local.u16 [%rd16+10], %rs747;
st.local.u16 [%rd16+12], %rs747;
st.local.u16 [%rd16+14], %rs747;
st.local.u16 [%rd16+16], %rs747;
st.local.u16 [%rd16+18], %rs747;
st.local.u16 [%rd16+20], %rs747;
st.local.u16 [%rd16+22], %rs747;
st.local.u16 [%rd16+24], %rs747;
st.local.u16 [%rd16+26], %rs747;
st.local.u16 [%rd16+28], %rs747;
st.local.u16 [%rd16+30], %rs747;
st.local.u16 [%rd16+32], %rs747;
st.local.u16 [%rd16+34], %rs747;
st.local.u16 [%rd16+36], %rs747;
st.local.u16 [%rd16+38], %rs747;
st.local.u16 [%rd16+40], %rs747;
st.local.u16 [%rd16+42], %rs747;
st.local.u16 [%rd16+44], %rs747;
st.local.u16 [%rd16+46], %rs747;
st.local.u16 [%rd16+48], %rs747;
st.local.u16 [%rd16+50], %rs747;
st.local.u16 [%rd16+52], %rs747;
st.local.u16 [%rd16+54], %rs747;
st.local.u16 [%rd16+56], %rs747;
st.local.u16 [%rd16+58], %rs747;
st.local.u16 [%rd16+60], %rs747;
st.local.u16 [%rd16+62], %rs747;
st.local.u16 [%rd16+64], %rs747;
st.local.u16 [%rd16+66], %rs747;
st.local.u16 [%rd16+68], %rs747;
st.local.u16 [%rd16+70], %rs747;
st.local.u16 [%rd16+72], %rs747;
st.local.u16 [%rd16+74], %rs747;
st.local.u16 [%rd16+76], %rs747;
st.local.u16 [%rd16+78], %rs747;
st.local.u16 [%rd16+80], %rs747;
st.local.u16 [%rd16+82], %rs747;
st.local.u16 [%rd16+84], %rs747;
st.local.u16 [%rd16+86], %rs747;
st.local.u16 [%rd16+88], %rs747;
st.local.u16 [%rd16+90], %rs747;
st.local.u16 [%rd16+92], %rs747;
st.local.u16 [%rd16+94], %rs747;
st.local.u16 [%rd16+96], %rs747;
st.local.u16 [%rd16+98], %rs747;
st.local.u16 [%rd16+100], %rs747;
st.local.u16 [%rd16+102], %rs747;
st.local.u16 [%rd16+104], %rs747;
st.local.u16 [%rd16+106], %rs747;
st.local.u16 [%rd16+108], %rs747;
st.local.u16 [%rd16+110], %rs747;
st.local.u16 [%rd16+112], %rs747;
st.local.u16 [%rd16+114], %rs747;
st.local.u16 [%rd16+116], %rs747;
st.local.u16 [%rd16+118], %rs747;
st.local.u16 [%rd16+120], %rs747;
st.local.u16 [%rd16+122], %rs747;
st.local.u16 [%rd16+124], %rs747;
st.local.u16 [%rd16+126], %rs747;
st.local.u16 [%rd16+128], %rs747;
st.local.u16 [%rd16+130], %rs747;
st.local.u16 [%rd16+132], %rs747;
st.local.u16 [%rd16+134], %rs747;
st.local.u16 [%rd16+136], %rs747;
st.local.u16 [%rd16+138], %rs747;
st.local.u16 [%rd16+140], %rs747;
st.local.u16 [%rd16+142], %rs747;
st.local.u16 [%rd16+144], %rs747;
st.local.u16 [%rd16+146], %rs747;
st.local.u16 [%rd16+148], %rs747;
st.local.u16 [%rd16+150], %rs747;
st.local.u16 [%rd16+152], %rs747;
st.local.u16 [%rd16+154], %rs747;
st.local.u16 [%rd16+156], %rs747;
st.local.u16 [%rd16+158], %rs747;
st.local.u16 [%rd16+160], %rs747;
st.local.u16 [%rd16+162], %rs747;
st.local.u16 [%rd16+164], %rs747;
st.local.u16 [%rd16+166], %rs747;
st.local.u16 [%rd16+168], %rs747;
st.local.u16 [%rd16+170], %rs747;
st.local.u16 [%rd16+172], %rs747;
st.local.u16 [%rd16+174], %rs747;
st.local.u16 [%rd16+176], %rs747;
st.local.u16 [%rd16+178], %rs747;
st.local.u16 [%rd16+180], %rs747;
st.local.u16 [%rd16+182], %rs747;
st.local.u16 [%rd16+184], %rs747;
st.local.u16 [%rd16+186], %rs747;
st.local.u16 [%rd16+188], %rs747;
st.local.u16 [%rd16+190], %rs747;
st.local.u16 [%rd16+192], %rs747;
st.local.u16 [%rd16+194], %rs747;
st.local.u16 [%rd16+196], %rs747;
st.local.u16 [%rd16+198], %rs747;
st.local.u16 [%rd16+200], %rs747;
st.local.u16 [%rd16+202], %rs747;
st.local.u16 [%rd16+204], %rs747;
st.local.u16 [%rd16+206], %rs747;
st.local.u16 [%rd16+208], %rs747;
st.local.u16 [%rd16+210], %rs747;
st.local.u16 [%rd16+212], %rs747;
st.local.u16 [%rd16+214], %rs747;
st.local.u16 [%rd16+216], %rs747;
st.local.u16 [%rd16+218], %rs747;
st.local.u16 [%rd16+220], %rs747;
st.local.u16 [%rd16+222], %rs747;
st.local.u16 [%rd16+224], %rs747;
st.local.u16 [%rd16+226], %rs747;
st.local.u16 [%rd16+228], %rs747;
st.local.u16 [%rd16+230], %rs747;
st.local.u16 [%rd16+232], %rs747;
st.local.u16 [%rd16+234], %rs747;
st.local.u16 [%rd16+236], %rs747;
st.local.u16 [%rd16+238], %rs747;
st.local.u16 [%rd16+240], %rs747;
st.local.u16 [%rd16+242], %rs747;
st.local.u16 [%rd16+244], %rs747;
st.local.u16 [%rd16+246], %rs747;
st.local.u16 [%rd16+248], %rs747;
st.local.u16 [%rd16+250], %rs747;
st.local.u16 [%rd16+252], %rs747;
st.local.u16 [%rd16+254], %rs747;
st.local.u16 [%rd16+256], %rs747;
st.local.u16 [%rd16+258], %rs747;
st.local.u16 [%rd16+260], %rs747;
st.local.u16 [%rd16+262], %rs747;
st.local.u16 [%rd16+264], %rs747;
st.local.u16 [%rd16+266], %rs747;
st.local.u16 [%rd16+268], %rs747;
st.local.u16 [%rd16+270], %rs747;
st.local.u16 [%rd16+272], %rs747;
st.local.u16 [%rd16+274], %rs747;
st.local.u16 [%rd16+276], %rs747;
st.local.u16 [%rd16+278], %rs747;
st.local.u16 [%rd16+280], %rs747;
st.local.u16 [%rd16+282], %rs747;
st.local.u16 [%rd16+284], %rs747;
st.local.u16 [%rd16+286], %rs747;
st.local.u16 [%rd16+288], %rs747;
st.local.u16 [%rd16+290], %rs747;
st.local.u16 [%rd16+292], %rs747;
st.local.u16 [%rd16+294], %rs747;
st.local.u16 [%rd16+296], %rs747;
st.local.u16 [%rd16+298], %rs747;
st.local.u16 [%rd16+300], %rs747;
st.local.u16 [%rd16+302], %rs747;
st.local.u16 [%rd16+304], %rs747;
st.local.u16 [%rd16+306], %rs747;
st.local.u16 [%rd16+308], %rs747;
st.local.u16 [%rd16+310], %rs747;
st.local.u16 [%rd16+312], %rs747;
st.local.u16 [%rd16+314], %rs747;
st.local.u16 [%rd16+316], %rs747;
st.local.u16 [%rd16+318], %rs747;
st.local.u16 [%rd16+320], %rs747;
st.local.u16 [%rd16+322], %rs747;
st.local.u16 [%rd16+324], %rs747;
st.local.u16 [%rd16+326], %rs747;
st.local.u16 [%rd16+328], %rs747;
st.local.u16 [%rd16+330], %rs747;
st.local.u16 [%rd16+332], %rs747;
st.local.u16 [%rd16+334], %rs747;
st.local.u16 [%rd16+336], %rs747;
st.local.u16 [%rd16+338], %rs747;
st.local.u16 [%rd16+340], %rs747;
st.local.u16 [%rd16+342], %rs747;
st.local.u16 [%rd16+344], %rs747;
st.local.u16 [%rd16+346], %rs747;
st.local.u16 [%rd16+348], %rs747;
st.local.u16 [%rd16+350], %rs747;
st.local.u16 [%rd16+352], %rs747;
st.local.u16 [%rd16+354], %rs747;
st.local.u16 [%rd16+356], %rs747;
st.local.u16 [%rd16+358], %rs747;
st.local.u16 [%rd16+360], %rs747;
st.local.u16 [%rd16+362], %rs747;
st.local.u16 [%rd16+364], %rs747;
st.local.u16 [%rd16+366], %rs747;
st.local.u16 [%rd16+368], %rs747;
st.local.u16 [%rd16+370], %rs747;
st.local.u16 [%rd16+372], %rs747;
st.local.u16 [%rd16+374], %rs747;
st.local.u16 [%rd16+376], %rs747;
st.local.u16 [%rd16+378], %rs747;
st.local.u16 [%rd16+380], %rs747;
st.local.u16 [%rd16+382], %rs747;
st.local.u16 [%rd16+384], %rs747;
st.local.u16 [%rd16+386], %rs747;
st.local.u16 [%rd16+388], %rs747;
st.local.u16 [%rd16+390], %rs747;
st.local.u16 [%rd16+392], %rs747;
st.local.u16 [%rd16+394], %rs747;
st.local.u16 [%rd16+396], %rs747;
st.local.u16 [%rd16+398], %rs747;
st.local.u16 [%rd16+400], %rs747;
st.local.u16 [%rd16+402], %rs747;
st.local.u16 [%rd16+404], %rs747;
st.local.u16 [%rd16+406], %rs747;
st.local.u16 [%rd16+408], %rs747;
st.local.u16 [%rd16+410], %rs747;
st.local.u16 [%rd16+412], %rs747;
st.local.u16 [%rd16+414], %rs747;
st.local.u16 [%rd16+416], %rs747;
st.local.u16 [%rd16+418], %rs747;
st.local.u16 [%rd16+420], %rs747;
st.local.u16 [%rd16+422], %rs747;
st.local.u16 [%rd16+424], %rs747;
st.local.u16 [%rd16+426], %rs747;
st.local.u16 [%rd16+428], %rs747;
st.local.u16 [%rd16+430], %rs747;
st.local.u16 [%rd16+432], %rs747;
st.local.u16 [%rd16+434], %rs747;
st.local.u16 [%rd16+436], %rs747;
st.local.u16 [%rd16+438], %rs747;
st.local.u16 [%rd16+440], %rs747;
st.local.u16 [%rd16+442], %rs747;
st.local.u16 [%rd16+444], %rs747;
st.local.u16 [%rd16+446], %rs747;
st.local.u16 [%rd16+448], %rs747;
st.local.u16 [%rd16+450], %rs747;
st.local.u16 [%rd16+452], %rs747;
st.local.u16 [%rd16+454], %rs747;
st.local.u16 [%rd16+456], %rs747;
st.local.u16 [%rd16+458], %rs747;
st.local.u16 [%rd16+460], %rs747;
st.local.u16 [%rd16+462], %rs747;
st.local.u16 [%rd16+464], %rs747;
st.local.u16 [%rd16+466], %rs747;
st.local.u16 [%rd16+468], %rs747;
st.local.u16 [%rd16+470], %rs747;
st.local.u16 [%rd16+472], %rs747;
st.local.u16 [%rd16+474], %rs747;
st.local.u16 [%rd16+476], %rs747;
st.local.u16 [%rd16+478], %rs747;
st.local.u16 [%rd16+480], %rs747;
st.local.u16 [%rd16+482], %rs747;
st.local.u16 [%rd16+484], %rs747;
st.local.u16 [%rd16+486], %rs747;
st.local.u16 [%rd16+488], %rs747;
st.local.u16 [%rd16+490], %rs747;
st.local.u16 [%rd16+492], %rs747;
st.local.u16 [%rd16+494], %rs747;
st.local.u16 [%rd16+496], %rs747;
st.local.u16 [%rd16+498], %rs747;
st.local.u16 [%rd16+500], %rs747;
st.local.u16 [%rd16+502], %rs747;
st.local.u16 [%rd16+504], %rs747;
st.local.u16 [%rd16+506], %rs747;
st.local.u16 [%rd16+508], %rs747;
st.local.u16 [%rd16+510], %rs747;
st.local.u16 [%rd16+512], %rs747;
st.local.u16 [%rd16+514], %rs747;
st.local.u16 [%rd16+516], %rs747;
st.local.u16 [%rd16+518], %rs747;
st.local.u16 [%rd16+520], %rs747;
st.local.u16 [%rd16+522], %rs747;
st.local.u16 [%rd16+524], %rs747;
st.local.u16 [%rd16+526], %rs747;
st.local.u16 [%rd16+528], %rs747;
st.local.u16 [%rd16+530], %rs747;
st.local.u16 [%rd16+532], %rs747;
st.local.u16 [%rd16+534], %rs747;
st.local.u16 [%rd16+536], %rs747;
st.local.u16 [%rd16+538], %rs747;
st.local.u16 [%rd16+540], %rs747;
st.local.u16 [%rd16+542], %rs747;
st.local.u16 [%rd16+544], %rs747;
st.local.u16 [%rd16+546], %rs747;
st.local.u16 [%rd16+548], %rs747;
st.local.u16 [%rd16+550], %rs747;
st.local.u16 [%rd16+552], %rs747;
st.local.u16 [%rd16+554], %rs747;
st.local.u16 [%rd16+556], %rs747;
st.local.u16 [%rd16+558], %rs747;
st.local.u16 [%rd16+560], %rs747;
st.local.u16 [%rd16+562], %rs747;
st.local.u16 [%rd16+564], %rs747;
st.local.u16 [%rd16+566], %rs747;
st.local.u16 [%rd16+568], %rs747;
st.local.u16 [%rd16+570], %rs747;
st.local.u16 [%rd16+572], %rs747;
st.local.u16 [%rd16+574], %rs747;
st.local.u16 [%rd16+576], %rs747;
st.local.u16 [%rd16+578], %rs747;
st.local.u16 [%rd16+580], %rs747;
st.local.u16 [%rd16+582], %rs747;
st.local.u16 [%rd16+584], %rs747;
st.local.u16 [%rd16+586], %rs747;
st.local.u16 [%rd16+588], %rs747;
st.local.u16 [%rd16+590], %rs747;
st.local.u16 [%rd16+592], %rs747;
st.local.u16 [%rd16+594], %rs747;
st.local.u16 [%rd16+596], %rs747;
st.local.u16 [%rd16+598], %rs747;
st.local.u16 [%rd16+600], %rs747;
st.local.u16 [%rd16+602], %rs747;
st.local.u16 [%rd16+604], %rs747;
st.local.u16 [%rd16+606], %rs747;
st.local.u16 [%rd16+608], %rs747;
st.local.u16 [%rd16+610], %rs747;
st.local.u16 [%rd16+612], %rs747;
st.local.u16 [%rd16+614], %rs747;
st.local.u16 [%rd16+616], %rs747;
st.local.u16 [%rd16+618], %rs747;
st.local.u16 [%rd16+620], %rs747;
st.local.u16 [%rd16+622], %rs747;
st.local.u16 [%rd16+624], %rs747;
st.local.u16 [%rd16+626], %rs747;
st.local.u16 [%rd16+628], %rs747;
st.local.u16 [%rd16+630], %rs747;
st.local.u16 [%rd16+632], %rs747;
st.local.u16 [%rd16+634], %rs747;
st.local.u16 [%rd16+636], %rs747;
st.local.u16 [%rd16+638], %rs747;
st.local.u16 [%rd16+640], %rs747;
st.local.u16 [%rd16+642], %rs747;
st.local.u16 [%rd16+644], %rs747;
st.local.u16 [%rd16+646], %rs747;
st.local.u16 [%rd16+648], %rs747;
st.local.u16 [%rd16+650], %rs747;
st.local.u16 [%rd16+652], %rs747;
st.local.u16 [%rd16+654], %rs747;
st.local.u16 [%rd16+656], %rs747;
st.local.u16 [%rd16+658], %rs747;
st.local.u16 [%rd16+660], %rs747;
st.local.u16 [%rd16+662], %rs747;
st.local.u16 [%rd16+664], %rs747;
st.local.u16 [%rd16+666], %rs747;
st.local.u16 [%rd16+668], %rs747;
st.local.u16 [%rd16+670], %rs747;
st.local.u16 [%rd16+672], %rs747;
st.local.u16 [%rd16+674], %rs747;
st.local.u16 [%rd16+676], %rs747;
st.local.u16 [%rd16+678], %rs747;
st.local.u16 [%rd16+680], %rs747;
st.local.u16 [%rd16+682], %rs747;
st.local.u16 [%rd16+684], %rs747;
st.local.u16 [%rd16+686], %rs747;
st.local.u16 [%rd16+688], %rs747;
st.local.u16 [%rd16+690], %rs747;
st.local.u16 [%rd16+692], %rs747;
st.local.u16 [%rd16+694], %rs747;
st.local.u16 [%rd16+696], %rs747;
st.local.u16 [%rd16+698], %rs747;
st.local.u16 [%rd16+700], %rs747;
st.local.u16 [%rd16+702], %rs747;
st.local.u16 [%rd16+704], %rs747;
st.local.u16 [%rd16+706], %rs747;
st.local.u16 [%rd16+708], %rs747;
st.local.u16 [%rd16+710], %rs747;
st.local.u16 [%rd16+712], %rs747;
st.local.u16 [%rd16+714], %rs747;
st.local.u16 [%rd16+716], %rs747;
st.local.u16 [%rd16+718], %rs747;
st.local.u16 [%rd16+720], %rs747;
st.local.u16 [%rd16+722], %rs747;
st.local.u16 [%rd16+724], %rs747;
st.local.u16 [%rd16+726], %rs747;
st.local.u16 [%rd16+728], %rs747;
st.local.u16 [%rd16+730], %rs747;
st.local.u16 [%rd16+732], %rs747;
st.local.u16 [%rd16+734], %rs747;
st.local.u16 [%rd16+736], %rs747;
st.local.u16 [%rd16+738], %rs747;
st.local.u16 [%rd16+740], %rs747;
st.local.u16 [%rd16+742], %rs747;
st.local.u16 [%rd16+744], %rs747;
st.local.u16 [%rd16+746], %rs747;
st.local.u16 [%rd16+748], %rs747;
st.local.u16 [%rd16+750], %rs747;
st.local.u16 [%rd16+752], %rs747;
st.local.u16 [%rd16+754], %rs747;
st.local.u16 [%rd16+756], %rs747;
st.local.u16 [%rd16+758], %rs747;
st.local.u16 [%rd16+760], %rs747;
st.local.u16 [%rd16+762], %rs747;
st.local.u16 [%rd16+764], %rs747;
st.local.u16 [%rd16+766], %rs747;
st.local.u16 [%rd16+768], %rs747;
st.local.u16 [%rd16+770], %rs747;
st.local.u16 [%rd16+772], %rs747;
st.local.u16 [%rd16+774], %rs747;
st.local.u16 [%rd16+776], %rs747;
st.local.u16 [%rd16+778], %rs747;
st.local.u16 [%rd16+780], %rs747;
st.local.u16 [%rd16+782], %rs747;
st.local.u16 [%rd16+784], %rs747;
st.local.u16 [%rd16+786], %rs747;
st.local.u16 [%rd16+788], %rs747;
st.local.u16 [%rd16+790], %rs747;
st.local.u16 [%rd16+792], %rs747;
st.local.u16 [%rd16+794], %rs747;
st.local.u16 [%rd16+796], %rs747;
st.local.u16 [%rd16+798], %rs747;
st.local.u16 [%rd16+800], %rs747;
st.local.u16 [%rd16+802], %rs747;
st.local.u16 [%rd16+804], %rs747;
st.local.u16 [%rd16+806], %rs747;
st.local.u16 [%rd16+808], %rs747;
st.local.u16 [%rd16+810], %rs747;
st.local.u16 [%rd16+812], %rs747;
st.local.u16 [%rd16+814], %rs747;
st.local.u16 [%rd16+816], %rs747;
st.local.u16 [%rd16+818], %rs747;
st.local.u16 [%rd16+820], %rs747;
st.local.u16 [%rd16+822], %rs747;
st.local.u16 [%rd16+824], %rs747;
st.local.u16 [%rd16+826], %rs747;
st.local.u16 [%rd16+828], %rs747;
st.local.u16 [%rd16+830], %rs747;
st.local.u16 [%rd16+832], %rs747;
st.local.u16 [%rd16+834], %rs747;
st.local.u16 [%rd16+836], %rs747;
st.local.u16 [%rd16+838], %rs747;
st.local.u16 [%rd16+840], %rs747;
st.local.u16 [%rd16+842], %rs747;
st.local.u16 [%rd16+844], %rs747;
st.local.u16 [%rd16+846], %rs747;
st.local.u16 [%rd16+848], %rs747;
st.local.u16 [%rd16+850], %rs747;
st.local.u16 [%rd16+852], %rs747;
st.local.u16 [%rd16+854], %rs747;
st.local.u16 [%rd16+856], %rs747;
st.local.u16 [%rd16+858], %rs747;
st.local.u16 [%rd16+860], %rs747;
st.local.u16 [%rd16+862], %rs747;
st.local.u16 [%rd16+864], %rs747;
st.local.u16 [%rd16+866], %rs747;
st.local.u16 [%rd16+868], %rs747;
st.local.u16 [%rd16+870], %rs747;
st.local.u16 [%rd16+872], %rs747;
st.local.u16 [%rd16+874], %rs747;
st.local.u16 [%rd16+876], %rs747;
st.local.u16 [%rd16+878], %rs747;
st.local.u16 [%rd16+880], %rs747;
st.local.u16 [%rd16+882], %rs747;
st.local.u16 [%rd16+884], %rs747;
st.local.u16 [%rd16+886], %rs747;
st.local.u16 [%rd16+888], %rs747;
st.local.u16 [%rd16+890], %rs747;
st.local.u16 [%rd16+892], %rs747;
st.local.u16 [%rd16+894], %rs747;
st.local.u16 [%rd16+896], %rs747;
st.local.u16 [%rd16+898], %rs747;
st.local.u16 [%rd16+900], %rs747;
st.local.u16 [%rd16+902], %rs747;
st.local.u16 [%rd16+904], %rs747;
st.local.u16 [%rd16+906], %rs747;
st.local.u16 [%rd16+908], %rs747;
st.local.u16 [%rd16+910], %rs747;
st.local.u16 [%rd16+912], %rs747;
st.local.u16 [%rd16+914], %rs747;
st.local.u16 [%rd16+916], %rs747;
st.local.u16 [%rd16+918], %rs747;
st.local.u16 [%rd16+920], %rs747;
st.local.u16 [%rd16+922], %rs747;
st.local.u16 [%rd16+924], %rs747;
st.local.u16 [%rd16+926], %rs747;
st.local.u16 [%rd16+928], %rs747;
st.local.u16 [%rd16+930], %rs747;
st.local.u16 [%rd16+932], %rs747;
st.local.u16 [%rd16+934], %rs747;
st.local.u16 [%rd16+936], %rs747;
st.local.u16 [%rd16+938], %rs747;
st.local.u16 [%rd16+940], %rs747;
st.local.u16 [%rd16+942], %rs747;
st.local.u16 [%rd16+944], %rs747;
st.local.u16 [%rd16+946], %rs747;
st.local.u16 [%rd16+948], %rs747;
st.local.u16 [%rd16+950], %rs747;
st.local.u16 [%rd16+952], %rs747;
st.local.u16 [%rd16+954], %rs747;
st.local.u16 [%rd16+956], %rs747;
st.local.u16 [%rd16+958], %rs747;
st.local.u16 [%rd16+960], %rs747;
st.local.u16 [%rd16+962], %rs747;
st.local.u16 [%rd16+964], %rs747;
st.local.u16 [%rd16+966], %rs747;
st.local.u16 [%rd16+968], %rs747;
st.local.u16 [%rd16+970], %rs747;
st.local.u16 [%rd16+972], %rs747;
st.local.u16 [%rd16+974], %rs747;
st.local.u16 [%rd16+976], %rs747;
st.local.u16 [%rd16+978], %rs747;
st.local.u16 [%rd16+980], %rs747;
st.local.u16 [%rd16+982], %rs747;
st.local.u16 [%rd16+984], %rs747;
st.local.u16 [%rd16+986], %rs747;
st.local.u16 [%rd16+988], %rs747;
st.local.u16 [%rd16+990], %rs747;
st.local.u16 [%rd16+992], %rs747;
st.local.u16 [%rd16+994], %rs747;
st.local.u16 [%rd16+996], %rs747;
st.local.u16 [%rd16+998], %rs747;
st.local.u16 [%rd16+1000], %rs747;
st.local.u16 [%rd16+1002], %rs747;
st.local.u16 [%rd16+1004], %rs747;
st.local.u16 [%rd16+1006], %rs747;
st.local.u16 [%rd16+1008], %rs747;
st.local.u16 [%rd16+1010], %rs747;
st.local.u16 [%rd16+1012], %rs747;
st.local.u16 [%rd16+1014], %rs747;
st.local.u16 [%rd16+1016], %rs747;
st.local.u16 [%rd16+1018], %rs747;
st.local.u16 [%rd16+1020], %rs747;
st.local.u16 [%rd16+1022], %rs747;
st.local.u16 [%rd16+1024], %rs747;
mov.u32 %r6192, 0;
mov.u32 %r6302, %r6192;
mov.u32 %r6298, %r6192;
mov.u32 %r6300, %r6192;
$L__BB0_687:
@%p9 bra $L__BB0_926;
sub.s32 %r4020, %r6, %r6192;
add.s32 %r1738, %r6192, 4;
mul.lo.s32 %r1739, %r1738, %r1;
add.s32 %r1740, %r6192, 5;
add.s32 %r1741, %r1739, %r1;
add.s32 %r1742, %r6192, 6;
shl.b32 %r4021, %r1, 1;
add.s32 %r1743, %r1739, %r4021;
add.s32 %r1744, %r6192, 7;
mul.lo.s32 %r4022, %r1, 3;
add.s32 %r1745, %r1739, %r4022;
add.s32 %r1746, %r6192, 1;
add.s32 %r1747, %r6192, 2;
add.s32 %r1748, %r6192, 3;
mul.lo.s32 %r1749, %r6192, %r1;
add.s32 %r1750, %r1749, %r4022;
sub.s32 %r1751, %r1750, %r1;
sub.s32 %r1752, %r1751, %r1;
setp.lt.u32 %p797, %r4020, 2;
selp.b32 %r4023, 4369, 13107, %p797;
setp.lt.u32 %p798, %r4020, 3;
selp.b32 %r4024, %r4023, 30583, %p798;
setp.lt.u32 %p799, %r4020, 4;
selp.b32 %r1753, %r4024, 65535, %p799;
mov.u32 %r4019, 0;
mov.u32 %r6196, %r4019;
mov.u32 %r6197, %r4019;
$L__BB0_689:
shr.u32 %r4026, %r6196, 2;
mul.wide.u32 %rd311, %r4026, 2;
add.s64 %rd18, %rd16, %rd311;
ld.local.u16 %rs237, [%rd18];
ld.local.u16 %rs238, [%rd18+2];
setp.ge.u32 %p800, %r6196, %r5;
mov.u32 %r6208, %r4019;
@%p800 bra $L__BB0_698;
setp.ge.u32 %p801, %r1738, %r6;
mov.u32 %r6208, 0;
@%p801 bra $L__BB0_692;
add.s32 %r4028, %r1739, %r6196;
mul.wide.u32 %rd312, %r4028, 4;
add.s64 %rd313, %rd2, %rd312;
ld.global.u32 %r4029, [%rd313];
abs.s32 %r4030, %r4029;
setp.gt.u32 %p802, %r4030, 4;
and.b32 %r4031, %r4030, 1;
setp.eq.b32 %p803, %r4031, 1;
and.pred %p804, %p802, %p803;
selp.u32 %r6208, 1, 0, %p804;
// Continue filling the flag mask %r6208 (its bit 0 is written by the selp just above this label).
// For every element the same test is applied: load a 32-bit word from %rd2 + 4*index,
// take abs.s32, and raise the flag when the magnitude is > 4 (unsigned compare) AND its low bit is 1.
// Bit layout of %r6208: bit 4*k + j  <->  second index %r6196 + k (k = 0..3) combined with the
// j-th guard/base pair: j=0 %r1738/%r1739, j=1 %r1740/%r1741, j=2 %r1742/%r1743, j=3 %r1744/%r1745.
// A single load is skipped when its guard register is >= %r6; a whole k-group is skipped when
// %r6196 + k >= %r5. Skipped positions simply leave their bit unchanged.
// NOTE(review): the guard/base registers are computed before this span; presumably base = guard * stride — confirm.
$L__BB0_692:
// bit 1: pair %r1740/%r1741, second index %r6196
setp.ge.u32 %p805, %r1740, %r6;
@%p805 bra $L__BB0_694;
add.s32 %r4032, %r1741, %r6196;
mul.wide.u32 %rd314, %r4032, 4;
add.s64 %rd315, %rd2, %rd314;
ld.global.u32 %r4033, [%rd315];
abs.s32 %r4034, %r4033;
setp.gt.u32 %p806, %r4034, 4;
and.b32 %r4035, %r4034, 1;
setp.eq.b32 %p807, %r4035, 1;
and.pred %p808, %p806, %p807;
selp.b32 %r4036, 2, 0, %p808;
or.b32 %r6208, %r4036, %r6208;
$L__BB0_694:
// bit 2: pair %r1742/%r1743, second index %r6196
setp.ge.u32 %p809, %r1742, %r6;
@%p809 bra $L__BB0_696;
add.s32 %r4037, %r1743, %r6196;
mul.wide.u32 %rd316, %r4037, 4;
add.s64 %rd317, %rd2, %rd316;
ld.global.u32 %r4038, [%rd317];
abs.s32 %r4039, %r4038;
setp.gt.u32 %p810, %r4039, 4;
and.b32 %r4040, %r4039, 1;
setp.eq.b32 %p811, %r4040, 1;
and.pred %p812, %p810, %p811;
selp.b32 %r4041, 4, 0, %p812;
or.b32 %r6208, %r4041, %r6208;
$L__BB0_696:
// bit 3: pair %r1744/%r1745, second index %r6196
setp.ge.u32 %p813, %r1744, %r6;
@%p813 bra $L__BB0_698;
add.s32 %r4042, %r1745, %r6196;
mul.wide.u32 %rd318, %r4042, 4;
add.s64 %rd319, %rd2, %rd318;
ld.global.u32 %r4043, [%rd319];
abs.s32 %r4044, %r4043;
setp.gt.u32 %p814, %r4044, 4;
and.b32 %r4045, %r4044, 1;
setp.eq.b32 %p815, %r4045, 1;
and.pred %p816, %p814, %p815;
selp.b32 %r4046, 8, 0, %p816;
or.b32 %r6208, %r4046, %r6208;
$L__BB0_698:
// k = 1: %r1767 = %r6196 + 1. %p817 (out-of-range test against %r5) is reused by later code.
add.s32 %r1767, %r6196, 1;
setp.ge.u32 %p817, %r1767, %r5;
@%p817 bra $L__BB0_707;
// bit 4: pair %r1738/%r1739
setp.ge.u32 %p818, %r1738, %r6;
@%p818 bra $L__BB0_701;
add.s32 %r4047, %r1739, %r1767;
mul.wide.u32 %rd320, %r4047, 4;
add.s64 %rd321, %rd2, %rd320;
ld.global.u32 %r4048, [%rd321];
abs.s32 %r4049, %r4048;
setp.gt.u32 %p819, %r4049, 4;
and.b32 %r4050, %r4049, 1;
setp.eq.b32 %p820, %r4050, 1;
and.pred %p821, %p819, %p820;
selp.b32 %r4051, 16, 0, %p821;
or.b32 %r6208, %r4051, %r6208;
$L__BB0_701:
// bit 5: pair %r1740/%r1741
setp.ge.u32 %p822, %r1740, %r6;
@%p822 bra $L__BB0_703;
add.s32 %r4052, %r1741, %r1767;
mul.wide.u32 %rd322, %r4052, 4;
add.s64 %rd323, %rd2, %rd322;
ld.global.u32 %r4053, [%rd323];
abs.s32 %r4054, %r4053;
setp.gt.u32 %p823, %r4054, 4;
and.b32 %r4055, %r4054, 1;
setp.eq.b32 %p824, %r4055, 1;
and.pred %p825, %p823, %p824;
selp.b32 %r4056, 32, 0, %p825;
or.b32 %r6208, %r4056, %r6208;
$L__BB0_703:
// bit 6: pair %r1742/%r1743
setp.ge.u32 %p826, %r1742, %r6;
@%p826 bra $L__BB0_705;
add.s32 %r4057, %r1743, %r1767;
mul.wide.u32 %rd324, %r4057, 4;
add.s64 %rd325, %rd2, %rd324;
ld.global.u32 %r4058, [%rd325];
abs.s32 %r4059, %r4058;
setp.gt.u32 %p827, %r4059, 4;
and.b32 %r4060, %r4059, 1;
setp.eq.b32 %p828, %r4060, 1;
and.pred %p829, %p827, %p828;
selp.b32 %r4061, 64, 0, %p829;
or.b32 %r6208, %r4061, %r6208;
$L__BB0_705:
// bit 7: pair %r1744/%r1745
setp.ge.u32 %p830, %r1744, %r6;
@%p830 bra $L__BB0_707;
add.s32 %r4062, %r1745, %r1767;
mul.wide.u32 %rd326, %r4062, 4;
add.s64 %rd327, %rd2, %rd326;
ld.global.u32 %r4063, [%rd327];
abs.s32 %r4064, %r4063;
setp.gt.u32 %p831, %r4064, 4;
and.b32 %r4065, %r4064, 1;
setp.eq.b32 %p832, %r4065, 1;
and.pred %p833, %p831, %p832;
selp.b32 %r4066, 128, 0, %p833;
or.b32 %r6208, %r4066, %r6208;
$L__BB0_707:
// k = 2: %r1776 = %r6196 + 2. %p834 is reused by later code.
add.s32 %r1776, %r6196, 2;
setp.ge.u32 %p834, %r1776, %r5;
@%p834 bra $L__BB0_716;
// bit 8: pair %r1738/%r1739
setp.ge.u32 %p835, %r1738, %r6;
@%p835 bra $L__BB0_710;
add.s32 %r4067, %r1739, %r1776;
mul.wide.u32 %rd328, %r4067, 4;
add.s64 %rd329, %rd2, %rd328;
ld.global.u32 %r4068, [%rd329];
abs.s32 %r4069, %r4068;
setp.gt.u32 %p836, %r4069, 4;
and.b32 %r4070, %r4069, 1;
setp.eq.b32 %p837, %r4070, 1;
and.pred %p838, %p836, %p837;
selp.b32 %r4071, 256, 0, %p838;
or.b32 %r6208, %r4071, %r6208;
$L__BB0_710:
// bit 9: pair %r1740/%r1741
setp.ge.u32 %p839, %r1740, %r6;
@%p839 bra $L__BB0_712;
add.s32 %r4072, %r1741, %r1776;
mul.wide.u32 %rd330, %r4072, 4;
add.s64 %rd331, %rd2, %rd330;
ld.global.u32 %r4073, [%rd331];
abs.s32 %r4074, %r4073;
setp.gt.u32 %p840, %r4074, 4;
and.b32 %r4075, %r4074, 1;
setp.eq.b32 %p841, %r4075, 1;
and.pred %p842, %p840, %p841;
selp.b32 %r4076, 512, 0, %p842;
or.b32 %r6208, %r4076, %r6208;
$L__BB0_712:
// bit 10: pair %r1742/%r1743
setp.ge.u32 %p843, %r1742, %r6;
@%p843 bra $L__BB0_714;
add.s32 %r4077, %r1743, %r1776;
mul.wide.u32 %rd332, %r4077, 4;
add.s64 %rd333, %rd2, %rd332;
ld.global.u32 %r4078, [%rd333];
abs.s32 %r4079, %r4078;
setp.gt.u32 %p844, %r4079, 4;
and.b32 %r4080, %r4079, 1;
setp.eq.b32 %p845, %r4080, 1;
and.pred %p846, %p844, %p845;
selp.b32 %r4081, 1024, 0, %p846;
or.b32 %r6208, %r4081, %r6208;
$L__BB0_714:
// bit 11: pair %r1744/%r1745
setp.ge.u32 %p847, %r1744, %r6;
@%p847 bra $L__BB0_716;
add.s32 %r4082, %r1745, %r1776;
mul.wide.u32 %rd334, %r4082, 4;
add.s64 %rd335, %rd2, %rd334;
ld.global.u32 %r4083, [%rd335];
abs.s32 %r4084, %r4083;
setp.gt.u32 %p848, %r4084, 4;
and.b32 %r4085, %r4084, 1;
setp.eq.b32 %p849, %r4085, 1;
and.pred %p850, %p848, %p849;
selp.b32 %r4086, 2048, 0, %p850;
or.b32 %r6208, %r4086, %r6208;
$L__BB0_716:
// k = 3: %r1785 = %r6196 + 3. %p851 is reused by later code.
add.s32 %r1785, %r6196, 3;
setp.ge.u32 %p851, %r1785, %r5;
@%p851 bra $L__BB0_725;
// bit 12: pair %r1738/%r1739
setp.ge.u32 %p852, %r1738, %r6;
@%p852 bra $L__BB0_719;
add.s32 %r4087, %r1739, %r1785;
mul.wide.u32 %rd336, %r4087, 4;
add.s64 %rd337, %rd2, %rd336;
ld.global.u32 %r4088, [%rd337];
abs.s32 %r4089, %r4088;
setp.gt.u32 %p853, %r4089, 4;
and.b32 %r4090, %r4089, 1;
setp.eq.b32 %p854, %r4090, 1;
and.pred %p855, %p853, %p854;
selp.b32 %r4091, 4096, 0, %p855;
or.b32 %r6208, %r4091, %r6208;
$L__BB0_719:
// bit 13: pair %r1740/%r1741
setp.ge.u32 %p856, %r1740, %r6;
@%p856 bra $L__BB0_721;
add.s32 %r4092, %r1741, %r1785;
mul.wide.u32 %rd338, %r4092, 4;
add.s64 %rd339, %rd2, %rd338;
ld.global.u32 %r4093, [%rd339];
abs.s32 %r4094, %r4093;
setp.gt.u32 %p857, %r4094, 4;
and.b32 %r4095, %r4094, 1;
setp.eq.b32 %p858, %r4095, 1;
and.pred %p859, %p857, %p858;
selp.b32 %r4096, 8192, 0, %p859;
or.b32 %r6208, %r4096, %r6208;
$L__BB0_721:
// bit 14: pair %r1742/%r1743
setp.ge.u32 %p860, %r1742, %r6;
@%p860 bra $L__BB0_723;
add.s32 %r4097, %r1743, %r1785;
mul.wide.u32 %rd340, %r4097, 4;
add.s64 %rd341, %rd2, %rd340;
ld.global.u32 %r4098, [%rd341];
abs.s32 %r4099, %r4098;
setp.gt.u32 %p861, %r4099, 4;
and.b32 %r4100, %r4099, 1;
setp.eq.b32 %p862, %r4100, 1;
and.pred %p863, %p861, %p862;
selp.b32 %r4101, 16384, 0, %p863;
or.b32 %r6208, %r4101, %r6208;
$L__BB0_723:
// bit 15: pair %r1744/%r1745
setp.ge.u32 %p864, %r1744, %r6;
@%p864 bra $L__BB0_725;
add.s32 %r4102, %r1745, %r1785;
mul.wide.u32 %rd342, %r4102, 4;
add.s64 %rd343, %rd2, %rd342;
ld.global.u32 %r4103, [%rd343];
abs.s32 %r4104, %r4103;
setp.gt.u32 %p865, %r4104, 4;
and.b32 %r4105, %r4104, 1;
setp.eq.b32 %p866, %r4105, 1;
and.pred %p867, %p865, %p866;
selp.b32 %r4106, 32768, 0, %p867;
or.b32 %r6208, %r4106, %r6208;
// Build the 16-bit flag mask %r6224, starting from 0.
// Same per-element test as used for %r6208: flag = (abs.s32(word) > 4, unsigned) AND (abs & 1) == 1,
// with the word loaded from %rd2 + 4*index.
// Bit layout of %r6224: bit 4*(k-4) + j  <->  second index %r6196 + k (k = 4..7) combined with the
// j-th guard/base pair: j=0 %r1738/%r1739, j=1 %r1740/%r1741, j=2 %r1742/%r1743, j=3 %r1744/%r1745.
// A load is skipped when its guard register is >= %r6; a whole k-group is skipped when %r6196 + k >= %r5.
// %r4108 (= %r6196 + 4) and the predicates %p868, %p885, %p902, %p919 are reused by later code.
$L__BB0_725:
// k = 4
add.s32 %r4108, %r6196, 4;
setp.ge.u32 %p868, %r4108, %r5;
mov.u32 %r6224, 0;
@%p868 bra $L__BB0_734;
// bit 0: pair %r1738/%r1739 (first write, so the flag is assigned rather than OR-ed)
setp.ge.u32 %p869, %r1738, %r6;
mov.u32 %r6224, 0;
@%p869 bra $L__BB0_728;
add.s32 %r4110, %r1739, %r6196;
add.s32 %r4111, %r4110, 4;
mul.wide.u32 %rd344, %r4111, 4;
add.s64 %rd345, %rd2, %rd344;
ld.global.u32 %r4112, [%rd345];
abs.s32 %r4113, %r4112;
setp.gt.u32 %p870, %r4113, 4;
and.b32 %r4114, %r4113, 1;
setp.eq.b32 %p871, %r4114, 1;
and.pred %p872, %p870, %p871;
selp.u32 %r6224, 1, 0, %p872;
$L__BB0_728:
// bit 1: pair %r1740/%r1741
setp.ge.u32 %p873, %r1740, %r6;
@%p873 bra $L__BB0_730;
add.s32 %r4115, %r1741, %r6196;
add.s32 %r4116, %r4115, 4;
mul.wide.u32 %rd346, %r4116, 4;
add.s64 %rd347, %rd2, %rd346;
ld.global.u32 %r4117, [%rd347];
abs.s32 %r4118, %r4117;
setp.gt.u32 %p874, %r4118, 4;
and.b32 %r4119, %r4118, 1;
setp.eq.b32 %p875, %r4119, 1;
and.pred %p876, %p874, %p875;
selp.b32 %r4120, 2, 0, %p876;
or.b32 %r6224, %r4120, %r6224;
$L__BB0_730:
// bit 2: pair %r1742/%r1743
setp.ge.u32 %p877, %r1742, %r6;
@%p877 bra $L__BB0_732;
add.s32 %r4121, %r1743, %r6196;
add.s32 %r4122, %r4121, 4;
mul.wide.u32 %rd348, %r4122, 4;
add.s64 %rd349, %rd2, %rd348;
ld.global.u32 %r4123, [%rd349];
abs.s32 %r4124, %r4123;
setp.gt.u32 %p878, %r4124, 4;
and.b32 %r4125, %r4124, 1;
setp.eq.b32 %p879, %r4125, 1;
and.pred %p880, %p878, %p879;
selp.b32 %r4126, 4, 0, %p880;
or.b32 %r6224, %r4126, %r6224;
$L__BB0_732:
// bit 3: pair %r1744/%r1745
setp.ge.u32 %p881, %r1744, %r6;
@%p881 bra $L__BB0_734;
add.s32 %r4127, %r1745, %r6196;
add.s32 %r4128, %r4127, 4;
mul.wide.u32 %rd350, %r4128, 4;
add.s64 %rd351, %rd2, %rd350;
ld.global.u32 %r4129, [%rd351];
abs.s32 %r4130, %r4129;
setp.gt.u32 %p882, %r4130, 4;
and.b32 %r4131, %r4130, 1;
setp.eq.b32 %p883, %r4131, 1;
and.pred %p884, %p882, %p883;
selp.b32 %r4132, 8, 0, %p884;
or.b32 %r6224, %r4132, %r6224;
$L__BB0_734:
// k = 5: %r1802 = %r6196 + 5
add.s32 %r1802, %r6196, 5;
setp.ge.u32 %p885, %r1802, %r5;
@%p885 bra $L__BB0_743;
// bit 4: pair %r1738/%r1739
setp.ge.u32 %p886, %r1738, %r6;
@%p886 bra $L__BB0_737;
add.s32 %r4133, %r1739, %r1802;
mul.wide.u32 %rd352, %r4133, 4;
add.s64 %rd353, %rd2, %rd352;
ld.global.u32 %r4134, [%rd353];
abs.s32 %r4135, %r4134;
setp.gt.u32 %p887, %r4135, 4;
and.b32 %r4136, %r4135, 1;
setp.eq.b32 %p888, %r4136, 1;
and.pred %p889, %p887, %p888;
selp.b32 %r4137, 16, 0, %p889;
or.b32 %r6224, %r4137, %r6224;
$L__BB0_737:
// bit 5: pair %r1740/%r1741
setp.ge.u32 %p890, %r1740, %r6;
@%p890 bra $L__BB0_739;
add.s32 %r4138, %r1741, %r1802;
mul.wide.u32 %rd354, %r4138, 4;
add.s64 %rd355, %rd2, %rd354;
ld.global.u32 %r4139, [%rd355];
abs.s32 %r4140, %r4139;
setp.gt.u32 %p891, %r4140, 4;
and.b32 %r4141, %r4140, 1;
setp.eq.b32 %p892, %r4141, 1;
and.pred %p893, %p891, %p892;
selp.b32 %r4142, 32, 0, %p893;
or.b32 %r6224, %r4142, %r6224;
$L__BB0_739:
// bit 6: pair %r1742/%r1743
setp.ge.u32 %p894, %r1742, %r6;
@%p894 bra $L__BB0_741;
add.s32 %r4143, %r1743, %r1802;
mul.wide.u32 %rd356, %r4143, 4;
add.s64 %rd357, %rd2, %rd356;
ld.global.u32 %r4144, [%rd357];
abs.s32 %r4145, %r4144;
setp.gt.u32 %p895, %r4145, 4;
and.b32 %r4146, %r4145, 1;
setp.eq.b32 %p896, %r4146, 1;
and.pred %p897, %p895, %p896;
selp.b32 %r4147, 64, 0, %p897;
or.b32 %r6224, %r4147, %r6224;
$L__BB0_741:
// bit 7: pair %r1744/%r1745
setp.ge.u32 %p898, %r1744, %r6;
@%p898 bra $L__BB0_743;
add.s32 %r4148, %r1745, %r1802;
mul.wide.u32 %rd358, %r4148, 4;
add.s64 %rd359, %rd2, %rd358;
ld.global.u32 %r4149, [%rd359];
abs.s32 %r4150, %r4149;
setp.gt.u32 %p899, %r4150, 4;
and.b32 %r4151, %r4150, 1;
setp.eq.b32 %p900, %r4151, 1;
and.pred %p901, %p899, %p900;
selp.b32 %r4152, 128, 0, %p901;
or.b32 %r6224, %r4152, %r6224;
$L__BB0_743:
// k = 6: %r1811 = %r6196 + 6
add.s32 %r1811, %r6196, 6;
setp.ge.u32 %p902, %r1811, %r5;
@%p902 bra $L__BB0_752;
// bit 8: pair %r1738/%r1739
setp.ge.u32 %p903, %r1738, %r6;
@%p903 bra $L__BB0_746;
add.s32 %r4153, %r1739, %r1811;
mul.wide.u32 %rd360, %r4153, 4;
add.s64 %rd361, %rd2, %rd360;
ld.global.u32 %r4154, [%rd361];
abs.s32 %r4155, %r4154;
setp.gt.u32 %p904, %r4155, 4;
and.b32 %r4156, %r4155, 1;
setp.eq.b32 %p905, %r4156, 1;
and.pred %p906, %p904, %p905;
selp.b32 %r4157, 256, 0, %p906;
or.b32 %r6224, %r4157, %r6224;
$L__BB0_746:
// bit 9: pair %r1740/%r1741
setp.ge.u32 %p907, %r1740, %r6;
@%p907 bra $L__BB0_748;
add.s32 %r4158, %r1741, %r1811;
mul.wide.u32 %rd362, %r4158, 4;
add.s64 %rd363, %rd2, %rd362;
ld.global.u32 %r4159, [%rd363];
abs.s32 %r4160, %r4159;
setp.gt.u32 %p908, %r4160, 4;
and.b32 %r4161, %r4160, 1;
setp.eq.b32 %p909, %r4161, 1;
and.pred %p910, %p908, %p909;
selp.b32 %r4162, 512, 0, %p910;
or.b32 %r6224, %r4162, %r6224;
$L__BB0_748:
// bit 10: pair %r1742/%r1743
setp.ge.u32 %p911, %r1742, %r6;
@%p911 bra $L__BB0_750;
add.s32 %r4163, %r1743, %r1811;
mul.wide.u32 %rd364, %r4163, 4;
add.s64 %rd365, %rd2, %rd364;
ld.global.u32 %r4164, [%rd365];
abs.s32 %r4165, %r4164;
setp.gt.u32 %p912, %r4165, 4;
and.b32 %r4166, %r4165, 1;
setp.eq.b32 %p913, %r4166, 1;
and.pred %p914, %p912, %p913;
selp.b32 %r4167, 1024, 0, %p914;
or.b32 %r6224, %r4167, %r6224;
$L__BB0_750:
// bit 11: pair %r1744/%r1745
setp.ge.u32 %p915, %r1744, %r6;
@%p915 bra $L__BB0_752;
add.s32 %r4168, %r1745, %r1811;
mul.wide.u32 %rd366, %r4168, 4;
add.s64 %rd367, %rd2, %rd366;
ld.global.u32 %r4169, [%rd367];
abs.s32 %r4170, %r4169;
setp.gt.u32 %p916, %r4170, 4;
and.b32 %r4171, %r4170, 1;
setp.eq.b32 %p917, %r4171, 1;
and.pred %p918, %p916, %p917;
selp.b32 %r4172, 2048, 0, %p918;
or.b32 %r6224, %r4172, %r6224;
$L__BB0_752:
// k = 7: %r1820 = %r6196 + 7
add.s32 %r1820, %r6196, 7;
setp.ge.u32 %p919, %r1820, %r5;
@%p919 bra $L__BB0_761;
// bit 12: pair %r1738/%r1739
setp.ge.u32 %p920, %r1738, %r6;
@%p920 bra $L__BB0_755;
add.s32 %r4173, %r1739, %r1820;
mul.wide.u32 %rd368, %r4173, 4;
add.s64 %rd369, %rd2, %rd368;
ld.global.u32 %r4174, [%rd369];
abs.s32 %r4175, %r4174;
setp.gt.u32 %p921, %r4175, 4;
and.b32 %r4176, %r4175, 1;
setp.eq.b32 %p922, %r4176, 1;
and.pred %p923, %p921, %p922;
selp.b32 %r4177, 4096, 0, %p923;
or.b32 %r6224, %r4177, %r6224;
$L__BB0_755:
// bit 13: pair %r1740/%r1741
setp.ge.u32 %p924, %r1740, %r6;
@%p924 bra $L__BB0_757;
add.s32 %r4178, %r1741, %r1820;
mul.wide.u32 %rd370, %r4178, 4;
add.s64 %rd371, %rd2, %rd370;
ld.global.u32 %r4179, [%rd371];
abs.s32 %r4180, %r4179;
setp.gt.u32 %p925, %r4180, 4;
and.b32 %r4181, %r4180, 1;
setp.eq.b32 %p926, %r4181, 1;
and.pred %p927, %p925, %p926;
selp.b32 %r4182, 8192, 0, %p927;
or.b32 %r6224, %r4182, %r6224;
$L__BB0_757:
// bit 14: pair %r1742/%r1743
setp.ge.u32 %p928, %r1742, %r6;
@%p928 bra $L__BB0_759;
add.s32 %r4183, %r1743, %r1820;
mul.wide.u32 %rd372, %r4183, 4;
add.s64 %rd373, %rd2, %rd372;
ld.global.u32 %r4184, [%rd373];
abs.s32 %r4185, %r4184;
setp.gt.u32 %p929, %r4185, 4;
and.b32 %r4186, %r4185, 1;
setp.eq.b32 %p930, %r4186, 1;
and.pred %p931, %p929, %p930;
selp.b32 %r4187, 16384, 0, %p931;
or.b32 %r6224, %r4187, %r6224;
$L__BB0_759:
// bit 15: pair %r1744/%r1745
setp.ge.u32 %p932, %r1744, %r6;
@%p932 bra $L__BB0_761;
add.s32 %r4188, %r1745, %r1820;
mul.wide.u32 %rd374, %r4188, 4;
add.s64 %rd375, %rd2, %rd374;
ld.global.u32 %r4189, [%rd375];
abs.s32 %r4190, %r4189;
setp.gt.u32 %p933, %r4190, 4;
and.b32 %r4191, %r4190, 1;
setp.eq.b32 %p934, %r4191, 1;
and.pred %p935, %p933, %p934;
selp.b32 %r4192, 32768, 0, %p935;
or.b32 %r6224, %r4192, %r6224;
// Build the 16-bit flag mask %r6240 for a second set of four guard/base pairs, and precompute
// the element addresses %rd19..%rd34 that later code loads from again.
// Per-element test: flag = (abs.s32(word) > 4, unsigned) AND (abs & 1) == 1.
// Bit layout of %r6240: bit 4*k + j  <->  second index %r6196 + k (k = 0..3) combined with lane j:
//   j=0: skipped when %r6 <= %r6192, base %r1749
//   j=1: skipped when %r1746 >= %r6, base %r1752
//   j=2: skipped when %r1747 >= %r6, base %r1751
//   j=3: skipped when %r1748 >= %r6, base %r1750
// Address registers: k=0 -> %rd19..%rd22, k=1 -> %rd23..%rd26, k=2 -> %rd27..%rd30, k=3 -> %rd31..%rd34.
// The addresses are formed unconditionally; only the loads are guarded.
// NOTE(review): %p800 and the guard/base registers are defined before this span; %p800 presumably
// tests %r6196 >= %r5 (by analogy with %p817/%p834/%p851) — confirm.
$L__BB0_761:
// Pack the two 16-bit registers into one 32-bit word (%rs237 low half, %rs238 high half).
mov.b32 %r1829, {%rs237, %rs238};
// k = 0 addresses
add.s32 %r4194, %r1749, %r6196;
mul.wide.u32 %rd376, %r4194, 4;
add.s64 %rd19, %rd2, %rd376;
add.s32 %r4195, %r1752, %r6196;
mul.wide.u32 %rd377, %r4195, 4;
add.s64 %rd20, %rd2, %rd377;
add.s32 %r4196, %r1751, %r6196;
mul.wide.u32 %rd378, %r4196, 4;
add.s64 %rd21, %rd2, %rd378;
add.s32 %r4197, %r1750, %r6196;
mul.wide.u32 %rd379, %r4197, 4;
add.s64 %rd22, %rd2, %rd379;
mov.u32 %r6240, 0;
@%p800 bra $L__BB0_770;
// bit 0 (first write, assigned rather than OR-ed)
setp.le.u32 %p937, %r6, %r6192;
mov.u32 %r6240, 0;
@%p937 bra $L__BB0_764;
ld.global.u32 %r4199, [%rd19];
abs.s32 %r4200, %r4199;
setp.gt.u32 %p938, %r4200, 4;
and.b32 %r4201, %r4200, 1;
setp.eq.b32 %p939, %r4201, 1;
and.pred %p940, %p938, %p939;
selp.u32 %r6240, 1, 0, %p940;
$L__BB0_764:
// bit 1
setp.ge.u32 %p941, %r1746, %r6;
@%p941 bra $L__BB0_766;
ld.global.u32 %r4202, [%rd20];
abs.s32 %r4203, %r4202;
setp.gt.u32 %p942, %r4203, 4;
and.b32 %r4204, %r4203, 1;
setp.eq.b32 %p943, %r4204, 1;
and.pred %p944, %p942, %p943;
selp.b32 %r4205, 2, 0, %p944;
or.b32 %r6240, %r4205, %r6240;
$L__BB0_766:
// bit 2
setp.ge.u32 %p945, %r1747, %r6;
@%p945 bra $L__BB0_768;
ld.global.u32 %r4206, [%rd21];
abs.s32 %r4207, %r4206;
setp.gt.u32 %p946, %r4207, 4;
and.b32 %r4208, %r4207, 1;
setp.eq.b32 %p947, %r4208, 1;
and.pred %p948, %p946, %p947;
selp.b32 %r4209, 4, 0, %p948;
or.b32 %r6240, %r4209, %r6240;
$L__BB0_768:
// bit 3
setp.ge.u32 %p949, %r1748, %r6;
@%p949 bra $L__BB0_770;
ld.global.u32 %r4210, [%rd22];
abs.s32 %r4211, %r4210;
setp.gt.u32 %p950, %r4211, 4;
and.b32 %r4212, %r4211, 1;
setp.eq.b32 %p951, %r4212, 1;
and.pred %p952, %p950, %p951;
selp.b32 %r4213, 8, 0, %p952;
or.b32 %r6240, %r4213, %r6240;
$L__BB0_770:
// k = 1 addresses (%r1767 = %r6196 + 1)
add.s32 %r4214, %r1749, %r1767;
mul.wide.u32 %rd380, %r4214, 4;
add.s64 %rd23, %rd2, %rd380;
add.s32 %r4215, %r1752, %r1767;
mul.wide.u32 %rd381, %r4215, 4;
add.s64 %rd24, %rd2, %rd381;
add.s32 %r4216, %r1751, %r1767;
mul.wide.u32 %rd382, %r4216, 4;
add.s64 %rd25, %rd2, %rd382;
add.s32 %r4217, %r1750, %r1767;
mul.wide.u32 %rd383, %r4217, 4;
add.s64 %rd26, %rd2, %rd383;
// %r1838 = (%r6224 << 16) | %r6208: the two 16-bit masks built earlier merged into one 32-bit word.
shl.b32 %r4218, %r6224, 16;
or.b32 %r1838, %r4218, %r6208;
@%p817 bra $L__BB0_779;
// bit 4
setp.le.u32 %p954, %r6, %r6192;
@%p954 bra $L__BB0_773;
ld.global.u32 %r4219, [%rd23];
abs.s32 %r4220, %r4219;
setp.gt.u32 %p955, %r4220, 4;
and.b32 %r4221, %r4220, 1;
setp.eq.b32 %p956, %r4221, 1;
and.pred %p957, %p955, %p956;
selp.b32 %r4222, 16, 0, %p957;
or.b32 %r6240, %r4222, %r6240;
$L__BB0_773:
// bit 5
setp.ge.u32 %p958, %r1746, %r6;
@%p958 bra $L__BB0_775;
ld.global.u32 %r4223, [%rd24];
abs.s32 %r4224, %r4223;
setp.gt.u32 %p959, %r4224, 4;
and.b32 %r4225, %r4224, 1;
setp.eq.b32 %p960, %r4225, 1;
and.pred %p961, %p959, %p960;
selp.b32 %r4226, 32, 0, %p961;
or.b32 %r6240, %r4226, %r6240;
$L__BB0_775:
// bit 6
setp.ge.u32 %p962, %r1747, %r6;
@%p962 bra $L__BB0_777;
ld.global.u32 %r4227, [%rd25];
abs.s32 %r4228, %r4227;
setp.gt.u32 %p963, %r4228, 4;
and.b32 %r4229, %r4228, 1;
setp.eq.b32 %p964, %r4229, 1;
and.pred %p965, %p963, %p964;
selp.b32 %r4230, 64, 0, %p965;
or.b32 %r6240, %r4230, %r6240;
$L__BB0_777:
// bit 7
setp.ge.u32 %p966, %r1748, %r6;
@%p966 bra $L__BB0_779;
ld.global.u32 %r4231, [%rd26];
abs.s32 %r4232, %r4231;
setp.gt.u32 %p967, %r4232, 4;
and.b32 %r4233, %r4232, 1;
setp.eq.b32 %p968, %r4233, 1;
and.pred %p969, %p967, %p968;
selp.b32 %r4234, 128, 0, %p969;
or.b32 %r6240, %r4234, %r6240;
$L__BB0_779:
// k = 2 addresses (%r1776 = %r6196 + 2)
add.s32 %r4235, %r1749, %r1776;
mul.wide.u32 %rd384, %r4235, 4;
add.s64 %rd27, %rd2, %rd384;
add.s32 %r4236, %r1752, %r1776;
mul.wide.u32 %rd385, %r4236, 4;
add.s64 %rd28, %rd2, %rd385;
add.s32 %r4237, %r1751, %r1776;
mul.wide.u32 %rd386, %r4237, 4;
add.s64 %rd29, %rd2, %rd386;
add.s32 %r4238, %r1750, %r1776;
mul.wide.u32 %rd387, %r4238, 4;
add.s64 %rd30, %rd2, %rd387;
@%p834 bra $L__BB0_788;
// bit 8
setp.le.u32 %p971, %r6, %r6192;
@%p971 bra $L__BB0_782;
ld.global.u32 %r4239, [%rd27];
abs.s32 %r4240, %r4239;
setp.gt.u32 %p972, %r4240, 4;
and.b32 %r4241, %r4240, 1;
setp.eq.b32 %p973, %r4241, 1;
and.pred %p974, %p972, %p973;
selp.b32 %r4242, 256, 0, %p974;
or.b32 %r6240, %r4242, %r6240;
$L__BB0_782:
// bit 9
setp.ge.u32 %p975, %r1746, %r6;
@%p975 bra $L__BB0_784;
ld.global.u32 %r4243, [%rd28];
abs.s32 %r4244, %r4243;
setp.gt.u32 %p976, %r4244, 4;
and.b32 %r4245, %r4244, 1;
setp.eq.b32 %p977, %r4245, 1;
and.pred %p978, %p976, %p977;
selp.b32 %r4246, 512, 0, %p978;
or.b32 %r6240, %r4246, %r6240;
$L__BB0_784:
// bit 10
setp.ge.u32 %p979, %r1747, %r6;
@%p979 bra $L__BB0_786;
ld.global.u32 %r4247, [%rd29];
abs.s32 %r4248, %r4247;
setp.gt.u32 %p980, %r4248, 4;
and.b32 %r4249, %r4248, 1;
setp.eq.b32 %p981, %r4249, 1;
and.pred %p982, %p980, %p981;
selp.b32 %r4250, 1024, 0, %p982;
or.b32 %r6240, %r4250, %r6240;
$L__BB0_786:
// bit 11
setp.ge.u32 %p983, %r1748, %r6;
@%p983 bra $L__BB0_788;
ld.global.u32 %r4251, [%rd30];
abs.s32 %r4252, %r4251;
setp.gt.u32 %p984, %r4252, 4;
and.b32 %r4253, %r4252, 1;
setp.eq.b32 %p985, %r4253, 1;
and.pred %p986, %p984, %p985;
selp.b32 %r4254, 2048, 0, %p986;
or.b32 %r6240, %r4254, %r6240;
$L__BB0_788:
// k = 3 addresses (%r1785 = %r6196 + 3)
add.s32 %r4255, %r1749, %r1785;
mul.wide.u32 %rd388, %r4255, 4;
add.s64 %rd31, %rd2, %rd388;
add.s32 %r4256, %r1752, %r1785;
mul.wide.u32 %rd389, %r4256, 4;
add.s64 %rd32, %rd2, %rd389;
add.s32 %r4257, %r1751, %r1785;
mul.wide.u32 %rd390, %r4257, 4;
add.s64 %rd33, %rd2, %rd390;
add.s32 %r4258, %r1750, %r1785;
mul.wide.u32 %rd391, %r4258, 4;
add.s64 %rd34, %rd2, %rd391;
@%p851 bra $L__BB0_797;
// bit 12
setp.le.u32 %p988, %r6, %r6192;
@%p988 bra $L__BB0_791;
ld.global.u32 %r4259, [%rd31];
abs.s32 %r4260, %r4259;
setp.gt.u32 %p989, %r4260, 4;
and.b32 %r4261, %r4260, 1;
setp.eq.b32 %p990, %r4261, 1;
and.pred %p991, %p989, %p990;
selp.b32 %r4262, 4096, 0, %p991;
or.b32 %r6240, %r4262, %r6240;
$L__BB0_791:
// bit 13
setp.ge.u32 %p992, %r1746, %r6;
@%p992 bra $L__BB0_793;
ld.global.u32 %r4263, [%rd32];
abs.s32 %r4264, %r4263;
setp.gt.u32 %p993, %r4264, 4;
and.b32 %r4265, %r4264, 1;
setp.eq.b32 %p994, %r4265, 1;
and.pred %p995, %p993, %p994;
selp.b32 %r4266, 8192, 0, %p995;
or.b32 %r6240, %r4266, %r6240;
$L__BB0_793:
// bit 14
setp.ge.u32 %p996, %r1747, %r6;
@%p996 bra $L__BB0_795;
ld.global.u32 %r4267, [%rd33];
abs.s32 %r4268, %r4267;
setp.gt.u32 %p997, %r4268, 4;
and.b32 %r4269, %r4268, 1;
setp.eq.b32 %p998, %r4269, 1;
and.pred %p999, %p997, %p998;
selp.b32 %r4270, 16384, 0, %p999;
or.b32 %r6240, %r4270, %r6240;
$L__BB0_795:
// bit 15
setp.ge.u32 %p1000, %r1748, %r6;
@%p1000 bra $L__BB0_797;
ld.global.u32 %r4271, [%rd34];
abs.s32 %r4272, %r4271;
setp.gt.u32 %p1001, %r4272, 4;
and.b32 %r4273, %r4272, 1;
setp.eq.b32 %p1002, %r4273, 1;
and.pred %p1003, %p1001, %p1002;
selp.b32 %r4274, 32768, 0, %p1003;
or.b32 %r6240, %r4274, %r6240;
// Build the 16-bit flag mask %r6256, starting from 0.
// Per-element test: flag = (abs.s32(word) > 4, unsigned) AND (abs & 1) == 1, word loaded from %rd2 + 4*index.
// Bit layout of %r6256: bit 4*(k-4) + j  <->  second index %r6196 + k (k = 4..7) combined with lane j:
//   j=0: skipped when %r6 <= %r6192, base %r1749
//   j=1: skipped when %r1746 >= %r6, base %r1752
//   j=2: skipped when %r1747 >= %r6, base %r1751
//   j=3: skipped when %r1748 >= %r6, base %r1750
// The k-group guards reuse predicates computed earlier: %p868 (k=4), %p885 (k=5), %p902 (k=6), %p919 (k=7).
// For k = 4 the index is formed as (base + %r6196) + 4 using %r4194..%r4197 from the preceding code.
$L__BB0_797:
mov.u32 %r6256, 0;
@%p868 bra $L__BB0_806;
// bit 0 (first write, assigned rather than OR-ed)
setp.le.u32 %p1005, %r6, %r6192;
mov.u32 %r6256, 0;
@%p1005 bra $L__BB0_800;
add.s32 %r4279, %r4194, 4;
mul.wide.u32 %rd392, %r4279, 4;
add.s64 %rd393, %rd2, %rd392;
ld.global.u32 %r4280, [%rd393];
abs.s32 %r4281, %r4280;
setp.gt.u32 %p1006, %r4281, 4;
and.b32 %r4282, %r4281, 1;
setp.eq.b32 %p1007, %r4282, 1;
and.pred %p1008, %p1006, %p1007;
selp.u32 %r6256, 1, 0, %p1008;
$L__BB0_800:
// bit 1
setp.ge.u32 %p1009, %r1746, %r6;
@%p1009 bra $L__BB0_802;
add.s32 %r4284, %r4195, 4;
mul.wide.u32 %rd394, %r4284, 4;
add.s64 %rd395, %rd2, %rd394;
ld.global.u32 %r4285, [%rd395];
abs.s32 %r4286, %r4285;
setp.gt.u32 %p1010, %r4286, 4;
and.b32 %r4287, %r4286, 1;
setp.eq.b32 %p1011, %r4287, 1;
and.pred %p1012, %p1010, %p1011;
selp.b32 %r4288, 2, 0, %p1012;
or.b32 %r6256, %r4288, %r6256;
$L__BB0_802:
// bit 2
setp.ge.u32 %p1013, %r1747, %r6;
@%p1013 bra $L__BB0_804;
add.s32 %r4290, %r4196, 4;
mul.wide.u32 %rd396, %r4290, 4;
add.s64 %rd397, %rd2, %rd396;
ld.global.u32 %r4291, [%rd397];
abs.s32 %r4292, %r4291;
setp.gt.u32 %p1014, %r4292, 4;
and.b32 %r4293, %r4292, 1;
setp.eq.b32 %p1015, %r4293, 1;
and.pred %p1016, %p1014, %p1015;
selp.b32 %r4294, 4, 0, %p1016;
or.b32 %r6256, %r4294, %r6256;
$L__BB0_804:
// bit 3
setp.ge.u32 %p1017, %r1748, %r6;
@%p1017 bra $L__BB0_806;
add.s32 %r4296, %r4197, 4;
mul.wide.u32 %rd398, %r4296, 4;
add.s64 %rd399, %rd2, %rd398;
ld.global.u32 %r4297, [%rd399];
abs.s32 %r4298, %r4297;
setp.gt.u32 %p1018, %r4298, 4;
and.b32 %r4299, %r4298, 1;
setp.eq.b32 %p1019, %r4299, 1;
and.pred %p1020, %p1018, %p1019;
selp.b32 %r4300, 8, 0, %p1020;
or.b32 %r6256, %r4300, %r6256;
$L__BB0_806:
// k = 5 (%r1802 = %r6196 + 5)
@%p885 bra $L__BB0_815;
// bit 4
setp.le.u32 %p1022, %r6, %r6192;
@%p1022 bra $L__BB0_809;
add.s32 %r4301, %r1749, %r1802;
mul.wide.u32 %rd400, %r4301, 4;
add.s64 %rd401, %rd2, %rd400;
ld.global.u32 %r4302, [%rd401];
abs.s32 %r4303, %r4302;
setp.gt.u32 %p1023, %r4303, 4;
and.b32 %r4304, %r4303, 1;
setp.eq.b32 %p1024, %r4304, 1;
and.pred %p1025, %p1023, %p1024;
selp.b32 %r4305, 16, 0, %p1025;
or.b32 %r6256, %r4305, %r6256;
$L__BB0_809:
// bit 5
setp.ge.u32 %p1026, %r1746, %r6;
@%p1026 bra $L__BB0_811;
add.s32 %r4306, %r1752, %r1802;
mul.wide.u32 %rd402, %r4306, 4;
add.s64 %rd403, %rd2, %rd402;
ld.global.u32 %r4307, [%rd403];
abs.s32 %r4308, %r4307;
setp.gt.u32 %p1027, %r4308, 4;
and.b32 %r4309, %r4308, 1;
setp.eq.b32 %p1028, %r4309, 1;
and.pred %p1029, %p1027, %p1028;
selp.b32 %r4310, 32, 0, %p1029;
or.b32 %r6256, %r4310, %r6256;
$L__BB0_811:
// bit 6
setp.ge.u32 %p1030, %r1747, %r6;
@%p1030 bra $L__BB0_813;
add.s32 %r4311, %r1751, %r1802;
mul.wide.u32 %rd404, %r4311, 4;
add.s64 %rd405, %rd2, %rd404;
ld.global.u32 %r4312, [%rd405];
abs.s32 %r4313, %r4312;
setp.gt.u32 %p1031, %r4313, 4;
and.b32 %r4314, %r4313, 1;
setp.eq.b32 %p1032, %r4314, 1;
and.pred %p1033, %p1031, %p1032;
selp.b32 %r4315, 64, 0, %p1033;
or.b32 %r6256, %r4315, %r6256;
$L__BB0_813:
// bit 7
setp.ge.u32 %p1034, %r1748, %r6;
@%p1034 bra $L__BB0_815;
add.s32 %r4316, %r1750, %r1802;
mul.wide.u32 %rd406, %r4316, 4;
add.s64 %rd407, %rd2, %rd406;
ld.global.u32 %r4317, [%rd407];
abs.s32 %r4318, %r4317;
setp.gt.u32 %p1035, %r4318, 4;
and.b32 %r4319, %r4318, 1;
setp.eq.b32 %p1036, %r4319, 1;
and.pred %p1037, %p1035, %p1036;
selp.b32 %r4320, 128, 0, %p1037;
or.b32 %r6256, %r4320, %r6256;
$L__BB0_815:
// k = 6 (%r1811 = %r6196 + 6)
@%p902 bra $L__BB0_824;
// bit 8
setp.le.u32 %p1039, %r6, %r6192;
@%p1039 bra $L__BB0_818;
add.s32 %r4321, %r1749, %r1811;
mul.wide.u32 %rd408, %r4321, 4;
add.s64 %rd409, %rd2, %rd408;
ld.global.u32 %r4322, [%rd409];
abs.s32 %r4323, %r4322;
setp.gt.u32 %p1040, %r4323, 4;
and.b32 %r4324, %r4323, 1;
setp.eq.b32 %p1041, %r4324, 1;
and.pred %p1042, %p1040, %p1041;
selp.b32 %r4325, 256, 0, %p1042;
or.b32 %r6256, %r4325, %r6256;
$L__BB0_818:
// bit 9
setp.ge.u32 %p1043, %r1746, %r6;
@%p1043 bra $L__BB0_820;
add.s32 %r4326, %r1752, %r1811;
mul.wide.u32 %rd410, %r4326, 4;
add.s64 %rd411, %rd2, %rd410;
ld.global.u32 %r4327, [%rd411];
abs.s32 %r4328, %r4327;
setp.gt.u32 %p1044, %r4328, 4;
and.b32 %r4329, %r4328, 1;
setp.eq.b32 %p1045, %r4329, 1;
and.pred %p1046, %p1044, %p1045;
selp.b32 %r4330, 512, 0, %p1046;
or.b32 %r6256, %r4330, %r6256;
$L__BB0_820:
// bit 10
setp.ge.u32 %p1047, %r1747, %r6;
@%p1047 bra $L__BB0_822;
add.s32 %r4331, %r1751, %r1811;
mul.wide.u32 %rd412, %r4331, 4;
add.s64 %rd413, %rd2, %rd412;
ld.global.u32 %r4332, [%rd413];
abs.s32 %r4333, %r4332;
setp.gt.u32 %p1048, %r4333, 4;
and.b32 %r4334, %r4333, 1;
setp.eq.b32 %p1049, %r4334, 1;
and.pred %p1050, %p1048, %p1049;
selp.b32 %r4335, 1024, 0, %p1050;
or.b32 %r6256, %r4335, %r6256;
$L__BB0_822:
// bit 11
setp.ge.u32 %p1051, %r1748, %r6;
@%p1051 bra $L__BB0_824;
add.s32 %r4336, %r1750, %r1811;
mul.wide.u32 %rd414, %r4336, 4;
add.s64 %rd415, %rd2, %rd414;
ld.global.u32 %r4337, [%rd415];
abs.s32 %r4338, %r4337;
setp.gt.u32 %p1052, %r4338, 4;
and.b32 %r4339, %r4338, 1;
setp.eq.b32 %p1053, %r4339, 1;
and.pred %p1054, %p1052, %p1053;
selp.b32 %r4340, 2048, 0, %p1054;
or.b32 %r6256, %r4340, %r6256;
$L__BB0_824:
// k = 7 (%r1820 = %r6196 + 7)
@%p919 bra $L__BB0_833;
// bit 12
setp.le.u32 %p1056, %r6, %r6192;
@%p1056 bra $L__BB0_827;
add.s32 %r4341, %r1749, %r1820;
mul.wide.u32 %rd416, %r4341, 4;
add.s64 %rd417, %rd2, %rd416;
ld.global.u32 %r4342, [%rd417];
abs.s32 %r4343, %r4342;
setp.gt.u32 %p1057, %r4343, 4;
and.b32 %r4344, %r4343, 1;
setp.eq.b32 %p1058, %r4344, 1;
and.pred %p1059, %p1057, %p1058;
selp.b32 %r4345, 4096, 0, %p1059;
or.b32 %r6256, %r4345, %r6256;
$L__BB0_827:
// bit 13
setp.ge.u32 %p1060, %r1746, %r6;
@%p1060 bra $L__BB0_829;
add.s32 %r4346, %r1752, %r1820;
mul.wide.u32 %rd418, %r4346, 4;
add.s64 %rd419, %rd2, %rd418;
ld.global.u32 %r4347, [%rd419];
abs.s32 %r4348, %r4347;
setp.gt.u32 %p1061, %r4348, 4;
and.b32 %r4349, %r4348, 1;
setp.eq.b32 %p1062, %r4349, 1;
and.pred %p1063, %p1061, %p1062;
selp.b32 %r4350, 8192, 0, %p1063;
or.b32 %r6256, %r4350, %r6256;
$L__BB0_829:
// bit 14
setp.ge.u32 %p1064, %r1747, %r6;
@%p1064 bra $L__BB0_831;
add.s32 %r4351, %r1751, %r1820;
mul.wide.u32 %rd420, %r4351, 4;
add.s64 %rd421, %rd2, %rd420;
ld.global.u32 %r4352, [%rd421];
abs.s32 %r4353, %r4352;
setp.gt.u32 %p1065, %r4353, 4;
and.b32 %r4354, %r4353, 1;
setp.eq.b32 %p1066, %r4354, 1;
and.pred %p1067, %p1065, %p1066;
selp.b32 %r4355, 16384, 0, %p1067;
or.b32 %r6256, %r4355, %r6256;
$L__BB0_831:
// bit 15
setp.ge.u32 %p1068, %r1748, %r6;
@%p1068 bra $L__BB0_833;
add.s32 %r4356, %r1750, %r1820;
mul.wide.u32 %rd422, %r4356, 4;
add.s64 %rd423, %rd2, %rd422;
ld.global.u32 %r4357, [%rd423];
abs.s32 %r4358, %r4357;
setp.gt.u32 %p1069, %r4358, 4;
and.b32 %r4359, %r4358, 1;
setp.eq.b32 %p1070, %r4359, 1;
and.pred %p1071, %p1069, %p1070;
selp.b32 %r4360, 32768, 0, %p1071;
or.b32 %r6256, %r4360, %r6256;
// Combine the per-group flag masks into the 32-bit words used by the code that follows.
//   %r1895 = (%r6256 << 16) | %r6240
//   %r1896 = ((%r1838 << 3) & 0x88888888) | ((%r1829 & 0x88888888) >> 3)
//            i.e. bit 0 of every %r1838 nibble lands on bit 3 of that nibble, and bit 3 of every
//            %r1829 nibble lands on bit 0 of that nibble (-2004318072 == 0x88888888).
//   %r1897 = %r1753 >> (4 * max(0, %r4363)),  %r4363 = %r4108 - %r5  (%r4108 = %r6196 + 4)
//   %r1898 = %r1897 & ~%r1895
// %r6272 is zeroed here for the mask-building code that follows.
// NOTE(review): %r1753 is defined before this span; it looks like a validity mask whose upper nibbles are
// trimmed when fewer than four k-groups remain before %r5 — confirm.
$L__BB0_833:
sub.s32 %r4363, %r4108, %r5;
shl.b32 %r4364, %r6256, 16;
or.b32 %r1895, %r4364, %r6240;
and.b32 %r4365, %r1829, -2004318072;
shr.u32 %r4366, %r4365, 3;
shl.b32 %r4367, %r1838, 3;
and.b32 %r4368, %r4367, -2004318072;
or.b32 %r1896, %r4368, %r4366;
not.b32 %r4369, %r1895;
// Signed test: only a positive overshoot produces a non-zero shift amount.
setp.gt.s32 %p1072, %r4363, 0;
mov.u32 %r6272, 0;
shl.b32 %r4370, %r4363, 2;
selp.b32 %r4371, %r4370, 0, %p1072;
shr.u32 %r1897, %r1753, %r4371;
and.b32 %r1898, %r1897, %r4369;
// Build the 16-bit mask %r6272: a bit is set when abs.s32 of the loaded word equals 3.
// The words are re-read through the precomputed addresses %rd19..%rd34.
// Bit layout: bit 4*k + j, k = 0..3 selected by the group predicates %p800/%p817/%p834/%p851
// (a set predicate skips the whole group), j = 0..3 selected by the per-lane guards
// (%r6 <= %r6192, %r1746 >= %r6, %r1747 >= %r6, %r1748 >= %r6 skip lanes 0..3 respectively).
// %r6272 enters this code holding 0.
@%p800 bra $L__BB0_842;
// k = 0: addresses %rd19..%rd22 -> bits 0..3
setp.le.u32 %p1074, %r6, %r6192;
mov.u32 %r6272, 0;
@%p1074 bra $L__BB0_836;
ld.global.u32 %r4373, [%rd19];
abs.s32 %r4374, %r4373;
setp.eq.s32 %p1075, %r4374, 3;
selp.u32 %r6272, 1, 0, %p1075;
$L__BB0_836:
setp.ge.u32 %p1076, %r1746, %r6;
@%p1076 bra $L__BB0_838;
ld.global.u32 %r4375, [%rd20];
abs.s32 %r4376, %r4375;
setp.eq.s32 %p1077, %r4376, 3;
selp.b32 %r4377, 2, 0, %p1077;
or.b32 %r6272, %r4377, %r6272;
$L__BB0_838:
setp.ge.u32 %p1078, %r1747, %r6;
@%p1078 bra $L__BB0_840;
ld.global.u32 %r4378, [%rd21];
abs.s32 %r4379, %r4378;
setp.eq.s32 %p1079, %r4379, 3;
selp.b32 %r4380, 4, 0, %p1079;
or.b32 %r6272, %r4380, %r6272;
$L__BB0_840:
setp.ge.u32 %p1080, %r1748, %r6;
@%p1080 bra $L__BB0_842;
ld.global.u32 %r4381, [%rd22];
abs.s32 %r4382, %r4381;
setp.eq.s32 %p1081, %r4382, 3;
selp.b32 %r4383, 8, 0, %p1081;
or.b32 %r6272, %r4383, %r6272;
$L__BB0_842:
// k = 1: addresses %rd23..%rd26 -> bits 4..7
@%p817 bra $L__BB0_851;
setp.le.u32 %p1083, %r6, %r6192;
@%p1083 bra $L__BB0_845;
ld.global.u32 %r4384, [%rd23];
abs.s32 %r4385, %r4384;
setp.eq.s32 %p1084, %r4385, 3;
selp.b32 %r4386, 16, 0, %p1084;
or.b32 %r6272, %r4386, %r6272;
$L__BB0_845:
setp.ge.u32 %p1085, %r1746, %r6;
@%p1085 bra $L__BB0_847;
ld.global.u32 %r4387, [%rd24];
abs.s32 %r4388, %r4387;
setp.eq.s32 %p1086, %r4388, 3;
selp.b32 %r4389, 32, 0, %p1086;
or.b32 %r6272, %r4389, %r6272;
$L__BB0_847:
setp.ge.u32 %p1087, %r1747, %r6;
@%p1087 bra $L__BB0_849;
ld.global.u32 %r4390, [%rd25];
abs.s32 %r4391, %r4390;
setp.eq.s32 %p1088, %r4391, 3;
selp.b32 %r4392, 64, 0, %p1088;
or.b32 %r6272, %r4392, %r6272;
$L__BB0_849:
setp.ge.u32 %p1089, %r1748, %r6;
@%p1089 bra $L__BB0_851;
ld.global.u32 %r4393, [%rd26];
abs.s32 %r4394, %r4393;
setp.eq.s32 %p1090, %r4394, 3;
selp.b32 %r4395, 128, 0, %p1090;
or.b32 %r6272, %r4395, %r6272;
$L__BB0_851:
// k = 2: addresses %rd27..%rd30 -> bits 8..11
@%p834 bra $L__BB0_860;
setp.le.u32 %p1092, %r6, %r6192;
@%p1092 bra $L__BB0_854;
ld.global.u32 %r4396, [%rd27];
abs.s32 %r4397, %r4396;
setp.eq.s32 %p1093, %r4397, 3;
selp.b32 %r4398, 256, 0, %p1093;
or.b32 %r6272, %r4398, %r6272;
$L__BB0_854:
setp.ge.u32 %p1094, %r1746, %r6;
@%p1094 bra $L__BB0_856;
ld.global.u32 %r4399, [%rd28];
abs.s32 %r4400, %r4399;
setp.eq.s32 %p1095, %r4400, 3;
selp.b32 %r4401, 512, 0, %p1095;
or.b32 %r6272, %r4401, %r6272;
$L__BB0_856:
setp.ge.u32 %p1096, %r1747, %r6;
@%p1096 bra $L__BB0_858;
ld.global.u32 %r4402, [%rd29];
abs.s32 %r4403, %r4402;
setp.eq.s32 %p1097, %r4403, 3;
selp.b32 %r4404, 1024, 0, %p1097;
or.b32 %r6272, %r4404, %r6272;
$L__BB0_858:
setp.ge.u32 %p1098, %r1748, %r6;
@%p1098 bra $L__BB0_860;
ld.global.u32 %r4405, [%rd30];
abs.s32 %r4406, %r4405;
setp.eq.s32 %p1099, %r4406, 3;
selp.b32 %r4407, 2048, 0, %p1099;
or.b32 %r6272, %r4407, %r6272;
$L__BB0_860:
// k = 3: addresses %rd31..%rd34 -> bits 12..15
@%p851 bra $L__BB0_869;
setp.le.u32 %p1101, %r6, %r6192;
@%p1101 bra $L__BB0_863;
ld.global.u32 %r4408, [%rd31];
abs.s32 %r4409, %r4408;
setp.eq.s32 %p1102, %r4409, 3;
selp.b32 %r4410, 4096, 0, %p1102;
or.b32 %r6272, %r4410, %r6272;
$L__BB0_863:
setp.ge.u32 %p1103, %r1746, %r6;
@%p1103 bra $L__BB0_865;
ld.global.u32 %r4411, [%rd32];
abs.s32 %r4412, %r4411;
setp.eq.s32 %p1104, %r4412, 3;
selp.b32 %r4413, 8192, 0, %p1104;
or.b32 %r6272, %r4413, %r6272;
$L__BB0_865:
setp.ge.u32 %p1105, %r1747, %r6;
@%p1105 bra $L__BB0_867;
ld.global.u32 %r4414, [%rd33];
abs.s32 %r4415, %r4414;
setp.eq.s32 %p1106, %r4415, 3;
selp.b32 %r4416, 16384, 0, %p1106;
or.b32 %r6272, %r4416, %r6272;
$L__BB0_867:
setp.ge.u32 %p1107, %r1748, %r6;
@%p1107 bra $L__BB0_869;
ld.global.u32 %r4417, [%rd34];
abs.s32 %r4418, %r4417;
setp.eq.s32 %p1108, %r4418, 3;
selp.b32 %r4419, 32768, 0, %p1108;
or.b32 %r6272, %r4419, %r6272;
// Spread the flag mask %r1895 to adjacent bit positions and seed the worklist %r6282.
//   -286331154 == 0xEEEEEEEE (every nibble with bit 0 cleared), so
//   %r4422 = (%r1895 & 0xEEEEEEEE) >> 1   moves each flag one lane down without leaving its nibble,
//   %r4424 = (%r1895 << 1) & 0xEEEEEEEE   moves each flag one lane up without leaving its nibble.
//   %r4427 = %r1895 | %r1896 | %r4424 | %r4422
//   %r4433 = %r4427 | (%r6197 >> 12) | (%r4427 << 4) | (%r4427 >> 4)   (also spread one nibble either way)
//   %r6282 = %r1898 & %r4433           initial worklist
//   %r1931 = %r6272 & %r1897           "magnitude == 3" flags restricted by %r1897
// If the worklist is empty, %r6303 = 0 and control goes to $L__BB0_924; otherwise the loop state is
// initialised: %r6281 = 0 (bits already popped), %r6283 = 0, %r6286 = %r6300.
// NOTE(review): %r6197 is defined before this span; presumably a mask carried over from the previous
// iteration's groups — confirm.
$L__BB0_869:
and.b32 %r4421, %r1895, -286331154;
shr.u32 %r4422, %r4421, 1;
shl.b32 %r4423, %r1895, 1;
and.b32 %r4424, %r4423, -286331154;
or.b32 %r4425, %r1895, %r1896;
or.b32 %r4426, %r4425, %r4424;
or.b32 %r4427, %r4426, %r4422;
and.b32 %r1931, %r6272, %r1897;
shr.u32 %r4428, %r4427, 4;
shl.b32 %r4429, %r4427, 4;
shr.u32 %r4430, %r6197, 12;
or.b32 %r4431, %r4427, %r4430;
or.b32 %r4432, %r4431, %r4429;
or.b32 %r4433, %r4432, %r4428;
and.b32 %r6282, %r1898, %r4433;
setp.eq.s32 %p1109, %r6282, 0;
mov.u32 %r4420, 0;
mov.u32 %r6303, %r4420;
@%p1109 bra $L__BB0_924;
mov.u32 %r6281, 0;
mov.u32 %r6283, %r6281;
mov.u32 %r6286, %r6300;
// Worklist loop head: pop the lowest set bit of %r6282 and append one bit to the byte accumulator.
//   %r1939 = index of the lowest set bit of %r6282 (brev followed by bfind.shiftamt on the reversed word)
//   %r1940 = 1 << %r1939
//   %r6282 &= ~%r1940   (rotating 0xFFFFFFFE left by %r1939 yields the all-ones word with that bit cleared)
//   %r6281 |= %r1940    (bits popped so far)
//   %r1943 = %r1940 & %r1931; the appended bit value is (%r1943 != 0)
// Bit packing state: %rs747 accumulates bits, %r6298 is the next bit position, and the byte capacity
// is 8 bits when %r6302 == 0, otherwise 7 bits.
// When the accumulator fills up:
//   - if %r6286 == -1: %r6300 = -1 and %p1630 = false (the code after $L__BB0_874 then branches to $L__BB0_928);
//   - otherwise %r6302 = ((%rs747 & 255) == 255), %r6300 = %r6286 + 1, and %rs747 / %r6298 are reset to 0.
// No st instruction is issued here; only the counter %r6300 advances.
// NOTE(review): the 8-vs-7-bit capacity after a 0xFF byte looks like JPEG 2000 style bit stuffing,
// and -1 looks like an overflow/error sentinel — confirm against the kernel source.
$L__BB0_871:
brev.b32 %r4436, %r6282;
bfind.shiftamt.u32 %r1939, %r4436;
mov.pred %p1632, -1;
mov.u32 %r4437, 1;
shl.b32 %r1940, %r4437, %r1939;
mov.u32 %r4438, -2;
shf.l.wrap.b32 %r4439, %r4438, %r4438, %r1939;
and.b32 %r6282, %r6282, %r4439;
or.b32 %r6281, %r1940, %r6281;
and.b32 %r1943, %r1940, %r1931;
setp.ne.s32 %p1111, %r1943, 0;
selp.u32 %r4440, 1, 0, %p1111;
// Capacity of the current byte: 8 bits, or 7 when %r6302 is non-zero.
setp.eq.s32 %p1112, %r6302, 0;
selp.b32 %r4441, 8, 7, %p1112;
shl.b32 %r4442, %r4440, %r6298;
cvt.u16.u32 %rs537, %r4442;
or.b16 %rs747, %rs747, %rs537;
add.s32 %r6298, %r6298, 1;
setp.lt.u32 %p1113, %r6298, %r4441;
mov.pred %p1630, %p1632;
@%p1113 bra $L__BB0_874;
// Byte is full. %r6300 is preset to -1 and only overwritten below when %r6286 != -1.
setp.eq.s32 %p1115, %r6286, -1;
mov.pred %p1630, 0;
mov.u32 %r6300, -1;
@%p1115 bra $L__BB0_874;
and.b16 %rs539, %rs747, 255;
setp.eq.s16 %p1117, %rs539, 255;
selp.u32 %r6302, 1, 0, %p1117;
add.s32 %r6300, %r6286, 1;
mov.u16 %rs747, 0;
mov.u32 %r6298, 0;
mov.pred %p1630, %p1632;
$L__BB0_874:
// %p1630 == false means the bit could not be appended: leave with %r6307 = 0.
mov.u32 %r6307, 0;
not.pred %p1119, %p1630;
@%p1119 bra $L__BB0_928;
// If the popped bit was not flagged in %r1931 (%r1943 == 0), skip straight to $L__BB0_916.
// Otherwise record it in %r6283 and select the constant %r6290 for bit index %r1939 through a
// binary decision tree (a compiled 16-way switch):
//   index  0 -> 51     (0x33)     index  8 -> 13056  (0x3300)
//   index  1 -> 118    (0x76)     index  9 -> 30208  (0x7600)
//   index  2 -> 236    (0xEC)     index 10 -> 60416  (0xEC00)
//   index  3 -> 200    (0xC8)     index 11 -> 51200  (0xC800)
//   index  4 -> 816    (0x330)    index 12 -> 208896 (0x33000)
//   index  5 -> 1888   (0x760)    index 13 -> 483328 (0x76000)
//   index  6 -> 3776   (0xEC0)    index 14 -> 966656 (0xEC000)
//   index  7 -> 3200   (0xC80)    index 15 -> 819200 (0xC8000)
//   any other index -> 0 ($L__BB0_914)
// Equivalently: {0x33, 0x76, 0xEC, 0xC8}[index & 3] << (4 * (index >> 2)), i.e. the bit itself,
// the next lane of the same nibble (if any), and up to three adjacent lanes of the next nibble.
// Every case ends at $L__BB0_915.
setp.eq.s32 %p1120, %r1943, 0;
@%p1120 bra $L__BB0_916;
or.b32 %r6283, %r1940, %r6283;
// Default value 51 serves the index-0 case, which branches directly to $L__BB0_915.
mov.u32 %r6290, 51;
setp.gt.s32 %p1121, %r1939, 7;
@%p1121 bra $L__BB0_892;
setp.gt.s32 %p1133, %r1939, 3;
@%p1133 bra $L__BB0_885;
setp.gt.s32 %p1139, %r1939, 1;
@%p1139 bra $L__BB0_882;
setp.eq.s32 %p1142, %r1939, 0;
@%p1142 bra $L__BB0_915;
setp.eq.s32 %p1143, %r1939, 1;
@%p1143 bra $L__BB0_881;
bra.uni $L__BB0_914;
$L__BB0_881:
// index 1
mov.u32 %r6290, 118;
bra.uni $L__BB0_915;
$L__BB0_892:
// indices 8..15
setp.gt.s32 %p1122, %r1939, 11;
@%p1122 bra $L__BB0_900;
setp.gt.s32 %p1128, %r1939, 9;
@%p1128 bra $L__BB0_897;
setp.eq.s32 %p1131, %r1939, 8;
@%p1131 bra $L__BB0_910;
setp.eq.s32 %p1132, %r1939, 9;
@%p1132 bra $L__BB0_896;
bra.uni $L__BB0_914;
$L__BB0_896:
// index 9
mov.u32 %r6290, 30208;
bra.uni $L__BB0_915;
$L__BB0_885:
// indices 4..7
setp.gt.s32 %p1134, %r1939, 5;
@%p1134 bra $L__BB0_889;
setp.eq.s32 %p1137, %r1939, 4;
@%p1137 bra $L__BB0_912;
setp.eq.s32 %p1138, %r1939, 5;
@%p1138 bra $L__BB0_888;
bra.uni $L__BB0_914;
$L__BB0_888:
// index 5
mov.u32 %r6290, 1888;
bra.uni $L__BB0_915;
$L__BB0_900:
// indices 12..15
setp.gt.s32 %p1123, %r1939, 13;
@%p1123 bra $L__BB0_904;
setp.eq.s32 %p1126, %r1939, 12;
@%p1126 bra $L__BB0_908;
setp.eq.s32 %p1127, %r1939, 13;
@%p1127 bra $L__BB0_903;
bra.uni $L__BB0_914;
$L__BB0_903:
// index 13
mov.u32 %r6290, 483328;
bra.uni $L__BB0_915;
$L__BB0_882:
// indices 2..3
setp.eq.s32 %p1140, %r1939, 2;
@%p1140 bra $L__BB0_913;
setp.eq.s32 %p1141, %r1939, 3;
@%p1141 bra $L__BB0_884;
bra.uni $L__BB0_914;
$L__BB0_884:
// index 3
mov.u32 %r6290, 200;
bra.uni $L__BB0_915;
$L__BB0_897:
// indices 10..11
setp.eq.s32 %p1129, %r1939, 10;
@%p1129 bra $L__BB0_909;
setp.eq.s32 %p1130, %r1939, 11;
@%p1130 bra $L__BB0_899;
bra.uni $L__BB0_914;
$L__BB0_899:
// index 11
mov.u32 %r6290, 51200;
bra.uni $L__BB0_915;
$L__BB0_889:
// indices 6..7
setp.eq.s32 %p1135, %r1939, 6;
@%p1135 bra $L__BB0_911;
setp.eq.s32 %p1136, %r1939, 7;
@%p1136 bra $L__BB0_891;
bra.uni $L__BB0_914;
$L__BB0_891:
// index 7
mov.u32 %r6290, 3200;
bra.uni $L__BB0_915;
$L__BB0_904:
// indices 14..15
setp.eq.s32 %p1124, %r1939, 14;
@%p1124 bra $L__BB0_907;
setp.ne.s32 %p1125, %r1939, 15;
@%p1125 bra $L__BB0_914;
// index 15
mov.u32 %r6290, 819200;
bra.uni $L__BB0_915;
$L__BB0_910:
// index 8
mov.u32 %r6290, 13056;
bra.uni $L__BB0_915;
$L__BB0_912:
// index 4
mov.u32 %r6290, 816;
bra.uni $L__BB0_915;
$L__BB0_908:
// index 12
mov.u32 %r6290, 208896;
bra.uni $L__BB0_915;
$L__BB0_913:
// index 2
mov.u32 %r6290, 236;
bra.uni $L__BB0_915;
$L__BB0_909:
// index 10
mov.u32 %r6290, 60416;
bra.uni $L__BB0_915;
$L__BB0_911:
// index 6
mov.u32 %r6290, 3776;
bra.uni $L__BB0_915;
$L__BB0_907:
// index 14
mov.u32 %r6290, 966656;
bra.uni $L__BB0_915;
$L__BB0_914:
// index outside 0..15: no additional positions
mov.u32 %r6290, 0;
$L__BB0_915:
not.b32 %r4463, %r6281;
and.b32 %r4464, %r1898, %r4463;
and.b32 %r4465, %r4464, %r6290;
or.b32 %r6282, %r4465, %r6282;
$L__BB0_916:
setp.ne.s32 %p1144, %r6282, 0;
mov.u32 %r6286, %r6300;
@%p1144 bra $L__BB0_871;
setp.eq.s32 %p1145, %r6283, 0;
mov.u32 %r6303, 0;
@%p1145 bra $L__BB0_924;
mov.u32 %r6297, %r6300;
mov.u32 %r6296, %r6283;
$L__BB0_919:
mov.u32 %r6300, %r6297;
setp.eq.s32 %p1146, %r6296, 0;
mov.u32 %r6303, %r6283;
@%p1146 bra $L__BB0_924;
brev.b32 %r4467, %r6296;
bfind.shiftamt.u32 %r4468, %r4467;
mov.pred %p1632, -1;
mov.u32 %r4469, -2;
shf.l.wrap.b32 %r4470, %r4469, %r4469, %r4468;
and.b32 %r6296, %r6296, %r4470;
shr.u32 %r4471, %r4468, 2;
and.b32 %r4472, %r4468, 3;
add.s32 %r4473, %r4472, %r6192;
add.s32 %r4474, %r4471, %r6196;
mad.lo.s32 %r4475, %r4473, %r1, %r4474;
mul.wide.u32 %rd424, %r4475, 4;
add.s64 %rd425, %rd2, %rd424;
ld.global.u32 %r4476, [%rd425];
shr.u32 %r4477, %r4476, 31;
setp.eq.s32 %p1148, %r6302, 0;
selp.b32 %r4478, 8, 7, %p1148;
shl.b32 %r4479, %r4477, %r6298;
cvt.u16.u32 %rs540, %r4479;
or.b16 %rs747, %rs747, %rs540;
add.s32 %r6298, %r6298, 1;
setp.lt.u32 %p1149, %r6298, %r4478;
mov.u32 %r6297, %r6300;
mov.pred %p1631, %p1632;
@%p1149 bra $L__BB0_923;
setp.eq.s32 %p1151, %r6300, -1;
mov.pred %p1631, 0;
mov.u32 %r6297, -1;
@%p1151 bra $L__BB0_923;
and.b16 %rs542, %rs747, 255;
setp.eq.s16 %p1153, %rs542, 255;
selp.u32 %r6302, 1, 0, %p1153;
add.s32 %r6297, %r6300, 1;
mov.u16 %rs747, 0;
mov.u32 %r6298, 0;
mov.pred %p1631, %p1632;
$L__BB0_923:
mov.u32 %r6307, 0;
@%p1631 bra $L__BB0_919;
bra.uni $L__BB0_928;
$L__BB0_924:
not.b32 %r4484, %r6303;
and.b32 %r4485, %r1931, %r4484;
setp.ne.s32 %p1156, %r4485, 0;
mov.u32 %r6307, %r4420;
mov.pred %p1632, %p794;
@%p1156 bra $L__BB0_928;
setp.lt.u32 %p1157, %r4108, %r5;
or.b32 %r4486, %r6303, %r1895;
st.local.u16 [%rd18], %r4486;
shr.u32 %r4487, %r4486, 16;
st.local.u16 [%rd18+2], %r4487;
shl.b32 %r4488, %r4486, 1;
and.b32 %r4489, %r4488, 57344;
and.b32 %r4490, %r4486, 57344;
shr.u32 %r4491, %r4490, 1;
or.b32 %r4492, %r4486, %r1896;
and.b32 %r4493, %r4492, 61440;
or.b32 %r4494, %r4493, %r4489;
or.b32 %r6197, %r4494, %r4491;
mov.u32 %r6196, %r4108;
@%p1157 bra $L__BB0_689;
$L__BB0_926:
add.s32 %r6192, %r6192, 4;
setp.gt.u32 %p1158, %r6, %r6192;
@%p1158 bra $L__BB0_687;
setp.eq.s32 %p1159, %r6298, 0;
add.s32 %r4495, %r6300, 1;
setp.eq.s32 %p1160, %r6300, -1;
selp.b32 %r4496, -1, %r4495, %p1160;
selp.b32 %r4497, %r6300, %r4496, %p1159;
setp.ne.s32 %p1161, %r6300, -1;
or.pred %p1162, %p1159, %p1161;
selp.b32 %r6307, %r4497, 0, %p1162;
not.pred %p1632, %p1162;
$L__BB0_928:
@%p1632 bra $L__BB0_930;
bra.uni $L__BB0_929;
$L__BB0_930:
mov.u32 %r4505, 2;
st.global.u32 [%rd3], %r4505;
mov.u32 %r4506, 6;
st.global.u32 [%rd3+4], %r4506;
mov.u32 %r4507, 0;
st.global.u32 [%rd3+8], %r4507;
st.global.u32 [%rd3+12], %r4507;
st.global.u32 [%rd3+16], %r4507;
st.global.u32 [%rd3+20], %r4507;
st.global.u32 [%rd3+24], %r4507;
st.global.u32 [%rd3+28], %r4507;
bra.uni $L__BB0_1254;
$L__BB0_931:
mov.u32 %r6308, 0;
mov.u32 %r6309, %r6308;
mov.u32 %r6310, %r6308;
bra.uni $L__BB0_932;
$L__BB0_929:
mad.lo.s32 %r4498, %r6, %r5, 7;
shr.u32 %r4499, %r4498, 3;
max.u32 %r6308, %r4499, %r6307;
add.s32 %r4500, %r5180, 6;
mul.wide.u32 %rd426, %r4500, 613566757;
shr.u64 %rd427, %rd426, 32;
cvt.u32.u64 %r4501, %rd427;
sub.s32 %r4502, %r4500, %r4501;
shr.u32 %r4503, %r4502, 1;
add.s32 %r4504, %r4503, %r4501;
shr.u32 %r6309, %r4504, 2;
add.s32 %r6310, %r6308, %r6309;
$L__BB0_932:
add.s32 %r1984, %r6310, %r1733;
setp.gt.u32 %p1163, %r1984, %r3;
setp.lt.u32 %p1164, %r1733, 2;
or.pred %p1165, %p1164, %p1163;
@%p1165 bra $L__BB0_1247;
bra.uni $L__BB0_933;
$L__BB0_1247:
mov.u32 %r5086, 1;
st.global.u32 [%rd3], %r5086;
mov.u32 %r5087, 4;
st.global.u32 [%rd3+4], %r5087;
mov.u32 %r5088, 0;
st.global.u32 [%rd3+8], %r5088;
st.global.u32 [%rd3+12], %r5088;
st.global.u32 [%rd3+16], %r5088;
st.global.u32 [%rd3+20], %r5088;
st.global.u32 [%rd3+24], %r5088;
st.global.u32 [%rd3+28], %r5088;
bra.uni $L__BB0_1254;
$L__BB0_933:
setp.eq.s32 %p1166, %r5271, 0;
@%p1166 bra $L__BB0_939;
add.s32 %r4512, %r5271, -1;
and.b32 %r6315, %r5271, 3;
setp.lt.u32 %p1167, %r4512, 3;
mov.u32 %r6313, 0;
@%p1167 bra $L__BB0_937;
sub.s32 %r6312, %r5271, %r6315;
mov.u32 %r6313, 0;
$L__BB0_936:
add.s32 %r4514, %r6313, 17477;
cvt.u64.u32 %rd429, %r4514;
add.s64 %rd430, %rd1, %rd429;
ld.global.u8 %rs543, [%rd430];
add.s32 %r4515, %r6313, %r5907;
cvt.u64.u32 %rd431, %r4515;
add.s64 %rd432, %rd1, %rd431;
st.global.u8 [%rd432], %rs543;
ld.global.u8 %rs544, [%rd430+1];
add.s32 %r4516, %r4515, 1;
cvt.u64.u32 %rd433, %r4516;
add.s64 %rd434, %rd1, %rd433;
st.global.u8 [%rd434], %rs544;
ld.global.u8 %rs545, [%rd430+2];
add.s32 %r4517, %r4515, 2;
cvt.u64.u32 %rd435, %r4517;
add.s64 %rd436, %rd1, %rd435;
st.global.u8 [%rd436], %rs545;
add.s32 %r4518, %r6313, 17480;
cvt.u64.u32 %rd437, %r4518;
add.s64 %rd438, %rd1, %rd437;
ld.global.u8 %rs546, [%rd438];
add.s32 %r4519, %r4515, 3;
cvt.u64.u32 %rd439, %r4519;
add.s64 %rd440, %rd1, %rd439;
st.global.u8 [%rd440], %rs546;
add.s32 %r6313, %r6313, 4;
add.s32 %r6312, %r6312, -4;
setp.ne.s32 %p1168, %r6312, 0;
@%p1168 bra $L__BB0_936;
$L__BB0_937:
setp.eq.s32 %p1169, %r6315, 0;
@%p1169 bra $L__BB0_939;
$L__BB0_938:
.pragma "nounroll";
add.s32 %r4520, %r6313, 17477;
cvt.u64.u32 %rd441, %r4520;
add.s64 %rd442, %rd1, %rd441;
ld.global.u8 %rs547, [%rd442];
add.s32 %r4521, %r6313, %r5907;
cvt.u64.u32 %rd443, %r4521;
add.s64 %rd444, %rd1, %rd443;
st.global.u8 [%rd444], %rs547;
add.s32 %r6313, %r6313, 1;
add.s32 %r6315, %r6315, -1;
setp.ne.s32 %p1170, %r6315, 0;
@%p1170 bra $L__BB0_938;
$L__BB0_939:
setp.eq.s32 %p1171, %r5719, 0;
@%p1171 bra $L__BB0_945;
mov.u32 %r4523, 20549;
sub.s32 %r1996, %r4523, %r5719;
and.b32 %r6320, %r5719, 3;
add.s32 %r4524, %r5719, -1;
setp.lt.u32 %p1172, %r4524, 3;
mov.u32 %r6318, 0;
@%p1172 bra $L__BB0_943;
sub.s32 %r6317, %r5719, %r6320;
mov.u32 %r6318, 0;
$L__BB0_942:
add.s32 %r4526, %r1996, %r6318;
cvt.u64.u32 %rd445, %r4526;
add.s64 %rd446, %rd1, %rd445;
ld.global.u8 %rs548, [%rd446];
add.s32 %r4527, %r6318, %r1732;
cvt.u64.u32 %rd447, %r4527;
add.s64 %rd448, %rd1, %rd447;
st.global.u8 [%rd448], %rs548;
add.s32 %r4528, %r6318, 1;
add.s32 %r4529, %r1996, %r4528;
cvt.u64.u32 %rd449, %r4529;
add.s64 %rd450, %rd1, %rd449;
ld.global.u8 %rs549, [%rd450];
add.s32 %r4530, %r4528, %r1732;
cvt.u64.u32 %rd451, %r4530;
add.s64 %rd452, %rd1, %rd451;
st.global.u8 [%rd452], %rs549;
add.s32 %r4531, %r6318, 2;
add.s32 %r4532, %r1996, %r4531;
cvt.u64.u32 %rd453, %r4532;
add.s64 %rd454, %rd1, %rd453;
ld.global.u8 %rs550, [%rd454];
add.s32 %r4533, %r4531, %r1732;
cvt.u64.u32 %rd455, %r4533;
add.s64 %rd456, %rd1, %rd455;
st.global.u8 [%rd456], %rs550;
add.s32 %r4534, %r6318, 3;
add.s32 %r4535, %r1996, %r4534;
cvt.u64.u32 %rd457, %r4535;
add.s64 %rd458, %rd1, %rd457;
ld.global.u8 %rs551, [%rd458];
add.s32 %r4536, %r4534, %r1732;
cvt.u64.u32 %rd459, %r4536;
add.s64 %rd460, %rd1, %rd459;
st.global.u8 [%rd460], %rs551;
add.s32 %r6318, %r6318, 4;
add.s32 %r6317, %r6317, -4;
setp.ne.s32 %p1173, %r6317, 0;
@%p1173 bra $L__BB0_942;
$L__BB0_943:
setp.eq.s32 %p1174, %r6320, 0;
@%p1174 bra $L__BB0_945;
$L__BB0_944:
.pragma "nounroll";
add.s32 %r4537, %r1996, %r6318;
cvt.u64.u32 %rd461, %r4537;
add.s64 %rd462, %rd1, %rd461;
ld.global.u8 %rs552, [%rd462];
add.s32 %r4538, %r6318, %r1732;
cvt.u64.u32 %rd463, %r4538;
add.s64 %rd464, %rd1, %rd463;
st.global.u8 [%rd464], %rs552;
add.s32 %r6318, %r6318, 1;
add.s32 %r6320, %r6320, -1;
setp.ne.s32 %p1175, %r6320, 0;
@%p1175 bra $L__BB0_944;
$L__BB0_945:
add.s32 %r4539, %r5719, %r5271;
shr.u32 %r4540, %r4539, 4;
add.s32 %r4541, %r1733, -1;
cvt.u64.u32 %rd465, %r4541;
add.s64 %rd466, %rd1, %rd465;
st.global.u8 [%rd466], %r4540;
add.s32 %r4542, %r1733, -2;
cvt.u64.u32 %rd467, %r4542;
add.s64 %rd468, %rd1, %rd467;
ld.global.u8 %rs553, [%rd468];
and.b16 %rs554, %rs553, 240;
cvt.u16.u32 %rs555, %r4539;
and.b16 %rs556, %rs555, 15;
or.b16 %rs557, %rs554, %rs556;
st.global.u8 [%rd468], %rs557;
setp.eq.s32 %p1176, %r6310, 0;
@%p1176 bra $L__BB0_951;
add.s32 %r4544, %r6310, -1;
and.b32 %r6325, %r6310, 3;
setp.lt.u32 %p1177, %r4544, 3;
mov.u32 %r6323, 0;
@%p1177 bra $L__BB0_949;
sub.s32 %r6322, %r6310, %r6325;
mov.u32 %r6323, 0;
$L__BB0_948:
add.s32 %r4546, %r6323, %r1733;
cvt.u64.u32 %rd469, %r4546;
add.s64 %rd470, %rd1, %rd469;
mov.u16 %rs558, 0;
st.global.u8 [%rd470], %rs558;
add.s32 %r4547, %r4546, 1;
cvt.u64.u32 %rd471, %r4547;
add.s64 %rd472, %rd1, %rd471;
st.global.u8 [%rd472], %rs558;
add.s32 %r4548, %r4546, 2;
cvt.u64.u32 %rd473, %r4548;
add.s64 %rd474, %rd1, %rd473;
st.global.u8 [%rd474], %rs558;
add.s32 %r4549, %r4546, 3;
cvt.u64.u32 %rd475, %r4549;
add.s64 %rd476, %rd1, %rd475;
st.global.u8 [%rd476], %rs558;
add.s32 %r6323, %r6323, 4;
add.s32 %r6322, %r6322, -4;
setp.ne.s32 %p1178, %r6322, 0;
@%p1178 bra $L__BB0_948;
$L__BB0_949:
setp.eq.s32 %p1179, %r6325, 0;
@%p1179 bra $L__BB0_951;
$L__BB0_950:
.pragma "nounroll";
add.s32 %r4550, %r6323, %r1733;
cvt.u64.u32 %rd477, %r4550;
add.s64 %rd478, %rd1, %rd477;
mov.u16 %rs559, 0;
st.global.u8 [%rd478], %rs559;
add.s32 %r6323, %r6323, 1;
add.s32 %r6325, %r6325, -1;
setp.ne.s32 %p1180, %r6325, 0;
@%p1180 bra $L__BB0_950;
$L__BB0_951:
setp.ne.s32 %p1181, %r4, 3;
@%p1181 bra $L__BB0_1243;
ld.param.u64 %rd637, [ j2k_htj2k_encode_codeblock_param_1];
cvt.u64.u32 %rd35, %r1733;
add.s64 %rd36, %rd637, %rd35;
add.s32 %r4551, %r5, 3;
shr.u32 %r4552, %r4551, 2;
add.s32 %r4553, %r4552, 8;
setp.gt.u32 %p1183, %r4553, 513;
mov.pred %p1182, -1;
mov.pred %p1635, %p1182;
@%p1183 bra $L__BB0_1202;
mov.u16 %rs755, 0;
st.local.u16 [%rd16], %rs755;
st.local.u16 [%rd16+2], %rs755;
st.local.u16 [%rd16+4], %rs755;
st.local.u16 [%rd16+6], %rs755;
st.local.u16 [%rd16+8], %rs755;
st.local.u16 [%rd16+10], %rs755;
st.local.u16 [%rd16+12], %rs755;
st.local.u16 [%rd16+14], %rs755;
st.local.u16 [%rd16+16], %rs755;
st.local.u16 [%rd16+18], %rs755;
st.local.u16 [%rd16+20], %rs755;
st.local.u16 [%rd16+22], %rs755;
st.local.u16 [%rd16+24], %rs755;
st.local.u16 [%rd16+26], %rs755;
st.local.u16 [%rd16+28], %rs755;
st.local.u16 [%rd16+30], %rs755;
st.local.u16 [%rd16+32], %rs755;
st.local.u16 [%rd16+34], %rs755;
st.local.u16 [%rd16+36], %rs755;
st.local.u16 [%rd16+38], %rs755;
st.local.u16 [%rd16+40], %rs755;
st.local.u16 [%rd16+42], %rs755;
st.local.u16 [%rd16+44], %rs755;
st.local.u16 [%rd16+46], %rs755;
st.local.u16 [%rd16+48], %rs755;
st.local.u16 [%rd16+50], %rs755;
st.local.u16 [%rd16+52], %rs755;
st.local.u16 [%rd16+54], %rs755;
st.local.u16 [%rd16+56], %rs755;
st.local.u16 [%rd16+58], %rs755;
st.local.u16 [%rd16+60], %rs755;
st.local.u16 [%rd16+62], %rs755;
st.local.u16 [%rd16+64], %rs755;
st.local.u16 [%rd16+66], %rs755;
st.local.u16 [%rd16+68], %rs755;
st.local.u16 [%rd16+70], %rs755;
st.local.u16 [%rd16+72], %rs755;
st.local.u16 [%rd16+74], %rs755;
st.local.u16 [%rd16+76], %rs755;
st.local.u16 [%rd16+78], %rs755;
st.local.u16 [%rd16+80], %rs755;
st.local.u16 [%rd16+82], %rs755;
st.local.u16 [%rd16+84], %rs755;
st.local.u16 [%rd16+86], %rs755;
st.local.u16 [%rd16+88], %rs755;
st.local.u16 [%rd16+90], %rs755;
st.local.u16 [%rd16+92], %rs755;
st.local.u16 [%rd16+94], %rs755;
st.local.u16 [%rd16+96], %rs755;
st.local.u16 [%rd16+98], %rs755;
st.local.u16 [%rd16+100], %rs755;
st.local.u16 [%rd16+102], %rs755;
st.local.u16 [%rd16+104], %rs755;
st.local.u16 [%rd16+106], %rs755;
st.local.u16 [%rd16+108], %rs755;
st.local.u16 [%rd16+110], %rs755;
st.local.u16 [%rd16+112], %rs755;
st.local.u16 [%rd16+114], %rs755;
st.local.u16 [%rd16+116], %rs755;
st.local.u16 [%rd16+118], %rs755;
st.local.u16 [%rd16+120], %rs755;
st.local.u16 [%rd16+122], %rs755;
st.local.u16 [%rd16+124], %rs755;
st.local.u16 [%rd16+126], %rs755;
st.local.u16 [%rd16+128], %rs755;
st.local.u16 [%rd16+130], %rs755;
st.local.u16 [%rd16+132], %rs755;
st.local.u16 [%rd16+134], %rs755;
st.local.u16 [%rd16+136], %rs755;
st.local.u16 [%rd16+138], %rs755;
st.local.u16 [%rd16+140], %rs755;
st.local.u16 [%rd16+142], %rs755;
st.local.u16 [%rd16+144], %rs755;
st.local.u16 [%rd16+146], %rs755;
st.local.u16 [%rd16+148], %rs755;
st.local.u16 [%rd16+150], %rs755;
st.local.u16 [%rd16+152], %rs755;
st.local.u16 [%rd16+154], %rs755;
st.local.u16 [%rd16+156], %rs755;
st.local.u16 [%rd16+158], %rs755;
st.local.u16 [%rd16+160], %rs755;
st.local.u16 [%rd16+162], %rs755;
st.local.u16 [%rd16+164], %rs755;
st.local.u16 [%rd16+166], %rs755;
st.local.u16 [%rd16+168], %rs755;
st.local.u16 [%rd16+170], %rs755;
st.local.u16 [%rd16+172], %rs755;
st.local.u16 [%rd16+174], %rs755;
st.local.u16 [%rd16+176], %rs755;
st.local.u16 [%rd16+178], %rs755;
st.local.u16 [%rd16+180], %rs755;
st.local.u16 [%rd16+182], %rs755;
st.local.u16 [%rd16+184], %rs755;
st.local.u16 [%rd16+186], %rs755;
st.local.u16 [%rd16+188], %rs755;
st.local.u16 [%rd16+190], %rs755;
st.local.u16 [%rd16+192], %rs755;
st.local.u16 [%rd16+194], %rs755;
st.local.u16 [%rd16+196], %rs755;
st.local.u16 [%rd16+198], %rs755;
st.local.u16 [%rd16+200], %rs755;
st.local.u16 [%rd16+202], %rs755;
st.local.u16 [%rd16+204], %rs755;
st.local.u16 [%rd16+206], %rs755;
st.local.u16 [%rd16+208], %rs755;
st.local.u16 [%rd16+210], %rs755;
st.local.u16 [%rd16+212], %rs755;
st.local.u16 [%rd16+214], %rs755;
st.local.u16 [%rd16+216], %rs755;
st.local.u16 [%rd16+218], %rs755;
st.local.u16 [%rd16+220], %rs755;
st.local.u16 [%rd16+222], %rs755;
st.local.u16 [%rd16+224], %rs755;
st.local.u16 [%rd16+226], %rs755;
st.local.u16 [%rd16+228], %rs755;
st.local.u16 [%rd16+230], %rs755;
st.local.u16 [%rd16+232], %rs755;
st.local.u16 [%rd16+234], %rs755;
st.local.u16 [%rd16+236], %rs755;
st.local.u16 [%rd16+238], %rs755;
st.local.u16 [%rd16+240], %rs755;
st.local.u16 [%rd16+242], %rs755;
st.local.u16 [%rd16+244], %rs755;
st.local.u16 [%rd16+246], %rs755;
st.local.u16 [%rd16+248], %rs755;
st.local.u16 [%rd16+250], %rs755;
st.local.u16 [%rd16+252], %rs755;
st.local.u16 [%rd16+254], %rs755;
st.local.u16 [%rd16+256], %rs755;
st.local.u16 [%rd16+258], %rs755;
st.local.u16 [%rd16+260], %rs755;
st.local.u16 [%rd16+262], %rs755;
st.local.u16 [%rd16+264], %rs755;
st.local.u16 [%rd16+266], %rs755;
st.local.u16 [%rd16+268], %rs755;
st.local.u16 [%rd16+270], %rs755;
st.local.u16 [%rd16+272], %rs755;
st.local.u16 [%rd16+274], %rs755;
st.local.u16 [%rd16+276], %rs755;
st.local.u16 [%rd16+278], %rs755;
st.local.u16 [%rd16+280], %rs755;
st.local.u16 [%rd16+282], %rs755;
st.local.u16 [%rd16+284], %rs755;
st.local.u16 [%rd16+286], %rs755;
st.local.u16 [%rd16+288], %rs755;
st.local.u16 [%rd16+290], %rs755;
st.local.u16 [%rd16+292], %rs755;
st.local.u16 [%rd16+294], %rs755;
st.local.u16 [%rd16+296], %rs755;
st.local.u16 [%rd16+298], %rs755;
st.local.u16 [%rd16+300], %rs755;
st.local.u16 [%rd16+302], %rs755;
st.local.u16 [%rd16+304], %rs755;
st.local.u16 [%rd16+306], %rs755;
st.local.u16 [%rd16+308], %rs755;
st.local.u16 [%rd16+310], %rs755;
st.local.u16 [%rd16+312], %rs755;
st.local.u16 [%rd16+314], %rs755;
st.local.u16 [%rd16+316], %rs755;
st.local.u16 [%rd16+318], %rs755;
st.local.u16 [%rd16+320], %rs755;
st.local.u16 [%rd16+322], %rs755;
st.local.u16 [%rd16+324], %rs755;
st.local.u16 [%rd16+326], %rs755;
st.local.u16 [%rd16+328], %rs755;
st.local.u16 [%rd16+330], %rs755;
st.local.u16 [%rd16+332], %rs755;
st.local.u16 [%rd16+334], %rs755;
st.local.u16 [%rd16+336], %rs755;
st.local.u16 [%rd16+338], %rs755;
st.local.u16 [%rd16+340], %rs755;
st.local.u16 [%rd16+342], %rs755;
st.local.u16 [%rd16+344], %rs755;
st.local.u16 [%rd16+346], %rs755;
st.local.u16 [%rd16+348], %rs755;
st.local.u16 [%rd16+350], %rs755;
st.local.u16 [%rd16+352], %rs755;
st.local.u16 [%rd16+354], %rs755;
st.local.u16 [%rd16+356], %rs755;
st.local.u16 [%rd16+358], %rs755;
st.local.u16 [%rd16+360], %rs755;
st.local.u16 [%rd16+362], %rs755;
st.local.u16 [%rd16+364], %rs755;
st.local.u16 [%rd16+366], %rs755;
st.local.u16 [%rd16+368], %rs755;
st.local.u16 [%rd16+370], %rs755;
st.local.u16 [%rd16+372], %rs755;
st.local.u16 [%rd16+374], %rs755;
st.local.u16 [%rd16+376], %rs755;
st.local.u16 [%rd16+378], %rs755;
st.local.u16 [%rd16+380], %rs755;
st.local.u16 [%rd16+382], %rs755;
st.local.u16 [%rd16+384], %rs755;
st.local.u16 [%rd16+386], %rs755;
st.local.u16 [%rd16+388], %rs755;
st.local.u16 [%rd16+390], %rs755;
st.local.u16 [%rd16+392], %rs755;
st.local.u16 [%rd16+394], %rs755;
st.local.u16 [%rd16+396], %rs755;
st.local.u16 [%rd16+398], %rs755;
st.local.u16 [%rd16+400], %rs755;
st.local.u16 [%rd16+402], %rs755;
st.local.u16 [%rd16+404], %rs755;
st.local.u16 [%rd16+406], %rs755;
st.local.u16 [%rd16+408], %rs755;
st.local.u16 [%rd16+410], %rs755;
st.local.u16 [%rd16+412], %rs755;
st.local.u16 [%rd16+414], %rs755;
st.local.u16 [%rd16+416], %rs755;
st.local.u16 [%rd16+418], %rs755;
st.local.u16 [%rd16+420], %rs755;
st.local.u16 [%rd16+422], %rs755;
st.local.u16 [%rd16+424], %rs755;
st.local.u16 [%rd16+426], %rs755;
st.local.u16 [%rd16+428], %rs755;
st.local.u16 [%rd16+430], %rs755;
st.local.u16 [%rd16+432], %rs755;
st.local.u16 [%rd16+434], %rs755;
st.local.u16 [%rd16+436], %rs755;
st.local.u16 [%rd16+438], %rs755;
st.local.u16 [%rd16+440], %rs755;
st.local.u16 [%rd16+442], %rs755;
st.local.u16 [%rd16+444], %rs755;
st.local.u16 [%rd16+446], %rs755;
st.local.u16 [%rd16+448], %rs755;
st.local.u16 [%rd16+450], %rs755;
st.local.u16 [%rd16+452], %rs755;
st.local.u16 [%rd16+454], %rs755;
st.local.u16 [%rd16+456], %rs755;
st.local.u16 [%rd16+458], %rs755;
st.local.u16 [%rd16+460], %rs755;
st.local.u16 [%rd16+462], %rs755;
st.local.u16 [%rd16+464], %rs755;
st.local.u16 [%rd16+466], %rs755;
st.local.u16 [%rd16+468], %rs755;
st.local.u16 [%rd16+470], %rs755;
st.local.u16 [%rd16+472], %rs755;
st.local.u16 [%rd16+474], %rs755;
st.local.u16 [%rd16+476], %rs755;
st.local.u16 [%rd16+478], %rs755;
st.local.u16 [%rd16+480], %rs755;
st.local.u16 [%rd16+482], %rs755;
st.local.u16 [%rd16+484], %rs755;
st.local.u16 [%rd16+486], %rs755;
st.local.u16 [%rd16+488], %rs755;
st.local.u16 [%rd16+490], %rs755;
st.local.u16 [%rd16+492], %rs755;
st.local.u16 [%rd16+494], %rs755;
st.local.u16 [%rd16+496], %rs755;
st.local.u16 [%rd16+498], %rs755;
st.local.u16 [%rd16+500], %rs755;
st.local.u16 [%rd16+502], %rs755;
st.local.u16 [%rd16+504], %rs755;
st.local.u16 [%rd16+506], %rs755;
st.local.u16 [%rd16+508], %rs755;
st.local.u16 [%rd16+510], %rs755;
st.local.u16 [%rd16+512], %rs755;
st.local.u16 [%rd16+514], %rs755;
st.local.u16 [%rd16+516], %rs755;
st.local.u16 [%rd16+518], %rs755;
st.local.u16 [%rd16+520], %rs755;
st.local.u16 [%rd16+522], %rs755;
st.local.u16 [%rd16+524], %rs755;
st.local.u16 [%rd16+526], %rs755;
st.local.u16 [%rd16+528], %rs755;
st.local.u16 [%rd16+530], %rs755;
st.local.u16 [%rd16+532], %rs755;
st.local.u16 [%rd16+534], %rs755;
st.local.u16 [%rd16+536], %rs755;
st.local.u16 [%rd16+538], %rs755;
st.local.u16 [%rd16+540], %rs755;
st.local.u16 [%rd16+542], %rs755;
st.local.u16 [%rd16+544], %rs755;
st.local.u16 [%rd16+546], %rs755;
st.local.u16 [%rd16+548], %rs755;
st.local.u16 [%rd16+550], %rs755;
st.local.u16 [%rd16+552], %rs755;
st.local.u16 [%rd16+554], %rs755;
st.local.u16 [%rd16+556], %rs755;
st.local.u16 [%rd16+558], %rs755;
st.local.u16 [%rd16+560], %rs755;
st.local.u16 [%rd16+562], %rs755;
st.local.u16 [%rd16+564], %rs755;
st.local.u16 [%rd16+566], %rs755;
st.local.u16 [%rd16+568], %rs755;
st.local.u16 [%rd16+570], %rs755;
st.local.u16 [%rd16+572], %rs755;
st.local.u16 [%rd16+574], %rs755;
st.local.u16 [%rd16+576], %rs755;
st.local.u16 [%rd16+578], %rs755;
st.local.u16 [%rd16+580], %rs755;
st.local.u16 [%rd16+582], %rs755;
st.local.u16 [%rd16+584], %rs755;
st.local.u16 [%rd16+586], %rs755;
st.local.u16 [%rd16+588], %rs755;
st.local.u16 [%rd16+590], %rs755;
st.local.u16 [%rd16+592], %rs755;
st.local.u16 [%rd16+594], %rs755;
st.local.u16 [%rd16+596], %rs755;
st.local.u16 [%rd16+598], %rs755;
st.local.u16 [%rd16+600], %rs755;
st.local.u16 [%rd16+602], %rs755;
st.local.u16 [%rd16+604], %rs755;
st.local.u16 [%rd16+606], %rs755;
st.local.u16 [%rd16+608], %rs755;
st.local.u16 [%rd16+610], %rs755;
st.local.u16 [%rd16+612], %rs755;
st.local.u16 [%rd16+614], %rs755;
st.local.u16 [%rd16+616], %rs755;
st.local.u16 [%rd16+618], %rs755;
st.local.u16 [%rd16+620], %rs755;
st.local.u16 [%rd16+622], %rs755;
st.local.u16 [%rd16+624], %rs755;
st.local.u16 [%rd16+626], %rs755;
st.local.u16 [%rd16+628], %rs755;
st.local.u16 [%rd16+630], %rs755;
st.local.u16 [%rd16+632], %rs755;
st.local.u16 [%rd16+634], %rs755;
st.local.u16 [%rd16+636], %rs755;
st.local.u16 [%rd16+638], %rs755;
st.local.u16 [%rd16+640], %rs755;
st.local.u16 [%rd16+642], %rs755;
st.local.u16 [%rd16+644], %rs755;
st.local.u16 [%rd16+646], %rs755;
st.local.u16 [%rd16+648], %rs755;
st.local.u16 [%rd16+650], %rs755;
st.local.u16 [%rd16+652], %rs755;
st.local.u16 [%rd16+654], %rs755;
st.local.u16 [%rd16+656], %rs755;
st.local.u16 [%rd16+658], %rs755;
st.local.u16 [%rd16+660], %rs755;
st.local.u16 [%rd16+662], %rs755;
st.local.u16 [%rd16+664], %rs755;
st.local.u16 [%rd16+666], %rs755;
st.local.u16 [%rd16+668], %rs755;
st.local.u16 [%rd16+670], %rs755;
st.local.u16 [%rd16+672], %rs755;
st.local.u16 [%rd16+674], %rs755;
st.local.u16 [%rd16+676], %rs755;
st.local.u16 [%rd16+678], %rs755;
st.local.u16 [%rd16+680], %rs755;
st.local.u16 [%rd16+682], %rs755;
st.local.u16 [%rd16+684], %rs755;
st.local.u16 [%rd16+686], %rs755;
st.local.u16 [%rd16+688], %rs755;
st.local.u16 [%rd16+690], %rs755;
st.local.u16 [%rd16+692], %rs755;
st.local.u16 [%rd16+694], %rs755;
st.local.u16 [%rd16+696], %rs755;
st.local.u16 [%rd16+698], %rs755;
st.local.u16 [%rd16+700], %rs755;
st.local.u16 [%rd16+702], %rs755;
st.local.u16 [%rd16+704], %rs755;
st.local.u16 [%rd16+706], %rs755;
st.local.u16 [%rd16+708], %rs755;
st.local.u16 [%rd16+710], %rs755;
st.local.u16 [%rd16+712], %rs755;
st.local.u16 [%rd16+714], %rs755;
st.local.u16 [%rd16+716], %rs755;
st.local.u16 [%rd16+718], %rs755;
st.local.u16 [%rd16+720], %rs755;
st.local.u16 [%rd16+722], %rs755;
st.local.u16 [%rd16+724], %rs755;
st.local.u16 [%rd16+726], %rs755;
st.local.u16 [%rd16+728], %rs755;
st.local.u16 [%rd16+730], %rs755;
st.local.u16 [%rd16+732], %rs755;
st.local.u16 [%rd16+734], %rs755;
st.local.u16 [%rd16+736], %rs755;
st.local.u16 [%rd16+738], %rs755;
st.local.u16 [%rd16+740], %rs755;
st.local.u16 [%rd16+742], %rs755;
st.local.u16 [%rd16+744], %rs755;
st.local.u16 [%rd16+746], %rs755;
st.local.u16 [%rd16+748], %rs755;
st.local.u16 [%rd16+750], %rs755;
st.local.u16 [%rd16+752], %rs755;
st.local.u16 [%rd16+754], %rs755;
st.local.u16 [%rd16+756], %rs755;
st.local.u16 [%rd16+758], %rs755;
st.local.u16 [%rd16+760], %rs755;
st.local.u16 [%rd16+762], %rs755;
st.local.u16 [%rd16+764], %rs755;
st.local.u16 [%rd16+766], %rs755;
st.local.u16 [%rd16+768], %rs755;
st.local.u16 [%rd16+770], %rs755;
st.local.u16 [%rd16+772], %rs755;
st.local.u16 [%rd16+774], %rs755;
st.local.u16 [%rd16+776], %rs755;
st.local.u16 [%rd16+778], %rs755;
st.local.u16 [%rd16+780], %rs755;
st.local.u16 [%rd16+782], %rs755;
st.local.u16 [%rd16+784], %rs755;
st.local.u16 [%rd16+786], %rs755;
st.local.u16 [%rd16+788], %rs755;
st.local.u16 [%rd16+790], %rs755;
st.local.u16 [%rd16+792], %rs755;
st.local.u16 [%rd16+794], %rs755;
st.local.u16 [%rd16+796], %rs755;
st.local.u16 [%rd16+798], %rs755;
st.local.u16 [%rd16+800], %rs755;
st.local.u16 [%rd16+802], %rs755;
st.local.u16 [%rd16+804], %rs755;
st.local.u16 [%rd16+806], %rs755;
st.local.u16 [%rd16+808], %rs755;
st.local.u16 [%rd16+810], %rs755;
st.local.u16 [%rd16+812], %rs755;
st.local.u16 [%rd16+814], %rs755;
st.local.u16 [%rd16+816], %rs755;
st.local.u16 [%rd16+818], %rs755;
st.local.u16 [%rd16+820], %rs755;
st.local.u16 [%rd16+822], %rs755;
st.local.u16 [%rd16+824], %rs755;
st.local.u16 [%rd16+826], %rs755;
st.local.u16 [%rd16+828], %rs755;
st.local.u16 [%rd16+830], %rs755;
st.local.u16 [%rd16+832], %rs755;
st.local.u16 [%rd16+834], %rs755;
st.local.u16 [%rd16+836], %rs755;
st.local.u16 [%rd16+838], %rs755;
st.local.u16 [%rd16+840], %rs755;
st.local.u16 [%rd16+842], %rs755;
st.local.u16 [%rd16+844], %rs755;
st.local.u16 [%rd16+846], %rs755;
st.local.u16 [%rd16+848], %rs755;
st.local.u16 [%rd16+850], %rs755;
st.local.u16 [%rd16+852], %rs755;
st.local.u16 [%rd16+854], %rs755;
st.local.u16 [%rd16+856], %rs755;
st.local.u16 [%rd16+858], %rs755;
st.local.u16 [%rd16+860], %rs755;
st.local.u16 [%rd16+862], %rs755;
st.local.u16 [%rd16+864], %rs755;
st.local.u16 [%rd16+866], %rs755;
st.local.u16 [%rd16+868], %rs755;
st.local.u16 [%rd16+870], %rs755;
st.local.u16 [%rd16+872], %rs755;
st.local.u16 [%rd16+874], %rs755;
st.local.u16 [%rd16+876], %rs755;
st.local.u16 [%rd16+878], %rs755;
st.local.u16 [%rd16+880], %rs755;
st.local.u16 [%rd16+882], %rs755;
st.local.u16 [%rd16+884], %rs755;
st.local.u16 [%rd16+886], %rs755;
st.local.u16 [%rd16+888], %rs755;
st.local.u16 [%rd16+890], %rs755;
st.local.u16 [%rd16+892], %rs755;
st.local.u16 [%rd16+894], %rs755;
st.local.u16 [%rd16+896], %rs755;
st.local.u16 [%rd16+898], %rs755;
st.local.u16 [%rd16+900], %rs755;
st.local.u16 [%rd16+902], %rs755;
st.local.u16 [%rd16+904], %rs755;
st.local.u16 [%rd16+906], %rs755;
st.local.u16 [%rd16+908], %rs755;
st.local.u16 [%rd16+910], %rs755;
st.local.u16 [%rd16+912], %rs755;
st.local.u16 [%rd16+914], %rs755;
st.local.u16 [%rd16+916], %rs755;
st.local.u16 [%rd16+918], %rs755;
st.local.u16 [%rd16+920], %rs755;
st.local.u16 [%rd16+922], %rs755;
st.local.u16 [%rd16+924], %rs755;
st.local.u16 [%rd16+926], %rs755;
st.local.u16 [%rd16+928], %rs755;
st.local.u16 [%rd16+930], %rs755;
st.local.u16 [%rd16+932], %rs755;
st.local.u16 [%rd16+934], %rs755;
st.local.u16 [%rd16+936], %rs755;
st.local.u16 [%rd16+938], %rs755;
st.local.u16 [%rd16+940], %rs755;
st.local.u16 [%rd16+942], %rs755;
st.local.u16 [%rd16+944], %rs755;
st.local.u16 [%rd16+946], %rs755;
st.local.u16 [%rd16+948], %rs755;
st.local.u16 [%rd16+950], %rs755;
st.local.u16 [%rd16+952], %rs755;
st.local.u16 [%rd16+954], %rs755;
st.local.u16 [%rd16+956], %rs755;
st.local.u16 [%rd16+958], %rs755;
st.local.u16 [%rd16+960], %rs755;
st.local.u16 [%rd16+962], %rs755;
st.local.u16 [%rd16+964], %rs755;
st.local.u16 [%rd16+966], %rs755;
st.local.u16 [%rd16+968], %rs755;
st.local.u16 [%rd16+970], %rs755;
st.local.u16 [%rd16+972], %rs755;
st.local.u16 [%rd16+974], %rs755;
st.local.u16 [%rd16+976], %rs755;
st.local.u16 [%rd16+978], %rs755;
st.local.u16 [%rd16+980], %rs755;
st.local.u16 [%rd16+982], %rs755;
st.local.u16 [%rd16+984], %rs755;
st.local.u16 [%rd16+986], %rs755;
st.local.u16 [%rd16+988], %rs755;
st.local.u16 [%rd16+990], %rs755;
st.local.u16 [%rd16+992], %rs755;
st.local.u16 [%rd16+994], %rs755;
st.local.u16 [%rd16+996], %rs755;
st.local.u16 [%rd16+998], %rs755;
st.local.u16 [%rd16+1000], %rs755;
st.local.u16 [%rd16+1002], %rs755;
st.local.u16 [%rd16+1004], %rs755;
st.local.u16 [%rd16+1006], %rs755;
st.local.u16 [%rd16+1008], %rs755;
st.local.u16 [%rd16+1010], %rs755;
st.local.u16 [%rd16+1012], %rs755;
st.local.u16 [%rd16+1014], %rs755;
st.local.u16 [%rd16+1016], %rs755;
st.local.u16 [%rd16+1018], %rs755;
st.local.u16 [%rd16+1020], %rs755;
st.local.u16 [%rd16+1022], %rs755;
st.local.u16 [%rd16+1024], %rs755;
mov.u32 %r6326, 0;
mov.u32 %r6436, %r6326;
mov.u32 %r6432, %r6326;
mov.u32 %r6434, %r6326;
$L__BB0_954:
@%p9 bra $L__BB0_1197;
sub.s32 %r4560, %r6, %r6326;
add.s32 %r2023, %r6326, 4;
mul.lo.s32 %r2024, %r2023, %r1;
add.s32 %r2025, %r6326, 5;
add.s32 %r2026, %r2024, %r1;
add.s32 %r2027, %r6326, 6;
shl.b32 %r4561, %r1, 1;
add.s32 %r2028, %r2024, %r4561;
add.s32 %r2029, %r6326, 7;
mul.lo.s32 %r4562, %r1, 3;
add.s32 %r2030, %r2024, %r4562;
add.s32 %r2031, %r6326, 1;
add.s32 %r2032, %r6326, 2;
add.s32 %r2033, %r6326, 3;
mul.lo.s32 %r2034, %r6326, %r1;
add.s32 %r2035, %r2034, %r4562;
sub.s32 %r2036, %r2035, %r1;
sub.s32 %r2037, %r2036, %r1;
setp.lt.u32 %p1185, %r4560, 2;
selp.b32 %r4563, 4369, 13107, %p1185;
setp.lt.u32 %p1186, %r4560, 3;
selp.b32 %r4564, %r4563, 30583, %p1186;
setp.lt.u32 %p1187, %r4560, 4;
selp.b32 %r2038, %r4564, 65535, %p1187;
mov.u32 %r4559, 0;
mov.u32 %r6330, %r4559;
mov.u32 %r6331, %r4559;
$L__BB0_956:
shr.u32 %r4566, %r6330, 2;
mul.wide.u32 %rd479, %r4566, 2;
add.s64 %rd37, %rd16, %rd479;
ld.local.u16 %rs249, [%rd37];
ld.local.u16 %rs250, [%rd37+2];
setp.ge.u32 %p1188, %r6330, %r5;
mov.u32 %r6342, %r4559;
@%p1188 bra $L__BB0_965;
setp.ge.u32 %p1189, %r2023, %r6;
mov.u32 %r6342, 0;
@%p1189 bra $L__BB0_959;
add.s32 %r4568, %r2024, %r6330;
mul.wide.u32 %rd480, %r4568, 4;
add.s64 %rd481, %rd2, %rd480;
ld.global.u32 %r4569, [%rd481];
abs.s32 %r4570, %r4569;
setp.gt.u32 %p1190, %r4570, 4;
and.b32 %r4571, %r4570, 1;
setp.eq.b32 %p1191, %r4571, 1;
and.pred %p1192, %p1190, %p1191;
selp.u32 %r6342, 1, 0, %p1192;
$L__BB0_959:
setp.ge.u32 %p1193, %r2025, %r6;
@%p1193 bra $L__BB0_961;
add.s32 %r4572, %r2026, %r6330;
mul.wide.u32 %rd482, %r4572, 4;
add.s64 %rd483, %rd2, %rd482;
ld.global.u32 %r4573, [%rd483];
abs.s32 %r4574, %r4573;
setp.gt.u32 %p1194, %r4574, 4;
and.b32 %r4575, %r4574, 1;
setp.eq.b32 %p1195, %r4575, 1;
and.pred %p1196, %p1194, %p1195;
selp.b32 %r4576, 2, 0, %p1196;
or.b32 %r6342, %r4576, %r6342;
$L__BB0_961:
setp.ge.u32 %p1197, %r2027, %r6;
@%p1197 bra $L__BB0_963;
add.s32 %r4577, %r2028, %r6330;
mul.wide.u32 %rd484, %r4577, 4;
add.s64 %rd485, %rd2, %rd484;
ld.global.u32 %r4578, [%rd485];
abs.s32 %r4579, %r4578;
setp.gt.u32 %p1198, %r4579, 4;
and.b32 %r4580, %r4579, 1;
setp.eq.b32 %p1199, %r4580, 1;
and.pred %p1200, %p1198, %p1199;
selp.b32 %r4581, 4, 0, %p1200;
or.b32 %r6342, %r4581, %r6342;
$L__BB0_963:
setp.ge.u32 %p1201, %r2029, %r6;
@%p1201 bra $L__BB0_965;
add.s32 %r4582, %r2030, %r6330;
mul.wide.u32 %rd486, %r4582, 4;
add.s64 %rd487, %rd2, %rd486;
ld.global.u32 %r4583, [%rd487];
abs.s32 %r4584, %r4583;
setp.gt.u32 %p1202, %r4584, 4;
and.b32 %r4585, %r4584, 1;
setp.eq.b32 %p1203, %r4585, 1;
and.pred %p1204, %p1202, %p1203;
selp.b32 %r4586, 8, 0, %p1204;
or.b32 %r6342, %r4586, %r6342;
$L__BB0_965:
add.s32 %r2052, %r6330, 1;
setp.ge.u32 %p1205, %r2052, %r5;
@%p1205 bra $L__BB0_974;
setp.ge.u32 %p1206, %r2023, %r6;
@%p1206 bra $L__BB0_968;
add.s32 %r4587, %r2024, %r2052;
mul.wide.u32 %rd488, %r4587, 4;
add.s64 %rd489, %rd2, %rd488;
ld.global.u32 %r4588, [%rd489];
abs.s32 %r4589, %r4588;
setp.gt.u32 %p1207, %r4589, 4;
and.b32 %r4590, %r4589, 1;
setp.eq.b32 %p1208, %r4590, 1;
and.pred %p1209, %p1207, %p1208;
selp.b32 %r4591, 16, 0, %p1209;
or.b32 %r6342, %r4591, %r6342;
$L__BB0_968:
setp.ge.u32 %p1210, %r2025, %r6;
@%p1210 bra $L__BB0_970;
add.s32 %r4592, %r2026, %r2052;
mul.wide.u32 %rd490, %r4592, 4;
add.s64 %rd491, %rd2, %rd490;
ld.global.u32 %r4593, [%rd491];
abs.s32 %r4594, %r4593;
setp.gt.u32 %p1211, %r4594, 4;
and.b32 %r4595, %r4594, 1;
setp.eq.b32 %p1212, %r4595, 1;
and.pred %p1213, %p1211, %p1212;
selp.b32 %r4596, 32, 0, %p1213;
or.b32 %r6342, %r4596, %r6342;
$L__BB0_970:
setp.ge.u32 %p1214, %r2027, %r6;
@%p1214 bra $L__BB0_972;
add.s32 %r4597, %r2028, %r2052;
mul.wide.u32 %rd492, %r4597, 4;
add.s64 %rd493, %rd2, %rd492;
ld.global.u32 %r4598, [%rd493];
abs.s32 %r4599, %r4598;
setp.gt.u32 %p1215, %r4599, 4;
and.b32 %r4600, %r4599, 1;
setp.eq.b32 %p1216, %r4600, 1;
and.pred %p1217, %p1215, %p1216;
selp.b32 %r4601, 64, 0, %p1217;
or.b32 %r6342, %r4601, %r6342;
$L__BB0_972:
setp.ge.u32 %p1218, %r2029, %r6;
@%p1218 bra $L__BB0_974;
add.s32 %r4602, %r2030, %r2052;
mul.wide.u32 %rd494, %r4602, 4;
add.s64 %rd495, %rd2, %rd494;
ld.global.u32 %r4603, [%rd495];
abs.s32 %r4604, %r4603;
setp.gt.u32 %p1219, %r4604, 4;
and.b32 %r4605, %r4604, 1;
setp.eq.b32 %p1220, %r4605, 1;
and.pred %p1221, %p1219, %p1220;
selp.b32 %r4606, 128, 0, %p1221;
or.b32 %r6342, %r4606, %r6342;
$L__BB0_974:
add.s32 %r2061, %r6330, 2;
setp.ge.u32 %p1222, %r2061, %r5;
@%p1222 bra $L__BB0_983;
setp.ge.u32 %p1223, %r2023, %r6;
@%p1223 bra $L__BB0_977;
add.s32 %r4607, %r2024, %r2061;
mul.wide.u32 %rd496, %r4607, 4;
add.s64 %rd497, %rd2, %rd496;
ld.global.u32 %r4608, [%rd497];
abs.s32 %r4609, %r4608;
setp.gt.u32 %p1224, %r4609, 4;
and.b32 %r4610, %r4609, 1;
setp.eq.b32 %p1225, %r4610, 1;
and.pred %p1226, %p1224, %p1225;
selp.b32 %r4611, 256, 0, %p1226;
or.b32 %r6342, %r4611, %r6342;
$L__BB0_977:
setp.ge.u32 %p1227, %r2025, %r6;
@%p1227 bra $L__BB0_979;
add.s32 %r4612, %r2026, %r2061;
mul.wide.u32 %rd498, %r4612, 4;
add.s64 %rd499, %rd2, %rd498;
ld.global.u32 %r4613, [%rd499];
abs.s32 %r4614, %r4613;
setp.gt.u32 %p1228, %r4614, 4;
and.b32 %r4615, %r4614, 1;
setp.eq.b32 %p1229, %r4615, 1;
and.pred %p1230, %p1228, %p1229;
selp.b32 %r4616, 512, 0, %p1230;
or.b32 %r6342, %r4616, %r6342;
$L__BB0_979:
setp.ge.u32 %p1231, %r2027, %r6;
@%p1231 bra $L__BB0_981;
add.s32 %r4617, %r2028, %r2061;
mul.wide.u32 %rd500, %r4617, 4;
add.s64 %rd501, %rd2, %rd500;
ld.global.u32 %r4618, [%rd501];
abs.s32 %r4619, %r4618;
setp.gt.u32 %p1232, %r4619, 4;
and.b32 %r4620, %r4619, 1;
setp.eq.b32 %p1233, %r4620, 1;
and.pred %p1234, %p1232, %p1233;
selp.b32 %r4621, 1024, 0, %p1234;
or.b32 %r6342, %r4621, %r6342;
$L__BB0_981:
setp.ge.u32 %p1235, %r2029, %r6;
@%p1235 bra $L__BB0_983;
add.s32 %r4622, %r2030, %r2061;
mul.wide.u32 %rd502, %r4622, 4;
add.s64 %rd503, %rd2, %rd502;
ld.global.u32 %r4623, [%rd503];
abs.s32 %r4624, %r4623;
setp.gt.u32 %p1236, %r4624, 4;
and.b32 %r4625, %r4624, 1;
setp.eq.b32 %p1237, %r4625, 1;
and.pred %p1238, %p1236, %p1237;
selp.b32 %r4626, 2048, 0, %p1238;
or.b32 %r6342, %r4626, %r6342;
$L__BB0_983:
add.s32 %r2070, %r6330, 3;
setp.ge.u32 %p1239, %r2070, %r5;
@%p1239 bra $L__BB0_992;
setp.ge.u32 %p1240, %r2023, %r6;
@%p1240 bra $L__BB0_986;
add.s32 %r4627, %r2024, %r2070;
mul.wide.u32 %rd504, %r4627, 4;
add.s64 %rd505, %rd2, %rd504;
ld.global.u32 %r4628, [%rd505];
abs.s32 %r4629, %r4628;
setp.gt.u32 %p1241, %r4629, 4;
and.b32 %r4630, %r4629, 1;
setp.eq.b32 %p1242, %r4630, 1;
and.pred %p1243, %p1241, %p1242;
selp.b32 %r4631, 4096, 0, %p1243;
or.b32 %r6342, %r4631, %r6342;
$L__BB0_986:
setp.ge.u32 %p1244, %r2025, %r6;
@%p1244 bra $L__BB0_988;
add.s32 %r4632, %r2026, %r2070;
mul.wide.u32 %rd506, %r4632, 4;
add.s64 %rd507, %rd2, %rd506;
ld.global.u32 %r4633, [%rd507];
abs.s32 %r4634, %r4633;
setp.gt.u32 %p1245, %r4634, 4;
and.b32 %r4635, %r4634, 1;
setp.eq.b32 %p1246, %r4635, 1;
and.pred %p1247, %p1245, %p1246;
selp.b32 %r4636, 8192, 0, %p1247;
or.b32 %r6342, %r4636, %r6342;
$L__BB0_988:
setp.ge.u32 %p1248, %r2027, %r6;
@%p1248 bra $L__BB0_990;
add.s32 %r4637, %r2028, %r2070;
mul.wide.u32 %rd508, %r4637, 4;
add.s64 %rd509, %rd2, %rd508;
ld.global.u32 %r4638, [%rd509];
abs.s32 %r4639, %r4638;
setp.gt.u32 %p1249, %r4639, 4;
and.b32 %r4640, %r4639, 1;
setp.eq.b32 %p1250, %r4640, 1;
and.pred %p1251, %p1249, %p1250;
selp.b32 %r4641, 16384, 0, %p1251;
or.b32 %r6342, %r4641, %r6342;
$L__BB0_990:
setp.ge.u32 %p1252, %r2029, %r6;
@%p1252 bra $L__BB0_992;
add.s32 %r4642, %r2030, %r2070;
mul.wide.u32 %rd510, %r4642, 4;
add.s64 %rd511, %rd2, %rd510;
ld.global.u32 %r4643, [%rd511];
abs.s32 %r4644, %r4643;
setp.gt.u32 %p1253, %r4644, 4;
and.b32 %r4645, %r4644, 1;
setp.eq.b32 %p1254, %r4645, 1;
and.pred %p1255, %p1253, %p1254;
selp.b32 %r4646, 32768, 0, %p1255;
or.b32 %r6342, %r4646, %r6342;
// ---------------------------------------------------------------------
// Column offset +4: (re)start the %r6358 bit accumulator, bits 0..3.
//
// %r4648 = %r6330 + 4 and %p1256 = (%r4648 >= %r5) are kept live and
// reused further down in this kernel.  If the column is out of range
// (%p1256) the whole group is skipped and %r6358 stays 0.
//
// For each of four index bases (%r2024, %r2026, %r2028, %r2030), guarded
// by the matching bound check (%r2023/%r2025/%r2027/%r2029 < %r6,
// unsigned), one 32-bit word is loaded from %rd2 + 4*(base + %r6330 + 4)
// and tested with:   |v| > 4 (unsigned compare)  AND  (|v| & 1) == 1.
// A passing test sets bit 0, 1, 2 or 3 of %r6358 respectively.
// NOTE(review): the bases look like row*stride values prepared before
// this view -- confirm against the code that defines %r2024..%r2030.
// ---------------------------------------------------------------------
$L__BB0_992:
add.s32 %r4648, %r6330, 4;
setp.ge.u32 %p1256, %r4648, %r5;
mov.u32 %r6358, 0;
@%p1256 bra $L__BB0_1001;
// First guarded element -> bit 0 (written with selp, not OR, because the
// accumulator is known to be 0 here).
setp.ge.u32 %p1257, %r2023, %r6;
mov.u32 %r6358, 0;
@%p1257 bra $L__BB0_995;
add.s32 %r4650, %r2024, %r6330;
add.s32 %r4651, %r4650, 4;
mul.wide.u32 %rd512, %r4651, 4;
add.s64 %rd513, %rd2, %rd512;
ld.global.u32 %r4652, [%rd513];
abs.s32 %r4653, %r4652;
setp.gt.u32 %p1258, %r4653, 4;
and.b32 %r4654, %r4653, 1;
setp.eq.b32 %p1259, %r4654, 1;
and.pred %p1260, %p1258, %p1259;
selp.u32 %r6358, 1, 0, %p1260;
// Second guarded element -> bit 1.
$L__BB0_995:
setp.ge.u32 %p1261, %r2025, %r6;
@%p1261 bra $L__BB0_997;
add.s32 %r4655, %r2026, %r6330;
add.s32 %r4656, %r4655, 4;
mul.wide.u32 %rd514, %r4656, 4;
add.s64 %rd515, %rd2, %rd514;
ld.global.u32 %r4657, [%rd515];
abs.s32 %r4658, %r4657;
setp.gt.u32 %p1262, %r4658, 4;
and.b32 %r4659, %r4658, 1;
setp.eq.b32 %p1263, %r4659, 1;
and.pred %p1264, %p1262, %p1263;
selp.b32 %r4660, 2, 0, %p1264;
or.b32 %r6358, %r4660, %r6358;
// Third guarded element -> bit 2.
$L__BB0_997:
setp.ge.u32 %p1265, %r2027, %r6;
@%p1265 bra $L__BB0_999;
add.s32 %r4661, %r2028, %r6330;
add.s32 %r4662, %r4661, 4;
mul.wide.u32 %rd516, %r4662, 4;
add.s64 %rd517, %rd2, %rd516;
ld.global.u32 %r4663, [%rd517];
abs.s32 %r4664, %r4663;
setp.gt.u32 %p1266, %r4664, 4;
and.b32 %r4665, %r4664, 1;
setp.eq.b32 %p1267, %r4665, 1;
and.pred %p1268, %p1266, %p1267;
selp.b32 %r4666, 4, 0, %p1268;
or.b32 %r6358, %r4666, %r6358;
// Fourth guarded element -> bit 3.
$L__BB0_999:
setp.ge.u32 %p1269, %r2029, %r6;
@%p1269 bra $L__BB0_1001;
add.s32 %r4667, %r2030, %r6330;
add.s32 %r4668, %r4667, 4;
mul.wide.u32 %rd518, %r4668, 4;
add.s64 %rd519, %rd2, %rd518;
ld.global.u32 %r4669, [%rd519];
abs.s32 %r4670, %r4669;
setp.gt.u32 %p1270, %r4670, 4;
and.b32 %r4671, %r4670, 1;
setp.eq.b32 %p1271, %r4671, 1;
and.pred %p1272, %p1270, %p1271;
selp.b32 %r4672, 8, 0, %p1272;
or.b32 %r6358, %r4672, %r6358;
$L__BB0_1001:
add.s32 %r2087, %r6330, 5;
setp.ge.u32 %p1273, %r2087, %r5;
@%p1273 bra $L__BB0_1010;
setp.ge.u32 %p1274, %r2023, %r6;
@%p1274 bra $L__BB0_1004;
add.s32 %r4673, %r2024, %r2087;
mul.wide.u32 %rd520, %r4673, 4;
add.s64 %rd521, %rd2, %rd520;
ld.global.u32 %r4674, [%rd521];
abs.s32 %r4675, %r4674;
setp.gt.u32 %p1275, %r4675, 4;
and.b32 %r4676, %r4675, 1;
setp.eq.b32 %p1276, %r4676, 1;
and.pred %p1277, %p1275, %p1276;
selp.b32 %r4677, 16, 0, %p1277;
or.b32 %r6358, %r4677, %r6358;
$L__BB0_1004:
setp.ge.u32 %p1278, %r2025, %r6;
@%p1278 bra $L__BB0_1006;
add.s32 %r4678, %r2026, %r2087;
mul.wide.u32 %rd522, %r4678, 4;
add.s64 %rd523, %rd2, %rd522;
ld.global.u32 %r4679, [%rd523];
abs.s32 %r4680, %r4679;
setp.gt.u32 %p1279, %r4680, 4;
and.b32 %r4681, %r4680, 1;
setp.eq.b32 %p1280, %r4681, 1;
and.pred %p1281, %p1279, %p1280;
selp.b32 %r4682, 32, 0, %p1281;
or.b32 %r6358, %r4682, %r6358;
$L__BB0_1006:
setp.ge.u32 %p1282, %r2027, %r6;
@%p1282 bra $L__BB0_1008;
add.s32 %r4683, %r2028, %r2087;
mul.wide.u32 %rd524, %r4683, 4;
add.s64 %rd525, %rd2, %rd524;
ld.global.u32 %r4684, [%rd525];
abs.s32 %r4685, %r4684;
setp.gt.u32 %p1283, %r4685, 4;
and.b32 %r4686, %r4685, 1;
setp.eq.b32 %p1284, %r4686, 1;
and.pred %p1285, %p1283, %p1284;
selp.b32 %r4687, 64, 0, %p1285;
or.b32 %r6358, %r4687, %r6358;
$L__BB0_1008:
setp.ge.u32 %p1286, %r2029, %r6;
@%p1286 bra $L__BB0_1010;
add.s32 %r4688, %r2030, %r2087;
mul.wide.u32 %rd526, %r4688, 4;
add.s64 %rd527, %rd2, %rd526;
ld.global.u32 %r4689, [%rd527];
abs.s32 %r4690, %r4689;
setp.gt.u32 %p1287, %r4690, 4;
and.b32 %r4691, %r4690, 1;
setp.eq.b32 %p1288, %r4691, 1;
and.pred %p1289, %p1287, %p1288;
selp.b32 %r4692, 128, 0, %p1289;
or.b32 %r6358, %r4692, %r6358;
$L__BB0_1010:
add.s32 %r2096, %r6330, 6;
setp.ge.u32 %p1290, %r2096, %r5;
@%p1290 bra $L__BB0_1019;
setp.ge.u32 %p1291, %r2023, %r6;
@%p1291 bra $L__BB0_1013;
add.s32 %r4693, %r2024, %r2096;
mul.wide.u32 %rd528, %r4693, 4;
add.s64 %rd529, %rd2, %rd528;
ld.global.u32 %r4694, [%rd529];
abs.s32 %r4695, %r4694;
setp.gt.u32 %p1292, %r4695, 4;
and.b32 %r4696, %r4695, 1;
setp.eq.b32 %p1293, %r4696, 1;
and.pred %p1294, %p1292, %p1293;
selp.b32 %r4697, 256, 0, %p1294;
or.b32 %r6358, %r4697, %r6358;
$L__BB0_1013:
setp.ge.u32 %p1295, %r2025, %r6;
@%p1295 bra $L__BB0_1015;
add.s32 %r4698, %r2026, %r2096;
mul.wide.u32 %rd530, %r4698, 4;
add.s64 %rd531, %rd2, %rd530;
ld.global.u32 %r4699, [%rd531];
abs.s32 %r4700, %r4699;
setp.gt.u32 %p1296, %r4700, 4;
and.b32 %r4701, %r4700, 1;
setp.eq.b32 %p1297, %r4701, 1;
and.pred %p1298, %p1296, %p1297;
selp.b32 %r4702, 512, 0, %p1298;
or.b32 %r6358, %r4702, %r6358;
$L__BB0_1015:
setp.ge.u32 %p1299, %r2027, %r6;
@%p1299 bra $L__BB0_1017;
add.s32 %r4703, %r2028, %r2096;
mul.wide.u32 %rd532, %r4703, 4;
add.s64 %rd533, %rd2, %rd532;
ld.global.u32 %r4704, [%rd533];
abs.s32 %r4705, %r4704;
setp.gt.u32 %p1300, %r4705, 4;
and.b32 %r4706, %r4705, 1;
setp.eq.b32 %p1301, %r4706, 1;
and.pred %p1302, %p1300, %p1301;
selp.b32 %r4707, 1024, 0, %p1302;
or.b32 %r6358, %r4707, %r6358;
$L__BB0_1017:
setp.ge.u32 %p1303, %r2029, %r6;
@%p1303 bra $L__BB0_1019;
add.s32 %r4708, %r2030, %r2096;
mul.wide.u32 %rd534, %r4708, 4;
add.s64 %rd535, %rd2, %rd534;
ld.global.u32 %r4709, [%rd535];
abs.s32 %r4710, %r4709;
setp.gt.u32 %p1304, %r4710, 4;
and.b32 %r4711, %r4710, 1;
setp.eq.b32 %p1305, %r4711, 1;
and.pred %p1306, %p1304, %p1305;
selp.b32 %r4712, 2048, 0, %p1306;
or.b32 %r6358, %r4712, %r6358;
$L__BB0_1019:
add.s32 %r2105, %r6330, 7;
setp.ge.u32 %p1307, %r2105, %r5;
@%p1307 bra $L__BB0_1028;
setp.ge.u32 %p1308, %r2023, %r6;
@%p1308 bra $L__BB0_1022;
add.s32 %r4713, %r2024, %r2105;
mul.wide.u32 %rd536, %r4713, 4;
add.s64 %rd537, %rd2, %rd536;
ld.global.u32 %r4714, [%rd537];
abs.s32 %r4715, %r4714;
setp.gt.u32 %p1309, %r4715, 4;
and.b32 %r4716, %r4715, 1;
setp.eq.b32 %p1310, %r4716, 1;
and.pred %p1311, %p1309, %p1310;
selp.b32 %r4717, 4096, 0, %p1311;
or.b32 %r6358, %r4717, %r6358;
$L__BB0_1022:
setp.ge.u32 %p1312, %r2025, %r6;
@%p1312 bra $L__BB0_1024;
add.s32 %r4718, %r2026, %r2105;
mul.wide.u32 %rd538, %r4718, 4;
add.s64 %rd539, %rd2, %rd538;
ld.global.u32 %r4719, [%rd539];
abs.s32 %r4720, %r4719;
setp.gt.u32 %p1313, %r4720, 4;
and.b32 %r4721, %r4720, 1;
setp.eq.b32 %p1314, %r4721, 1;
and.pred %p1315, %p1313, %p1314;
selp.b32 %r4722, 8192, 0, %p1315;
or.b32 %r6358, %r4722, %r6358;
$L__BB0_1024:
setp.ge.u32 %p1316, %r2027, %r6;
@%p1316 bra $L__BB0_1026;
add.s32 %r4723, %r2028, %r2105;
mul.wide.u32 %rd540, %r4723, 4;
add.s64 %rd541, %rd2, %rd540;
ld.global.u32 %r4724, [%rd541];
abs.s32 %r4725, %r4724;
setp.gt.u32 %p1317, %r4725, 4;
and.b32 %r4726, %r4725, 1;
setp.eq.b32 %p1318, %r4726, 1;
and.pred %p1319, %p1317, %p1318;
selp.b32 %r4727, 16384, 0, %p1319;
or.b32 %r6358, %r4727, %r6358;
$L__BB0_1026:
setp.ge.u32 %p1320, %r2029, %r6;
@%p1320 bra $L__BB0_1028;
add.s32 %r4728, %r2030, %r2105;
mul.wide.u32 %rd542, %r4728, 4;
add.s64 %rd543, %rd2, %rd542;
ld.global.u32 %r4729, [%rd543];
abs.s32 %r4730, %r4729;
setp.gt.u32 %p1321, %r4730, 4;
and.b32 %r4731, %r4730, 1;
setp.eq.b32 %p1322, %r4731, 1;
and.pred %p1323, %p1321, %p1322;
selp.b32 %r4732, 32768, 0, %p1323;
or.b32 %r6358, %r4732, %r6358;
// ---------------------------------------------------------------------
// Set-up for the group indexed by bases %r2034/%r2037/%r2036/%r2035,
// then column offset +0 of that group into %r6374 (bits 0..3).
//
// * %r2114 packs two 16-bit registers: %rs249 -> low half,
//   %rs250 -> high half.
// * %rd38..%rd41 = %rd2 + 4*(base + %r6330) for the four bases, in the
//   order %r2034, %r2037, %r2036, %r2035.  The addresses are formed
//   unconditionally but only dereferenced under the bound checks below;
//   they are read again later in this kernel.
// * %r4734..%r4737 (base + %r6330) are also reused later with +4 added.
// * %p1188 is set before this view; when true the whole column is
//   skipped and %r6374 stays 0.
//   NOTE(review): by analogy with %p1205/%p1222/... it is presumably
//   (%r6330 >= %r5) -- confirm at its definition.
//
// Per-element test (same as the rest of this pass):
//   |v| > 4 (unsigned compare)  AND  (|v| & 1) == 1
// ---------------------------------------------------------------------
$L__BB0_1028:
mov.b32 %r2114, {%rs249, %rs250};
add.s32 %r4734, %r2034, %r6330;
mul.wide.u32 %rd544, %r4734, 4;
add.s64 %rd38, %rd2, %rd544;
add.s32 %r4735, %r2037, %r6330;
mul.wide.u32 %rd545, %r4735, 4;
add.s64 %rd39, %rd2, %rd545;
add.s32 %r4736, %r2036, %r6330;
mul.wide.u32 %rd546, %r4736, 4;
add.s64 %rd40, %rd2, %rd546;
add.s32 %r4737, %r2035, %r6330;
mul.wide.u32 %rd547, %r4737, 4;
add.s64 %rd41, %rd2, %rd547;
mov.u32 %r6374, 0;
@%p1188 bra $L__BB0_1037;
// Element at %rd38, valid only when %r6326 < %r6 -> bit 0.
setp.le.u32 %p1325, %r6, %r6326;
mov.u32 %r6374, 0;
@%p1325 bra $L__BB0_1031;
ld.global.u32 %r4739, [%rd38];
abs.s32 %r4740, %r4739;
setp.gt.u32 %p1326, %r4740, 4;
and.b32 %r4741, %r4740, 1;
setp.eq.b32 %p1327, %r4741, 1;
and.pred %p1328, %p1326, %p1327;
selp.u32 %r6374, 1, 0, %p1328;
// Element at %rd39, valid only when %r2031 < %r6 -> bit 1.
$L__BB0_1031:
setp.ge.u32 %p1329, %r2031, %r6;
@%p1329 bra $L__BB0_1033;
ld.global.u32 %r4742, [%rd39];
abs.s32 %r4743, %r4742;
setp.gt.u32 %p1330, %r4743, 4;
and.b32 %r4744, %r4743, 1;
setp.eq.b32 %p1331, %r4744, 1;
and.pred %p1332, %p1330, %p1331;
selp.b32 %r4745, 2, 0, %p1332;
or.b32 %r6374, %r4745, %r6374;
// Element at %rd40, valid only when %r2032 < %r6 -> bit 2.
$L__BB0_1033:
setp.ge.u32 %p1333, %r2032, %r6;
@%p1333 bra $L__BB0_1035;
ld.global.u32 %r4746, [%rd40];
abs.s32 %r4747, %r4746;
setp.gt.u32 %p1334, %r4747, 4;
and.b32 %r4748, %r4747, 1;
setp.eq.b32 %p1335, %r4748, 1;
and.pred %p1336, %p1334, %p1335;
selp.b32 %r4749, 4, 0, %p1336;
or.b32 %r6374, %r4749, %r6374;
// Element at %rd41, valid only when %r2033 < %r6 -> bit 3.
$L__BB0_1035:
setp.ge.u32 %p1337, %r2033, %r6;
@%p1337 bra $L__BB0_1037;
ld.global.u32 %r4750, [%rd41];
abs.s32 %r4751, %r4750;
setp.gt.u32 %p1338, %r4751, 4;
and.b32 %r4752, %r4751, 1;
setp.eq.b32 %p1339, %r4752, 1;
and.pred %p1340, %p1338, %p1339;
selp.b32 %r4753, 8, 0, %p1340;
or.b32 %r6374, %r4753, %r6374;
$L__BB0_1037:
add.s32 %r4754, %r2034, %r2052;
mul.wide.u32 %rd548, %r4754, 4;
add.s64 %rd42, %rd2, %rd548;
add.s32 %r4755, %r2037, %r2052;
mul.wide.u32 %rd549, %r4755, 4;
add.s64 %rd43, %rd2, %rd549;
add.s32 %r4756, %r2036, %r2052;
mul.wide.u32 %rd550, %r4756, 4;
add.s64 %rd44, %rd2, %rd550;
add.s32 %r4757, %r2035, %r2052;
mul.wide.u32 %rd551, %r4757, 4;
add.s64 %rd45, %rd2, %rd551;
shl.b32 %r4758, %r6358, 16;
or.b32 %r2123, %r4758, %r6342;
@%p1205 bra $L__BB0_1046;
setp.le.u32 %p1342, %r6, %r6326;
@%p1342 bra $L__BB0_1040;
ld.global.u32 %r4759, [%rd42];
abs.s32 %r4760, %r4759;
setp.gt.u32 %p1343, %r4760, 4;
and.b32 %r4761, %r4760, 1;
setp.eq.b32 %p1344, %r4761, 1;
and.pred %p1345, %p1343, %p1344;
selp.b32 %r4762, 16, 0, %p1345;
or.b32 %r6374, %r4762, %r6374;
$L__BB0_1040:
setp.ge.u32 %p1346, %r2031, %r6;
@%p1346 bra $L__BB0_1042;
ld.global.u32 %r4763, [%rd43];
abs.s32 %r4764, %r4763;
setp.gt.u32 %p1347, %r4764, 4;
and.b32 %r4765, %r4764, 1;
setp.eq.b32 %p1348, %r4765, 1;
and.pred %p1349, %p1347, %p1348;
selp.b32 %r4766, 32, 0, %p1349;
or.b32 %r6374, %r4766, %r6374;
$L__BB0_1042:
setp.ge.u32 %p1350, %r2032, %r6;
@%p1350 bra $L__BB0_1044;
ld.global.u32 %r4767, [%rd44];
abs.s32 %r4768, %r4767;
setp.gt.u32 %p1351, %r4768, 4;
and.b32 %r4769, %r4768, 1;
setp.eq.b32 %p1352, %r4769, 1;
and.pred %p1353, %p1351, %p1352;
selp.b32 %r4770, 64, 0, %p1353;
or.b32 %r6374, %r4770, %r6374;
$L__BB0_1044:
setp.ge.u32 %p1354, %r2033, %r6;
@%p1354 bra $L__BB0_1046;
ld.global.u32 %r4771, [%rd45];
abs.s32 %r4772, %r4771;
setp.gt.u32 %p1355, %r4772, 4;
and.b32 %r4773, %r4772, 1;
setp.eq.b32 %p1356, %r4773, 1;
and.pred %p1357, %p1355, %p1356;
selp.b32 %r4774, 128, 0, %p1357;
or.b32 %r6374, %r4774, %r6374;
$L__BB0_1046:
add.s32 %r4775, %r2034, %r2061;
mul.wide.u32 %rd552, %r4775, 4;
add.s64 %rd46, %rd2, %rd552;
add.s32 %r4776, %r2037, %r2061;
mul.wide.u32 %rd553, %r4776, 4;
add.s64 %rd47, %rd2, %rd553;
add.s32 %r4777, %r2036, %r2061;
mul.wide.u32 %rd554, %r4777, 4;
add.s64 %rd48, %rd2, %rd554;
add.s32 %r4778, %r2035, %r2061;
mul.wide.u32 %rd555, %r4778, 4;
add.s64 %rd49, %rd2, %rd555;
@%p1222 bra $L__BB0_1055;
setp.le.u32 %p1359, %r6, %r6326;
@%p1359 bra $L__BB0_1049;
ld.global.u32 %r4779, [%rd46];
abs.s32 %r4780, %r4779;
setp.gt.u32 %p1360, %r4780, 4;
and.b32 %r4781, %r4780, 1;
setp.eq.b32 %p1361, %r4781, 1;
and.pred %p1362, %p1360, %p1361;
selp.b32 %r4782, 256, 0, %p1362;
or.b32 %r6374, %r4782, %r6374;
$L__BB0_1049:
setp.ge.u32 %p1363, %r2031, %r6;
@%p1363 bra $L__BB0_1051;
ld.global.u32 %r4783, [%rd47];
abs.s32 %r4784, %r4783;
setp.gt.u32 %p1364, %r4784, 4;
and.b32 %r4785, %r4784, 1;
setp.eq.b32 %p1365, %r4785, 1;
and.pred %p1366, %p1364, %p1365;
selp.b32 %r4786, 512, 0, %p1366;
or.b32 %r6374, %r4786, %r6374;
$L__BB0_1051:
setp.ge.u32 %p1367, %r2032, %r6;
@%p1367 bra $L__BB0_1053;
ld.global.u32 %r4787, [%rd48];
abs.s32 %r4788, %r4787;
setp.gt.u32 %p1368, %r4788, 4;
and.b32 %r4789, %r4788, 1;
setp.eq.b32 %p1369, %r4789, 1;
and.pred %p1370, %p1368, %p1369;
selp.b32 %r4790, 1024, 0, %p1370;
or.b32 %r6374, %r4790, %r6374;
$L__BB0_1053:
setp.ge.u32 %p1371, %r2033, %r6;
@%p1371 bra $L__BB0_1055;
ld.global.u32 %r4791, [%rd49];
abs.s32 %r4792, %r4791;
setp.gt.u32 %p1372, %r4792, 4;
and.b32 %r4793, %r4792, 1;
setp.eq.b32 %p1373, %r4793, 1;
and.pred %p1374, %p1372, %p1373;
selp.b32 %r4794, 2048, 0, %p1374;
or.b32 %r6374, %r4794, %r6374;
$L__BB0_1055:
add.s32 %r4795, %r2034, %r2070;
mul.wide.u32 %rd556, %r4795, 4;
add.s64 %rd50, %rd2, %rd556;
add.s32 %r4796, %r2037, %r2070;
mul.wide.u32 %rd557, %r4796, 4;
add.s64 %rd51, %rd2, %rd557;
add.s32 %r4797, %r2036, %r2070;
mul.wide.u32 %rd558, %r4797, 4;
add.s64 %rd52, %rd2, %rd558;
add.s32 %r4798, %r2035, %r2070;
mul.wide.u32 %rd559, %r4798, 4;
add.s64 %rd53, %rd2, %rd559;
@%p1239 bra $L__BB0_1064;
setp.le.u32 %p1376, %r6, %r6326;
@%p1376 bra $L__BB0_1058;
ld.global.u32 %r4799, [%rd50];
abs.s32 %r4800, %r4799;
setp.gt.u32 %p1377, %r4800, 4;
and.b32 %r4801, %r4800, 1;
setp.eq.b32 %p1378, %r4801, 1;
and.pred %p1379, %p1377, %p1378;
selp.b32 %r4802, 4096, 0, %p1379;
or.b32 %r6374, %r4802, %r6374;
$L__BB0_1058:
setp.ge.u32 %p1380, %r2031, %r6;
@%p1380 bra $L__BB0_1060;
ld.global.u32 %r4803, [%rd51];
abs.s32 %r4804, %r4803;
setp.gt.u32 %p1381, %r4804, 4;
and.b32 %r4805, %r4804, 1;
setp.eq.b32 %p1382, %r4805, 1;
and.pred %p1383, %p1381, %p1382;
selp.b32 %r4806, 8192, 0, %p1383;
or.b32 %r6374, %r4806, %r6374;
$L__BB0_1060:
setp.ge.u32 %p1384, %r2032, %r6;
@%p1384 bra $L__BB0_1062;
ld.global.u32 %r4807, [%rd52];
abs.s32 %r4808, %r4807;
setp.gt.u32 %p1385, %r4808, 4;
and.b32 %r4809, %r4808, 1;
setp.eq.b32 %p1386, %r4809, 1;
and.pred %p1387, %p1385, %p1386;
selp.b32 %r4810, 16384, 0, %p1387;
or.b32 %r6374, %r4810, %r6374;
$L__BB0_1062:
setp.ge.u32 %p1388, %r2033, %r6;
@%p1388 bra $L__BB0_1064;
ld.global.u32 %r4811, [%rd53];
abs.s32 %r4812, %r4811;
setp.gt.u32 %p1389, %r4812, 4;
and.b32 %r4813, %r4812, 1;
setp.eq.b32 %p1390, %r4813, 1;
and.pred %p1391, %p1389, %p1390;
selp.b32 %r4814, 32768, 0, %p1391;
or.b32 %r6374, %r4814, %r6374;
$L__BB0_1064:
mov.u32 %r6390, 0;
@%p1256 bra $L__BB0_1073;
setp.le.u32 %p1393, %r6, %r6326;
mov.u32 %r6390, 0;
@%p1393 bra $L__BB0_1067;
add.s32 %r4819, %r4734, 4;
mul.wide.u32 %rd560, %r4819, 4;
add.s64 %rd561, %rd2, %rd560;
ld.global.u32 %r4820, [%rd561];
abs.s32 %r4821, %r4820;
setp.gt.u32 %p1394, %r4821, 4;
and.b32 %r4822, %r4821, 1;
setp.eq.b32 %p1395, %r4822, 1;
and.pred %p1396, %p1394, %p1395;
selp.u32 %r6390, 1, 0, %p1396;
$L__BB0_1067:
setp.ge.u32 %p1397, %r2031, %r6;
@%p1397 bra $L__BB0_1069;
add.s32 %r4824, %r4735, 4;
mul.wide.u32 %rd562, %r4824, 4;
add.s64 %rd563, %rd2, %rd562;
ld.global.u32 %r4825, [%rd563];
abs.s32 %r4826, %r4825;
setp.gt.u32 %p1398, %r4826, 4;
and.b32 %r4827, %r4826, 1;
setp.eq.b32 %p1399, %r4827, 1;
and.pred %p1400, %p1398, %p1399;
selp.b32 %r4828, 2, 0, %p1400;
or.b32 %r6390, %r4828, %r6390;
$L__BB0_1069:
setp.ge.u32 %p1401, %r2032, %r6;
@%p1401 bra $L__BB0_1071;
add.s32 %r4830, %r4736, 4;
mul.wide.u32 %rd564, %r4830, 4;
add.s64 %rd565, %rd2, %rd564;
ld.global.u32 %r4831, [%rd565];
abs.s32 %r4832, %r4831;
setp.gt.u32 %p1402, %r4832, 4;
and.b32 %r4833, %r4832, 1;
setp.eq.b32 %p1403, %r4833, 1;
and.pred %p1404, %p1402, %p1403;
selp.b32 %r4834, 4, 0, %p1404;
or.b32 %r6390, %r4834, %r6390;
$L__BB0_1071:
setp.ge.u32 %p1405, %r2033, %r6;
@%p1405 bra $L__BB0_1073;
add.s32 %r4836, %r4737, 4;
mul.wide.u32 %rd566, %r4836, 4;
add.s64 %rd567, %rd2, %rd566;
ld.global.u32 %r4837, [%rd567];
abs.s32 %r4838, %r4837;
setp.gt.u32 %p1406, %r4838, 4;
and.b32 %r4839, %r4838, 1;
setp.eq.b32 %p1407, %r4839, 1;
and.pred %p1408, %p1406, %p1407;
selp.b32 %r4840, 8, 0, %p1408;
or.b32 %r6390, %r4840, %r6390;
$L__BB0_1073:
@%p1273 bra $L__BB0_1082;
setp.le.u32 %p1410, %r6, %r6326;
@%p1410 bra $L__BB0_1076;
add.s32 %r4841, %r2034, %r2087;
mul.wide.u32 %rd568, %r4841, 4;
add.s64 %rd569, %rd2, %rd568;
ld.global.u32 %r4842, [%rd569];
abs.s32 %r4843, %r4842;
setp.gt.u32 %p1411, %r4843, 4;
and.b32 %r4844, %r4843, 1;
setp.eq.b32 %p1412, %r4844, 1;
and.pred %p1413, %p1411, %p1412;
selp.b32 %r4845, 16, 0, %p1413;
or.b32 %r6390, %r4845, %r6390;
$L__BB0_1076:
setp.ge.u32 %p1414, %r2031, %r6;
@%p1414 bra $L__BB0_1078;
add.s32 %r4846, %r2037, %r2087;
mul.wide.u32 %rd570, %r4846, 4;
add.s64 %rd571, %rd2, %rd570;
ld.global.u32 %r4847, [%rd571];
abs.s32 %r4848, %r4847;
setp.gt.u32 %p1415, %r4848, 4;
and.b32 %r4849, %r4848, 1;
setp.eq.b32 %p1416, %r4849, 1;
and.pred %p1417, %p1415, %p1416;
selp.b32 %r4850, 32, 0, %p1417;
or.b32 %r6390, %r4850, %r6390;
$L__BB0_1078:
setp.ge.u32 %p1418, %r2032, %r6;
@%p1418 bra $L__BB0_1080;
add.s32 %r4851, %r2036, %r2087;
mul.wide.u32 %rd572, %r4851, 4;
add.s64 %rd573, %rd2, %rd572;
ld.global.u32 %r4852, [%rd573];
abs.s32 %r4853, %r4852;
setp.gt.u32 %p1419, %r4853, 4;
and.b32 %r4854, %r4853, 1;
setp.eq.b32 %p1420, %r4854, 1;
and.pred %p1421, %p1419, %p1420;
selp.b32 %r4855, 64, 0, %p1421;
or.b32 %r6390, %r4855, %r6390;
$L__BB0_1080:
setp.ge.u32 %p1422, %r2033, %r6;
@%p1422 bra $L__BB0_1082;
add.s32 %r4856, %r2035, %r2087;
mul.wide.u32 %rd574, %r4856, 4;
add.s64 %rd575, %rd2, %rd574;
ld.global.u32 %r4857, [%rd575];
abs.s32 %r4858, %r4857;
setp.gt.u32 %p1423, %r4858, 4;
and.b32 %r4859, %r4858, 1;
setp.eq.b32 %p1424, %r4859, 1;
and.pred %p1425, %p1423, %p1424;
selp.b32 %r4860, 128, 0, %p1425;
or.b32 %r6390, %r4860, %r6390;
$L__BB0_1082:
@%p1290 bra $L__BB0_1091;
setp.le.u32 %p1427, %r6, %r6326;
@%p1427 bra $L__BB0_1085;
add.s32 %r4861, %r2034, %r2096;
mul.wide.u32 %rd576, %r4861, 4;
add.s64 %rd577, %rd2, %rd576;
ld.global.u32 %r4862, [%rd577];
abs.s32 %r4863, %r4862;
setp.gt.u32 %p1428, %r4863, 4;
and.b32 %r4864, %r4863, 1;
setp.eq.b32 %p1429, %r4864, 1;
and.pred %p1430, %p1428, %p1429;
selp.b32 %r4865, 256, 0, %p1430;
or.b32 %r6390, %r4865, %r6390;
$L__BB0_1085:
setp.ge.u32 %p1431, %r2031, %r6;
@%p1431 bra $L__BB0_1087;
add.s32 %r4866, %r2037, %r2096;
mul.wide.u32 %rd578, %r4866, 4;
add.s64 %rd579, %rd2, %rd578;
ld.global.u32 %r4867, [%rd579];
abs.s32 %r4868, %r4867;
setp.gt.u32 %p1432, %r4868, 4;
and.b32 %r4869, %r4868, 1;
setp.eq.b32 %p1433, %r4869, 1;
and.pred %p1434, %p1432, %p1433;
selp.b32 %r4870, 512, 0, %p1434;
or.b32 %r6390, %r4870, %r6390;
$L__BB0_1087:
setp.ge.u32 %p1435, %r2032, %r6;
@%p1435 bra $L__BB0_1089;
add.s32 %r4871, %r2036, %r2096;
mul.wide.u32 %rd580, %r4871, 4;
add.s64 %rd581, %rd2, %rd580;
ld.global.u32 %r4872, [%rd581];
abs.s32 %r4873, %r4872;
setp.gt.u32 %p1436, %r4873, 4;
and.b32 %r4874, %r4873, 1;
setp.eq.b32 %p1437, %r4874, 1;
and.pred %p1438, %p1436, %p1437;
selp.b32 %r4875, 1024, 0, %p1438;
or.b32 %r6390, %r4875, %r6390;
$L__BB0_1089:
setp.ge.u32 %p1439, %r2033, %r6;
@%p1439 bra $L__BB0_1091;
add.s32 %r4876, %r2035, %r2096;
mul.wide.u32 %rd582, %r4876, 4;
add.s64 %rd583, %rd2, %rd582;
ld.global.u32 %r4877, [%rd583];
abs.s32 %r4878, %r4877;
setp.gt.u32 %p1440, %r4878, 4;
and.b32 %r4879, %r4878, 1;
setp.eq.b32 %p1441, %r4879, 1;
and.pred %p1442, %p1440, %p1441;
selp.b32 %r4880, 2048, 0, %p1442;
or.b32 %r6390, %r4880, %r6390;
$L__BB0_1091:
@%p1307 bra $L__BB0_1100;
setp.le.u32 %p1444, %r6, %r6326;
@%p1444 bra $L__BB0_1094;
add.s32 %r4881, %r2034, %r2105;
mul.wide.u32 %rd584, %r4881, 4;
add.s64 %rd585, %rd2, %rd584;
ld.global.u32 %r4882, [%rd585];
abs.s32 %r4883, %r4882;
setp.gt.u32 %p1445, %r4883, 4;
and.b32 %r4884, %r4883, 1;
setp.eq.b32 %p1446, %r4884, 1;
and.pred %p1447, %p1445, %p1446;
selp.b32 %r4885, 4096, 0, %p1447;
or.b32 %r6390, %r4885, %r6390;
$L__BB0_1094:
setp.ge.u32 %p1448, %r2031, %r6;
@%p1448 bra $L__BB0_1096;
add.s32 %r4886, %r2037, %r2105;
mul.wide.u32 %rd586, %r4886, 4;
add.s64 %rd587, %rd2, %rd586;
ld.global.u32 %r4887, [%rd587];
abs.s32 %r4888, %r4887;
setp.gt.u32 %p1449, %r4888, 4;
and.b32 %r4889, %r4888, 1;
setp.eq.b32 %p1450, %r4889, 1;
and.pred %p1451, %p1449, %p1450;
selp.b32 %r4890, 8192, 0, %p1451;
or.b32 %r6390, %r4890, %r6390;
$L__BB0_1096:
setp.ge.u32 %p1452, %r2032, %r6;
@%p1452 bra $L__BB0_1098;
add.s32 %r4891, %r2036, %r2105;
mul.wide.u32 %rd588, %r4891, 4;
add.s64 %rd589, %rd2, %rd588;
ld.global.u32 %r4892, [%rd589];
abs.s32 %r4893, %r4892;
setp.gt.u32 %p1453, %r4893, 4;
and.b32 %r4894, %r4893, 1;
setp.eq.b32 %p1454, %r4894, 1;
and.pred %p1455, %p1453, %p1454;
selp.b32 %r4895, 16384, 0, %p1455;
or.b32 %r6390, %r4895, %r6390;
$L__BB0_1098:
setp.ge.u32 %p1456, %r2033, %r6;
@%p1456 bra $L__BB0_1100;
add.s32 %r4896, %r2035, %r2105;
mul.wide.u32 %rd590, %r4896, 4;
add.s64 %rd591, %rd2, %rd590;
ld.global.u32 %r4897, [%rd591];
abs.s32 %r4898, %r4897;
setp.gt.u32 %p1457, %r4898, 4;
and.b32 %r4899, %r4898, 1;
setp.eq.b32 %p1458, %r4899, 1;
and.pred %p1459, %p1457, %p1458;
selp.b32 %r4900, 32768, 0, %p1459;
or.b32 %r6390, %r4900, %r6390;
// ---------------------------------------------------------------------
// Combine the per-column masks and derive the candidate mask.
//
//   %r2180 = (%r6390 << 16) | %r6374
//            32-bit mask, 4 bits per column offset (+0..+7).
//   %r2181 = ((%r2114 & 0x88888888) >> 3) | ((%r2123 << 3) & 0x88888888)
//            bit 3 of every nibble of %r2114 moved to bit 0, and bit 0
//            of every nibble of %r2123 moved to bit 3.
//            NOTE(review): this looks like importing the touching rows
//            of the two adjacent groups -- confirm where %rs249/%rs250
//            are produced.
//   %r2182 = %r2038 >> (4 * max(%r4903, 0)),  %r4903 = %r4648 - %r5
//            (%r4648 is %r6330 + 4).  %r2038 is defined before this
//            view; presumably a column-validity mask -- TODO confirm.
//   %r2183 = %r2182 & ~%r2180
//
// -2004318072 is the signed spelling of 0x88888888.
// ---------------------------------------------------------------------
$L__BB0_1100:
sub.s32 %r4903, %r4648, %r5;
shl.b32 %r4904, %r6390, 16;
or.b32 %r2180, %r4904, %r6374;
and.b32 %r4905, %r2114, -2004318072;
shr.u32 %r4906, %r4905, 3;
shl.b32 %r4907, %r2123, 3;
and.b32 %r4908, %r4907, -2004318072;
or.b32 %r2181, %r4908, %r4906;
not.b32 %r4909, %r2180;
// Signed test: only a positive excess produces a non-zero shift.
setp.gt.s32 %p1460, %r4903, 0;
mov.u32 %r6406, 0;
shl.b32 %r4910, %r4903, 2;
selp.b32 %r4911, %r4910, 0, %p1460;
shr.u32 %r2182, %r2038, %r4911;
and.b32 %r2183, %r2182, %r4909;
// Second scan over the same four addresses (%rd38..%rd41), column
// offset +0: a word whose absolute value is exactly 3 sets bit 0..3 of
// %r6406.  Guards are identical to the ones used when the addresses
// were first read; %p1188 (set before this view) skips the column.
@%p1188 bra $L__BB0_1109;
setp.le.u32 %p1462, %r6, %r6326;
mov.u32 %r6406, 0;
@%p1462 bra $L__BB0_1103;
ld.global.u32 %r4913, [%rd38];
abs.s32 %r4914, %r4913;
setp.eq.s32 %p1463, %r4914, 3;
selp.u32 %r6406, 1, 0, %p1463;
$L__BB0_1103:
setp.ge.u32 %p1464, %r2031, %r6;
@%p1464 bra $L__BB0_1105;
ld.global.u32 %r4915, [%rd39];
abs.s32 %r4916, %r4915;
setp.eq.s32 %p1465, %r4916, 3;
selp.b32 %r4917, 2, 0, %p1465;
or.b32 %r6406, %r4917, %r6406;
$L__BB0_1105:
setp.ge.u32 %p1466, %r2032, %r6;
@%p1466 bra $L__BB0_1107;
ld.global.u32 %r4918, [%rd40];
abs.s32 %r4919, %r4918;
setp.eq.s32 %p1467, %r4919, 3;
selp.b32 %r4920, 4, 0, %p1467;
or.b32 %r6406, %r4920, %r6406;
$L__BB0_1107:
setp.ge.u32 %p1468, %r2033, %r6;
@%p1468 bra $L__BB0_1109;
ld.global.u32 %r4921, [%rd41];
abs.s32 %r4922, %r4921;
setp.eq.s32 %p1469, %r4922, 3;
selp.b32 %r4923, 8, 0, %p1469;
or.b32 %r6406, %r4923, %r6406;
$L__BB0_1109:
@%p1205 bra $L__BB0_1118;
setp.le.u32 %p1471, %r6, %r6326;
@%p1471 bra $L__BB0_1112;
ld.global.u32 %r4924, [%rd42];
abs.s32 %r4925, %r4924;
setp.eq.s32 %p1472, %r4925, 3;
selp.b32 %r4926, 16, 0, %p1472;
or.b32 %r6406, %r4926, %r6406;
$L__BB0_1112:
setp.ge.u32 %p1473, %r2031, %r6;
@%p1473 bra $L__BB0_1114;
ld.global.u32 %r4927, [%rd43];
abs.s32 %r4928, %r4927;
setp.eq.s32 %p1474, %r4928, 3;
selp.b32 %r4929, 32, 0, %p1474;
or.b32 %r6406, %r4929, %r6406;
$L__BB0_1114:
setp.ge.u32 %p1475, %r2032, %r6;
@%p1475 bra $L__BB0_1116;
ld.global.u32 %r4930, [%rd44];
abs.s32 %r4931, %r4930;
setp.eq.s32 %p1476, %r4931, 3;
selp.b32 %r4932, 64, 0, %p1476;
or.b32 %r6406, %r4932, %r6406;
$L__BB0_1116:
setp.ge.u32 %p1477, %r2033, %r6;
@%p1477 bra $L__BB0_1118;
ld.global.u32 %r4933, [%rd45];
abs.s32 %r4934, %r4933;
setp.eq.s32 %p1478, %r4934, 3;
selp.b32 %r4935, 128, 0, %p1478;
or.b32 %r6406, %r4935, %r6406;
$L__BB0_1118:
@%p1222 bra $L__BB0_1127;
setp.le.u32 %p1480, %r6, %r6326;
@%p1480 bra $L__BB0_1121;
ld.global.u32 %r4936, [%rd46];
abs.s32 %r4937, %r4936;
setp.eq.s32 %p1481, %r4937, 3;
selp.b32 %r4938, 256, 0, %p1481;
or.b32 %r6406, %r4938, %r6406;
$L__BB0_1121:
setp.ge.u32 %p1482, %r2031, %r6;
@%p1482 bra $L__BB0_1123;
ld.global.u32 %r4939, [%rd47];
abs.s32 %r4940, %r4939;
setp.eq.s32 %p1483, %r4940, 3;
selp.b32 %r4941, 512, 0, %p1483;
or.b32 %r6406, %r4941, %r6406;
$L__BB0_1123:
setp.ge.u32 %p1484, %r2032, %r6;
@%p1484 bra $L__BB0_1125;
ld.global.u32 %r4942, [%rd48];
abs.s32 %r4943, %r4942;
setp.eq.s32 %p1485, %r4943, 3;
selp.b32 %r4944, 1024, 0, %p1485;
or.b32 %r6406, %r4944, %r6406;
$L__BB0_1125:
setp.ge.u32 %p1486, %r2033, %r6;
@%p1486 bra $L__BB0_1127;
ld.global.u32 %r4945, [%rd49];
abs.s32 %r4946, %r4945;
setp.eq.s32 %p1487, %r4946, 3;
selp.b32 %r4947, 2048, 0, %p1487;
or.b32 %r6406, %r4947, %r6406;
$L__BB0_1127:
@%p1239 bra $L__BB0_1136;
setp.le.u32 %p1489, %r6, %r6326;
@%p1489 bra $L__BB0_1130;
ld.global.u32 %r4948, [%rd50];
abs.s32 %r4949, %r4948;
setp.eq.s32 %p1490, %r4949, 3;
selp.b32 %r4950, 4096, 0, %p1490;
or.b32 %r6406, %r4950, %r6406;
$L__BB0_1130:
setp.ge.u32 %p1491, %r2031, %r6;
@%p1491 bra $L__BB0_1132;
ld.global.u32 %r4951, [%rd51];
abs.s32 %r4952, %r4951;
setp.eq.s32 %p1492, %r4952, 3;
selp.b32 %r4953, 8192, 0, %p1492;
or.b32 %r6406, %r4953, %r6406;
$L__BB0_1132:
setp.ge.u32 %p1493, %r2032, %r6;
@%p1493 bra $L__BB0_1134;
ld.global.u32 %r4954, [%rd52];
abs.s32 %r4955, %r4954;
setp.eq.s32 %p1494, %r4955, 3;
selp.b32 %r4956, 16384, 0, %p1494;
or.b32 %r6406, %r4956, %r6406;
$L__BB0_1134:
setp.ge.u32 %p1495, %r2033, %r6;
@%p1495 bra $L__BB0_1136;
ld.global.u32 %r4957, [%rd53];
abs.s32 %r4958, %r4957;
setp.eq.s32 %p1496, %r4958, 3;
selp.b32 %r4959, 32768, 0, %p1496;
or.b32 %r6406, %r4959, %r6406;
// ---------------------------------------------------------------------
// Build the initial work list %r6416 by dilating the mask %r2180.
//
// Within each 4-bit nibble (mask 0xEEEEEEEE == -286331154 keeps shifts
// from crossing a nibble boundary):
//   %r4962 = (%r2180 & 0xEEEEEEEE) >> 1      bits 1..3 -> 0..2
//   %r4964 = (%r2180 << 1) & 0xEEEEEEEE      bits 0..2 -> 1..3
//   %r4967 = %r2180 | %r2181 | %r4964 | %r4962
// Across nibbles:
//   %r4973 = %r4967 | (%r4967 << 4) | (%r4967 >> 4) | (%r6331 >> 12)
//   NOTE(review): %r6331 is defined before this view; the >> 12 brings
//   its bits 12 and above down to bit 0 -- presumably a carry-in from
//   the previously processed columns. Confirm at its definition.
//
//   %r2216 = %r6406 & %r2182     (the |v| == 3 flags, masked)
//   %r6416 = %r2183 & %r4973     (candidates that have a set neighbour)
//
// If %r6416 is empty the result register %r6437 is 0 and the loop below
// is skipped; otherwise %r6415 and %r6417 start at 0.
// ---------------------------------------------------------------------
$L__BB0_1136:
and.b32 %r4961, %r2180, -286331154;
shr.u32 %r4962, %r4961, 1;
shl.b32 %r4963, %r2180, 1;
and.b32 %r4964, %r4963, -286331154;
or.b32 %r4965, %r2180, %r2181;
or.b32 %r4966, %r4965, %r4964;
or.b32 %r4967, %r4966, %r4962;
and.b32 %r2216, %r6406, %r2182;
shr.u32 %r4968, %r4967, 4;
shl.b32 %r4969, %r4967, 4;
shr.u32 %r4970, %r6331, 12;
or.b32 %r4971, %r4967, %r4970;
or.b32 %r4972, %r4971, %r4969;
or.b32 %r4973, %r4972, %r4968;
and.b32 %r6416, %r2183, %r4973;
setp.eq.s32 %p1497, %r6416, 0;
mov.u32 %r6437, 0;
@%p1497 bra $L__BB0_1195;
mov.u32 %r6415, 0;
mov.u32 %r6417, %r6415;
// ---------------------------------------------------------------------
// Work-list loop head: pop the lowest set bit of %r6416 and emit one
// bit into the byte packer.
//
//   %r2224 = index of the lowest set bit (brev + bfind.shiftamt; the
//            work list is non-zero on every entry to this label).
//   %r2225 = 1 << %r2224
//   %r6416 &= ~%r2225   (rotating -2 left by %r2224 yields ~(1 << idx))
//   %r6415 |= %r2225    (positions already taken from the work list)
//   %r2228 = %r2225 & %r2216 ; the emitted bit is (%r2228 != 0).
//
// Byte packer state:
//   %rs755 = byte being assembled, %r6432 = bits already in it,
//   %r6434 = output byte index,   %r6308 = output capacity,
//   %r6436 = 1 if the previously completed byte was 0xFF.
// A byte holds 8 bits, or 7 when %r6436 != 0.  When the byte is full it
// is stored to %rd1 + %rd35 + %r6434 (store skipped if %rd36 == 0, but
// the position still advances).
//
// %p1633 reports success: it becomes false only when a byte is complete
// and %r6434 >= %r6308.
// ---------------------------------------------------------------------
$L__BB0_1138:
brev.b32 %r4976, %r6416;
bfind.shiftamt.u32 %r2224, %r4976;
mov.pred %p1635, -1;
mov.u32 %r4977, 1;
shl.b32 %r2225, %r4977, %r2224;
mov.u32 %r4978, -2;
shf.l.wrap.b32 %r4979, %r4978, %r4978, %r2224;
and.b32 %r6416, %r6416, %r4979;
or.b32 %r6415, %r2225, %r6415;
and.b32 %r2228, %r2225, %r2216;
setp.ne.s32 %p1499, %r2228, 0;
selp.u32 %r4980, 1, 0, %p1499;
// Capacity of the current byte: 8 bits, or 7 after a 0xFF byte.
setp.eq.s32 %p1500, %r6436, 0;
selp.b32 %r4981, 8, 7, %p1500;
// Append the new bit at position %r6432 (bits fill from bit 0 upward).
shl.b32 %r4982, %r4980, %r6432;
cvt.u16.u32 %rs561, %r4982;
or.b16 %rs755, %rs755, %rs561;
add.s32 %r6432, %r6432, 1;
setp.lt.u32 %p1501, %r6432, %r4981;
mov.pred %p1633, %p1635;
@%p1501 bra $L__BB0_1143;
// Byte complete: fail if the output is already full.
setp.ge.u32 %p1503, %r6434, %r6308;
mov.pred %p1633, 0;
@%p1503 bra $L__BB0_1143;
setp.eq.s64 %p1504, %rd36, 0;
@%p1504 bra $L__BB0_1142;
cvt.u64.u32 %rd592, %r6434;
add.s64 %rd593, %rd592, %rd35;
add.s64 %rd594, %rd1, %rd593;
st.global.u8 [%rd594], %rs755;
// Remember whether the finished byte was 0xFF, then reset the packer.
$L__BB0_1142:
and.b16 %rs563, %rs755, 255;
setp.eq.s16 %p1506, %rs563, 255;
selp.u32 %r6436, 1, 0, %p1506;
add.s32 %r6434, %r6434, 1;
mov.u16 %rs755, 0;
mov.u32 %r6432, 0;
mov.pred %p1633, %p1635;
// ---------------------------------------------------------------------
// After emitting a bit:
//   * if the packer reported failure (%p1633 false) leave via
//     $L__BB0_1202;
//   * if the emitted bit was 0 (%r2228 == 0) there is nothing to
//     propagate: go to the loop back-edge ($L__BB0_1185);
//   * otherwise record the position in %r6417 and select a propagation
//     mask %r6424 for bit index %r2224.
//
// The comparison tree below is a 16-way switch on %r2224.  Its table is
//     %r6424 = base[%r2224 & 3] << (4 * (%r2224 >> 2))
//     base   = { 0x33, 0x76, 0xEC, 0xC8 }
// i.e. (decimal, as written in the code)
//     idx  0..3  :     51,    118,    236,    200
//     idx  4..7  :    816,   1888,   3776,   3200
//     idx  8..11 :  13056,  30208,  60416,  51200
//     idx 12..15 : 208896, 483328, 966656, 819200
// Any other index yields 0 ($L__BB0_1183).  %r6424 is preloaded with 51
// so the idx == 0 case branches straight to the merge point.
//
// At the merge point ($L__BB0_1184) the selected positions are added to
// the work list, restricted to candidates not yet taken:
//     %r6416 |= %r2183 & ~%r6415 & %r6424
// ---------------------------------------------------------------------
$L__BB0_1143:
not.pred %p1508, %p1633;
@%p1508 bra $L__BB0_1202;
setp.eq.s32 %p1509, %r2228, 0;
@%p1509 bra $L__BB0_1185;
or.b32 %r6417, %r2225, %r6417;
mov.u32 %r6424, 51;
// idx > 7 -> upper half of the table.
setp.gt.s32 %p1510, %r2224, 7;
@%p1510 bra $L__BB0_1161;
setp.gt.s32 %p1522, %r2224, 3;
@%p1522 bra $L__BB0_1154;
setp.gt.s32 %p1528, %r2224, 1;
@%p1528 bra $L__BB0_1151;
// idx in {0, 1}
setp.eq.s32 %p1531, %r2224, 0;
@%p1531 bra $L__BB0_1184;
setp.eq.s32 %p1532, %r2224, 1;
@%p1532 bra $L__BB0_1150;
bra.uni $L__BB0_1183;
// idx == 1
$L__BB0_1150:
mov.u32 %r6424, 118;
bra.uni $L__BB0_1184;
// idx in 8..15
$L__BB0_1161:
setp.gt.s32 %p1511, %r2224, 11;
@%p1511 bra $L__BB0_1169;
setp.gt.s32 %p1517, %r2224, 9;
@%p1517 bra $L__BB0_1166;
setp.eq.s32 %p1520, %r2224, 8;
@%p1520 bra $L__BB0_1179;
setp.eq.s32 %p1521, %r2224, 9;
@%p1521 bra $L__BB0_1165;
bra.uni $L__BB0_1183;
// idx == 9
$L__BB0_1165:
mov.u32 %r6424, 30208;
bra.uni $L__BB0_1184;
// idx in 4..7
$L__BB0_1154:
setp.gt.s32 %p1523, %r2224, 5;
@%p1523 bra $L__BB0_1158;
setp.eq.s32 %p1526, %r2224, 4;
@%p1526 bra $L__BB0_1181;
setp.eq.s32 %p1527, %r2224, 5;
@%p1527 bra $L__BB0_1157;
bra.uni $L__BB0_1183;
// idx == 5
$L__BB0_1157:
mov.u32 %r6424, 1888;
bra.uni $L__BB0_1184;
// idx in 12..15
$L__BB0_1169:
setp.gt.s32 %p1512, %r2224, 13;
@%p1512 bra $L__BB0_1173;
setp.eq.s32 %p1515, %r2224, 12;
@%p1515 bra $L__BB0_1177;
setp.eq.s32 %p1516, %r2224, 13;
@%p1516 bra $L__BB0_1172;
bra.uni $L__BB0_1183;
// idx == 13
$L__BB0_1172:
mov.u32 %r6424, 483328;
bra.uni $L__BB0_1184;
// idx in {2, 3}
$L__BB0_1151:
setp.eq.s32 %p1529, %r2224, 2;
@%p1529 bra $L__BB0_1182;
setp.eq.s32 %p1530, %r2224, 3;
@%p1530 bra $L__BB0_1153;
bra.uni $L__BB0_1183;
// idx == 3
$L__BB0_1153:
mov.u32 %r6424, 200;
bra.uni $L__BB0_1184;
// idx in {10, 11}
$L__BB0_1166:
setp.eq.s32 %p1518, %r2224, 10;
@%p1518 bra $L__BB0_1178;
setp.eq.s32 %p1519, %r2224, 11;
@%p1519 bra $L__BB0_1168;
bra.uni $L__BB0_1183;
// idx == 11
$L__BB0_1168:
mov.u32 %r6424, 51200;
bra.uni $L__BB0_1184;
// idx in {6, 7}
$L__BB0_1158:
setp.eq.s32 %p1524, %r2224, 6;
@%p1524 bra $L__BB0_1180;
setp.eq.s32 %p1525, %r2224, 7;
@%p1525 bra $L__BB0_1160;
bra.uni $L__BB0_1183;
// idx == 7
$L__BB0_1160:
mov.u32 %r6424, 3200;
bra.uni $L__BB0_1184;
// idx >= 14: 14 has its own target, 15 falls through, anything else
// takes the default.
$L__BB0_1173:
setp.eq.s32 %p1513, %r2224, 14;
@%p1513 bra $L__BB0_1176;
setp.ne.s32 %p1514, %r2224, 15;
@%p1514 bra $L__BB0_1183;
mov.u32 %r6424, 819200;
bra.uni $L__BB0_1184;
// idx == 8
$L__BB0_1179:
mov.u32 %r6424, 13056;
bra.uni $L__BB0_1184;
// idx == 4
$L__BB0_1181:
mov.u32 %r6424, 816;
bra.uni $L__BB0_1184;
// idx == 12
$L__BB0_1177:
mov.u32 %r6424, 208896;
bra.uni $L__BB0_1184;
// idx == 2
$L__BB0_1182:
mov.u32 %r6424, 236;
bra.uni $L__BB0_1184;
// idx == 10
$L__BB0_1178:
mov.u32 %r6424, 60416;
bra.uni $L__BB0_1184;
// idx == 6
$L__BB0_1180:
mov.u32 %r6424, 3776;
bra.uni $L__BB0_1184;
// idx == 14
$L__BB0_1176:
mov.u32 %r6424, 966656;
bra.uni $L__BB0_1184;
// Default: index outside 0..15 contributes nothing.
$L__BB0_1183:
mov.u32 %r6424, 0;
// Merge point: extend the work list with the selected, still-untaken
// candidate positions.
$L__BB0_1184:
not.b32 %r5001, %r6415;
and.b32 %r5002, %r2183, %r5001;
and.b32 %r5003, %r5002, %r6424;
or.b32 %r6416, %r5003, %r6416;
$L__BB0_1185:
setp.ne.s32 %p1533, %r6416, 0;
@%p1533 bra $L__BB0_1138;
setp.eq.s32 %p1534, %r6417, 0;
mov.u32 %r6437, 0;
@%p1534 bra $L__BB0_1195;
mov.u32 %r6430, %r6417;
// ---------------------------------------------------------------------
// Walk the set bits of %r6430 (a working copy of %r6417) from the lowest
// bit upward.  For each bit n, load one 32-bit word and append its sign
// bit (bit 31) to the byte accumulator %rs755 at bit position %r6432.
//
//   word index = (%r6326 + (n & 3)) * %r1 + (%r6330 + (n >> 2))
//   address    = %rd2 + 4 * word index
//
// A byte is complete after 8 bits, or after 7 bits when %r6436 != 0.
// %r6436 is set to 1 when the byte just completed was 0xFF.
// NOTE(review): this resembles a bit-stuffing rule after 0xFF -- confirm
// against the kernel source.
// A completed byte is written to %rd1 + %rd35 + %r6434 unless %rd36 == 0;
// %r6434 is incremented either way.
//
// Exits:
//   %r6430 == 0          -> $L__BB0_1195 with %r6437 = %r6417
//   %r6434 >= %r6308     -> $L__BB0_1202 with %p1635 = true (error path)
// ---------------------------------------------------------------------
$L__BB0_1188:
setp.eq.s32 %p1535, %r6430, 0;
mov.u32 %r6437, %r6417;
@%p1535 bra $L__BB0_1195;
// bfind.shiftamt on the bit-reversed word gives n, the index of the lowest
// set bit of %r6430 (which is non-zero here).
brev.b32 %r5005, %r6430;
bfind.shiftamt.u32 %r5006, %r5005;
// %p1635 stays true for the rest of the loop; it is the value seen at
// $L__BB0_1202 if the loop leaves through the out-of-space exit.
mov.pred %p1635, -1;
// Rotating 0xFFFFFFFE left by n yields ~(1 << n); the AND clears bit n.
mov.u32 %r5007, -2;
shf.l.wrap.b32 %r5008, %r5007, %r5007, %r5006;
and.b32 %r6430, %r6430, %r5008;
// Split n into (n >> 2) and (n & 3) and form the word index.
shr.u32 %r5009, %r5006, 2;
and.b32 %r5010, %r5006, 3;
add.s32 %r5011, %r5010, %r6326;
add.s32 %r5012, %r5009, %r6330;
mad.lo.s32 %r5013, %r5011, %r1, %r5012;
mul.wide.u32 %rd595, %r5013, 4;
add.s64 %rd596, %rd2, %rd595;
ld.global.u32 %r5014, [%rd596];
// Extract the sign bit of the loaded word.
shr.u32 %r5015, %r5014, 31;
// Bit capacity of the current byte: 8 if %r6436 == 0, otherwise 7.
setp.eq.s32 %p1537, %r6436, 0;
selp.b32 %r5016, 8, 7, %p1537;
shl.b32 %r5017, %r5015, %r6432;
cvt.u16.u32 %rs564, %r5017;
or.b16 %rs755, %rs755, %rs564;
add.s32 %r6432, %r6432, 1;
setp.lt.u32 %p1538, %r6432, %r5016;
mov.pred %p1634, %p1635;
// Byte not yet full: continue with the next set bit.
@%p1538 bra $L__BB0_1194;
// Byte full: stop the loop (%p1634 = false) if the output position has
// reached the limit %r6308.
setp.ge.u32 %p1540, %r6434, %r6308;
mov.pred %p1634, 0;
@%p1540 bra $L__BB0_1194;
// The store is skipped when %rd36 == 0; the position still advances.
setp.eq.s64 %p1541, %rd36, 0;
@%p1541 bra $L__BB0_1193;
cvt.u64.u32 %rd597, %r6434;
add.s64 %rd598, %rd597, %rd35;
add.s64 %rd599, %rd1, %rd598;
st.global.u8 [%rd599], %rs755;
$L__BB0_1193:
// Record whether the completed byte was 0xFF, then reset the accumulator.
and.b16 %rs566, %rs755, 255;
setp.eq.s16 %p1543, %rs566, 255;
selp.u32 %r6436, 1, 0, %p1543;
add.s32 %r6434, %r6434, 1;
mov.u16 %rs755, 0;
mov.u32 %r6432, 0;
mov.pred %p1634, %p1635;
$L__BB0_1194:
@%p1634 bra $L__BB0_1188;
bra.uni $L__BB0_1202;
$L__BB0_1195:
not.b32 %r5019, %r6437;
and.b32 %r5020, %r2216, %r5019;
setp.ne.s32 %p1546, %r5020, 0;
mov.pred %p1635, %p1182;
@%p1546 bra $L__BB0_1202;
setp.lt.u32 %p1547, %r4648, %r5;
or.b32 %r5021, %r6437, %r2180;
st.local.u16 [%rd37], %r5021;
shr.u32 %r5022, %r5021, 16;
st.local.u16 [%rd37+2], %r5022;
shl.b32 %r5023, %r5021, 1;
and.b32 %r5024, %r5023, 57344;
and.b32 %r5025, %r5021, 57344;
shr.u32 %r5026, %r5025, 1;
or.b32 %r5027, %r5021, %r2181;
and.b32 %r5028, %r5027, 61440;
or.b32 %r5029, %r5028, %r5024;
or.b32 %r6331, %r5029, %r5026;
mov.u32 %r6330, %r4648;
@%p1547 bra $L__BB0_956;
$L__BB0_1197:
add.s32 %r6326, %r6326, 4;
setp.gt.u32 %p1548, %r6, %r6326;
@%p1548 bra $L__BB0_954;
setp.eq.s32 %p1550, %r6432, 0;
mov.pred %p1549, 0;
mov.pred %p1635, %p1549;
@%p1550 bra $L__BB0_1202;
setp.ge.u32 %p1552, %r6434, %r6308;
mov.pred %p1635, %p1182;
@%p1552 bra $L__BB0_1202;
setp.eq.s64 %p1554, %rd36, 0;
mov.pred %p1635, %p1549;
@%p1554 bra $L__BB0_1202;
cvt.u64.u32 %rd600, %r6434;
add.s64 %rd601, %rd600, %rd35;
add.s64 %rd602, %rd1, %rd601;
st.global.u8 [%rd602], %rs755;
mov.pred %p1635, %p1549;
// ---------------------------------------------------------------------
// Join point: %p1635 selects between the failure record below and the
// continuation at $L__BB0_1203.
// On failure, eight 32-bit words are stored at %rd3:
//   [+0] = 2, [+4] = 6, [+8 .. +28] = 0
// and control transfers to $L__BB0_1254.
// NOTE(review): words 0 and 1 look like a status and an error code --
// confirm against the host-side record layout.
// ---------------------------------------------------------------------
$L__BB0_1202:
@%p1635 bra $L__BB0_1246;
bra.uni $L__BB0_1203;
$L__BB0_1246:
mov.u32 %r5083, 2;
st.global.u32 [%rd3], %r5083;
mov.u32 %r5084, 6;
st.global.u32 [%rd3+4], %r5084;
mov.u32 %r5085, 0;
st.global.u32 [%rd3+8], %r5085;
st.global.u32 [%rd3+12], %r5085;
st.global.u32 [%rd3+16], %r5085;
st.global.u32 [%rd3+20], %r5085;
st.global.u32 [%rd3+24], %r5085;
st.global.u32 [%rd3+28], %r5085;
bra.uni $L__BB0_1254;
// ---------------------------------------------------------------------
// Zero-fill %r6309 bytes of global memory starting at
//   %rd1 + %rd54,  where %rd54 = zext(%r6308) + %rd35.
// If %r6309 == 0 nothing is written and control goes to $L__BB0_1244.
// The fill is a 4x-unrolled loop over %r6309 - (%r6309 & 3) bytes,
// followed by a byte-at-a-time loop for the remaining (%r6309 & 3) bytes.
// ---------------------------------------------------------------------
$L__BB0_1203:
cvt.u64.u32 %rd603, %r6308;
add.s64 %rd54, %rd603, %rd35;
setp.eq.s32 %p1556, %r6309, 0;
@%p1556 bra $L__BB0_1244;
// %r6445 = remainder count (len & 3); %r6443 = running byte offset.
add.s32 %r5031, %r6309, -1;
and.b32 %r6445, %r6309, 3;
// Fewer than 4 bytes in total: skip the unrolled loop.
setp.lt.u32 %p1557, %r5031, 3;
mov.u32 %r6443, 0;
@%p1557 bra $L__BB0_1207;
// %r6442 = number of bytes handled by the unrolled loop (multiple of 4).
sub.s32 %r6442, %r6309, %r6445;
mov.u32 %r6443, 0;
$L__BB0_1206:
// Unrolled body: clear four consecutive bytes per iteration.
cvt.u64.u32 %rd604, %r6443;
add.s64 %rd605, %rd54, %rd604;
add.s64 %rd606, %rd1, %rd605;
mov.u16 %rs567, 0;
st.global.u8 [%rd606], %rs567;
st.global.u8 [%rd606+1], %rs567;
st.global.u8 [%rd606+2], %rs567;
st.global.u8 [%rd606+3], %rs567;
add.s32 %r6443, %r6443, 4;
add.s32 %r6442, %r6442, -4;
setp.ne.s32 %p1558, %r6442, 0;
@%p1558 bra $L__BB0_1206;
$L__BB0_1207:
setp.eq.s32 %p1559, %r6445, 0;
@%p1559 bra $L__BB0_1209;
$L__BB0_1208:
// Remainder loop: clear the last 1..3 bytes one at a time.
.pragma "nounroll";
cvt.u64.u32 %rd607, %r6443;
add.s64 %rd608, %rd54, %rd607;
add.s64 %rd609, %rd1, %rd608;
mov.u16 %rs568, 0;
st.global.u8 [%rd609], %rs568;
add.s32 %r6443, %r6443, 1;
add.s32 %r6445, %r6445, -1;
setp.ne.s32 %p1560, %r6445, 0;
@%p1560 bra $L__BB0_1208;
$L__BB0_1209:
@%p9 bra $L__BB0_1237;
mov.u32 %r5037, 0;
mov.u32 %r6472, 1;
mov.u16 %rs762, 0;
mov.u32 %r6446, %r5037;
mov.u32 %r6471, %r5037;
mov.u32 %r6470, %r5037;
mov.u32 %r6469, %r5037;
// ---------------------------------------------------------------------
// Triple-nested scan over a 2-D array of 32-bit words at %rd2 with row
// pitch %r1 (in words):
//   outer  : %r6446 = 0, 4, 8, ... while %r6446 < %r6   (groups of 4 rows)
//   middle : %r6451 = 0, 8, 16, ... while %r6451 < %r5  (groups of 8 columns)
//   inner  : %r6460 = 0..7                              (column in group)
// Columns with %r6451 + %r6460 >= %r5 are skipped, as are rows whose
// index is >= %r6.
//
// For each visited word v, with a = abs(v):
//   - skipped when a < 5 or a is even;
//   - otherwise bit 1 of a, (a >> 1) & 1, is ORed into the accumulator
//     %rs762 at bit position %r6469, and the counters %r6471 (bits emitted
//     in total) and %r6469 (bits in the current byte) are incremented.
// The byte is flushed when %r6469 == 8, or when %r6469 == 7 and
// %r6472 != 0 and the low 7 bits of %rs762 are all ones.
// NOTE(review): this looks like a bit-unstuffing-safe limit for a stream
// read backward -- confirm against the kernel source.
//
// Flush: if %r6470 >= %r6309 the output is full -> $L__BB0_1245.
// Otherwise the byte is stored at
//   %rd1 + %rd54 + (%r6309 - 1 - %r6470)
// so successive bytes are written from the end of the region toward its
// start.  %r6472 becomes 1 when the stored byte is > 143 (0x8F), else 0.
//
// The four row bodies below are the same code unrolled for rows
// %r6446 + 0 .. %r6446 + 3.
// ---------------------------------------------------------------------
$L__BB0_1211:
// Word offsets of the four rows of this group.
mul.lo.s32 %r2277, %r6446, %r1;
add.s32 %r2278, %r6446, 3;
mul.lo.s32 %r2279, %r1, %r2278;
add.s32 %r2280, %r6446, 2;
mul.lo.s32 %r2281, %r1, %r2280;
add.s32 %r2282, %r6446, 1;
mul.lo.s32 %r2283, %r1, %r2282;
mov.u32 %r6451, %r5037;
$L__BB0_1212:
// Word indices for rows 3, 2, 1, 0 at the first column of this 8-column group.
add.s32 %r6459, %r2279, %r6451;
add.s32 %r6458, %r2281, %r6451;
add.s32 %r6457, %r2283, %r6451;
add.s32 %r6456, %r2277, %r6451;
mov.u32 %r6460, 0;
$L__BB0_1213:
// Skip the whole column when it lies beyond the width %r5.
add.s32 %r5040, %r6451, %r6460;
setp.ge.u32 %p1562, %r5040, %r5;
@%p1562 bra $L__BB0_1234;
// ---- row %r6446 + 0 ----
setp.ge.u32 %p1563, %r6446, %r6;
@%p1563 bra $L__BB0_1219;
mul.wide.u32 %rd610, %r6456, 4;
add.s64 %rd611, %rd2, %rd610;
ld.global.u32 %r5041, [%rd611];
abs.s32 %r2302, %r5041;
// Skip when |v| < 5 or |v| is even.
setp.lt.u32 %p1564, %r2302, 5;
and.b32 %r5042, %r2302, 1;
setp.eq.b32 %p1565, %r5042, 1;
not.pred %p1566, %p1565;
or.pred %p1567, %p1564, %p1566;
@%p1567 bra $L__BB0_1219;
// Append bit 1 of |v| to the accumulator.
shr.u32 %r5043, %r2302, 1;
and.b32 %r5044, %r5043, 1;
shl.b32 %r5045, %r5044, %r6469;
cvt.u16.u32 %rs570, %r5045;
or.b16 %rs762, %rs762, %rs570;
add.s32 %r6471, %r6471, 1;
add.s32 %r6469, %r6469, 1;
// %p1574 is true when no flush is needed yet (see the header for the rule).
setp.ne.s32 %p1568, %r6469, 7;
setp.eq.s32 %p1569, %r6472, 0;
or.pred %p1570, %p1568, %p1569;
and.b16 %rs571, %rs762, 127;
setp.ne.s16 %p1571, %rs571, 127;
or.pred %p1572, %p1570, %p1571;
setp.ne.s32 %p1573, %r6469, 8;
and.pred %p1574, %p1573, %p1572;
@%p1574 bra $L__BB0_1219;
// Flush: fail if the region is full, else store at offset len - 1 - count.
setp.ge.u32 %p1575, %r6470, %r6309;
@%p1575 bra $L__BB0_1245;
not.b32 %r5047, %r6470;
add.s32 %r5048, %r6309, %r5047;
cvt.u64.u32 %rd612, %r5048;
add.s64 %rd613, %rd54, %rd612;
add.s64 %rd614, %rd1, %rd613;
and.b16 %rs573, %rs762, 255;
st.global.u8 [%rd614], %rs762;
add.s32 %r6470, %r6470, 1;
setp.gt.u16 %p1576, %rs573, 143;
selp.u32 %r6472, 1, 0, %p1576;
mov.u16 %rs762, 0;
mov.u32 %r6469, 0;
$L__BB0_1219:
// ---- row %r6446 + 1 ----
setp.ge.u32 %p1577, %r2282, %r6;
@%p1577 bra $L__BB0_1224;
mul.wide.u32 %rd615, %r6457, 4;
add.s64 %rd616, %rd2, %rd615;
ld.global.u32 %r5049, [%rd616];
abs.s32 %r2311, %r5049;
// Skip when |v| < 5 or |v| is even.
setp.lt.u32 %p1578, %r2311, 5;
and.b32 %r5050, %r2311, 1;
setp.eq.b32 %p1579, %r5050, 1;
not.pred %p1580, %p1579;
or.pred %p1581, %p1578, %p1580;
@%p1581 bra $L__BB0_1224;
// Append bit 1 of |v| to the accumulator.
shr.u32 %r5051, %r2311, 1;
and.b32 %r5052, %r5051, 1;
shl.b32 %r5053, %r5052, %r6469;
cvt.u16.u32 %rs574, %r5053;
or.b16 %rs762, %rs762, %rs574;
add.s32 %r6471, %r6471, 1;
add.s32 %r6469, %r6469, 1;
// %p1588 is true when no flush is needed yet.
setp.ne.s32 %p1582, %r6469, 7;
setp.eq.s32 %p1583, %r6472, 0;
or.pred %p1584, %p1582, %p1583;
and.b16 %rs575, %rs762, 127;
setp.ne.s16 %p1585, %rs575, 127;
or.pred %p1586, %p1584, %p1585;
setp.ne.s32 %p1587, %r6469, 8;
and.pred %p1588, %p1587, %p1586;
@%p1588 bra $L__BB0_1224;
// Flush: fail if the region is full, else store at offset len - 1 - count.
setp.ge.u32 %p1589, %r6470, %r6309;
@%p1589 bra $L__BB0_1245;
not.b32 %r5055, %r6470;
add.s32 %r5056, %r6309, %r5055;
cvt.u64.u32 %rd617, %r5056;
add.s64 %rd618, %rd54, %rd617;
add.s64 %rd619, %rd1, %rd618;
and.b16 %rs577, %rs762, 255;
st.global.u8 [%rd619], %rs762;
add.s32 %r6470, %r6470, 1;
setp.gt.u16 %p1590, %rs577, 143;
selp.u32 %r6472, 1, 0, %p1590;
mov.u16 %rs762, 0;
mov.u32 %r6469, 0;
$L__BB0_1224:
// ---- row %r6446 + 2 ----
setp.ge.u32 %p1591, %r2280, %r6;
@%p1591 bra $L__BB0_1229;
mul.wide.u32 %rd620, %r6458, 4;
add.s64 %rd621, %rd2, %rd620;
ld.global.u32 %r5057, [%rd621];
abs.s32 %r2320, %r5057;
// Skip when |v| < 5 or |v| is even.
setp.lt.u32 %p1592, %r2320, 5;
and.b32 %r5058, %r2320, 1;
setp.eq.b32 %p1593, %r5058, 1;
not.pred %p1594, %p1593;
or.pred %p1595, %p1592, %p1594;
@%p1595 bra $L__BB0_1229;
// Append bit 1 of |v| to the accumulator.
shr.u32 %r5059, %r2320, 1;
and.b32 %r5060, %r5059, 1;
shl.b32 %r5061, %r5060, %r6469;
cvt.u16.u32 %rs578, %r5061;
or.b16 %rs762, %rs762, %rs578;
add.s32 %r6471, %r6471, 1;
add.s32 %r6469, %r6469, 1;
// %p1602 is true when no flush is needed yet.
setp.ne.s32 %p1596, %r6469, 7;
setp.eq.s32 %p1597, %r6472, 0;
or.pred %p1598, %p1596, %p1597;
and.b16 %rs579, %rs762, 127;
setp.ne.s16 %p1599, %rs579, 127;
or.pred %p1600, %p1598, %p1599;
setp.ne.s32 %p1601, %r6469, 8;
and.pred %p1602, %p1601, %p1600;
@%p1602 bra $L__BB0_1229;
// Flush: fail if the region is full, else store at offset len - 1 - count.
setp.ge.u32 %p1603, %r6470, %r6309;
@%p1603 bra $L__BB0_1245;
not.b32 %r5063, %r6470;
add.s32 %r5064, %r6309, %r5063;
cvt.u64.u32 %rd622, %r5064;
add.s64 %rd623, %rd54, %rd622;
add.s64 %rd624, %rd1, %rd623;
and.b16 %rs581, %rs762, 255;
st.global.u8 [%rd624], %rs762;
add.s32 %r6470, %r6470, 1;
setp.gt.u16 %p1604, %rs581, 143;
selp.u32 %r6472, 1, 0, %p1604;
mov.u16 %rs762, 0;
mov.u32 %r6469, 0;
$L__BB0_1229:
// ---- row %r6446 + 3 ----
setp.ge.u32 %p1605, %r2278, %r6;
@%p1605 bra $L__BB0_1234;
mul.wide.u32 %rd625, %r6459, 4;
add.s64 %rd626, %rd2, %rd625;
ld.global.u32 %r5065, [%rd626];
abs.s32 %r2329, %r5065;
// Skip when |v| < 5 or |v| is even.
setp.lt.u32 %p1606, %r2329, 5;
and.b32 %r5066, %r2329, 1;
setp.eq.b32 %p1607, %r5066, 1;
not.pred %p1608, %p1607;
or.pred %p1609, %p1606, %p1608;
@%p1609 bra $L__BB0_1234;
// Append bit 1 of |v| to the accumulator.
shr.u32 %r5067, %r2329, 1;
and.b32 %r5068, %r5067, 1;
shl.b32 %r5069, %r5068, %r6469;
cvt.u16.u32 %rs582, %r5069;
or.b16 %rs762, %rs762, %rs582;
add.s32 %r6471, %r6471, 1;
add.s32 %r6469, %r6469, 1;
// %p1616 is true when no flush is needed yet.
setp.ne.s32 %p1610, %r6469, 7;
setp.eq.s32 %p1611, %r6472, 0;
or.pred %p1612, %p1610, %p1611;
and.b16 %rs583, %rs762, 127;
setp.ne.s16 %p1613, %rs583, 127;
or.pred %p1614, %p1612, %p1613;
setp.ne.s32 %p1615, %r6469, 8;
and.pred %p1616, %p1615, %p1614;
@%p1616 bra $L__BB0_1234;
// Flush: fail if the region is full, else store at offset len - 1 - count.
setp.ge.u32 %p1617, %r6470, %r6309;
@%p1617 bra $L__BB0_1245;
not.b32 %r5071, %r6470;
add.s32 %r5072, %r6309, %r5071;
cvt.u64.u32 %rd627, %r5072;
add.s64 %rd628, %rd54, %rd627;
add.s64 %rd629, %rd1, %rd628;
and.b16 %rs585, %rs762, 255;
st.global.u8 [%rd629], %rs762;
add.s32 %r6470, %r6470, 1;
setp.gt.u16 %p1618, %rs585, 143;
selp.u32 %r6472, 1, 0, %p1618;
mov.u16 %rs762, 0;
mov.u32 %r6469, 0;
$L__BB0_1234:
// Advance all four row indices to the next column of the group.
add.s32 %r6459, %r6459, 1;
add.s32 %r6458, %r6458, 1;
add.s32 %r6457, %r6457, 1;
add.s32 %r6456, %r6456, 1;
add.s32 %r6460, %r6460, 1;
setp.lt.u32 %p1619, %r6460, 8;
@%p1619 bra $L__BB0_1213;
// Next group of 8 columns.
add.s32 %r6451, %r6451, 8;
setp.lt.u32 %p1620, %r6451, %r5;
@%p1620 bra $L__BB0_1212;
// Next group of 4 rows.
add.s32 %r6446, %r6446, 4;
setp.lt.u32 %p1621, %r6446, %r6;
@%p1621 bra $L__BB0_1211;
bra.uni $L__BB0_1239;
$L__BB0_1244:
setp.eq.s32 %p1628, %r5180, 0;
@%p1628 bra $L__BB0_1243;
bra.uni $L__BB0_1245;
$L__BB0_1237:
mov.u32 %r6469, 0;
mov.u32 %r6481, %r6469;
$L__BB0_1238:
add.s32 %r6481, %r6481, 4;
setp.lt.u32 %p1622, %r6481, %r6;
mov.u16 %rs762, 0;
mov.u32 %r6470, %r6469;
mov.u32 %r6471, %r6469;
@%p1622 bra $L__BB0_1238;
$L__BB0_1239:
setp.eq.s32 %p1623, %r6469, 0;
@%p1623 bra $L__BB0_1242;
setp.ge.u32 %p1624, %r6470, %r6309;
@%p1624 bra $L__BB0_1245;
not.b32 %r5077, %r6470;
add.s32 %r5078, %r6309, %r5077;
cvt.u64.u32 %rd630, %r5078;
add.s64 %rd631, %rd54, %rd630;
add.s64 %rd632, %rd1, %rd631;
st.global.u8 [%rd632], %rs762;
add.s32 %r6470, %r6470, 1;
$L__BB0_1242:
setp.le.u32 %p1625, %r6470, %r6309;
setp.eq.s32 %p1626, %r6471, %r5180;
and.pred %p1627, %p1626, %p1625;
@%p1627 bra $L__BB0_1243;
bra.uni $L__BB0_1245;
// ---------------------------------------------------------------------
// Final result records for this kernel, each eight 32-bit words at %rd3.
//
// $L__BB0_1243 writes:
//   [+0]  = 0            [+4]  = 0
//   [+8]  = %r1984       [+12] = %r4
//   [+16] = %r2 - %r4    [+20] = %r1733
//   [+24] = %r6310       [+28] = 0
//
// $L__BB0_1245 writes:
//   [+0]  = 2            [+4]  = 7          [+8 .. +28] = 0
//
// Both paths continue at $L__BB0_1254.
// NOTE(review): word 0 looks like a status (0 = success, 2 = failure) and
// word 1 an error code -- confirm against the host-side record layout.
// ---------------------------------------------------------------------
$L__BB0_1243:
sub.s32 %r5109, %r2, %r4;
mov.u32 %r5082, 0;
st.global.u32 [%rd3], %r5082;
st.global.u32 [%rd3+4], %r5082;
st.global.u32 [%rd3+8], %r1984;
st.global.u32 [%rd3+12], %r4;
st.global.u32 [%rd3+16], %r5109;
st.global.u32 [%rd3+20], %r1733;
st.global.u32 [%rd3+24], %r6310;
st.global.u32 [%rd3+28], %r5082;
bra.uni $L__BB0_1254;
$L__BB0_1245:
mov.u32 %r5079, 2;
st.global.u32 [%rd3], %r5079;
mov.u32 %r5080, 7;
st.global.u32 [%rd3+4], %r5080;
mov.u32 %r5081, 0;
st.global.u32 [%rd3+8], %r5081;
st.global.u32 [%rd3+12], %r5081;
st.global.u32 [%rd3+16], %r5081;
st.global.u32 [%rd3+20], %r5081;
st.global.u32 [%rd3+24], %r5081;
st.global.u32 [%rd3+28], %r5081;
bra.uni $L__BB0_1254;
}
// .globl j2k_htj2k_encode_codeblocks
.visible .entry j2k_htj2k_encode_codeblocks(
.param .u64 j2k_htj2k_encode_codeblocks_param_0,
.param .u64 j2k_htj2k_encode_codeblocks_param_1,
.param .u64 j2k_htj2k_encode_codeblocks_param_2,
.param .u64 j2k_htj2k_encode_codeblocks_param_3,
.param .u64 j2k_htj2k_encode_codeblocks_param_4,
.param .u64 j2k_htj2k_encode_codeblocks_param_5,
.param .u64 j2k_htj2k_encode_codeblocks_param_6,
.param .u64 j2k_htj2k_encode_codeblocks_param_7
)
{
.local .align 2 .b8 __local_depot1[1026];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<2374>;
.reg .b16 %rs<1376>;
.reg .b32 %r<10744>;
.reg .b64 %rd<1425>;
// demoted variable
.shared .align 4 .b8 _ZZ32 j2k_htj2k_encode_codeblocksE9block_max[512];
// demoted variable
.shared .align 1 .b8 _ZZ32 j2k_htj2k_encode_codeblocksE13cleanup_e_val[513];
// demoted variable
.shared .align 1 .b8 _ZZ32 j2k_htj2k_encode_codeblocksE14cleanup_cx_val[513];
mov.u64 %SPL, __local_depot1;
ld.param.u64 %rd78, [ j2k_htj2k_encode_codeblocks_param_1];
ld.param.u64 %rd84, [ j2k_htj2k_encode_codeblocks_param_7];
cvta.to.global.u64 %rd1, %rd78;
mov.u32 %r4049, %ctaid.x;
cvt.u64.u32 %rd2, %r4049;
setp.ge.u64 %p9, %rd2, %rd84;
@%p9 bra $L__BB1_1905;
ld.param.u64 %rd1424, [ j2k_htj2k_encode_codeblocks_param_2];
ld.param.u64 %rd1423, [ j2k_htj2k_encode_codeblocks_param_0];
cvta.to.global.u64 %rd3, %rd1423;
cvta.to.global.u64 %rd85, %rd1424;
shl.b64 %rd86, %rd2, 5;
add.s64 %rd87, %rd85, %rd86;
ld.global.u32 %r1, [%rd87+4];
ld.global.u32 %r2, [%rd87+16];
ld.global.u32 %rd4, [%rd87+20];
ld.global.u32 %r3, [%rd87+24];
ld.global.u32 %r4, [%rd87+28];
ld.global.u32 %rd5, [%rd87];
ld.global.u32 %r5, [%rd87+8];
setp.eq.s32 %p10, %r5, 0;
ld.global.u32 %r6, [%rd87+12];
setp.eq.s32 %p11, %r6, 0;
or.pred %p12, %p10, %p11;
@%p12 bra $L__BB1_14;
bra.uni $L__BB1_2;
$L__BB1_14:
mov.u32 %r8415, 0;
bra.uni $L__BB1_15;
// ---------------------------------------------------------------------
// Per-block maximum of abs(word) over %r6 * %r5 32-bit words read from
// %rd3, starting at word offset %rd5.  Each thread handles indices
// i = tid.x, tid.x + ntid.x, ... and keeps a running unsigned maximum in
// %r8413.
//
// Two addressing variants:
//   $L__BB1_6 (%r1 == %r5): word index = %rd5 + i
//   $L__BB1_3 (%r1 != %r5): word index = %rd5 + i + (%r1 - %r5) * (i / %r5)
//                           i.e. (i / %r5) * %r1 + (i % %r5)
// NOTE(review): this reads as %r5 = row width and %r1 = row pitch --
// confirm against the descriptor layout.
//
// The per-thread maxima are stored into the shared array block_max at
// word tid.x and reduced by halving the stride from ntid.x >> 1, with a
// barrier after each step.  The result is read from block_max[0] into
// %r8415.
// NOTE(review): block_max is declared as 512 bytes (128 words), so the
// store at tid.x * 4 presumably relies on ntid.x <= 128; the halving
// reduction also only covers every element when ntid.x is a power of
// two.  Verify the launch configuration.
// ---------------------------------------------------------------------
$L__BB1_2:
setp.eq.s32 %p13, %r1, %r5;
@%p13 bra $L__BB1_6;
bra.uni $L__BB1_3;
$L__BB1_6:
// Contiguous variant.
mov.u32 %r8411, %tid.x;
mul.lo.s32 %r4063, %r6, %r5;
setp.ge.u32 %p16, %r8411, %r4063;
mov.u32 %r8413, 0;
@%p16 bra $L__BB1_9;
mov.u32 %r8413, 0;
$L__BB1_8:
cvt.u64.u32 %rd92, %r8411;
add.s64 %rd93, %rd92, %rd5;
shl.b64 %rd94, %rd93, 2;
add.s64 %rd95, %rd3, %rd94;
ld.global.u32 %r4065, [%rd95];
abs.s32 %r4066, %r4065;
max.u32 %r8413, %r8413, %r4066;
mov.u32 %r4067, %ntid.x;
add.s32 %r8411, %r8411, %r4067;
setp.lt.u32 %p17, %r8411, %r4063;
@%p17 bra $L__BB1_8;
bra.uni $L__BB1_9;
$L__BB1_3:
// Pitched variant: skip (%r1 - %r5) words after every %r5 words.
mov.u32 %r8409, %tid.x;
mul.lo.s32 %r4052, %r6, %r5;
setp.ge.u32 %p14, %r8409, %r4052;
mov.u32 %r8413, 0;
@%p14 bra $L__BB1_9;
mov.u32 %r8413, 0;
$L__BB1_5:
sub.s32 %r4054, %r1, %r5;
div.u32 %r4055, %r8409, %r5;
mad.lo.s32 %r4056, %r4054, %r4055, %r8409;
cvt.u64.u32 %rd88, %r4056;
add.s64 %rd89, %rd88, %rd5;
shl.b64 %rd90, %rd89, 2;
add.s64 %rd91, %rd3, %rd90;
ld.global.u32 %r4057, [%rd91];
abs.s32 %r4058, %r4057;
max.u32 %r8413, %r8413, %r4058;
mov.u32 %r4059, %ntid.x;
add.s32 %r8409, %r8409, %r4059;
setp.lt.u32 %p15, %r8409, %r4052;
@%p15 bra $L__BB1_5;
$L__BB1_9:
// Publish this thread's maximum: block_max[tid.x] = %r8413.
mov.u32 %r4069, %tid.x;
shl.b32 %r4070, %r4069, 2;
mov.u32 %r4071, _ZZ32 j2k_htj2k_encode_codeblocksE9block_max;
add.s32 %r4072, %r4071, %r4070;
st.shared.u32 [%r4072], %r8413;
bar.sync 0;
// Reduction stride starts at ntid.x / 2.
mov.u32 %r4073, %ntid.x;
shr.u32 %r8414, %r4073, 1;
setp.eq.s32 %p18, %r8414, 0;
@%p18 bra $L__BB1_13;
$L__BB1_10:
// Threads with tid.x < stride keep the larger of block_max[tid.x] and
// block_max[tid.x + stride]; the winner's index is selected, its value
// re-read and stored back into block_max[tid.x].
setp.ge.u32 %p19, %r4069, %r8414;
@%p19 bra $L__BB1_12;
add.s32 %r4079, %r8414, %r4069;
shl.b32 %r4080, %r8414, 2;
add.s32 %r4081, %r4072, %r4080;
ld.shared.u32 %r4082, [%r4081];
ld.shared.u32 %r4083, [%r4072];
setp.gt.u32 %p20, %r4083, %r4082;
selp.b32 %r4084, %r4069, %r4079, %p20;
shl.b32 %r4085, %r4084, 2;
add.s32 %r4086, %r4071, %r4085;
ld.shared.u32 %r4087, [%r4086];
st.shared.u32 [%r4072], %r4087;
$L__BB1_12:
bar.sync 0;
shr.u32 %r8414, %r8414, 1;
setp.ne.s32 %p21, %r8414, 0;
@%p21 bra $L__BB1_10;
$L__BB1_13:
// Block-wide maximum.
ld.shared.u32 %r8415, [_ZZ32 j2k_htj2k_encode_codeblocksE9block_max];
// ---------------------------------------------------------------------
// From here on only thread 0 of the block continues; every other thread
// branches to $L__BB1_1905 (ret).
//
// Thread 0 locates this block's 32-byte record at
//   %rd6 = param_6 + ctaid.x * 32
// and initialises it to {1, 0, 0, 0, 0, 0, 0, 0}.
//
// It then forms the rejection predicate
//   %p1 = (%r5 > 1024) || (%r5 - 1 >= %r1, unsigned) || (%r6 == 0)
// The middle term is also true for %r5 == 0 because %r5 - 1 wraps to
// 0xFFFFFFFF.  Finally it dispatches on %r4:
//   %r4 == 1 -> $L__BB1_1254,  otherwise -> $L__BB1_17.
// ---------------------------------------------------------------------
$L__BB1_15:
mov.u32 %r4089, %tid.x;
setp.ne.s32 %p22, %r4089, 0;
@%p22 bra $L__BB1_1905;
setp.eq.s32 %p2367, %r6, 0;
mov.u32 %r8394, %ctaid.x;
ld.param.u64 %rd1418, [ j2k_htj2k_encode_codeblocks_param_6];
mov.u32 %r4090, 0;
cvta.to.global.u64 %rd96, %rd1418;
mul.wide.u32 %rd97, %r8394, 32;
add.s64 %rd6, %rd96, %rd97;
// Initial record contents: word 0 = 1, words 1..7 = 0.
mov.u32 %r4092, 1;
st.global.u32 [%rd6], %r4092;
st.global.u32 [%rd6+4], %r4090;
st.global.u32 [%rd6+8], %r4090;
st.global.u32 [%rd6+12], %r4090;
st.global.u32 [%rd6+16], %r4090;
st.global.u32 [%rd6+20], %r4090;
st.global.u32 [%rd6+24], %r4090;
st.global.u32 [%rd6+28], %r4090;
// Rejection predicate %p1 (see header).
add.s32 %r4093, %r5, -1;
setp.ge.u32 %p24, %r4093, %r1;
or.pred %p25, %p24, %p2367;
setp.gt.u32 %p26, %r5, 1024;
or.pred %p1, %p26, %p25;
// %r4095 = shared-memory address of the cleanup_e_val array.
mov.u32 %r4095, _ZZ32 j2k_htj2k_encode_codeblocksE13cleanup_e_val;
setp.eq.s32 %p27, %r4, 1;
@%p27 bra $L__BB1_1254;
bra.uni $L__BB1_17;
// ---------------------------------------------------------------------
// Parameter validation for the two dispatch targets.  $L__BB1_1254 and
// $L__BB1_17 apply the same checks; they differ only in where they
// continue on success ($L__BB1_1256 vs $L__BB1_19) and which reject block
// they use ($L__BB1_1903 vs $L__BB1_1253).
//
// Reject when any of the following holds:
//   - %p1 is set (computed before this block)
//   - %r6 > 4096 / %r5   (16-bit unsigned divide; %r5 is in 1..1024 here
//                         because %p1 is false)
//   - %r2 - 1 > 29       (unsigned, i.e. %r2 outside 1..30)
//   - %r3 < 20549
//
// Both reject blocks leave the record at %rd6 as {2, 1, 0, 0, 0, 0, 0, 0}.
// $L__BB1_1904 is a shared tail that stores
//   [+8] = %r10741, [+12] = %r10743, [+16] = %r10742,
//   [+20] = %r10741, [+24] = 0, [+28] = %r10740
// and is also branched to from other blocks with different register values.
// $L__BB1_1905 is the common return.
// ---------------------------------------------------------------------
$L__BB1_1254:
@%p1 bra $L__BB1_1903;
cvt.u16.u32 %rs822, %r5;
mov.u16 %rs823, 4096;
div.u16 %rs824, %rs823, %rs822;
cvt.u32.u16 %r6791, %rs824;
setp.gt.u32 %p1625, %r6, %r6791;
// %r10742 temporarily holds %r2 - 1 for the range check; it is overwritten
// on every path that reaches $L__BB1_1904.
add.s32 %r10742, %r2, -1;
setp.gt.u32 %p1626, %r10742, 29;
or.pred %p1627, %p1626, %p1625;
setp.lt.u32 %p1628, %r3, 20549;
or.pred %p1629, %p1628, %p1627;
@%p1629 bra $L__BB1_1903;
bra.uni $L__BB1_1256;
$L__BB1_1903:
// Reject: words 0 and 1 = 2 and 1; the remaining words are zeroed via the
// shared tail.
mov.u32 %r8389, 2;
st.global.u32 [%rd6], %r8389;
mov.u32 %r8390, 1;
st.global.u32 [%rd6+4], %r8390;
mov.u32 %r10740, 0;
mov.u32 %r10741, %r10740;
mov.u32 %r10742, %r10740;
mov.u32 %r10743, %r10740;
$L__BB1_1904:
// Shared tail: fill words 2..7 of the record from the %r1074x registers.
st.global.u32 [%rd6+8], %r10741;
st.global.u32 [%rd6+12], %r10743;
st.global.u32 [%rd6+16], %r10742;
st.global.u32 [%rd6+20], %r10741;
mov.u32 %r8391, 0;
st.global.u32 [%rd6+24], %r8391;
st.global.u32 [%rd6+28], %r10740;
bra.uni $L__BB1_1905;
$L__BB1_17:
// Same checks as $L__BB1_1254, for the %r4 != 1 path.
@%p1 bra $L__BB1_1253;
cvt.u16.u32 %rs507, %r5;
mov.u16 %rs508, 4096;
div.u16 %rs509, %rs508, %rs507;
cvt.u32.u16 %r4097, %rs509;
setp.gt.u32 %p28, %r6, %r4097;
add.s32 %r4098, %r2, -1;
setp.gt.u32 %p29, %r4098, 29;
or.pred %p30, %p29, %p28;
setp.lt.u32 %p31, %r3, 20549;
or.pred %p32, %p31, %p30;
@%p32 bra $L__BB1_1253;
bra.uni $L__BB1_19;
$L__BB1_1253:
// Reject: record = {2, 1, 0, 0, 0, 0, 0, 0}; falls through to the return.
mov.u32 %r6788, 2;
st.global.u32 [%rd6], %r6788;
mov.u32 %r6789, 1;
st.global.u32 [%rd6+4], %r6789;
mov.u32 %r6790, 0;
st.global.u32 [%rd6+8], %r6790;
st.global.u32 [%rd6+12], %r6790;
st.global.u32 [%rd6+16], %r6790;
st.global.u32 [%rd6+20], %r6790;
st.global.u32 [%rd6+24], %r6790;
st.global.u32 [%rd6+28], %r6790;
$L__BB1_1905:
ret;
$L__BB1_1256:
setp.eq.s32 %p1630, %r8415, 0;
@%p1630 bra $L__BB1_1902;
clz.b32 %r6792, %r8415;
mov.u32 %r6793, 32;
sub.s32 %r6794, %r6793, %r6792;
setp.gt.u32 %p1631, %r6794, %r2;
@%p1631 bra $L__BB1_1901;
bra.uni $L__BB1_1258;
$L__BB1_1901:
mov.u32 %r8380, 1;
st.global.u32 [%rd6], %r8380;
mov.u32 %r8381, 2;
st.global.u32 [%rd6+4], %r8381;
mov.u32 %r10740, 0;
mov.u32 %r10741, %r10740;
mov.u32 %r10742, %r10740;
mov.u32 %r10743, %r10740;
bra.uni $L__BB1_1904;
$L__BB1_19:
add.s32 %r4099, %r4, -1;
setp.gt.u32 %p33, %r4099, 163;
@%p33 bra $L__BB1_1252;
bra.uni $L__BB1_20;
$L__BB1_1252:
mov.u32 %r6785, 2;
st.global.u32 [%rd6], %r6785;
mov.u32 %r6786, 5;
st.global.u32 [%rd6+4], %r6786;
mov.u32 %r6787, 0;
st.global.u32 [%rd6+8], %r6787;
st.global.u32 [%rd6+12], %r6787;
st.global.u32 [%rd6+16], %r6787;
st.global.u32 [%rd6+20], %r6787;
st.global.u32 [%rd6+24], %r6787;
st.global.u32 [%rd6+28], %r6787;
bra.uni $L__BB1_1905;
$L__BB1_20:
setp.gt.u32 %p34, %r4, 3;
@%p34 bra $L__BB1_1251;
bra.uni $L__BB1_21;
$L__BB1_1251:
mov.u32 %r6782, 2;
st.global.u32 [%rd6], %r6782;
mov.u32 %r6783, 5;
st.global.u32 [%rd6+4], %r6783;
mov.u32 %r6784, 0;
st.global.u32 [%rd6+8], %r6784;
st.global.u32 [%rd6+12], %r6784;
st.global.u32 [%rd6+16], %r6784;
st.global.u32 [%rd6+20], %r6784;
st.global.u32 [%rd6+24], %r6784;
st.global.u32 [%rd6+28], %r6784;
bra.uni $L__BB1_1905;
$L__BB1_1902:
mov.u32 %r10740, 0;
st.global.u32 [%rd6], %r10740;
st.global.u32 [%rd6+4], %r10740;
mov.u32 %r10741, %r10740;
mov.u32 %r10742, %r2;
mov.u32 %r10743, %r10740;
bra.uni $L__BB1_1904;
$L__BB1_1258:
add.s32 %r8403, %r5, 1;
shr.u32 %r8402, %r8403, 1;
add.s64 %rd1415, %rd1, %rd4;
add.s64 %rd1414, %rd1415, 20548;
mov.u32 %r6796, 31;
sub.s32 %r2355, %r6796, %r2;
mov.u16 %rs825, 255;
st.global.u8 [%rd1414], %rs825;
add.s32 %r6797, %r8402, 2;
min.u32 %r2356, %r6797, 513;
mov.u32 %r6798, -3;
sub.s32 %r6799, %r6798, %r8402;
max.u32 %r6800, %r6799, -514;
mov.u32 %r6801, -2;
sub.s32 %r6802, %r6801, %r6800;
and.b32 %r9733, %r2356, 3;
setp.lt.u32 %p1632, %r6802, 3;
mov.u32 %r9731, 0;
@%p1632 bra $L__BB1_1261;
sub.s32 %r9730, %r2356, %r9733;
mov.u32 %r9731, 0;
$L__BB1_1260:
add.s32 %r6805, %r4095, %r9731;
mov.u16 %rs826, 0;
st.shared.u8 [%r6805], %rs826;
mov.u32 %r6806, _ZZ32 j2k_htj2k_encode_codeblocksE14cleanup_cx_val;
add.s32 %r6807, %r6806, %r9731;
st.shared.u8 [%r6807], %rs826;
st.shared.u8 [%r6805+1], %rs826;
st.shared.u8 [%r6807+1], %rs826;
st.shared.u8 [%r6805+2], %rs826;
st.shared.u8 [%r6807+2], %rs826;
st.shared.u8 [%r6805+3], %rs826;
st.shared.u8 [%r6807+3], %rs826;
add.s32 %r9731, %r9731, 4;
add.s32 %r9730, %r9730, -4;
setp.ne.s32 %p1633, %r9730, 0;
@%p1633 bra $L__BB1_1260;
$L__BB1_1261:
setp.eq.s32 %p1634, %r9733, 0;
@%p1634 bra $L__BB1_1264;
mov.u32 %r6810, _ZZ32 j2k_htj2k_encode_codeblocksE14cleanup_cx_val;
$L__BB1_1263:
.pragma "nounroll";
add.s32 %r6809, %r4095, %r9731;
mov.u16 %rs827, 0;
st.shared.u8 [%r6809], %rs827;
add.s32 %r6811, %r6810, %r9731;
st.shared.u8 [%r6811], %rs827;
add.s32 %r9731, %r9731, 1;
add.s32 %r9733, %r9733, -1;
setp.ne.s32 %p1635, %r9733, 0;
@%p1635 bra $L__BB1_1263;
$L__BB1_1264:
mov.u32 %r10451, 0;
mov.u32 %r10264, 1;
mov.u16 %rs1253, 0;
mov.u32 %r10452, 8;
mov.u16 %rs1322, 15;
mov.u32 %r10265, 4;
mov.u32 %r10453, %r10451;
mov.u32 %r10454, %r10451;
mov.u32 %r10485, %r10451;
mov.u32 %r10266, %r10264;
mov.u32 %r10267, %r10451;
mov.u32 %r9816, %r10451;
mov.u32 %r9825, %r10452;
mov.u32 %r10030, %r10451;
mov.u32 %r10031, %r10451;
mov.u32 %r10032, %r10264;
mov.u32 %r10033, %r10451;
@%p10 bra $L__BB1_1632;
ld.param.u64 %rd1421, [ j2k_htj2k_encode_codeblocks_param_5];
ld.param.u64 %rd1416, [ j2k_htj2k_encode_codeblocks_param_3];
cvta.to.global.u64 %rd62, %rd1416;
cvta.to.global.u64 %rd63, %rd1421;
mov.u32 %r6844, 0;
mov.u32 %r9825, 8;
mov.u32 %r10032, 1;
mov.u32 %r10265, 4;
mov.u16 %rs1322, 15;
mov.u16 %rs1253, 0;
mov.u32 %r9734, %r6844;
mov.u32 %r10033, %r6844;
mov.u32 %r10031, %r6844;
mov.u32 %r10030, %r6844;
mov.u32 %r9816, %r6844;
mov.u32 %r10267, %r6844;
mov.u32 %r10266, %r10032;
mov.u32 %r10264, %r10032;
mov.u32 %r10485, %r6844;
mov.u32 %r10454, %r6844;
mov.u32 %r10453, %r6844;
mov.u32 %r10452, %r9825;
mov.u32 %r10451, %r6844;
mov.u32 %r9750, %r6844;
mov.u32 %r9751, %r6844;
bra.uni $L__BB1_1266;
$L__BB1_21:
setp.eq.s32 %p35, %r8415, 0;
@%p35 bra $L__BB1_1250;
clz.b32 %r4100, %r8415;
mov.u32 %r4101, 32;
sub.s32 %r4102, %r4101, %r4100;
setp.gt.u32 %p36, %r4102, %r2;
@%p36 bra $L__BB1_1249;
bra.uni $L__BB1_23;
$L__BB1_1249:
mov.u32 %r6778, 1;
st.global.u32 [%rd6], %r6778;
mov.u32 %r6779, 2;
st.global.u32 [%rd6+4], %r6779;
mov.u32 %r6780, 0;
st.global.u32 [%rd6+8], %r6780;
st.global.u32 [%rd6+12], %r6780;
st.global.u32 [%rd6+16], %r6780;
st.global.u32 [%rd6+20], %r6780;
st.global.u32 [%rd6+24], %r6780;
st.global.u32 [%rd6+28], %r6780;
bra.uni $L__BB1_1905;
$L__BB1_1432:
setp.gt.u32 %p1816, %r9816, 191;
mov.u32 %r10019, 1;
mov.u32 %r9825, 0;
@%p1816 bra $L__BB1_1434;
st.global.u8 [%rd67], %rs1253;
add.s32 %r9816, %r9816, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9825, 8;
mov.u32 %r10019, %r10033;
bra.uni $L__BB1_1434;
$L__BB1_1337:
setp.gt.u32 %p1714, %r9816, 191;
mov.u32 %r9833, 1;
mov.u32 %r9825, 0;
@%p1714 bra $L__BB1_1339;
and.b16 %rs865, %rs1253, 255;
st.global.u8 [%rd64], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1715, %rs865, 255;
selp.b32 %r9825, 7, 8, %p1715;
mov.u16 %rs1253, 0;
mov.u32 %r9833, %r10033;
bra.uni $L__BB1_1339;
$L__BB1_1471:
setp.gt.u32 %p1863, %r9816, 191;
mov.u32 %r10026, 1;
mov.u32 %r9825, 0;
@%p1863 bra $L__BB1_1473;
and.b16 %rs914, %rs1253, 255;
st.global.u8 [%rd67], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1864, %rs914, 255;
selp.b32 %r9825, 7, 8, %p1864;
mov.u16 %rs1253, 0;
mov.u32 %r10026, %r10033;
bra.uni $L__BB1_1473;
$L__BB1_1266:
cvt.u64.u32 %rd1066, %r9751;
add.s64 %rd1067, %rd1066, %rd5;
shl.b64 %rd1068, %rd1067, 2;
add.s64 %rd1069, %rd3, %rd1068;
ld.global.u32 %r2386, [%rd1069];
setp.eq.s32 %p1637, %r2386, 0;
mov.u32 %r9752, %r6844;
@%p1637 bra $L__BB1_1268;
and.b32 %r6846, %r2386, -2147483648;
abs.s32 %r6847, %r2386;
shl.b32 %r6848, %r6847, %r2355;
or.b32 %r9752, %r6848, %r6846;
$L__BB1_1268:
shl.b32 %r6852, %r9752, 1;
shr.u32 %r6853, %r6852, %r2355;
and.b32 %r2389, %r6853, -2;
setp.eq.s32 %p1638, %r2389, 0;
mov.u32 %r9756, 0;
mov.u32 %r9753, %r9756;
mov.u32 %r9754, %r9756;
mov.u32 %r9760, %r9756;
@%p1638 bra $L__BB1_1270;
add.s32 %r6855, %r2389, -1;
clz.b32 %r6856, %r6855;
mov.u32 %r6857, 32;
sub.s32 %r9753, %r6857, %r6856;
shr.u32 %r6858, %r9752, 31;
add.s32 %r6859, %r6858, %r2389;
add.s32 %r9754, %r6859, -2;
mov.u32 %r9760, 1;
$L__BB1_1270:
setp.lt.u32 %p1639, %r6, 2;
@%p1639 bra $L__BB1_1273;
add.s32 %r6862, %r9751, %r1;
cvt.u64.u32 %rd1070, %r6862;
add.s64 %rd1071, %rd1070, %rd5;
shl.b64 %rd1072, %rd1071, 2;
add.s64 %rd1073, %rd3, %rd1072;
ld.global.u32 %r2395, [%rd1073];
setp.eq.s32 %p1640, %r2395, 0;
@%p1640 bra $L__BB1_1273;
and.b32 %r6863, %r2395, -2147483648;
abs.s32 %r6864, %r2395;
shl.b32 %r6865, %r6864, %r2355;
or.b32 %r9756, %r6865, %r6863;
$L__BB1_1273:
shl.b32 %r6868, %r9756, 1;
shr.u32 %r6869, %r6868, %r2355;
and.b32 %r2398, %r6869, -2;
setp.eq.s32 %p1641, %r2398, 0;
mov.u32 %r9771, 0;
mov.u32 %r9757, %r9771;
mov.u32 %r9758, %r9771;
mov.u32 %r9776, %r9753;
@%p1641 bra $L__BB1_1275;
or.b32 %r9760, %r9760, 2;
add.s32 %r6870, %r2398, -1;
clz.b32 %r6871, %r6870;
mov.u32 %r6872, 32;
sub.s32 %r9757, %r6872, %r6871;
max.s32 %r9776, %r9753, %r9757;
shr.u32 %r6873, %r9756, 31;
add.s32 %r6874, %r6873, %r2398;
add.s32 %r9758, %r6874, -2;
$L__BB1_1275:
add.s32 %r9775, %r9751, 1;
add.s32 %r6879, %r9734, 1;
setp.ge.u32 %p1642, %r6879, %r5;
mov.u32 %r9772, %r9771;
mov.u32 %r9773, %r9771;
mov.u32 %r9774, %r9771;
@%p1642 bra $L__BB1_1286;
cvt.u64.u32 %rd1074, %r9775;
add.s64 %rd1075, %rd1074, %rd5;
shl.b64 %rd1076, %rd1075, 2;
add.s64 %rd1077, %rd3, %rd1076;
ld.global.u32 %r2408, [%rd1077];
setp.eq.s32 %p1643, %r2408, 0;
mov.u32 %r9772, 0;
mov.u32 %r9761, %r9772;
@%p1643 bra $L__BB1_1278;
and.b32 %r6881, %r2408, -2147483648;
abs.s32 %r6882, %r2408;
shl.b32 %r6883, %r6882, %r2355;
or.b32 %r9761, %r6883, %r6881;
$L__BB1_1278:
shl.b32 %r6886, %r9761, 1;
shr.u32 %r6887, %r6886, %r2355;
and.b32 %r2411, %r6887, -2;
setp.eq.s32 %p1644, %r2411, 0;
mov.u32 %r9774, %r9772;
@%p1644 bra $L__BB1_1280;
or.b32 %r9760, %r9760, 4;
add.s32 %r6888, %r2411, -1;
clz.b32 %r6889, %r6888;
mov.u32 %r6890, 32;
sub.s32 %r9772, %r6890, %r6889;
max.s32 %r9776, %r9776, %r9772;
shr.u32 %r6891, %r9761, 31;
add.s32 %r6892, %r6891, %r2411;
add.s32 %r9774, %r6892, -2;
$L__BB1_1280:
mov.u32 %r9771, 0;
mov.u32 %r9766, %r9771;
@%p1639 bra $L__BB1_1283;
add.s32 %r6895, %r9775, %r1;
cvt.u64.u32 %rd1078, %r6895;
add.s64 %rd1079, %rd1078, %rd5;
shl.b64 %rd1080, %rd1079, 2;
add.s64 %rd1081, %rd3, %rd1080;
ld.global.u32 %r2420, [%rd1081];
setp.eq.s32 %p1646, %r2420, 0;
@%p1646 bra $L__BB1_1283;
and.b32 %r6896, %r2420, -2147483648;
abs.s32 %r6897, %r2420;
shl.b32 %r6898, %r6897, %r2355;
or.b32 %r9766, %r6898, %r6896;
$L__BB1_1283:
shl.b32 %r6901, %r9766, 1;
shr.u32 %r6902, %r6901, %r2355;
and.b32 %r2423, %r6902, -2;
setp.eq.s32 %p1647, %r2423, 0;
mov.u32 %r9773, %r9771;
@%p1647 bra $L__BB1_1285;
or.b32 %r9760, %r9760, 8;
add.s32 %r6903, %r2423, -1;
clz.b32 %r6904, %r6903;
mov.u32 %r6905, 32;
sub.s32 %r9771, %r6905, %r6904;
max.s32 %r9776, %r9776, %r9771;
shr.u32 %r6906, %r9766, 31;
add.s32 %r6907, %r6906, %r2423;
add.s32 %r9773, %r6907, -2;
$L__BB1_1285:
add.s32 %r9775, %r9751, 2;
$L__BB1_1286:
mov.u32 %r9751, %r9775;
add.s32 %r6909, %r9776, -1;
setp.lt.s32 %p1648, %r9776, 2;
setp.gt.s32 %p1649, %r9776, 1;
selp.b32 %r2440, %r6909, 0, %p1649;
mov.u32 %r9778, 0;
@%p1648 bra $L__BB1_1288;
setp.eq.s32 %p1650, %r9753, %r9776;
selp.u32 %r6910, 1, 0, %p1650;
setp.eq.s32 %p1651, %r9757, %r9776;
selp.u32 %r6911, -1, 0, %p1651;
bfi.b32 %r6912, %r6911, %r6910, 1, 1;
setp.eq.s32 %p1652, %r9772, %r9776;
selp.u16 %rs832, 1, 0, %p1652;
mul.wide.u16 %r6913, %rs832, 4;
or.b32 %r6914, %r6912, %r6913;
setp.eq.s32 %p1653, %r9771, %r9776;
selp.u16 %rs833, 1, 0, %p1653;
mul.wide.u16 %r6915, %rs833, 8;
or.b32 %r9778, %r6914, %r6915;
$L__BB1_1288:
shr.u32 %r6916, %r9734, 1;
add.s32 %r2443, %r4095, %r6916;
ld.shared.u8 %rs834, [%r2443];
cvt.u32.u16 %r6918, %rs834;
and.b32 %r6919, %r6918, 255;
and.b32 %r6920, %r9757, 255;
setp.lt.u32 %p1654, %r6920, %r6919;
cvt.u16.u32 %rs835, %r9757;
selp.b16 %rs836, %rs834, %rs835, %p1654;
st.shared.u8 [%r2443], %rs836;
cvt.u16.u32 %rs274, %r9771;
st.shared.u8 [%r2443+1], %rs274;
and.b32 %r2444, %r9760, 2;
cvt.u16.u32 %rs837, %r2444;
shr.u16 %rs838, %rs837, 1;
mov.u32 %r6921, _ZZ32 j2k_htj2k_encode_codeblocksE14cleanup_cx_val;
add.s32 %r2445, %r6921, %r6916;
ld.shared.u8 %rs839, [%r2445];
or.b16 %rs840, %rs839, %rs838;
st.shared.u8 [%r2445], %rs840;
and.b32 %r2446, %r9760, 8;
shr.u32 %r2447, %r2446, 3;
st.shared.u8 [%r2445+1], %r2447;
shl.b32 %r6922, %r9760, 4;
shl.b32 %r6923, %r9750, 8;
or.b32 %r6924, %r6922, %r6923;
or.b32 %r6925, %r6924, %r9778;
mul.wide.u32 %rd1082, %r6925, 2;
add.s64 %rd1083, %rd62, %rd1082;
ld.global.u16 %rs275, [%rd1083];
shr.u16 %rs841, %rs275, 4;
and.b16 %rs276, %rs841, 7;
setp.eq.s16 %p1655, %rs276, 0;
mov.u32 %r9790, %r10267;
@%p1655 bra $L__BB1_1295;
cvt.u32.u16 %r9779, %rs276;
shr.u16 %rs842, %rs275, 8;
cvt.u32.u16 %r9780, %rs842;
$L__BB1_1290:
mov.u32 %r2450, %r9779;
setp.gt.u32 %p1656, %r10264, 2879;
mov.u32 %r9790, 1;
@%p1656 bra $L__BB1_1295;
mov.u32 %r6927, 8;
sub.s32 %r6928, %r6927, %r10266;
sub.s32 %r6929, %r6928, %r10265;
min.u32 %r6930, %r6929, %r2450;
setp.eq.s32 %p1657, %r6930, 32;
mov.u32 %r6931, -1;
shl.b32 %r6932, %r6931, %r6930;
not.b32 %r6933, %r6932;
selp.b32 %r6934, -1, %r6933, %p1657;
and.b32 %r6935, %r6934, %r9780;
shl.b32 %r6936, %r6935, %r10265;
cvt.u16.u32 %rs843, %r6936;
or.b16 %rs1322, %rs1322, %rs843;
add.s32 %r10265, %r6930, %r10265;
sub.s32 %r9779, %r2450, %r6930;
shr.u32 %r9780, %r9780, %r6930;
setp.gt.u32 %p1658, %r6929, %r2450;
@%p1658 bra $L__BB1_1294;
setp.ne.s32 %p1659, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs844, %rs1322, 255;
setp.ne.s16 %p1660, %rs844, 127;
and.pred %p1661, %p1659, %p1660;
@%p1661 bra $L__BB1_1294;
mov.u32 %r6939, 20548;
sub.s32 %r6940, %r6939, %r10264;
cvt.u64.u32 %rd1084, %r6940;
add.s64 %rd1085, %rd1084, %rd4;
add.s64 %rd1086, %rd1, %rd1085;
st.global.u8 [%rd1086], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p1662, %rs844, 143;
selp.u32 %r10266, 1, 0, %p1662;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1294:
setp.ne.s32 %p1663, %r9779, 0;
mov.u32 %r9790, %r10267;
@%p1663 bra $L__BB1_1290;
$L__BB1_1295:
setp.ne.s32 %p1664, %r9750, 0;
@%p1664 bra $L__BB1_1343;
setp.eq.s32 %p1665, %r9760, 0;
add.s32 %r6941, %r9816, 17477;
cvt.u64.u32 %rd1087, %r6941;
add.s64 %rd1088, %rd1087, %rd4;
add.s64 %rd64, %rd1, %rd1088;
@%p1665 bra $L__BB1_1335;
shl.b16 %rs1253, %rs1253, 1;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p1666, %r9825, 0;
mov.u32 %r9826, %r10033;
@%p1666 bra $L__BB1_1300;
setp.gt.u32 %p1667, %r9816, 191;
mov.u32 %r9826, 1;
mov.u32 %r9825, 0;
@%p1667 bra $L__BB1_1300;
st.global.u8 [%rd64], %rs1253;
add.s32 %r9816, %r9816, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9825, 8;
mov.u32 %r9826, %r10033;
// Maps the unsigned value in %r10031 to a small count in %r9794:
//   %r10031 <  3      -> 0
//   %r10031 in 3..5   -> 1
//   %r10031 in 6..8   -> 2
//   %r10031 in 9..10  -> 3
//   %r10031 == 11     -> 4
//   %r10031 >= 12     -> 5
// NOTE(review): the table matches the shape {0,0,0,1,1,1,2,2,2,3,3,4,5} used
// for MEL exponents in HTJ2K -- confirm that %r10031 is the MEL state index.
$L__BB1_1300:
setp.lt.u32 %p1668, %r10031, 3;
mov.u32 %r9794, 0;
@%p1668 bra $L__BB1_1303;
setp.lt.u32 %p1669, %r10031, 6;
mov.u32 %r9794, 1;
@%p1669 bra $L__BB1_1303;
// Remaining cases are resolved branch-free with nested selects:
// (== 11 ? 4 : 5), overridden by 3 when < 11, overridden by 2 when < 9.
setp.lt.u32 %p1670, %r10031, 9;
setp.eq.s32 %p1671, %r10031, 11;
selp.b32 %r6947, 4, 5, %p1671;
setp.lt.u32 %p1672, %r10031, 11;
selp.b32 %r6948, 3, %r6947, %p1672;
selp.b32 %r9794, 2, %r6948, %p1670;
$L__BB1_1303:
setp.eq.s32 %p1673, %r9794, 0;
@%p1673 bra $L__BB1_1331;
add.s32 %r2474, %r9794, -1;
and.b32 %r2475, %r9794, 3;
setp.eq.s32 %p1674, %r2475, 0;
mov.u32 %r9804, %r9794;
mov.u32 %r9805, %r9826;
@%p1674 bra $L__BB1_1316;
mov.u32 %r6950, 1;
shl.b32 %r6951, %r6950, %r2474;
and.b32 %r6952, %r6951, %r10030;
setp.ne.s32 %p1675, %r6952, 0;
selp.u32 %r6953, 1, 0, %p1675;
cvt.u32.u16 %r6954, %rs1253;
bfi.b32 %r6955, %r6954, %r6953, 1, 8;
cvt.u16.u32 %rs1253, %r6955;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p1676, %r9825, 0;
mov.u32 %r9805, %r9826;
@%p1676 bra $L__BB1_1308;
setp.gt.u32 %p1677, %r9816, 191;
mov.u32 %r9825, 0;
mov.u32 %r9805, %r6950;
@%p1677 bra $L__BB1_1308;
add.s32 %r6959, %r9816, 17477;
cvt.u64.u32 %rd1089, %r6959;
add.s64 %rd1090, %rd1089, %rd4;
add.s64 %rd1091, %rd1, %rd1090;
st.global.u8 [%rd1091], %rs1253;
add.s32 %r9816, %r9816, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9825, 8;
mov.u32 %r9805, %r9826;
$L__BB1_1308:
setp.eq.s32 %p1678, %r2475, 1;
mov.u32 %r9826, %r9805;
mov.u32 %r9804, %r2474;
@%p1678 bra $L__BB1_1316;
add.s32 %r9804, %r9794, -2;
mov.u32 %r6960, 1;
shl.b32 %r6961, %r6960, %r9804;
and.b32 %r6962, %r6961, %r10030;
setp.ne.s32 %p1679, %r6962, 0;
selp.u32 %r6963, 1, 0, %p1679;
cvt.u32.u16 %r6964, %rs1253;
bfi.b32 %r6965, %r6964, %r6963, 1, 8;
cvt.u16.u32 %rs1253, %r6965;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p1680, %r9825, 0;
mov.u32 %r9800, %r9805;
@%p1680 bra $L__BB1_1312;
setp.gt.u32 %p1681, %r9816, 191;
mov.u32 %r9825, 0;
mov.u32 %r9800, %r6960;
@%p1681 bra $L__BB1_1312;
add.s32 %r6968, %r9816, 17477;
cvt.u64.u32 %rd1092, %r6968;
add.s64 %rd1093, %rd1092, %rd4;
add.s64 %rd1094, %rd1, %rd1093;
and.b16 %rs851, %rs1253, 255;
st.global.u8 [%rd1094], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1682, %rs851, 255;
selp.b32 %r9825, 7, 8, %p1682;
mov.u16 %rs1253, 0;
mov.u32 %r9800, %r9805;
$L__BB1_1312:
setp.eq.s32 %p1683, %r2475, 2;
mov.u32 %r9826, %r9800;
mov.u32 %r9805, %r9800;
@%p1683 bra $L__BB1_1316;
add.s32 %r9804, %r9794, -3;
mov.u32 %r6969, 1;
shl.b32 %r6970, %r6969, %r9804;
and.b32 %r6971, %r6970, %r10030;
setp.ne.s32 %p1684, %r6971, 0;
selp.u32 %r6972, 1, 0, %p1684;
cvt.u32.u16 %r6973, %rs1253;
bfi.b32 %r6974, %r6973, %r6972, 1, 8;
cvt.u16.u32 %rs1253, %r6974;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p1685, %r9825, 0;
mov.u32 %r9826, %r9800;
mov.u32 %r9805, %r9800;
@%p1685 bra $L__BB1_1316;
setp.gt.u32 %p1686, %r9816, 191;
mov.u32 %r9825, 0;
mov.u32 %r9826, %r6969;
mov.u32 %r9805, %r6969;
@%p1686 bra $L__BB1_1316;
add.s32 %r6979, %r9816, 17477;
cvt.u64.u32 %rd1095, %r6979;
add.s64 %rd1096, %rd1095, %rd4;
add.s64 %rd1097, %rd1, %rd1096;
and.b16 %rs854, %rs1253, 255;
st.global.u8 [%rd1097], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1687, %rs854, 255;
selp.b32 %r9825, 7, 8, %p1687;
mov.u16 %rs1253, 0;
mov.u32 %r9826, %r9800;
mov.u32 %r9805, %r9800;
$L__BB1_1316:
setp.lt.u32 %p1688, %r2474, 3;
@%p1688 bra $L__BB1_1331;
mov.u32 %r9826, %r9805;
$L__BB1_1318:
add.s32 %r6980, %r9804, -1;
mov.u32 %r6981, 1;
shl.b32 %r6982, %r6981, %r6980;
and.b32 %r6983, %r6982, %r10030;
setp.ne.s32 %p1689, %r6983, 0;
selp.u32 %r6984, 1, 0, %p1689;
cvt.u32.u16 %r6985, %rs1253;
bfi.b32 %r9814, %r6985, %r6984, 1, 8;
add.s32 %r9813, %r9825, -1;
setp.ne.s32 %p1690, %r9813, 0;
mov.u32 %r9815, %r9826;
@%p1690 bra $L__BB1_1321;
setp.gt.u32 %p1691, %r9816, 191;
mov.u32 %r9813, 0;
mov.u32 %r9815, %r6981;
@%p1691 bra $L__BB1_1321;
cvt.u16.u32 %rs855, %r9814;
and.b16 %rs856, %rs855, 255;
add.s32 %r6989, %r9816, 17477;
cvt.u64.u32 %rd1098, %r6989;
add.s64 %rd1099, %rd1098, %rd4;
add.s64 %rd1100, %rd1, %rd1099;
st.global.u8 [%rd1100], %rs855;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1692, %rs856, 255;
selp.b32 %r9813, 7, 8, %p1692;
mov.u32 %r9814, 0;
mov.u32 %r9815, %r9826;
$L__BB1_1321:
add.s32 %r6990, %r9804, -2;
shl.b32 %r6992, %r6981, %r6990;
and.b32 %r6993, %r6992, %r10030;
setp.ne.s32 %p1693, %r6993, 0;
and.b32 %r6994, %r9814, 127;
selp.u32 %r6995, 1, 0, %p1693;
bfi.b32 %r9818, %r6994, %r6995, 1, 7;
add.s32 %r9817, %r9813, -1;
setp.ne.s32 %p1694, %r9817, 0;
mov.u32 %r9819, %r9815;
@%p1694 bra $L__BB1_1324;
setp.gt.u32 %p1695, %r9816, 191;
mov.u32 %r9819, 1;
mov.u32 %r9817, 0;
@%p1695 bra $L__BB1_1324;
cvt.u16.u32 %rs857, %r9818;
and.b16 %rs858, %rs857, 255;
add.s32 %r6999, %r9816, 17477;
cvt.u64.u32 %rd1101, %r6999;
add.s64 %rd1102, %rd1101, %rd4;
add.s64 %rd1103, %rd1, %rd1102;
st.global.u8 [%rd1103], %rs857;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1696, %rs858, 255;
selp.b32 %r9817, 7, 8, %p1696;
mov.u32 %r9818, 0;
mov.u32 %r9819, %r9815;
$L__BB1_1324:
add.s32 %r7000, %r9804, -3;
mov.u32 %r7001, 1;
shl.b32 %r7002, %r7001, %r7000;
and.b32 %r7003, %r7002, %r10030;
setp.ne.s32 %p1697, %r7003, 0;
and.b32 %r7004, %r9818, 127;
selp.u32 %r7005, 1, 0, %p1697;
bfi.b32 %r9822, %r7004, %r7005, 1, 7;
add.s32 %r9821, %r9817, -1;
setp.ne.s32 %p1698, %r9821, 0;
mov.u32 %r9823, %r9819;
@%p1698 bra $L__BB1_1327;
setp.gt.u32 %p1699, %r9816, 191;
mov.u32 %r9821, 0;
mov.u32 %r9823, %r7001;
@%p1699 bra $L__BB1_1327;
cvt.u16.u32 %rs859, %r9822;
and.b16 %rs860, %rs859, 255;
add.s32 %r7009, %r9816, 17477;
cvt.u64.u32 %rd1104, %r7009;
add.s64 %rd1105, %rd1104, %rd4;
add.s64 %rd1106, %rd1, %rd1105;
st.global.u8 [%rd1106], %rs859;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1700, %rs860, 255;
selp.b32 %r9821, 7, 8, %p1700;
mov.u32 %r9822, 0;
mov.u32 %r9823, %r9819;
$L__BB1_1327:
add.s32 %r9804, %r9804, -4;
shl.b32 %r7011, %r7001, %r9804;
and.b32 %r7012, %r7011, %r10030;
setp.ne.s32 %p1701, %r7012, 0;
and.b32 %r7013, %r9822, 127;
selp.u32 %r7014, 1, 0, %p1701;
bfi.b32 %r7015, %r7013, %r7014, 1, 15;
cvt.u16.u32 %rs1253, %r7015;
add.s32 %r9825, %r9821, -1;
setp.ne.s32 %p1702, %r9825, 0;
mov.u32 %r9826, %r9823;
@%p1702 bra $L__BB1_1330;
setp.gt.u32 %p1703, %r9816, 191;
mov.u32 %r9826, 1;
mov.u32 %r9825, 0;
@%p1703 bra $L__BB1_1330;
add.s32 %r7018, %r9816, 17477;
cvt.u64.u32 %rd1107, %r7018;
add.s64 %rd1108, %rd1107, %rd4;
add.s64 %rd1109, %rd1, %rd1108;
and.b16 %rs862, %rs1253, 255;
st.global.u8 [%rd1109], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1704, %rs862, 255;
selp.b32 %r9825, 7, 8, %p1704;
mov.u16 %rs1253, 0;
mov.u32 %r9826, %r9823;
$L__BB1_1330:
setp.ne.s32 %p1705, %r9804, 0;
@%p1705 bra $L__BB1_1318;
$L__BB1_1331:
add.s32 %r7020, %r10031, -1;
setp.eq.s32 %p1706, %r10031, 0;
mov.u32 %r10030, 0;
selp.b32 %r10031, 0, %r7020, %p1706;
setp.lt.u32 %p1707, %r10031, 3;
mov.u32 %r9830, %r10030;
@%p1707 bra $L__BB1_1334;
setp.lt.u32 %p1708, %r10031, 6;
mov.u32 %r9830, 1;
@%p1708 bra $L__BB1_1334;
setp.lt.u32 %p1709, %r10031, 9;
setp.eq.s32 %p1710, %r10031, 11;
selp.b32 %r7022, 4, 5, %p1710;
setp.lt.u32 %p1711, %r10031, 11;
selp.b32 %r7023, 3, %r7022, %p1711;
selp.b32 %r9830, 2, %r7023, %p1709;
$L__BB1_1334:
mov.u32 %r7025, 1;
shl.b32 %r10032, %r7025, %r9830;
mov.u32 %r10033, %r9826;
bra.uni $L__BB1_1343;
$L__BB1_1335:
add.s32 %r10030, %r10030, 1;
setp.lt.u32 %p1712, %r10030, %r10032;
@%p1712 bra $L__BB1_1343;
shl.b16 %rs863, %rs1253, 1;
or.b16 %rs1253, %rs863, 1;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p1713, %r9825, 0;
mov.u32 %r9833, %r10033;
@%p1713 bra $L__BB1_1339;
bra.uni $L__BB1_1337;
$L__BB1_1339:
add.s32 %r7029, %r10031, 1;
min.u32 %r10031, %r7029, 12;
setp.lt.u32 %p1716, %r10031, 3;
mov.u32 %r10030, 0;
mov.u32 %r9834, %r10030;
@%p1716 bra $L__BB1_1342;
setp.lt.u32 %p1717, %r10031, 6;
mov.u32 %r9834, 1;
@%p1717 bra $L__BB1_1342;
setp.lt.u32 %p1718, %r10031, 9;
setp.eq.s32 %p1719, %r10031, 11;
selp.b32 %r7031, 4, 5, %p1719;
setp.lt.u32 %p1720, %r10031, 11;
selp.b32 %r7032, 3, %r7031, %p1720;
selp.b32 %r9834, 2, %r7032, %p1718;
$L__BB1_1342:
mov.u32 %r7034, 1;
shl.b32 %r10032, %r7034, %r9834;
mov.u32 %r10033, %r9833;
$L__BB1_1343:
max.s32 %r2558, %r9776, 1;
and.b16 %rs866, %rs275, 15;
cvt.u32.u16 %r2559, %rs866;
and.b32 %r2560, %r9760, 1;
setp.eq.s32 %p1721, %r2560, 0;
mov.u32 %r9855, %r10485;
@%p1721 bra $L__BB1_1350;
and.b32 %r7035, %r2559, 1;
sub.s32 %r9841, %r2558, %r7035;
setp.eq.s32 %p1722, %r9841, 0;
mov.u32 %r9855, %r10485;
@%p1722 bra $L__BB1_1350;
mov.u32 %r7036, -1;
shl.b32 %r7037, %r7036, %r9841;
not.b32 %r7038, %r7037;
and.b32 %r9842, %r9754, %r7038;
// Bit-packing loop: appends the low %r9841 bits of %r9842 (LSB first) to the
// 32-bit accumulator %r10454 and stores each completed byte to global memory
// at %rd1 + %rd4 + %r10451, i.e. at ascending addresses.
//   %r10451 : byte offset of the next store
//   %r10452 : bit capacity of the current byte (8, or 7 right after a 255 byte)
//   %r10453 : number of bits already occupied in %r10454
// NOTE(review): presumably the HTJ2K MagSgn forward stream with 0xFF
// bit-stuffing -- confirm against the CUDA source.
$L__BB1_1346:
// Capacity guard: once %r10451 exceeds 17476, leave the loop with %r9855 = 1.
setp.gt.u32 %p1723, %r10451, 17476;
mov.u32 %r9855, 1;
@%p1723 bra $L__BB1_1350;
// take = min(capacity - used, remaining)
sub.s32 %r7040, %r10452, %r10453;
min.u32 %r7041, %r7040, %r9841;
// mask = (take == 32) ? 0xFFFFFFFF : ~(0xFFFFFFFF << take)
setp.eq.s32 %p1724, %r7041, 32;
mov.u32 %r7042, -1;
shl.b32 %r7043, %r7042, %r7041;
not.b32 %r7044, %r7043;
selp.b32 %r7045, -1, %r7044, %p1724;
// OR the selected low bits into the accumulator at bit offset %r10453.
and.b32 %r7046, %r7045, %r9842;
shl.b32 %r7047, %r7046, %r10453;
or.b32 %r10454, %r7047, %r10454;
// Advance the fill level, drop the consumed bits, shrink the remaining count.
add.s32 %r10453, %r7041, %r10453;
shr.u32 %r9842, %r9842, %r7041;
sub.s32 %r9841, %r9841, %r7041;
// Byte not yet full: skip the store.
setp.lt.u32 %p1725, %r10453, %r10452;
@%p1725 bra $L__BB1_1349;
// Store the low byte of the accumulator and advance the write offset.
cvt.u64.u32 %rd1110, %r10451;
add.s64 %rd1111, %rd1110, %rd4;
add.s64 %rd1112, %rd1, %rd1111;
st.global.u8 [%rd1112], %r10454;
add.s32 %r10451, %r10451, 1;
// After a byte equal to 255 the next byte may hold only 7 bits, otherwise 8.
setp.eq.s32 %p1726, %r10454, 255;
selp.b32 %r10452, 7, 8, %p1726;
mov.u32 %r10453, 0;
mov.u32 %r10454, %r10453;
$L__BB1_1349:
// Repeat while bits remain; on the normal exit %r9855 is a copy of %r10485
// instead of the 1 set by the capacity guard.
setp.ne.s32 %p1727, %r9841, 0;
mov.u32 %r9855, %r10485;
@%p1727 bra $L__BB1_1346;
$L__BB1_1350:
setp.eq.s32 %p1728, %r2444, 0;
mov.u32 %r9870, %r9855;
@%p1728 bra $L__BB1_1357;
shr.u32 %r7050, %r2559, 1;
and.b32 %r7051, %r7050, 1;
sub.s32 %r9856, %r2558, %r7051;
setp.eq.s32 %p1729, %r9856, 0;
mov.u32 %r9870, %r9855;
@%p1729 bra $L__BB1_1357;
mov.u32 %r7052, -1;
shl.b32 %r7053, %r7052, %r9856;
not.b32 %r7054, %r7053;
and.b32 %r9857, %r9758, %r7054;
$L__BB1_1353:
setp.gt.u32 %p1730, %r10451, 17476;
mov.u32 %r9870, 1;
@%p1730 bra $L__BB1_1357;
sub.s32 %r7056, %r10452, %r10453;
min.u32 %r7057, %r7056, %r9856;
setp.eq.s32 %p1731, %r7057, 32;
mov.u32 %r7058, -1;
shl.b32 %r7059, %r7058, %r7057;
not.b32 %r7060, %r7059;
selp.b32 %r7061, -1, %r7060, %p1731;
and.b32 %r7062, %r7061, %r9857;
shl.b32 %r7063, %r7062, %r10453;
or.b32 %r10454, %r7063, %r10454;
add.s32 %r10453, %r7057, %r10453;
shr.u32 %r9857, %r9857, %r7057;
sub.s32 %r9856, %r9856, %r7057;
setp.lt.u32 %p1732, %r10453, %r10452;
@%p1732 bra $L__BB1_1356;
cvt.u64.u32 %rd1113, %r10451;
add.s64 %rd1114, %rd1113, %rd4;
add.s64 %rd1115, %rd1, %rd1114;
st.global.u8 [%rd1115], %r10454;
add.s32 %r10451, %r10451, 1;
setp.eq.s32 %p1733, %r10454, 255;
selp.b32 %r10452, 7, 8, %p1733;
mov.u32 %r10453, 0;
mov.u32 %r10454, %r10453;
$L__BB1_1356:
setp.ne.s32 %p1734, %r9856, 0;
mov.u32 %r9870, %r9855;
@%p1734 bra $L__BB1_1353;
$L__BB1_1357:
and.b32 %r7066, %r9760, 4;
setp.eq.s32 %p1735, %r7066, 0;
mov.u32 %r9885, %r9870;
@%p1735 bra $L__BB1_1364;
shr.u32 %r7067, %r2559, 2;
and.b32 %r7068, %r7067, 1;
sub.s32 %r9871, %r2558, %r7068;
setp.eq.s32 %p1736, %r9871, 0;
mov.u32 %r9885, %r9870;
@%p1736 bra $L__BB1_1364;
mov.u32 %r7069, -1;
shl.b32 %r7070, %r7069, %r9871;
not.b32 %r7071, %r7070;
and.b32 %r9872, %r9774, %r7071;
$L__BB1_1360:
setp.gt.u32 %p1737, %r10451, 17476;
mov.u32 %r9885, 1;
@%p1737 bra $L__BB1_1364;
sub.s32 %r7073, %r10452, %r10453;
min.u32 %r7074, %r7073, %r9871;
setp.eq.s32 %p1738, %r7074, 32;
mov.u32 %r7075, -1;
shl.b32 %r7076, %r7075, %r7074;
not.b32 %r7077, %r7076;
selp.b32 %r7078, -1, %r7077, %p1738;
and.b32 %r7079, %r7078, %r9872;
shl.b32 %r7080, %r7079, %r10453;
or.b32 %r10454, %r7080, %r10454;
add.s32 %r10453, %r7074, %r10453;
shr.u32 %r9872, %r9872, %r7074;
sub.s32 %r9871, %r9871, %r7074;
setp.lt.u32 %p1739, %r10453, %r10452;
@%p1739 bra $L__BB1_1363;
cvt.u64.u32 %rd1116, %r10451;
add.s64 %rd1117, %rd1116, %rd4;
add.s64 %rd1118, %rd1, %rd1117;
st.global.u8 [%rd1118], %r10454;
add.s32 %r10451, %r10451, 1;
setp.eq.s32 %p1740, %r10454, 255;
selp.b32 %r10452, 7, 8, %p1740;
mov.u32 %r10453, 0;
mov.u32 %r10454, %r10453;
$L__BB1_1363:
setp.ne.s32 %p1741, %r9871, 0;
mov.u32 %r9885, %r9870;
@%p1741 bra $L__BB1_1360;
$L__BB1_1364:
setp.eq.s32 %p1742, %r2446, 0;
mov.u32 %r10485, %r9885;
@%p1742 bra $L__BB1_1371;
shr.u32 %r7083, %r2559, 3;
sub.s32 %r9886, %r2558, %r7083;
setp.eq.s32 %p1743, %r9886, 0;
mov.u32 %r10485, %r9885;
@%p1743 bra $L__BB1_1371;
mov.u32 %r7084, -1;
shl.b32 %r7085, %r7084, %r9886;
not.b32 %r7086, %r7085;
and.b32 %r9887, %r9773, %r7086;
$L__BB1_1367:
setp.gt.u32 %p1744, %r10451, 17476;
mov.u32 %r10485, 1;
@%p1744 bra $L__BB1_1371;
sub.s32 %r7088, %r10452, %r10453;
min.u32 %r7089, %r7088, %r9886;
setp.eq.s32 %p1745, %r7089, 32;
mov.u32 %r7090, -1;
shl.b32 %r7091, %r7090, %r7089;
not.b32 %r7092, %r7091;
selp.b32 %r7093, -1, %r7092, %p1745;
and.b32 %r7094, %r7093, %r9887;
shl.b32 %r7095, %r7094, %r10453;
or.b32 %r10454, %r7095, %r10454;
add.s32 %r10453, %r7089, %r10453;
shr.u32 %r9887, %r9887, %r7089;
sub.s32 %r9886, %r9886, %r7089;
setp.lt.u32 %p1746, %r10453, %r10452;
@%p1746 bra $L__BB1_1370;
cvt.u64.u32 %rd1119, %r10451;
add.s64 %rd1120, %rd1119, %rd4;
add.s64 %rd1121, %rd1, %rd1120;
st.global.u8 [%rd1121], %r10454;
add.s32 %r10451, %r10451, 1;
setp.eq.s32 %p1747, %r10454, 255;
selp.b32 %r10452, 7, 8, %p1747;
mov.u32 %r10453, 0;
mov.u32 %r10454, %r10453;
$L__BB1_1370:
setp.ne.s32 %p1748, %r9886, 0;
mov.u32 %r10485, %r9885;
@%p1748 bra $L__BB1_1367;
$L__BB1_1371:
add.s32 %r7098, %r9734, 2;
setp.lt.u32 %p1749, %r7098, %r5;
mul.lo.s32 %r2653, %r2440, 6;
cvt.u64.u32 %rd1122, %r2653;
add.s64 %rd65, %rd63, %rd1122;
add.s32 %r7099, %r2653, 2;
cvt.u64.u32 %rd1123, %r7099;
add.s64 %rd66, %rd63, %rd1123;
@%p1749 bra $L__BB1_1400;
bra.uni $L__BB1_1372;
// Loads one signed 32-bit value from global memory and derives, for that
// value, a sign-magnitude word (%r9945), a bit-length figure (%r9946), a
// combined magnitude/sign value (%r9947) and an initial flag word (%r9953).
// NOTE(review): presumably the first sample of a quad in the HTJ2K cleanup
// pass, with %r2355 acting as a bit-plane shift -- confirm against the source.
$L__BB1_1400:
// Address = %rd3 + 4 * (%r9751 + %rd5): a 32-bit element indexed by %r9751
// plus the 64-bit offset %rd5.
cvt.u64.u32 %rd1137, %r9751;
add.s64 %rd1138, %rd1137, %rd5;
shl.b64 %rd1139, %rd1138, 2;
add.s64 %rd1140, %rd3, %rd1139;
ld.global.u32 %r2726, [%rd1140];
// A zero input leaves %r9945 and %r9946 at 0.
setp.eq.s32 %p1786, %r2726, 0;
mov.u32 %r9946, 0;
mov.u32 %r9945, %r9946;
@%p1786 bra $L__BB1_1402;
// Sign-magnitude form: bit 31 of the input OR-ed with (|input| << %r2355).
and.b32 %r7170, %r2726, -2147483648;
abs.s32 %r7171, %r2726;
shl.b32 %r7172, %r7171, %r2355;
or.b32 %r9945, %r7172, %r7170;
$L__BB1_1402:
// Shift the sign bit out (<< 1), undo the %r2355 shift, and clear bit 0.
shl.b32 %r7176, %r9945, 1;
shr.u32 %r7177, %r7176, %r2355;
and.b32 %r2729, %r7177, -2;
// If the result is zero, %r9947 and %r9953 stay 0 as well.
setp.eq.s32 %p1787, %r2729, 0;
mov.u32 %r9947, %r9946;
mov.u32 %r9953, %r9946;
@%p1787 bra $L__BB1_1404;
// %r9946 = 32 - clz(%r2729 - 1): number of significant bits of (%r2729 - 1).
add.s32 %r7179, %r2729, -1;
clz.b32 %r7180, %r7179;
mov.u32 %r7181, 32;
sub.s32 %r9946, %r7181, %r7180;
// %r9947 = %r2729 - 2 + (bit 31 of %r9945), i.e. the sign lands in bit 0.
shr.u32 %r7182, %r9945, 31;
add.s32 %r7183, %r7182, %r2729;
add.s32 %r9947, %r7183, -2;
// Mark this value as non-zero in bit 0 of %r9953.
mov.u32 %r9953, 1;
$L__BB1_1404:
mov.u32 %r9950, 0;
mov.u32 %r9949, %r9950;
@%p1639 bra $L__BB1_1407;
add.s32 %r7186, %r9751, %r1;
cvt.u64.u32 %rd1141, %r7186;
add.s64 %rd1142, %rd1141, %rd5;
shl.b64 %rd1143, %rd1142, 2;
add.s64 %rd1144, %rd3, %rd1143;
ld.global.u32 %r2735, [%rd1144];
setp.eq.s32 %p1789, %r2735, 0;
@%p1789 bra $L__BB1_1407;
and.b32 %r7187, %r2735, -2147483648;
abs.s32 %r7188, %r2735;
shl.b32 %r7189, %r7188, %r2355;
or.b32 %r9949, %r7189, %r7187;
$L__BB1_1407:
shl.b32 %r7192, %r9949, 1;
shr.u32 %r7193, %r7192, %r2355;
and.b32 %r2738, %r7193, -2;
setp.eq.s32 %p1790, %r2738, 0;
mov.u32 %r9951, %r9950;
mov.u32 %r9969, %r9946;
@%p1790 bra $L__BB1_1409;
or.b32 %r9953, %r9953, 2;
add.s32 %r7194, %r2738, -1;
clz.b32 %r7195, %r7194;
mov.u32 %r7196, 32;
sub.s32 %r9950, %r7196, %r7195;
max.s32 %r9969, %r9946, %r9950;
shr.u32 %r7197, %r9949, 31;
add.s32 %r7198, %r7197, %r2738;
add.s32 %r9951, %r7198, -2;
$L__BB1_1409:
add.s32 %r9968, %r9751, 1;
add.s32 %r7203, %r9734, 3;
setp.ge.u32 %p1791, %r7203, %r5;
mov.u32 %r9971, 0;
mov.u32 %r9964, %r9971;
mov.u32 %r9965, %r9971;
mov.u32 %r9966, %r9971;
mov.u32 %r9967, %r9971;
@%p1791 bra $L__BB1_1420;
cvt.u64.u32 %rd1145, %r9968;
add.s64 %rd1146, %rd1145, %rd5;
shl.b64 %rd1147, %rd1146, 2;
add.s64 %rd1148, %rd3, %rd1147;
ld.global.u32 %r2748, [%rd1148];
setp.eq.s32 %p1792, %r2748, 0;
mov.u32 %r9965, 0;
mov.u32 %r9954, %r9965;
@%p1792 bra $L__BB1_1412;
and.b32 %r7205, %r2748, -2147483648;
abs.s32 %r7206, %r2748;
shl.b32 %r7207, %r7206, %r2355;
or.b32 %r9954, %r7207, %r7205;
$L__BB1_1412:
shl.b32 %r7210, %r9954, 1;
shr.u32 %r7211, %r7210, %r2355;
and.b32 %r2751, %r7211, -2;
setp.eq.s32 %p1793, %r2751, 0;
mov.u32 %r9967, %r9965;
@%p1793 bra $L__BB1_1414;
or.b32 %r9953, %r9953, 4;
add.s32 %r7212, %r2751, -1;
clz.b32 %r7213, %r7212;
mov.u32 %r7214, 32;
sub.s32 %r9965, %r7214, %r7213;
max.s32 %r9969, %r9969, %r9965;
shr.u32 %r7215, %r9954, 31;
add.s32 %r7216, %r7215, %r2751;
add.s32 %r9967, %r7216, -2;
$L__BB1_1414:
mov.u32 %r9964, 0;
mov.u32 %r9959, %r9964;
@%p1639 bra $L__BB1_1417;
add.s32 %r7219, %r9968, %r1;
cvt.u64.u32 %rd1149, %r7219;
add.s64 %rd1150, %rd1149, %rd5;
shl.b64 %rd1151, %rd1150, 2;
add.s64 %rd1152, %rd3, %rd1151;
ld.global.u32 %r2760, [%rd1152];
setp.eq.s32 %p1795, %r2760, 0;
@%p1795 bra $L__BB1_1417;
and.b32 %r7220, %r2760, -2147483648;
abs.s32 %r7221, %r2760;
shl.b32 %r7222, %r7221, %r2355;
or.b32 %r9959, %r7222, %r7220;
$L__BB1_1417:
shl.b32 %r7225, %r9959, 1;
shr.u32 %r7226, %r7225, %r2355;
and.b32 %r2763, %r7226, -2;
setp.eq.s32 %p1796, %r2763, 0;
mov.u32 %r9966, %r9964;
@%p1796 bra $L__BB1_1419;
or.b32 %r9953, %r9953, 8;
add.s32 %r7227, %r2763, -1;
clz.b32 %r7228, %r7227;
mov.u32 %r7229, 32;
sub.s32 %r9964, %r7229, %r7228;
max.s32 %r9969, %r9969, %r9964;
shr.u32 %r7230, %r9959, 31;
add.s32 %r7231, %r7230, %r2763;
add.s32 %r9966, %r7231, -2;
$L__BB1_1419:
add.s32 %r9968, %r9751, 2;
// Join point: commits the advanced index, derives %r2780 and %r2781, and --
// when %r9969 is at least 2 -- builds a 4-bit mask in %r9971 telling which of
// %r9946, %r9950, %r9965, %r9964 equal %r9969.
// NOTE(review): %r9969 is accumulated with max.s32 earlier in this chunk, so
// the mask presumably marks the samples that reach the quad maximum -- confirm.
$L__BB1_1420:
mov.u32 %r9751, %r9968;
// %r2780 = (%r9760 >> 1) | %r2560
shr.u32 %r7233, %r9760, 1;
or.b32 %r2780, %r7233, %r2560;
// %r2781 = (%r9969 > 1) ? %r9969 - 1 : 0
add.s32 %r7234, %r9969, -1;
setp.lt.s32 %p1797, %r9969, 2;
setp.gt.s32 %p1798, %r9969, 1;
selp.b32 %r2781, %r7234, 0, %p1798;
// For %r9969 < 2 the mask computation is skipped and %r9971 is left unchanged.
@%p1797 bra $L__BB1_1422;
// bit 0: %r9946 == %r9969
setp.eq.s32 %p1799, %r9946, %r9969;
selp.u32 %r7235, 1, 0, %p1799;
// bit 1: %r9950 == %r9969 (one bit of the all-ones/zero select is inserted
// at position 1 by bfi)
setp.eq.s32 %p1800, %r9950, %r9969;
selp.u32 %r7236, -1, 0, %p1800;
bfi.b32 %r7237, %r7236, %r7235, 1, 1;
// bit 2: %r9965 == %r9969 (0/1 multiplied by 4)
setp.eq.s32 %p1801, %r9965, %r9969;
selp.u16 %rs886, 1, 0, %p1801;
mul.wide.u16 %r7238, %rs886, 4;
or.b32 %r7239, %r7237, %r7238;
// bit 3: %r9964 == %r9969 (0/1 multiplied by 8)
setp.eq.s32 %p1802, %r9964, %r9969;
selp.u16 %rs887, 1, 0, %p1802;
mul.wide.u16 %r7240, %rs887, 8;
or.b32 %r9971, %r7239, %r7240;
$L__BB1_1422:
and.b32 %r7241, %r9950, 255;
and.b32 %r7242, %r9771, 255;
setp.lt.u32 %p1803, %r7241, %r7242;
cvt.u16.u32 %rs888, %r9950;
selp.b16 %rs889, %rs274, %rs888, %p1803;
st.shared.u8 [%r2443+1], %rs889;
st.shared.u8 [%r2443+2], %r9964;
and.b32 %r2784, %r9953, 2;
shr.u32 %r7243, %r2784, 1;
or.b32 %r7244, %r2447, %r7243;
st.shared.u8 [%r2445+1], %r7244;
and.b32 %r2785, %r9953, 8;
shr.u32 %r7245, %r2785, 3;
st.shared.u8 [%r2445+2], %r7245;
shl.b32 %r7246, %r9953, 4;
shl.b32 %r7247, %r2780, 8;
or.b32 %r7248, %r7246, %r7247;
or.b32 %r7249, %r7248, %r9971;
mul.wide.u32 %rd1154, %r7249, 2;
add.s64 %rd1155, %rd62, %rd1154;
ld.global.u16 %rs319, [%rd1155];
shr.u16 %rs890, %rs319, 4;
and.b16 %rs320, %rs890, 7;
setp.eq.s16 %p1804, %rs320, 0;
mov.u32 %r9983, %r9790;
@%p1804 bra $L__BB1_1429;
cvt.u32.u16 %r9972, %rs320;
shr.u16 %rs891, %rs319, 8;
cvt.u32.u16 %r9973, %rs891;
$L__BB1_1424:
mov.u32 %r2788, %r9972;
setp.gt.u32 %p1805, %r10264, 2879;
mov.u32 %r9983, 1;
@%p1805 bra $L__BB1_1429;
mov.u32 %r7251, 8;
sub.s32 %r7252, %r7251, %r10266;
sub.s32 %r7253, %r7252, %r10265;
min.u32 %r7254, %r7253, %r2788;
setp.eq.s32 %p1806, %r7254, 32;
mov.u32 %r7255, -1;
shl.b32 %r7256, %r7255, %r7254;
not.b32 %r7257, %r7256;
selp.b32 %r7258, -1, %r7257, %p1806;
and.b32 %r7259, %r7258, %r9973;
shl.b32 %r7260, %r7259, %r10265;
cvt.u16.u32 %rs892, %r7260;
or.b16 %rs1322, %rs1322, %rs892;
add.s32 %r10265, %r7254, %r10265;
sub.s32 %r9972, %r2788, %r7254;
shr.u32 %r9973, %r9973, %r7254;
setp.gt.u32 %p1807, %r7253, %r2788;
@%p1807 bra $L__BB1_1428;
setp.ne.s32 %p1808, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs893, %rs1322, 255;
setp.ne.s16 %p1809, %rs893, 127;
and.pred %p1810, %p1808, %p1809;
@%p1810 bra $L__BB1_1428;
mov.u32 %r7263, 20548;
sub.s32 %r7264, %r7263, %r10264;
cvt.u64.u32 %rd1156, %r7264;
add.s64 %rd1157, %rd1156, %rd4;
add.s64 %rd1158, %rd1, %rd1157;
st.global.u8 [%rd1158], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p1811, %rs893, 143;
selp.u32 %r10266, 1, 0, %p1811;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1428:
setp.ne.s32 %p1812, %r9972, 0;
mov.u32 %r9983, %r9790;
@%p1812 bra $L__BB1_1424;
$L__BB1_1429:
setp.ne.s32 %p1813, %r2780, 0;
@%p1813 bra $L__BB1_1477;
setp.eq.s32 %p1814, %r9953, 0;
add.s32 %r7265, %r9816, 17477;
cvt.u64.u32 %rd1159, %r7265;
add.s64 %rd1160, %rd1159, %rd4;
add.s64 %rd67, %rd1, %rd1160;
@%p1814 bra $L__BB1_1469;
shl.b16 %rs1253, %rs1253, 1;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p1815, %r9825, 0;
mov.u32 %r10019, %r10033;
@%p1815 bra $L__BB1_1434;
bra.uni $L__BB1_1432;
$L__BB1_1434:
setp.lt.u32 %p1817, %r10031, 3;
mov.u32 %r9987, 0;
@%p1817 bra $L__BB1_1437;
setp.lt.u32 %p1818, %r10031, 6;
mov.u32 %r9987, 1;
@%p1818 bra $L__BB1_1437;
setp.lt.u32 %p1819, %r10031, 9;
setp.eq.s32 %p1820, %r10031, 11;
selp.b32 %r7271, 4, 5, %p1820;
setp.lt.u32 %p1821, %r10031, 11;
selp.b32 %r7272, 3, %r7271, %p1821;
selp.b32 %r9987, 2, %r7272, %p1819;
$L__BB1_1437:
setp.eq.s32 %p1822, %r9987, 0;
@%p1822 bra $L__BB1_1465;
add.s32 %r2812, %r9987, -1;
and.b32 %r2813, %r9987, 3;
setp.eq.s32 %p1823, %r2813, 0;
mov.u32 %r9997, %r9987;
mov.u32 %r9998, %r10019;
@%p1823 bra $L__BB1_1450;
mov.u32 %r7274, 1;
shl.b32 %r7275, %r7274, %r2812;
and.b32 %r7276, %r7275, %r10030;
setp.ne.s32 %p1824, %r7276, 0;
selp.u32 %r7277, 1, 0, %p1824;
cvt.u32.u16 %r7278, %rs1253;
bfi.b32 %r7279, %r7278, %r7277, 1, 8;
cvt.u16.u32 %rs1253, %r7279;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p1825, %r9825, 0;
mov.u32 %r9998, %r10019;
@%p1825 bra $L__BB1_1442;
setp.gt.u32 %p1826, %r9816, 191;
mov.u32 %r9825, 0;
mov.u32 %r9998, %r7274;
@%p1826 bra $L__BB1_1442;
add.s32 %r7283, %r9816, 17477;
cvt.u64.u32 %rd1161, %r7283;
add.s64 %rd1162, %rd1161, %rd4;
add.s64 %rd1163, %rd1, %rd1162;
st.global.u8 [%rd1163], %rs1253;
add.s32 %r9816, %r9816, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9825, 8;
mov.u32 %r9998, %r10019;
$L__BB1_1442:
setp.eq.s32 %p1827, %r2813, 1;
mov.u32 %r10019, %r9998;
mov.u32 %r9997, %r2812;
@%p1827 bra $L__BB1_1450;
add.s32 %r9997, %r9987, -2;
mov.u32 %r7284, 1;
shl.b32 %r7285, %r7284, %r9997;
and.b32 %r7286, %r7285, %r10030;
setp.ne.s32 %p1828, %r7286, 0;
selp.u32 %r7287, 1, 0, %p1828;
cvt.u32.u16 %r7288, %rs1253;
bfi.b32 %r7289, %r7288, %r7287, 1, 8;
cvt.u16.u32 %rs1253, %r7289;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p1829, %r9825, 0;
mov.u32 %r9993, %r9998;
@%p1829 bra $L__BB1_1446;
setp.gt.u32 %p1830, %r9816, 191;
mov.u32 %r9825, 0;
mov.u32 %r9993, %r7284;
@%p1830 bra $L__BB1_1446;
add.s32 %r7292, %r9816, 17477;
cvt.u64.u32 %rd1164, %r7292;
add.s64 %rd1165, %rd1164, %rd4;
add.s64 %rd1166, %rd1, %rd1165;
and.b16 %rs900, %rs1253, 255;
st.global.u8 [%rd1166], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1831, %rs900, 255;
selp.b32 %r9825, 7, 8, %p1831;
mov.u16 %rs1253, 0;
mov.u32 %r9993, %r9998;
$L__BB1_1446:
setp.eq.s32 %p1832, %r2813, 2;
mov.u32 %r10019, %r9993;
mov.u32 %r9998, %r9993;
@%p1832 bra $L__BB1_1450;
add.s32 %r9997, %r9987, -3;
mov.u32 %r7293, 1;
shl.b32 %r7294, %r7293, %r9997;
and.b32 %r7295, %r7294, %r10030;
setp.ne.s32 %p1833, %r7295, 0;
selp.u32 %r7296, 1, 0, %p1833;
cvt.u32.u16 %r7297, %rs1253;
bfi.b32 %r7298, %r7297, %r7296, 1, 8;
cvt.u16.u32 %rs1253, %r7298;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p1834, %r9825, 0;
mov.u32 %r10019, %r9993;
mov.u32 %r9998, %r9993;
@%p1834 bra $L__BB1_1450;
setp.gt.u32 %p1835, %r9816, 191;
mov.u32 %r9825, 0;
mov.u32 %r10019, %r7293;
mov.u32 %r9998, %r7293;
@%p1835 bra $L__BB1_1450;
add.s32 %r7303, %r9816, 17477;
cvt.u64.u32 %rd1167, %r7303;
add.s64 %rd1168, %rd1167, %rd4;
add.s64 %rd1169, %rd1, %rd1168;
and.b16 %rs903, %rs1253, 255;
st.global.u8 [%rd1169], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1836, %rs903, 255;
selp.b32 %r9825, 7, 8, %p1836;
mov.u16 %rs1253, 0;
mov.u32 %r10019, %r9993;
mov.u32 %r9998, %r9993;
$L__BB1_1450:
setp.lt.u32 %p1837, %r2812, 3;
@%p1837 bra $L__BB1_1465;
mov.u32 %r10019, %r9998;
$L__BB1_1452:
add.s32 %r7304, %r9997, -1;
mov.u32 %r7305, 1;
shl.b32 %r7306, %r7305, %r7304;
and.b32 %r7307, %r7306, %r10030;
setp.ne.s32 %p1838, %r7307, 0;
selp.u32 %r7308, 1, 0, %p1838;
cvt.u32.u16 %r7309, %rs1253;
bfi.b32 %r10007, %r7309, %r7308, 1, 8;
add.s32 %r10006, %r9825, -1;
setp.ne.s32 %p1839, %r10006, 0;
mov.u32 %r10008, %r10019;
@%p1839 bra $L__BB1_1455;
setp.gt.u32 %p1840, %r9816, 191;
mov.u32 %r10006, 0;
mov.u32 %r10008, %r7305;
@%p1840 bra $L__BB1_1455;
cvt.u16.u32 %rs904, %r10007;
and.b16 %rs905, %rs904, 255;
add.s32 %r7313, %r9816, 17477;
cvt.u64.u32 %rd1170, %r7313;
add.s64 %rd1171, %rd1170, %rd4;
add.s64 %rd1172, %rd1, %rd1171;
st.global.u8 [%rd1172], %rs904;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1841, %rs905, 255;
selp.b32 %r10006, 7, 8, %p1841;
mov.u32 %r10007, 0;
mov.u32 %r10008, %r10019;
$L__BB1_1455:
add.s32 %r7314, %r9997, -2;
shl.b32 %r7316, %r7305, %r7314;
and.b32 %r7317, %r7316, %r10030;
setp.ne.s32 %p1842, %r7317, 0;
and.b32 %r7318, %r10007, 127;
selp.u32 %r7319, 1, 0, %p1842;
bfi.b32 %r10011, %r7318, %r7319, 1, 7;
add.s32 %r10010, %r10006, -1;
setp.ne.s32 %p1843, %r10010, 0;
mov.u32 %r10012, %r10008;
@%p1843 bra $L__BB1_1458;
setp.gt.u32 %p1844, %r9816, 191;
mov.u32 %r10012, 1;
mov.u32 %r10010, 0;
@%p1844 bra $L__BB1_1458;
cvt.u16.u32 %rs906, %r10011;
and.b16 %rs907, %rs906, 255;
add.s32 %r7323, %r9816, 17477;
cvt.u64.u32 %rd1173, %r7323;
add.s64 %rd1174, %rd1173, %rd4;
add.s64 %rd1175, %rd1, %rd1174;
st.global.u8 [%rd1175], %rs906;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1845, %rs907, 255;
selp.b32 %r10010, 7, 8, %p1845;
mov.u32 %r10011, 0;
mov.u32 %r10012, %r10008;
$L__BB1_1458:
add.s32 %r7324, %r9997, -3;
mov.u32 %r7325, 1;
shl.b32 %r7326, %r7325, %r7324;
and.b32 %r7327, %r7326, %r10030;
setp.ne.s32 %p1846, %r7327, 0;
and.b32 %r7328, %r10011, 127;
selp.u32 %r7329, 1, 0, %p1846;
bfi.b32 %r10015, %r7328, %r7329, 1, 7;
add.s32 %r10014, %r10010, -1;
setp.ne.s32 %p1847, %r10014, 0;
mov.u32 %r10016, %r10012;
@%p1847 bra $L__BB1_1461;
setp.gt.u32 %p1848, %r9816, 191;
mov.u32 %r10014, 0;
mov.u32 %r10016, %r7325;
@%p1848 bra $L__BB1_1461;
cvt.u16.u32 %rs908, %r10015;
and.b16 %rs909, %rs908, 255;
add.s32 %r7333, %r9816, 17477;
cvt.u64.u32 %rd1176, %r7333;
add.s64 %rd1177, %rd1176, %rd4;
add.s64 %rd1178, %rd1, %rd1177;
st.global.u8 [%rd1178], %rs908;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1849, %rs909, 255;
selp.b32 %r10014, 7, 8, %p1849;
mov.u32 %r10015, 0;
mov.u32 %r10016, %r10012;
$L__BB1_1461:
add.s32 %r9997, %r9997, -4;
shl.b32 %r7335, %r7325, %r9997;
and.b32 %r7336, %r7335, %r10030;
setp.ne.s32 %p1850, %r7336, 0;
and.b32 %r7337, %r10015, 127;
selp.u32 %r7338, 1, 0, %p1850;
bfi.b32 %r7339, %r7337, %r7338, 1, 15;
cvt.u16.u32 %rs1253, %r7339;
add.s32 %r9825, %r10014, -1;
setp.ne.s32 %p1851, %r9825, 0;
mov.u32 %r10019, %r10016;
@%p1851 bra $L__BB1_1464;
setp.gt.u32 %p1852, %r9816, 191;
mov.u32 %r10019, 1;
mov.u32 %r9825, 0;
@%p1852 bra $L__BB1_1464;
add.s32 %r7342, %r9816, 17477;
cvt.u64.u32 %rd1179, %r7342;
add.s64 %rd1180, %rd1179, %rd4;
add.s64 %rd1181, %rd1, %rd1180;
and.b16 %rs911, %rs1253, 255;
st.global.u8 [%rd1181], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1853, %rs911, 255;
selp.b32 %r9825, 7, 8, %p1853;
mov.u16 %rs1253, 0;
mov.u32 %r10019, %r10016;
$L__BB1_1464:
setp.ne.s32 %p1854, %r9997, 0;
@%p1854 bra $L__BB1_1452;
$L__BB1_1465:
add.s32 %r7344, %r10031, -1;
setp.eq.s32 %p1855, %r10031, 0;
mov.u32 %r10030, 0;
selp.b32 %r10031, 0, %r7344, %p1855;
setp.lt.u32 %p1856, %r10031, 3;
mov.u32 %r10023, %r10030;
@%p1856 bra $L__BB1_1468;
setp.lt.u32 %p1857, %r10031, 6;
mov.u32 %r10023, 1;
@%p1857 bra $L__BB1_1468;
setp.lt.u32 %p1858, %r10031, 9;
setp.eq.s32 %p1859, %r10031, 11;
selp.b32 %r7346, 4, 5, %p1859;
setp.lt.u32 %p1860, %r10031, 11;
selp.b32 %r7347, 3, %r7346, %p1860;
selp.b32 %r10023, 2, %r7347, %p1858;
$L__BB1_1468:
mov.u32 %r7349, 1;
shl.b32 %r10032, %r7349, %r10023;
mov.u32 %r10033, %r10019;
bra.uni $L__BB1_1477;
$L__BB1_1372:
ld.global.u8 %rs297, [%rd65+1];
ld.global.u8 %rs298, [%rd66];
ld.global.u8 %rs299, [%rd66+1];
ld.global.u8 %rs300, [%rd63];
ld.global.u8 %rs301, [%rd63+1];
ld.global.u8 %rs302, [%rd63+2];
ld.global.u8 %rs303, [%rd63+3];
setp.eq.s16 %p1750, %rs297, 0;
mov.u32 %r9912, %r9790;
@%p1750 bra $L__BB1_1379;
ld.global.u8 %r9902, [%rd65];
cvt.u32.u16 %r9901, %rs297;
$L__BB1_1374:
mov.u32 %r2656, %r9901;
setp.gt.u32 %p1751, %r10264, 2879;
mov.u32 %r9912, 1;
@%p1751 bra $L__BB1_1379;
mov.u32 %r7101, 8;
sub.s32 %r7102, %r7101, %r10266;
sub.s32 %r7103, %r7102, %r10265;
min.u32 %r7104, %r7103, %r2656;
setp.eq.s32 %p1752, %r7104, 32;
mov.u32 %r7105, -1;
shl.b32 %r7106, %r7105, %r7104;
not.b32 %r7107, %r7106;
selp.b32 %r7108, -1, %r7107, %p1752;
and.b32 %r7109, %r7108, %r9902;
shl.b32 %r7110, %r7109, %r10265;
cvt.u16.u32 %rs867, %r7110;
or.b16 %rs1322, %rs1322, %rs867;
add.s32 %r10265, %r7104, %r10265;
sub.s32 %r9901, %r2656, %r7104;
shr.u32 %r9902, %r9902, %r7104;
setp.gt.u32 %p1753, %r7103, %r2656;
@%p1753 bra $L__BB1_1378;
setp.ne.s32 %p1754, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs868, %rs1322, 255;
setp.ne.s16 %p1755, %rs868, 127;
and.pred %p1756, %p1754, %p1755;
@%p1756 bra $L__BB1_1378;
mov.u32 %r7113, 20548;
sub.s32 %r7114, %r7113, %r10264;
cvt.u64.u32 %rd1125, %r7114;
add.s64 %rd1126, %rd1125, %rd4;
add.s64 %rd1127, %rd1, %rd1126;
st.global.u8 [%rd1127], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p1757, %rs868, 143;
selp.u32 %r10266, 1, 0, %p1757;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1378:
setp.ne.s32 %p1758, %r9901, 0;
mov.u32 %r9912, %r9790;
@%p1758 bra $L__BB1_1374;
$L__BB1_1379:
setp.eq.s16 %p1759, %rs301, 0;
mov.u32 %r9924, %r9912;
@%p1759 bra $L__BB1_1386;
cvt.u32.u16 %r7115, %rs300;
and.b32 %r9914, %r7115, 255;
cvt.u32.u16 %r7116, %rs301;
and.b32 %r9913, %r7116, 255;
$L__BB1_1381:
mov.u32 %r2675, %r9913;
setp.gt.u32 %p1760, %r10264, 2879;
mov.u32 %r9924, 1;
@%p1760 bra $L__BB1_1386;
mov.u32 %r7118, 8;
sub.s32 %r7119, %r7118, %r10266;
sub.s32 %r7120, %r7119, %r10265;
min.u32 %r7121, %r7120, %r2675;
setp.eq.s32 %p1761, %r7121, 32;
mov.u32 %r7122, -1;
shl.b32 %r7123, %r7122, %r7121;
not.b32 %r7124, %r7123;
selp.b32 %r7125, -1, %r7124, %p1761;
and.b32 %r7126, %r7125, %r9914;
shl.b32 %r7127, %r7126, %r10265;
cvt.u16.u32 %rs872, %r7127;
or.b16 %rs1322, %rs1322, %rs872;
add.s32 %r10265, %r7121, %r10265;
sub.s32 %r9913, %r2675, %r7121;
shr.u32 %r9914, %r9914, %r7121;
setp.gt.u32 %p1762, %r7120, %r2675;
@%p1762 bra $L__BB1_1385;
setp.ne.s32 %p1763, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs873, %rs1322, 255;
setp.ne.s16 %p1764, %rs873, 127;
and.pred %p1765, %p1763, %p1764;
@%p1765 bra $L__BB1_1385;
mov.u32 %r7130, 20548;
sub.s32 %r7131, %r7130, %r10264;
cvt.u64.u32 %rd1128, %r7131;
add.s64 %rd1129, %rd1128, %rd4;
add.s64 %rd1130, %rd1, %rd1129;
st.global.u8 [%rd1130], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p1766, %rs873, 143;
selp.u32 %r10266, 1, 0, %p1766;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1385:
setp.ne.s32 %p1767, %r9913, 0;
mov.u32 %r9924, %r9912;
@%p1767 bra $L__BB1_1381;
$L__BB1_1386:
setp.eq.s16 %p1768, %rs299, 0;
mov.u32 %r9936, %r9924;
@%p1768 bra $L__BB1_1393;
cvt.u32.u16 %r7132, %rs299;
and.b32 %r9925, %r7132, 255;
cvt.u32.u16 %r7133, %rs298;
and.b32 %r9926, %r7133, 255;
$L__BB1_1388:
mov.u32 %r2694, %r9925;
setp.gt.u32 %p1769, %r10264, 2879;
mov.u32 %r9936, 1;
@%p1769 bra $L__BB1_1393;
mov.u32 %r7135, 8;
sub.s32 %r7136, %r7135, %r10266;
sub.s32 %r7137, %r7136, %r10265;
min.u32 %r7138, %r7137, %r2694;
setp.eq.s32 %p1770, %r7138, 32;
mov.u32 %r7139, -1;
shl.b32 %r7140, %r7139, %r7138;
not.b32 %r7141, %r7140;
selp.b32 %r7142, -1, %r7141, %p1770;
and.b32 %r7143, %r7142, %r9926;
shl.b32 %r7144, %r7143, %r10265;
cvt.u16.u32 %rs877, %r7144;
or.b16 %rs1322, %rs1322, %rs877;
add.s32 %r10265, %r7138, %r10265;
sub.s32 %r9925, %r2694, %r7138;
shr.u32 %r9926, %r9926, %r7138;
setp.gt.u32 %p1771, %r7137, %r2694;
@%p1771 bra $L__BB1_1392;
setp.ne.s32 %p1772, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs878, %rs1322, 255;
setp.ne.s16 %p1773, %rs878, 127;
and.pred %p1774, %p1772, %p1773;
@%p1774 bra $L__BB1_1392;
mov.u32 %r7147, 20548;
sub.s32 %r7148, %r7147, %r10264;
cvt.u64.u32 %rd1131, %r7148;
add.s64 %rd1132, %rd1131, %rd4;
add.s64 %rd1133, %rd1, %rd1132;
st.global.u8 [%rd1133], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p1775, %rs878, 143;
selp.u32 %r10266, 1, 0, %p1775;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1392:
setp.ne.s32 %p1776, %r9925, 0;
mov.u32 %r9936, %r9924;
@%p1776 bra $L__BB1_1388;
$L__BB1_1393:
setp.eq.s16 %p1777, %rs303, 0;
mov.u32 %r9750, 0;
mov.u32 %r10267, %r9936;
@%p1777 bra $L__BB1_1631;
cvt.u32.u16 %r7150, %rs302;
and.b32 %r9938, %r7150, 255;
cvt.u32.u16 %r7151, %rs303;
and.b32 %r9937, %r7151, 255;
$L__BB1_1395:
mov.u32 %r2713, %r9937;
setp.gt.u32 %p1778, %r10264, 2879;
mov.u32 %r10267, 1;
@%p1778 bra $L__BB1_1631;
mov.u32 %r7154, 8;
sub.s32 %r7155, %r7154, %r10266;
sub.s32 %r7156, %r7155, %r10265;
min.u32 %r7157, %r7156, %r2713;
setp.eq.s32 %p1779, %r7157, 32;
mov.u32 %r7158, -1;
shl.b32 %r7159, %r7158, %r7157;
not.b32 %r7160, %r7159;
selp.b32 %r7161, -1, %r7160, %p1779;
and.b32 %r7162, %r7161, %r9938;
shl.b32 %r7163, %r7162, %r10265;
cvt.u16.u32 %rs882, %r7163;
or.b16 %rs1322, %rs1322, %rs882;
add.s32 %r10265, %r7157, %r10265;
sub.s32 %r9937, %r2713, %r7157;
shr.u32 %r9938, %r9938, %r7157;
setp.gt.u32 %p1780, %r7156, %r2713;
@%p1780 bra $L__BB1_1399;
setp.ne.s32 %p1781, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs883, %rs1322, 255;
setp.ne.s16 %p1782, %rs883, 127;
and.pred %p1783, %p1781, %p1782;
@%p1783 bra $L__BB1_1399;
mov.u32 %r7166, 20548;
sub.s32 %r7167, %r7166, %r10264;
cvt.u64.u32 %rd1134, %r7167;
add.s64 %rd1135, %rd1134, %rd4;
add.s64 %rd1136, %rd1, %rd1135;
st.global.u8 [%rd1136], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p1784, %rs883, 143;
selp.u32 %r10266, 1, 0, %p1784;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1399:
setp.eq.s32 %p1785, %r9937, 0;
mov.u32 %r10267, %r9936;
@%p1785 bra $L__BB1_1631;
bra.uni $L__BB1_1395;
$L__BB1_1469:
add.s32 %r10030, %r10030, 1;
setp.lt.u32 %p1861, %r10030, %r10032;
@%p1861 bra $L__BB1_1477;
shl.b16 %rs912, %rs1253, 1;
or.b16 %rs1253, %rs912, 1;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p1862, %r9825, 0;
mov.u32 %r10026, %r10033;
@%p1862 bra $L__BB1_1473;
bra.uni $L__BB1_1471;
$L__BB1_1473:
add.s32 %r7353, %r10031, 1;
min.u32 %r10031, %r7353, 12;
setp.lt.u32 %p1865, %r10031, 3;
mov.u32 %r10030, 0;
mov.u32 %r10027, %r10030;
@%p1865 bra $L__BB1_1476;
setp.lt.u32 %p1866, %r10031, 6;
mov.u32 %r10027, 1;
@%p1866 bra $L__BB1_1476;
setp.lt.u32 %p1867, %r10031, 9;
setp.eq.s32 %p1868, %r10031, 11;
selp.b32 %r7355, 4, 5, %p1868;
setp.lt.u32 %p1869, %r10031, 11;
selp.b32 %r7356, 3, %r7355, %p1869;
selp.b32 %r10027, 2, %r7356, %p1867;
$L__BB1_1476:
mov.u32 %r7358, 1;
shl.b32 %r10032, %r7358, %r10027;
mov.u32 %r10033, %r10026;
$L__BB1_1477:
max.s32 %r2896, %r9969, 1;
and.b16 %rs915, %rs319, 15;
cvt.u32.u16 %r2897, %rs915;
and.b32 %r2898, %r9953, 1;
setp.eq.s32 %p1870, %r2898, 0;
mov.u32 %r10048, %r10485;
@%p1870 bra $L__BB1_1484;
and.b32 %r7359, %r2897, 1;
sub.s32 %r10034, %r2896, %r7359;
setp.eq.s32 %p1871, %r10034, 0;
mov.u32 %r10048, %r10485;
@%p1871 bra $L__BB1_1484;
mov.u32 %r7360, -1;
shl.b32 %r7361, %r7360, %r10034;
not.b32 %r7362, %r7361;
and.b32 %r10035, %r9947, %r7362;
$L__BB1_1480:
setp.gt.u32 %p1872, %r10451, 17476;
mov.u32 %r10048, 1;
@%p1872 bra $L__BB1_1484;
sub.s32 %r7364, %r10452, %r10453;
min.u32 %r7365, %r7364, %r10034;
setp.eq.s32 %p1873, %r7365, 32;
mov.u32 %r7366, -1;
shl.b32 %r7367, %r7366, %r7365;
not.b32 %r7368, %r7367;
selp.b32 %r7369, -1, %r7368, %p1873;
and.b32 %r7370, %r7369, %r10035;
shl.b32 %r7371, %r7370, %r10453;
or.b32 %r10454, %r7371, %r10454;
add.s32 %r10453, %r7365, %r10453;
shr.u32 %r10035, %r10035, %r7365;
sub.s32 %r10034, %r10034, %r7365;
setp.lt.u32 %p1874, %r10453, %r10452;
@%p1874 bra $L__BB1_1483;
cvt.u64.u32 %rd1182, %r10451;
add.s64 %rd1183, %rd1182, %rd4;
add.s64 %rd1184, %rd1, %rd1183;
st.global.u8 [%rd1184], %r10454;
add.s32 %r10451, %r10451, 1;
setp.eq.s32 %p1875, %r10454, 255;
selp.b32 %r10452, 7, 8, %p1875;
mov.u32 %r10453, 0;
mov.u32 %r10454, %r10453;
$L__BB1_1483:
setp.ne.s32 %p1876, %r10034, 0;
mov.u32 %r10048, %r10485;
@%p1876 bra $L__BB1_1480;
$L__BB1_1484:
setp.eq.s32 %p1877, %r2784, 0;
mov.u32 %r10063, %r10048;
@%p1877 bra $L__BB1_1491;
shr.u32 %r7374, %r2897, 1;
and.b32 %r7375, %r7374, 1;
sub.s32 %r10049, %r2896, %r7375;
setp.eq.s32 %p1878, %r10049, 0;
mov.u32 %r10063, %r10048;
@%p1878 bra $L__BB1_1491;
mov.u32 %r7376, -1;
shl.b32 %r7377, %r7376, %r10049;
not.b32 %r7378, %r7377;
and.b32 %r10050, %r9951, %r7378;
$L__BB1_1487:
setp.gt.u32 %p1879, %r10451, 17476;
mov.u32 %r10063, 1;
@%p1879 bra $L__BB1_1491;
sub.s32 %r7380, %r10452, %r10453;
min.u32 %r7381, %r7380, %r10049;
setp.eq.s32 %p1880, %r7381, 32;
mov.u32 %r7382, -1;
shl.b32 %r7383, %r7382, %r7381;
not.b32 %r7384, %r7383;
selp.b32 %r7385, -1, %r7384, %p1880;
and.b32 %r7386, %r7385, %r10050;
shl.b32 %r7387, %r7386, %r10453;
or.b32 %r10454, %r7387, %r10454;
add.s32 %r10453, %r7381, %r10453;
shr.u32 %r10050, %r10050, %r7381;
sub.s32 %r10049, %r10049, %r7381;
setp.lt.u32 %p1881, %r10453, %r10452;
@%p1881 bra $L__BB1_1490;
cvt.u64.u32 %rd1185, %r10451;
add.s64 %rd1186, %rd1185, %rd4;
add.s64 %rd1187, %rd1, %rd1186;
st.global.u8 [%rd1187], %r10454;
add.s32 %r10451, %r10451, 1;
setp.eq.s32 %p1882, %r10454, 255;
selp.b32 %r10452, 7, 8, %p1882;
mov.u32 %r10453, 0;
mov.u32 %r10454, %r10453;
$L__BB1_1490:
setp.ne.s32 %p1883, %r10049, 0;
mov.u32 %r10063, %r10048;
@%p1883 bra $L__BB1_1487;
$L__BB1_1491:
and.b32 %r7390, %r9953, 4;
setp.eq.s32 %p1884, %r7390, 0;
mov.u32 %r10078, %r10063;
@%p1884 bra $L__BB1_1498;
shr.u32 %r7391, %r2897, 2;
and.b32 %r7392, %r7391, 1;
sub.s32 %r10064, %r2896, %r7392;
setp.eq.s32 %p1885, %r10064, 0;
mov.u32 %r10078, %r10063;
@%p1885 bra $L__BB1_1498;
mov.u32 %r7393, -1;
shl.b32 %r7394, %r7393, %r10064;
not.b32 %r7395, %r7394;
and.b32 %r10065, %r9967, %r7395;
$L__BB1_1494:
setp.gt.u32 %p1886, %r10451, 17476;
mov.u32 %r10078, 1;
@%p1886 bra $L__BB1_1498;
sub.s32 %r7397, %r10452, %r10453;
min.u32 %r7398, %r7397, %r10064;
setp.eq.s32 %p1887, %r7398, 32;
mov.u32 %r7399, -1;
shl.b32 %r7400, %r7399, %r7398;
not.b32 %r7401, %r7400;
selp.b32 %r7402, -1, %r7401, %p1887;
and.b32 %r7403, %r7402, %r10065;
shl.b32 %r7404, %r7403, %r10453;
or.b32 %r10454, %r7404, %r10454;
add.s32 %r10453, %r7398, %r10453;
shr.u32 %r10065, %r10065, %r7398;
sub.s32 %r10064, %r10064, %r7398;
setp.lt.u32 %p1888, %r10453, %r10452;
@%p1888 bra $L__BB1_1497;
cvt.u64.u32 %rd1188, %r10451;
add.s64 %rd1189, %rd1188, %rd4;
add.s64 %rd1190, %rd1, %rd1189;
st.global.u8 [%rd1190], %r10454;
add.s32 %r10451, %r10451, 1;
setp.eq.s32 %p1889, %r10454, 255;
selp.b32 %r10452, 7, 8, %p1889;
mov.u32 %r10453, 0;
mov.u32 %r10454, %r10453;
$L__BB1_1497:
setp.ne.s32 %p1890, %r10064, 0;
mov.u32 %r10078, %r10063;
@%p1890 bra $L__BB1_1494;
$L__BB1_1498:
setp.eq.s32 %p1891, %r2785, 0;
mov.u32 %r10485, %r10078;
@%p1891 bra $L__BB1_1505;
shr.u32 %r7407, %r2897, 3;
sub.s32 %r10079, %r2896, %r7407;
setp.eq.s32 %p1892, %r10079, 0;
mov.u32 %r10485, %r10078;
@%p1892 bra $L__BB1_1505;
mov.u32 %r7408, -1;
shl.b32 %r7409, %r7408, %r10079;
not.b32 %r7410, %r7409;
and.b32 %r10080, %r9966, %r7410;
$L__BB1_1501:
setp.gt.u32 %p1893, %r10451, 17476;
mov.u32 %r10485, 1;
@%p1893 bra $L__BB1_1505;
sub.s32 %r7412, %r10452, %r10453;
min.u32 %r7413, %r7412, %r10079;
setp.eq.s32 %p1894, %r7413, 32;
mov.u32 %r7414, -1;
shl.b32 %r7415, %r7414, %r7413;
not.b32 %r7416, %r7415;
selp.b32 %r7417, -1, %r7416, %p1894;
and.b32 %r7418, %r7417, %r10080;
shl.b32 %r7419, %r7418, %r10453;
or.b32 %r10454, %r7419, %r10454;
add.s32 %r10453, %r7413, %r10453;
shr.u32 %r10080, %r10080, %r7413;
sub.s32 %r10079, %r10079, %r7413;
setp.lt.u32 %p1895, %r10453, %r10452;
@%p1895 bra $L__BB1_1504;
cvt.u64.u32 %rd1191, %r10451;
add.s64 %rd1192, %rd1191, %rd4;
add.s64 %rd1193, %rd1, %rd1192;
st.global.u8 [%rd1193], %r10454;
add.s32 %r10451, %r10451, 1;
setp.eq.s32 %p1896, %r10454, 255;
selp.b32 %r10452, 7, 8, %p1896;
mov.u32 %r10453, 0;
mov.u32 %r10454, %r10453;
$L__BB1_1504:
setp.ne.s32 %p1897, %r10079, 0;
mov.u32 %r10485, %r10078;
@%p1897 bra $L__BB1_1501;
$L__BB1_1505:
setp.lt.s32 %p1898, %r2781, 1;
setp.lt.s32 %p1899, %r2440, 1;
or.pred %p1900, %p1899, %p1898;
@%p1900 bra $L__BB1_1553;
min.s32 %r7422, %r2440, %r2781;
setp.lt.s32 %p1901, %r7422, 3;
add.s32 %r7423, %r9816, 17477;
cvt.u64.u32 %rd1194, %r7423;
add.s64 %rd1195, %rd1194, %rd4;
add.s64 %rd68, %rd1, %rd1195;
@%p1901 bra $L__BB1_1545;
bra.uni $L__BB1_1507;
$L__BB1_1545:
add.s32 %r10030, %r10030, 1;
setp.lt.u32 %p1948, %r10030, %r10032;
@%p1948 bra $L__BB1_1553;
shl.b16 %rs932, %rs1253, 1;
or.b16 %rs1253, %rs932, 1;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p1949, %r9825, 0;
mov.u32 %r10136, %r10033;
@%p1949 bra $L__BB1_1549;
setp.gt.u32 %p1950, %r9816, 191;
mov.u32 %r10136, 1;
mov.u32 %r9825, 0;
@%p1950 bra $L__BB1_1549;
and.b16 %rs934, %rs1253, 255;
st.global.u8 [%rd68], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1951, %rs934, 255;
selp.b32 %r9825, 7, 8, %p1951;
mov.u16 %rs1253, 0;
mov.u32 %r10136, %r10033;
$L__BB1_1549:
add.s32 %r7511, %r10031, 1;
min.u32 %r10031, %r7511, 12;
setp.lt.u32 %p1952, %r10031, 3;
mov.u32 %r10030, 0;
mov.u32 %r10137, %r10030;
@%p1952 bra $L__BB1_1552;
setp.lt.u32 %p1953, %r10031, 6;
mov.u32 %r10137, 1;
@%p1953 bra $L__BB1_1552;
setp.lt.u32 %p1954, %r10031, 9;
setp.eq.s32 %p1955, %r10031, 11;
selp.b32 %r7513, 4, 5, %p1955;
setp.lt.u32 %p1956, %r10031, 11;
selp.b32 %r7514, 3, %r7513, %p1956;
selp.b32 %r10137, 2, %r7514, %p1954;
$L__BB1_1552:
mov.u32 %r7516, 1;
shl.b32 %r10032, %r7516, %r10137;
mov.u32 %r10033, %r10136;
bra.uni $L__BB1_1553;
$L__BB1_1507:
shl.b16 %rs1253, %rs1253, 1;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p1902, %r9825, 0;
mov.u32 %r10129, %r10033;
@%p1902 bra $L__BB1_1510;
setp.gt.u32 %p1903, %r9816, 191;
mov.u32 %r10129, 1;
mov.u32 %r9825, 0;
@%p1903 bra $L__BB1_1510;
st.global.u8 [%rd68], %rs1253;
add.s32 %r9816, %r9816, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9825, 8;
mov.u32 %r10129, %r10033;
$L__BB1_1510:
setp.lt.u32 %p1904, %r10031, 3;
mov.u32 %r10097, 0;
@%p1904 bra $L__BB1_1513;
setp.lt.u32 %p1905, %r10031, 6;
mov.u32 %r10097, 1;
@%p1905 bra $L__BB1_1513;
setp.lt.u32 %p1906, %r10031, 9;
setp.eq.s32 %p1907, %r10031, 11;
selp.b32 %r7429, 4, 5, %p1907;
setp.lt.u32 %p1908, %r10031, 11;
selp.b32 %r7430, 3, %r7429, %p1908;
selp.b32 %r10097, 2, %r7430, %p1906;
$L__BB1_1513:
setp.eq.s32 %p1909, %r10097, 0;
@%p1909 bra $L__BB1_1541;
add.s32 %r2998, %r10097, -1;
and.b32 %r2999, %r10097, 3;
setp.eq.s32 %p1910, %r2999, 0;
mov.u32 %r10107, %r10097;
mov.u32 %r10108, %r10129;
@%p1910 bra $L__BB1_1526;
mov.u32 %r7432, 1;
shl.b32 %r7433, %r7432, %r2998;
and.b32 %r7434, %r7433, %r10030;
setp.ne.s32 %p1911, %r7434, 0;
selp.u32 %r7435, 1, 0, %p1911;
cvt.u32.u16 %r7436, %rs1253;
bfi.b32 %r7437, %r7436, %r7435, 1, 8;
cvt.u16.u32 %rs1253, %r7437;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p1912, %r9825, 0;
mov.u32 %r10108, %r10129;
@%p1912 bra $L__BB1_1518;
setp.gt.u32 %p1913, %r9816, 191;
mov.u32 %r9825, 0;
mov.u32 %r10108, %r7432;
@%p1913 bra $L__BB1_1518;
add.s32 %r7441, %r9816, 17477;
cvt.u64.u32 %rd1196, %r7441;
add.s64 %rd1197, %rd1196, %rd4;
add.s64 %rd1198, %rd1, %rd1197;
st.global.u8 [%rd1198], %rs1253;
add.s32 %r9816, %r9816, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9825, 8;
mov.u32 %r10108, %r10129;
$L__BB1_1518:
setp.eq.s32 %p1914, %r2999, 1;
mov.u32 %r10129, %r10108;
mov.u32 %r10107, %r2998;
@%p1914 bra $L__BB1_1526;
add.s32 %r10107, %r10097, -2;
mov.u32 %r7442, 1;
shl.b32 %r7443, %r7442, %r10107;
and.b32 %r7444, %r7443, %r10030;
setp.ne.s32 %p1915, %r7444, 0;
selp.u32 %r7445, 1, 0, %p1915;
cvt.u32.u16 %r7446, %rs1253;
bfi.b32 %r7447, %r7446, %r7445, 1, 8;
cvt.u16.u32 %rs1253, %r7447;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p1916, %r9825, 0;
mov.u32 %r10103, %r10108;
@%p1916 bra $L__BB1_1522;
setp.gt.u32 %p1917, %r9816, 191;
mov.u32 %r9825, 0;
mov.u32 %r10103, %r7442;
@%p1917 bra $L__BB1_1522;
add.s32 %r7450, %r9816, 17477;
cvt.u64.u32 %rd1199, %r7450;
add.s64 %rd1200, %rd1199, %rd4;
add.s64 %rd1201, %rd1, %rd1200;
and.b16 %rs920, %rs1253, 255;
st.global.u8 [%rd1201], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1918, %rs920, 255;
selp.b32 %r9825, 7, 8, %p1918;
mov.u16 %rs1253, 0;
mov.u32 %r10103, %r10108;
$L__BB1_1522:
setp.eq.s32 %p1919, %r2999, 2;
mov.u32 %r10129, %r10103;
mov.u32 %r10108, %r10103;
@%p1919 bra $L__BB1_1526;
add.s32 %r10107, %r10097, -3;
mov.u32 %r7451, 1;
shl.b32 %r7452, %r7451, %r10107;
and.b32 %r7453, %r7452, %r10030;
setp.ne.s32 %p1920, %r7453, 0;
selp.u32 %r7454, 1, 0, %p1920;
cvt.u32.u16 %r7455, %rs1253;
bfi.b32 %r7456, %r7455, %r7454, 1, 8;
cvt.u16.u32 %rs1253, %r7456;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p1921, %r9825, 0;
mov.u32 %r10129, %r10103;
mov.u32 %r10108, %r10103;
@%p1921 bra $L__BB1_1526;
setp.gt.u32 %p1922, %r9816, 191;
mov.u32 %r9825, 0;
mov.u32 %r10129, %r7451;
mov.u32 %r10108, %r7451;
@%p1922 bra $L__BB1_1526;
add.s32 %r7461, %r9816, 17477;
cvt.u64.u32 %rd1202, %r7461;
add.s64 %rd1203, %rd1202, %rd4;
add.s64 %rd1204, %rd1, %rd1203;
and.b16 %rs923, %rs1253, 255;
st.global.u8 [%rd1204], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1923, %rs923, 255;
selp.b32 %r9825, 7, 8, %p1923;
mov.u16 %rs1253, 0;
mov.u32 %r10129, %r10103;
mov.u32 %r10108, %r10103;
$L__BB1_1526:
setp.lt.u32 %p1924, %r2998, 3;
@%p1924 bra $L__BB1_1541;
mov.u32 %r10129, %r10108;
$L__BB1_1528:
add.s32 %r7462, %r10107, -1;
mov.u32 %r7463, 1;
shl.b32 %r7464, %r7463, %r7462;
and.b32 %r7465, %r7464, %r10030;
setp.ne.s32 %p1925, %r7465, 0;
selp.u32 %r7466, 1, 0, %p1925;
cvt.u32.u16 %r7467, %rs1253;
bfi.b32 %r10117, %r7467, %r7466, 1, 8;
add.s32 %r10116, %r9825, -1;
setp.ne.s32 %p1926, %r10116, 0;
mov.u32 %r10118, %r10129;
@%p1926 bra $L__BB1_1531;
setp.gt.u32 %p1927, %r9816, 191;
mov.u32 %r10116, 0;
mov.u32 %r10118, %r7463;
@%p1927 bra $L__BB1_1531;
cvt.u16.u32 %rs924, %r10117;
and.b16 %rs925, %rs924, 255;
add.s32 %r7471, %r9816, 17477;
cvt.u64.u32 %rd1205, %r7471;
add.s64 %rd1206, %rd1205, %rd4;
add.s64 %rd1207, %rd1, %rd1206;
st.global.u8 [%rd1207], %rs924;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1928, %rs925, 255;
selp.b32 %r10116, 7, 8, %p1928;
mov.u32 %r10117, 0;
mov.u32 %r10118, %r10129;
$L__BB1_1531:
add.s32 %r7472, %r10107, -2;
shl.b32 %r7474, %r7463, %r7472;
and.b32 %r7475, %r7474, %r10030;
setp.ne.s32 %p1929, %r7475, 0;
and.b32 %r7476, %r10117, 127;
selp.u32 %r7477, 1, 0, %p1929;
bfi.b32 %r10121, %r7476, %r7477, 1, 7;
add.s32 %r10120, %r10116, -1;
setp.ne.s32 %p1930, %r10120, 0;
mov.u32 %r10122, %r10118;
@%p1930 bra $L__BB1_1534;
setp.gt.u32 %p1931, %r9816, 191;
mov.u32 %r10122, 1;
mov.u32 %r10120, 0;
@%p1931 bra $L__BB1_1534;
cvt.u16.u32 %rs926, %r10121;
and.b16 %rs927, %rs926, 255;
add.s32 %r7481, %r9816, 17477;
cvt.u64.u32 %rd1208, %r7481;
add.s64 %rd1209, %rd1208, %rd4;
add.s64 %rd1210, %rd1, %rd1209;
st.global.u8 [%rd1210], %rs926;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1932, %rs927, 255;
selp.b32 %r10120, 7, 8, %p1932;
mov.u32 %r10121, 0;
mov.u32 %r10122, %r10118;
$L__BB1_1534:
add.s32 %r7482, %r10107, -3;
mov.u32 %r7483, 1;
shl.b32 %r7484, %r7483, %r7482;
and.b32 %r7485, %r7484, %r10030;
setp.ne.s32 %p1933, %r7485, 0;
and.b32 %r7486, %r10121, 127;
selp.u32 %r7487, 1, 0, %p1933;
bfi.b32 %r10125, %r7486, %r7487, 1, 7;
add.s32 %r10124, %r10120, -1;
setp.ne.s32 %p1934, %r10124, 0;
mov.u32 %r10126, %r10122;
@%p1934 bra $L__BB1_1537;
setp.gt.u32 %p1935, %r9816, 191;
mov.u32 %r10124, 0;
mov.u32 %r10126, %r7483;
@%p1935 bra $L__BB1_1537;
cvt.u16.u32 %rs928, %r10125;
and.b16 %rs929, %rs928, 255;
add.s32 %r7491, %r9816, 17477;
cvt.u64.u32 %rd1211, %r7491;
add.s64 %rd1212, %rd1211, %rd4;
add.s64 %rd1213, %rd1, %rd1212;
st.global.u8 [%rd1213], %rs928;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1936, %rs929, 255;
selp.b32 %r10124, 7, 8, %p1936;
mov.u32 %r10125, 0;
mov.u32 %r10126, %r10122;
$L__BB1_1537:
add.s32 %r10107, %r10107, -4;
shl.b32 %r7493, %r7483, %r10107;
and.b32 %r7494, %r7493, %r10030;
setp.ne.s32 %p1937, %r7494, 0;
and.b32 %r7495, %r10125, 127;
selp.u32 %r7496, 1, 0, %p1937;
bfi.b32 %r7497, %r7495, %r7496, 1, 15;
cvt.u16.u32 %rs1253, %r7497;
add.s32 %r9825, %r10124, -1;
setp.ne.s32 %p1938, %r9825, 0;
mov.u32 %r10129, %r10126;
@%p1938 bra $L__BB1_1540;
setp.gt.u32 %p1939, %r9816, 191;
mov.u32 %r10129, 1;
mov.u32 %r9825, 0;
@%p1939 bra $L__BB1_1540;
add.s32 %r7500, %r9816, 17477;
cvt.u64.u32 %rd1214, %r7500;
add.s64 %rd1215, %rd1214, %rd4;
add.s64 %rd1216, %rd1, %rd1215;
and.b16 %rs931, %rs1253, 255;
st.global.u8 [%rd1216], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p1940, %rs931, 255;
selp.b32 %r9825, 7, 8, %p1940;
mov.u16 %rs1253, 0;
mov.u32 %r10129, %r10126;
$L__BB1_1540:
setp.ne.s32 %p1941, %r10107, 0;
@%p1941 bra $L__BB1_1528;
$L__BB1_1541:
add.s32 %r7502, %r10031, -1;
setp.eq.s32 %p1942, %r10031, 0;
mov.u32 %r10030, 0;
selp.b32 %r10031, 0, %r7502, %p1942;
setp.lt.u32 %p1943, %r10031, 3;
mov.u32 %r10133, %r10030;
@%p1943 bra $L__BB1_1544;
setp.lt.u32 %p1944, %r10031, 6;
mov.u32 %r10133, 1;
@%p1944 bra $L__BB1_1544;
setp.lt.u32 %p1945, %r10031, 9;
setp.eq.s32 %p1946, %r10031, 11;
selp.b32 %r7504, 4, 5, %p1946;
setp.lt.u32 %p1947, %r10031, 11;
selp.b32 %r7505, 3, %r7504, %p1947;
selp.b32 %r10133, 2, %r7505, %p1945;
$L__BB1_1544:
mov.u32 %r7507, 1;
shl.b32 %r10032, %r7507, %r10133;
mov.u32 %r10033, %r10129;
$L__BB1_1553:
setp.gt.s32 %p1957, %r2781, 2;
setp.gt.s32 %p1958, %r2440, 2;
and.pred %p1959, %p1958, %p1957;
@%p1959 bra $L__BB1_1602;
bra.uni $L__BB1_1554;
$L__BB1_1602:
add.s32 %r7637, %r2653, -11;
cvt.u64.u32 %rd1246, %r7637;
add.s64 %rd70, %rd63, %rd1246;
ld.global.u8 %rs393, [%rd70];
add.s32 %r7638, %r2653, -10;
cvt.u64.u32 %rd1248, %r7638;
add.s64 %rd1249, %rd63, %rd1248;
ld.global.u8 %rs394, [%rd1249];
ld.global.u8 %rs395, [%rd1249+1];
mul.lo.s32 %r7639, %r2781, 6;
add.s32 %r7640, %r7639, -12;
cvt.u64.u32 %rd1250, %r7640;
add.s64 %rd1251, %rd63, %rd1250;
ld.global.u8 %rs396, [%rd1251];
ld.global.u8 %rs397, [%rd1251+1];
add.s32 %r7641, %r7639, -10;
cvt.u64.u32 %rd1252, %r7641;
add.s64 %rd1253, %rd63, %rd1252;
ld.global.u8 %rs398, [%rd1253];
ld.global.u8 %rs399, [%rd1253+1];
setp.eq.s16 %p2027, %rs393, 0;
mov.u32 %r10231, %r9983;
@%p2027 bra $L__BB1_1609;
ld.global.u8 %r10221, [%rd70+-1];
cvt.u32.u16 %r10220, %rs393;
$L__BB1_1604:
mov.u32 %r3208, %r10220;
setp.gt.u32 %p2028, %r10264, 2879;
mov.u32 %r10231, 1;
@%p2028 bra $L__BB1_1609;
mov.u32 %r7643, 8;
sub.s32 %r7644, %r7643, %r10266;
sub.s32 %r7645, %r7644, %r10265;
min.u32 %r7646, %r7645, %r3208;
setp.eq.s32 %p2029, %r7646, 32;
mov.u32 %r7647, -1;
shl.b32 %r7648, %r7647, %r7646;
not.b32 %r7649, %r7648;
selp.b32 %r7650, -1, %r7649, %p2029;
and.b32 %r7651, %r7650, %r10221;
shl.b32 %r7652, %r7651, %r10265;
cvt.u16.u32 %rs967, %r7652;
or.b16 %rs1322, %rs1322, %rs967;
add.s32 %r10265, %r7646, %r10265;
sub.s32 %r10220, %r3208, %r7646;
shr.u32 %r10221, %r10221, %r7646;
setp.gt.u32 %p2030, %r7645, %r3208;
@%p2030 bra $L__BB1_1608;
setp.ne.s32 %p2031, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs968, %rs1322, 255;
setp.ne.s16 %p2032, %rs968, 127;
and.pred %p2033, %p2031, %p2032;
@%p2033 bra $L__BB1_1608;
mov.u32 %r7655, 20548;
sub.s32 %r7656, %r7655, %r10264;
cvt.u64.u32 %rd1254, %r7656;
add.s64 %rd1255, %rd1254, %rd4;
add.s64 %rd1256, %rd1, %rd1255;
st.global.u8 [%rd1256], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p2034, %rs968, 143;
selp.u32 %r10266, 1, 0, %p2034;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1608:
setp.ne.s32 %p2035, %r10220, 0;
mov.u32 %r10231, %r9983;
@%p2035 bra $L__BB1_1604;
$L__BB1_1609:
setp.eq.s16 %p2036, %rs397, 0;
mov.u32 %r10243, %r10231;
@%p2036 bra $L__BB1_1616;
cvt.u32.u16 %r7657, %rs396;
and.b32 %r10233, %r7657, 255;
cvt.u32.u16 %r7658, %rs397;
and.b32 %r10232, %r7658, 255;
$L__BB1_1611:
mov.u32 %r3227, %r10232;
setp.gt.u32 %p2037, %r10264, 2879;
mov.u32 %r10243, 1;
@%p2037 bra $L__BB1_1616;
mov.u32 %r7660, 8;
sub.s32 %r7661, %r7660, %r10266;
sub.s32 %r7662, %r7661, %r10265;
min.u32 %r7663, %r7662, %r3227;
setp.eq.s32 %p2038, %r7663, 32;
mov.u32 %r7664, -1;
shl.b32 %r7665, %r7664, %r7663;
not.b32 %r7666, %r7665;
selp.b32 %r7667, -1, %r7666, %p2038;
and.b32 %r7668, %r7667, %r10233;
shl.b32 %r7669, %r7668, %r10265;
cvt.u16.u32 %rs972, %r7669;
or.b16 %rs1322, %rs1322, %rs972;
add.s32 %r10265, %r7663, %r10265;
sub.s32 %r10232, %r3227, %r7663;
shr.u32 %r10233, %r10233, %r7663;
setp.gt.u32 %p2039, %r7662, %r3227;
@%p2039 bra $L__BB1_1615;
setp.ne.s32 %p2040, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs973, %rs1322, 255;
setp.ne.s16 %p2041, %rs973, 127;
and.pred %p2042, %p2040, %p2041;
@%p2042 bra $L__BB1_1615;
mov.u32 %r7672, 20548;
sub.s32 %r7673, %r7672, %r10264;
cvt.u64.u32 %rd1257, %r7673;
add.s64 %rd1258, %rd1257, %rd4;
add.s64 %rd1259, %rd1, %rd1258;
st.global.u8 [%rd1259], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p2043, %rs973, 143;
selp.u32 %r10266, 1, 0, %p2043;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1615:
setp.ne.s32 %p2044, %r10232, 0;
mov.u32 %r10243, %r10231;
@%p2044 bra $L__BB1_1611;
$L__BB1_1616:
setp.eq.s16 %p2045, %rs395, 0;
mov.u32 %r10255, %r10243;
@%p2045 bra $L__BB1_1623;
cvt.u32.u16 %r7674, %rs395;
and.b32 %r10244, %r7674, 255;
cvt.u32.u16 %r7675, %rs394;
and.b32 %r10245, %r7675, 255;
$L__BB1_1618:
mov.u32 %r3246, %r10244;
setp.gt.u32 %p2046, %r10264, 2879;
mov.u32 %r10255, 1;
@%p2046 bra $L__BB1_1623;
mov.u32 %r7677, 8;
sub.s32 %r7678, %r7677, %r10266;
sub.s32 %r7679, %r7678, %r10265;
min.u32 %r7680, %r7679, %r3246;
setp.eq.s32 %p2047, %r7680, 32;
mov.u32 %r7681, -1;
shl.b32 %r7682, %r7681, %r7680;
not.b32 %r7683, %r7682;
selp.b32 %r7684, -1, %r7683, %p2047;
and.b32 %r7685, %r7684, %r10245;
shl.b32 %r7686, %r7685, %r10265;
cvt.u16.u32 %rs977, %r7686;
or.b16 %rs1322, %rs1322, %rs977;
add.s32 %r10265, %r7680, %r10265;
sub.s32 %r10244, %r3246, %r7680;
shr.u32 %r10245, %r10245, %r7680;
setp.gt.u32 %p2048, %r7679, %r3246;
@%p2048 bra $L__BB1_1622;
setp.ne.s32 %p2049, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs978, %rs1322, 255;
setp.ne.s16 %p2050, %rs978, 127;
and.pred %p2051, %p2049, %p2050;
@%p2051 bra $L__BB1_1622;
mov.u32 %r7689, 20548;
sub.s32 %r7690, %r7689, %r10264;
cvt.u64.u32 %rd1260, %r7690;
add.s64 %rd1261, %rd1260, %rd4;
add.s64 %rd1262, %rd1, %rd1261;
st.global.u8 [%rd1262], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p2052, %rs978, 143;
selp.u32 %r10266, 1, 0, %p2052;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1622:
setp.ne.s32 %p2053, %r10244, 0;
mov.u32 %r10255, %r10243;
@%p2053 bra $L__BB1_1618;
$L__BB1_1623:
setp.eq.s16 %p2054, %rs399, 0;
mov.u32 %r10267, %r10255;
@%p2054 bra $L__BB1_1630;
cvt.u32.u16 %r7691, %rs398;
and.b32 %r10257, %r7691, 255;
cvt.u32.u16 %r7692, %rs399;
and.b32 %r10256, %r7692, 255;
$L__BB1_1625:
mov.u32 %r3265, %r10256;
setp.gt.u32 %p2055, %r10264, 2879;
mov.u32 %r10267, 1;
@%p2055 bra $L__BB1_1630;
mov.u32 %r7694, 8;
sub.s32 %r7695, %r7694, %r10266;
sub.s32 %r7696, %r7695, %r10265;
min.u32 %r7697, %r7696, %r3265;
setp.eq.s32 %p2056, %r7697, 32;
mov.u32 %r7698, -1;
shl.b32 %r7699, %r7698, %r7697;
not.b32 %r7700, %r7699;
selp.b32 %r7701, -1, %r7700, %p2056;
and.b32 %r7702, %r7701, %r10257;
shl.b32 %r7703, %r7702, %r10265;
cvt.u16.u32 %rs982, %r7703;
or.b16 %rs1322, %rs1322, %rs982;
add.s32 %r10265, %r7697, %r10265;
sub.s32 %r10256, %r3265, %r7697;
shr.u32 %r10257, %r10257, %r7697;
setp.gt.u32 %p2057, %r7696, %r3265;
@%p2057 bra $L__BB1_1629;
setp.ne.s32 %p2058, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs983, %rs1322, 255;
setp.ne.s16 %p2059, %rs983, 127;
and.pred %p2060, %p2058, %p2059;
@%p2060 bra $L__BB1_1629;
mov.u32 %r7706, 20548;
sub.s32 %r7707, %r7706, %r10264;
cvt.u64.u32 %rd1263, %r7707;
add.s64 %rd1264, %rd1263, %rd4;
add.s64 %rd1265, %rd1, %rd1264;
st.global.u8 [%rd1265], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p2061, %rs983, 143;
selp.u32 %r10266, 1, 0, %p2061;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1629:
setp.ne.s32 %p2062, %r10256, 0;
mov.u32 %r10267, %r10255;
@%p2062 bra $L__BB1_1625;
bra.uni $L__BB1_1630;
$L__BB1_1554:
setp.gt.s32 %p1960, %r2781, 0;
and.pred %p1962, %p1958, %p1960;
@%p1962 bra $L__BB1_1583;
bra.uni $L__BB1_1555;
$L__BB1_1583:
ld.global.u8 %rs379, [%rd65+1];
ld.global.u8 %rs380, [%rd66];
ld.global.u8 %rs381, [%rd66+1];
setp.eq.s16 %p2001, %rs379, 0;
mov.u32 %r10199, %r9983;
@%p2001 bra $L__BB1_1590;
ld.global.u8 %r10189, [%rd65];
cvt.u32.u16 %r10188, %rs379;
$L__BB1_1585:
mov.u32 %r3156, %r10188;
setp.gt.u32 %p2002, %r10264, 2879;
mov.u32 %r10199, 1;
@%p2002 bra $L__BB1_1590;
mov.u32 %r7589, 8;
sub.s32 %r7590, %r7589, %r10266;
sub.s32 %r7591, %r7590, %r10265;
min.u32 %r7592, %r7591, %r3156;
setp.eq.s32 %p2003, %r7592, 32;
mov.u32 %r7593, -1;
shl.b32 %r7594, %r7593, %r7592;
not.b32 %r7595, %r7594;
selp.b32 %r7596, -1, %r7595, %p2003;
and.b32 %r7597, %r7596, %r10189;
shl.b32 %r7598, %r7597, %r10265;
cvt.u16.u32 %rs954, %r7598;
or.b16 %rs1322, %rs1322, %rs954;
add.s32 %r10265, %r7592, %r10265;
sub.s32 %r10188, %r3156, %r7592;
shr.u32 %r10189, %r10189, %r7592;
setp.gt.u32 %p2004, %r7591, %r3156;
@%p2004 bra $L__BB1_1589;
setp.ne.s32 %p2005, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs955, %rs1322, 255;
setp.ne.s16 %p2006, %rs955, 127;
and.pred %p2007, %p2005, %p2006;
@%p2007 bra $L__BB1_1589;
mov.u32 %r7601, 20548;
sub.s32 %r7602, %r7601, %r10264;
cvt.u64.u32 %rd1237, %r7602;
add.s64 %rd1238, %rd1237, %rd4;
add.s64 %rd1239, %rd1, %rd1238;
st.global.u8 [%rd1239], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p2008, %rs955, 143;
selp.u32 %r10266, 1, 0, %p2008;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1589:
setp.ne.s32 %p2009, %r10188, 0;
mov.u32 %r10199, %r9983;
@%p2009 bra $L__BB1_1585;
$L__BB1_1590:
add.s32 %r10201, %r2781, -1;
cvt.u32.u16 %r7604, %rs381;
and.b32 %r10212, %r7604, 255;
cvt.u32.u16 %r7605, %rs380;
and.b32 %r10213, %r7605, 255;
mov.u32 %r7603, 1;
mov.u32 %r10200, %r7603;
$L__BB1_1591:
mov.u32 %r3176, %r10200;
setp.gt.u32 %p2010, %r10264, 2879;
mov.u32 %r10211, %r7603;
@%p2010 bra $L__BB1_1596;
mov.u32 %r7607, 8;
sub.s32 %r7608, %r7607, %r10266;
sub.s32 %r7609, %r7608, %r10265;
min.u32 %r7610, %r7609, %r3176;
setp.eq.s32 %p2011, %r7610, 32;
mov.u32 %r7611, -1;
shl.b32 %r7612, %r7611, %r7610;
not.b32 %r7613, %r7612;
selp.b32 %r7614, -1, %r7613, %p2011;
and.b32 %r7615, %r7614, %r10201;
shl.b32 %r7616, %r7615, %r10265;
cvt.u16.u32 %rs958, %r7616;
or.b16 %rs1322, %rs1322, %rs958;
add.s32 %r10265, %r7610, %r10265;
sub.s32 %r10200, %r3176, %r7610;
shr.u32 %r10201, %r10201, %r7610;
setp.gt.u32 %p2012, %r7609, %r3176;
@%p2012 bra $L__BB1_1595;
setp.ne.s32 %p2013, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs959, %rs1322, 255;
setp.ne.s16 %p2014, %rs959, 127;
and.pred %p2015, %p2013, %p2014;
@%p2015 bra $L__BB1_1595;
mov.u32 %r7619, 20548;
sub.s32 %r7620, %r7619, %r10264;
cvt.u64.u32 %rd1240, %r7620;
add.s64 %rd1241, %rd1240, %rd4;
add.s64 %rd1242, %rd1, %rd1241;
st.global.u8 [%rd1242], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p2016, %rs959, 143;
selp.u32 %r10266, 1, 0, %p2016;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1595:
setp.ne.s32 %p2017, %r10200, 0;
mov.u32 %r10211, %r10199;
@%p2017 bra $L__BB1_1591;
$L__BB1_1596:
setp.eq.s16 %p2018, %rs381, 0;
mov.u32 %r10267, %r10211;
@%p2018 bra $L__BB1_1630;
$L__BB1_1597:
mov.u32 %r3193, %r10212;
setp.gt.u32 %p2019, %r10264, 2879;
mov.u32 %r10267, 1;
@%p2019 bra $L__BB1_1630;
mov.u32 %r7622, 8;
sub.s32 %r7623, %r7622, %r10266;
sub.s32 %r7624, %r7623, %r10265;
min.u32 %r7625, %r7624, %r3193;
setp.eq.s32 %p2020, %r7625, 32;
mov.u32 %r7626, -1;
shl.b32 %r7627, %r7626, %r7625;
not.b32 %r7628, %r7627;
selp.b32 %r7629, -1, %r7628, %p2020;
and.b32 %r7630, %r7629, %r10213;
shl.b32 %r7631, %r7630, %r10265;
cvt.u16.u32 %rs963, %r7631;
or.b16 %rs1322, %rs1322, %rs963;
add.s32 %r10265, %r7625, %r10265;
sub.s32 %r10212, %r3193, %r7625;
shr.u32 %r10213, %r10213, %r7625;
setp.gt.u32 %p2021, %r7624, %r3193;
@%p2021 bra $L__BB1_1601;
setp.ne.s32 %p2022, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs964, %rs1322, 255;
setp.ne.s16 %p2023, %rs964, 127;
and.pred %p2024, %p2022, %p2023;
@%p2024 bra $L__BB1_1601;
mov.u32 %r7634, 20548;
sub.s32 %r7635, %r7634, %r10264;
cvt.u64.u32 %rd1243, %r7635;
add.s64 %rd1244, %rd1243, %rd4;
add.s64 %rd1245, %rd1, %rd1244;
st.global.u8 [%rd1245], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p2025, %rs964, 143;
selp.u32 %r10266, 1, 0, %p2025;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1601:
setp.eq.s32 %p2026, %r10212, 0;
mov.u32 %r10267, %r10211;
@%p2026 bra $L__BB1_1630;
bra.uni $L__BB1_1597;
$L__BB1_1555:
setp.gt.s32 %p1964, %r2440, 0;
selp.b32 %r7517, %r2653, 0, %p1964;
cvt.u64.u32 %rd1217, %r7517;
add.s64 %rd69, %rd63, %rd1217;
ld.global.u8 %rs357, [%rd69+1];
add.s32 %r7518, %r7517, 2;
cvt.u64.u32 %rd1219, %r7518;
add.s64 %rd1220, %rd63, %rd1219;
ld.global.u8 %rs358, [%rd1220];
ld.global.u8 %rs359, [%rd1220+1];
mul.lo.s32 %r7519, %r2781, 6;
selp.b32 %r7520, %r7519, 0, %p1960;
cvt.u64.u32 %rd1221, %r7520;
add.s64 %rd1222, %rd63, %rd1221;
ld.global.u8 %rs360, [%rd1222];
ld.global.u8 %rs361, [%rd1222+1];
add.s32 %r7521, %r7520, 2;
cvt.u64.u32 %rd1223, %r7521;
add.s64 %rd1224, %rd63, %rd1223;
ld.global.u8 %rs362, [%rd1224];
ld.global.u8 %rs363, [%rd1224+1];
setp.eq.s16 %p1965, %rs357, 0;
mov.u32 %r10155, %r9983;
@%p1965 bra $L__BB1_1562;
ld.global.u8 %r10145, [%rd69];
cvt.u32.u16 %r10144, %rs357;
$L__BB1_1557:
mov.u32 %r3084, %r10144;
setp.gt.u32 %p1966, %r10264, 2879;
mov.u32 %r10155, 1;
@%p1966 bra $L__BB1_1562;
mov.u32 %r7523, 8;
sub.s32 %r7524, %r7523, %r10266;
sub.s32 %r7525, %r7524, %r10265;
min.u32 %r7526, %r7525, %r3084;
setp.eq.s32 %p1967, %r7526, 32;
mov.u32 %r7527, -1;
shl.b32 %r7528, %r7527, %r7526;
not.b32 %r7529, %r7528;
selp.b32 %r7530, -1, %r7529, %p1967;
and.b32 %r7531, %r7530, %r10145;
shl.b32 %r7532, %r7531, %r10265;
cvt.u16.u32 %rs935, %r7532;
or.b16 %rs1322, %rs1322, %rs935;
add.s32 %r10265, %r7526, %r10265;
sub.s32 %r10144, %r3084, %r7526;
shr.u32 %r10145, %r10145, %r7526;
setp.gt.u32 %p1968, %r7525, %r3084;
@%p1968 bra $L__BB1_1561;
setp.ne.s32 %p1969, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs936, %rs1322, 255;
setp.ne.s16 %p1970, %rs936, 127;
and.pred %p1971, %p1969, %p1970;
@%p1971 bra $L__BB1_1561;
mov.u32 %r7535, 20548;
sub.s32 %r7536, %r7535, %r10264;
cvt.u64.u32 %rd1225, %r7536;
add.s64 %rd1226, %rd1225, %rd4;
add.s64 %rd1227, %rd1, %rd1226;
st.global.u8 [%rd1227], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p1972, %rs936, 143;
selp.u32 %r10266, 1, 0, %p1972;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1561:
setp.ne.s32 %p1973, %r10144, 0;
mov.u32 %r10155, %r9983;
@%p1973 bra $L__BB1_1557;
$L__BB1_1562:
setp.eq.s16 %p1974, %rs361, 0;
mov.u32 %r10167, %r10155;
@%p1974 bra $L__BB1_1569;
cvt.u32.u16 %r7537, %rs360;
and.b32 %r10157, %r7537, 255;
cvt.u32.u16 %r7538, %rs361;
and.b32 %r10156, %r7538, 255;
$L__BB1_1564:
mov.u32 %r3103, %r10156;
setp.gt.u32 %p1975, %r10264, 2879;
mov.u32 %r10167, 1;
@%p1975 bra $L__BB1_1569;
mov.u32 %r7540, 8;
sub.s32 %r7541, %r7540, %r10266;
sub.s32 %r7542, %r7541, %r10265;
min.u32 %r7543, %r7542, %r3103;
setp.eq.s32 %p1976, %r7543, 32;
mov.u32 %r7544, -1;
shl.b32 %r7545, %r7544, %r7543;
not.b32 %r7546, %r7545;
selp.b32 %r7547, -1, %r7546, %p1976;
and.b32 %r7548, %r7547, %r10157;
shl.b32 %r7549, %r7548, %r10265;
cvt.u16.u32 %rs940, %r7549;
or.b16 %rs1322, %rs1322, %rs940;
add.s32 %r10265, %r7543, %r10265;
sub.s32 %r10156, %r3103, %r7543;
shr.u32 %r10157, %r10157, %r7543;
setp.gt.u32 %p1977, %r7542, %r3103;
@%p1977 bra $L__BB1_1568;
setp.ne.s32 %p1978, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs941, %rs1322, 255;
setp.ne.s16 %p1979, %rs941, 127;
and.pred %p1980, %p1978, %p1979;
@%p1980 bra $L__BB1_1568;
mov.u32 %r7552, 20548;
sub.s32 %r7553, %r7552, %r10264;
cvt.u64.u32 %rd1228, %r7553;
add.s64 %rd1229, %rd1228, %rd4;
add.s64 %rd1230, %rd1, %rd1229;
st.global.u8 [%rd1230], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p1981, %rs941, 143;
selp.u32 %r10266, 1, 0, %p1981;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1568:
setp.ne.s32 %p1982, %r10156, 0;
mov.u32 %r10167, %r10155;
@%p1982 bra $L__BB1_1564;
// Bit-packing step: append the low (%rs359 & 255) bits of %rs358, LSB first,
// to the byte accumulator %rs1322. Skipped entirely when %rs359 == 0.
//   %r10265 = number of bits already held in %rs1322
//   %r10266 = 1 while the byte being built is limited to 7 bits, else 0
//   %r10264 = bytes written so far (loop bails out once it exceeds 2879)
//   %r10179 = status out: copy of %r10167, or 1 if the limit check tripped
$L__BB1_1569:
setp.eq.s16 %p1983, %rs359, 0;
mov.u32 %r10179, %r10167;
@%p1983 bra $L__BB1_1576;
// %r10168 = bit count, %r10169 = bit pattern
cvt.u32.u16 %r7554, %rs359;
and.b32 %r10168, %r7554, 255;
cvt.u32.u16 %r7555, %rs358;
and.b32 %r10169, %r7555, 255;
$L__BB1_1571:
mov.u32 %r3122, %r10168;
setp.gt.u32 %p1984, %r10264, 2879;
mov.u32 %r10179, 1;
@%p1984 bra $L__BB1_1576;
// room = 8 - %r10266 - %r10265; take = min(room, bits remaining)
mov.u32 %r7557, 8;
sub.s32 %r7558, %r7557, %r10266;
sub.s32 %r7559, %r7558, %r10265;
min.u32 %r7560, %r7559, %r3122;
// mask = low `take` bits (all ones if take == 32)
setp.eq.s32 %p1985, %r7560, 32;
mov.u32 %r7561, -1;
shl.b32 %r7562, %r7561, %r7560;
not.b32 %r7563, %r7562;
selp.b32 %r7564, -1, %r7563, %p1985;
and.b32 %r7565, %r7564, %r10169;
shl.b32 %r7566, %r7565, %r10265;
cvt.u16.u32 %rs945, %r7566;
or.b16 %rs1322, %rs1322, %rs945;
add.s32 %r10265, %r7560, %r10265;
sub.s32 %r10168, %r3122, %r7560;
shr.u32 %r10169, %r10169, %r7560;
// Byte not yet full: no flush.
setp.gt.u32 %p1986, %r7559, %r3122;
@%p1986 bra $L__BB1_1575;
// Lift the 7-bit limit; if it was active and the low byte is not 127, skip
// the store so the byte can take one more bit.
setp.ne.s32 %p1987, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs946, %rs1322, 255;
setp.ne.s16 %p1988, %rs946, 127;
and.pred %p1989, %p1987, %p1988;
@%p1989 bra $L__BB1_1575;
// Store the byte at %rd1 + %rd4 + (20548 - %r10264) (descending addresses).
mov.u32 %r7569, 20548;
sub.s32 %r7570, %r7569, %r10264;
cvt.u64.u32 %rd1231, %r7570;
add.s64 %rd1232, %rd1231, %rd4;
add.s64 %rd1233, %rd1, %rd1232;
st.global.u8 [%rd1233], %rs1322;
add.s32 %r10264, %r10264, 1;
// A stored byte value above 143 limits the next byte to 7 bits.
setp.gt.u16 %p1990, %rs946, 143;
selp.u32 %r10266, 1, 0, %p1990;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1575:
setp.ne.s32 %p1991, %r10168, 0;
mov.u32 %r10179, %r10167;
@%p1991 bra $L__BB1_1571;
// Bit-packing step: append the low (%rs363 & 255) bits of %rs362, LSB first,
// to the byte accumulator %rs1322. Skipped entirely when %rs363 == 0.
//   %r10265 = number of bits already held in %rs1322
//   %r10266 = 1 while the byte being built is limited to 7 bits, else 0
//   %r10264 = bytes written so far (loop bails out once it exceeds 2879)
//   %r10267 = status out: copy of %r10179, or 1 if the limit check tripped
// All exits go to $L__BB1_1630.
$L__BB1_1576:
setp.eq.s16 %p1992, %rs363, 0;
mov.u32 %r10267, %r10179;
@%p1992 bra $L__BB1_1630;
// %r10181 = bit pattern, %r10180 = bit count
cvt.u32.u16 %r7571, %rs362;
and.b32 %r10181, %r7571, 255;
cvt.u32.u16 %r7572, %rs363;
and.b32 %r10180, %r7572, 255;
$L__BB1_1578:
mov.u32 %r3141, %r10180;
setp.gt.u32 %p1993, %r10264, 2879;
mov.u32 %r10267, 1;
@%p1993 bra $L__BB1_1630;
// room = 8 - %r10266 - %r10265; take = min(room, bits remaining)
mov.u32 %r7574, 8;
sub.s32 %r7575, %r7574, %r10266;
sub.s32 %r7576, %r7575, %r10265;
min.u32 %r7577, %r7576, %r3141;
// mask = low `take` bits (all ones if take == 32)
setp.eq.s32 %p1994, %r7577, 32;
mov.u32 %r7578, -1;
shl.b32 %r7579, %r7578, %r7577;
not.b32 %r7580, %r7579;
selp.b32 %r7581, -1, %r7580, %p1994;
and.b32 %r7582, %r7581, %r10181;
shl.b32 %r7583, %r7582, %r10265;
cvt.u16.u32 %rs950, %r7583;
or.b16 %rs1322, %rs1322, %rs950;
add.s32 %r10265, %r7577, %r10265;
sub.s32 %r10180, %r3141, %r7577;
shr.u32 %r10181, %r10181, %r7577;
// Byte not yet full: no flush.
setp.gt.u32 %p1995, %r7576, %r3141;
@%p1995 bra $L__BB1_1582;
// Lift the 7-bit limit; if it was active and the low byte is not 127, skip
// the store so the byte can take one more bit.
setp.ne.s32 %p1996, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs951, %rs1322, 255;
setp.ne.s16 %p1997, %rs951, 127;
and.pred %p1998, %p1996, %p1997;
@%p1998 bra $L__BB1_1582;
// Store the byte at %rd1 + %rd4 + (20548 - %r10264) (descending addresses).
mov.u32 %r7586, 20548;
sub.s32 %r7587, %r7586, %r10264;
cvt.u64.u32 %rd1234, %r7587;
add.s64 %rd1235, %rd1234, %rd4;
add.s64 %rd1236, %rd1, %rd1235;
st.global.u8 [%rd1236], %rs1322;
add.s32 %r10264, %r10264, 1;
// A stored byte value above 143 limits the next byte to 7 bits.
setp.gt.u16 %p1999, %rs951, 143;
selp.u32 %r10266, 1, 0, %p1999;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1582:
// Done when no bits remain; otherwise go round again.
setp.eq.s32 %p2000, %r10180, 0;
mov.u32 %r10267, %r10179;
@%p2000 bra $L__BB1_1630;
bra.uni $L__BB1_1578;
// Loop latch: %r9750 = (%r9953 >> 1) | %r2898, then advance %r9734 by 4 and
// branch back to $L__BB1_1266 while %r9734 < %r5 (unsigned).
// NOTE(review): %r5 looks like the code-block width and the step of 4 like two
// 2-sample-wide groups per iteration -- confirm against the CUDA source.
$L__BB1_1630:
shr.u32 %r7708, %r9953, 1;
or.b32 %r9750, %r7708, %r2898;
$L__BB1_1631:
add.s32 %r9734, %r9734, 4;
setp.lt.u32 %p2063, %r9734, %r5;
@%p2063 bra $L__BB1_1266;
// After the loop above: zero one shared-memory byte at %r4095 + ((%r5+1)>>1) + 1,
// but only when that index, ((%r5+1)>>1) + 1, does not exceed 512.
$L__BB1_1632:
add.s32 %r8405, %r5, 1;
shr.u32 %r8404, %r8405, 1;
add.s32 %r7709, %r8404, 1;
setp.gt.u32 %p2064, %r7709, 512;
@%p2064 bra $L__BB1_1634;
// (the compiler recomputes (%r5+1)>>1 here instead of reusing %r8404)
add.s32 %r8408, %r5, 1;
shr.u32 %r8407, %r8408, 1;
add.s32 %r8406, %r4095, %r8407;
mov.u16 %rs986, 0;
add.s32 %r8393, %r8406, 1;
st.shared.u8 [%r8393], %rs986;
$L__BB1_1634:
// Nothing more to do when %r6 < 3; otherwise set up the loop that starts
// with %r10300 = 2.
// NOTE(review): %r6 looks like the code-block height and %r10300 like the
// current row -- confirm.
setp.lt.u32 %p2065, %r6, 3;
@%p2065 bra $L__BB1_1880;
// Kernel parameters 5 and 4 converted to global addresses: %rd71, %rd72.
ld.param.u64 %rd1422, [ j2k_htj2k_encode_codeblocks_param_5];
ld.param.u64 %rd1417, [ j2k_htj2k_encode_codeblocks_param_4];
mov.u32 %r10300, 2;
cvta.to.global.u64 %rd71, %rd1422;
cvta.to.global.u64 %rd72, %rd1417;
// Per-iteration setup of the %r10300 loop.
// Read and clear element 0 of the shared arrays cleanup_e_val (-> %rs422) and
// cleanup_cx_val (-> %rs423).
$L__BB1_1636:
ld.shared.u8 %rs422, [_ZZ32 j2k_htj2k_encode_codeblocksE13cleanup_e_val];
mov.u16 %rs987, 0;
st.shared.u8 [_ZZ32 j2k_htj2k_encode_codeblocksE13cleanup_e_val], %rs987;
ld.shared.u8 %rs423, [_ZZ32 j2k_htj2k_encode_codeblocksE14cleanup_cx_val];
st.shared.u8 [_ZZ32 j2k_htj2k_encode_codeblocksE14cleanup_cx_val], %rs987;
// %p10 is set outside this chunk; when true the whole inner loop is skipped.
@%p10 bra $L__BB1_1879;
mov.u32 %r7713, 0;
ld.shared.u8 %rs988, [_ZZ32 j2k_htj2k_encode_codeblocksE13cleanup_e_val+1];
ld.shared.u8 %rs989, [_ZZ32 j2k_htj2k_encode_codeblocksE14cleanup_cx_val+1];
// %r10335 = max(e_val[0], e_val[1]) - 1
max.u16 %rs991, %rs422, %rs988;
cvt.u32.u16 %r7714, %rs991;
add.s32 %r10335, %r7714, -1;
add.s32 %r3333, %r10300, 1;
// %r10333 = %r10300 * %r1: element index of the first sample read below
mul.lo.s32 %r10333, %r10300, %r1;
// %r10332 = cx_val[1] * 4 + cx_val[0]
mul.wide.u16 %r7715, %rs989, 4;
cvt.u32.u16 %r7716, %rs423;
and.b32 %r7717, %r7716, 255;
add.s32 %r10332, %r7715, %r7717;
// Zero the three inner-loop counters %r10316, %r10334, %r10336.
mov.u32 %r10316, %r7713;
mov.u32 %r10334, %r7713;
mov.u32 %r10336, %r7713;
bra.uni $L__BB1_1638;
// Out-of-line flush of the bit accumulator %rs1253, entered when its free-bit
// counter %r9825 has reached 0. Continues at $L__BB1_1711 either way.
//   %r9816 > 191 : nothing is stored, status %r10418 = 1, %r9825 = 0
//   otherwise    : store the byte to [%rd73], bump %r9816, and give the next
//                  byte 7 bits if the stored value was 255, else 8;
//                  status %r10418 = %r10033
$L__BB1_1709:
setp.gt.u32 %p2145, %r9816, 191;
mov.u32 %r10418, 1;
mov.u32 %r9825, 0;
@%p2145 bra $L__BB1_1711;
and.b16 %rs1018, %rs1253, 255;
st.global.u8 [%rd73], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p2146, %rs1018, 255;
selp.b32 %r9825, 7, 8, %p2146;
mov.u16 %rs1253, 0;
mov.u32 %r10418, %r10033;
bra.uni $L__BB1_1711;
// Out-of-line flush of the bit accumulator %rs1253; the branch that enters
// this block lies outside the visible chunk. Continues at $L__BB1_1817.
//   %r9816 > 191 : nothing is stored, status %r10567 = 1, %r9825 = 0
//   otherwise    : store the byte to [%rd74], bump %r9816, and give the next
//                  byte 7 bits if the stored value was 255, else 8;
//                  status %r10567 = %r10033
$L__BB1_1815:
setp.gt.u32 %p2264, %r9816, 191;
mov.u32 %r10567, 1;
mov.u32 %r9825, 0;
@%p2264 bra $L__BB1_1817;
and.b16 %rs1055, %rs1253, 255;
st.global.u8 [%rd74], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p2265, %rs1055, 255;
selp.b32 %r9825, 7, 8, %p2265;
mov.u16 %rs1253, 0;
mov.u32 %r10567, %r10033;
bra.uni $L__BB1_1817;
// Sample 0 of the current group: 32-bit load from %rd3 + 4 * (%r10333 + %rd5).
// A non-zero value v is converted to sign-magnitude form:
//   %r10337 = (|v| << %r2355) | (v & 0x80000000); zero stays zero.
$L__BB1_1638:
cvt.u64.u32 %rd1266, %r10333;
add.s64 %rd1267, %rd1266, %rd5;
shl.b64 %rd1268, %rd1267, 2;
add.s64 %rd1269, %rd3, %rd1268;
ld.global.u32 %r3357, [%rd1269];
setp.eq.s32 %p2067, %r3357, 0;
mov.u32 %r10337, %r7713;
@%p2067 bra $L__BB1_1640;
and.b32 %r7719, %r3357, -2147483648;
abs.s32 %r7720, %r3357;
shl.b32 %r7721, %r7720, %r2355;
or.b32 %r10337, %r7721, %r7719;
$L__BB1_1640:
// %r3360 = ((%r10337 << 1) >> %r2355) & ~1  (sign bit dropped, bit 0 cleared)
shl.b32 %r7725, %r10337, 1;
shr.u32 %r7726, %r7725, %r2355;
and.b32 %r3360, %r7726, -2;
setp.eq.s32 %p2068, %r3360, 0;
// Defaults for a zero sample; %r10341 is also pre-zeroed for the next sample.
mov.u32 %r10341, 0;
mov.u32 %r10338, %r10341;
mov.u32 %r10339, %r10341;
mov.u32 %r10345, %r10341;
@%p2068 bra $L__BB1_1642;
// Non-zero sample:
//   %r10338 = 32 - clz(%r3360 - 1)      (bit width of %r3360 - 1)
//   %r10339 = %r3360 - 2 + sign bit
//   %r10345 = 1                         (bit 0 of the non-zero mask)
add.s32 %r7728, %r3360, -1;
clz.b32 %r7729, %r7728;
mov.u32 %r7730, 32;
sub.s32 %r10338, %r7730, %r7729;
shr.u32 %r7731, %r10337, 31;
add.s32 %r7732, %r7731, %r3360;
add.s32 %r10339, %r7732, -2;
mov.u32 %r10345, 1;
// Sample 1 of the current group: element %r10333 + %r1, read only when
// %r3333 < %r6 (%p2069 is reused by later guards). Otherwise it counts as zero.
$L__BB1_1642:
setp.ge.u32 %p2069, %r3333, %r6;
@%p2069 bra $L__BB1_1645;
add.s32 %r7735, %r10333, %r1;
cvt.u64.u32 %rd1270, %r7735;
add.s64 %rd1271, %rd1270, %rd5;
shl.b64 %rd1272, %rd1271, 2;
add.s64 %rd1273, %rd3, %rd1272;
ld.global.u32 %r3366, [%rd1273];
setp.eq.s32 %p2070, %r3366, 0;
@%p2070 bra $L__BB1_1645;
// %r10341 = (|v| << %r2355) | sign bit
and.b32 %r7736, %r3366, -2147483648;
abs.s32 %r7737, %r3366;
shl.b32 %r7738, %r7737, %r2355;
or.b32 %r10341, %r7738, %r7736;
$L__BB1_1645:
shl.b32 %r7741, %r10341, 1;
shr.u32 %r7742, %r7741, %r2355;
and.b32 %r3369, %r7742, -2;
setp.eq.s32 %p2071, %r3369, 0;
// Defaults; %r10361 (running maximum width) starts as sample 0's %r10338.
mov.u32 %r10356, 0;
mov.u32 %r10342, %r10356;
mov.u32 %r10343, %r10356;
mov.u32 %r10361, %r10338;
@%p2071 bra $L__BB1_1647;
// Non-zero sample: set mask bit 1, %r10342 = width, %r10343 = value,
// %r10361 = max(%r10338, %r10342).
or.b32 %r10345, %r10345, 2;
add.s32 %r7743, %r3369, -1;
clz.b32 %r7744, %r7743;
mov.u32 %r7745, 32;
sub.s32 %r10342, %r7745, %r7744;
max.s32 %r10361, %r10338, %r10342;
shr.u32 %r7746, %r10341, 31;
add.s32 %r7747, %r7746, %r3369;
add.s32 %r10343, %r7747, -2;
// Samples 2 and 3 of the current group (the next column). Skipped, with all
// outputs zero and %r10638 = %r10333 + 1, when %r10316 + 1 >= %r5.
$L__BB1_1647:
add.s32 %r10638, %r10333, 1;
add.s32 %r7752, %r10316, 1;
setp.ge.u32 %p2072, %r7752, %r5;
mov.u32 %r10357, %r10356;
mov.u32 %r10358, %r10356;
mov.u32 %r10359, %r10356;
@%p2072 bra $L__BB1_1658;
// Sample 2: element %r10638 (= %r10333 + 1).
cvt.u64.u32 %rd1274, %r10638;
add.s64 %rd1275, %rd1274, %rd5;
shl.b64 %rd1276, %rd1275, 2;
add.s64 %rd1277, %rd3, %rd1276;
ld.global.u32 %r3379, [%rd1277];
setp.eq.s32 %p2073, %r3379, 0;
mov.u32 %r10357, 0;
mov.u32 %r10346, %r10357;
@%p2073 bra $L__BB1_1650;
and.b32 %r7754, %r3379, -2147483648;
abs.s32 %r7755, %r3379;
shl.b32 %r7756, %r7755, %r2355;
or.b32 %r10346, %r7756, %r7754;
$L__BB1_1650:
shl.b32 %r7759, %r10346, 1;
shr.u32 %r7760, %r7759, %r2355;
and.b32 %r3382, %r7760, -2;
setp.eq.s32 %p2074, %r3382, 0;
mov.u32 %r10359, %r10357;
@%p2074 bra $L__BB1_1652;
// Non-zero: mask bit 2, %r10357 = width, %r10359 = value, update max width.
or.b32 %r10345, %r10345, 4;
add.s32 %r7761, %r3382, -1;
clz.b32 %r7762, %r7761;
mov.u32 %r7763, 32;
sub.s32 %r10357, %r7763, %r7762;
max.s32 %r10361, %r10361, %r10357;
shr.u32 %r7764, %r10346, 31;
add.s32 %r7765, %r7764, %r3382;
add.s32 %r10359, %r7765, -2;
$L__BB1_1652:
// Sample 3: element %r10638 + %r1, read only when %p2069 is false.
mov.u32 %r10356, 0;
mov.u32 %r10351, %r10356;
@%p2069 bra $L__BB1_1655;
add.s32 %r7768, %r10638, %r1;
cvt.u64.u32 %rd1278, %r7768;
add.s64 %rd1279, %rd1278, %rd5;
shl.b64 %rd1280, %rd1279, 2;
add.s64 %rd1281, %rd3, %rd1280;
ld.global.u32 %r3391, [%rd1281];
setp.eq.s32 %p2076, %r3391, 0;
@%p2076 bra $L__BB1_1655;
and.b32 %r7769, %r3391, -2147483648;
abs.s32 %r7770, %r3391;
shl.b32 %r7771, %r7770, %r2355;
or.b32 %r10351, %r7771, %r7769;
$L__BB1_1655:
shl.b32 %r7774, %r10351, 1;
shr.u32 %r7775, %r7774, %r2355;
and.b32 %r3394, %r7775, -2;
setp.eq.s32 %p2077, %r3394, 0;
mov.u32 %r10358, %r10356;
@%p2077 bra $L__BB1_1657;
// Non-zero: mask bit 3, %r10356 = width, %r10358 = value, update max width.
or.b32 %r10345, %r10345, 8;
add.s32 %r7776, %r3394, -1;
clz.b32 %r7777, %r7776;
mov.u32 %r7778, 32;
sub.s32 %r10356, %r7778, %r7777;
max.s32 %r10361, %r10361, %r10356;
shr.u32 %r7779, %r10351, 31;
add.s32 %r7780, %r7779, %r3394;
add.s32 %r10358, %r7780, -2;
$L__BB1_1657:
// Both columns consumed: the next element index is %r10333 + 2.
add.s32 %r10638, %r10333, 2;
// Combine the four per-sample widths.
//   %p2078  = non-zero mask %r10345 has more than one bit set
//   %r7784  = %r10335 when (%r10335 > 1 and %p2078), else 1
//   %r3411  = max(%r7784, %r10361)
//   %r3412  = %r3411 - %r7784
//   %r10363 = 0 when %r3412 < 1; otherwise a 4-bit mask flagging which of
//             %r10338, %r10342, %r10357, %r10356 (bits 0..3) equal %r10361
$L__BB1_1658:
add.s32 %r7782, %r10345, -1;
and.b32 %r7783, %r7782, %r10345;
setp.ne.s32 %p2078, %r7783, 0;
mov.u32 %r10363, 0;
setp.gt.s32 %p2079, %r10335, 1;
and.pred %p2080, %p2079, %p2078;
selp.b32 %r7784, %r10335, 1, %p2080;
max.s32 %r3411, %r7784, %r10361;
sub.s32 %r3412, %r3411, %r7784;
setp.lt.s32 %p2081, %r3412, 1;
@%p2081 bra $L__BB1_1660;
setp.eq.s32 %p2082, %r10338, %r10361;
selp.u32 %r7785, 1, 0, %p2082;
setp.eq.s32 %p2083, %r10342, %r10361;
selp.u32 %r7786, -1, 0, %p2083;
// bfi puts bit 0 of %r7786 into bit 1 of %r7785
bfi.b32 %r7787, %r7786, %r7785, 1, 1;
setp.eq.s32 %p2084, %r10357, %r10361;
selp.u16 %rs992, 1, 0, %p2084;
mul.wide.u16 %r7788, %rs992, 4;
or.b32 %r7789, %r7787, %r7788;
setp.eq.s32 %p2085, %r10356, %r10361;
selp.u16 %rs993, 1, 0, %p2085;
mul.wide.u16 %r7790, %rs993, 8;
or.b32 %r10363, %r7789, %r7790;
// Look up a 16-bit table entry at %rd72 (kernel parameter 4) with index
//   (%r10332 << 8) | (%r10345 << 4) | %r10363
// and append its code bits to the byte accumulator %rs1322.
//   entry bits 4..6  = number of bits to emit (%rs427)
//   entry bits 8..15 = bit pattern, emitted LSB first
//   entry bits 0..3  are not used in this block
//   %r10375 = status out: copy of %r10267, or 1 if %r10264 exceeded 2879
// NOTE(review): presumably the HTJ2K VLC encode table -- confirm.
$L__BB1_1660:
shl.b32 %r7791, %r10345, 4;
shl.b32 %r7792, %r10332, 8;
or.b32 %r7793, %r7791, %r7792;
or.b32 %r7794, %r7793, %r10363;
mul.wide.u32 %rd1282, %r7794, 2;
add.s64 %rd1283, %rd72, %rd1282;
ld.global.u16 %rs426, [%rd1283];
shr.u16 %rs994, %rs426, 4;
and.b16 %rs427, %rs994, 7;
setp.eq.s16 %p2086, %rs427, 0;
mov.u32 %r10375, %r10267;
@%p2086 bra $L__BB1_1667;
cvt.u32.u16 %r10364, %rs427;
shr.u16 %rs995, %rs426, 8;
cvt.u32.u16 %r10365, %rs995;
$L__BB1_1662:
mov.u32 %r3417, %r10364;
setp.gt.u32 %p2087, %r10264, 2879;
mov.u32 %r10375, 1;
@%p2087 bra $L__BB1_1667;
// room = 8 - %r10266 - %r10265; take = min(room, bits remaining)
mov.u32 %r7796, 8;
sub.s32 %r7797, %r7796, %r10266;
sub.s32 %r7798, %r7797, %r10265;
min.u32 %r7799, %r7798, %r3417;
setp.eq.s32 %p2088, %r7799, 32;
mov.u32 %r7800, -1;
shl.b32 %r7801, %r7800, %r7799;
not.b32 %r7802, %r7801;
selp.b32 %r7803, -1, %r7802, %p2088;
and.b32 %r7804, %r7803, %r10365;
shl.b32 %r7805, %r7804, %r10265;
cvt.u16.u32 %rs996, %r7805;
or.b16 %rs1322, %rs1322, %rs996;
add.s32 %r10265, %r7799, %r10265;
sub.s32 %r10364, %r3417, %r7799;
shr.u32 %r10365, %r10365, %r7799;
// Byte not yet full: no flush.
setp.gt.u32 %p2089, %r7798, %r3417;
@%p2089 bra $L__BB1_1666;
// Lift the 7-bit limit; if it was active and the low byte is not 127, skip
// the store so the byte can take one more bit.
setp.ne.s32 %p2090, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs997, %rs1322, 255;
setp.ne.s16 %p2091, %rs997, 127;
and.pred %p2092, %p2090, %p2091;
@%p2092 bra $L__BB1_1666;
// Store the byte at %rd1 + %rd4 + (20548 - %r10264) (descending addresses).
mov.u32 %r7808, 20548;
sub.s32 %r7809, %r7808, %r10264;
cvt.u64.u32 %rd1284, %r7809;
add.s64 %rd1285, %rd1284, %rd4;
add.s64 %rd1286, %rd1, %rd1285;
st.global.u8 [%rd1286], %rs1322;
add.s32 %r10264, %r10264, 1;
// A stored byte value above 143 limits the next byte to 7 bits.
setp.gt.u16 %p2093, %rs997, 143;
selp.u32 %r10266, 1, 0, %p2093;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1666:
setp.ne.s32 %p2094, %r10364, 0;
mov.u32 %r10375, %r10267;
@%p2094 bra $L__BB1_1662;
// Run-length path, taken only when the context %r10332 is 0 (otherwise go
// straight to $L__BB1_1715).
//   %rd73 = %rd1 + %rd4 + 17477 + %r9816: address of the next output byte of
//           the MSB-first bit accumulator %rs1253 (%r9825 = free bits left)
//   mask %r10345 == 0 : handled at $L__BB1_1707
//   mask %r10345 != 0 : shift a 0 bit into %rs1253 and flush when full
// NOTE(review): presumably the HTJ2K MEL coder -- confirm.
$L__BB1_1667:
setp.ne.s32 %p2095, %r10332, 0;
@%p2095 bra $L__BB1_1715;
setp.eq.s32 %p2096, %r10345, 0;
add.s32 %r7810, %r9816, 17477;
cvt.u64.u32 %rd1287, %r7810;
add.s64 %rd1288, %rd1287, %rd4;
add.s64 %rd73, %rd1, %rd1288;
@%p2096 bra $L__BB1_1707;
shl.b16 %rs1253, %rs1253, 1;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p2097, %r9825, 0;
mov.u32 %r10411, %r10033;
@%p2097 bra $L__BB1_1672;
// Accumulator full. If %r9816 > 191, nothing is stored and status %r10411 = 1.
setp.gt.u32 %p2098, %r9816, 191;
mov.u32 %r10411, 1;
mov.u32 %r9825, 0;
@%p2098 bra $L__BB1_1672;
// The next byte always gets 8 bits here (no 255 test in this copy).
st.global.u8 [%rd73], %rs1253;
add.s32 %r9816, %r9816, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9825, 8;
mov.u32 %r10411, %r10033;
// Map state %r10031 to a bit count %r10379:
//   0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, anything larger -> 5
// NOTE(review): matches the shape of the HTJ2K MEL exponent table -- confirm.
$L__BB1_1672:
setp.lt.u32 %p2099, %r10031, 3;
mov.u32 %r10379, 0;
@%p2099 bra $L__BB1_1675;
setp.lt.u32 %p2100, %r10031, 6;
mov.u32 %r10379, 1;
@%p2100 bra $L__BB1_1675;
setp.lt.u32 %p2101, %r10031, 9;
setp.eq.s32 %p2102, %r10031, 11;
selp.b32 %r7816, 4, 5, %p2102;
setp.lt.u32 %p2103, %r10031, 11;
selp.b32 %r7817, 3, %r7816, %p2103;
selp.b32 %r10379, 2, %r7817, %p2101;
// Emit the low %r10379 bits of the run counter %r10030 into %rs1253, most
// significant bit first. This block is the peeled part of a 4x-unrolled loop:
// it handles the first (%r10379 & 3) bits, one copy per bit.
// Each copy does
//   %rs1253 = (%rs1253 << 1) | bit;  %r9825 -= 1;
//   if %r9825 == 0: flush the byte to %rd1 + %rd4 + 17477 + %r9816, or set the
//   status to 1 (nothing stored) when %r9816 > 191.
// Status chain: %r10411 -> %r10390 -> %r10385 -> %r10411/%r10390.
// %r10389 = number of bits still to emit after this block.
$L__BB1_1675:
setp.eq.s32 %p2104, %r10379, 0;
@%p2104 bra $L__BB1_1703;
add.s32 %r3441, %r10379, -1;
and.b32 %r3442, %r10379, 3;
setp.eq.s32 %p2105, %r3442, 0;
mov.u32 %r10389, %r10379;
mov.u32 %r10390, %r10411;
@%p2105 bra $L__BB1_1688;
// Peeled bit 1: bit index %r10379 - 1.
mov.u32 %r7819, 1;
shl.b32 %r7820, %r7819, %r3441;
and.b32 %r7821, %r7820, %r10030;
setp.ne.s32 %p2106, %r7821, 0;
selp.u32 %r7822, 1, 0, %p2106;
cvt.u32.u16 %r7823, %rs1253;
bfi.b32 %r7824, %r7823, %r7822, 1, 8;
cvt.u16.u32 %rs1253, %r7824;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p2107, %r9825, 0;
mov.u32 %r10390, %r10411;
@%p2107 bra $L__BB1_1680;
setp.gt.u32 %p2108, %r9816, 191;
mov.u32 %r9825, 0;
mov.u32 %r10390, %r7819;
@%p2108 bra $L__BB1_1680;
// This copy's flush always gives the next byte 8 bits (no 255 test).
add.s32 %r7828, %r9816, 17477;
cvt.u64.u32 %rd1289, %r7828;
add.s64 %rd1290, %rd1289, %rd4;
add.s64 %rd1291, %rd1, %rd1290;
st.global.u8 [%rd1291], %rs1253;
add.s32 %r9816, %r9816, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9825, 8;
mov.u32 %r10390, %r10411;
$L__BB1_1680:
setp.eq.s32 %p2109, %r3442, 1;
mov.u32 %r10411, %r10390;
mov.u32 %r10389, %r3441;
@%p2109 bra $L__BB1_1688;
// Peeled bit 2: bit index %r10379 - 2.
add.s32 %r10389, %r10379, -2;
mov.u32 %r7829, 1;
shl.b32 %r7830, %r7829, %r10389;
and.b32 %r7831, %r7830, %r10030;
setp.ne.s32 %p2110, %r7831, 0;
selp.u32 %r7832, 1, 0, %p2110;
cvt.u32.u16 %r7833, %rs1253;
bfi.b32 %r7834, %r7833, %r7832, 1, 8;
cvt.u16.u32 %rs1253, %r7834;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p2111, %r9825, 0;
mov.u32 %r10385, %r10390;
@%p2111 bra $L__BB1_1684;
setp.gt.u32 %p2112, %r9816, 191;
mov.u32 %r9825, 0;
mov.u32 %r10385, %r7829;
@%p2112 bra $L__BB1_1684;
// Flush: the next byte gets 7 bits if the stored value was 255, else 8.
add.s32 %r7837, %r9816, 17477;
cvt.u64.u32 %rd1292, %r7837;
add.s64 %rd1293, %rd1292, %rd4;
add.s64 %rd1294, %rd1, %rd1293;
and.b16 %rs1004, %rs1253, 255;
st.global.u8 [%rd1294], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p2113, %rs1004, 255;
selp.b32 %r9825, 7, 8, %p2113;
mov.u16 %rs1253, 0;
mov.u32 %r10385, %r10390;
$L__BB1_1684:
setp.eq.s32 %p2114, %r3442, 2;
mov.u32 %r10411, %r10385;
mov.u32 %r10390, %r10385;
@%p2114 bra $L__BB1_1688;
// Peeled bit 3: bit index %r10379 - 3.
add.s32 %r10389, %r10379, -3;
mov.u32 %r7838, 1;
shl.b32 %r7839, %r7838, %r10389;
and.b32 %r7840, %r7839, %r10030;
setp.ne.s32 %p2115, %r7840, 0;
selp.u32 %r7841, 1, 0, %p2115;
cvt.u32.u16 %r7842, %rs1253;
bfi.b32 %r7843, %r7842, %r7841, 1, 8;
cvt.u16.u32 %rs1253, %r7843;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p2116, %r9825, 0;
mov.u32 %r10411, %r10385;
mov.u32 %r10390, %r10385;
@%p2116 bra $L__BB1_1688;
setp.gt.u32 %p2117, %r9816, 191;
mov.u32 %r9825, 0;
mov.u32 %r10411, %r7838;
mov.u32 %r10390, %r7838;
@%p2117 bra $L__BB1_1688;
add.s32 %r7848, %r9816, 17477;
cvt.u64.u32 %rd1295, %r7848;
add.s64 %rd1296, %rd1295, %rd4;
add.s64 %rd1297, %rd1, %rd1296;
and.b16 %rs1007, %rs1253, 255;
st.global.u8 [%rd1297], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p2118, %rs1007, 255;
selp.b32 %r9825, 7, 8, %p2118;
mov.u16 %rs1253, 0;
mov.u32 %r10411, %r10385;
mov.u32 %r10390, %r10385;
// Main body of the 4x-unrolled bit-emission loop. Entered only when the
// total bit count was at least 4 (%r3441 = count - 1 >= 3). Each pass emits
// bits %r10389-1 .. %r10389-4 of %r10030, MSB first, then subtracts 4 from
// %r10389 and repeats until it reaches 0.
// Inside a pass the accumulator lives in 32-bit temporaries
// (%r10399 -> %r10403 -> %r10407) and is written back to %rs1253 at the end;
// the free-bit counter goes %r9825 -> %r10398 -> %r10402 -> %r10406 -> %r9825.
// Every flush stores to %rd1 + %rd4 + 17477 + %r9816, gives the next byte 7
// bits if the stored value was 255 (else 8), and is replaced by "status = 1,
// nothing stored" when %r9816 > 191.
$L__BB1_1688:
setp.lt.u32 %p2119, %r3441, 3;
@%p2119 bra $L__BB1_1703;
mov.u32 %r10411, %r10390;
$L__BB1_1690:
// Bit %r10389 - 1.
add.s32 %r7849, %r10389, -1;
mov.u32 %r7850, 1;
shl.b32 %r7851, %r7850, %r7849;
and.b32 %r7852, %r7851, %r10030;
setp.ne.s32 %p2120, %r7852, 0;
selp.u32 %r7853, 1, 0, %p2120;
cvt.u32.u16 %r7854, %rs1253;
bfi.b32 %r10399, %r7854, %r7853, 1, 8;
add.s32 %r10398, %r9825, -1;
setp.ne.s32 %p2121, %r10398, 0;
mov.u32 %r10400, %r10411;
@%p2121 bra $L__BB1_1693;
setp.gt.u32 %p2122, %r9816, 191;
mov.u32 %r10398, 0;
mov.u32 %r10400, %r7850;
@%p2122 bra $L__BB1_1693;
cvt.u16.u32 %rs1008, %r10399;
and.b16 %rs1009, %rs1008, 255;
add.s32 %r7858, %r9816, 17477;
cvt.u64.u32 %rd1298, %r7858;
add.s64 %rd1299, %rd1298, %rd4;
add.s64 %rd1300, %rd1, %rd1299;
st.global.u8 [%rd1300], %rs1008;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p2123, %rs1009, 255;
selp.b32 %r10398, 7, 8, %p2123;
mov.u32 %r10399, 0;
mov.u32 %r10400, %r10411;
$L__BB1_1693:
// Bit %r10389 - 2.
add.s32 %r7859, %r10389, -2;
shl.b32 %r7861, %r7850, %r7859;
and.b32 %r7862, %r7861, %r10030;
setp.ne.s32 %p2124, %r7862, 0;
and.b32 %r7863, %r10399, 127;
selp.u32 %r7864, 1, 0, %p2124;
bfi.b32 %r10403, %r7863, %r7864, 1, 7;
add.s32 %r10402, %r10398, -1;
setp.ne.s32 %p2125, %r10402, 0;
mov.u32 %r10404, %r10400;
@%p2125 bra $L__BB1_1696;
setp.gt.u32 %p2126, %r9816, 191;
mov.u32 %r10404, 1;
mov.u32 %r10402, 0;
@%p2126 bra $L__BB1_1696;
cvt.u16.u32 %rs1010, %r10403;
and.b16 %rs1011, %rs1010, 255;
add.s32 %r7868, %r9816, 17477;
cvt.u64.u32 %rd1301, %r7868;
add.s64 %rd1302, %rd1301, %rd4;
add.s64 %rd1303, %rd1, %rd1302;
st.global.u8 [%rd1303], %rs1010;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p2127, %rs1011, 255;
selp.b32 %r10402, 7, 8, %p2127;
mov.u32 %r10403, 0;
mov.u32 %r10404, %r10400;
$L__BB1_1696:
// Bit %r10389 - 3.
add.s32 %r7869, %r10389, -3;
mov.u32 %r7870, 1;
shl.b32 %r7871, %r7870, %r7869;
and.b32 %r7872, %r7871, %r10030;
setp.ne.s32 %p2128, %r7872, 0;
and.b32 %r7873, %r10403, 127;
selp.u32 %r7874, 1, 0, %p2128;
bfi.b32 %r10407, %r7873, %r7874, 1, 7;
add.s32 %r10406, %r10402, -1;
setp.ne.s32 %p2129, %r10406, 0;
mov.u32 %r10408, %r10404;
@%p2129 bra $L__BB1_1699;
setp.gt.u32 %p2130, %r9816, 191;
mov.u32 %r10406, 0;
mov.u32 %r10408, %r7870;
@%p2130 bra $L__BB1_1699;
cvt.u16.u32 %rs1012, %r10407;
and.b16 %rs1013, %rs1012, 255;
add.s32 %r7878, %r9816, 17477;
cvt.u64.u32 %rd1304, %r7878;
add.s64 %rd1305, %rd1304, %rd4;
add.s64 %rd1306, %rd1, %rd1305;
st.global.u8 [%rd1306], %rs1012;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p2131, %rs1013, 255;
selp.b32 %r10406, 7, 8, %p2131;
mov.u32 %r10407, 0;
mov.u32 %r10408, %r10404;
$L__BB1_1699:
// Bit %r10389 - 4; %r10389 itself is reduced by 4 here and the accumulator
// is written back to %rs1253.
add.s32 %r10389, %r10389, -4;
shl.b32 %r7880, %r7870, %r10389;
and.b32 %r7881, %r7880, %r10030;
setp.ne.s32 %p2132, %r7881, 0;
and.b32 %r7882, %r10407, 127;
selp.u32 %r7883, 1, 0, %p2132;
bfi.b32 %r7884, %r7882, %r7883, 1, 15;
cvt.u16.u32 %rs1253, %r7884;
add.s32 %r9825, %r10406, -1;
setp.ne.s32 %p2133, %r9825, 0;
mov.u32 %r10411, %r10408;
@%p2133 bra $L__BB1_1702;
setp.gt.u32 %p2134, %r9816, 191;
mov.u32 %r10411, 1;
mov.u32 %r9825, 0;
@%p2134 bra $L__BB1_1702;
add.s32 %r7887, %r9816, 17477;
cvt.u64.u32 %rd1307, %r7887;
add.s64 %rd1308, %rd1307, %rd4;
add.s64 %rd1309, %rd1, %rd1308;
and.b16 %rs1015, %rs1253, 255;
st.global.u8 [%rd1309], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p2135, %rs1015, 255;
selp.b32 %r9825, 7, 8, %p2135;
mov.u16 %rs1253, 0;
mov.u32 %r10411, %r10408;
$L__BB1_1702:
setp.ne.s32 %p2136, %r10389, 0;
@%p2136 bra $L__BB1_1690;
// After the run bits are emitted: decrement state %r10031 (saturating at 0),
// reset the run counter %r10030 to 0, and recompute the run threshold
// %r10032 = 1 << E, where E is derived from the new state:
//   0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, anything larger -> 5
// The status %r10411 is carried forward into %r10033.
$L__BB1_1703:
add.s32 %r7889, %r10031, -1;
setp.eq.s32 %p2137, %r10031, 0;
mov.u32 %r10030, 0;
selp.b32 %r10031, 0, %r7889, %p2137;
setp.lt.u32 %p2138, %r10031, 3;
mov.u32 %r10415, %r10030;
@%p2138 bra $L__BB1_1706;
setp.lt.u32 %p2139, %r10031, 6;
mov.u32 %r10415, 1;
@%p2139 bra $L__BB1_1706;
setp.lt.u32 %p2140, %r10031, 9;
setp.eq.s32 %p2141, %r10031, 11;
selp.b32 %r7891, 4, 5, %p2141;
setp.lt.u32 %p2142, %r10031, 11;
selp.b32 %r7892, 3, %r7891, %p2142;
selp.b32 %r10415, 2, %r7892, %p2140;
$L__BB1_1706:
mov.u32 %r7894, 1;
shl.b32 %r10032, %r7894, %r10415;
mov.u32 %r10033, %r10411;
bra.uni $L__BB1_1715;
// Zero-mask case of the run-length path: increment the run counter %r10030.
// While it stays below the threshold %r10032 nothing is emitted. Once it
// reaches the threshold, a 1 bit is shifted into %rs1253; the byte is flushed
// at $L__BB1_1709 if that used the last free bit, and either way control
// continues at $L__BB1_1711.
$L__BB1_1707:
add.s32 %r10030, %r10030, 1;
setp.lt.u32 %p2143, %r10030, %r10032;
@%p2143 bra $L__BB1_1715;
shl.b16 %rs1016, %rs1253, 1;
or.b16 %rs1253, %rs1016, 1;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p2144, %r9825, 0;
mov.u32 %r10418, %r10033;
@%p2144 bra $L__BB1_1711;
bra.uni $L__BB1_1709;
// After a full run was signalled: increment state %r10031 (capped at 12),
// reset the run counter %r10030 to 0, and recompute the run threshold
// %r10032 = 1 << E, where E is derived from the new state:
//   0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, 12 -> 5
// The status %r10418 is carried forward into %r10033.
$L__BB1_1711:
add.s32 %r7898, %r10031, 1;
min.u32 %r10031, %r7898, 12;
setp.lt.u32 %p2147, %r10031, 3;
mov.u32 %r10030, 0;
mov.u32 %r10419, %r10030;
@%p2147 bra $L__BB1_1714;
setp.lt.u32 %p2148, %r10031, 6;
mov.u32 %r10419, 1;
@%p2148 bra $L__BB1_1714;
setp.lt.u32 %p2149, %r10031, 9;
setp.eq.s32 %p2150, %r10031, 11;
selp.b32 %r7900, 4, 5, %p2150;
setp.lt.u32 %p2151, %r10031, 11;
selp.b32 %r7901, 3, %r7900, %p2151;
selp.b32 %r10419, 2, %r7901, %p2149;
$L__BB1_1714:
mov.u32 %r7903, 1;
shl.b32 %r10032, %r7903, %r10419;
mov.u32 %r10033, %r10418;
// Forward bit stream, sample 0 (mask bit 0 of %r10345).
//   %r3525  = low 4 bits of the table entry %rs426 (one flag per sample)
//   nbits   = %r3411 - (flag bit 0); nothing is emitted when nbits == 0
//   payload = %r10339 masked to nbits bits, packed LSB first
//   %r10454 = byte accumulator, %r10453 = bits held, %r10452 = byte capacity
//   %r10451 = output offset; bytes go to %rd1 + %rd4 + %r10451 (ascending)
//   %r10440 = status out: copy of %r10485, or 1 once %r10451 exceeds 17476
// NOTE(review): presumably the HTJ2K MagSgn stream -- confirm.
$L__BB1_1715:
and.b16 %rs1019, %rs426, 15;
cvt.u32.u16 %r3525, %rs1019;
// %p2155 = !(mask bit 0 set); the xor with a constant false is a no-op.
and.b32 %r7904, %r10345, 1;
setp.eq.b32 %p2152, %r7904, 1;
mov.pred %p2153, 0;
xor.pred %p2154, %p2152, %p2153;
not.pred %p2155, %p2154;
mov.u32 %r10440, %r10485;
@%p2155 bra $L__BB1_1722;
and.b32 %r7905, %r3525, 1;
sub.s32 %r10426, %r3411, %r7905;
setp.eq.s32 %p2156, %r10426, 0;
mov.u32 %r10440, %r10485;
@%p2156 bra $L__BB1_1722;
mov.u32 %r7906, -1;
shl.b32 %r7907, %r7906, %r10426;
not.b32 %r7908, %r7907;
and.b32 %r10427, %r10339, %r7908;
$L__BB1_1718:
setp.gt.u32 %p2157, %r10451, 17476;
mov.u32 %r10440, 1;
@%p2157 bra $L__BB1_1722;
// take = min(capacity - bits held, bits remaining)
sub.s32 %r7910, %r10452, %r10453;
min.u32 %r7911, %r7910, %r10426;
setp.eq.s32 %p2158, %r7911, 32;
mov.u32 %r7912, -1;
shl.b32 %r7913, %r7912, %r7911;
not.b32 %r7914, %r7913;
selp.b32 %r7915, -1, %r7914, %p2158;
and.b32 %r7916, %r7915, %r10427;
shl.b32 %r7917, %r7916, %r10453;
or.b32 %r10454, %r7917, %r10454;
add.s32 %r10453, %r7911, %r10453;
shr.u32 %r10427, %r10427, %r7911;
sub.s32 %r10426, %r10426, %r7911;
setp.lt.u32 %p2159, %r10453, %r10452;
@%p2159 bra $L__BB1_1721;
// Byte complete: store it; after a 255 byte the next one holds only 7 bits.
cvt.u64.u32 %rd1310, %r10451;
add.s64 %rd1311, %rd1310, %rd4;
add.s64 %rd1312, %rd1, %rd1311;
st.global.u8 [%rd1312], %r10454;
add.s32 %r10451, %r10451, 1;
setp.eq.s32 %p2160, %r10454, 255;
selp.b32 %r10452, 7, 8, %p2160;
mov.u32 %r10453, 0;
mov.u32 %r10454, %r10453;
$L__BB1_1721:
setp.ne.s32 %p2161, %r10426, 0;
mov.u32 %r10440, %r10485;
@%p2161 bra $L__BB1_1718;
// Forward bit stream, sample 1 (mask bit 1 of %r10345, kept in %r3549).
//   nbits   = %r3411 - (bit 1 of %r3525); nothing is emitted when nbits == 0
//   payload = %r10343 masked to nbits bits, packed LSB first
//   %r10454 = byte accumulator, %r10453 = bits held, %r10452 = byte capacity
//   %r10451 = output offset; bytes go to %rd1 + %rd4 + %r10451 (ascending)
//   %r10455 = status out: copy of %r10440, or 1 once %r10451 exceeds 17476
$L__BB1_1722:
and.b32 %r3549, %r10345, 2;
setp.eq.s32 %p2162, %r3549, 0;
mov.u32 %r10455, %r10440;
@%p2162 bra $L__BB1_1729;
shr.u32 %r7920, %r3525, 1;
and.b32 %r7921, %r7920, 1;
sub.s32 %r10441, %r3411, %r7921;
setp.eq.s32 %p2163, %r10441, 0;
mov.u32 %r10455, %r10440;
@%p2163 bra $L__BB1_1729;
mov.u32 %r7922, -1;
shl.b32 %r7923, %r7922, %r10441;
not.b32 %r7924, %r7923;
and.b32 %r10442, %r10343, %r7924;
$L__BB1_1725:
setp.gt.u32 %p2164, %r10451, 17476;
mov.u32 %r10455, 1;
@%p2164 bra $L__BB1_1729;
// take = min(capacity - bits held, bits remaining)
sub.s32 %r7926, %r10452, %r10453;
min.u32 %r7927, %r7926, %r10441;
setp.eq.s32 %p2165, %r7927, 32;
mov.u32 %r7928, -1;
shl.b32 %r7929, %r7928, %r7927;
not.b32 %r7930, %r7929;
selp.b32 %r7931, -1, %r7930, %p2165;
and.b32 %r7932, %r7931, %r10442;
shl.b32 %r7933, %r7932, %r10453;
or.b32 %r10454, %r7933, %r10454;
add.s32 %r10453, %r7927, %r10453;
shr.u32 %r10442, %r10442, %r7927;
sub.s32 %r10441, %r10441, %r7927;
setp.lt.u32 %p2166, %r10453, %r10452;
@%p2166 bra $L__BB1_1728;
// Byte complete: store it; after a 255 byte the next one holds only 7 bits.
cvt.u64.u32 %rd1313, %r10451;
add.s64 %rd1314, %rd1313, %rd4;
add.s64 %rd1315, %rd1, %rd1314;
st.global.u8 [%rd1315], %r10454;
add.s32 %r10451, %r10451, 1;
setp.eq.s32 %p2167, %r10454, 255;
selp.b32 %r10452, 7, 8, %p2167;
mov.u32 %r10453, 0;
mov.u32 %r10454, %r10453;
$L__BB1_1728:
setp.ne.s32 %p2168, %r10441, 0;
mov.u32 %r10455, %r10440;
@%p2168 bra $L__BB1_1725;
// Forward bit stream, sample 2 (mask bit 2 of %r10345, kept in %r3573).
//   nbits   = %r3411 - (bit 2 of %r3525); nothing is emitted when nbits == 0
//   payload = %r10359 masked to nbits bits, packed LSB first
//   %r10454 = byte accumulator, %r10453 = bits held, %r10452 = byte capacity
//   %r10451 = output offset; bytes go to %rd1 + %rd4 + %r10451 (ascending)
//   %r10470 = status out: copy of %r10455, or 1 once %r10451 exceeds 17476
$L__BB1_1729:
and.b32 %r3573, %r10345, 4;
setp.eq.s32 %p2169, %r3573, 0;
mov.u32 %r10470, %r10455;
@%p2169 bra $L__BB1_1736;
shr.u32 %r7936, %r3525, 2;
and.b32 %r7937, %r7936, 1;
sub.s32 %r10456, %r3411, %r7937;
setp.eq.s32 %p2170, %r10456, 0;
mov.u32 %r10470, %r10455;
@%p2170 bra $L__BB1_1736;
mov.u32 %r7938, -1;
shl.b32 %r7939, %r7938, %r10456;
not.b32 %r7940, %r7939;
and.b32 %r10457, %r10359, %r7940;
$L__BB1_1732:
setp.gt.u32 %p2171, %r10451, 17476;
mov.u32 %r10470, 1;
@%p2171 bra $L__BB1_1736;
// take = min(capacity - bits held, bits remaining)
sub.s32 %r7942, %r10452, %r10453;
min.u32 %r7943, %r7942, %r10456;
setp.eq.s32 %p2172, %r7943, 32;
mov.u32 %r7944, -1;
shl.b32 %r7945, %r7944, %r7943;
not.b32 %r7946, %r7945;
selp.b32 %r7947, -1, %r7946, %p2172;
and.b32 %r7948, %r7947, %r10457;
shl.b32 %r7949, %r7948, %r10453;
or.b32 %r10454, %r7949, %r10454;
add.s32 %r10453, %r7943, %r10453;
shr.u32 %r10457, %r10457, %r7943;
sub.s32 %r10456, %r10456, %r7943;
setp.lt.u32 %p2173, %r10453, %r10452;
@%p2173 bra $L__BB1_1735;
// Byte complete: store it; after a 255 byte the next one holds only 7 bits.
cvt.u64.u32 %rd1316, %r10451;
add.s64 %rd1317, %rd1316, %rd4;
add.s64 %rd1318, %rd1, %rd1317;
st.global.u8 [%rd1318], %r10454;
add.s32 %r10451, %r10451, 1;
setp.eq.s32 %p2174, %r10454, 255;
selp.b32 %r10452, 7, 8, %p2174;
mov.u32 %r10453, 0;
mov.u32 %r10454, %r10453;
$L__BB1_1735:
setp.ne.s32 %p2175, %r10456, 0;
mov.u32 %r10470, %r10455;
@%p2175 bra $L__BB1_1732;
// Forward bit stream, sample 3 (mask bit 3 of %r10345, kept in %r3597).
//   nbits   = %r3411 - (%r3525 >> 3); %r3525 is a 4-bit value, so this is its
//             bit 3. Nothing is emitted when nbits == 0.
//   payload = %r10358 masked to nbits bits, packed LSB first
//   %r10454 = byte accumulator, %r10453 = bits held, %r10452 = byte capacity
//   %r10451 = output offset; bytes go to %rd1 + %rd4 + %r10451 (ascending)
//   %r10485 = status out: copy of %r10470, or 1 once %r10451 exceeds 17476
$L__BB1_1736:
and.b32 %r3597, %r10345, 8;
setp.eq.s32 %p2176, %r3597, 0;
mov.u32 %r10485, %r10470;
@%p2176 bra $L__BB1_1743;
shr.u32 %r7952, %r3525, 3;
sub.s32 %r10471, %r3411, %r7952;
setp.eq.s32 %p2177, %r10471, 0;
mov.u32 %r10485, %r10470;
@%p2177 bra $L__BB1_1743;
mov.u32 %r7953, -1;
shl.b32 %r7954, %r7953, %r10471;
not.b32 %r7955, %r7954;
and.b32 %r10472, %r10358, %r7955;
$L__BB1_1739:
setp.gt.u32 %p2178, %r10451, 17476;
mov.u32 %r10485, 1;
@%p2178 bra $L__BB1_1743;
// take = min(capacity - bits held, bits remaining)
sub.s32 %r7957, %r10452, %r10453;
min.u32 %r7958, %r7957, %r10471;
setp.eq.s32 %p2179, %r7958, 32;
mov.u32 %r7959, -1;
shl.b32 %r7960, %r7959, %r7958;
not.b32 %r7961, %r7960;
selp.b32 %r7962, -1, %r7961, %p2179;
and.b32 %r7963, %r7962, %r10472;
shl.b32 %r7964, %r7963, %r10453;
or.b32 %r10454, %r7964, %r10454;
add.s32 %r10453, %r7958, %r10453;
shr.u32 %r10472, %r10472, %r7958;
sub.s32 %r10471, %r10471, %r7958;
setp.lt.u32 %p2180, %r10453, %r10452;
@%p2180 bra $L__BB1_1742;
// Byte complete: store it; after a 255 byte the next one holds only 7 bits.
cvt.u64.u32 %rd1319, %r10451;
add.s64 %rd1320, %rd1319, %rd4;
add.s64 %rd1321, %rd1, %rd1320;
st.global.u8 [%rd1321], %r10454;
add.s32 %r10451, %r10451, 1;
setp.eq.s32 %p2181, %r10454, 255;
selp.b32 %r10452, 7, 8, %p2181;
mov.u32 %r10453, 0;
mov.u32 %r10454, %r10453;
$L__BB1_1742:
setp.ne.s32 %p2182, %r10471, 0;
mov.u32 %r10485, %r10470;
@%p2182 bra $L__BB1_1739;
// Update the per-row shared state after the first group and fetch the
// neighbour data needed by the second one. Statement order matters: the
// loads of [%r3621+1] / [%r3621+2] happen before [%r3621+1] is overwritten.
//   %r3621 = %r4095 + %r10334 (shared byte array indexed by %r10334)
//   %r3624 = cleanup_cx_val + %r10336
$L__BB1_1743:
add.s32 %r3621, %r4095, %r10334;
ld.shared.u8 %rs1020, [%r3621];
// The context for the lookup index is reset; it is also the default %r10655.
mov.u32 %r10332, 0;
// [%r3621] = max(old byte, low byte of %r10342)   (unsigned compare)
cvt.u32.u16 %r7970, %rs1020;
and.b32 %r7971, %r7970, 255;
and.b32 %r7972, %r10342, 255;
setp.lt.u32 %p2183, %r7972, %r7971;
cvt.u16.u32 %rs1021, %r10342;
selp.b16 %rs1022, %rs1020, %rs1021, %p2183;
st.shared.u8 [%r3621], %rs1022;
// %rs449 = the larger of bytes [%r3621+1] and [%r3621+2], fetched through a
// second load at the selected index; %r10335 = %rs449 - 1.
ld.shared.u8 %rs448, [%r3621+2];
ld.shared.u8 %rs1023, [%r3621+1];
setp.gt.u16 %p2184, %rs1023, %rs448;
add.s32 %r10637, %r10334, 1;
add.s32 %r7973, %r10334, 2;
selp.b32 %r7974, %r10637, %r7973, %p2184;
add.s32 %r7975, %r4095, %r7974;
ld.shared.u8 %rs449, [%r7975];
cvt.u32.u16 %r7976, %rs449;
and.b32 %r7977, %r7976, 255;
add.s32 %r10335, %r7977, -1;
cvt.u16.u32 %rs450, %r10356;
// %rs1025 = mask bit 1 of the first group, moved down to bit 0
cvt.u16.u32 %rs1024, %r3549;
shr.u16 %rs1025, %rs1024, 1;
mov.u32 %r7978, _ZZ32 j2k_htj2k_encode_codeblocksE14cleanup_cx_val;
add.s32 %r3624, %r7978, %r10336;
// [%r3621+1] = low byte of %r10356 (width of sample 3)
st.shared.u8 [%r3621+1], %r10356;
// cx_val[%r10336] |= %rs1025
ld.shared.u8 %rs1026, [%r3624];
or.b16 %rs1027, %rs1026, %rs1025;
st.shared.u8 [%r3624], %rs1027;
add.s32 %r10336, %r10336, 1;
// Read the next two cx_val bytes, then overwrite [%r3624+1] with mask bit 3
// of the first group (%r3597 >> 3).
ld.shared.u8 %rs451, [%r3624+1];
ld.shared.u8 %r3626, [%r3624+2];
shr.u32 %r3627, %r3597, 3;
st.shared.u8 [%r3624+1], %r3627;
// No second group when %r10316 + 2 >= %r5: leave with %r10655 = 0.
add.s32 %r7979, %r10316, 2;
setp.ge.u32 %p2185, %r7979, %r5;
mov.u32 %r10655, %r10332;
@%p2185 bra $L__BB1_1850;
// Second group, sample 0: 32-bit load from %rd3 + 4 * (%r10638 + %rd5).
// A non-zero value v becomes %r10486 = (|v| << %r2355) | (v & 0x80000000).
cvt.u64.u32 %rd1322, %r10638;
add.s64 %rd1323, %rd1322, %rd5;
shl.b64 %rd1324, %rd1323, 2;
add.s64 %rd1325, %rd3, %rd1324;
ld.global.u32 %r3628, [%rd1325];
setp.eq.s32 %p2186, %r3628, 0;
mov.u32 %r10487, 0;
mov.u32 %r10486, %r10487;
@%p2186 bra $L__BB1_1746;
and.b32 %r7981, %r3628, -2147483648;
abs.s32 %r7982, %r3628;
shl.b32 %r7983, %r7982, %r2355;
or.b32 %r10486, %r7983, %r7981;
$L__BB1_1746:
// %r3631 = ((%r10486 << 1) >> %r2355) & ~1
shl.b32 %r7987, %r10486, 1;
shr.u32 %r7988, %r7987, %r2355;
and.b32 %r3631, %r7988, -2;
setp.eq.s32 %p2187, %r3631, 0;
mov.u32 %r10488, %r10487;
mov.u32 %r10494, %r10487;
@%p2187 bra $L__BB1_1748;
// Non-zero sample: %r10487 = 32 - clz(%r3631 - 1), %r10488 = %r3631 - 2 +
// sign bit, non-zero mask %r10494 = 1.
add.s32 %r7990, %r3631, -1;
clz.b32 %r7991, %r7990;
mov.u32 %r7992, 32;
sub.s32 %r10487, %r7992, %r7991;
shr.u32 %r7993, %r10486, 31;
add.s32 %r7994, %r7993, %r3631;
add.s32 %r10488, %r7994, -2;
mov.u32 %r10494, 1;
// Second group, sample 1: element %r10638 + %r1, read only when %p2069 is
// false; otherwise it counts as zero.
$L__BB1_1748:
mov.u32 %r10491, 0;
mov.u32 %r10490, %r10491;
@%p2069 bra $L__BB1_1751;
add.s32 %r7997, %r10638, %r1;
cvt.u64.u32 %rd1326, %r7997;
add.s64 %rd1327, %rd1326, %rd5;
shl.b64 %rd1328, %rd1327, 2;
add.s64 %rd1329, %rd3, %rd1328;
ld.global.u32 %r3637, [%rd1329];
setp.eq.s32 %p2189, %r3637, 0;
@%p2189 bra $L__BB1_1751;
// %r10490 = (|v| << %r2355) | sign bit
and.b32 %r7998, %r3637, -2147483648;
abs.s32 %r7999, %r3637;
shl.b32 %r8000, %r7999, %r2355;
or.b32 %r10490, %r8000, %r7998;
$L__BB1_1751:
shl.b32 %r8003, %r10490, 1;
shr.u32 %r8004, %r8003, %r2355;
and.b32 %r3640, %r8004, -2;
setp.eq.s32 %p2190, %r3640, 0;
// Defaults; %r10510 (running maximum width) starts as sample 0's %r10487.
mov.u32 %r10492, %r10491;
mov.u32 %r10510, %r10487;
@%p2190 bra $L__BB1_1753;
// Non-zero: mask bit 1, %r10491 = width, %r10492 = value,
// %r10510 = max(%r10487, %r10491).
or.b32 %r10494, %r10494, 2;
add.s32 %r8005, %r3640, -1;
clz.b32 %r8006, %r8005;
mov.u32 %r8007, 32;
sub.s32 %r10491, %r8007, %r8006;
max.s32 %r10510, %r10487, %r10491;
shr.u32 %r8008, %r10490, 31;
add.s32 %r8009, %r8008, %r3640;
add.s32 %r10492, %r8009, -2;
// Second group, samples 2 and 3 (the next column). Skipped, with all outputs
// zero and %r10509 = %r10638 + 1, when %r10316 + 3 >= %r5.
$L__BB1_1753:
add.s32 %r10509, %r10638, 1;
add.s32 %r8014, %r10316, 3;
setp.ge.u32 %p2191, %r8014, %r5;
mov.u32 %r10512, 0;
mov.u32 %r10505, %r10512;
mov.u32 %r10506, %r10512;
mov.u32 %r10507, %r10512;
mov.u32 %r10508, %r10512;
@%p2191 bra $L__BB1_1764;
// Sample 2: element %r10509 (= %r10638 + 1).
cvt.u64.u32 %rd1330, %r10509;
add.s64 %rd1331, %rd1330, %rd5;
shl.b64 %rd1332, %rd1331, 2;
add.s64 %rd1333, %rd3, %rd1332;
ld.global.u32 %r3650, [%rd1333];
setp.eq.s32 %p2192, %r3650, 0;
mov.u32 %r10506, 0;
mov.u32 %r10495, %r10506;
@%p2192 bra $L__BB1_1756;
and.b32 %r8016, %r3650, -2147483648;
abs.s32 %r8017, %r3650;
shl.b32 %r8018, %r8017, %r2355;
or.b32 %r10495, %r8018, %r8016;
$L__BB1_1756:
shl.b32 %r8021, %r10495, 1;
shr.u32 %r8022, %r8021, %r2355;
and.b32 %r3653, %r8022, -2;
setp.eq.s32 %p2193, %r3653, 0;
mov.u32 %r10508, %r10506;
@%p2193 bra $L__BB1_1758;
// Non-zero: mask bit 2, %r10506 = width, %r10508 = value, update max width.
or.b32 %r10494, %r10494, 4;
add.s32 %r8023, %r3653, -1;
clz.b32 %r8024, %r8023;
mov.u32 %r8025, 32;
sub.s32 %r10506, %r8025, %r8024;
max.s32 %r10510, %r10510, %r10506;
shr.u32 %r8026, %r10495, 31;
add.s32 %r8027, %r8026, %r3653;
add.s32 %r10508, %r8027, -2;
$L__BB1_1758:
// Sample 3: element %r10509 + %r1, read only when %p2069 is false.
mov.u32 %r10505, 0;
mov.u32 %r10500, %r10505;
@%p2069 bra $L__BB1_1761;
add.s32 %r8030, %r10509, %r1;
cvt.u64.u32 %rd1334, %r8030;
add.s64 %rd1335, %rd1334, %rd5;
shl.b64 %rd1336, %rd1335, 2;
add.s64 %rd1337, %rd3, %rd1336;
ld.global.u32 %r3662, [%rd1337];
setp.eq.s32 %p2195, %r3662, 0;
@%p2195 bra $L__BB1_1761;
and.b32 %r8031, %r3662, -2147483648;
abs.s32 %r8032, %r3662;
shl.b32 %r8033, %r8032, %r2355;
or.b32 %r10500, %r8033, %r8031;
$L__BB1_1761:
shl.b32 %r8036, %r10500, 1;
shr.u32 %r8037, %r8036, %r2355;
and.b32 %r3665, %r8037, -2;
setp.eq.s32 %p2196, %r3665, 0;
mov.u32 %r10507, %r10505;
@%p2196 bra $L__BB1_1763;
// Non-zero: mask bit 3, %r10505 = width, %r10507 = value, update max width.
or.b32 %r10494, %r10494, 8;
add.s32 %r8038, %r3665, -1;
clz.b32 %r8039, %r8038;
mov.u32 %r8040, 32;
sub.s32 %r10505, %r8040, %r8039;
max.s32 %r10510, %r10510, %r10505;
shr.u32 %r8041, %r10500, 31;
add.s32 %r8042, %r8041, %r3665;
add.s32 %r10507, %r8042, -2;
$L__BB1_1763:
// Both columns consumed: the next element index is %r10638 + 2.
add.s32 %r10509, %r10638, 2;
// Second group: build the lookup context and combine the per-sample widths.
//   %r10638 = %r10509 (element index carried to the next iteration)
//   %r3682  = ((%r3597 >> 2) | (%r3573 >> 1)) | ((%r3626 << 2) + (%rs451 & 255))
//             i.e. first-group mask bits 3 and 2 moved to bits 1 and 0, merged
//             with the two cx_val bytes read earlier
//   %p2197  = non-zero mask %r10494 has more than one bit set
//   %r8053  = %r10335 when (%rs449 > 2 and %p2197), else 1
//   %r3683  = max(%r8053, %r10510);  %r10655 = %r3683 - %r8053
//   %r10512 = 0 when %r10655 < 1; otherwise a 4-bit mask flagging which of
//             %r10487, %r10491, %r10506, %r10505 (bits 0..3) equal %r10510
$L__BB1_1764:
mov.u32 %r10638, %r10509;
shr.u32 %r8044, %r3597, 2;
shr.u32 %r8045, %r3573, 1;
or.b32 %r8046, %r8044, %r8045;
cvt.u32.u16 %r8047, %rs451;
and.b32 %r8048, %r8047, 255;
shl.b32 %r8049, %r3626, 2;
add.s32 %r8050, %r8049, %r8048;
or.b32 %r3682, %r8046, %r8050;
add.s32 %r8051, %r10494, -1;
and.b32 %r8052, %r8051, %r10494;
setp.ne.s32 %p2197, %r8052, 0;
setp.gt.u16 %p2198, %rs449, 2;
and.pred %p2199, %p2198, %p2197;
selp.b32 %r8053, %r10335, 1, %p2199;
max.s32 %r3683, %r8053, %r10510;
sub.s32 %r10655, %r3683, %r8053;
setp.lt.s32 %p2200, %r10655, 1;
@%p2200 bra $L__BB1_1766;
setp.eq.s32 %p2201, %r10487, %r10510;
selp.u32 %r8054, 1, 0, %p2201;
setp.eq.s32 %p2202, %r10491, %r10510;
selp.u32 %r8055, -1, 0, %p2202;
// bfi puts bit 0 of %r8055 into bit 1 of %r8054
bfi.b32 %r8056, %r8055, %r8054, 1, 1;
setp.eq.s32 %p2203, %r10506, %r10510;
selp.u16 %rs1029, 1, 0, %p2203;
mul.wide.u16 %r8057, %rs1029, 4;
or.b32 %r8058, %r8056, %r8057;
setp.eq.s32 %p2204, %r10505, %r10510;
selp.u16 %rs1030, 1, 0, %p2204;
mul.wide.u16 %r8059, %rs1030, 8;
or.b32 %r10512, %r8058, %r8059;
// Second group: look up a 16-bit table entry at %rd72 with index
//   (%r3682 << 8) | (%r10494 << 4) | %r10512
// and append its code bits to the byte accumulator %rs1322.
//   entry bits 4..6  = number of bits to emit (%rs453)
//   entry bits 8..15 = bit pattern, emitted LSB first
//   %r10524 = status out: copy of %r10375, or 1 if %r10264 exceeded 2879
$L__BB1_1766:
shl.b32 %r8060, %r10494, 4;
shl.b32 %r8061, %r3682, 8;
or.b32 %r8062, %r8060, %r8061;
or.b32 %r8063, %r8062, %r10512;
mul.wide.u32 %rd1339, %r8063, 2;
add.s64 %rd1340, %rd72, %rd1339;
ld.global.u16 %rs452, [%rd1340];
shr.u16 %rs1031, %rs452, 4;
and.b16 %rs453, %rs1031, 7;
setp.eq.s16 %p2205, %rs453, 0;
mov.u32 %r10524, %r10375;
@%p2205 bra $L__BB1_1773;
cvt.u32.u16 %r10513, %rs453;
shr.u16 %rs1032, %rs452, 8;
cvt.u32.u16 %r10514, %rs1032;
$L__BB1_1768:
mov.u32 %r3689, %r10513;
setp.gt.u32 %p2206, %r10264, 2879;
mov.u32 %r10524, 1;
@%p2206 bra $L__BB1_1773;
// room = 8 - %r10266 - %r10265; take = min(room, bits remaining)
mov.u32 %r8065, 8;
sub.s32 %r8066, %r8065, %r10266;
sub.s32 %r8067, %r8066, %r10265;
min.u32 %r8068, %r8067, %r3689;
setp.eq.s32 %p2207, %r8068, 32;
mov.u32 %r8069, -1;
shl.b32 %r8070, %r8069, %r8068;
not.b32 %r8071, %r8070;
selp.b32 %r8072, -1, %r8071, %p2207;
and.b32 %r8073, %r8072, %r10514;
shl.b32 %r8074, %r8073, %r10265;
cvt.u16.u32 %rs1033, %r8074;
or.b16 %rs1322, %rs1322, %rs1033;
add.s32 %r10265, %r8068, %r10265;
sub.s32 %r10513, %r3689, %r8068;
shr.u32 %r10514, %r10514, %r8068;
// Byte not yet full: no flush.
setp.gt.u32 %p2208, %r8067, %r3689;
@%p2208 bra $L__BB1_1772;
// Lift the 7-bit limit; if it was active and the low byte is not 127, skip
// the store so the byte can take one more bit.
setp.ne.s32 %p2209, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs1034, %rs1322, 255;
setp.ne.s16 %p2210, %rs1034, 127;
and.pred %p2211, %p2209, %p2210;
@%p2211 bra $L__BB1_1772;
// Store the byte at %rd1 + %rd4 + (20548 - %r10264) (descending addresses).
mov.u32 %r8077, 20548;
sub.s32 %r8078, %r8077, %r10264;
cvt.u64.u32 %rd1341, %r8078;
add.s64 %rd1342, %rd1341, %rd4;
add.s64 %rd1343, %rd1, %rd1342;
st.global.u8 [%rd1343], %rs1322;
add.s32 %r10264, %r10264, 1;
// A stored byte value above 143 limits the next byte to 7 bits.
setp.gt.u16 %p2212, %rs1034, 143;
selp.u32 %r10266, 1, 0, %p2212;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1772:
setp.ne.s32 %p2213, %r10513, 0;
mov.u32 %r10524, %r10375;
@%p2213 bra $L__BB1_1768;
$L__BB1_1773:
setp.ne.s32 %p2214, %r3682, 0;
@%p2214 bra $L__BB1_1821;
setp.eq.s32 %p2215, %r10494, 0;
add.s32 %r8079, %r9816, 17477;
cvt.u64.u32 %rd1344, %r8079;
add.s64 %rd1345, %rd1344, %rd4;
add.s64 %rd74, %rd1, %rd1345;
@%p2215 bra $L__BB1_1813;
shl.b16 %rs1253, %rs1253, 1;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p2216, %r9825, 0;
mov.u32 %r10560, %r10033;
@%p2216 bra $L__BB1_1778;
setp.gt.u32 %p2217, %r9816, 191;
mov.u32 %r10560, 1;
mov.u32 %r9825, 0;
@%p2217 bra $L__BB1_1778;
st.global.u8 [%rd74], %rs1253;
add.s32 %r9816, %r9816, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9825, 8;
mov.u32 %r10560, %r10033;
$L__BB1_1778:
setp.lt.u32 %p2218, %r10031, 3;
mov.u32 %r10528, 0;
@%p2218 bra $L__BB1_1781;
setp.lt.u32 %p2219, %r10031, 6;
mov.u32 %r10528, 1;
@%p2219 bra $L__BB1_1781;
setp.lt.u32 %p2220, %r10031, 9;
setp.eq.s32 %p2221, %r10031, 11;
selp.b32 %r8085, 4, 5, %p2221;
setp.lt.u32 %p2222, %r10031, 11;
selp.b32 %r8086, 3, %r8085, %p2222;
selp.b32 %r10528, 2, %r8086, %p2220;
$L__BB1_1781:
setp.eq.s32 %p2223, %r10528, 0;
@%p2223 bra $L__BB1_1809;
add.s32 %r3713, %r10528, -1;
and.b32 %r3714, %r10528, 3;
setp.eq.s32 %p2224, %r3714, 0;
mov.u32 %r10538, %r10528;
mov.u32 %r10539, %r10560;
@%p2224 bra $L__BB1_1794;
mov.u32 %r8088, 1;
shl.b32 %r8089, %r8088, %r3713;
and.b32 %r8090, %r8089, %r10030;
setp.ne.s32 %p2225, %r8090, 0;
selp.u32 %r8091, 1, 0, %p2225;
cvt.u32.u16 %r8092, %rs1253;
bfi.b32 %r8093, %r8092, %r8091, 1, 8;
cvt.u16.u32 %rs1253, %r8093;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p2226, %r9825, 0;
mov.u32 %r10539, %r10560;
@%p2226 bra $L__BB1_1786;
setp.gt.u32 %p2227, %r9816, 191;
mov.u32 %r9825, 0;
mov.u32 %r10539, %r8088;
@%p2227 bra $L__BB1_1786;
add.s32 %r8097, %r9816, 17477;
cvt.u64.u32 %rd1346, %r8097;
add.s64 %rd1347, %rd1346, %rd4;
add.s64 %rd1348, %rd1, %rd1347;
st.global.u8 [%rd1348], %rs1253;
add.s32 %r9816, %r9816, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9825, 8;
mov.u32 %r10539, %r10560;
$L__BB1_1786:
setp.eq.s32 %p2228, %r3714, 1;
mov.u32 %r10560, %r10539;
mov.u32 %r10538, %r3713;
@%p2228 bra $L__BB1_1794;
add.s32 %r10538, %r10528, -2;
mov.u32 %r8098, 1;
shl.b32 %r8099, %r8098, %r10538;
and.b32 %r8100, %r8099, %r10030;
setp.ne.s32 %p2229, %r8100, 0;
selp.u32 %r8101, 1, 0, %p2229;
cvt.u32.u16 %r8102, %rs1253;
bfi.b32 %r8103, %r8102, %r8101, 1, 8;
cvt.u16.u32 %rs1253, %r8103;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p2230, %r9825, 0;
mov.u32 %r10534, %r10539;
@%p2230 bra $L__BB1_1790;
setp.gt.u32 %p2231, %r9816, 191;
mov.u32 %r9825, 0;
mov.u32 %r10534, %r8098;
@%p2231 bra $L__BB1_1790;
add.s32 %r8106, %r9816, 17477;
cvt.u64.u32 %rd1349, %r8106;
add.s64 %rd1350, %rd1349, %rd4;
add.s64 %rd1351, %rd1, %rd1350;
and.b16 %rs1041, %rs1253, 255;
st.global.u8 [%rd1351], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p2232, %rs1041, 255;
selp.b32 %r9825, 7, 8, %p2232;
mov.u16 %rs1253, 0;
mov.u32 %r10534, %r10539;
$L__BB1_1790:
setp.eq.s32 %p2233, %r3714, 2;
mov.u32 %r10560, %r10534;
mov.u32 %r10539, %r10534;
@%p2233 bra $L__BB1_1794;
add.s32 %r10538, %r10528, -3;
mov.u32 %r8107, 1;
shl.b32 %r8108, %r8107, %r10538;
and.b32 %r8109, %r8108, %r10030;
setp.ne.s32 %p2234, %r8109, 0;
selp.u32 %r8110, 1, 0, %p2234;
cvt.u32.u16 %r8111, %rs1253;
bfi.b32 %r8112, %r8111, %r8110, 1, 8;
cvt.u16.u32 %rs1253, %r8112;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p2235, %r9825, 0;
mov.u32 %r10560, %r10534;
mov.u32 %r10539, %r10534;
@%p2235 bra $L__BB1_1794;
setp.gt.u32 %p2236, %r9816, 191;
mov.u32 %r9825, 0;
mov.u32 %r10560, %r8107;
mov.u32 %r10539, %r8107;
@%p2236 bra $L__BB1_1794;
add.s32 %r8117, %r9816, 17477;
cvt.u64.u32 %rd1352, %r8117;
add.s64 %rd1353, %rd1352, %rd4;
add.s64 %rd1354, %rd1, %rd1353;
and.b16 %rs1044, %rs1253, 255;
st.global.u8 [%rd1354], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p2237, %rs1044, 255;
selp.b32 %r9825, 7, 8, %p2237;
mov.u16 %rs1253, 0;
mov.u32 %r10560, %r10534;
mov.u32 %r10539, %r10534;
$L__BB1_1794:
setp.lt.u32 %p2238, %r3713, 3;
@%p2238 bra $L__BB1_1809;
mov.u32 %r10560, %r10539;
$L__BB1_1796:
add.s32 %r8118, %r10538, -1;
mov.u32 %r8119, 1;
shl.b32 %r8120, %r8119, %r8118;
and.b32 %r8121, %r8120, %r10030;
setp.ne.s32 %p2239, %r8121, 0;
selp.u32 %r8122, 1, 0, %p2239;
cvt.u32.u16 %r8123, %rs1253;
bfi.b32 %r10548, %r8123, %r8122, 1, 8;
add.s32 %r10547, %r9825, -1;
setp.ne.s32 %p2240, %r10547, 0;
mov.u32 %r10549, %r10560;
@%p2240 bra $L__BB1_1799;
setp.gt.u32 %p2241, %r9816, 191;
mov.u32 %r10547, 0;
mov.u32 %r10549, %r8119;
@%p2241 bra $L__BB1_1799;
cvt.u16.u32 %rs1045, %r10548;
and.b16 %rs1046, %rs1045, 255;
add.s32 %r8127, %r9816, 17477;
cvt.u64.u32 %rd1355, %r8127;
add.s64 %rd1356, %rd1355, %rd4;
add.s64 %rd1357, %rd1, %rd1356;
st.global.u8 [%rd1357], %rs1045;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p2242, %rs1046, 255;
selp.b32 %r10547, 7, 8, %p2242;
mov.u32 %r10548, 0;
mov.u32 %r10549, %r10560;
$L__BB1_1799:
add.s32 %r8128, %r10538, -2;
shl.b32 %r8130, %r8119, %r8128;
and.b32 %r8131, %r8130, %r10030;
setp.ne.s32 %p2243, %r8131, 0;
and.b32 %r8132, %r10548, 127;
selp.u32 %r8133, 1, 0, %p2243;
bfi.b32 %r10552, %r8132, %r8133, 1, 7;
add.s32 %r10551, %r10547, -1;
setp.ne.s32 %p2244, %r10551, 0;
mov.u32 %r10553, %r10549;
@%p2244 bra $L__BB1_1802;
setp.gt.u32 %p2245, %r9816, 191;
mov.u32 %r10553, 1;
mov.u32 %r10551, 0;
@%p2245 bra $L__BB1_1802;
cvt.u16.u32 %rs1047, %r10552;
and.b16 %rs1048, %rs1047, 255;
add.s32 %r8137, %r9816, 17477;
cvt.u64.u32 %rd1358, %r8137;
add.s64 %rd1359, %rd1358, %rd4;
add.s64 %rd1360, %rd1, %rd1359;
st.global.u8 [%rd1360], %rs1047;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p2246, %rs1048, 255;
selp.b32 %r10551, 7, 8, %p2246;
mov.u32 %r10552, 0;
mov.u32 %r10553, %r10549;
$L__BB1_1802:
add.s32 %r8138, %r10538, -3;
mov.u32 %r8139, 1;
shl.b32 %r8140, %r8139, %r8138;
and.b32 %r8141, %r8140, %r10030;
setp.ne.s32 %p2247, %r8141, 0;
and.b32 %r8142, %r10552, 127;
selp.u32 %r8143, 1, 0, %p2247;
bfi.b32 %r10556, %r8142, %r8143, 1, 7;
add.s32 %r10555, %r10551, -1;
setp.ne.s32 %p2248, %r10555, 0;
mov.u32 %r10557, %r10553;
@%p2248 bra $L__BB1_1805;
setp.gt.u32 %p2249, %r9816, 191;
mov.u32 %r10555, 0;
mov.u32 %r10557, %r8139;
@%p2249 bra $L__BB1_1805;
cvt.u16.u32 %rs1049, %r10556;
and.b16 %rs1050, %rs1049, 255;
add.s32 %r8147, %r9816, 17477;
cvt.u64.u32 %rd1361, %r8147;
add.s64 %rd1362, %rd1361, %rd4;
add.s64 %rd1363, %rd1, %rd1362;
st.global.u8 [%rd1363], %rs1049;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p2250, %rs1050, 255;
selp.b32 %r10555, 7, 8, %p2250;
mov.u32 %r10556, 0;
mov.u32 %r10557, %r10553;
$L__BB1_1805:
add.s32 %r10538, %r10538, -4;
shl.b32 %r8149, %r8139, %r10538;
and.b32 %r8150, %r8149, %r10030;
setp.ne.s32 %p2251, %r8150, 0;
and.b32 %r8151, %r10556, 127;
selp.u32 %r8152, 1, 0, %p2251;
bfi.b32 %r8153, %r8151, %r8152, 1, 15;
cvt.u16.u32 %rs1253, %r8153;
add.s32 %r9825, %r10555, -1;
setp.ne.s32 %p2252, %r9825, 0;
mov.u32 %r10560, %r10557;
@%p2252 bra $L__BB1_1808;
setp.gt.u32 %p2253, %r9816, 191;
mov.u32 %r10560, 1;
mov.u32 %r9825, 0;
@%p2253 bra $L__BB1_1808;
add.s32 %r8156, %r9816, 17477;
cvt.u64.u32 %rd1364, %r8156;
add.s64 %rd1365, %rd1364, %rd4;
add.s64 %rd1366, %rd1, %rd1365;
and.b16 %rs1052, %rs1253, 255;
st.global.u8 [%rd1366], %rs1253;
add.s32 %r9816, %r9816, 1;
setp.eq.s16 %p2254, %rs1052, 255;
selp.b32 %r9825, 7, 8, %p2254;
mov.u16 %rs1253, 0;
mov.u32 %r10560, %r10557;
$L__BB1_1808:
setp.ne.s32 %p2255, %r10538, 0;
@%p2255 bra $L__BB1_1796;
$L__BB1_1809:
add.s32 %r8158, %r10031, -1;
setp.eq.s32 %p2256, %r10031, 0;
mov.u32 %r10030, 0;
selp.b32 %r10031, 0, %r8158, %p2256;
setp.lt.u32 %p2257, %r10031, 3;
mov.u32 %r10564, %r10030;
@%p2257 bra $L__BB1_1812;
setp.lt.u32 %p2258, %r10031, 6;
mov.u32 %r10564, 1;
@%p2258 bra $L__BB1_1812;
setp.lt.u32 %p2259, %r10031, 9;
setp.eq.s32 %p2260, %r10031, 11;
selp.b32 %r8160, 4, 5, %p2260;
setp.lt.u32 %p2261, %r10031, 11;
selp.b32 %r8161, 3, %r8160, %p2261;
selp.b32 %r10564, 2, %r8161, %p2259;
$L__BB1_1812:
mov.u32 %r8163, 1;
shl.b32 %r10032, %r8163, %r10564;
mov.u32 %r10033, %r10560;
bra.uni $L__BB1_1821;
$L__BB1_1813:
add.s32 %r10030, %r10030, 1;
setp.lt.u32 %p2262, %r10030, %r10032;
@%p2262 bra $L__BB1_1821;
shl.b16 %rs1053, %rs1253, 1;
or.b16 %rs1253, %rs1053, 1;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p2263, %r9825, 0;
mov.u32 %r10567, %r10033;
@%p2263 bra $L__BB1_1817;
bra.uni $L__BB1_1815;
$L__BB1_1817:
add.s32 %r8167, %r10031, 1;
min.u32 %r10031, %r8167, 12;
setp.lt.u32 %p2266, %r10031, 3;
mov.u32 %r10030, 0;
mov.u32 %r10568, %r10030;
@%p2266 bra $L__BB1_1820;
setp.lt.u32 %p2267, %r10031, 6;
mov.u32 %r10568, 1;
@%p2267 bra $L__BB1_1820;
setp.lt.u32 %p2268, %r10031, 9;
setp.eq.s32 %p2269, %r10031, 11;
selp.b32 %r8169, 4, 5, %p2269;
setp.lt.u32 %p2270, %r10031, 11;
selp.b32 %r8170, 3, %r8169, %p2270;
selp.b32 %r10568, 2, %r8170, %p2268;
$L__BB1_1820:
mov.u32 %r8172, 1;
shl.b32 %r10032, %r8172, %r10568;
mov.u32 %r10033, %r10567;
$L__BB1_1821:
and.b16 %rs1056, %rs452, 15;
cvt.u32.u16 %r3797, %rs1056;
and.b32 %r8173, %r10494, 1;
setp.eq.b32 %p2271, %r8173, 1;
mov.pred %p2272, 0;
xor.pred %p2273, %p2271, %p2272;
not.pred %p2274, %p2273;
mov.u32 %r10589, %r10485;
@%p2274 bra $L__BB1_1828;
and.b32 %r8174, %r3797, 1;
sub.s32 %r10575, %r3683, %r8174;
setp.eq.s32 %p2275, %r10575, 0;
mov.u32 %r10589, %r10485;
@%p2275 bra $L__BB1_1828;
mov.u32 %r8175, -1;
shl.b32 %r8176, %r8175, %r10575;
not.b32 %r8177, %r8176;
and.b32 %r10576, %r10488, %r8177;
$L__BB1_1824:
setp.gt.u32 %p2276, %r10451, 17476;
mov.u32 %r10589, 1;
@%p2276 bra $L__BB1_1828;
sub.s32 %r8179, %r10452, %r10453;
min.u32 %r8180, %r8179, %r10575;
setp.eq.s32 %p2277, %r8180, 32;
mov.u32 %r8181, -1;
shl.b32 %r8182, %r8181, %r8180;
not.b32 %r8183, %r8182;
selp.b32 %r8184, -1, %r8183, %p2277;
and.b32 %r8185, %r8184, %r10576;
shl.b32 %r8186, %r8185, %r10453;
or.b32 %r10454, %r8186, %r10454;
add.s32 %r10453, %r8180, %r10453;
shr.u32 %r10576, %r10576, %r8180;
sub.s32 %r10575, %r10575, %r8180;
setp.lt.u32 %p2278, %r10453, %r10452;
@%p2278 bra $L__BB1_1827;
cvt.u64.u32 %rd1367, %r10451;
add.s64 %rd1368, %rd1367, %rd4;
add.s64 %rd1369, %rd1, %rd1368;
st.global.u8 [%rd1369], %r10454;
add.s32 %r10451, %r10451, 1;
setp.eq.s32 %p2279, %r10454, 255;
selp.b32 %r10452, 7, 8, %p2279;
mov.u32 %r10453, 0;
mov.u32 %r10454, %r10453;
$L__BB1_1827:
setp.ne.s32 %p2280, %r10575, 0;
mov.u32 %r10589, %r10485;
@%p2280 bra $L__BB1_1824;
$L__BB1_1828:
and.b32 %r3821, %r10494, 2;
setp.eq.s32 %p2281, %r3821, 0;
mov.u32 %r10604, %r10589;
@%p2281 bra $L__BB1_1835;
shr.u32 %r8189, %r3797, 1;
and.b32 %r8190, %r8189, 1;
sub.s32 %r10590, %r3683, %r8190;
setp.eq.s32 %p2282, %r10590, 0;
mov.u32 %r10604, %r10589;
@%p2282 bra $L__BB1_1835;
mov.u32 %r8191, -1;
shl.b32 %r8192, %r8191, %r10590;
not.b32 %r8193, %r8192;
and.b32 %r10591, %r10492, %r8193;
$L__BB1_1831:
setp.gt.u32 %p2283, %r10451, 17476;
mov.u32 %r10604, 1;
@%p2283 bra $L__BB1_1835;
sub.s32 %r8195, %r10452, %r10453;
min.u32 %r8196, %r8195, %r10590;
setp.eq.s32 %p2284, %r8196, 32;
mov.u32 %r8197, -1;
shl.b32 %r8198, %r8197, %r8196;
not.b32 %r8199, %r8198;
selp.b32 %r8200, -1, %r8199, %p2284;
and.b32 %r8201, %r8200, %r10591;
shl.b32 %r8202, %r8201, %r10453;
or.b32 %r10454, %r8202, %r10454;
add.s32 %r10453, %r8196, %r10453;
shr.u32 %r10591, %r10591, %r8196;
sub.s32 %r10590, %r10590, %r8196;
setp.lt.u32 %p2285, %r10453, %r10452;
@%p2285 bra $L__BB1_1834;
cvt.u64.u32 %rd1370, %r10451;
add.s64 %rd1371, %rd1370, %rd4;
add.s64 %rd1372, %rd1, %rd1371;
st.global.u8 [%rd1372], %r10454;
add.s32 %r10451, %r10451, 1;
setp.eq.s32 %p2286, %r10454, 255;
selp.b32 %r10452, 7, 8, %p2286;
mov.u32 %r10453, 0;
mov.u32 %r10454, %r10453;
$L__BB1_1834:
setp.ne.s32 %p2287, %r10590, 0;
mov.u32 %r10604, %r10589;
@%p2287 bra $L__BB1_1831;
$L__BB1_1835:
and.b32 %r3845, %r10494, 4;
setp.eq.s32 %p2288, %r3845, 0;
mov.u32 %r10619, %r10604;
@%p2288 bra $L__BB1_1842;
shr.u32 %r8205, %r3797, 2;
and.b32 %r8206, %r8205, 1;
sub.s32 %r10605, %r3683, %r8206;
setp.eq.s32 %p2289, %r10605, 0;
mov.u32 %r10619, %r10604;
@%p2289 bra $L__BB1_1842;
mov.u32 %r8207, -1;
shl.b32 %r8208, %r8207, %r10605;
not.b32 %r8209, %r8208;
and.b32 %r10606, %r10508, %r8209;
$L__BB1_1838:
setp.gt.u32 %p2290, %r10451, 17476;
mov.u32 %r10619, 1;
@%p2290 bra $L__BB1_1842;
sub.s32 %r8211, %r10452, %r10453;
min.u32 %r8212, %r8211, %r10605;
setp.eq.s32 %p2291, %r8212, 32;
mov.u32 %r8213, -1;
shl.b32 %r8214, %r8213, %r8212;
not.b32 %r8215, %r8214;
selp.b32 %r8216, -1, %r8215, %p2291;
and.b32 %r8217, %r8216, %r10606;
shl.b32 %r8218, %r8217, %r10453;
or.b32 %r10454, %r8218, %r10454;
add.s32 %r10453, %r8212, %r10453;
shr.u32 %r10606, %r10606, %r8212;
sub.s32 %r10605, %r10605, %r8212;
setp.lt.u32 %p2292, %r10453, %r10452;
@%p2292 bra $L__BB1_1841;
cvt.u64.u32 %rd1373, %r10451;
add.s64 %rd1374, %rd1373, %rd4;
add.s64 %rd1375, %rd1, %rd1374;
st.global.u8 [%rd1375], %r10454;
add.s32 %r10451, %r10451, 1;
setp.eq.s32 %p2293, %r10454, 255;
selp.b32 %r10452, 7, 8, %p2293;
mov.u32 %r10453, 0;
mov.u32 %r10454, %r10453;
$L__BB1_1841:
setp.ne.s32 %p2294, %r10605, 0;
mov.u32 %r10619, %r10604;
@%p2294 bra $L__BB1_1838;
$L__BB1_1842:
and.b32 %r3869, %r10494, 8;
setp.eq.s32 %p2295, %r3869, 0;
mov.u32 %r10485, %r10619;
@%p2295 bra $L__BB1_1849;
shr.u32 %r8221, %r3797, 3;
sub.s32 %r10620, %r3683, %r8221;
setp.eq.s32 %p2296, %r10620, 0;
mov.u32 %r10485, %r10619;
@%p2296 bra $L__BB1_1849;
mov.u32 %r8222, -1;
shl.b32 %r8223, %r8222, %r10620;
not.b32 %r8224, %r8223;
and.b32 %r10621, %r10507, %r8224;
$L__BB1_1845:
setp.gt.u32 %p2297, %r10451, 17476;
mov.u32 %r10485, 1;
@%p2297 bra $L__BB1_1849;
sub.s32 %r8226, %r10452, %r10453;
min.u32 %r8227, %r8226, %r10620;
setp.eq.s32 %p2298, %r8227, 32;
mov.u32 %r8228, -1;
shl.b32 %r8229, %r8228, %r8227;
not.b32 %r8230, %r8229;
selp.b32 %r8231, -1, %r8230, %p2298;
and.b32 %r8232, %r8231, %r10621;
shl.b32 %r8233, %r8232, %r10453;
or.b32 %r10454, %r8233, %r10454;
add.s32 %r10453, %r8227, %r10453;
shr.u32 %r10621, %r10621, %r8227;
sub.s32 %r10620, %r10620, %r8227;
setp.lt.u32 %p2299, %r10453, %r10452;
@%p2299 bra $L__BB1_1848;
cvt.u64.u32 %rd1376, %r10451;
add.s64 %rd1377, %rd1376, %rd4;
add.s64 %rd1378, %rd1, %rd1377;
st.global.u8 [%rd1378], %r10454;
add.s32 %r10451, %r10451, 1;
setp.eq.s32 %p2300, %r10454, 255;
selp.b32 %r10452, 7, 8, %p2300;
mov.u32 %r10453, 0;
mov.u32 %r10454, %r10453;
$L__BB1_1848:
setp.ne.s32 %p2301, %r10620, 0;
mov.u32 %r10485, %r10619;
@%p2301 bra $L__BB1_1845;
$L__BB1_1849:
and.b32 %r8236, %r10491, 255;
and.b32 %r8237, %r10356, 255;
setp.lt.u32 %p2302, %r8236, %r8237;
cvt.u16.u32 %rs1057, %r10491;
selp.b16 %rs1058, %rs450, %rs1057, %p2302;
st.shared.u8 [%r3621+1], %rs1058;
ld.shared.u8 %rs1059, [%r3621+3];
setp.gt.u16 %p2303, %rs448, %rs1059;
add.s32 %r10637, %r10637, 1;
add.s32 %r8238, %r10334, 3;
selp.b32 %r8239, %r10637, %r8238, %p2303;
add.s32 %r8241, %r4095, %r8239;
ld.shared.u8 %r8242, [%r8241];
add.s32 %r10335, %r8242, -1;
shr.u32 %r8243, %r3821, 1;
or.b32 %r8244, %r3627, %r8243;
st.shared.u8 [%r3621+2], %r10505;
st.shared.u8 [%r3624+1], %r8244;
ld.shared.u8 %rs1060, [%r3624+3];
mul.wide.u16 %r8245, %rs1060, 4;
add.s32 %r8246, %r8245, %r3626;
shr.u32 %r8247, %r3869, 3;
st.shared.u8 [%r3624+2], %r8247;
shr.u32 %r8248, %r3869, 2;
shr.u32 %r8249, %r3845, 1;
or.b32 %r8250, %r8248, %r8249;
or.b32 %r10332, %r8250, %r8246;
add.s32 %r10336, %r10336, 1;
mov.u32 %r10375, %r10524;
$L__BB1_1850:
mov.u32 %r10333, %r10638;
mov.u32 %r10334, %r10637;
max.s32 %r8251, %r10655, 0;
mul.lo.s32 %r8252, %r3412, 6;
setp.gt.s32 %p2304, %r3412, 0;
selp.b32 %r8253, %r8252, 0, %p2304;
cvt.u64.u32 %rd1379, %r8253;
add.s64 %rd75, %rd71, %rd1379;
ld.global.u8 %rs476, [%rd75+1];
add.s32 %r8254, %r8253, 2;
cvt.u64.u32 %rd1380, %r8254;
add.s64 %rd1381, %rd71, %rd1380;
ld.global.u8 %rs477, [%rd1381];
ld.global.u8 %rs478, [%rd1381+1];
mul.lo.s32 %r8255, %r8251, 6;
cvt.u64.u32 %rd1382, %r8255;
add.s64 %rd1383, %rd71, %rd1382;
ld.global.u8 %rs479, [%rd1383];
ld.global.u8 %rs480, [%rd1383+1];
add.s32 %r8256, %r8255, 2;
cvt.u64.u32 %rd1384, %r8256;
add.s64 %rd1385, %rd71, %rd1384;
ld.global.u8 %rs481, [%rd1385];
ld.global.u8 %rs482, [%rd1385+1];
setp.eq.s16 %p2305, %rs476, 0;
mov.u32 %r10667, %r10375;
@%p2305 bra $L__BB1_1857;
ld.global.u8 %r10657, [%rd75];
cvt.u32.u16 %r10656, %rs476;
$L__BB1_1852:
mov.u32 %r3920, %r10656;
setp.gt.u32 %p2306, %r10264, 2879;
mov.u32 %r10667, 1;
@%p2306 bra $L__BB1_1857;
mov.u32 %r8258, 8;
sub.s32 %r8259, %r8258, %r10266;
sub.s32 %r8260, %r8259, %r10265;
min.u32 %r8261, %r8260, %r3920;
setp.eq.s32 %p2307, %r8261, 32;
mov.u32 %r8262, -1;
shl.b32 %r8263, %r8262, %r8261;
not.b32 %r8264, %r8263;
selp.b32 %r8265, -1, %r8264, %p2307;
and.b32 %r8266, %r8265, %r10657;
shl.b32 %r8267, %r8266, %r10265;
cvt.u16.u32 %rs1061, %r8267;
or.b16 %rs1322, %rs1322, %rs1061;
add.s32 %r10265, %r8261, %r10265;
sub.s32 %r10656, %r3920, %r8261;
shr.u32 %r10657, %r10657, %r8261;
setp.gt.u32 %p2308, %r8260, %r3920;
@%p2308 bra $L__BB1_1856;
setp.ne.s32 %p2309, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs1062, %rs1322, 255;
setp.ne.s16 %p2310, %rs1062, 127;
and.pred %p2311, %p2309, %p2310;
@%p2311 bra $L__BB1_1856;
mov.u32 %r8270, 20548;
sub.s32 %r8271, %r8270, %r10264;
cvt.u64.u32 %rd1386, %r8271;
add.s64 %rd1387, %rd1386, %rd4;
add.s64 %rd1388, %rd1, %rd1387;
st.global.u8 [%rd1388], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p2312, %rs1062, 143;
selp.u32 %r10266, 1, 0, %p2312;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1856:
setp.ne.s32 %p2313, %r10656, 0;
mov.u32 %r10667, %r10375;
@%p2313 bra $L__BB1_1852;
$L__BB1_1857:
setp.eq.s16 %p2314, %rs480, 0;
mov.u32 %r10679, %r10667;
@%p2314 bra $L__BB1_1864;
cvt.u32.u16 %r8272, %rs479;
and.b32 %r10669, %r8272, 255;
cvt.u32.u16 %r8273, %rs480;
and.b32 %r10668, %r8273, 255;
$L__BB1_1859:
mov.u32 %r3939, %r10668;
setp.gt.u32 %p2315, %r10264, 2879;
mov.u32 %r10679, 1;
@%p2315 bra $L__BB1_1864;
mov.u32 %r8275, 8;
sub.s32 %r8276, %r8275, %r10266;
sub.s32 %r8277, %r8276, %r10265;
min.u32 %r8278, %r8277, %r3939;
setp.eq.s32 %p2316, %r8278, 32;
mov.u32 %r8279, -1;
shl.b32 %r8280, %r8279, %r8278;
not.b32 %r8281, %r8280;
selp.b32 %r8282, -1, %r8281, %p2316;
and.b32 %r8283, %r8282, %r10669;
shl.b32 %r8284, %r8283, %r10265;
cvt.u16.u32 %rs1066, %r8284;
or.b16 %rs1322, %rs1322, %rs1066;
add.s32 %r10265, %r8278, %r10265;
sub.s32 %r10668, %r3939, %r8278;
shr.u32 %r10669, %r10669, %r8278;
setp.gt.u32 %p2317, %r8277, %r3939;
@%p2317 bra $L__BB1_1863;
setp.ne.s32 %p2318, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs1067, %rs1322, 255;
setp.ne.s16 %p2319, %rs1067, 127;
and.pred %p2320, %p2318, %p2319;
@%p2320 bra $L__BB1_1863;
mov.u32 %r8287, 20548;
sub.s32 %r8288, %r8287, %r10264;
cvt.u64.u32 %rd1389, %r8288;
add.s64 %rd1390, %rd1389, %rd4;
add.s64 %rd1391, %rd1, %rd1390;
st.global.u8 [%rd1391], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p2321, %rs1067, 143;
selp.u32 %r10266, 1, 0, %p2321;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1863:
setp.ne.s32 %p2322, %r10668, 0;
mov.u32 %r10679, %r10667;
@%p2322 bra $L__BB1_1859;
$L__BB1_1864:
setp.eq.s16 %p2323, %rs478, 0;
mov.u32 %r10691, %r10679;
@%p2323 bra $L__BB1_1871;
cvt.u32.u16 %r8289, %rs478;
and.b32 %r10680, %r8289, 255;
cvt.u32.u16 %r8290, %rs477;
and.b32 %r10681, %r8290, 255;
$L__BB1_1866:
mov.u32 %r3958, %r10680;
setp.gt.u32 %p2324, %r10264, 2879;
mov.u32 %r10691, 1;
@%p2324 bra $L__BB1_1871;
mov.u32 %r8292, 8;
sub.s32 %r8293, %r8292, %r10266;
sub.s32 %r8294, %r8293, %r10265;
min.u32 %r8295, %r8294, %r3958;
setp.eq.s32 %p2325, %r8295, 32;
mov.u32 %r8296, -1;
shl.b32 %r8297, %r8296, %r8295;
not.b32 %r8298, %r8297;
selp.b32 %r8299, -1, %r8298, %p2325;
and.b32 %r8300, %r8299, %r10681;
shl.b32 %r8301, %r8300, %r10265;
cvt.u16.u32 %rs1071, %r8301;
or.b16 %rs1322, %rs1322, %rs1071;
add.s32 %r10265, %r8295, %r10265;
sub.s32 %r10680, %r3958, %r8295;
shr.u32 %r10681, %r10681, %r8295;
setp.gt.u32 %p2326, %r8294, %r3958;
@%p2326 bra $L__BB1_1870;
setp.ne.s32 %p2327, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs1072, %rs1322, 255;
setp.ne.s16 %p2328, %rs1072, 127;
and.pred %p2329, %p2327, %p2328;
@%p2329 bra $L__BB1_1870;
mov.u32 %r8304, 20548;
sub.s32 %r8305, %r8304, %r10264;
cvt.u64.u32 %rd1392, %r8305;
add.s64 %rd1393, %rd1392, %rd4;
add.s64 %rd1394, %rd1, %rd1393;
st.global.u8 [%rd1394], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p2330, %rs1072, 143;
selp.u32 %r10266, 1, 0, %p2330;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1870:
setp.ne.s32 %p2331, %r10680, 0;
mov.u32 %r10691, %r10679;
@%p2331 bra $L__BB1_1866;
$L__BB1_1871:
setp.eq.s16 %p2332, %rs482, 0;
mov.u32 %r10267, %r10691;
@%p2332 bra $L__BB1_1878;
cvt.u32.u16 %r8306, %rs481;
and.b32 %r10693, %r8306, 255;
cvt.u32.u16 %r8307, %rs482;
and.b32 %r10692, %r8307, 255;
$L__BB1_1873:
mov.u32 %r3977, %r10692;
setp.gt.u32 %p2333, %r10264, 2879;
mov.u32 %r10267, 1;
@%p2333 bra $L__BB1_1878;
mov.u32 %r8309, 8;
sub.s32 %r8310, %r8309, %r10266;
sub.s32 %r8311, %r8310, %r10265;
min.u32 %r8312, %r8311, %r3977;
setp.eq.s32 %p2334, %r8312, 32;
mov.u32 %r8313, -1;
shl.b32 %r8314, %r8313, %r8312;
not.b32 %r8315, %r8314;
selp.b32 %r8316, -1, %r8315, %p2334;
and.b32 %r8317, %r8316, %r10693;
shl.b32 %r8318, %r8317, %r10265;
cvt.u16.u32 %rs1076, %r8318;
or.b16 %rs1322, %rs1322, %rs1076;
add.s32 %r10265, %r8312, %r10265;
sub.s32 %r10692, %r3977, %r8312;
shr.u32 %r10693, %r10693, %r8312;
setp.gt.u32 %p2335, %r8311, %r3977;
@%p2335 bra $L__BB1_1877;
setp.ne.s32 %p2336, %r10266, 0;
mov.u32 %r10266, 0;
and.b16 %rs1077, %rs1322, 255;
setp.ne.s16 %p2337, %rs1077, 127;
and.pred %p2338, %p2336, %p2337;
@%p2338 bra $L__BB1_1877;
mov.u32 %r8321, 20548;
sub.s32 %r8322, %r8321, %r10264;
cvt.u64.u32 %rd1395, %r8322;
add.s64 %rd1396, %rd1395, %rd4;
add.s64 %rd1397, %rd1, %rd1396;
st.global.u8 [%rd1397], %rs1322;
add.s32 %r10264, %r10264, 1;
setp.gt.u16 %p2339, %rs1077, 143;
selp.u32 %r10266, 1, 0, %p2339;
mov.u16 %rs1322, 0;
mov.u32 %r10265, 0;
$L__BB1_1877:
setp.ne.s32 %p2340, %r10692, 0;
mov.u32 %r10267, %r10691;
@%p2340 bra $L__BB1_1873;
$L__BB1_1878:
add.s32 %r10316, %r10316, 4;
setp.lt.u32 %p2341, %r10316, %r5;
@%p2341 bra $L__BB1_1638;
$L__BB1_1879:
add.s32 %r10300, %r10300, 2;
setp.lt.u32 %p2342, %r10300, %r6;
@%p2342 bra $L__BB1_1636;
// Reached when the outer loop at $L__BB1_1879 falls through.
// State used here (as established by the stores/updates in this block):
//   %rs1253 - bit accumulator; new bits are shifted in at the LSB
//   %r9825  - bits still free in the byte being assembled
//   %r9816  - bytes already written to the region at offset 17477 (max 192)
//   %r10033 - overflow flag accumulated so far; result goes to %r10733
// If the pending counter %r10030 is non-zero, a single 1 bit is appended.
// NOTE(review): this looks like flushing an open MEL run in the HT cleanup
// pass - confirm against the CUDA source.
$L__BB1_1880:
setp.eq.s32 %p2343, %r10030, 0;
mov.u32 %r10733, %r10033;
@%p2343 bra $L__BB1_1884;
// accumulator = (accumulator << 1) | 1, one fewer free bit
shl.b16 %rs1080, %rs1253, 1;
or.b16 %rs1253, %rs1080, 1;
add.s32 %r9825, %r9825, -1;
setp.ne.s32 %p2344, %r9825, 0;
mov.u32 %r10733, %r10033;
@%p2344 bra $L__BB1_1884;
// Byte is complete. Refuse the store (and raise the overflow flag) once 192
// bytes have been written.
setp.gt.u32 %p2345, %r9816, 191;
mov.u32 %r10733, 1;
mov.u32 %r9825, 0;
@%p2345 bra $L__BB1_1884;
// Store the byte at %rd1 + %rd4 + 17477 + %r9816.
add.s32 %r8325, %r9816, 17477;
cvt.u64.u32 %rd1398, %r8325;
add.s64 %rd1399, %rd1398, %rd4;
add.s64 %rd1400, %rd1, %rd1399;
and.b16 %rs1082, %rs1253, 255;
st.global.u8 [%rd1400], %rs1253;
add.s32 %r9816, %r9816, 1;
// After a 0xFF byte the next byte only receives 7 bits, otherwise 8.
setp.eq.s16 %p2346, %rs1082, 255;
selp.b32 %r9825, 7, 8, %p2346;
mov.u16 %rs1253, 0;
mov.u32 %r10733, %r10033;
// Terminates the forward bit buffer (%rs1253 / %r9825 / %r9816, region at
// offset 17477) together with the backward-growing buffer (%rs1322 / %r10265 /
// %r10264, written at offset 20548 - %r10264).
// If both partial bytes can share one byte without disturbing each other's
// used bits, a single fused byte is written; otherwise each buffer gets its
// own final byte. Overflow flags leave in %r10737 (forward) and %r10735
// (backward).
// NOTE(review): resembles the MEL/VLC termination of the HT cleanup pass -
// confirm against the CUDA source.
$L__BB1_1884:
// %rs505 = forward partial byte, left-aligned by the number of free bits.
cvt.u32.u16 %r8326, %rs1253;
and.b32 %r8327, %r8326, 255;
shl.b32 %r8328, %r8327, %r9825;
cvt.u16.u32 %rs505, %r8328;
// %r4029 = mask of the low %r10265 bits (0 when %r10265 == 0): bits in use by
// the backward buffer.
mov.u32 %r8329, -1;
shl.b32 %r8330, %r8329, %r10265;
not.b32 %r8331, %r8330;
mov.u32 %r8332, 255;
and.b32 %r8333, %r8331, 255;
setp.eq.s32 %p2347, %r10265, 0;
selp.b32 %r4029, 0, %r8333, %p2347;
// %r4030 & 255 = mask of the high bits in use by the forward buffer.
shl.b32 %r4030, %r8332, %r9825;
and.b32 %r8334, %r4030, 255;
or.b32 %r8335, %r8334, %r4029;
// Nothing pending in either buffer: skip termination, flags pass through.
setp.eq.s32 %p2348, %r8335, 0;
mov.u32 %r10735, %r10267;
mov.u32 %r10737, %r10733;
@%p2348 bra $L__BB1_1890;
// %rs506 = candidate fused byte. Fusing is allowed only when
//   - OR-ing changed no bit inside either buffer's used-bit mask (%p2349),
//   - the fused byte is not 0xFF (%p2350), and
//   - the backward buffer already holds more than one byte (%p2352).
or.b16 %rs506, %rs1322, %rs505;
and.b16 %rs1083, %rs506, 255;
xor.b16 %rs1084, %rs506, %rs505;
cvt.u32.u16 %r8336, %rs1084;
and.b32 %r8337, %r4030, %r8336;
and.b32 %r8338, %r8337, 255;
xor.b16 %rs1085, %rs506, %rs1322;
cvt.u32.u16 %r8339, %rs1085;
and.b32 %r8340, %r4029, %r8339;
or.b32 %r8341, %r8338, %r8340;
setp.eq.s32 %p2349, %r8341, 0;
setp.ne.s16 %p2350, %rs1083, 255;
and.pred %p2351, %p2350, %p2349;
setp.gt.u32 %p2352, %r10264, 1;
and.pred %p2353, %p2352, %p2351;
// %rd76 = address of the next forward byte (%rd1 + %rd4 + 17477 + %r9816).
add.s32 %r8342, %r9816, 17477;
cvt.u64.u32 %rd1401, %r8342;
add.s64 %rd1402, %rd1401, %rd4;
add.s64 %rd76, %rd1, %rd1402;
@%p2353 bra $L__BB1_1888;
bra.uni $L__BB1_1886;
// Fused path: one byte, written to the forward region only.
$L__BB1_1888:
setp.gt.u32 %p2357, %r9816, 191;
mov.u32 %r10737, 1;
mov.u32 %r10735, %r10267;
@%p2357 bra $L__BB1_1890;
st.global.u8 [%rd76], %rs506;
add.s32 %r9816, %r9816, 1;
mov.u32 %r10735, %r10267;
mov.u32 %r10737, %r10733;
bra.uni $L__BB1_1890;
// Separate path: needs room for one more byte in both regions (forward limit
// 192 bytes, backward limit 2880 bytes); otherwise both flags are set to 1.
$L__BB1_1886:
setp.gt.u32 %p2354, %r9816, 191;
setp.gt.u32 %p2355, %r10264, 2879;
or.pred %p2356, %p2355, %p2354;
mov.u32 %r10735, 1;
mov.u32 %r10737, %r10735;
@%p2356 bra $L__BB1_1890;
st.global.u8 [%rd76], %rs505;
add.s32 %r9816, %r9816, 1;
// Backward buffer byte goes to %rd1 + %rd4 + (20548 - %r10264).
mov.u32 %r8345, 20548;
sub.s32 %r8346, %r8345, %r10264;
cvt.u64.u32 %rd1403, %r8346;
add.s64 %rd1404, %rd1403, %rd4;
add.s64 %rd1405, %rd1, %rd1404;
st.global.u8 [%rd1405], %rs1322;
add.s32 %r10264, %r10264, 1;
mov.u32 %r10735, %r10267;
mov.u32 %r10737, %r10733;
// Terminates the byte stream written at %rd1 + %rd4 + %r10451 (limit 17477
// bytes). State: %r10454 = partial byte, %r10453 = bits already used in it,
// %r10452 = bit capacity of the current byte (7 or 8), %r10485 = overflow flag
// so far. The resulting flag leaves in %r10739.
// NOTE(review): presumably the MagSgn stream of the HT cleanup pass - confirm.
$L__BB1_1890:
setp.eq.s32 %p2358, %r10453, 0;
@%p2358 bra $L__BB1_1894;
// Partial byte pending: fill its unused (capacity - used) bits with ones.
sub.s32 %r8348, %r10452, %r10453;
mov.u32 %r8349, -1;
shl.b32 %r8350, %r8349, %r8348;
not.b32 %r8351, %r8350;
and.b32 %r8352, %r8351, 255;
shl.b32 %r8353, %r8352, %r10453;
or.b32 %r4038, %r8353, %r10454;
// A padded byte equal to 0xFF is not emitted.
setp.eq.s32 %p2359, %r4038, 255;
mov.u32 %r10739, %r10485;
@%p2359 bra $L__BB1_1896;
// Bounds check before the store; on failure only the flag is raised.
setp.gt.u32 %p2360, %r10451, 17476;
mov.u32 %r10739, 1;
@%p2360 bra $L__BB1_1896;
cvt.u64.u32 %rd1406, %r10451;
add.s64 %rd1407, %rd1406, %rd4;
add.s64 %rd1408, %rd1, %rd1407;
st.global.u8 [%rd1408], %r4038;
add.s32 %r10451, %r10451, 1;
mov.u32 %r10739, %r10485;
bra.uni $L__BB1_1896;
// No partial byte. A capacity of 7 is only ever selected right after a 0xFF
// byte was stored (see the selp ..., 7, 8 after each store to this stream), so
// in that case the byte count is reduced by one (clamped at 0), dropping the
// trailing 0xFF.
$L__BB1_1894:
setp.ne.s32 %p2361, %r10452, 7;
mov.u32 %r10739, %r10485;
@%p2361 bra $L__BB1_1896;
setp.eq.s32 %p2362, %r10451, 0;
add.s32 %r8355, %r10451, -1;
selp.b32 %r10451, 0, %r8355, %p2362;
mov.u32 %r10739, %r10485;
// Overflow check followed by total-length validation.
// %r10737, %r10735 and %r10739 carry the overflow flags of the three output
// cursors (each is forced to 1 on a path where a byte store was refused by its
// bounds check). Any non-zero flag aborts the encode.
$L__BB1_1896:
or.b32 %r8356, %r10737, %r10735;
or.b32 %r8357, %r8356, %r10739;
setp.eq.s32 %p2363, %r8357, 0;
@%p2363 bra $L__BB1_1898;
// Overflow: write 1 to word 0 and 3 to word 1 of the record at %rd6, zero the
// four result registers and join the common tail at $L__BB1_1904.
// NOTE(review): word 0 / word 1 look like a status and a reason code - confirm
// against the host-side record layout.
mov.u32 %r8362, 1;
st.global.u32 [%rd6], %r8362;
mov.u32 %r8363, 3;
st.global.u32 [%rd6+4], %r8363;
mov.u32 %r10740, 0;
mov.u32 %r10741, %r10740;
mov.u32 %r10742, %r10740;
mov.u32 %r10743, %r10740;
bra.uni $L__BB1_1904;
// %r10741 = sum of the three byte counts (%r9816 + %r10264 + %r10451).
// Rejected when the total is below 2 or above %r3 (unsigned compares).
$L__BB1_1898:
add.s32 %r8364, %r9816, %r10264;
add.s32 %r10741, %r8364, %r10451;
setp.lt.u32 %p2364, %r10741, 2;
setp.gt.u32 %p2365, %r10741, %r3;
or.pred %p2366, %p2364, %p2365;
@%p2366 bra $L__BB1_1900;
bra.uni $L__BB1_1899;
// Bad total length: write 1 to word 0 and 4 to word 1, zero the results.
$L__BB1_1900:
mov.u32 %r8374, 1;
st.global.u32 [%rd6], %r8374;
mov.u32 %r8375, 4;
st.global.u32 [%rd6+4], %r8375;
mov.u32 %r10740, 0;
mov.u32 %r10741, %r10740;
mov.u32 %r10742, %r10740;
mov.u32 %r10743, %r10740;
bra.uni $L__BB1_1904;
// Writes a complete eight-word (32-byte) record at %rd6: every word is 0
// except word 4 (byte offset 16), which receives %r2. Then jumps straight to
// $L__BB1_1905.
// NOTE(review): presumably the "nothing to encode" result - confirm which
// condition branches here.
$L__BB1_1250:
mov.u32 %r6781, 0;
st.global.u32 [%rd6], %r6781;
st.global.u32 [%rd6+4], %r6781;
st.global.u32 [%rd6+8], %r6781;
st.global.u32 [%rd6+12], %r6781;
st.global.u32 [%rd6+16], %r2;
st.global.u32 [%rd6+20], %r6781;
st.global.u32 [%rd6+24], %r6781;
st.global.u32 [%rd6+28], %r6781;
bra.uni $L__BB1_1905;
// Parameter sanity check: when %r2 < %r4 (unsigned) the request is rejected,
// otherwise execution continues at $L__BB1_24.
$L__BB1_23:
setp.lt.u32 %p37, %r2, %r4;
@%p37 bra $L__BB1_1248;
bra.uni $L__BB1_24;
// Rejection record at %rd6: word 0 = 2, word 1 = 5, words 2..7 = 0.
// NOTE(review): meaning of the codes 2 / 5 is defined outside this file view.
$L__BB1_1248:
mov.u32 %r6775, 2;
st.global.u32 [%rd6], %r6775;
mov.u32 %r6776, 5;
st.global.u32 [%rd6+4], %r6776;
mov.u32 %r6777, 0;
st.global.u32 [%rd6+8], %r6777;
st.global.u32 [%rd6+12], %r6777;
st.global.u32 [%rd6+16], %r6777;
st.global.u32 [%rd6+20], %r6777;
st.global.u32 [%rd6+24], %r6777;
st.global.u32 [%rd6+28], %r6777;
bra.uni $L__BB1_1905;
// Success path (total length accepted by the check at $L__BB1_1898).
// %r10740 packs two byte counts into one word:
//   bits  0..14 = %r9816  & 0x7FFF
//   bits 15..29 = %r10264 & 0x7FFF   (inserted by bfi at position 15, length 15)
//   bit  31     = 1                  (OR with 0x80000000)
// Words 0 and 1 of the record at %rd6 are cleared and %r10743 is set to 1
// before joining the common tail at $L__BB1_1904.
$L__BB1_1899:
and.b32 %r8366, %r9816, 32767;
and.b32 %r8367, %r10264, 32767;
bfi.b32 %r8368, %r8367, %r8366, 15, 15;
or.b32 %r10740, %r8368, -2147483648;
mov.u32 %r8369, 0;
st.global.u32 [%rd6], %r8369;
st.global.u32 [%rd6+4], %r8369;
mov.u32 %r10743, 1;
bra.uni $L__BB1_1904;
// Dispatch on %r4: 2 -> $L__BB1_35, 3 -> scan below, anything else goes
// directly to $L__BB1_43. %p10 (set before this point) also bypasses the scan.
// NOTE(review): %p10 presumably flags an empty block - confirm.
$L__BB1_24:
mov.u32 %r8423, 0;
setp.eq.s32 %p38, %r4, 2;
@%p38 bra $L__BB1_35;
setp.ne.s32 %p39, %r4, 3;
@%p39 bra $L__BB1_43;
@%p10 bra $L__BB1_43;
// Scan for %r4 == 3. Visits rows %r8416 in [0, %r6) and columns %r8418 in
// [0, %r5); %r8423 counts the samples that reach the validation step.
mov.u32 %r4106, 0;
mov.u32 %r8416, %r4106;
mov.u32 %r8423, %r4106;
$L__BB1_28:
// %r27 = row * %r1 (row stride in elements)
mul.lo.s32 %r27, %r8416, %r1;
mov.u32 %r8418, %r4106;
$L__BB1_29:
// Load the 32-bit sample at %rd3 + 4 * (%rd5 + row * %r1 + col); %r30 = |sample|.
add.s32 %r4108, %r8418, %r27;
cvt.u64.u32 %rd99, %r4108;
add.s64 %rd100, %rd99, %rd5;
shl.b64 %rd101, %rd100, 2;
add.s64 %rd102, %rd3, %rd101;
ld.global.u32 %r4109, [%rd102];
abs.s32 %r30, %r4109;
// Magnitudes 0 and 3 are accepted without further checks.
setp.eq.s32 %p41, %r30, 0;
@%p41 bra $L__BB1_32;
setp.eq.s32 %p42, %r30, 3;
@%p42 bra $L__BB1_32;
// Any other magnitude must be odd and at least 5; otherwise the input is
// rejected at $L__BB1_34.
add.s32 %r8423, %r8423, 1;
and.b32 %r4110, %r30, 1;
setp.eq.b32 %p43, %r4110, 1;
not.pred %p44, %p43;
setp.lt.u32 %p45, %r30, 5;
or.pred %p46, %p45, %p44;
@%p46 bra $L__BB1_34;
$L__BB1_32:
// Next column, then next row.
add.s32 %r8418, %r8418, 1;
setp.lt.u32 %p47, %r8418, %r5;
@%p47 bra $L__BB1_29;
add.s32 %r8416, %r8416, 1;
setp.lt.u32 %p48, %r8416, %r6;
@%p48 bra $L__BB1_28;
bra.uni $L__BB1_43;
// Scan for %r4 == 2 (skipped when %p10 is set). Visits rows %r8421 in
// [0, %r6) and columns %r8422 in [0, %r5). Every non-zero magnitude must be
// greater than 2 and odd; the first sample that violates this sends control to
// the rejection block $L__BB1_40. Falls through to $L__BB1_43 with
// %r8423 == 0 when all samples pass.
$L__BB1_35:
@%p10 bra $L__BB1_43;
mov.u32 %r4115, 0;
mov.u32 %r8421, %r4115;
$L__BB1_37:
// %r36 = row * %r1 (row stride in elements)
mul.lo.s32 %r36, %r8421, %r1;
mov.u32 %r8422, %r4115;
$L__BB1_38:
// Load the 32-bit sample at %rd3 + 4 * (%rd5 + row * %r1 + col); %r38 = |sample|.
add.s32 %r4117, %r8422, %r36;
cvt.u64.u32 %rd103, %r4117;
add.s64 %rd104, %rd103, %rd5;
shl.b64 %rd105, %rd104, 2;
add.s64 %rd106, %rd3, %rd105;
ld.global.u32 %r4118, [%rd106];
abs.s32 %r38, %r4118;
setp.eq.s32 %p50, %r38, 0;
@%p50 bra $L__BB1_41;
// %p53 = (|sample| > 2) && (|sample| is odd)
setp.gt.u32 %p51, %r38, 2;
and.b32 %r4119, %r38, 1;
setp.eq.b32 %p52, %r4119, 1;
and.pred %p53, %p51, %p52;
@%p53 bra $L__BB1_41;
bra.uni $L__BB1_40;
$L__BB1_41:
// Next column, then next row.
add.s32 %r8422, %r8422, 1;
setp.lt.u32 %p54, %r8422, %r5;
@%p54 bra $L__BB1_38;
add.s32 %r8421, %r8421, 1;
setp.lt.u32 %p55, %r8421, %r6;
mov.u32 %r8423, 0;
@%p55 bra $L__BB1_37;
// Pre-encode setup.
//   %r8395 = (%r5 + 1) >> 1
//   %rd1409 = %rd1 + %rd4 + 20548; the byte 0xFF is stored there
//   %r42 = %r2 - %r4, %r43 = 30 - %r42 (consumed later in the kernel)
//   %r45 = min(%r8395 + 2, 513) = number of bytes cleared in each of two
//          shared-memory byte arrays (base %r4095 and cleanup_cx_val)
// NOTE(review): %r4095 is set before this point; presumably it is the address
// of one of the other demoted shared arrays - confirm.
$L__BB1_43:
add.s32 %r8396, %r5, 1;
shr.u32 %r8395, %r8396, 1;
add.s64 %rd1410, %rd1, %rd4;
add.s64 %rd1409, %rd1410, 20548;
sub.s32 %r42, %r2, %r4;
mov.u32 %r4125, 30;
sub.s32 %r43, %r4125, %r42;
mov.u16 %rs510, 255;
st.global.u8 [%rd1409], %rs510;
add.s32 %r4127, %r8395, 2;
min.u32 %r45, %r4127, 513;
// %r4132 evaluates to %r45 - 1 (computed with wrapping unsigned arithmetic);
// when it is below 3 the 4-way unrolled loop is skipped entirely.
mov.u32 %r4128, -3;
sub.s32 %r4129, %r4128, %r8395;
max.u32 %r4130, %r4129, -514;
mov.u32 %r4131, -2;
sub.s32 %r4132, %r4131, %r4130;
// %r8428 = %r45 mod 4 = iterations left for the remainder loop.
and.b32 %r8428, %r45, 3;
setp.lt.u32 %p56, %r4132, 3;
mov.u32 %r8426, 0;
@%p56 bra $L__BB1_46;
sub.s32 %r8425, %r45, %r8428;
mov.u32 %r8426, 0;
// Unrolled loop: clears four consecutive bytes of both arrays per iteration.
$L__BB1_45:
add.s32 %r4135, %r4095, %r8426;
mov.u16 %rs511, 0;
st.shared.u8 [%r4135], %rs511;
mov.u32 %r4136, _ZZ32 j2k_htj2k_encode_codeblocksE14cleanup_cx_val;
add.s32 %r4137, %r4136, %r8426;
st.shared.u8 [%r4137], %rs511;
st.shared.u8 [%r4135+1], %rs511;
st.shared.u8 [%r4137+1], %rs511;
st.shared.u8 [%r4135+2], %rs511;
st.shared.u8 [%r4137+2], %rs511;
st.shared.u8 [%r4135+3], %rs511;
st.shared.u8 [%r4137+3], %rs511;
add.s32 %r8426, %r8426, 4;
add.s32 %r8425, %r8425, -4;
setp.ne.s32 %p57, %r8425, 0;
@%p57 bra $L__BB1_45;
$L__BB1_46:
setp.eq.s32 %p58, %r8428, 0;
@%p58 bra $L__BB1_49;
mov.u32 %r4140, _ZZ32 j2k_htj2k_encode_codeblocksE14cleanup_cx_val;
// Remainder loop: clears the last %r45 mod 4 bytes one at a time.
$L__BB1_48:
.pragma "nounroll";
add.s32 %r4139, %r4095, %r8426;
mov.u16 %rs512, 0;
st.shared.u8 [%r4139], %rs512;
add.s32 %r4141, %r4140, %r8426;
st.shared.u8 [%r4141], %rs512;
add.s32 %r8426, %r8426, 1;
add.s32 %r8428, %r8428, -1;
setp.ne.s32 %p59, %r8428, 0;
@%p59 bra $L__BB1_48;
// Seed the encoder state registers, then either bypass the main loop (%p10)
// or load the two table pointers and enter it at $L__BB1_51.
$L__BB1_49:
// Initial values: %r8520 = 8, %r8724 = 1, %r8961 = 4, %rs1165 = 15,
// %rs1096 = 0; every other state register below starts from 0, 1 or 8 by copy.
mov.u32 %r8723, 0;
mov.u32 %r8520, 8;
mov.u32 %r8724, 1;
mov.u32 %r8961, 4;
mov.u16 %rs1165, 15;
mov.u16 %rs1096, 0;
mov.u32 %r8725, %r8723;
mov.u32 %r8726, %r8723;
mov.u32 %r8514, %r8723;
mov.u32 %r8959, %r8723;
mov.u32 %r8960, %r8724;
mov.u32 %r8962, %r8724;
mov.u32 %r9176, %r8723;
mov.u32 %r9147, %r8723;
mov.u32 %r9148, %r8723;
mov.u32 %r9149, %r8520;
mov.u32 %r9150, %r8723;
// %p10 is computed before this chunk; when set, control goes straight to
// $L__BB1_417 with the state seeded above.
@%p10 bra $L__BB1_417;
// Kernel parameters 5 and 3 are converted to global pointers %rd9 and %rd8.
ld.param.u64 %rd1419, [ j2k_htj2k_encode_codeblocks_param_5];
ld.param.u64 %rd1411, [ j2k_htj2k_encode_codeblocks_param_3];
// %r57 = 31 - %r2: left-shift applied to sample magnitudes when they are loaded.
mov.u32 %r4175, 31;
sub.s32 %r57, %r4175, %r2;
cvta.to.global.u64 %rd8, %rd1411;
cvta.to.global.u64 %rd9, %rd1419;
// Same initial state re-materialised for the loop entry, plus the loop
// counters %r8429, %r8430 and %r8431, all starting at 0.
mov.u32 %r4174, 0;
mov.u32 %r8962, 1;
mov.u16 %rs1096, 0;
mov.u32 %r9149, 8;
mov.u16 %rs1165, 15;
mov.u32 %r8961, 4;
mov.u32 %r8429, %r4174;
mov.u32 %r8430, %r4174;
mov.u32 %r8431, %r4174;
mov.u32 %r9150, %r4174;
mov.u32 %r9148, %r4174;
mov.u32 %r9147, %r4174;
mov.u32 %r9176, %r4174;
mov.u32 %r8960, %r8962;
mov.u32 %r8959, %r4174;
mov.u32 %r8514, %r4174;
mov.u32 %r8520, %r9149;
mov.u32 %r8726, %r4174;
mov.u32 %r8725, %r4174;
mov.u32 %r8724, %r8962;
mov.u32 %r8723, %r4174;
bra.uni $L__BB1_51;
// Early-out path: write an eight-word record {2, 6, 0, 0, 0, 0, 0, 0} at
// [%rd6] and jump to $L__BB1_1905.
// NOTE(review): the words 2 and 6 look like status/error codes - confirm
// their meaning against the CUDA source.
$L__BB1_40:
mov.u32 %r4120, 2;
st.global.u32 [%rd6], %r4120;
mov.u32 %r4121, 6;
st.global.u32 [%rd6+4], %r4121;
// Remaining six words of the record are zeroed.
mov.u32 %r4122, 0;
st.global.u32 [%rd6+8], %r4122;
st.global.u32 [%rd6+12], %r4122;
st.global.u32 [%rd6+16], %r4122;
st.global.u32 [%rd6+20], %r4122;
st.global.u32 [%rd6+24], %r4122;
st.global.u32 [%rd6+28], %r4122;
bra.uni $L__BB1_1905;
// Second early-out path writing the same eight-word record
// {2, 6, 0, 0, 0, 0, 0, 0} at [%rd6] before jumping to $L__BB1_1905.
// The branch that reaches this label is outside this chunk.
$L__BB1_34:
mov.u32 %r4111, 2;
st.global.u32 [%rd6], %r4111;
mov.u32 %r4112, 6;
st.global.u32 [%rd6+4], %r4112;
// Remaining six words of the record are zeroed.
mov.u32 %r4113, 0;
st.global.u32 [%rd6+8], %r4113;
st.global.u32 [%rd6+12], %r4113;
st.global.u32 [%rd6+16], %r4113;
st.global.u32 [%rd6+20], %r4113;
st.global.u32 [%rd6+24], %r4113;
st.global.u32 [%rd6+28], %r4113;
bra.uni $L__BB1_1905;
// Flush the byte accumulator %rs1096 to [%rd13] once its bit counter has
// reached zero. The branch that reaches this label is outside this block.
$L__BB1_256:
// More than 191 bytes already written: store nothing, leave the bit counter
// at 0 and hand the value 1 to $L__BB1_258 in %r8719.
setp.gt.u32 %p287, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r8719, 1;
@%p287 bra $L__BB1_258;
and.b16 %rs599, %rs1096, 255;
st.global.u8 [%rd13], %rs1096;
add.s32 %r8514, %r8514, 1;
// After a 0xFF byte the next byte gets only 7 bits, otherwise 8.
setp.eq.s16 %p288, %rs599, 255;
selp.b32 %r8520, 7, 8, %p288;
mov.u16 %rs1096, 0;
// Normal path: carry the previous flag value %r8723 forward unchanged.
mov.u32 %r8719, %r8723;
bra.uni $L__BB1_258;
// Main-loop entry: load the first two samples of a four-sample group (index
// %r8430 and index %r8430 + %r1) and derive, for each, a bit width, a value
// to emit and a bit in the nonzero mask %r8455.
// NOTE(review): presumably %r1 is the row stride and %r6 the row count -
// confirm against the kernel prologue.
$L__BB1_51:
// Sample 0: 32-bit word at %rd3 + 4 * (%r8430 + %rd5).
cvt.u64.u32 %rd107, %r8430;
add.s64 %rd108, %rd107, %rd5;
shl.b64 %rd109, %rd108, 2;
add.s64 %rd110, %rd3, %rd109;
ld.global.u32 %r76, [%rd110];
setp.eq.s32 %p61, %r76, 0;
mov.u32 %r8447, %r4174;
@%p61 bra $L__BB1_53;
// Nonzero: %r8447 = sign bit | (|v| << %r57), a sign-magnitude form.
and.b32 %r4177, %r76, -2147483648;
abs.s32 %r4178, %r76;
shl.b32 %r4179, %r4178, %r57;
or.b32 %r8447, %r4179, %r4177;
$L__BB1_53:
// %r79 = ((%r8447 << 1) >> %r43) & ~1: sign dropped, low bit cleared.
shl.b32 %r4183, %r8447, 1;
shr.u32 %r4184, %r4183, %r43;
and.b32 %r79, %r4184, -2;
setp.eq.s32 %p62, %r79, 0;
// Defaults for a zero sample; %r8451 also pre-clears sample 1.
mov.u32 %r8451, 0;
mov.u32 %r8448, %r8451;
mov.u32 %r8449, %r8451;
mov.u32 %r8455, %r8451;
@%p62 bra $L__BB1_55;
// %r8448 = 32 - clz(%r79 - 1): bit length of (%r79 - 1).
add.s32 %r4186, %r79, -1;
clz.b32 %r4187, %r4186;
mov.u32 %r4188, 32;
sub.s32 %r8448, %r4188, %r4187;
// %r8449 = %r79 - 2 + sign bit: the value later written bit by bit.
shr.u32 %r4189, %r8447, 31;
add.s32 %r4190, %r4189, %r79;
add.s32 %r8449, %r4190, -2;
// Mask bit 0: sample 0 is nonzero.
mov.u32 %r8455, 1;
$L__BB1_55:
// Sample 1 is read only when %r6 >= 2; %p63 is reused by later blocks.
setp.lt.u32 %p63, %r6, 2;
@%p63 bra $L__BB1_58;
add.s32 %r4193, %r8430, %r1;
cvt.u64.u32 %rd111, %r4193;
add.s64 %rd112, %rd111, %rd5;
shl.b64 %rd113, %rd112, 2;
add.s64 %rd114, %rd3, %rd113;
ld.global.u32 %r85, [%rd114];
setp.eq.s32 %p64, %r85, 0;
@%p64 bra $L__BB1_58;
and.b32 %r4194, %r85, -2147483648;
abs.s32 %r4195, %r85;
shl.b32 %r4196, %r4195, %r57;
or.b32 %r8451, %r4196, %r4194;
$L__BB1_58:
shl.b32 %r4199, %r8451, 1;
shr.u32 %r4200, %r4199, %r43;
and.b32 %r88, %r4200, -2;
setp.eq.s32 %p65, %r88, 0;
// Defaults: %r8470 (running maximum width) starts as sample 0's width.
mov.u32 %r8466, 0;
mov.u32 %r8452, %r8466;
mov.u32 %r8453, %r8466;
mov.u32 %r8470, %r8448;
@%p65 bra $L__BB1_60;
// Mask bit 1: sample 1 is nonzero; width %r8452, emitted value %r8453.
or.b32 %r8455, %r8455, 2;
add.s32 %r4201, %r88, -1;
clz.b32 %r4202, %r4201;
mov.u32 %r4203, 32;
sub.s32 %r8452, %r4203, %r4202;
max.s32 %r8470, %r8448, %r8452;
shr.u32 %r4204, %r8451, 31;
add.s32 %r4205, %r4204, %r88;
add.s32 %r8453, %r4205, -2;
// Second half of the four-sample group: samples at index %r8430 + 1 and
// %r8430 + 1 + %r1. Skipped entirely when %r8429 + 1 >= %r5.
$L__BB1_60:
add.s32 %r8472, %r8430, 1;
add.s32 %r4210, %r8429, 1;
setp.ge.u32 %p66, %r4210, %r5;
// Defaults (all zero) used when the second pair does not exist.
mov.u32 %r8467, %r8466;
mov.u32 %r8468, %r8466;
mov.u32 %r8469, %r8466;
@%p66 bra $L__BB1_71;
// Sample 2: word at %rd3 + 4 * (%r8472 + %rd5).
cvt.u64.u32 %rd115, %r8472;
add.s64 %rd116, %rd115, %rd5;
shl.b64 %rd117, %rd116, 2;
add.s64 %rd118, %rd3, %rd117;
ld.global.u32 %r98, [%rd118];
setp.eq.s32 %p67, %r98, 0;
mov.u32 %r8467, 0;
mov.u32 %r8456, %r8467;
@%p67 bra $L__BB1_63;
// Sign-magnitude form, as for sample 0.
and.b32 %r4212, %r98, -2147483648;
abs.s32 %r4213, %r98;
shl.b32 %r4214, %r4213, %r57;
or.b32 %r8456, %r4214, %r4212;
$L__BB1_63:
shl.b32 %r4217, %r8456, 1;
shr.u32 %r4218, %r4217, %r43;
and.b32 %r101, %r4218, -2;
setp.eq.s32 %p68, %r101, 0;
mov.u32 %r8469, %r8467;
@%p68 bra $L__BB1_65;
// Mask bit 2: sample 2 is nonzero; width %r8467, emitted value %r8469.
or.b32 %r8455, %r8455, 4;
add.s32 %r4219, %r101, -1;
clz.b32 %r4220, %r4219;
mov.u32 %r4221, 32;
sub.s32 %r8467, %r4221, %r4220;
max.s32 %r8470, %r8470, %r8467;
shr.u32 %r4222, %r8456, 31;
add.s32 %r4223, %r4222, %r101;
add.s32 %r8469, %r4223, -2;
$L__BB1_65:
// Sample 3 is read only when %p63 is false (%r6 >= 2).
mov.u32 %r8466, 0;
mov.u32 %r8461, %r8466;
@%p63 bra $L__BB1_68;
add.s32 %r4226, %r8472, %r1;
cvt.u64.u32 %rd119, %r4226;
add.s64 %rd120, %rd119, %rd5;
shl.b64 %rd121, %rd120, 2;
add.s64 %rd122, %rd3, %rd121;
ld.global.u32 %r110, [%rd122];
setp.eq.s32 %p70, %r110, 0;
@%p70 bra $L__BB1_68;
and.b32 %r4227, %r110, -2147483648;
abs.s32 %r4228, %r110;
shl.b32 %r4229, %r4228, %r57;
or.b32 %r8461, %r4229, %r4227;
$L__BB1_68:
shl.b32 %r4232, %r8461, 1;
shr.u32 %r4233, %r4232, %r43;
and.b32 %r113, %r4233, -2;
setp.eq.s32 %p71, %r113, 0;
mov.u32 %r8468, %r8466;
@%p71 bra $L__BB1_70;
// Mask bit 3: sample 3 is nonzero; width %r8466, emitted value %r8468.
or.b32 %r8455, %r8455, 8;
add.s32 %r4234, %r113, -1;
clz.b32 %r4235, %r4234;
mov.u32 %r4236, 32;
sub.s32 %r8466, %r4236, %r4235;
max.s32 %r8470, %r8470, %r8466;
shr.u32 %r4237, %r8461, 31;
add.s32 %r4238, %r4237, %r113;
add.s32 %r8468, %r4238, -2;
$L__BB1_70:
// Both pairs consumed: the sample index advances by two.
add.s32 %r8472, %r8430, 2;
// Commit the advanced sample index and build %r8473, a 4-bit mask marking
// which of the four widths equal the group maximum %r8470.
$L__BB1_71:
mov.u32 %r8430, %r8472;
// %r130 = max(%r8470 - 1, 0); the mask is built only when %r8470 >= 2.
add.s32 %r4240, %r8470, -1;
setp.lt.s32 %p72, %r8470, 2;
setp.gt.s32 %p73, %r8470, 1;
selp.b32 %r130, %r4240, 0, %p73;
mov.u32 %r8473, 0;
@%p72 bra $L__BB1_73;
// bit 0: sample 0 width == max; bit 1: sample 1 width == max (via bfi).
setp.eq.s32 %p74, %r8448, %r8470;
selp.u32 %r4241, 1, 0, %p74;
setp.eq.s32 %p75, %r8452, %r8470;
selp.u32 %r4242, -1, 0, %p75;
bfi.b32 %r4243, %r4242, %r4241, 1, 1;
// bit 2: sample 2 width == max.
setp.eq.s32 %p76, %r8467, %r8470;
selp.u16 %rs517, 1, 0, %p76;
mul.wide.u16 %r4244, %rs517, 4;
or.b32 %r4245, %r4243, %r4244;
// bit 3: sample 3 width == max.
setp.eq.s32 %p77, %r8466, %r8470;
selp.u16 %rs518, 1, 0, %p77;
mul.wide.u16 %r4246, %rs518, 8;
or.b32 %r8473, %r4245, %r4246;
// Record per-column data for this group in the two shared-memory arrays, then
// fetch a 16-bit table entry indexed by (%r8431, nonzero mask, max-width mask).
$L__BB1_73:
// Slot index %r4247 = %r8429 >> 1 into the array based at %r4095.
shr.u32 %r4247, %r8429, 1;
add.s32 %r133, %r4095, %r4247;
// slot[0] = max(slot[0], low byte of sample 1 width %r8452).
ld.shared.u8 %rs519, [%r133];
cvt.u32.u16 %r4249, %rs519;
and.b32 %r4250, %r4249, 255;
and.b32 %r4251, %r8452, 255;
setp.lt.u32 %p78, %r4251, %r4250;
cvt.u16.u32 %rs520, %r8452;
selp.b16 %rs521, %rs519, %rs520, %p78;
st.shared.u8 [%r133], %rs521;
// slot[1] = sample 3 width %r8466 (kept in %rs3 for the second group).
cvt.u16.u32 %rs3, %r8466;
st.shared.u8 [%r133+1], %rs3;
// cleanup_cx_val[slot] |= mask bit 1; cleanup_cx_val[slot + 1] = mask bit 3.
and.b32 %r134, %r8455, 2;
cvt.u16.u32 %rs522, %r134;
shr.u16 %rs523, %rs522, 1;
mov.u32 %r4252, _ZZ32 j2k_htj2k_encode_codeblocksE14cleanup_cx_val;
add.s32 %r135, %r4252, %r4247;
ld.shared.u8 %rs524, [%r135];
or.b16 %rs525, %rs524, %rs523;
st.shared.u8 [%r135], %rs525;
and.b32 %r136, %r8455, 8;
shr.u32 %r137, %r136, 3;
st.shared.u8 [%r135+1], %r137;
// Table index = (%r8431 << 8) | (%r8455 << 4) | %r8473; 16-bit entries at
// %rd8 (kernel parameter 3).
shl.b32 %r4253, %r8455, 4;
shl.b32 %r4254, %r8431, 8;
or.b32 %r4255, %r4253, %r4254;
or.b32 %r4256, %r4255, %r8473;
mul.wide.u32 %rd123, %r4256, 2;
add.s64 %rd124, %rd8, %rd123;
ld.global.u16 %rs4, [%rd124];
// Entry layout as used here: bits 4..6 = number of bits to emit, bits 8 and
// up = the bits themselves; the low four bits are consumed at $L__BB1_128.
shr.u16 %rs526, %rs4, 4;
and.b16 %rs5, %rs526, 7;
setp.eq.s16 %p79, %rs5, 0;
mov.u32 %r8485, %r8959;
// Zero-length entry: nothing to emit.
@%p79 bra $L__BB1_80;
cvt.u32.u16 %r8474, %rs5;
shr.u16 %rs527, %rs4, 8;
cvt.u32.u16 %r8475, %rs527;
// Emit %r8474 bits of %r8475 (LSB first) into the byte accumulator %rs1165.
// Completed bytes are stored at descending addresses %rd1 + %rd4 + 20548 - n.
// State: %r8961 = bits already in the accumulator, %r8960 = 0/1 flag that
// reduces the byte capacity by one, %r8962 = bytes written so far.
// NOTE(review): presumably the backward-growing HTJ2K VLC segment - confirm.
$L__BB1_75:
mov.u32 %r140, %r8474;
// More than 2879 bytes already written: set %r8485 = 1 and stop emitting.
setp.gt.u32 %p80, %r8962, 2879;
mov.u32 %r8485, 1;
@%p80 bra $L__BB1_80;
// Free bits in the current byte: %r4260 = 8 - %r8960 - %r8961.
mov.u32 %r4258, 8;
sub.s32 %r4259, %r4258, %r8960;
sub.s32 %r4260, %r4259, %r8961;
// Take n = min(free, remaining) bits; the selp guards the n == 32 shift.
min.u32 %r4261, %r4260, %r140;
setp.eq.s32 %p81, %r4261, 32;
mov.u32 %r4262, -1;
shl.b32 %r4263, %r4262, %r4261;
not.b32 %r4264, %r4263;
selp.b32 %r4265, -1, %r4264, %p81;
and.b32 %r4266, %r4265, %r8475;
shl.b32 %r4267, %r4266, %r8961;
cvt.u16.u32 %rs528, %r4267;
or.b16 %rs1165, %rs1165, %rs528;
add.s32 %r8961, %r4261, %r8961;
sub.s32 %r8474, %r140, %r4261;
shr.u32 %r8475, %r8475, %r4261;
// Byte not yet full: go test whether bits remain.
setp.gt.u32 %p82, %r4260, %r140;
@%p82 bra $L__BB1_79;
// Byte full. If the capacity flag was set and the byte is not 127, the flag
// is just cleared (freeing one more bit) and nothing is stored yet.
setp.ne.s32 %p83, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs529, %rs1165, 255;
setp.ne.s16 %p84, %rs529, 127;
and.pred %p85, %p83, %p84;
@%p85 bra $L__BB1_79;
// Store the byte at offset 20548 - %r8962 and restart the accumulator.
mov.u32 %r4270, 20548;
sub.s32 %r4271, %r4270, %r8962;
cvt.u64.u32 %rd125, %r4271;
add.s64 %rd126, %rd125, %rd4;
add.s64 %rd127, %rd1, %rd126;
st.global.u8 [%rd127], %rs1165;
add.s32 %r8962, %r8962, 1;
// A stored byte greater than 143 sets the capacity flag for the next byte.
setp.gt.u16 %p86, %rs529, 143;
selp.u32 %r8960, 1, 0, %p86;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_79:
// Loop until every requested bit is consumed; %r8485 keeps the old flag.
setp.ne.s32 %p87, %r8474, 0;
mov.u32 %r8485, %r8959;
@%p87 bra $L__BB1_75;
// Event step, taken only when %r8431 == 0. A group with a nonzero mask
// appends a 0 bit to %rs1096; an all-zero group goes to $L__BB1_120.
// NOTE(review): presumably the HTJ2K MEL coder - confirm against the source.
$L__BB1_80:
setp.ne.s32 %p88, %r8431, 0;
@%p88 bra $L__BB1_128;
setp.eq.s32 %p89, %r8455, 0;
// %rd10 = address of the next byte of the stream based at offset 17477.
add.s32 %r4272, %r8514, 17477;
cvt.u64.u32 %rd128, %r4272;
add.s64 %rd129, %rd128, %rd4;
add.s64 %rd10, %rd1, %rd129;
@%p89 bra $L__BB1_120;
// Append a 0 bit and decrement the bit counter %r8520.
shl.b16 %rs1096, %rs1096, 1;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p90, %r8520, 0;
mov.u32 %r8519, %r8723;
@%p90 bra $L__BB1_85;
// Counter hit zero: $L__BB1_83 (outside this chunk) handles that case.
bra.uni $L__BB1_83;
// Map the state index %r8725 to a bit count %r8489:
//   0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, otherwise 5.
$L__BB1_85:
setp.lt.u32 %p92, %r8725, 3;
mov.u32 %r8489, 0;
@%p92 bra $L__BB1_88;
setp.lt.u32 %p93, %r8725, 6;
mov.u32 %r8489, 1;
@%p93 bra $L__BB1_88;
// Branch-free selection for the remaining ranges.
setp.lt.u32 %p94, %r8725, 9;
setp.eq.s32 %p95, %r8725, 11;
selp.b32 %r4278, 4, 5, %p95;
setp.lt.u32 %p96, %r8725, 11;
selp.b32 %r4279, 3, %r4278, %p96;
selp.b32 %r8489, 2, %r4279, %p94;
// Write the low %r8489 bits of the run counter %r8726, most significant
// first, into %rs1096. This block is the peeled prologue of a 4x-unrolled
// loop: it emits (%r8489 mod 4) bits, and $L__BB1_103 emits the rest.
// Each flush stores one byte at %rd1 + %rd4 + 17477 + %r8514. Once %r8514
// exceeds 191 no byte is stored and the flag register is set to 1 instead.
$L__BB1_88:
setp.eq.s32 %p97, %r8489, 0;
@%p97 bra $L__BB1_116;
// %r164 = index of the top bit; %r165 = number of peeled iterations.
add.s32 %r164, %r8489, -1;
and.b32 %r165, %r8489, 3;
setp.eq.s32 %p98, %r165, 0;
mov.u32 %r8499, %r8489;
mov.u32 %r8502, %r8519;
@%p98 bra $L__BB1_101;
// Peeled iteration 1: bit %r164 of %r8726; the bfi forms (acc << 1) | bit.
mov.u32 %r4281, 1;
shl.b32 %r4282, %r4281, %r164;
and.b32 %r4283, %r4282, %r8726;
setp.ne.s32 %p99, %r4283, 0;
selp.u32 %r4284, 1, 0, %p99;
cvt.u32.u16 %r4285, %rs1096;
bfi.b32 %r4286, %r4285, %r4284, 1, 8;
cvt.u16.u32 %rs1096, %r4286;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p100, %r8520, 0;
mov.u32 %r8502, %r8519;
@%p100 bra $L__BB1_93;
setp.gt.u32 %p101, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r8502, %r4281;
@%p101 bra $L__BB1_93;
add.s32 %r4290, %r8514, 17477;
cvt.u64.u32 %rd130, %r4290;
add.s64 %rd131, %rd130, %rd4;
add.s64 %rd132, %rd1, %rd131;
st.global.u8 [%rd132], %rs1096;
add.s32 %r8514, %r8514, 1;
// This flush reloads the counter with a constant 8; the later flushes pick
// 7 when the stored byte equals 0xFF.
mov.u32 %r8520, 8;
mov.u16 %rs1096, 0;
mov.u32 %r8502, %r8519;
$L__BB1_93:
setp.eq.s32 %p102, %r165, 1;
mov.u32 %r8519, %r8502;
mov.u32 %r8499, %r164;
@%p102 bra $L__BB1_101;
// Peeled iteration 2: bit (%r8489 - 2).
add.s32 %r8499, %r8489, -2;
mov.u32 %r4291, 1;
shl.b32 %r4292, %r4291, %r8499;
and.b32 %r4293, %r4292, %r8726;
setp.ne.s32 %p103, %r4293, 0;
selp.u32 %r4294, 1, 0, %p103;
cvt.u32.u16 %r4295, %rs1096;
bfi.b32 %r4296, %r4295, %r4294, 1, 8;
cvt.u16.u32 %rs1096, %r4296;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p104, %r8520, 0;
mov.u32 %r8493, %r8502;
@%p104 bra $L__BB1_97;
setp.gt.u32 %p105, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r8493, %r4291;
@%p105 bra $L__BB1_97;
add.s32 %r4299, %r8514, 17477;
cvt.u64.u32 %rd133, %r4299;
add.s64 %rd134, %rd133, %rd4;
add.s64 %rd135, %rd1, %rd134;
and.b16 %rs536, %rs1096, 255;
st.global.u8 [%rd135], %rs1096;
add.s32 %r8514, %r8514, 1;
// After a 0xFF byte the next byte gets only 7 bits, otherwise 8.
setp.eq.s16 %p106, %rs536, 255;
selp.b32 %r8520, 7, 8, %p106;
mov.u16 %rs1096, 0;
mov.u32 %r8493, %r8502;
$L__BB1_97:
setp.eq.s32 %p107, %r165, 2;
mov.u32 %r8519, %r8493;
mov.u32 %r8502, %r8493;
@%p107 bra $L__BB1_101;
// Peeled iteration 3: bit (%r8489 - 3).
add.s32 %r8499, %r8489, -3;
mov.u32 %r4300, 1;
shl.b32 %r4301, %r4300, %r8499;
and.b32 %r4302, %r4301, %r8726;
setp.ne.s32 %p108, %r4302, 0;
selp.u32 %r4303, 1, 0, %p108;
cvt.u32.u16 %r4304, %rs1096;
bfi.b32 %r4305, %r4304, %r4303, 1, 8;
cvt.u16.u32 %rs1096, %r4305;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p109, %r8520, 0;
mov.u32 %r8519, %r8493;
mov.u32 %r8502, %r8493;
@%p109 bra $L__BB1_101;
setp.gt.u32 %p110, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r8519, %r4300;
mov.u32 %r8502, %r4300;
@%p110 bra $L__BB1_101;
add.s32 %r4310, %r8514, 17477;
cvt.u64.u32 %rd136, %r4310;
add.s64 %rd137, %rd136, %rd4;
add.s64 %rd138, %rd1, %rd137;
and.b16 %rs539, %rs1096, 255;
st.global.u8 [%rd138], %rs1096;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p111, %rs539, 255;
selp.b32 %r8520, 7, 8, %p111;
mov.u16 %rs1096, 0;
mov.u32 %r8519, %r8493;
mov.u32 %r8502, %r8493;
$L__BB1_101:
// Fewer than four bits in total (%r164 < 3): the prologue emitted them all.
setp.lt.u32 %p112, %r164, 3;
@%p112 bra $L__BB1_116;
mov.u32 %r8519, %r8502;
// 4x-unrolled body of the bit-emission loop. Each pass writes bits
// %r8499-1 .. %r8499-4 of %r8726 (most significant first) and then lowers
// %r8499 by four until it reaches zero. Every step forms (acc << 1) | bit,
// decrements the bit counter and, at zero, flushes one byte to
// %rd1 + %rd4 + 17477 + %r8514 (none once %r8514 > 191; flag set to 1).
$L__BB1_103:
// Step 1: bit (%r8499 - 1).
add.s32 %r4311, %r8499, -1;
mov.u32 %r4312, 1;
shl.b32 %r4313, %r4312, %r4311;
and.b32 %r4314, %r4313, %r8726;
setp.ne.s32 %p113, %r4314, 0;
selp.u32 %r4315, 1, 0, %p113;
cvt.u32.u16 %r4316, %rs1096;
bfi.b32 %r8508, %r4316, %r4315, 1, 8;
add.s32 %r8509, %r8520, -1;
setp.ne.s32 %p114, %r8509, 0;
mov.u32 %r8507, %r8519;
@%p114 bra $L__BB1_106;
setp.gt.u32 %p115, %r8514, 191;
mov.u32 %r8509, 0;
mov.u32 %r8507, %r4312;
@%p115 bra $L__BB1_106;
cvt.u16.u32 %rs540, %r8508;
and.b16 %rs541, %rs540, 255;
add.s32 %r4320, %r8514, 17477;
cvt.u64.u32 %rd139, %r4320;
add.s64 %rd140, %rd139, %rd4;
add.s64 %rd141, %rd1, %rd140;
st.global.u8 [%rd141], %rs540;
add.s32 %r8514, %r8514, 1;
// After a 0xFF byte the next byte gets only 7 bits, otherwise 8.
setp.eq.s16 %p116, %rs541, 255;
selp.b32 %r8509, 7, 8, %p116;
mov.u32 %r8508, 0;
mov.u32 %r8507, %r8519;
$L__BB1_106:
// Step 2: bit (%r8499 - 2); the accumulator is masked to 7 bits before the
// shift so the result stays within one byte.
add.s32 %r4321, %r8499, -2;
shl.b32 %r4323, %r4312, %r4321;
and.b32 %r4324, %r4323, %r8726;
setp.ne.s32 %p117, %r4324, 0;
and.b32 %r4325, %r8508, 127;
selp.u32 %r4326, 1, 0, %p117;
bfi.b32 %r8512, %r4325, %r4326, 1, 7;
add.s32 %r8513, %r8509, -1;
setp.ne.s32 %p118, %r8513, 0;
mov.u32 %r8511, %r8507;
@%p118 bra $L__BB1_109;
setp.gt.u32 %p119, %r8514, 191;
mov.u32 %r8513, 0;
mov.u32 %r8511, 1;
@%p119 bra $L__BB1_109;
cvt.u16.u32 %rs542, %r8512;
and.b16 %rs543, %rs542, 255;
add.s32 %r4330, %r8514, 17477;
cvt.u64.u32 %rd142, %r4330;
add.s64 %rd143, %rd142, %rd4;
add.s64 %rd144, %rd1, %rd143;
st.global.u8 [%rd144], %rs542;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p120, %rs543, 255;
selp.b32 %r8513, 7, 8, %p120;
mov.u32 %r8512, 0;
mov.u32 %r8511, %r8507;
$L__BB1_109:
// Step 3: bit (%r8499 - 3).
add.s32 %r4331, %r8499, -3;
mov.u32 %r4332, 1;
shl.b32 %r4333, %r4332, %r4331;
and.b32 %r4334, %r4333, %r8726;
setp.ne.s32 %p121, %r4334, 0;
and.b32 %r4335, %r8512, 127;
selp.u32 %r4336, 1, 0, %p121;
bfi.b32 %r8516, %r4335, %r4336, 1, 7;
add.s32 %r8517, %r8513, -1;
setp.ne.s32 %p122, %r8517, 0;
mov.u32 %r8515, %r8511;
@%p122 bra $L__BB1_112;
setp.gt.u32 %p123, %r8514, 191;
mov.u32 %r8517, 0;
mov.u32 %r8515, %r4332;
@%p123 bra $L__BB1_112;
cvt.u16.u32 %rs544, %r8516;
and.b16 %rs545, %rs544, 255;
add.s32 %r4340, %r8514, 17477;
cvt.u64.u32 %rd145, %r4340;
add.s64 %rd146, %rd145, %rd4;
add.s64 %rd147, %rd1, %rd146;
st.global.u8 [%rd147], %rs544;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p124, %rs545, 255;
selp.b32 %r8517, 7, 8, %p124;
mov.u32 %r8516, 0;
mov.u32 %r8515, %r8511;
$L__BB1_112:
// Step 4: bit (%r8499 - 4); %r8499 itself is lowered by four here and the
// result is written back to the persistent accumulator %rs1096.
add.s32 %r8499, %r8499, -4;
shl.b32 %r4342, %r4332, %r8499;
and.b32 %r4343, %r4342, %r8726;
setp.ne.s32 %p125, %r4343, 0;
and.b32 %r4344, %r8516, 127;
selp.u32 %r4345, 1, 0, %p125;
bfi.b32 %r4346, %r4344, %r4345, 1, 15;
cvt.u16.u32 %rs1096, %r4346;
add.s32 %r8520, %r8517, -1;
setp.ne.s32 %p126, %r8520, 0;
mov.u32 %r8519, %r8515;
@%p126 bra $L__BB1_115;
setp.gt.u32 %p127, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r8519, 1;
@%p127 bra $L__BB1_115;
add.s32 %r4349, %r8514, 17477;
cvt.u64.u32 %rd148, %r4349;
add.s64 %rd149, %rd148, %rd4;
add.s64 %rd150, %rd1, %rd149;
and.b16 %rs547, %rs1096, 255;
st.global.u8 [%rd150], %rs1096;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p128, %rs547, 255;
selp.b32 %r8520, 7, 8, %p128;
mov.u16 %rs1096, 0;
mov.u32 %r8519, %r8515;
$L__BB1_115:
// Repeat while bits remain.
setp.ne.s32 %p129, %r8499, 0;
@%p129 bra $L__BB1_103;
// After the 0-bit path: step the state index %r8725 down by one (saturating
// at 0), reset the run counter %r8726 and recompute the run threshold
// %r8724 = 1 << f(%r8725), with f the same step function as $L__BB1_85.
$L__BB1_116:
add.s32 %r4351, %r8725, -1;
setp.eq.s32 %p130, %r8725, 0;
mov.u32 %r8726, 0;
selp.b32 %r8725, 0, %r4351, %p130;
// f: 0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, otherwise 5.
setp.lt.u32 %p131, %r8725, 3;
mov.u32 %r8525, %r8726;
@%p131 bra $L__BB1_119;
setp.lt.u32 %p132, %r8725, 6;
mov.u32 %r8525, 1;
@%p132 bra $L__BB1_119;
setp.lt.u32 %p133, %r8725, 9;
setp.eq.s32 %p134, %r8725, 11;
selp.b32 %r4353, 4, 5, %p134;
setp.lt.u32 %p135, %r8725, 11;
selp.b32 %r4354, 3, %r4353, %p135;
selp.b32 %r8525, 2, %r4354, %p133;
$L__BB1_119:
mov.u32 %r4356, 1;
shl.b32 %r8724, %r4356, %r8525;
// Commit the flag value produced by the bit-emission code.
mov.u32 %r8723, %r8519;
bra.uni $L__BB1_128;
// All-zero group: extend the current run. When the run counter %r8726
// reaches the threshold %r8724, append a 1 bit, step the state index %r8725
// up (capped at 12) and recompute the threshold.
$L__BB1_120:
add.s32 %r8726, %r8726, 1;
setp.lt.u32 %p136, %r8726, %r8724;
@%p136 bra $L__BB1_128;
// Run complete: append a 1 bit and decrement the bit counter.
shl.b16 %rs548, %rs1096, 1;
or.b16 %rs1096, %rs548, 1;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p137, %r8520, 0;
mov.u32 %r8526, %r8723;
@%p137 bra $L__BB1_124;
// Byte complete: store it at %rd10 unless 192 bytes were already written, in
// which case only the flag %r8526 is set to 1.
setp.gt.u32 %p138, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r8526, 1;
@%p138 bra $L__BB1_124;
and.b16 %rs550, %rs1096, 255;
st.global.u8 [%rd10], %rs1096;
add.s32 %r8514, %r8514, 1;
// After a 0xFF byte the next byte gets only 7 bits, otherwise 8.
setp.eq.s16 %p139, %rs550, 255;
selp.b32 %r8520, 7, 8, %p139;
mov.u16 %rs1096, 0;
mov.u32 %r8526, %r8723;
$L__BB1_124:
// %r8725 = min(%r8725 + 1, 12); run counter reset.
add.s32 %r4360, %r8725, 1;
min.u32 %r8725, %r4360, 12;
// Threshold exponent: 0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4,
// otherwise 5.
setp.lt.u32 %p140, %r8725, 3;
mov.u32 %r8726, 0;
mov.u32 %r8529, %r8726;
@%p140 bra $L__BB1_127;
setp.lt.u32 %p141, %r8725, 6;
mov.u32 %r8529, 1;
@%p141 bra $L__BB1_127;
setp.lt.u32 %p142, %r8725, 9;
setp.eq.s32 %p143, %r8725, 11;
selp.b32 %r4362, 4, 5, %p143;
setp.lt.u32 %p144, %r8725, 11;
selp.b32 %r4363, 3, %r4362, %p144;
selp.b32 %r8529, 2, %r4363, %p142;
$L__BB1_127:
mov.u32 %r4365, 1;
shl.b32 %r8724, %r4365, %r8529;
mov.u32 %r8723, %r8526;
// Emit the value bits of sample 0 (if mask bit 0 is set) into the forward
// byte stream at %rd1 + %rd4 + %r9150.
// Stream state: %r9147 = accumulator, %r9148 = bits used, %r9149 = bits
// allowed in this byte (8, or 7 after a 0xFF byte), %r9150 = bytes written.
// NOTE(review): presumably the HTJ2K MagSgn segment - confirm.
$L__BB1_128:
// %r248 = max(%r8470, 1); %r249 = low four bits of the table entry %rs4.
max.s32 %r248, %r8470, 1;
and.b16 %rs551, %rs4, 15;
cvt.u32.u16 %r249, %rs551;
and.b32 %r250, %r8455, 1;
setp.eq.s32 %p145, %r250, 0;
mov.u32 %r8546, %r9176;
@%p145 bra $L__BB1_135;
// Bit count = %r248 minus bit 0 of %r249; nothing to emit when it is zero.
and.b32 %r4366, %r249, 1;
sub.s32 %r8536, %r248, %r4366;
setp.eq.s32 %p146, %r8536, 0;
mov.u32 %r8546, %r9176;
@%p146 bra $L__BB1_135;
// Keep only the low %r8536 bits of the sample's value %r8449.
mov.u32 %r4367, -1;
shl.b32 %r4368, %r4367, %r8536;
not.b32 %r4369, %r4368;
and.b32 %r8537, %r8449, %r4369;
$L__BB1_131:
// Capacity check: once 17477 bytes are written, set %r8546 = 1 and stop.
setp.gt.u32 %p147, %r9150, 17476;
mov.u32 %r8546, 1;
@%p147 bra $L__BB1_135;
// n = min(free bits, remaining bits); the selp guards the n == 32 shift.
sub.s32 %r4371, %r9149, %r9148;
min.u32 %r4372, %r4371, %r8536;
setp.eq.s32 %p148, %r4372, 32;
mov.u32 %r4373, -1;
shl.b32 %r4374, %r4373, %r4372;
not.b32 %r4375, %r4374;
selp.b32 %r4376, -1, %r4375, %p148;
and.b32 %r4377, %r4376, %r8537;
shl.b32 %r4378, %r4377, %r9148;
or.b32 %r9147, %r4378, %r9147;
add.s32 %r9148, %r4372, %r9148;
shr.u32 %r8537, %r8537, %r4372;
sub.s32 %r8536, %r8536, %r4372;
setp.lt.u32 %p149, %r9148, %r9149;
@%p149 bra $L__BB1_134;
// Byte full: store it; a 0xFF byte limits the next byte to 7 bits.
cvt.u64.u32 %rd151, %r9150;
add.s64 %rd152, %rd151, %rd4;
add.s64 %rd153, %rd1, %rd152;
st.global.u8 [%rd153], %r9147;
add.s32 %r9150, %r9150, 1;
setp.eq.s32 %p150, %r9147, 255;
selp.b32 %r9149, 7, 8, %p150;
mov.u32 %r9147, 0;
mov.u32 %r9148, %r9147;
$L__BB1_134:
setp.ne.s32 %p151, %r8536, 0;
mov.u32 %r8546, %r9176;
@%p151 bra $L__BB1_131;
// Emit the value bits of sample 1 (if mask bit 1, %r134, is set) into the
// forward byte stream. Same packing scheme as for sample 0; the flag is
// carried in %r8546 -> %r8561.
$L__BB1_135:
setp.eq.s32 %p152, %r134, 0;
mov.u32 %r8561, %r8546;
@%p152 bra $L__BB1_142;
// Bit count = %r248 minus bit 1 of %r249.
shr.u32 %r4381, %r249, 1;
and.b32 %r4382, %r4381, 1;
sub.s32 %r8551, %r248, %r4382;
setp.eq.s32 %p153, %r8551, 0;
mov.u32 %r8561, %r8546;
@%p153 bra $L__BB1_142;
// Keep only the low %r8551 bits of the sample's value %r8453.
mov.u32 %r4383, -1;
shl.b32 %r4384, %r4383, %r8551;
not.b32 %r4385, %r4384;
and.b32 %r8552, %r8453, %r4385;
$L__BB1_138:
// Capacity check: once 17477 bytes are written, set %r8561 = 1 and stop.
setp.gt.u32 %p154, %r9150, 17476;
mov.u32 %r8561, 1;
@%p154 bra $L__BB1_142;
// n = min(free bits, remaining bits).
sub.s32 %r4387, %r9149, %r9148;
min.u32 %r4388, %r4387, %r8551;
setp.eq.s32 %p155, %r4388, 32;
mov.u32 %r4389, -1;
shl.b32 %r4390, %r4389, %r4388;
not.b32 %r4391, %r4390;
selp.b32 %r4392, -1, %r4391, %p155;
and.b32 %r4393, %r4392, %r8552;
shl.b32 %r4394, %r4393, %r9148;
or.b32 %r9147, %r4394, %r9147;
add.s32 %r9148, %r4388, %r9148;
shr.u32 %r8552, %r8552, %r4388;
sub.s32 %r8551, %r8551, %r4388;
setp.lt.u32 %p156, %r9148, %r9149;
@%p156 bra $L__BB1_141;
// Byte full: store it; a 0xFF byte limits the next byte to 7 bits.
cvt.u64.u32 %rd154, %r9150;
add.s64 %rd155, %rd154, %rd4;
add.s64 %rd156, %rd1, %rd155;
st.global.u8 [%rd156], %r9147;
add.s32 %r9150, %r9150, 1;
setp.eq.s32 %p157, %r9147, 255;
selp.b32 %r9149, 7, 8, %p157;
mov.u32 %r9147, 0;
mov.u32 %r9148, %r9147;
$L__BB1_141:
setp.ne.s32 %p158, %r8551, 0;
mov.u32 %r8561, %r8546;
@%p158 bra $L__BB1_138;
// Emit the value bits of sample 2 (if mask bit 2 is set) into the forward
// byte stream. Same packing scheme as for samples 0 and 1; the flag is
// carried in %r8561 -> %r8576.
$L__BB1_142:
and.b32 %r4397, %r8455, 4;
setp.eq.s32 %p159, %r4397, 0;
mov.u32 %r8576, %r8561;
@%p159 bra $L__BB1_149;
// Bit count = %r248 minus bit 2 of %r249.
shr.u32 %r4398, %r249, 2;
and.b32 %r4399, %r4398, 1;
sub.s32 %r8566, %r248, %r4399;
setp.eq.s32 %p160, %r8566, 0;
mov.u32 %r8576, %r8561;
@%p160 bra $L__BB1_149;
// Keep only the low %r8566 bits of the sample's value %r8469.
mov.u32 %r4400, -1;
shl.b32 %r4401, %r4400, %r8566;
not.b32 %r4402, %r4401;
and.b32 %r8567, %r8469, %r4402;
$L__BB1_145:
// Capacity check: once 17477 bytes are written, set %r8576 = 1 and stop.
setp.gt.u32 %p161, %r9150, 17476;
mov.u32 %r8576, 1;
@%p161 bra $L__BB1_149;
// n = min(free bits, remaining bits).
sub.s32 %r4404, %r9149, %r9148;
min.u32 %r4405, %r4404, %r8566;
setp.eq.s32 %p162, %r4405, 32;
mov.u32 %r4406, -1;
shl.b32 %r4407, %r4406, %r4405;
not.b32 %r4408, %r4407;
selp.b32 %r4409, -1, %r4408, %p162;
and.b32 %r4410, %r4409, %r8567;
shl.b32 %r4411, %r4410, %r9148;
or.b32 %r9147, %r4411, %r9147;
add.s32 %r9148, %r4405, %r9148;
shr.u32 %r8567, %r8567, %r4405;
sub.s32 %r8566, %r8566, %r4405;
setp.lt.u32 %p163, %r9148, %r9149;
@%p163 bra $L__BB1_148;
// Byte full: store it; a 0xFF byte limits the next byte to 7 bits.
cvt.u64.u32 %rd157, %r9150;
add.s64 %rd158, %rd157, %rd4;
add.s64 %rd159, %rd1, %rd158;
st.global.u8 [%rd159], %r9147;
add.s32 %r9150, %r9150, 1;
setp.eq.s32 %p164, %r9147, 255;
selp.b32 %r9149, 7, 8, %p164;
mov.u32 %r9147, 0;
mov.u32 %r9148, %r9147;
$L__BB1_148:
setp.ne.s32 %p165, %r8566, 0;
mov.u32 %r8576, %r8561;
@%p165 bra $L__BB1_145;
// Emit the value bits of sample 3 (if mask bit 3, %r136, is set) into the
// forward byte stream. Same packing scheme as the other samples; the final
// flag value lands in the persistent register %r9176.
$L__BB1_149:
setp.eq.s32 %p166, %r136, 0;
mov.u32 %r9176, %r8576;
@%p166 bra $L__BB1_156;
// Bit count = %r248 minus bit 3 of %r249 (%r249 is a 4-bit value).
shr.u32 %r4414, %r249, 3;
sub.s32 %r8581, %r248, %r4414;
setp.eq.s32 %p167, %r8581, 0;
mov.u32 %r9176, %r8576;
@%p167 bra $L__BB1_156;
// Keep only the low %r8581 bits of the sample's value %r8468.
mov.u32 %r4415, -1;
shl.b32 %r4416, %r4415, %r8581;
not.b32 %r4417, %r4416;
and.b32 %r8582, %r8468, %r4417;
$L__BB1_152:
// Capacity check: once 17477 bytes are written, set %r9176 = 1 and stop.
setp.gt.u32 %p168, %r9150, 17476;
mov.u32 %r9176, 1;
@%p168 bra $L__BB1_156;
// n = min(free bits, remaining bits).
sub.s32 %r4419, %r9149, %r9148;
min.u32 %r4420, %r4419, %r8581;
setp.eq.s32 %p169, %r4420, 32;
mov.u32 %r4421, -1;
shl.b32 %r4422, %r4421, %r4420;
not.b32 %r4423, %r4422;
selp.b32 %r4424, -1, %r4423, %p169;
and.b32 %r4425, %r4424, %r8582;
shl.b32 %r4426, %r4425, %r9148;
or.b32 %r9147, %r4426, %r9147;
add.s32 %r9148, %r4420, %r9148;
shr.u32 %r8582, %r8582, %r4420;
sub.s32 %r8581, %r8581, %r4420;
setp.lt.u32 %p170, %r9148, %r9149;
@%p170 bra $L__BB1_155;
// Byte full: store it; a 0xFF byte limits the next byte to 7 bits.
cvt.u64.u32 %rd160, %r9150;
add.s64 %rd161, %rd160, %rd4;
add.s64 %rd162, %rd1, %rd161;
st.global.u8 [%rd162], %r9147;
add.s32 %r9150, %r9150, 1;
setp.eq.s32 %p171, %r9147, 255;
selp.b32 %r9149, 7, 8, %p171;
mov.u32 %r9147, 0;
mov.u32 %r9148, %r9147;
$L__BB1_155:
setp.ne.s32 %p172, %r8581, 0;
mov.u32 %r9176, %r8576;
@%p172 bra $L__BB1_152;
// Decide whether a second four-sample group follows in this pass
// (%r8429 + 2 < %r5), and precompute two addresses into the table at %rd9
// (kernel parameter 5).
$L__BB1_156:
add.s32 %r4429, %r8429, 2;
setp.lt.u32 %p173, %r4429, %r5;
// %rd11 = %rd9 + 6 * %r130 and %rd12 = %rd11 + 2. Both are consumed outside
// this chunk.
mul.lo.s32 %r343, %r130, 6;
cvt.u64.u32 %rd163, %r343;
add.s64 %rd11, %rd9, %rd163;
add.s32 %r4430, %r343, 2;
cvt.u64.u32 %rd164, %r4430;
add.s64 %rd12, %rd9, %rd164;
@%p173 bra $L__BB1_185;
bra.uni $L__BB1_157;
// Second four-sample group of the pass: load its first two samples (index
// %r8430 and %r8430 + %r1) and derive width, emitted value and nonzero-mask
// bits, exactly as done for the first group but into %r8641.. / %r8648.
$L__BB1_185:
// Sample 0: word at %rd3 + 4 * (%r8430 + %rd5).
cvt.u64.u32 %rd178, %r8430;
add.s64 %rd179, %rd178, %rd5;
shl.b64 %rd180, %rd179, 2;
add.s64 %rd181, %rd3, %rd180;
ld.global.u32 %r416, [%rd181];
setp.eq.s32 %p210, %r416, 0;
mov.u32 %r8641, 0;
mov.u32 %r8640, %r8641;
@%p210 bra $L__BB1_187;
// Nonzero: %r8640 = sign bit | (|v| << %r57).
and.b32 %r4501, %r416, -2147483648;
abs.s32 %r4502, %r416;
shl.b32 %r4503, %r4502, %r57;
or.b32 %r8640, %r4503, %r4501;
$L__BB1_187:
// %r419 = ((%r8640 << 1) >> %r43) & ~1.
shl.b32 %r4507, %r8640, 1;
shr.u32 %r4508, %r4507, %r43;
and.b32 %r419, %r4508, -2;
setp.eq.s32 %p211, %r419, 0;
mov.u32 %r8642, %r8641;
mov.u32 %r8648, %r8641;
@%p211 bra $L__BB1_189;
// Width %r8641 = 32 - clz(%r419 - 1); value %r8642 = %r419 - 2 + sign bit.
add.s32 %r4510, %r419, -1;
clz.b32 %r4511, %r4510;
mov.u32 %r4512, 32;
sub.s32 %r8641, %r4512, %r4511;
shr.u32 %r4513, %r8640, 31;
add.s32 %r4514, %r4513, %r419;
add.s32 %r8642, %r4514, -2;
// Mask bit 0: sample 0 is nonzero.
mov.u32 %r8648, 1;
$L__BB1_189:
// Sample 1 is read only when %p63 is false (%r6 >= 2).
mov.u32 %r8645, 0;
mov.u32 %r8644, %r8645;
@%p63 bra $L__BB1_192;
add.s32 %r4517, %r8430, %r1;
cvt.u64.u32 %rd182, %r4517;
add.s64 %rd183, %rd182, %rd5;
shl.b64 %rd184, %rd183, 2;
add.s64 %rd185, %rd3, %rd184;
ld.global.u32 %r425, [%rd185];
setp.eq.s32 %p213, %r425, 0;
@%p213 bra $L__BB1_192;
and.b32 %r4518, %r425, -2147483648;
abs.s32 %r4519, %r425;
shl.b32 %r4520, %r4519, %r57;
or.b32 %r8644, %r4520, %r4518;
$L__BB1_192:
shl.b32 %r4523, %r8644, 1;
shr.u32 %r4524, %r4523, %r43;
and.b32 %r428, %r4524, -2;
setp.eq.s32 %p214, %r428, 0;
// Running maximum width %r8663 starts as sample 0's width.
mov.u32 %r8646, %r8645;
mov.u32 %r8663, %r8641;
@%p214 bra $L__BB1_194;
// Mask bit 1: sample 1 is nonzero; width %r8645, emitted value %r8646.
or.b32 %r8648, %r8648, 2;
add.s32 %r4525, %r428, -1;
clz.b32 %r4526, %r4525;
mov.u32 %r4527, 32;
sub.s32 %r8645, %r4527, %r4526;
max.s32 %r8663, %r8641, %r8645;
shr.u32 %r4528, %r8644, 31;
add.s32 %r4529, %r4528, %r428;
add.s32 %r8646, %r4529, -2;
// Second group, second pair: samples at index %r8430 + 1 and
// %r8430 + 1 + %r1. Skipped entirely when %r8429 + 3 >= %r5.
$L__BB1_194:
add.s32 %r8665, %r8430, 1;
add.s32 %r4534, %r8429, 3;
setp.ge.u32 %p215, %r4534, %r5;
// Defaults (all zero) used when the second pair does not exist; %r8666 is
// also the default max-width mask for $L__BB1_205.
mov.u32 %r8666, 0;
mov.u32 %r8659, %r8666;
mov.u32 %r8660, %r8666;
mov.u32 %r8661, %r8666;
mov.u32 %r8662, %r8666;
@%p215 bra $L__BB1_205;
// Sample 2: word at %rd3 + 4 * (%r8665 + %rd5).
cvt.u64.u32 %rd186, %r8665;
add.s64 %rd187, %rd186, %rd5;
shl.b64 %rd188, %rd187, 2;
add.s64 %rd189, %rd3, %rd188;
ld.global.u32 %r438, [%rd189];
setp.eq.s32 %p216, %r438, 0;
mov.u32 %r8660, 0;
mov.u32 %r8649, %r8660;
@%p216 bra $L__BB1_197;
and.b32 %r4536, %r438, -2147483648;
abs.s32 %r4537, %r438;
shl.b32 %r4538, %r4537, %r57;
or.b32 %r8649, %r4538, %r4536;
$L__BB1_197:
shl.b32 %r4541, %r8649, 1;
shr.u32 %r4542, %r4541, %r43;
and.b32 %r441, %r4542, -2;
setp.eq.s32 %p217, %r441, 0;
mov.u32 %r8662, %r8660;
@%p217 bra $L__BB1_199;
// Mask bit 2: sample 2 is nonzero; width %r8660, emitted value %r8662.
or.b32 %r8648, %r8648, 4;
add.s32 %r4543, %r441, -1;
clz.b32 %r4544, %r4543;
mov.u32 %r4545, 32;
sub.s32 %r8660, %r4545, %r4544;
max.s32 %r8663, %r8663, %r8660;
shr.u32 %r4546, %r8649, 31;
add.s32 %r4547, %r4546, %r441;
add.s32 %r8662, %r4547, -2;
$L__BB1_199:
// Sample 3 is read only when %p63 is false (%r6 >= 2).
mov.u32 %r8659, 0;
mov.u32 %r8654, %r8659;
@%p63 bra $L__BB1_202;
add.s32 %r4550, %r8665, %r1;
cvt.u64.u32 %rd190, %r4550;
add.s64 %rd191, %rd190, %rd5;
shl.b64 %rd192, %rd191, 2;
add.s64 %rd193, %rd3, %rd192;
ld.global.u32 %r450, [%rd193];
setp.eq.s32 %p219, %r450, 0;
@%p219 bra $L__BB1_202;
and.b32 %r4551, %r450, -2147483648;
abs.s32 %r4552, %r450;
shl.b32 %r4553, %r4552, %r57;
or.b32 %r8654, %r4553, %r4551;
$L__BB1_202:
shl.b32 %r4556, %r8654, 1;
shr.u32 %r4557, %r4556, %r43;
and.b32 %r453, %r4557, -2;
setp.eq.s32 %p220, %r453, 0;
mov.u32 %r8661, %r8659;
@%p220 bra $L__BB1_204;
// Mask bit 3: sample 3 is nonzero; width %r8659, emitted value %r8661.
or.b32 %r8648, %r8648, 8;
add.s32 %r4558, %r453, -1;
clz.b32 %r4559, %r4558;
mov.u32 %r4560, 32;
sub.s32 %r8659, %r4560, %r4559;
max.s32 %r8663, %r8663, %r8659;
shr.u32 %r4561, %r8654, 31;
add.s32 %r4562, %r4561, %r453;
add.s32 %r8661, %r4562, -2;
$L__BB1_204:
// Both pairs consumed: the sample index advances by two.
add.s32 %r8665, %r8430, 2;
// Commit the advanced sample index, derive the table context %r470 from the
// FIRST group's nonzero mask %r8455, and build the second group's max-width
// mask %r8666.
$L__BB1_205:
mov.u32 %r8430, %r8665;
// %r470 = (%r8455 >> 1) | (%r8455 & 1)
shr.u32 %r4564, %r8455, 1;
or.b32 %r470, %r4564, %r250;
// %r471 = max(%r8663 - 1, 0); the mask is built only when %r8663 >= 2.
add.s32 %r4565, %r8663, -1;
setp.lt.s32 %p221, %r8663, 2;
setp.gt.s32 %p222, %r8663, 1;
selp.b32 %r471, %r4565, 0, %p222;
@%p221 bra $L__BB1_207;
// Bits 0..3 mark which of the four widths equal the maximum %r8663.
setp.eq.s32 %p223, %r8641, %r8663;
selp.u32 %r4566, 1, 0, %p223;
setp.eq.s32 %p224, %r8645, %r8663;
selp.u32 %r4567, -1, 0, %p224;
bfi.b32 %r4568, %r4567, %r4566, 1, 1;
setp.eq.s32 %p225, %r8660, %r8663;
selp.u16 %rs571, 1, 0, %p225;
mul.wide.u16 %r4569, %rs571, 4;
or.b32 %r4570, %r4568, %r4569;
setp.eq.s32 %p226, %r8659, %r8663;
selp.u16 %rs572, 1, 0, %p226;
mul.wide.u16 %r4571, %rs572, 8;
or.b32 %r8666, %r4570, %r4571;
// Record the second group's per-column data in the shared arrays (slots
// %r133 + 1 / + 2 and %r135 + 1 / + 2), then fetch its 16-bit table entry.
$L__BB1_207:
// slot[1] = max(first group's sample 3 width %r8466, this group's sample 1
// width %r8645), compared on their low bytes.
and.b32 %r4572, %r8645, 255;
and.b32 %r4573, %r8466, 255;
setp.lt.u32 %p227, %r4572, %r4573;
cvt.u16.u32 %rs573, %r8645;
selp.b16 %rs574, %rs3, %rs573, %p227;
st.shared.u8 [%r133+1], %rs574;
// slot[2] = this group's sample 3 width.
st.shared.u8 [%r133+2], %r8659;
// cleanup_cx_val[slot + 1] = first group's mask bit 3 | this group's mask
// bit 1; cleanup_cx_val[slot + 2] = this group's mask bit 3.
and.b32 %r474, %r8648, 2;
shr.u32 %r4574, %r474, 1;
or.b32 %r4575, %r137, %r4574;
st.shared.u8 [%r135+1], %r4575;
and.b32 %r475, %r8648, 8;
shr.u32 %r4576, %r475, 3;
st.shared.u8 [%r135+2], %r4576;
// Table index = (%r470 << 8) | (%r8648 << 4) | %r8666; 16-bit entries at %rd8.
shl.b32 %r4577, %r8648, 4;
shl.b32 %r4578, %r470, 8;
or.b32 %r4579, %r4577, %r4578;
or.b32 %r4580, %r4579, %r8666;
mul.wide.u32 %rd195, %r4580, 2;
add.s64 %rd196, %rd8, %rd195;
ld.global.u16 %rs48, [%rd196];
// Bits 4..6 of the entry = number of bits to emit; bits 8 and up = the bits.
shr.u16 %rs575, %rs48, 4;
and.b16 %rs49, %rs575, 7;
setp.eq.s16 %p228, %rs49, 0;
mov.u32 %r8678, %r8485;
// Zero-length entry: nothing to emit.
@%p228 bra $L__BB1_214;
cvt.u32.u16 %r8667, %rs49;
shr.u16 %rs576, %rs48, 8;
cvt.u32.u16 %r8668, %rs576;
// Emit %r8667 bits of %r8668 (LSB first) into the byte accumulator %rs1165
// for the second group. Completed bytes are stored at descending addresses
// %rd1 + %rd4 + 20548 - n. State registers as in $L__BB1_75: %r8961 = bits
// used, %r8960 = capacity-reducing flag, %r8962 = bytes written.
$L__BB1_209:
mov.u32 %r478, %r8667;
// More than 2879 bytes already written: set %r8678 = 1 and stop emitting.
setp.gt.u32 %p229, %r8962, 2879;
mov.u32 %r8678, 1;
@%p229 bra $L__BB1_214;
// Free bits in the current byte: %r4584 = 8 - %r8960 - %r8961.
mov.u32 %r4582, 8;
sub.s32 %r4583, %r4582, %r8960;
sub.s32 %r4584, %r4583, %r8961;
// Take n = min(free, remaining) bits; the selp guards the n == 32 shift.
min.u32 %r4585, %r4584, %r478;
setp.eq.s32 %p230, %r4585, 32;
mov.u32 %r4586, -1;
shl.b32 %r4587, %r4586, %r4585;
not.b32 %r4588, %r4587;
selp.b32 %r4589, -1, %r4588, %p230;
and.b32 %r4590, %r4589, %r8668;
shl.b32 %r4591, %r4590, %r8961;
cvt.u16.u32 %rs577, %r4591;
or.b16 %rs1165, %rs1165, %rs577;
add.s32 %r8961, %r4585, %r8961;
sub.s32 %r8667, %r478, %r4585;
shr.u32 %r8668, %r8668, %r4585;
// Byte not yet full: go test whether bits remain.
setp.gt.u32 %p231, %r4584, %r478;
@%p231 bra $L__BB1_213;
// Byte full. If the capacity flag was set and the byte is not 127, the flag
// is just cleared (freeing one more bit) and nothing is stored yet.
setp.ne.s32 %p232, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs578, %rs1165, 255;
setp.ne.s16 %p233, %rs578, 127;
and.pred %p234, %p232, %p233;
@%p234 bra $L__BB1_213;
// Store the byte at offset 20548 - %r8962 and restart the accumulator.
mov.u32 %r4594, 20548;
sub.s32 %r4595, %r4594, %r8962;
cvt.u64.u32 %rd197, %r4595;
add.s64 %rd198, %rd197, %rd4;
add.s64 %rd199, %rd1, %rd198;
st.global.u8 [%rd199], %rs1165;
add.s32 %r8962, %r8962, 1;
// A stored byte greater than 143 sets the capacity flag for the next byte.
setp.gt.u16 %p235, %rs578, 143;
selp.u32 %r8960, 1, 0, %p235;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_213:
// Loop until every requested bit is consumed; %r8678 keeps the prior flag.
setp.ne.s32 %p236, %r8667, 0;
mov.u32 %r8678, %r8485;
@%p236 bra $L__BB1_209;
// Event step for the second group, taken only when its context %r470 is 0.
// A group with a nonzero mask appends a 0 bit to %rs1096; an all-zero group
// goes to $L__BB1_254 (outside this chunk).
$L__BB1_214:
setp.ne.s32 %p237, %r470, 0;
@%p237 bra $L__BB1_262;
setp.eq.s32 %p238, %r8648, 0;
// %rd13 = address of the next byte of the stream based at offset 17477.
add.s32 %r4596, %r8514, 17477;
cvt.u64.u32 %rd200, %r4596;
add.s64 %rd201, %rd200, %rd4;
add.s64 %rd13, %rd1, %rd201;
@%p238 bra $L__BB1_254;
// Append a 0 bit and decrement the bit counter %r8520.
shl.b16 %rs1096, %rs1096, 1;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p239, %r8520, 0;
mov.u32 %r8712, %r8723;
@%p239 bra $L__BB1_219;
// Byte complete: store it unless 192 bytes were already written, in which
// case only the flag %r8712 is set to 1.
setp.gt.u32 %p240, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r8712, 1;
@%p240 bra $L__BB1_219;
st.global.u8 [%rd13], %rs1096;
add.s32 %r8514, %r8514, 1;
// The bit just appended is 0, so this byte cannot be 0xFF: the counter is
// reloaded with a constant 8.
mov.u32 %r8520, 8;
mov.u16 %rs1096, 0;
mov.u32 %r8712, %r8723;
// Map the state index %r8725 to a bit count %r8682:
//   0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, otherwise 5.
$L__BB1_219:
setp.lt.u32 %p241, %r8725, 3;
mov.u32 %r8682, 0;
@%p241 bra $L__BB1_222;
setp.lt.u32 %p242, %r8725, 6;
mov.u32 %r8682, 1;
@%p242 bra $L__BB1_222;
// Branch-free selection for the remaining ranges.
setp.lt.u32 %p243, %r8725, 9;
setp.eq.s32 %p244, %r8725, 11;
selp.b32 %r4602, 4, 5, %p244;
setp.lt.u32 %p245, %r8725, 11;
selp.b32 %r4603, 3, %r4602, %p245;
selp.b32 %r8682, 2, %r4603, %p243;
// Write the low %r8682 bits of the run counter %r8726, most significant
// first, into %rs1096 (second-group copy of the logic at $L__BB1_88). This
// block is the peeled prologue of a 4x-unrolled loop: it emits
// (%r8682 mod 4) bits, and $L__BB1_237 emits the rest.
// Each flush stores one byte at %rd1 + %rd4 + 17477 + %r8514. Once %r8514
// exceeds 191 no byte is stored and the flag register is set to 1 instead.
$L__BB1_222:
setp.eq.s32 %p246, %r8682, 0;
@%p246 bra $L__BB1_250;
// %r502 = index of the top bit; %r503 = number of peeled iterations.
add.s32 %r502, %r8682, -1;
and.b32 %r503, %r8682, 3;
setp.eq.s32 %p247, %r503, 0;
mov.u32 %r8692, %r8682;
mov.u32 %r8695, %r8712;
@%p247 bra $L__BB1_235;
// Peeled iteration 1: bit %r502 of %r8726; the bfi forms (acc << 1) | bit.
mov.u32 %r4605, 1;
shl.b32 %r4606, %r4605, %r502;
and.b32 %r4607, %r4606, %r8726;
setp.ne.s32 %p248, %r4607, 0;
selp.u32 %r4608, 1, 0, %p248;
cvt.u32.u16 %r4609, %rs1096;
bfi.b32 %r4610, %r4609, %r4608, 1, 8;
cvt.u16.u32 %rs1096, %r4610;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p249, %r8520, 0;
mov.u32 %r8695, %r8712;
@%p249 bra $L__BB1_227;
setp.gt.u32 %p250, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r8695, %r4605;
@%p250 bra $L__BB1_227;
add.s32 %r4614, %r8514, 17477;
cvt.u64.u32 %rd202, %r4614;
add.s64 %rd203, %rd202, %rd4;
add.s64 %rd204, %rd1, %rd203;
st.global.u8 [%rd204], %rs1096;
add.s32 %r8514, %r8514, 1;
// This flush reloads the counter with a constant 8; the later flushes pick
// 7 when the stored byte equals 0xFF.
mov.u32 %r8520, 8;
mov.u16 %rs1096, 0;
mov.u32 %r8695, %r8712;
$L__BB1_227:
setp.eq.s32 %p251, %r503, 1;
mov.u32 %r8712, %r8695;
mov.u32 %r8692, %r502;
@%p251 bra $L__BB1_235;
// Peeled iteration 2: bit (%r8682 - 2).
add.s32 %r8692, %r8682, -2;
mov.u32 %r4615, 1;
shl.b32 %r4616, %r4615, %r8692;
and.b32 %r4617, %r4616, %r8726;
setp.ne.s32 %p252, %r4617, 0;
selp.u32 %r4618, 1, 0, %p252;
cvt.u32.u16 %r4619, %rs1096;
bfi.b32 %r4620, %r4619, %r4618, 1, 8;
cvt.u16.u32 %rs1096, %r4620;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p253, %r8520, 0;
mov.u32 %r8686, %r8695;
@%p253 bra $L__BB1_231;
setp.gt.u32 %p254, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r8686, %r4615;
@%p254 bra $L__BB1_231;
add.s32 %r4623, %r8514, 17477;
cvt.u64.u32 %rd205, %r4623;
add.s64 %rd206, %rd205, %rd4;
add.s64 %rd207, %rd1, %rd206;
and.b16 %rs585, %rs1096, 255;
st.global.u8 [%rd207], %rs1096;
add.s32 %r8514, %r8514, 1;
// After a 0xFF byte the next byte gets only 7 bits, otherwise 8.
setp.eq.s16 %p255, %rs585, 255;
selp.b32 %r8520, 7, 8, %p255;
mov.u16 %rs1096, 0;
mov.u32 %r8686, %r8695;
$L__BB1_231:
setp.eq.s32 %p256, %r503, 2;
mov.u32 %r8712, %r8686;
mov.u32 %r8695, %r8686;
@%p256 bra $L__BB1_235;
// Peeled iteration 3: bit (%r8682 - 3).
add.s32 %r8692, %r8682, -3;
mov.u32 %r4624, 1;
shl.b32 %r4625, %r4624, %r8692;
and.b32 %r4626, %r4625, %r8726;
setp.ne.s32 %p257, %r4626, 0;
selp.u32 %r4627, 1, 0, %p257;
cvt.u32.u16 %r4628, %rs1096;
bfi.b32 %r4629, %r4628, %r4627, 1, 8;
cvt.u16.u32 %rs1096, %r4629;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p258, %r8520, 0;
mov.u32 %r8712, %r8686;
mov.u32 %r8695, %r8686;
@%p258 bra $L__BB1_235;
setp.gt.u32 %p259, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r8712, %r4624;
mov.u32 %r8695, %r4624;
@%p259 bra $L__BB1_235;
add.s32 %r4634, %r8514, 17477;
cvt.u64.u32 %rd208, %r4634;
add.s64 %rd209, %rd208, %rd4;
add.s64 %rd210, %rd1, %rd209;
and.b16 %rs588, %rs1096, 255;
st.global.u8 [%rd210], %rs1096;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p260, %rs588, 255;
selp.b32 %r8520, 7, 8, %p260;
mov.u16 %rs1096, 0;
mov.u32 %r8712, %r8686;
mov.u32 %r8695, %r8686;
$L__BB1_235:
// Fewer than four bits in total (%r502 < 3): the prologue emitted them all.
setp.lt.u32 %p261, %r502, 3;
@%p261 bra $L__BB1_250;
mov.u32 %r8712, %r8695;
$L__BB1_237:
add.s32 %r4635, %r8692, -1;
mov.u32 %r4636, 1;
shl.b32 %r4637, %r4636, %r4635;
and.b32 %r4638, %r4637, %r8726;
setp.ne.s32 %p262, %r4638, 0;
selp.u32 %r4639, 1, 0, %p262;
cvt.u32.u16 %r4640, %rs1096;
bfi.b32 %r8701, %r4640, %r4639, 1, 8;
add.s32 %r8702, %r8520, -1;
setp.ne.s32 %p263, %r8702, 0;
mov.u32 %r8700, %r8712;
@%p263 bra $L__BB1_240;
setp.gt.u32 %p264, %r8514, 191;
mov.u32 %r8702, 0;
mov.u32 %r8700, %r4636;
@%p264 bra $L__BB1_240;
cvt.u16.u32 %rs589, %r8701;
and.b16 %rs590, %rs589, 255;
add.s32 %r4644, %r8514, 17477;
cvt.u64.u32 %rd211, %r4644;
add.s64 %rd212, %rd211, %rd4;
add.s64 %rd213, %rd1, %rd212;
st.global.u8 [%rd213], %rs589;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p265, %rs590, 255;
selp.b32 %r8702, 7, 8, %p265;
mov.u32 %r8701, 0;
mov.u32 %r8700, %r8712;
$L__BB1_240:
add.s32 %r4645, %r8692, -2;
shl.b32 %r4647, %r4636, %r4645;
and.b32 %r4648, %r4647, %r8726;
setp.ne.s32 %p266, %r4648, 0;
and.b32 %r4649, %r8701, 127;
selp.u32 %r4650, 1, 0, %p266;
bfi.b32 %r8705, %r4649, %r4650, 1, 7;
add.s32 %r8706, %r8702, -1;
setp.ne.s32 %p267, %r8706, 0;
mov.u32 %r8704, %r8700;
@%p267 bra $L__BB1_243;
setp.gt.u32 %p268, %r8514, 191;
mov.u32 %r8706, 0;
mov.u32 %r8704, 1;
@%p268 bra $L__BB1_243;
cvt.u16.u32 %rs591, %r8705;
and.b16 %rs592, %rs591, 255;
add.s32 %r4654, %r8514, 17477;
cvt.u64.u32 %rd214, %r4654;
add.s64 %rd215, %rd214, %rd4;
add.s64 %rd216, %rd1, %rd215;
st.global.u8 [%rd216], %rs591;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p269, %rs592, 255;
selp.b32 %r8706, 7, 8, %p269;
mov.u32 %r8705, 0;
mov.u32 %r8704, %r8700;
$L__BB1_243:
add.s32 %r4655, %r8692, -3;
mov.u32 %r4656, 1;
shl.b32 %r4657, %r4656, %r4655;
and.b32 %r4658, %r4657, %r8726;
setp.ne.s32 %p270, %r4658, 0;
and.b32 %r4659, %r8705, 127;
selp.u32 %r4660, 1, 0, %p270;
bfi.b32 %r8709, %r4659, %r4660, 1, 7;
add.s32 %r8710, %r8706, -1;
setp.ne.s32 %p271, %r8710, 0;
mov.u32 %r8708, %r8704;
@%p271 bra $L__BB1_246;
setp.gt.u32 %p272, %r8514, 191;
mov.u32 %r8710, 0;
mov.u32 %r8708, %r4656;
@%p272 bra $L__BB1_246;
cvt.u16.u32 %rs593, %r8709;
and.b16 %rs594, %rs593, 255;
add.s32 %r4664, %r8514, 17477;
cvt.u64.u32 %rd217, %r4664;
add.s64 %rd218, %rd217, %rd4;
add.s64 %rd219, %rd1, %rd218;
st.global.u8 [%rd219], %rs593;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p273, %rs594, 255;
selp.b32 %r8710, 7, 8, %p273;
mov.u32 %r8709, 0;
mov.u32 %r8708, %r8704;
$L__BB1_246:
add.s32 %r8692, %r8692, -4;
shl.b32 %r4666, %r4656, %r8692;
and.b32 %r4667, %r4666, %r8726;
setp.ne.s32 %p274, %r4667, 0;
and.b32 %r4668, %r8709, 127;
selp.u32 %r4669, 1, 0, %p274;
bfi.b32 %r4670, %r4668, %r4669, 1, 15;
cvt.u16.u32 %rs1096, %r4670;
add.s32 %r8520, %r8710, -1;
setp.ne.s32 %p275, %r8520, 0;
mov.u32 %r8712, %r8708;
@%p275 bra $L__BB1_249;
setp.gt.u32 %p276, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r8712, 1;
@%p276 bra $L__BB1_249;
add.s32 %r4673, %r8514, 17477;
cvt.u64.u32 %rd220, %r4673;
add.s64 %rd221, %rd220, %rd4;
add.s64 %rd222, %rd1, %rd221;
and.b16 %rs596, %rs1096, 255;
st.global.u8 [%rd222], %rs1096;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p277, %rs596, 255;
selp.b32 %r8520, 7, 8, %p277;
mov.u16 %rs1096, 0;
mov.u32 %r8712, %r8708;
$L__BB1_249:
setp.ne.s32 %p278, %r8692, 0;
@%p278 bra $L__BB1_237;
// $L__BB1_250: step the adaptive state %r8725 down by one (saturating at 0),
// reset the counter %r8726 to 0, and recompute the threshold
// %r8724 = 1 << e, where e is chosen from the new %r8725 by the ladder below:
//   0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, anything else -> 5.
// NOTE(review): this matches the shape of the MEL exponent table used by an
// HTJ2K cleanup-pass encoder -- confirm against the CUDA source.
$L__BB1_250:
add.s32 %r4675, %r8725, -1;
setp.eq.s32 %p279, %r8725, 0;
mov.u32 %r8726, 0;
// %r8725 = (%r8725 == 0) ? 0 : %r8725 - 1
selp.b32 %r8725, 0, %r4675, %p279;
setp.lt.u32 %p280, %r8725, 3;
// e defaults to 0 (copied from the freshly zeroed %r8726)
mov.u32 %r8718, %r8726;
@%p280 bra $L__BB1_253;
setp.lt.u32 %p281, %r8725, 6;
mov.u32 %r8718, 1;
@%p281 bra $L__BB1_253;
// Branch-free tail of the ladder for state values >= 6.
setp.lt.u32 %p282, %r8725, 9;
setp.eq.s32 %p283, %r8725, 11;
selp.b32 %r4677, 4, 5, %p283;
setp.lt.u32 %p284, %r8725, 11;
selp.b32 %r4678, 3, %r4677, %p284;
selp.b32 %r8718, 2, %r4678, %p282;
$L__BB1_253:
// threshold = 1 << e
mov.u32 %r4680, 1;
shl.b32 %r8724, %r4680, %r8718;
// Carry the status value %r8712 forward in %r8723 for the code at $L__BB1_262.
mov.u32 %r8723, %r8712;
bra.uni $L__BB1_262;
// $L__BB1_157: load seven bytes ([%rd11+1], [%rd12], [%rd12+1], [%rd9..%rd9+3])
// and then append the first (value, bit-count) pair to a byte stream that is
// written at descending addresses.
// Registers used by the packing loop ($L__BB1_159):
//   %rs1165 : byte being assembled        %r8961 : bits already placed in it
//   %r8960  : 0/1 value that reduces the capacity of the current byte (8 - %r8960)
//   %r8962  : number of bytes stored so far; bytes go to offset 20548 - %r8962
//   %r8597  : remaining value bits        %r8596 : remaining bit count
//   %r8607  : status out: 1 if %r8962 exceeded 2879, otherwise %r8485
// NOTE(review): the 7-bit-after-large-byte rule looks like the HTJ2K VLC
// bit-stuffing scheme -- confirm against the CUDA source.
$L__BB1_157:
ld.global.u8 %rs26, [%rd11+1];
ld.global.u8 %rs27, [%rd12];
ld.global.u8 %rs28, [%rd12+1];
ld.global.u8 %rs29, [%rd9];
ld.global.u8 %rs30, [%rd9+1];
ld.global.u8 %rs31, [%rd9+2];
ld.global.u8 %rs32, [%rd9+3];
// A zero bit count means there is nothing to emit for this pair.
setp.eq.s16 %p174, %rs26, 0;
mov.u32 %r8607, %r8485;
@%p174 bra $L__BB1_164;
// value = byte at [%rd11], count = byte at [%rd11+1]
ld.global.u8 %r8597, [%rd11];
cvt.u32.u16 %r8596, %rs26;
$L__BB1_159:
mov.u32 %r346, %r8596;
// Stop with status 1 once more than 2879 bytes have been stored.
setp.gt.u32 %p175, %r8962, 2879;
mov.u32 %r8607, 1;
@%p175 bra $L__BB1_164;
// avail = 8 - %r8960 - %r8961; take = min(avail, remaining)
mov.u32 %r4432, 8;
sub.s32 %r4433, %r4432, %r8960;
sub.s32 %r4434, %r4433, %r8961;
min.u32 %r4435, %r4434, %r346;
// mask = (take == 32) ? 0xFFFFFFFF : ~(0xFFFFFFFF << take)
setp.eq.s32 %p176, %r4435, 32;
mov.u32 %r4436, -1;
shl.b32 %r4437, %r4436, %r4435;
not.b32 %r4438, %r4437;
selp.b32 %r4439, -1, %r4438, %p176;
// OR the low `take` bits of the value into the byte at bit position %r8961.
and.b32 %r4440, %r4439, %r8597;
shl.b32 %r4441, %r4440, %r8961;
cvt.u16.u32 %rs552, %r4441;
or.b16 %rs1165, %rs1165, %rs552;
add.s32 %r8961, %r4435, %r8961;
sub.s32 %r8596, %r346, %r4435;
shr.u32 %r8597, %r8597, %r4435;
// avail > remaining (before this step): the byte is not full yet.
setp.gt.u32 %p177, %r4434, %r346;
@%p177 bra $L__BB1_163;
// Byte reached its current capacity. If %r8960 was set and the low byte is
// not 127, clear %r8960 and keep filling the same byte (one more bit fits);
// otherwise store it.
setp.ne.s32 %p178, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs553, %rs1165, 255;
setp.ne.s16 %p179, %rs553, 127;
and.pred %p180, %p178, %p179;
@%p180 bra $L__BB1_163;
// Store address = %rd1 + %rd4 + (20548 - %r8962): the stream grows downwards.
mov.u32 %r4444, 20548;
sub.s32 %r4445, %r4444, %r8962;
cvt.u64.u32 %rd166, %r4445;
add.s64 %rd167, %rd166, %rd4;
add.s64 %rd168, %rd1, %rd167;
st.global.u8 [%rd168], %rs1165;
add.s32 %r8962, %r8962, 1;
// A stored byte greater than 143 sets %r8960 = 1 for the next byte.
setp.gt.u16 %p181, %rs553, 143;
selp.u32 %r8960, 1, 0, %p181;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_163:
// Loop while bits remain; on normal completion the status is %r8485.
setp.ne.s32 %p182, %r8596, 0;
mov.u32 %r8607, %r8485;
@%p182 bra $L__BB1_159;
$L__BB1_164:
setp.eq.s16 %p183, %rs30, 0;
mov.u32 %r8619, %r8607;
@%p183 bra $L__BB1_171;
cvt.u32.u16 %r4446, %rs29;
and.b32 %r8609, %r4446, 255;
cvt.u32.u16 %r4447, %rs30;
and.b32 %r8608, %r4447, 255;
$L__BB1_166:
mov.u32 %r365, %r8608;
setp.gt.u32 %p184, %r8962, 2879;
mov.u32 %r8619, 1;
@%p184 bra $L__BB1_171;
mov.u32 %r4449, 8;
sub.s32 %r4450, %r4449, %r8960;
sub.s32 %r4451, %r4450, %r8961;
min.u32 %r4452, %r4451, %r365;
setp.eq.s32 %p185, %r4452, 32;
mov.u32 %r4453, -1;
shl.b32 %r4454, %r4453, %r4452;
not.b32 %r4455, %r4454;
selp.b32 %r4456, -1, %r4455, %p185;
and.b32 %r4457, %r4456, %r8609;
shl.b32 %r4458, %r4457, %r8961;
cvt.u16.u32 %rs557, %r4458;
or.b16 %rs1165, %rs1165, %rs557;
add.s32 %r8961, %r4452, %r8961;
sub.s32 %r8608, %r365, %r4452;
shr.u32 %r8609, %r8609, %r4452;
setp.gt.u32 %p186, %r4451, %r365;
@%p186 bra $L__BB1_170;
setp.ne.s32 %p187, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs558, %rs1165, 255;
setp.ne.s16 %p188, %rs558, 127;
and.pred %p189, %p187, %p188;
@%p189 bra $L__BB1_170;
mov.u32 %r4461, 20548;
sub.s32 %r4462, %r4461, %r8962;
cvt.u64.u32 %rd169, %r4462;
add.s64 %rd170, %rd169, %rd4;
add.s64 %rd171, %rd1, %rd170;
st.global.u8 [%rd171], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p190, %rs558, 143;
selp.u32 %r8960, 1, 0, %p190;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_170:
setp.ne.s32 %p191, %r8608, 0;
mov.u32 %r8619, %r8607;
@%p191 bra $L__BB1_166;
$L__BB1_171:
setp.eq.s16 %p192, %rs28, 0;
mov.u32 %r8631, %r8619;
@%p192 bra $L__BB1_178;
cvt.u32.u16 %r4463, %rs28;
and.b32 %r8620, %r4463, 255;
cvt.u32.u16 %r4464, %rs27;
and.b32 %r8621, %r4464, 255;
$L__BB1_173:
mov.u32 %r384, %r8620;
setp.gt.u32 %p193, %r8962, 2879;
mov.u32 %r8631, 1;
@%p193 bra $L__BB1_178;
mov.u32 %r4466, 8;
sub.s32 %r4467, %r4466, %r8960;
sub.s32 %r4468, %r4467, %r8961;
min.u32 %r4469, %r4468, %r384;
setp.eq.s32 %p194, %r4469, 32;
mov.u32 %r4470, -1;
shl.b32 %r4471, %r4470, %r4469;
not.b32 %r4472, %r4471;
selp.b32 %r4473, -1, %r4472, %p194;
and.b32 %r4474, %r4473, %r8621;
shl.b32 %r4475, %r4474, %r8961;
cvt.u16.u32 %rs562, %r4475;
or.b16 %rs1165, %rs1165, %rs562;
add.s32 %r8961, %r4469, %r8961;
sub.s32 %r8620, %r384, %r4469;
shr.u32 %r8621, %r8621, %r4469;
setp.gt.u32 %p195, %r4468, %r384;
@%p195 bra $L__BB1_177;
setp.ne.s32 %p196, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs563, %rs1165, 255;
setp.ne.s16 %p197, %rs563, 127;
and.pred %p198, %p196, %p197;
@%p198 bra $L__BB1_177;
mov.u32 %r4478, 20548;
sub.s32 %r4479, %r4478, %r8962;
cvt.u64.u32 %rd172, %r4479;
add.s64 %rd173, %rd172, %rd4;
add.s64 %rd174, %rd1, %rd173;
st.global.u8 [%rd174], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p199, %rs563, 143;
selp.u32 %r8960, 1, 0, %p199;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_177:
setp.ne.s32 %p200, %r8620, 0;
mov.u32 %r8631, %r8619;
@%p200 bra $L__BB1_173;
$L__BB1_178:
setp.eq.s16 %p201, %rs32, 0;
mov.u32 %r8431, 0;
mov.u32 %r8959, %r8631;
@%p201 bra $L__BB1_416;
cvt.u32.u16 %r4481, %rs31;
and.b32 %r8633, %r4481, 255;
cvt.u32.u16 %r4482, %rs32;
and.b32 %r8632, %r4482, 255;
$L__BB1_180:
mov.u32 %r403, %r8632;
setp.gt.u32 %p202, %r8962, 2879;
mov.u32 %r8959, 1;
@%p202 bra $L__BB1_416;
mov.u32 %r4485, 8;
sub.s32 %r4486, %r4485, %r8960;
sub.s32 %r4487, %r4486, %r8961;
min.u32 %r4488, %r4487, %r403;
setp.eq.s32 %p203, %r4488, 32;
mov.u32 %r4489, -1;
shl.b32 %r4490, %r4489, %r4488;
not.b32 %r4491, %r4490;
selp.b32 %r4492, -1, %r4491, %p203;
and.b32 %r4493, %r4492, %r8633;
shl.b32 %r4494, %r4493, %r8961;
cvt.u16.u32 %rs567, %r4494;
or.b16 %rs1165, %rs1165, %rs567;
add.s32 %r8961, %r4488, %r8961;
sub.s32 %r8632, %r403, %r4488;
shr.u32 %r8633, %r8633, %r4488;
setp.gt.u32 %p204, %r4487, %r403;
@%p204 bra $L__BB1_184;
setp.ne.s32 %p205, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs568, %rs1165, 255;
setp.ne.s16 %p206, %rs568, 127;
and.pred %p207, %p205, %p206;
@%p207 bra $L__BB1_184;
mov.u32 %r4497, 20548;
sub.s32 %r4498, %r4497, %r8962;
cvt.u64.u32 %rd175, %r4498;
add.s64 %rd176, %rd175, %rd4;
add.s64 %rd177, %rd1, %rd176;
st.global.u8 [%rd177], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p208, %rs568, 143;
selp.u32 %r8960, 1, 0, %p208;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_184:
setp.eq.s32 %p209, %r8632, 0;
mov.u32 %r8959, %r8631;
@%p209 bra $L__BB1_416;
bra.uni $L__BB1_180;
// $L__BB1_83: flush the assembled byte %rs1096 to [%rd10] if the byte counter
// %r8514 is still within bounds (<= 191).
//   Out of bounds: %r8520 = 0, %r8519 = 1, nothing is stored.
//   In bounds:     byte stored, %r8514 += 1, %r8520 = 8, %rs1096 = 0,
//                  %r8519 = %r8723.
// Both paths continue at $L__BB1_85.
$L__BB1_83:
setp.gt.u32 %p91, %r8514, 191;
// Preload the out-of-bounds results; they are overwritten on the store path.
mov.u32 %r8520, 0;
mov.u32 %r8519, 1;
@%p91 bra $L__BB1_85;
st.global.u8 [%rd10], %rs1096;
add.s32 %r8514, %r8514, 1;
// Start a fresh byte with 8 free bit positions.
mov.u32 %r8520, 8;
mov.u16 %rs1096, 0;
mov.u32 %r8519, %r8723;
bra.uni $L__BB1_85;
// $L__BB1_254: increment the counter %r8726. While it stays below the
// threshold %r8724 nothing else happens (jump to $L__BB1_262). When it reaches
// the threshold, shift a 1 bit into the byte accumulator %rs1096, decrement
// the free-bit count %r8520, and either flush the byte ($L__BB1_256, defined
// elsewhere in this kernel) or go straight to the state update ($L__BB1_258).
// NOTE(review): consistent with a MEL-style run-length coder completing a
// run -- confirm against the CUDA source.
$L__BB1_254:
add.s32 %r8726, %r8726, 1;
setp.lt.u32 %p285, %r8726, %r8724;
@%p285 bra $L__BB1_262;
// %rs1096 = (%rs1096 << 1) | 1
shl.b16 %rs597, %rs1096, 1;
or.b16 %rs1096, %rs597, 1;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p286, %r8520, 0;
mov.u32 %r8719, %r8723;
@%p286 bra $L__BB1_258;
// No free bits left in the byte: take the flush path.
bra.uni $L__BB1_256;
// $L__BB1_258: step the adaptive state up by one (saturating at 12), clear
// the counter, and pick the exponent for the new threshold:
//   0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, 12 -> 5.
$L__BB1_258:
add.s32 %r4684, %r8725, 1;
min.u32 %r8725, %r4684, 12;
setp.lt.u32 %p289, %r8725, 3;
mov.u32 %r8726, 0;
mov.u32 %r8722, %r8726;
@%p289 bra $L__BB1_261;
setp.lt.u32 %p290, %r8725, 6;
mov.u32 %r8722, 1;
@%p290 bra $L__BB1_261;
setp.lt.u32 %p291, %r8725, 9;
setp.eq.s32 %p292, %r8725, 11;
selp.b32 %r4686, 4, 5, %p292;
setp.lt.u32 %p293, %r8725, 11;
selp.b32 %r4687, 3, %r4686, %p293;
selp.b32 %r8722, 2, %r4687, %p291;
$L__BB1_261:
// threshold = 1 << exponent; %r8723 takes the status value held in %r8719.
mov.u32 %r4689, 1;
shl.b32 %r8724, %r4689, %r8722;
mov.u32 %r8723, %r8719;
// $L__BB1_262: compute values shared by the following emit sections
//   %r586 = max(%r8663, 1)          %r587 = %rs48 & 15
// and, if bit 0 of %r8648 is set, append (%r586 - (%r587 & 1)) low bits of
// %r8642 to a byte stream written at ascending addresses.
// Registers used by the packing loop ($L__BB1_265):
//   %r9147 : byte being assembled   %r9148 : bits already placed in it
//   %r9149 : capacity of the current byte (7 or 8)
//   %r9150 : byte offset of the next store (relative to %rd1 + %rd4)
//   %r8730 : remaining value bits   %r8729 : remaining bit count
//   %r8739 : status out: 1 if %r9150 exceeded 17476, otherwise %r9176
// NOTE(review): the "7 bits after a 0xFF byte" rule looks like the HTJ2K
// MagSgn bit-stuffing scheme -- confirm against the CUDA source.
$L__BB1_262:
max.s32 %r586, %r8663, 1;
and.b16 %rs600, %rs48, 15;
cvt.u32.u16 %r587, %rs600;
// Skip this section entirely when bit 0 of %r8648 is clear.
and.b32 %r588, %r8648, 1;
setp.eq.s32 %p294, %r588, 0;
mov.u32 %r8739, %r9176;
@%p294 bra $L__BB1_269;
// bit count = %r586 - (bit 0 of %r587); nothing to emit if it is 0.
and.b32 %r4690, %r587, 1;
sub.s32 %r8729, %r586, %r4690;
setp.eq.s32 %p295, %r8729, 0;
mov.u32 %r8739, %r9176;
@%p295 bra $L__BB1_269;
// Keep only the low `bit count` bits of %r8642.
mov.u32 %r4691, -1;
shl.b32 %r4692, %r4691, %r8729;
not.b32 %r4693, %r4692;
and.b32 %r8730, %r8642, %r4693;
$L__BB1_265:
// Stop with status 1 once the store offset has passed 17476.
setp.gt.u32 %p296, %r9150, 17476;
mov.u32 %r8739, 1;
@%p296 bra $L__BB1_269;
// take = min(capacity - used, remaining)
sub.s32 %r4695, %r9149, %r9148;
min.u32 %r4696, %r4695, %r8729;
// mask = (take == 32) ? 0xFFFFFFFF : ~(0xFFFFFFFF << take)
setp.eq.s32 %p297, %r4696, 32;
mov.u32 %r4697, -1;
shl.b32 %r4698, %r4697, %r4696;
not.b32 %r4699, %r4698;
selp.b32 %r4700, -1, %r4699, %p297;
// OR the low `take` bits of the value into the byte at bit position %r9148.
and.b32 %r4701, %r4700, %r8730;
shl.b32 %r4702, %r4701, %r9148;
or.b32 %r9147, %r4702, %r9147;
add.s32 %r9148, %r4696, %r9148;
shr.u32 %r8730, %r8730, %r4696;
sub.s32 %r8729, %r8729, %r4696;
// Byte not yet at capacity: keep accumulating.
setp.lt.u32 %p298, %r9148, %r9149;
@%p298 bra $L__BB1_268;
// Store the low 8 bits of %r9147 at %rd1 + %rd4 + %r9150.
cvt.u64.u32 %rd223, %r9150;
add.s64 %rd224, %rd223, %rd4;
add.s64 %rd225, %rd1, %rd224;
st.global.u8 [%rd225], %r9147;
add.s32 %r9150, %r9150, 1;
// After a byte equal to 255 the next byte only holds 7 bits.
setp.eq.s32 %p299, %r9147, 255;
selp.b32 %r9149, 7, 8, %p299;
// Reset the accumulator and, via the copy, the used-bit count to 0.
mov.u32 %r9147, 0;
mov.u32 %r9148, %r9147;
$L__BB1_268:
// Loop while bits remain; on normal completion the status is %r9176.
setp.ne.s32 %p300, %r8729, 0;
mov.u32 %r8739, %r9176;
@%p300 bra $L__BB1_265;
$L__BB1_269:
setp.eq.s32 %p301, %r474, 0;
mov.u32 %r8754, %r8739;
@%p301 bra $L__BB1_276;
shr.u32 %r4705, %r587, 1;
and.b32 %r4706, %r4705, 1;
sub.s32 %r8744, %r586, %r4706;
setp.eq.s32 %p302, %r8744, 0;
mov.u32 %r8754, %r8739;
@%p302 bra $L__BB1_276;
mov.u32 %r4707, -1;
shl.b32 %r4708, %r4707, %r8744;
not.b32 %r4709, %r4708;
and.b32 %r8745, %r8646, %r4709;
$L__BB1_272:
setp.gt.u32 %p303, %r9150, 17476;
mov.u32 %r8754, 1;
@%p303 bra $L__BB1_276;
sub.s32 %r4711, %r9149, %r9148;
min.u32 %r4712, %r4711, %r8744;
setp.eq.s32 %p304, %r4712, 32;
mov.u32 %r4713, -1;
shl.b32 %r4714, %r4713, %r4712;
not.b32 %r4715, %r4714;
selp.b32 %r4716, -1, %r4715, %p304;
and.b32 %r4717, %r4716, %r8745;
shl.b32 %r4718, %r4717, %r9148;
or.b32 %r9147, %r4718, %r9147;
add.s32 %r9148, %r4712, %r9148;
shr.u32 %r8745, %r8745, %r4712;
sub.s32 %r8744, %r8744, %r4712;
setp.lt.u32 %p305, %r9148, %r9149;
@%p305 bra $L__BB1_275;
cvt.u64.u32 %rd226, %r9150;
add.s64 %rd227, %rd226, %rd4;
add.s64 %rd228, %rd1, %rd227;
st.global.u8 [%rd228], %r9147;
add.s32 %r9150, %r9150, 1;
setp.eq.s32 %p306, %r9147, 255;
selp.b32 %r9149, 7, 8, %p306;
mov.u32 %r9147, 0;
mov.u32 %r9148, %r9147;
$L__BB1_275:
setp.ne.s32 %p307, %r8744, 0;
mov.u32 %r8754, %r8739;
@%p307 bra $L__BB1_272;
$L__BB1_276:
and.b32 %r4721, %r8648, 4;
setp.eq.s32 %p308, %r4721, 0;
mov.u32 %r8769, %r8754;
@%p308 bra $L__BB1_283;
shr.u32 %r4722, %r587, 2;
and.b32 %r4723, %r4722, 1;
sub.s32 %r8759, %r586, %r4723;
setp.eq.s32 %p309, %r8759, 0;
mov.u32 %r8769, %r8754;
@%p309 bra $L__BB1_283;
mov.u32 %r4724, -1;
shl.b32 %r4725, %r4724, %r8759;
not.b32 %r4726, %r4725;
and.b32 %r8760, %r8662, %r4726;
$L__BB1_279:
setp.gt.u32 %p310, %r9150, 17476;
mov.u32 %r8769, 1;
@%p310 bra $L__BB1_283;
sub.s32 %r4728, %r9149, %r9148;
min.u32 %r4729, %r4728, %r8759;
setp.eq.s32 %p311, %r4729, 32;
mov.u32 %r4730, -1;
shl.b32 %r4731, %r4730, %r4729;
not.b32 %r4732, %r4731;
selp.b32 %r4733, -1, %r4732, %p311;
and.b32 %r4734, %r4733, %r8760;
shl.b32 %r4735, %r4734, %r9148;
or.b32 %r9147, %r4735, %r9147;
add.s32 %r9148, %r4729, %r9148;
shr.u32 %r8760, %r8760, %r4729;
sub.s32 %r8759, %r8759, %r4729;
setp.lt.u32 %p312, %r9148, %r9149;
@%p312 bra $L__BB1_282;
cvt.u64.u32 %rd229, %r9150;
add.s64 %rd230, %rd229, %rd4;
add.s64 %rd231, %rd1, %rd230;
st.global.u8 [%rd231], %r9147;
add.s32 %r9150, %r9150, 1;
setp.eq.s32 %p313, %r9147, 255;
selp.b32 %r9149, 7, 8, %p313;
mov.u32 %r9147, 0;
mov.u32 %r9148, %r9147;
$L__BB1_282:
setp.ne.s32 %p314, %r8759, 0;
mov.u32 %r8769, %r8754;
@%p314 bra $L__BB1_279;
$L__BB1_283:
setp.eq.s32 %p315, %r475, 0;
mov.u32 %r9176, %r8769;
@%p315 bra $L__BB1_290;
shr.u32 %r4738, %r587, 3;
sub.s32 %r8774, %r586, %r4738;
setp.eq.s32 %p316, %r8774, 0;
mov.u32 %r9176, %r8769;
@%p316 bra $L__BB1_290;
mov.u32 %r4739, -1;
shl.b32 %r4740, %r4739, %r8774;
not.b32 %r4741, %r4740;
and.b32 %r8775, %r8661, %r4741;
$L__BB1_286:
setp.gt.u32 %p317, %r9150, 17476;
mov.u32 %r9176, 1;
@%p317 bra $L__BB1_290;
sub.s32 %r4743, %r9149, %r9148;
min.u32 %r4744, %r4743, %r8774;
setp.eq.s32 %p318, %r4744, 32;
mov.u32 %r4745, -1;
shl.b32 %r4746, %r4745, %r4744;
not.b32 %r4747, %r4746;
selp.b32 %r4748, -1, %r4747, %p318;
and.b32 %r4749, %r4748, %r8775;
shl.b32 %r4750, %r4749, %r9148;
or.b32 %r9147, %r4750, %r9147;
add.s32 %r9148, %r4744, %r9148;
shr.u32 %r8775, %r8775, %r4744;
sub.s32 %r8774, %r8774, %r4744;
setp.lt.u32 %p319, %r9148, %r9149;
@%p319 bra $L__BB1_289;
cvt.u64.u32 %rd232, %r9150;
add.s64 %rd233, %rd232, %rd4;
add.s64 %rd234, %rd1, %rd233;
st.global.u8 [%rd234], %r9147;
add.s32 %r9150, %r9150, 1;
setp.eq.s32 %p320, %r9147, 255;
selp.b32 %r9149, 7, 8, %p320;
mov.u32 %r9147, 0;
mov.u32 %r9148, %r9147;
$L__BB1_289:
setp.ne.s32 %p321, %r8774, 0;
mov.u32 %r9176, %r8769;
@%p321 bra $L__BB1_286;
// $L__BB1_290: three-way dispatch on the signed values %r130 and %r471.
//   either value < 1        -> $L__BB1_338 (skip)
//   min(%r130, %r471) < 3   -> $L__BB1_330
//   otherwise               -> $L__BB1_292
// Before the second branch, %rd14 is set to the address
// %rd1 + %rd4 + (%r8514 + 17477), which both targets use for a byte store.
$L__BB1_290:
setp.lt.s32 %p322, %r471, 1;
setp.lt.s32 %p323, %r130, 1;
or.pred %p324, %p323, %p322;
@%p324 bra $L__BB1_338;
min.s32 %r4753, %r130, %r471;
setp.lt.s32 %p325, %r4753, 3;
// The 32-bit sum %r8514 + 17477 is zero-extended to 64 bits before the adds.
add.s32 %r4754, %r8514, 17477;
cvt.u64.u32 %rd235, %r4754;
add.s64 %rd236, %rd235, %rd4;
add.s64 %rd14, %rd1, %rd236;
@%p325 bra $L__BB1_330;
bra.uni $L__BB1_292;
// $L__BB1_330: increment the counter %r8726. While it stays below the
// threshold %r8724 nothing else happens (jump to $L__BB1_338). When it reaches
// the threshold: shift a 1 bit into the byte accumulator %rs1096, flush the
// byte if it became full, then step the adaptive state %r8725 up by one
// (saturating at 12) and recompute the threshold.
//   %r8520 : free bit positions left in %rs1096
//   %r8514 : number of bytes stored so far (stores stop once it exceeds 191)
//   %rd14  : store address, computed before the branch to this label as
//            %rd1 + %rd4 + (%r8514 + 17477)
//   %r8829 : status: 1 if the store was skipped for lack of space, otherwise
//            the incoming %r8723; copied back into %r8723 at the end.
// NOTE(review): consistent with a MEL-style run-length coder completing a
// run -- confirm against the CUDA source.
$L__BB1_330:
add.s32 %r8726, %r8726, 1;
setp.lt.u32 %p372, %r8726, %r8724;
@%p372 bra $L__BB1_338;
// %rs1096 = (%rs1096 << 1) | 1
shl.b16 %rs617, %rs1096, 1;
or.b16 %rs1096, %rs617, 1;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p373, %r8520, 0;
mov.u32 %r8829, %r8723;
@%p373 bra $L__BB1_334;
// Byte is full. Skip the store (status 1, %r8520 left at 0) when out of room.
setp.gt.u32 %p374, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r8829, 1;
@%p374 bra $L__BB1_334;
and.b16 %rs619, %rs1096, 255;
st.global.u8 [%rd14], %rs1096;
add.s32 %r8514, %r8514, 1;
// After a byte equal to 255 the next byte only gets 7 bit positions.
setp.eq.s16 %p375, %rs619, 255;
selp.b32 %r8520, 7, 8, %p375;
mov.u16 %rs1096, 0;
mov.u32 %r8829, %r8723;
$L__BB1_334:
// state = min(state + 1, 12); counter = 0; exponent ladder:
//   0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, 12 -> 5.
add.s32 %r4842, %r8725, 1;
min.u32 %r8725, %r4842, 12;
setp.lt.u32 %p376, %r8725, 3;
mov.u32 %r8726, 0;
mov.u32 %r8832, %r8726;
@%p376 bra $L__BB1_337;
setp.lt.u32 %p377, %r8725, 6;
mov.u32 %r8832, 1;
@%p377 bra $L__BB1_337;
setp.lt.u32 %p378, %r8725, 9;
setp.eq.s32 %p379, %r8725, 11;
selp.b32 %r4844, 4, 5, %p379;
setp.lt.u32 %p380, %r8725, 11;
selp.b32 %r4845, 3, %r4844, %p380;
selp.b32 %r8832, 2, %r4845, %p378;
$L__BB1_337:
// threshold = 1 << exponent
mov.u32 %r4847, 1;
shl.b32 %r8724, %r4847, %r8832;
mov.u32 %r8723, %r8829;
bra.uni $L__BB1_338;
$L__BB1_292:
shl.b16 %rs1096, %rs1096, 1;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p326, %r8520, 0;
mov.u32 %r8822, %r8723;
@%p326 bra $L__BB1_295;
setp.gt.u32 %p327, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r8822, 1;
@%p327 bra $L__BB1_295;
st.global.u8 [%rd14], %rs1096;
add.s32 %r8514, %r8514, 1;
mov.u32 %r8520, 8;
mov.u16 %rs1096, 0;
mov.u32 %r8822, %r8723;
$L__BB1_295:
setp.lt.u32 %p328, %r8725, 3;
mov.u32 %r8792, 0;
@%p328 bra $L__BB1_298;
setp.lt.u32 %p329, %r8725, 6;
mov.u32 %r8792, 1;
@%p329 bra $L__BB1_298;
setp.lt.u32 %p330, %r8725, 9;
setp.eq.s32 %p331, %r8725, 11;
selp.b32 %r4760, 4, 5, %p331;
setp.lt.u32 %p332, %r8725, 11;
selp.b32 %r4761, 3, %r4760, %p332;
selp.b32 %r8792, 2, %r4761, %p330;
$L__BB1_298:
setp.eq.s32 %p333, %r8792, 0;
@%p333 bra $L__BB1_326;
add.s32 %r688, %r8792, -1;
and.b32 %r689, %r8792, 3;
setp.eq.s32 %p334, %r689, 0;
mov.u32 %r8802, %r8792;
mov.u32 %r8805, %r8822;
@%p334 bra $L__BB1_311;
mov.u32 %r4763, 1;
shl.b32 %r4764, %r4763, %r688;
and.b32 %r4765, %r4764, %r8726;
setp.ne.s32 %p335, %r4765, 0;
selp.u32 %r4766, 1, 0, %p335;
cvt.u32.u16 %r4767, %rs1096;
bfi.b32 %r4768, %r4767, %r4766, 1, 8;
cvt.u16.u32 %rs1096, %r4768;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p336, %r8520, 0;
mov.u32 %r8805, %r8822;
@%p336 bra $L__BB1_303;
setp.gt.u32 %p337, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r8805, %r4763;
@%p337 bra $L__BB1_303;
add.s32 %r4772, %r8514, 17477;
cvt.u64.u32 %rd237, %r4772;
add.s64 %rd238, %rd237, %rd4;
add.s64 %rd239, %rd1, %rd238;
st.global.u8 [%rd239], %rs1096;
add.s32 %r8514, %r8514, 1;
mov.u32 %r8520, 8;
mov.u16 %rs1096, 0;
mov.u32 %r8805, %r8822;
$L__BB1_303:
setp.eq.s32 %p338, %r689, 1;
mov.u32 %r8822, %r8805;
mov.u32 %r8802, %r688;
@%p338 bra $L__BB1_311;
add.s32 %r8802, %r8792, -2;
mov.u32 %r4773, 1;
shl.b32 %r4774, %r4773, %r8802;
and.b32 %r4775, %r4774, %r8726;
setp.ne.s32 %p339, %r4775, 0;
selp.u32 %r4776, 1, 0, %p339;
cvt.u32.u16 %r4777, %rs1096;
bfi.b32 %r4778, %r4777, %r4776, 1, 8;
cvt.u16.u32 %rs1096, %r4778;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p340, %r8520, 0;
mov.u32 %r8796, %r8805;
@%p340 bra $L__BB1_307;
setp.gt.u32 %p341, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r8796, %r4773;
@%p341 bra $L__BB1_307;
add.s32 %r4781, %r8514, 17477;
cvt.u64.u32 %rd240, %r4781;
add.s64 %rd241, %rd240, %rd4;
add.s64 %rd242, %rd1, %rd241;
and.b16 %rs605, %rs1096, 255;
st.global.u8 [%rd242], %rs1096;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p342, %rs605, 255;
selp.b32 %r8520, 7, 8, %p342;
mov.u16 %rs1096, 0;
mov.u32 %r8796, %r8805;
$L__BB1_307:
setp.eq.s32 %p343, %r689, 2;
mov.u32 %r8822, %r8796;
mov.u32 %r8805, %r8796;
@%p343 bra $L__BB1_311;
add.s32 %r8802, %r8792, -3;
mov.u32 %r4782, 1;
shl.b32 %r4783, %r4782, %r8802;
and.b32 %r4784, %r4783, %r8726;
setp.ne.s32 %p344, %r4784, 0;
selp.u32 %r4785, 1, 0, %p344;
cvt.u32.u16 %r4786, %rs1096;
bfi.b32 %r4787, %r4786, %r4785, 1, 8;
cvt.u16.u32 %rs1096, %r4787;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p345, %r8520, 0;
mov.u32 %r8822, %r8796;
mov.u32 %r8805, %r8796;
@%p345 bra $L__BB1_311;
setp.gt.u32 %p346, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r8822, %r4782;
mov.u32 %r8805, %r4782;
@%p346 bra $L__BB1_311;
add.s32 %r4792, %r8514, 17477;
cvt.u64.u32 %rd243, %r4792;
add.s64 %rd244, %rd243, %rd4;
add.s64 %rd245, %rd1, %rd244;
and.b16 %rs608, %rs1096, 255;
st.global.u8 [%rd245], %rs1096;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p347, %rs608, 255;
selp.b32 %r8520, 7, 8, %p347;
mov.u16 %rs1096, 0;
mov.u32 %r8822, %r8796;
mov.u32 %r8805, %r8796;
$L__BB1_311:
setp.lt.u32 %p348, %r688, 3;
@%p348 bra $L__BB1_326;
mov.u32 %r8822, %r8805;
$L__BB1_313:
add.s32 %r4793, %r8802, -1;
mov.u32 %r4794, 1;
shl.b32 %r4795, %r4794, %r4793;
and.b32 %r4796, %r4795, %r8726;
setp.ne.s32 %p349, %r4796, 0;
selp.u32 %r4797, 1, 0, %p349;
cvt.u32.u16 %r4798, %rs1096;
bfi.b32 %r8811, %r4798, %r4797, 1, 8;
add.s32 %r8812, %r8520, -1;
setp.ne.s32 %p350, %r8812, 0;
mov.u32 %r8810, %r8822;
@%p350 bra $L__BB1_316;
setp.gt.u32 %p351, %r8514, 191;
mov.u32 %r8812, 0;
mov.u32 %r8810, %r4794;
@%p351 bra $L__BB1_316;
cvt.u16.u32 %rs609, %r8811;
and.b16 %rs610, %rs609, 255;
add.s32 %r4802, %r8514, 17477;
cvt.u64.u32 %rd246, %r4802;
add.s64 %rd247, %rd246, %rd4;
add.s64 %rd248, %rd1, %rd247;
st.global.u8 [%rd248], %rs609;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p352, %rs610, 255;
selp.b32 %r8812, 7, 8, %p352;
mov.u32 %r8811, 0;
mov.u32 %r8810, %r8822;
$L__BB1_316:
add.s32 %r4803, %r8802, -2;
shl.b32 %r4805, %r4794, %r4803;
and.b32 %r4806, %r4805, %r8726;
setp.ne.s32 %p353, %r4806, 0;
and.b32 %r4807, %r8811, 127;
selp.u32 %r4808, 1, 0, %p353;
bfi.b32 %r8815, %r4807, %r4808, 1, 7;
add.s32 %r8816, %r8812, -1;
setp.ne.s32 %p354, %r8816, 0;
mov.u32 %r8814, %r8810;
@%p354 bra $L__BB1_319;
setp.gt.u32 %p355, %r8514, 191;
mov.u32 %r8816, 0;
mov.u32 %r8814, 1;
@%p355 bra $L__BB1_319;
cvt.u16.u32 %rs611, %r8815;
and.b16 %rs612, %rs611, 255;
add.s32 %r4812, %r8514, 17477;
cvt.u64.u32 %rd249, %r4812;
add.s64 %rd250, %rd249, %rd4;
add.s64 %rd251, %rd1, %rd250;
st.global.u8 [%rd251], %rs611;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p356, %rs612, 255;
selp.b32 %r8816, 7, 8, %p356;
mov.u32 %r8815, 0;
mov.u32 %r8814, %r8810;
$L__BB1_319:
add.s32 %r4813, %r8802, -3;
mov.u32 %r4814, 1;
shl.b32 %r4815, %r4814, %r4813;
and.b32 %r4816, %r4815, %r8726;
setp.ne.s32 %p357, %r4816, 0;
and.b32 %r4817, %r8815, 127;
selp.u32 %r4818, 1, 0, %p357;
bfi.b32 %r8819, %r4817, %r4818, 1, 7;
add.s32 %r8820, %r8816, -1;
setp.ne.s32 %p358, %r8820, 0;
mov.u32 %r8818, %r8814;
@%p358 bra $L__BB1_322;
setp.gt.u32 %p359, %r8514, 191;
mov.u32 %r8820, 0;
mov.u32 %r8818, %r4814;
@%p359 bra $L__BB1_322;
cvt.u16.u32 %rs613, %r8819;
and.b16 %rs614, %rs613, 255;
add.s32 %r4822, %r8514, 17477;
cvt.u64.u32 %rd252, %r4822;
add.s64 %rd253, %rd252, %rd4;
add.s64 %rd254, %rd1, %rd253;
st.global.u8 [%rd254], %rs613;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p360, %rs614, 255;
selp.b32 %r8820, 7, 8, %p360;
mov.u32 %r8819, 0;
mov.u32 %r8818, %r8814;
$L__BB1_322:
add.s32 %r8802, %r8802, -4;
shl.b32 %r4824, %r4814, %r8802;
and.b32 %r4825, %r4824, %r8726;
setp.ne.s32 %p361, %r4825, 0;
and.b32 %r4826, %r8819, 127;
selp.u32 %r4827, 1, 0, %p361;
bfi.b32 %r4828, %r4826, %r4827, 1, 15;
cvt.u16.u32 %rs1096, %r4828;
add.s32 %r8520, %r8820, -1;
setp.ne.s32 %p362, %r8520, 0;
mov.u32 %r8822, %r8818;
@%p362 bra $L__BB1_325;
setp.gt.u32 %p363, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r8822, 1;
@%p363 bra $L__BB1_325;
add.s32 %r4831, %r8514, 17477;
cvt.u64.u32 %rd255, %r4831;
add.s64 %rd256, %rd255, %rd4;
add.s64 %rd257, %rd1, %rd256;
and.b16 %rs616, %rs1096, 255;
st.global.u8 [%rd257], %rs1096;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p364, %rs616, 255;
selp.b32 %r8520, 7, 8, %p364;
mov.u16 %rs1096, 0;
mov.u32 %r8822, %r8818;
$L__BB1_325:
setp.ne.s32 %p365, %r8802, 0;
@%p365 bra $L__BB1_313;
$L__BB1_326:
add.s32 %r4833, %r8725, -1;
setp.eq.s32 %p366, %r8725, 0;
mov.u32 %r8726, 0;
selp.b32 %r8725, 0, %r4833, %p366;
setp.lt.u32 %p367, %r8725, 3;
mov.u32 %r8828, %r8726;
@%p367 bra $L__BB1_329;
setp.lt.u32 %p368, %r8725, 6;
mov.u32 %r8828, 1;
@%p368 bra $L__BB1_329;
setp.lt.u32 %p369, %r8725, 9;
setp.eq.s32 %p370, %r8725, 11;
selp.b32 %r4835, 4, 5, %p370;
setp.lt.u32 %p371, %r8725, 11;
selp.b32 %r4836, 3, %r4835, %p371;
selp.b32 %r8828, 2, %r4836, %p369;
$L__BB1_329:
mov.u32 %r4838, 1;
shl.b32 %r8724, %r4838, %r8828;
mov.u32 %r8723, %r8822;
$L__BB1_338:
setp.gt.s32 %p381, %r471, 2;
setp.gt.s32 %p382, %r130, 2;
and.pred %p383, %p382, %p381;
@%p383 bra $L__BB1_387;
bra.uni $L__BB1_339;
$L__BB1_387:
add.s32 %r4968, %r343, -11;
cvt.u64.u32 %rd287, %r4968;
add.s64 %rd16, %rd9, %rd287;
ld.global.u8 %rs122, [%rd16];
add.s32 %r4969, %r343, -10;
cvt.u64.u32 %rd289, %r4969;
add.s64 %rd290, %rd9, %rd289;
ld.global.u8 %rs123, [%rd290];
ld.global.u8 %rs124, [%rd290+1];
mul.lo.s32 %r4970, %r471, 6;
add.s32 %r4971, %r4970, -12;
cvt.u64.u32 %rd291, %r4971;
add.s64 %rd292, %rd9, %rd291;
ld.global.u8 %rs125, [%rd292];
ld.global.u8 %rs126, [%rd292+1];
add.s32 %r4972, %r4970, -10;
cvt.u64.u32 %rd293, %r4972;
add.s64 %rd294, %rd9, %rd293;
ld.global.u8 %rs127, [%rd294];
ld.global.u8 %rs128, [%rd294+1];
setp.eq.s16 %p451, %rs122, 0;
mov.u32 %r8926, %r8678;
@%p451 bra $L__BB1_394;
ld.global.u8 %r8916, [%rd16+-1];
cvt.u32.u16 %r8915, %rs122;
$L__BB1_389:
mov.u32 %r898, %r8915;
setp.gt.u32 %p452, %r8962, 2879;
mov.u32 %r8926, 1;
@%p452 bra $L__BB1_394;
mov.u32 %r4974, 8;
sub.s32 %r4975, %r4974, %r8960;
sub.s32 %r4976, %r4975, %r8961;
min.u32 %r4977, %r4976, %r898;
setp.eq.s32 %p453, %r4977, 32;
mov.u32 %r4978, -1;
shl.b32 %r4979, %r4978, %r4977;
not.b32 %r4980, %r4979;
selp.b32 %r4981, -1, %r4980, %p453;
and.b32 %r4982, %r4981, %r8916;
shl.b32 %r4983, %r4982, %r8961;
cvt.u16.u32 %rs652, %r4983;
or.b16 %rs1165, %rs1165, %rs652;
add.s32 %r8961, %r4977, %r8961;
sub.s32 %r8915, %r898, %r4977;
shr.u32 %r8916, %r8916, %r4977;
setp.gt.u32 %p454, %r4976, %r898;
@%p454 bra $L__BB1_393;
setp.ne.s32 %p455, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs653, %rs1165, 255;
setp.ne.s16 %p456, %rs653, 127;
and.pred %p457, %p455, %p456;
@%p457 bra $L__BB1_393;
mov.u32 %r4986, 20548;
sub.s32 %r4987, %r4986, %r8962;
cvt.u64.u32 %rd295, %r4987;
add.s64 %rd296, %rd295, %rd4;
add.s64 %rd297, %rd1, %rd296;
st.global.u8 [%rd297], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p458, %rs653, 143;
selp.u32 %r8960, 1, 0, %p458;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_393:
setp.ne.s32 %p459, %r8915, 0;
mov.u32 %r8926, %r8678;
@%p459 bra $L__BB1_389;
$L__BB1_394:
setp.eq.s16 %p460, %rs126, 0;
mov.u32 %r8938, %r8926;
@%p460 bra $L__BB1_401;
cvt.u32.u16 %r4988, %rs125;
and.b32 %r8928, %r4988, 255;
cvt.u32.u16 %r4989, %rs126;
and.b32 %r8927, %r4989, 255;
$L__BB1_396:
mov.u32 %r917, %r8927;
setp.gt.u32 %p461, %r8962, 2879;
mov.u32 %r8938, 1;
@%p461 bra $L__BB1_401;
mov.u32 %r4991, 8;
sub.s32 %r4992, %r4991, %r8960;
sub.s32 %r4993, %r4992, %r8961;
min.u32 %r4994, %r4993, %r917;
setp.eq.s32 %p462, %r4994, 32;
mov.u32 %r4995, -1;
shl.b32 %r4996, %r4995, %r4994;
not.b32 %r4997, %r4996;
selp.b32 %r4998, -1, %r4997, %p462;
and.b32 %r4999, %r4998, %r8928;
shl.b32 %r5000, %r4999, %r8961;
cvt.u16.u32 %rs657, %r5000;
or.b16 %rs1165, %rs1165, %rs657;
add.s32 %r8961, %r4994, %r8961;
sub.s32 %r8927, %r917, %r4994;
shr.u32 %r8928, %r8928, %r4994;
setp.gt.u32 %p463, %r4993, %r917;
@%p463 bra $L__BB1_400;
setp.ne.s32 %p464, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs658, %rs1165, 255;
setp.ne.s16 %p465, %rs658, 127;
and.pred %p466, %p464, %p465;
@%p466 bra $L__BB1_400;
mov.u32 %r5003, 20548;
sub.s32 %r5004, %r5003, %r8962;
cvt.u64.u32 %rd298, %r5004;
add.s64 %rd299, %rd298, %rd4;
add.s64 %rd300, %rd1, %rd299;
st.global.u8 [%rd300], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p467, %rs658, 143;
selp.u32 %r8960, 1, 0, %p467;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_400:
setp.ne.s32 %p468, %r8927, 0;
mov.u32 %r8938, %r8926;
@%p468 bra $L__BB1_396;
$L__BB1_401:
setp.eq.s16 %p469, %rs124, 0;
mov.u32 %r8950, %r8938;
@%p469 bra $L__BB1_408;
cvt.u32.u16 %r5005, %rs124;
and.b32 %r8939, %r5005, 255;
cvt.u32.u16 %r5006, %rs123;
and.b32 %r8940, %r5006, 255;
$L__BB1_403:
mov.u32 %r936, %r8939;
setp.gt.u32 %p470, %r8962, 2879;
mov.u32 %r8950, 1;
@%p470 bra $L__BB1_408;
mov.u32 %r5008, 8;
sub.s32 %r5009, %r5008, %r8960;
sub.s32 %r5010, %r5009, %r8961;
min.u32 %r5011, %r5010, %r936;
setp.eq.s32 %p471, %r5011, 32;
mov.u32 %r5012, -1;
shl.b32 %r5013, %r5012, %r5011;
not.b32 %r5014, %r5013;
selp.b32 %r5015, -1, %r5014, %p471;
and.b32 %r5016, %r5015, %r8940;
shl.b32 %r5017, %r5016, %r8961;
cvt.u16.u32 %rs662, %r5017;
or.b16 %rs1165, %rs1165, %rs662;
add.s32 %r8961, %r5011, %r8961;
sub.s32 %r8939, %r936, %r5011;
shr.u32 %r8940, %r8940, %r5011;
setp.gt.u32 %p472, %r5010, %r936;
@%p472 bra $L__BB1_407;
setp.ne.s32 %p473, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs663, %rs1165, 255;
setp.ne.s16 %p474, %rs663, 127;
and.pred %p475, %p473, %p474;
@%p475 bra $L__BB1_407;
mov.u32 %r5020, 20548;
sub.s32 %r5021, %r5020, %r8962;
cvt.u64.u32 %rd301, %r5021;
add.s64 %rd302, %rd301, %rd4;
add.s64 %rd303, %rd1, %rd302;
st.global.u8 [%rd303], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p476, %rs663, 143;
selp.u32 %r8960, 1, 0, %p476;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_407:
setp.ne.s32 %p477, %r8939, 0;
mov.u32 %r8950, %r8938;
@%p477 bra $L__BB1_403;
$L__BB1_408:
setp.eq.s16 %p478, %rs128, 0;
mov.u32 %r8959, %r8950;
@%p478 bra $L__BB1_415;
cvt.u32.u16 %r5022, %rs127;
and.b32 %r8952, %r5022, 255;
cvt.u32.u16 %r5023, %rs128;
and.b32 %r8951, %r5023, 255;
$L__BB1_410:
mov.u32 %r955, %r8951;
setp.gt.u32 %p479, %r8962, 2879;
mov.u32 %r8959, 1;
@%p479 bra $L__BB1_415;
mov.u32 %r5025, 8;
sub.s32 %r5026, %r5025, %r8960;
sub.s32 %r5027, %r5026, %r8961;
min.u32 %r5028, %r5027, %r955;
setp.eq.s32 %p480, %r5028, 32;
mov.u32 %r5029, -1;
shl.b32 %r5030, %r5029, %r5028;
not.b32 %r5031, %r5030;
selp.b32 %r5032, -1, %r5031, %p480;
and.b32 %r5033, %r5032, %r8952;
shl.b32 %r5034, %r5033, %r8961;
cvt.u16.u32 %rs667, %r5034;
or.b16 %rs1165, %rs1165, %rs667;
add.s32 %r8961, %r5028, %r8961;
sub.s32 %r8951, %r955, %r5028;
shr.u32 %r8952, %r8952, %r5028;
setp.gt.u32 %p481, %r5027, %r955;
@%p481 bra $L__BB1_414;
setp.ne.s32 %p482, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs668, %rs1165, 255;
setp.ne.s16 %p483, %rs668, 127;
and.pred %p484, %p482, %p483;
@%p484 bra $L__BB1_414;
mov.u32 %r5037, 20548;
sub.s32 %r5038, %r5037, %r8962;
cvt.u64.u32 %rd304, %r5038;
add.s64 %rd305, %rd304, %rd4;
add.s64 %rd306, %rd1, %rd305;
st.global.u8 [%rd306], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p485, %rs668, 143;
selp.u32 %r8960, 1, 0, %p485;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_414:
setp.ne.s32 %p486, %r8951, 0;
mov.u32 %r8959, %r8950;
@%p486 bra $L__BB1_410;
bra.uni $L__BB1_415;
$L__BB1_339:
setp.gt.s32 %p384, %r471, 0;
and.pred %p386, %p382, %p384;
@%p386 bra $L__BB1_368;
bra.uni $L__BB1_340;
// Append three fields, in order, to the byte stream stored downward from
// offset 20548 of the output area (%rd1 + %rd4):
//   1. value [%rd11],   length [%rd11+1]  (skipped when the length is 0)
//   2. value %r471 - 1, length 1
//   3. value [%rd12],   length [%rd12+1]  (skipped when the length is 0)
// Writer state shared by all three loops: %rs1165 = byte being assembled,
// %r8961 = bits used in it, %r8960 = 1 when the last stored byte was > 143
// (next byte has one bit less capacity), %r8962 = bytes stored so far.
// Status flows %r8678 -> %r8894 -> %r8906 -> %r8959 and becomes 1 as soon
// as %r8962 exceeds 2879.
$L__BB1_368:
ld.global.u8 %rs108, [%rd11+1];
ld.global.u8 %rs109, [%rd12];
ld.global.u8 %rs110, [%rd12+1];
setp.eq.s16 %p425, %rs108, 0;
mov.u32 %r8894, %r8678;
@%p425 bra $L__BB1_375;
ld.global.u8 %r8884, [%rd11];
cvt.u32.u16 %r8883, %rs108;
// Field 1 loop: %r8883 = bits left, %r8884 = value bits left (LSB first).
$L__BB1_370:
mov.u32 %r846, %r8883;
setp.gt.u32 %p426, %r8962, 2879;
mov.u32 %r8894, 1;
@%p426 bra $L__BB1_375;
// %r4922 = free bits in the current byte; %r4923 = bits taken this round.
mov.u32 %r4920, 8;
sub.s32 %r4921, %r4920, %r8960;
sub.s32 %r4922, %r4921, %r8961;
min.u32 %r4923, %r4922, %r846;
setp.eq.s32 %p427, %r4923, 32;
mov.u32 %r4924, -1;
shl.b32 %r4925, %r4924, %r4923;
not.b32 %r4926, %r4925;
selp.b32 %r4927, -1, %r4926, %p427;
and.b32 %r4928, %r4927, %r8884;
shl.b32 %r4929, %r4928, %r8961;
cvt.u16.u32 %rs639, %r4929;
or.b16 %rs1165, %rs1165, %rs639;
add.s32 %r8961, %r4923, %r8961;
sub.s32 %r8883, %r846, %r4923;
shr.u32 %r8884, %r8884, %r4923;
setp.gt.u32 %p428, %r4922, %r846;
@%p428 bra $L__BB1_374;
// Byte full: if the reduced-capacity flag was set and the byte is not 127,
// just clear the flag; otherwise store the byte at 20548 - %r8962.
setp.ne.s32 %p429, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs640, %rs1165, 255;
setp.ne.s16 %p430, %rs640, 127;
and.pred %p431, %p429, %p430;
@%p431 bra $L__BB1_374;
mov.u32 %r4932, 20548;
sub.s32 %r4933, %r4932, %r8962;
cvt.u64.u32 %rd278, %r4933;
add.s64 %rd279, %rd278, %rd4;
add.s64 %rd280, %rd1, %rd279;
st.global.u8 [%rd280], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p432, %rs640, 143;
selp.u32 %r8960, 1, 0, %p432;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_374:
setp.ne.s32 %p433, %r8883, 0;
mov.u32 %r8894, %r8678;
@%p433 bra $L__BB1_370;
// Field 2 setup: value %r471 - 1 with a length of 1 bit; also widen the
// field-3 value (%rs109) and length (%rs110) to 32 bits.
$L__BB1_375:
add.s32 %r8896, %r471, -1;
cvt.u32.u16 %r4935, %rs110;
and.b32 %r8907, %r4935, 255;
cvt.u32.u16 %r4936, %rs109;
and.b32 %r8908, %r4936, 255;
mov.u32 %r4934, 1;
mov.u32 %r8895, %r4934;
// Field 2 loop (same shape as the field 1 loop).
$L__BB1_376:
mov.u32 %r866, %r8895;
setp.gt.u32 %p434, %r8962, 2879;
mov.u32 %r8906, %r4934;
@%p434 bra $L__BB1_381;
mov.u32 %r4938, 8;
sub.s32 %r4939, %r4938, %r8960;
sub.s32 %r4940, %r4939, %r8961;
min.u32 %r4941, %r4940, %r866;
setp.eq.s32 %p435, %r4941, 32;
mov.u32 %r4942, -1;
shl.b32 %r4943, %r4942, %r4941;
not.b32 %r4944, %r4943;
selp.b32 %r4945, -1, %r4944, %p435;
and.b32 %r4946, %r4945, %r8896;
shl.b32 %r4947, %r4946, %r8961;
cvt.u16.u32 %rs643, %r4947;
or.b16 %rs1165, %rs1165, %rs643;
add.s32 %r8961, %r4941, %r8961;
sub.s32 %r8895, %r866, %r4941;
shr.u32 %r8896, %r8896, %r4941;
setp.gt.u32 %p436, %r4940, %r866;
@%p436 bra $L__BB1_380;
setp.ne.s32 %p437, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs644, %rs1165, 255;
setp.ne.s16 %p438, %rs644, 127;
and.pred %p439, %p437, %p438;
@%p439 bra $L__BB1_380;
mov.u32 %r4950, 20548;
sub.s32 %r4951, %r4950, %r8962;
cvt.u64.u32 %rd281, %r4951;
add.s64 %rd282, %rd281, %rd4;
add.s64 %rd283, %rd1, %rd282;
st.global.u8 [%rd283], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p440, %rs644, 143;
selp.u32 %r8960, 1, 0, %p440;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_380:
setp.ne.s32 %p441, %r8895, 0;
mov.u32 %r8906, %r8894;
@%p441 bra $L__BB1_376;
// Field 3: skipped when its length byte %rs110 is 0.
$L__BB1_381:
setp.eq.s16 %p442, %rs110, 0;
mov.u32 %r8959, %r8906;
@%p442 bra $L__BB1_415;
// Field 3 loop: %r8907 = bits left, %r8908 = value bits left.
$L__BB1_382:
mov.u32 %r883, %r8907;
setp.gt.u32 %p443, %r8962, 2879;
mov.u32 %r8959, 1;
@%p443 bra $L__BB1_415;
mov.u32 %r4953, 8;
sub.s32 %r4954, %r4953, %r8960;
sub.s32 %r4955, %r4954, %r8961;
min.u32 %r4956, %r4955, %r883;
setp.eq.s32 %p444, %r4956, 32;
mov.u32 %r4957, -1;
shl.b32 %r4958, %r4957, %r4956;
not.b32 %r4959, %r4958;
selp.b32 %r4960, -1, %r4959, %p444;
and.b32 %r4961, %r4960, %r8908;
shl.b32 %r4962, %r4961, %r8961;
cvt.u16.u32 %rs648, %r4962;
or.b16 %rs1165, %rs1165, %rs648;
add.s32 %r8961, %r4956, %r8961;
sub.s32 %r8907, %r883, %r4956;
shr.u32 %r8908, %r8908, %r4956;
setp.gt.u32 %p445, %r4955, %r883;
@%p445 bra $L__BB1_386;
setp.ne.s32 %p446, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs649, %rs1165, 255;
setp.ne.s16 %p447, %rs649, 127;
and.pred %p448, %p446, %p447;
@%p448 bra $L__BB1_386;
mov.u32 %r4965, 20548;
sub.s32 %r4966, %r4965, %r8962;
cvt.u64.u32 %rd284, %r4966;
add.s64 %rd285, %rd284, %rd4;
add.s64 %rd286, %rd1, %rd285;
st.global.u8 [%rd286], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p449, %rs649, 143;
selp.u32 %r8960, 1, 0, %p449;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_386:
setp.eq.s32 %p450, %r8907, 0;
mov.u32 %r8959, %r8906;
@%p450 bra $L__BB1_415;
bra.uni $L__BB1_382;
// Append four table-driven fields to the byte stream stored downward from
// offset 20548 of the output area (%rd1 + %rd4).
// Two rows of the byte table at %rd9 are addressed:
//   row A at byte offset (%r130 > 0 ? %r343 : 0)
//   row B at byte offset (%p384 ? %r471 * 6 : 0)   (%p384 is %r471 > 0)
// Each row supplies (value, length) byte pairs at +0/+1 and at +2/+3.
// Emission order: A[+0], B[+0], A[+2], B[+2]; a pair with length 0 is skipped.
// Writer state: %rs1165 = byte being assembled, %r8961 = bits used in it,
// %r8960 = 1 when the last stored byte was > 143 (one bit less capacity in
// the next byte), %r8962 = bytes stored so far.
// Status flows %r8678 -> %r8850 -> %r8862 -> %r8874 -> %r8959 and becomes 1
// as soon as %r8962 exceeds 2879.
// NOTE(review): the A/B interleaving looks like HTJ2K U-VLC prefix/suffix
// coding for a pair of quads — confirm against the CUDA source.
$L__BB1_340:
setp.gt.s32 %p388, %r130, 0;
selp.b32 %r4848, %r343, 0, %p388;
cvt.u64.u32 %rd258, %r4848;
add.s64 %rd15, %rd9, %rd258;
ld.global.u8 %rs86, [%rd15+1];
add.s32 %r4849, %r4848, 2;
cvt.u64.u32 %rd260, %r4849;
add.s64 %rd261, %rd9, %rd260;
ld.global.u8 %rs87, [%rd261];
ld.global.u8 %rs88, [%rd261+1];
mul.lo.s32 %r4850, %r471, 6;
selp.b32 %r4851, %r4850, 0, %p384;
cvt.u64.u32 %rd262, %r4851;
add.s64 %rd263, %rd9, %rd262;
ld.global.u8 %rs89, [%rd263];
ld.global.u8 %rs90, [%rd263+1];
add.s32 %r4852, %r4851, 2;
cvt.u64.u32 %rd264, %r4852;
add.s64 %rd265, %rd9, %rd264;
ld.global.u8 %rs91, [%rd265];
ld.global.u8 %rs92, [%rd265+1];
// Field A[+0]: value [%rd15], length %rs86.
setp.eq.s16 %p389, %rs86, 0;
mov.u32 %r8850, %r8678;
@%p389 bra $L__BB1_347;
ld.global.u8 %r8840, [%rd15];
cvt.u32.u16 %r8839, %rs86;
// Emit loop: %r8839 = bits left, %r8840 = value bits left (LSB first).
$L__BB1_342:
mov.u32 %r774, %r8839;
setp.gt.u32 %p390, %r8962, 2879;
mov.u32 %r8850, 1;
@%p390 bra $L__BB1_347;
// %r4856 = free bits in the current byte; %r4857 = bits taken this round.
mov.u32 %r4854, 8;
sub.s32 %r4855, %r4854, %r8960;
sub.s32 %r4856, %r4855, %r8961;
min.u32 %r4857, %r4856, %r774;
setp.eq.s32 %p391, %r4857, 32;
mov.u32 %r4858, -1;
shl.b32 %r4859, %r4858, %r4857;
not.b32 %r4860, %r4859;
selp.b32 %r4861, -1, %r4860, %p391;
and.b32 %r4862, %r4861, %r8840;
shl.b32 %r4863, %r4862, %r8961;
cvt.u16.u32 %rs620, %r4863;
or.b16 %rs1165, %rs1165, %rs620;
add.s32 %r8961, %r4857, %r8961;
sub.s32 %r8839, %r774, %r4857;
shr.u32 %r8840, %r8840, %r4857;
setp.gt.u32 %p392, %r4856, %r774;
@%p392 bra $L__BB1_346;
// Byte full: if the reduced-capacity flag was set and the byte is not 127,
// just clear the flag; otherwise store the byte at 20548 - %r8962.
setp.ne.s32 %p393, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs621, %rs1165, 255;
setp.ne.s16 %p394, %rs621, 127;
and.pred %p395, %p393, %p394;
@%p395 bra $L__BB1_346;
mov.u32 %r4866, 20548;
sub.s32 %r4867, %r4866, %r8962;
cvt.u64.u32 %rd266, %r4867;
add.s64 %rd267, %rd266, %rd4;
add.s64 %rd268, %rd1, %rd267;
st.global.u8 [%rd268], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p396, %rs621, 143;
selp.u32 %r8960, 1, 0, %p396;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_346:
setp.ne.s32 %p397, %r8839, 0;
mov.u32 %r8850, %r8678;
@%p397 bra $L__BB1_342;
// Field B[+0]: value %rs89, length %rs90 (same loop shape as above).
$L__BB1_347:
setp.eq.s16 %p398, %rs90, 0;
mov.u32 %r8862, %r8850;
@%p398 bra $L__BB1_354;
cvt.u32.u16 %r4868, %rs89;
and.b32 %r8852, %r4868, 255;
cvt.u32.u16 %r4869, %rs90;
and.b32 %r8851, %r4869, 255;
$L__BB1_349:
mov.u32 %r793, %r8851;
setp.gt.u32 %p399, %r8962, 2879;
mov.u32 %r8862, 1;
@%p399 bra $L__BB1_354;
mov.u32 %r4871, 8;
sub.s32 %r4872, %r4871, %r8960;
sub.s32 %r4873, %r4872, %r8961;
min.u32 %r4874, %r4873, %r793;
setp.eq.s32 %p400, %r4874, 32;
mov.u32 %r4875, -1;
shl.b32 %r4876, %r4875, %r4874;
not.b32 %r4877, %r4876;
selp.b32 %r4878, -1, %r4877, %p400;
and.b32 %r4879, %r4878, %r8852;
shl.b32 %r4880, %r4879, %r8961;
cvt.u16.u32 %rs625, %r4880;
or.b16 %rs1165, %rs1165, %rs625;
add.s32 %r8961, %r4874, %r8961;
sub.s32 %r8851, %r793, %r4874;
shr.u32 %r8852, %r8852, %r4874;
setp.gt.u32 %p401, %r4873, %r793;
@%p401 bra $L__BB1_353;
setp.ne.s32 %p402, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs626, %rs1165, 255;
setp.ne.s16 %p403, %rs626, 127;
and.pred %p404, %p402, %p403;
@%p404 bra $L__BB1_353;
mov.u32 %r4883, 20548;
sub.s32 %r4884, %r4883, %r8962;
cvt.u64.u32 %rd269, %r4884;
add.s64 %rd270, %rd269, %rd4;
add.s64 %rd271, %rd1, %rd270;
st.global.u8 [%rd271], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p405, %rs626, 143;
selp.u32 %r8960, 1, 0, %p405;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_353:
setp.ne.s32 %p406, %r8851, 0;
mov.u32 %r8862, %r8850;
@%p406 bra $L__BB1_349;
// Field A[+2]: value %rs87, length %rs88.
$L__BB1_354:
setp.eq.s16 %p407, %rs88, 0;
mov.u32 %r8874, %r8862;
@%p407 bra $L__BB1_361;
cvt.u32.u16 %r4885, %rs88;
and.b32 %r8863, %r4885, 255;
cvt.u32.u16 %r4886, %rs87;
and.b32 %r8864, %r4886, 255;
$L__BB1_356:
mov.u32 %r812, %r8863;
setp.gt.u32 %p408, %r8962, 2879;
mov.u32 %r8874, 1;
@%p408 bra $L__BB1_361;
mov.u32 %r4888, 8;
sub.s32 %r4889, %r4888, %r8960;
sub.s32 %r4890, %r4889, %r8961;
min.u32 %r4891, %r4890, %r812;
setp.eq.s32 %p409, %r4891, 32;
mov.u32 %r4892, -1;
shl.b32 %r4893, %r4892, %r4891;
not.b32 %r4894, %r4893;
selp.b32 %r4895, -1, %r4894, %p409;
and.b32 %r4896, %r4895, %r8864;
shl.b32 %r4897, %r4896, %r8961;
cvt.u16.u32 %rs630, %r4897;
or.b16 %rs1165, %rs1165, %rs630;
add.s32 %r8961, %r4891, %r8961;
sub.s32 %r8863, %r812, %r4891;
shr.u32 %r8864, %r8864, %r4891;
setp.gt.u32 %p410, %r4890, %r812;
@%p410 bra $L__BB1_360;
setp.ne.s32 %p411, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs631, %rs1165, 255;
setp.ne.s16 %p412, %rs631, 127;
and.pred %p413, %p411, %p412;
@%p413 bra $L__BB1_360;
mov.u32 %r4900, 20548;
sub.s32 %r4901, %r4900, %r8962;
cvt.u64.u32 %rd272, %r4901;
add.s64 %rd273, %rd272, %rd4;
add.s64 %rd274, %rd1, %rd273;
st.global.u8 [%rd274], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p414, %rs631, 143;
selp.u32 %r8960, 1, 0, %p414;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_360:
setp.ne.s32 %p415, %r8863, 0;
mov.u32 %r8874, %r8862;
@%p415 bra $L__BB1_356;
// Field B[+2]: value %rs91, length %rs92.
$L__BB1_361:
setp.eq.s16 %p416, %rs92, 0;
mov.u32 %r8959, %r8874;
@%p416 bra $L__BB1_415;
cvt.u32.u16 %r4902, %rs91;
and.b32 %r8876, %r4902, 255;
cvt.u32.u16 %r4903, %rs92;
and.b32 %r8875, %r4903, 255;
$L__BB1_363:
mov.u32 %r831, %r8875;
setp.gt.u32 %p417, %r8962, 2879;
mov.u32 %r8959, 1;
@%p417 bra $L__BB1_415;
mov.u32 %r4905, 8;
sub.s32 %r4906, %r4905, %r8960;
sub.s32 %r4907, %r4906, %r8961;
min.u32 %r4908, %r4907, %r831;
setp.eq.s32 %p418, %r4908, 32;
mov.u32 %r4909, -1;
shl.b32 %r4910, %r4909, %r4908;
not.b32 %r4911, %r4910;
selp.b32 %r4912, -1, %r4911, %p418;
and.b32 %r4913, %r4912, %r8876;
shl.b32 %r4914, %r4913, %r8961;
cvt.u16.u32 %rs635, %r4914;
or.b16 %rs1165, %rs1165, %rs635;
add.s32 %r8961, %r4908, %r8961;
sub.s32 %r8875, %r831, %r4908;
shr.u32 %r8876, %r8876, %r4908;
setp.gt.u32 %p419, %r4907, %r831;
@%p419 bra $L__BB1_367;
setp.ne.s32 %p420, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs636, %rs1165, 255;
setp.ne.s16 %p421, %rs636, 127;
and.pred %p422, %p420, %p421;
@%p422 bra $L__BB1_367;
mov.u32 %r4917, 20548;
sub.s32 %r4918, %r4917, %r8962;
cvt.u64.u32 %rd275, %r4918;
add.s64 %rd276, %rd275, %rd4;
add.s64 %rd277, %rd1, %rd276;
st.global.u8 [%rd277], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p423, %rs636, 143;
selp.u32 %r8960, 1, 0, %p423;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_367:
setp.eq.s32 %p424, %r8875, 0;
mov.u32 %r8959, %r8874;
@%p424 bra $L__BB1_415;
bra.uni $L__BB1_363;
// Join point of the field-append paths: %r8431 = (%r8648 >> 1) | %r588.
$L__BB1_415:
shr.u32 %r5039, %r8648, 1;
or.b32 %r8431, %r5039, %r588;
// Loop latch: advance %r8429 by 4 and repeat from $L__BB1_51 while it is
// below %r5 (unsigned compare).
$L__BB1_416:
add.s32 %r8429, %r8429, 4;
setp.lt.u32 %p487, %r8429, %r5;
@%p487 bra $L__BB1_51;
// With h = (%r5 + 1) >> 1: when h + 1 <= 512, store a zero byte to shared
// memory at address %r4095 + h + 1. The store is skipped for larger h.
// NOTE(review): 512 presumably matches the size of the shared array behind
// %r4095 — confirm against its declaration.
$L__BB1_417:
add.s32 %r8398, %r5, 1;
shr.u32 %r8397, %r8398, 1;
add.s32 %r5040, %r8397, 1;
setp.gt.u32 %p488, %r5040, 512;
@%p488 bra $L__BB1_419;
add.s32 %r8401, %r5, 1;
shr.u32 %r8400, %r8401, 1;
add.s32 %r8399, %r4095, %r8400;
mov.u16 %rs671, 0;
add.s32 %r8392, %r8399, 1;
st.shared.u8 [%r8392], %rs671;
// Skip the section that starts at row counter 2 when %r6 < 3.
$L__BB1_419:
setp.lt.u32 %p489, %r6, 3;
@%p489 bra $L__BB1_665;
// Setup for the loop headed by $L__BB1_421: fetch kernel parameters 5 and 4
// as global pointers (%rd17, %rd18), precompute %r1006 = 31 - %r2 (left
// shift applied to sample magnitudes below) and start the counter %r8995 at 2.
// NOTE(review): %r8995 is multiplied by %r1 to form a sample index and
// %r8995 + 1 is compared with %r6, so it presumably is a row index.
ld.param.u64 %rd1420, [ j2k_htj2k_encode_codeblocks_param_5];
ld.param.u64 %rd1413, [ j2k_htj2k_encode_codeblocks_param_4];
mov.u32 %r5042, 31;
sub.s32 %r1006, %r5042, %r2;
mov.u32 %r8995, 2;
cvta.to.global.u64 %rd17, %rd1420;
cvta.to.global.u64 %rd18, %rd1413;
// Per-iteration header: read element 0 of the shared cleanup_e_val and
// cleanup_cx_val arrays and reset both to 0.
$L__BB1_421:
ld.shared.u8 %rs151, [_ZZ32 j2k_htj2k_encode_codeblocksE13cleanup_e_val];
mov.u16 %rs672, 0;
st.shared.u8 [_ZZ32 j2k_htj2k_encode_codeblocksE13cleanup_e_val], %rs672;
ld.shared.u8 %rs152, [_ZZ32 j2k_htj2k_encode_codeblocksE14cleanup_cx_val];
st.shared.u8 [_ZZ32 j2k_htj2k_encode_codeblocksE14cleanup_cx_val], %rs672;
@%p10 bra $L__BB1_664;
mov.u32 %r5045, 0;
ld.shared.u8 %rs673, [_ZZ32 j2k_htj2k_encode_codeblocksE13cleanup_e_val+1];
ld.shared.u8 %rs674, [_ZZ32 j2k_htj2k_encode_codeblocksE14cleanup_cx_val+1];
// %r9013 = max(e_val[0], e_val[1]) - 1, using the value of e_val[0] read
// before the reset.
max.u16 %rs676, %rs151, %rs673;
cvt.u32.u16 %r5046, %rs676;
add.s32 %r9013, %r5046, -1;
add.s32 %r1024, %r8995, 1;
// %r9015 = index of the first sample: counter * %r1.
mul.lo.s32 %r9015, %r8995, %r1;
// %r9016 = cx_val[0] (pre-reset value) + 4 * cx_val[1]; later used as the
// upper part of the table index.
mul.wide.u16 %r5047, %rs674, 4;
cvt.u32.u16 %r5048, %rs152;
and.b32 %r5049, %r5048, 255;
add.s32 %r9016, %r5047, %r5049;
// Inner-loop counters %r9011, %r9012 and %r9014 start at 0.
mov.u32 %r9011, %r5045;
mov.u32 %r9012, %r5045;
mov.u32 %r9014, %r5045;
// Load and classify up to four 32-bit samples at element indices
//   %r9015, %r9015 + %r1, %r9015 + 1, %r9015 + 1 + %r1
// relative to %rd3 + 4 * %rd5 (a 2x2 neighbourhood when %r1 is the row pitch).
// For a non-zero sample x:
//   t = (|x| << %r1006) | (x & 0x80000000)
//   v = ((t << 1) >> %r43) & ~1
// and when v != 0 the sample sets its bit in the mask %r9040 and yields
//   exponent = 32 - clz(v - 1),   value = v - 2 + (t >> 31).
// Outputs (exponent, value, mask bit):
//   sample 0: %r9033, %r9034, 1      sample 1: %r9037, %r9038, 2
//   sample 2: %r9052, %r9054, 4      sample 3: %r9051, %r9053, 8
// %r9055 = maximum exponent (signed max). Samples 1 and 3 are treated as zero
// when %r1024 >= %r6; samples 2 and 3 are skipped when %r9011 + 1 >= %r5.
// %r9345 leaves as %r9015 + 2 when the second column was processed, else
// %r9015 + 1.
$L__BB1_423:
cvt.u64.u32 %rd307, %r9015;
add.s64 %rd308, %rd307, %rd5;
shl.b64 %rd309, %rd308, 2;
add.s64 %rd310, %rd3, %rd309;
ld.global.u32 %r1048, [%rd310];
setp.eq.s32 %p491, %r1048, 0;
mov.u32 %r9032, %r5045;
@%p491 bra $L__BB1_425;
// Two's complement -> shifted magnitude with the sign kept in bit 31.
and.b32 %r5051, %r1048, -2147483648;
abs.s32 %r5052, %r1048;
shl.b32 %r5053, %r5052, %r1006;
or.b32 %r9032, %r5053, %r5051;
$L__BB1_425:
shl.b32 %r5057, %r9032, 1;
shr.u32 %r5058, %r5057, %r43;
and.b32 %r1051, %r5058, -2;
setp.eq.s32 %p492, %r1051, 0;
mov.u32 %r9036, 0;
mov.u32 %r9033, %r9036;
mov.u32 %r9034, %r9036;
mov.u32 %r9040, %r9036;
@%p492 bra $L__BB1_427;
add.s32 %r5060, %r1051, -1;
clz.b32 %r5061, %r5060;
mov.u32 %r5062, 32;
sub.s32 %r9033, %r5062, %r5061;
shr.u32 %r5063, %r9032, 31;
add.s32 %r5064, %r5063, %r1051;
add.s32 %r9034, %r5064, -2;
mov.u32 %r9040, 1;
// Sample 1 (index + %r1); %p493 is reused for sample 3 below.
$L__BB1_427:
setp.ge.u32 %p493, %r1024, %r6;
@%p493 bra $L__BB1_430;
add.s32 %r5067, %r9015, %r1;
cvt.u64.u32 %rd311, %r5067;
add.s64 %rd312, %rd311, %rd5;
shl.b64 %rd313, %rd312, 2;
add.s64 %rd314, %rd3, %rd313;
ld.global.u32 %r1057, [%rd314];
setp.eq.s32 %p494, %r1057, 0;
@%p494 bra $L__BB1_430;
and.b32 %r5068, %r1057, -2147483648;
abs.s32 %r5069, %r1057;
shl.b32 %r5070, %r5069, %r1006;
or.b32 %r9036, %r5070, %r5068;
$L__BB1_430:
shl.b32 %r5073, %r9036, 1;
shr.u32 %r5074, %r5073, %r43;
and.b32 %r1060, %r5074, -2;
setp.eq.s32 %p495, %r1060, 0;
mov.u32 %r9051, 0;
mov.u32 %r9037, %r9051;
mov.u32 %r9038, %r9051;
mov.u32 %r9055, %r9033;
@%p495 bra $L__BB1_432;
or.b32 %r9040, %r9040, 2;
add.s32 %r5075, %r1060, -1;
clz.b32 %r5076, %r5075;
mov.u32 %r5077, 32;
sub.s32 %r9037, %r5077, %r5076;
max.s32 %r9055, %r9033, %r9037;
shr.u32 %r5078, %r9036, 31;
add.s32 %r5079, %r5078, %r1060;
add.s32 %r9038, %r5079, -2;
// Second column: skipped (outputs stay 0) when %r9011 + 1 >= %r5.
$L__BB1_432:
add.s32 %r9345, %r9015, 1;
add.s32 %r5084, %r9011, 1;
setp.ge.u32 %p496, %r5084, %r5;
mov.u32 %r9052, %r9051;
mov.u32 %r9053, %r9051;
mov.u32 %r9054, %r9051;
@%p496 bra $L__BB1_443;
// Sample 2 (index + 1).
cvt.u64.u32 %rd315, %r9345;
add.s64 %rd316, %rd315, %rd5;
shl.b64 %rd317, %rd316, 2;
add.s64 %rd318, %rd3, %rd317;
ld.global.u32 %r1070, [%rd318];
setp.eq.s32 %p497, %r1070, 0;
mov.u32 %r9052, 0;
mov.u32 %r9041, %r9052;
@%p497 bra $L__BB1_435;
and.b32 %r5086, %r1070, -2147483648;
abs.s32 %r5087, %r1070;
shl.b32 %r5088, %r5087, %r1006;
or.b32 %r9041, %r5088, %r5086;
$L__BB1_435:
shl.b32 %r5091, %r9041, 1;
shr.u32 %r5092, %r5091, %r43;
and.b32 %r1073, %r5092, -2;
setp.eq.s32 %p498, %r1073, 0;
mov.u32 %r9054, %r9052;
@%p498 bra $L__BB1_437;
or.b32 %r9040, %r9040, 4;
add.s32 %r5093, %r1073, -1;
clz.b32 %r5094, %r5093;
mov.u32 %r5095, 32;
sub.s32 %r9052, %r5095, %r5094;
max.s32 %r9055, %r9055, %r9052;
shr.u32 %r5096, %r9041, 31;
add.s32 %r5097, %r5096, %r1073;
add.s32 %r9054, %r5097, -2;
// Sample 3 (index + 1 + %r1).
$L__BB1_437:
mov.u32 %r9051, 0;
mov.u32 %r9046, %r9051;
@%p493 bra $L__BB1_440;
add.s32 %r5100, %r9345, %r1;
cvt.u64.u32 %rd319, %r5100;
add.s64 %rd320, %rd319, %rd5;
shl.b64 %rd321, %rd320, 2;
add.s64 %rd322, %rd3, %rd321;
ld.global.u32 %r1082, [%rd322];
setp.eq.s32 %p500, %r1082, 0;
@%p500 bra $L__BB1_440;
and.b32 %r5101, %r1082, -2147483648;
abs.s32 %r5102, %r1082;
shl.b32 %r5103, %r5102, %r1006;
or.b32 %r9046, %r5103, %r5101;
$L__BB1_440:
shl.b32 %r5106, %r9046, 1;
shr.u32 %r5107, %r5106, %r43;
and.b32 %r1085, %r5107, -2;
setp.eq.s32 %p501, %r1085, 0;
mov.u32 %r9053, %r9051;
@%p501 bra $L__BB1_442;
or.b32 %r9040, %r9040, 8;
add.s32 %r5108, %r1085, -1;
clz.b32 %r5109, %r5108;
mov.u32 %r5110, 32;
sub.s32 %r9051, %r5110, %r5109;
max.s32 %r9055, %r9055, %r9051;
shr.u32 %r5111, %r9046, 31;
add.s32 %r5112, %r5111, %r1085;
add.s32 %r9053, %r5112, -2;
$L__BB1_442:
add.s32 %r9345, %r9015, 2;
// Derive the per-group bit-count bound from the mask %r9040 and the
// exponents:
//   k       = (%r9013 > 1 and more than one mask bit set) ? %r9013 : 1
//   %r1102  = max(k, %r9055)      (signed)
//   %r1103  = %r1102 - k
// When %r1103 >= 1, %r9058 becomes a 4-bit mask of the samples whose exponent
// equals %r9055 (bit i for sample i); otherwise %r9058 stays 0.
$L__BB1_443:
// (m - 1) & m is non-zero exactly when m has more than one bit set.
add.s32 %r5114, %r9040, -1;
and.b32 %r5115, %r5114, %r9040;
setp.ne.s32 %p502, %r5115, 0;
mov.u32 %r9058, 0;
setp.gt.s32 %p503, %r9013, 1;
and.pred %p504, %p503, %p502;
selp.b32 %r5116, %r9013, 1, %p504;
max.s32 %r1102, %r5116, %r9055;
sub.s32 %r1103, %r1102, %r5116;
setp.lt.s32 %p505, %r1103, 1;
@%p505 bra $L__BB1_445;
setp.eq.s32 %p506, %r9033, %r9055;
selp.u32 %r5117, 1, 0, %p506;
setp.eq.s32 %p507, %r9037, %r9055;
selp.u32 %r5118, -1, 0, %p507;
// Insert the sample-1 flag at bit 1 of the sample-0 flag.
bfi.b32 %r5119, %r5118, %r5117, 1, 1;
setp.eq.s32 %p508, %r9052, %r9055;
selp.u16 %rs677, 1, 0, %p508;
mul.wide.u16 %r5120, %rs677, 4;
or.b32 %r5121, %r5119, %r5120;
setp.eq.s32 %p509, %r9051, %r9055;
selp.u16 %rs678, 1, 0, %p509;
mul.wide.u16 %r5122, %rs678, 8;
or.b32 %r9058, %r5121, %r5122;
// Look up a 16-bit entry in the table at %rd18 (kernel parameter 4) with
// index (%r9016 << 8) | (%r9040 << 4) | %r9058, then append its codeword to
// the byte stream stored downward from offset 20548 of the output area
// (%rd1 + %rd4). Entry layout as used here: bits 4..6 = codeword length,
// bits 8..15 = codeword; the low 4 bits are read later from %rs155.
// Writer state: %rs1165 = byte being assembled, %r8961 = bits used in it,
// %r8960 = 1 when the last stored byte was > 143 (one bit less capacity in
// the next byte), %r8962 = bytes stored so far.
// %r9070 leaves as %r8959 (incoming status) or 1 if %r8962 exceeds 2879.
$L__BB1_445:
shl.b32 %r5123, %r9040, 4;
shl.b32 %r5124, %r9016, 8;
or.b32 %r5125, %r5123, %r5124;
or.b32 %r5126, %r5125, %r9058;
mul.wide.u32 %rd323, %r5126, 2;
add.s64 %rd324, %rd18, %rd323;
ld.global.u16 %rs155, [%rd324];
shr.u16 %rs679, %rs155, 4;
and.b16 %rs156, %rs679, 7;
// A zero-length codeword emits nothing.
setp.eq.s16 %p510, %rs156, 0;
mov.u32 %r9070, %r8959;
@%p510 bra $L__BB1_452;
cvt.u32.u16 %r9059, %rs156;
shr.u16 %rs680, %rs155, 8;
cvt.u32.u16 %r9060, %rs680;
// Emit loop: %r9059 = bits left, %r9060 = codeword bits left (LSB first).
$L__BB1_447:
mov.u32 %r1108, %r9059;
setp.gt.u32 %p511, %r8962, 2879;
mov.u32 %r9070, 1;
@%p511 bra $L__BB1_452;
// %r5130 = free bits in the current byte; %r5131 = bits taken this round.
mov.u32 %r5128, 8;
sub.s32 %r5129, %r5128, %r8960;
sub.s32 %r5130, %r5129, %r8961;
min.u32 %r5131, %r5130, %r1108;
setp.eq.s32 %p512, %r5131, 32;
mov.u32 %r5132, -1;
shl.b32 %r5133, %r5132, %r5131;
not.b32 %r5134, %r5133;
selp.b32 %r5135, -1, %r5134, %p512;
and.b32 %r5136, %r5135, %r9060;
shl.b32 %r5137, %r5136, %r8961;
cvt.u16.u32 %rs681, %r5137;
or.b16 %rs1165, %rs1165, %rs681;
add.s32 %r8961, %r5131, %r8961;
sub.s32 %r9059, %r1108, %r5131;
shr.u32 %r9060, %r9060, %r5131;
setp.gt.u32 %p513, %r5130, %r1108;
@%p513 bra $L__BB1_451;
// Byte full: if the reduced-capacity flag was set and the byte is not 127,
// just clear the flag; otherwise store the byte at 20548 - %r8962.
setp.ne.s32 %p514, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs682, %rs1165, 255;
setp.ne.s16 %p515, %rs682, 127;
and.pred %p516, %p514, %p515;
@%p516 bra $L__BB1_451;
mov.u32 %r5140, 20548;
sub.s32 %r5141, %r5140, %r8962;
cvt.u64.u32 %rd325, %r5141;
add.s64 %rd326, %rd325, %rd4;
add.s64 %rd327, %rd1, %rd326;
st.global.u8 [%rd327], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p517, %rs682, 143;
selp.u32 %r8960, 1, 0, %p517;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_451:
setp.ne.s32 %p518, %r9059, 0;
mov.u32 %r9070, %r8959;
@%p518 bra $L__BB1_447;
// Run-length coder entry; only active when %r9016 == 0.
// State: %rs1096 = byte being assembled (bits shifted in from the right),
// %r8520 = bits still missing from it, %r8514 = bytes stored so far (bytes
// go to %rd1 + %rd4 + 17477 + %r8514, at most 192 of them), %r8725 = state
// index, %r8726 = current run, %r8724 = run threshold, %r8723 = status.
// A zero mask (%r9040 == 0) only extends the run ($L__BB1_492). A non-zero
// mask emits a 0 bit here, followed by the run value in the next block.
// NOTE(review): this matches the HTJ2K MEL coder — confirm against the CUDA
// source.
$L__BB1_452:
setp.ne.s32 %p519, %r9016, 0;
@%p519 bra $L__BB1_500;
setp.eq.s32 %p520, %r9040, 0;
// %rd19 = address of the next byte of this stream.
add.s32 %r5142, %r8514, 17477;
cvt.u64.u32 %rd328, %r5142;
add.s64 %rd329, %rd328, %rd4;
add.s64 %rd19, %rd1, %rd329;
@%p520 bra $L__BB1_492;
// Emit a 0 bit.
shl.b16 %rs1096, %rs1096, 1;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p521, %r8520, 0;
mov.u32 %r9104, %r8723;
@%p521 bra $L__BB1_457;
// Byte complete: report overflow (%r9104 = 1) once 192 bytes were stored,
// otherwise store it. Its lowest bit is 0, so it cannot be 0xFF and the next
// byte always gets 8 bits.
setp.gt.u32 %p522, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r9104, 1;
@%p522 bra $L__BB1_457;
st.global.u8 [%rd19], %rs1096;
add.s32 %r8514, %r8514, 1;
mov.u32 %r8520, 8;
mov.u16 %rs1096, 0;
mov.u32 %r9104, %r8723;
// %r9074 = number of run bits to emit for state %r8725:
// 0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, otherwise 5.
$L__BB1_457:
setp.lt.u32 %p523, %r8725, 3;
mov.u32 %r9074, 0;
@%p523 bra $L__BB1_460;
setp.lt.u32 %p524, %r8725, 6;
mov.u32 %r9074, 1;
@%p524 bra $L__BB1_460;
setp.lt.u32 %p525, %r8725, 9;
setp.eq.s32 %p526, %r8725, 11;
selp.b32 %r5148, 4, 5, %p526;
setp.lt.u32 %p527, %r8725, 11;
selp.b32 %r5149, 3, %r5148, %p527;
selp.b32 %r9074, 2, %r5149, %p525;
// Emit the %r9074 low bits of the run %r8726, most significant first, into
// the run-length stream (byte %rs1096, %r8520 bits still missing, bytes stored
// at %rd1 + %rd4 + 17477 + %r8514, limit 192 bytes).
// This block handles the first (%r9074 & 3) bits as peeled iterations and
// leaves %r9084 = index of the next bit to emit for the 4x unrolled loop.
// The running status ends up in %r9104 / %r9087 (1 after an overflow).
$L__BB1_460:
setp.eq.s32 %p528, %r9074, 0;
@%p528 bra $L__BB1_488;
add.s32 %r1132, %r9074, -1;
and.b32 %r1133, %r9074, 3;
setp.eq.s32 %p529, %r1133, 0;
mov.u32 %r9084, %r9074;
mov.u32 %r9087, %r9104;
@%p529 bra $L__BB1_473;
// Peeled bit 1: bit (%r9074 - 1) of the run. bfi builds (byte << 1) | bit.
mov.u32 %r5151, 1;
shl.b32 %r5152, %r5151, %r1132;
and.b32 %r5153, %r5152, %r8726;
setp.ne.s32 %p530, %r5153, 0;
selp.u32 %r5154, 1, 0, %p530;
cvt.u32.u16 %r5155, %rs1096;
bfi.b32 %r5156, %r5155, %r5154, 1, 8;
cvt.u16.u32 %rs1096, %r5156;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p531, %r8520, 0;
mov.u32 %r9087, %r9104;
@%p531 bra $L__BB1_465;
setp.gt.u32 %p532, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r9087, %r5151;
@%p532 bra $L__BB1_465;
// Store the completed byte; the next byte unconditionally gets 8 bits here
// (no 0xFF test is generated on this path).
add.s32 %r5160, %r8514, 17477;
cvt.u64.u32 %rd330, %r5160;
add.s64 %rd331, %rd330, %rd4;
add.s64 %rd332, %rd1, %rd331;
st.global.u8 [%rd332], %rs1096;
add.s32 %r8514, %r8514, 1;
mov.u32 %r8520, 8;
mov.u16 %rs1096, 0;
mov.u32 %r9087, %r9104;
$L__BB1_465:
setp.eq.s32 %p533, %r1133, 1;
mov.u32 %r9104, %r9087;
mov.u32 %r9084, %r1132;
@%p533 bra $L__BB1_473;
// Peeled bit 2: bit (%r9074 - 2) of the run.
add.s32 %r9084, %r9074, -2;
mov.u32 %r5161, 1;
shl.b32 %r5162, %r5161, %r9084;
and.b32 %r5163, %r5162, %r8726;
setp.ne.s32 %p534, %r5163, 0;
selp.u32 %r5164, 1, 0, %p534;
cvt.u32.u16 %r5165, %rs1096;
bfi.b32 %r5166, %r5165, %r5164, 1, 8;
cvt.u16.u32 %rs1096, %r5166;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p535, %r8520, 0;
mov.u32 %r9078, %r9087;
@%p535 bra $L__BB1_469;
setp.gt.u32 %p536, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r9078, %r5161;
@%p536 bra $L__BB1_469;
// Store the byte; a stored 0xFF leaves only 7 bits for the next byte.
add.s32 %r5169, %r8514, 17477;
cvt.u64.u32 %rd333, %r5169;
add.s64 %rd334, %rd333, %rd4;
add.s64 %rd335, %rd1, %rd334;
and.b16 %rs689, %rs1096, 255;
st.global.u8 [%rd335], %rs1096;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p537, %rs689, 255;
selp.b32 %r8520, 7, 8, %p537;
mov.u16 %rs1096, 0;
mov.u32 %r9078, %r9087;
$L__BB1_469:
setp.eq.s32 %p538, %r1133, 2;
mov.u32 %r9104, %r9078;
mov.u32 %r9087, %r9078;
@%p538 bra $L__BB1_473;
// Peeled bit 3: bit (%r9074 - 3) of the run.
add.s32 %r9084, %r9074, -3;
mov.u32 %r5170, 1;
shl.b32 %r5171, %r5170, %r9084;
and.b32 %r5172, %r5171, %r8726;
setp.ne.s32 %p539, %r5172, 0;
selp.u32 %r5173, 1, 0, %p539;
cvt.u32.u16 %r5174, %rs1096;
bfi.b32 %r5175, %r5174, %r5173, 1, 8;
cvt.u16.u32 %rs1096, %r5175;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p540, %r8520, 0;
mov.u32 %r9104, %r9078;
mov.u32 %r9087, %r9078;
@%p540 bra $L__BB1_473;
setp.gt.u32 %p541, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r9104, %r5170;
mov.u32 %r9087, %r5170;
@%p541 bra $L__BB1_473;
add.s32 %r5180, %r8514, 17477;
cvt.u64.u32 %rd336, %r5180;
add.s64 %rd337, %rd336, %rd4;
add.s64 %rd338, %rd1, %rd337;
and.b16 %rs692, %rs1096, 255;
st.global.u8 [%rd338], %rs1096;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p542, %rs692, 255;
selp.b32 %r8520, 7, 8, %p542;
mov.u16 %rs1096, 0;
mov.u32 %r9104, %r9078;
mov.u32 %r9087, %r9078;
// Fewer than 4 bits in total: everything was emitted above.
$L__BB1_473:
setp.lt.u32 %p543, %r1132, 3;
@%p543 bra $L__BB1_488;
mov.u32 %r9104, %r9087;
// 4x unrolled loop emitting run bits (%r9084 - 1) down to (%r9084 - 4) of
// %r8726 per iteration, most significant first, until %r9084 reaches 0.
// Each step shifts one bit into the byte, decrements the missing-bit count
// and, when the byte is complete, either flags overflow (status = 1 when
// %r8514 > 191) or stores it at %rd1 + %rd4 + 17477 + %r8514; a stored 0xFF
// leaves 7 bits for the next byte, anything else 8.
// Byte / missing-bit / status are threaded through the steps as
// (%rs1096, %r8520, %r9104) -> (%r9093, %r9094, %r9092) ->
// (%r9097, %r9098, %r9096) -> (%r9101, %r9102, %r9100) -> back to the first.
$L__BB1_475:
// Step 1: bit (%r9084 - 1).
add.s32 %r5181, %r9084, -1;
mov.u32 %r5182, 1;
shl.b32 %r5183, %r5182, %r5181;
and.b32 %r5184, %r5183, %r8726;
setp.ne.s32 %p544, %r5184, 0;
selp.u32 %r5185, 1, 0, %p544;
cvt.u32.u16 %r5186, %rs1096;
bfi.b32 %r9093, %r5186, %r5185, 1, 8;
add.s32 %r9094, %r8520, -1;
setp.ne.s32 %p545, %r9094, 0;
mov.u32 %r9092, %r9104;
@%p545 bra $L__BB1_478;
setp.gt.u32 %p546, %r8514, 191;
mov.u32 %r9094, 0;
mov.u32 %r9092, %r5182;
@%p546 bra $L__BB1_478;
cvt.u16.u32 %rs693, %r9093;
and.b16 %rs694, %rs693, 255;
add.s32 %r5190, %r8514, 17477;
cvt.u64.u32 %rd339, %r5190;
add.s64 %rd340, %rd339, %rd4;
add.s64 %rd341, %rd1, %rd340;
st.global.u8 [%rd341], %rs693;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p547, %rs694, 255;
selp.b32 %r9094, 7, 8, %p547;
mov.u32 %r9093, 0;
mov.u32 %r9092, %r9104;
// Step 2: bit (%r9084 - 2).
$L__BB1_478:
add.s32 %r5191, %r9084, -2;
shl.b32 %r5193, %r5182, %r5191;
and.b32 %r5194, %r5193, %r8726;
setp.ne.s32 %p548, %r5194, 0;
and.b32 %r5195, %r9093, 127;
selp.u32 %r5196, 1, 0, %p548;
bfi.b32 %r9097, %r5195, %r5196, 1, 7;
add.s32 %r9098, %r9094, -1;
setp.ne.s32 %p549, %r9098, 0;
mov.u32 %r9096, %r9092;
@%p549 bra $L__BB1_481;
setp.gt.u32 %p550, %r8514, 191;
mov.u32 %r9098, 0;
mov.u32 %r9096, 1;
@%p550 bra $L__BB1_481;
cvt.u16.u32 %rs695, %r9097;
and.b16 %rs696, %rs695, 255;
add.s32 %r5200, %r8514, 17477;
cvt.u64.u32 %rd342, %r5200;
add.s64 %rd343, %rd342, %rd4;
add.s64 %rd344, %rd1, %rd343;
st.global.u8 [%rd344], %rs695;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p551, %rs696, 255;
selp.b32 %r9098, 7, 8, %p551;
mov.u32 %r9097, 0;
mov.u32 %r9096, %r9092;
// Step 3: bit (%r9084 - 3).
$L__BB1_481:
add.s32 %r5201, %r9084, -3;
mov.u32 %r5202, 1;
shl.b32 %r5203, %r5202, %r5201;
and.b32 %r5204, %r5203, %r8726;
setp.ne.s32 %p552, %r5204, 0;
and.b32 %r5205, %r9097, 127;
selp.u32 %r5206, 1, 0, %p552;
bfi.b32 %r9101, %r5205, %r5206, 1, 7;
add.s32 %r9102, %r9098, -1;
setp.ne.s32 %p553, %r9102, 0;
mov.u32 %r9100, %r9096;
@%p553 bra $L__BB1_484;
setp.gt.u32 %p554, %r8514, 191;
mov.u32 %r9102, 0;
mov.u32 %r9100, %r5202;
@%p554 bra $L__BB1_484;
cvt.u16.u32 %rs697, %r9101;
and.b16 %rs698, %rs697, 255;
add.s32 %r5210, %r8514, 17477;
cvt.u64.u32 %rd345, %r5210;
add.s64 %rd346, %rd345, %rd4;
add.s64 %rd347, %rd1, %rd346;
st.global.u8 [%rd347], %rs697;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p555, %rs698, 255;
selp.b32 %r9102, 7, 8, %p555;
mov.u32 %r9101, 0;
mov.u32 %r9100, %r9096;
// Step 4: bit (%r9084 - 4); also performs the loop-counter decrement.
$L__BB1_484:
add.s32 %r9084, %r9084, -4;
shl.b32 %r5212, %r5202, %r9084;
and.b32 %r5213, %r5212, %r8726;
setp.ne.s32 %p556, %r5213, 0;
and.b32 %r5214, %r9101, 127;
selp.u32 %r5215, 1, 0, %p556;
bfi.b32 %r5216, %r5214, %r5215, 1, 15;
cvt.u16.u32 %rs1096, %r5216;
add.s32 %r8520, %r9102, -1;
setp.ne.s32 %p557, %r8520, 0;
mov.u32 %r9104, %r9100;
@%p557 bra $L__BB1_487;
setp.gt.u32 %p558, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r9104, 1;
@%p558 bra $L__BB1_487;
add.s32 %r5219, %r8514, 17477;
cvt.u64.u32 %rd348, %r5219;
add.s64 %rd349, %rd348, %rd4;
add.s64 %rd350, %rd1, %rd349;
and.b16 %rs700, %rs1096, 255;
st.global.u8 [%rd350], %rs1096;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p559, %rs700, 255;
selp.b32 %r8520, 7, 8, %p559;
mov.u16 %rs1096, 0;
mov.u32 %r9104, %r9100;
$L__BB1_487:
setp.ne.s32 %p560, %r9084, 0;
@%p560 bra $L__BB1_475;
// After a run has been terminated: reset the run %r8726 to 0, decrement the
// state index %r8725 (saturating at 0), and recompute the threshold
// %r8724 = 1 << e, where e depends on the new state:
// 0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, otherwise 5.
// The running status %r9104 is copied to %r8723.
$L__BB1_488:
add.s32 %r5221, %r8725, -1;
setp.eq.s32 %p561, %r8725, 0;
mov.u32 %r8726, 0;
selp.b32 %r8725, 0, %r5221, %p561;
setp.lt.u32 %p562, %r8725, 3;
mov.u32 %r9110, %r8726;
@%p562 bra $L__BB1_491;
setp.lt.u32 %p563, %r8725, 6;
mov.u32 %r9110, 1;
@%p563 bra $L__BB1_491;
setp.lt.u32 %p564, %r8725, 9;
setp.eq.s32 %p565, %r8725, 11;
selp.b32 %r5223, 4, 5, %p565;
setp.lt.u32 %p566, %r8725, 11;
selp.b32 %r5224, 3, %r5223, %p566;
selp.b32 %r9110, 2, %r5224, %p564;
$L__BB1_491:
mov.u32 %r5226, 1;
shl.b32 %r8724, %r5226, %r9110;
mov.u32 %r8723, %r9104;
bra.uni $L__BB1_500;
// Zero-mask case of the run-length coder: extend the run %r8726 by one.
// Nothing else happens while the run is below the threshold %r8724.
// When the threshold is reached: emit a 1 bit, reset the run to 0, increment
// the state index %r8725 (capped at 12) and recompute the threshold
// %r8724 = 1 << e (0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4,
// otherwise 5). %r8723 receives the status (1 after an overflow).
$L__BB1_492:
add.s32 %r8726, %r8726, 1;
setp.lt.u32 %p567, %r8726, %r8724;
@%p567 bra $L__BB1_500;
// Emit a 1 bit into the byte %rs1096.
shl.b16 %rs701, %rs1096, 1;
or.b16 %rs1096, %rs701, 1;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p568, %r8520, 0;
mov.u32 %r9111, %r8723;
@%p568 bra $L__BB1_496;
// Byte complete: overflow when 192 bytes were already stored, otherwise
// store at %rd19; a stored 0xFF leaves 7 bits for the next byte.
setp.gt.u32 %p569, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r9111, 1;
@%p569 bra $L__BB1_496;
and.b16 %rs703, %rs1096, 255;
st.global.u8 [%rd19], %rs1096;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p570, %rs703, 255;
selp.b32 %r8520, 7, 8, %p570;
mov.u16 %rs1096, 0;
mov.u32 %r9111, %r8723;
$L__BB1_496:
add.s32 %r5230, %r8725, 1;
min.u32 %r8725, %r5230, 12;
setp.lt.u32 %p571, %r8725, 3;
mov.u32 %r8726, 0;
mov.u32 %r9114, %r8726;
@%p571 bra $L__BB1_499;
setp.lt.u32 %p572, %r8725, 6;
mov.u32 %r9114, 1;
@%p572 bra $L__BB1_499;
setp.lt.u32 %p573, %r8725, 9;
setp.eq.s32 %p574, %r8725, 11;
selp.b32 %r5232, 4, 5, %p574;
setp.lt.u32 %p575, %r8725, 11;
selp.b32 %r5233, 3, %r5232, %p575;
selp.b32 %r9114, 2, %r5233, %p573;
$L__BB1_499:
mov.u32 %r5235, 1;
shl.b32 %r8724, %r5235, %r9114;
mov.u32 %r8723, %r9111;
// For each of the four samples whose bit is set in the mask %r9040, append
// n_i = %r1102 - ((%r1216 >> i) & 1) low bits of the sample value to the
// forward-growing byte stream at %rd1 + %rd4 + %r9150. %r1216 holds the low
// 4 bits of the table entry %rs155. Sample values: %r9034, %r9038, %r9054,
// %r9053 for i = 0..3. A sample with n_i == 0 emits nothing.
// Writer state: %r9147 = byte being assembled, %r9148 = bits used in it,
// %r9149 = capacity of the current byte (7 after a stored 0xFF, else 8),
// %r9150 = next byte offset (overflow once it exceeds 17476).
// Status flows %r9176 -> %r9131 -> %r9146 -> %r9161 -> %r9176 and becomes 1
// on overflow.
// NOTE(review): this matches the HTJ2K MagSgn stream — confirm against the
// CUDA source.
$L__BB1_500:
and.b16 %rs704, %rs155, 15;
cvt.u32.u16 %r1216, %rs704;
// Sample 0: %p579 is true when mask bit 0 is clear.
and.b32 %r5236, %r9040, 1;
setp.eq.b32 %p576, %r5236, 1;
mov.pred %p577, 0;
xor.pred %p578, %p576, %p577;
not.pred %p579, %p578;
mov.u32 %r9131, %r9176;
@%p579 bra $L__BB1_507;
and.b32 %r5237, %r1216, 1;
sub.s32 %r9121, %r1102, %r5237;
setp.eq.s32 %p580, %r9121, 0;
mov.u32 %r9131, %r9176;
@%p580 bra $L__BB1_507;
// Keep only the n_0 low bits of the value.
mov.u32 %r5238, -1;
shl.b32 %r5239, %r5238, %r9121;
not.b32 %r5240, %r5239;
and.b32 %r9122, %r9034, %r5240;
// Emit loop: %r9121 = bits left, %r9122 = value bits left (LSB first).
$L__BB1_503:
setp.gt.u32 %p581, %r9150, 17476;
mov.u32 %r9131, 1;
@%p581 bra $L__BB1_507;
// %r5243 = min(free bits in the current byte, bits left).
sub.s32 %r5242, %r9149, %r9148;
min.u32 %r5243, %r5242, %r9121;
setp.eq.s32 %p582, %r5243, 32;
mov.u32 %r5244, -1;
shl.b32 %r5245, %r5244, %r5243;
not.b32 %r5246, %r5245;
selp.b32 %r5247, -1, %r5246, %p582;
and.b32 %r5248, %r5247, %r9122;
shl.b32 %r5249, %r5248, %r9148;
or.b32 %r9147, %r5249, %r9147;
add.s32 %r9148, %r5243, %r9148;
shr.u32 %r9122, %r9122, %r5243;
sub.s32 %r9121, %r9121, %r5243;
setp.lt.u32 %p583, %r9148, %r9149;
@%p583 bra $L__BB1_506;
// Byte full: store it and pick the capacity of the next byte.
cvt.u64.u32 %rd351, %r9150;
add.s64 %rd352, %rd351, %rd4;
add.s64 %rd353, %rd1, %rd352;
st.global.u8 [%rd353], %r9147;
add.s32 %r9150, %r9150, 1;
setp.eq.s32 %p584, %r9147, 255;
selp.b32 %r9149, 7, 8, %p584;
mov.u32 %r9147, 0;
mov.u32 %r9148, %r9147;
$L__BB1_506:
setp.ne.s32 %p585, %r9121, 0;
mov.u32 %r9131, %r9176;
@%p585 bra $L__BB1_503;
// Sample 1 (mask bit 1, value %r9038); same loop shape as sample 0.
$L__BB1_507:
and.b32 %r1240, %r9040, 2;
setp.eq.s32 %p586, %r1240, 0;
mov.u32 %r9146, %r9131;
@%p586 bra $L__BB1_514;
shr.u32 %r5252, %r1216, 1;
and.b32 %r5253, %r5252, 1;
sub.s32 %r9136, %r1102, %r5253;
setp.eq.s32 %p587, %r9136, 0;
mov.u32 %r9146, %r9131;
@%p587 bra $L__BB1_514;
mov.u32 %r5254, -1;
shl.b32 %r5255, %r5254, %r9136;
not.b32 %r5256, %r5255;
and.b32 %r9137, %r9038, %r5256;
$L__BB1_510:
setp.gt.u32 %p588, %r9150, 17476;
mov.u32 %r9146, 1;
@%p588 bra $L__BB1_514;
sub.s32 %r5258, %r9149, %r9148;
min.u32 %r5259, %r5258, %r9136;
setp.eq.s32 %p589, %r5259, 32;
mov.u32 %r5260, -1;
shl.b32 %r5261, %r5260, %r5259;
not.b32 %r5262, %r5261;
selp.b32 %r5263, -1, %r5262, %p589;
and.b32 %r5264, %r5263, %r9137;
shl.b32 %r5265, %r5264, %r9148;
or.b32 %r9147, %r5265, %r9147;
add.s32 %r9148, %r5259, %r9148;
shr.u32 %r9137, %r9137, %r5259;
sub.s32 %r9136, %r9136, %r5259;
setp.lt.u32 %p590, %r9148, %r9149;
@%p590 bra $L__BB1_513;
cvt.u64.u32 %rd354, %r9150;
add.s64 %rd355, %rd354, %rd4;
add.s64 %rd356, %rd1, %rd355;
st.global.u8 [%rd356], %r9147;
add.s32 %r9150, %r9150, 1;
setp.eq.s32 %p591, %r9147, 255;
selp.b32 %r9149, 7, 8, %p591;
mov.u32 %r9147, 0;
mov.u32 %r9148, %r9147;
$L__BB1_513:
setp.ne.s32 %p592, %r9136, 0;
mov.u32 %r9146, %r9131;
@%p592 bra $L__BB1_510;
// Sample 2 (mask bit 2, value %r9054).
$L__BB1_514:
and.b32 %r1264, %r9040, 4;
setp.eq.s32 %p593, %r1264, 0;
mov.u32 %r9161, %r9146;
@%p593 bra $L__BB1_521;
shr.u32 %r5268, %r1216, 2;
and.b32 %r5269, %r5268, 1;
sub.s32 %r9151, %r1102, %r5269;
setp.eq.s32 %p594, %r9151, 0;
mov.u32 %r9161, %r9146;
@%p594 bra $L__BB1_521;
mov.u32 %r5270, -1;
shl.b32 %r5271, %r5270, %r9151;
not.b32 %r5272, %r5271;
and.b32 %r9152, %r9054, %r5272;
$L__BB1_517:
setp.gt.u32 %p595, %r9150, 17476;
mov.u32 %r9161, 1;
@%p595 bra $L__BB1_521;
sub.s32 %r5274, %r9149, %r9148;
min.u32 %r5275, %r5274, %r9151;
setp.eq.s32 %p596, %r5275, 32;
mov.u32 %r5276, -1;
shl.b32 %r5277, %r5276, %r5275;
not.b32 %r5278, %r5277;
selp.b32 %r5279, -1, %r5278, %p596;
and.b32 %r5280, %r5279, %r9152;
shl.b32 %r5281, %r5280, %r9148;
or.b32 %r9147, %r5281, %r9147;
add.s32 %r9148, %r5275, %r9148;
shr.u32 %r9152, %r9152, %r5275;
sub.s32 %r9151, %r9151, %r5275;
setp.lt.u32 %p597, %r9148, %r9149;
@%p597 bra $L__BB1_520;
cvt.u64.u32 %rd357, %r9150;
add.s64 %rd358, %rd357, %rd4;
add.s64 %rd359, %rd1, %rd358;
st.global.u8 [%rd359], %r9147;
add.s32 %r9150, %r9150, 1;
setp.eq.s32 %p598, %r9147, 255;
selp.b32 %r9149, 7, 8, %p598;
mov.u32 %r9147, 0;
mov.u32 %r9148, %r9147;
$L__BB1_520:
setp.ne.s32 %p599, %r9151, 0;
mov.u32 %r9161, %r9146;
@%p599 bra $L__BB1_517;
// Sample 3 (mask bit 3, value %r9053); %r1216 >> 3 is already a single bit.
$L__BB1_521:
and.b32 %r1288, %r9040, 8;
setp.eq.s32 %p600, %r1288, 0;
mov.u32 %r9176, %r9161;
@%p600 bra $L__BB1_528;
shr.u32 %r5284, %r1216, 3;
sub.s32 %r9166, %r1102, %r5284;
setp.eq.s32 %p601, %r9166, 0;
mov.u32 %r9176, %r9161;
@%p601 bra $L__BB1_528;
mov.u32 %r5285, -1;
shl.b32 %r5286, %r5285, %r9166;
not.b32 %r5287, %r5286;
and.b32 %r9167, %r9053, %r5287;
$L__BB1_524:
setp.gt.u32 %p602, %r9150, 17476;
mov.u32 %r9176, 1;
@%p602 bra $L__BB1_528;
sub.s32 %r5289, %r9149, %r9148;
min.u32 %r5290, %r5289, %r9166;
setp.eq.s32 %p603, %r5290, 32;
mov.u32 %r5291, -1;
shl.b32 %r5292, %r5291, %r5290;
not.b32 %r5293, %r5292;
selp.b32 %r5294, -1, %r5293, %p603;
and.b32 %r5295, %r5294, %r9167;
shl.b32 %r5296, %r5295, %r9148;
or.b32 %r9147, %r5296, %r9147;
add.s32 %r9148, %r5290, %r9148;
shr.u32 %r9167, %r9167, %r5290;
sub.s32 %r9166, %r9166, %r5290;
setp.lt.u32 %p604, %r9148, %r9149;
@%p604 bra $L__BB1_527;
cvt.u64.u32 %rd360, %r9150;
add.s64 %rd361, %rd360, %rd4;
add.s64 %rd362, %rd1, %rd361;
st.global.u8 [%rd362], %r9147;
add.s32 %r9150, %r9150, 1;
setp.eq.s32 %p605, %r9147, 255;
selp.b32 %r9149, 7, 8, %p605;
mov.u32 %r9147, 0;
mov.u32 %r9148, %r9147;
$L__BB1_527:
setp.ne.s32 %p606, %r9166, 0;
mov.u32 %r9176, %r9161;
@%p606 bra $L__BB1_524;
$L__BB1_528:
// Shared-memory bookkeeping done before the next pair of columns is examined.
// %r1312 = %r4095 + %r9014 addresses a byte array in shared memory
// (presumably the kernel's cleanup_e_val buffer - TODO confirm where %r4095 is set).
add.s32 %r1312, %r4095, %r9014;
ld.shared.u8 %rs705, [%r1312];
// %r9016 is cleared here; it becomes %r9350 if the guard at the end branches out.
mov.u32 %r9016, 0;
// byte[0] = max(byte[0], low byte of %r9037), compared as unsigned 8-bit values.
cvt.u32.u16 %r5302, %rs705;
and.b32 %r5303, %r5302, 255;
and.b32 %r5304, %r9037, 255;
setp.lt.u32 %p607, %r5304, %r5303;
cvt.u16.u32 %rs706, %r9037;
selp.b16 %rs707, %rs705, %rs706, %p607;
st.shared.u8 [%r1312], %rs707;
// Pick the larger of byte[+1] and byte[+2] by selecting its index and
// re-loading it; %r9013 = that value - 1.
ld.shared.u8 %rs177, [%r1312+2];
ld.shared.u8 %rs708, [%r1312+1];
setp.gt.u16 %p608, %rs708, %rs177;
add.s32 %r9346, %r9014, 1;
add.s32 %r5305, %r9014, 2;
selp.b32 %r5306, %r9346, %r5305, %p608;
add.s32 %r5307, %r4095, %r5306;
ld.shared.u8 %rs178, [%r5307];
cvt.u32.u16 %r5308, %rs178;
and.b32 %r5309, %r5308, 255;
add.s32 %r9013, %r5309, -1;
// %rs179 keeps a 16-bit copy of %r9051 for a later compare.
cvt.u16.u32 %rs179, %r9051;
cvt.u16.u32 %rs709, %r1240;
shr.u16 %rs710, %rs709, 1;
// %r1315 = cleanup_cx_val base + %r9012 (second shared byte array).
mov.u32 %r5310, _ZZ32 j2k_htj2k_encode_codeblocksE14cleanup_cx_val;
add.s32 %r1315, %r5310, %r9012;
// byte[+1] of the first array is overwritten with the low byte of %r9051
// (after the old value was read into %rs708 above).
st.shared.u8 [%r1312+1], %r9051;
// cx[0] |= (%r1240 >> 1).
ld.shared.u8 %rs711, [%r1315];
or.b16 %rs712, %rs711, %rs710;
st.shared.u8 [%r1315], %rs712;
add.s32 %r9012, %r9012, 1;
// Save the old cx[+1] and cx[+2] before cx[+1] is replaced by bit 3 of
// %r9040 (as 0/1; %r1288 = %r9040 & 8).
ld.shared.u8 %rs180, [%r1315+1];
ld.shared.u8 %r1317, [%r1315+2];
shr.u32 %r1318, %r1288, 3;
st.shared.u8 [%r1315+1], %r1318;
// Bounds guard: when %r9011 + 2 >= %r5 the following section is skipped
// and %r9350 is left at 0.
add.s32 %r5311, %r9011, 2;
setp.ge.u32 %p609, %r5311, %r5;
mov.u32 %r9350, %r9016;
@%p609 bra $L__BB1_635;
// Load one 32-bit coefficient from %rd3 + 4 * (%r9345 + %rd5) and derive
// three values from it:
//   %r9182 = bit length, %r9183 = payload value, %r9189 = significance mask
//   (bit 0 set when this coefficient is non-zero after the shifts below).
cvt.u64.u32 %rd363, %r9345;
add.s64 %rd364, %rd363, %rd5;
shl.b64 %rd365, %rd364, 2;
add.s64 %rd366, %rd3, %rd365;
ld.global.u32 %r1319, [%rd366];
setp.eq.s32 %p610, %r1319, 0;
mov.u32 %r9182, 0;
mov.u32 %r9181, %r9182;
@%p610 bra $L__BB1_531;
// Non-zero input: convert two's complement to sign-magnitude with the
// magnitude shifted left by %r1006: %r9181 = sign bit | (|x| << %r1006).
and.b32 %r5313, %r1319, -2147483648;
abs.s32 %r5314, %r1319;
shl.b32 %r5315, %r5314, %r1006;
or.b32 %r9181, %r5315, %r5313;
$L__BB1_531:
// Drop the sign bit (shift left by 1), shift right by %r43 and clear bit 0.
// %r1322 is therefore always even.
shl.b32 %r5319, %r9181, 1;
shr.u32 %r5320, %r5319, %r43;
and.b32 %r1322, %r5320, -2;
setp.eq.s32 %p611, %r1322, 0;
// Defaults for an insignificant coefficient: payload 0, mask 0.
mov.u32 %r9183, %r9182;
mov.u32 %r9189, %r9182;
@%p611 bra $L__BB1_533;
// %r9182 = 32 - clz(%r1322 - 1), i.e. the bit length of (%r1322 - 1).
add.s32 %r5322, %r1322, -1;
clz.b32 %r5323, %r5322;
mov.u32 %r5324, 32;
sub.s32 %r9182, %r5324, %r5323;
// %r9183 = %r1322 + sign - 2, which puts the sign in bit 0 of the payload.
shr.u32 %r5325, %r9181, 31;
add.s32 %r5326, %r5325, %r1322;
add.s32 %r9183, %r5326, -2;
mov.u32 %r9189, 1;
$L__BB1_533:
mov.u32 %r9186, 0;
mov.u32 %r9185, %r9186;
@%p493 bra $L__BB1_536;
add.s32 %r5329, %r9345, %r1;
cvt.u64.u32 %rd367, %r5329;
add.s64 %rd368, %rd367, %rd5;
shl.b64 %rd369, %rd368, 2;
add.s64 %rd370, %rd3, %rd369;
ld.global.u32 %r1328, [%rd370];
setp.eq.s32 %p613, %r1328, 0;
@%p613 bra $L__BB1_536;
and.b32 %r5330, %r1328, -2147483648;
abs.s32 %r5331, %r1328;
shl.b32 %r5332, %r5331, %r1006;
or.b32 %r9185, %r5332, %r5330;
$L__BB1_536:
shl.b32 %r5335, %r9185, 1;
shr.u32 %r5336, %r5335, %r43;
and.b32 %r1331, %r5336, -2;
setp.eq.s32 %p614, %r1331, 0;
mov.u32 %r9187, %r9186;
mov.u32 %r9204, %r9182;
@%p614 bra $L__BB1_538;
or.b32 %r9189, %r9189, 2;
add.s32 %r5337, %r1331, -1;
clz.b32 %r5338, %r5337;
mov.u32 %r5339, 32;
sub.s32 %r9186, %r5339, %r5338;
max.s32 %r9204, %r9182, %r9186;
shr.u32 %r5340, %r9185, 31;
add.s32 %r5341, %r5340, %r1331;
add.s32 %r9187, %r5341, -2;
$L__BB1_538:
add.s32 %r9206, %r9345, 1;
add.s32 %r5346, %r9011, 3;
setp.ge.u32 %p615, %r5346, %r5;
mov.u32 %r9207, 0;
mov.u32 %r9200, %r9207;
mov.u32 %r9201, %r9207;
mov.u32 %r9202, %r9207;
mov.u32 %r9203, %r9207;
@%p615 bra $L__BB1_549;
cvt.u64.u32 %rd371, %r9206;
add.s64 %rd372, %rd371, %rd5;
shl.b64 %rd373, %rd372, 2;
add.s64 %rd374, %rd3, %rd373;
ld.global.u32 %r1341, [%rd374];
setp.eq.s32 %p616, %r1341, 0;
mov.u32 %r9201, 0;
mov.u32 %r9190, %r9201;
@%p616 bra $L__BB1_541;
and.b32 %r5348, %r1341, -2147483648;
abs.s32 %r5349, %r1341;
shl.b32 %r5350, %r5349, %r1006;
or.b32 %r9190, %r5350, %r5348;
$L__BB1_541:
shl.b32 %r5353, %r9190, 1;
shr.u32 %r5354, %r5353, %r43;
and.b32 %r1344, %r5354, -2;
setp.eq.s32 %p617, %r1344, 0;
mov.u32 %r9203, %r9201;
@%p617 bra $L__BB1_543;
or.b32 %r9189, %r9189, 4;
add.s32 %r5355, %r1344, -1;
clz.b32 %r5356, %r5355;
mov.u32 %r5357, 32;
sub.s32 %r9201, %r5357, %r5356;
max.s32 %r9204, %r9204, %r9201;
shr.u32 %r5358, %r9190, 31;
add.s32 %r5359, %r5358, %r1344;
add.s32 %r9203, %r5359, -2;
$L__BB1_543:
mov.u32 %r9200, 0;
mov.u32 %r9195, %r9200;
@%p493 bra $L__BB1_546;
add.s32 %r5362, %r9206, %r1;
cvt.u64.u32 %rd375, %r5362;
add.s64 %rd376, %rd375, %rd5;
shl.b64 %rd377, %rd376, 2;
add.s64 %rd378, %rd3, %rd377;
ld.global.u32 %r1353, [%rd378];
setp.eq.s32 %p619, %r1353, 0;
@%p619 bra $L__BB1_546;
and.b32 %r5363, %r1353, -2147483648;
abs.s32 %r5364, %r1353;
shl.b32 %r5365, %r5364, %r1006;
or.b32 %r9195, %r5365, %r5363;
$L__BB1_546:
shl.b32 %r5368, %r9195, 1;
shr.u32 %r5369, %r5368, %r43;
and.b32 %r1356, %r5369, -2;
setp.eq.s32 %p620, %r1356, 0;
mov.u32 %r9202, %r9200;
@%p620 bra $L__BB1_548;
or.b32 %r9189, %r9189, 8;
add.s32 %r5370, %r1356, -1;
clz.b32 %r5371, %r5370;
mov.u32 %r5372, 32;
sub.s32 %r9200, %r5372, %r5371;
max.s32 %r9204, %r9204, %r9200;
shr.u32 %r5373, %r9195, 31;
add.s32 %r5374, %r5373, %r1356;
add.s32 %r9202, %r5374, -2;
$L__BB1_548:
add.s32 %r9206, %r9345, 2;
$L__BB1_549:
// Build the lookup index for the current group of four samples and fetch its
// 16-bit table entry from %rd18.
mov.u32 %r9345, %r9206;
// %r1373 = (%r1288 >> 2) | (%r1264 >> 1) | ((%r1317 << 2) + (%rs180 & 255)).
// NOTE(review): looks like a neighbourhood context built from earlier
// significance flags - confirm against the kernel source.
shr.u32 %r5376, %r1288, 2;
shr.u32 %r5377, %r1264, 1;
or.b32 %r5378, %r5376, %r5377;
cvt.u32.u16 %r5379, %rs180;
and.b32 %r5380, %r5379, 255;
shl.b32 %r5381, %r1317, 2;
add.s32 %r5382, %r5381, %r5380;
or.b32 %r1373, %r5378, %r5382;
// %p621: the significance mask %r9189 has more than one bit set
// ((m - 1) & m != 0).
add.s32 %r5383, %r9189, -1;
and.b32 %r5384, %r5383, %r9189;
setp.ne.s32 %p621, %r5384, 0;
// Base value %r5385 = %r9013 when %rs178 > 2 and more than one bit is set,
// otherwise 1.
setp.gt.u16 %p622, %rs178, 2;
and.pred %p623, %p622, %p621;
selp.b32 %r5385, %r9013, 1, %p623;
// %r1374 = max(base, largest bit length %r9204); %r9350 = the excess over base.
max.s32 %r1374, %r5385, %r9204;
sub.s32 %r9350, %r1374, %r5385;
setp.lt.s32 %p624, %r9350, 1;
@%p624 bra $L__BB1_551;
// Excess > 0: build a 4-bit mask in %r9207 with bit i set when sample i's
// bit length (%r9182, %r9186, %r9201, %r9200) equals the maximum %r9204.
setp.eq.s32 %p625, %r9182, %r9204;
selp.u32 %r5386, 1, 0, %p625;
setp.eq.s32 %p626, %r9186, %r9204;
selp.u32 %r5387, -1, 0, %p626;
// bfi inserts bit 0 of %r5387 into %r5386 at bit position 1.
bfi.b32 %r5388, %r5387, %r5386, 1, 1;
setp.eq.s32 %p627, %r9201, %r9204;
selp.u16 %rs714, 1, 0, %p627;
mul.wide.u16 %r5389, %rs714, 4;
or.b32 %r5390, %r5388, %r5389;
setp.eq.s32 %p628, %r9200, %r9204;
selp.u16 %rs715, 1, 0, %p628;
mul.wide.u16 %r5391, %rs715, 8;
or.b32 %r9207, %r5390, %r5391;
$L__BB1_551:
// Table index = (%r1373 << 8) | (%r9189 << 4) | %r9207; entries are 2 bytes.
shl.b32 %r5392, %r9189, 4;
shl.b32 %r5393, %r1373, 8;
or.b32 %r5394, %r5392, %r5393;
or.b32 %r5395, %r5394, %r9207;
mul.wide.u32 %rd380, %r5395, 2;
add.s64 %rd381, %rd18, %rd380;
ld.global.u16 %rs181, [%rd381];
// Entry layout as consumed in this file: bits 6..4 = codeword length,
// bits 15..8 = codeword bits, bits 3..0 = per-sample flags read after
// $L__BB1_606.
shr.u16 %rs716, %rs181, 4;
and.b16 %rs182, %rs716, 7;
setp.eq.s16 %p629, %rs182, 0;
// Zero-length codeword: nothing to emit, %r9219 inherits %r9070.
mov.u32 %r9219, %r9070;
@%p629 bra $L__BB1_558;
// %r9208 = bits left to emit, %r9209 = codeword value.
cvt.u32.u16 %r9208, %rs182;
shr.u16 %rs717, %rs181, 8;
cvt.u32.u16 %r9209, %rs717;
$L__BB1_553:
// Emit %r9208 bits of %r9209 into a byte stream that is written at
// descending addresses: each finished byte goes to
// %rd1 + %rd4 + (20548 - %r8962).
// Packer state: %rs1165 = byte being filled, %r8961 = bits already in it,
// %r8960 = 0/1 count of bit positions withheld from the current byte,
// %r8962 = number of bytes written so far.
// %r9219 leaves the loop as %r9070, or as 1 when %r8962 exceeds 2879.
mov.u32 %r1380, %r9208;
setp.gt.u32 %p630, %r8962, 2879;
mov.u32 %r9219, 1;
@%p630 bra $L__BB1_558;
// room = 8 - %r8960 - %r8961; chunk = min(room, bits left).
mov.u32 %r5397, 8;
sub.s32 %r5398, %r5397, %r8960;
sub.s32 %r5399, %r5398, %r8961;
min.u32 %r5400, %r5399, %r1380;
// Mask of the low `chunk` bits (forced to all ones when chunk == 32).
setp.eq.s32 %p631, %r5400, 32;
mov.u32 %r5401, -1;
shl.b32 %r5402, %r5401, %r5400;
not.b32 %r5403, %r5402;
selp.b32 %r5404, -1, %r5403, %p631;
// OR the chunk into the byte above the bits already present.
and.b32 %r5405, %r5404, %r9209;
shl.b32 %r5406, %r5405, %r8961;
cvt.u16.u32 %rs718, %r5406;
or.b16 %rs1165, %rs1165, %rs718;
add.s32 %r8961, %r5400, %r8961;
sub.s32 %r9208, %r1380, %r5400;
shr.u32 %r9209, %r9209, %r5400;
// room > bits just requested: the byte still has space, keep accumulating.
setp.gt.u32 %p632, %r5399, %r1380;
@%p632 bra $L__BB1_557;
// The available room is used up. If a bit position was withheld
// (%r8960 != 0) and the byte is not 0x7F, release it (%r8960 = 0) and keep
// filling this same byte instead of storing it.
setp.ne.s32 %p633, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs719, %rs1165, 255;
setp.ne.s16 %p634, %rs719, 127;
and.pred %p635, %p633, %p634;
@%p635 bra $L__BB1_557;
// Store the finished byte at offset 20548 - %r8962 and count it.
mov.u32 %r5409, 20548;
sub.s32 %r5410, %r5409, %r8962;
cvt.u64.u32 %rd382, %r5410;
add.s64 %rd383, %rd382, %rd4;
add.s64 %rd384, %rd1, %rd383;
st.global.u8 [%rd384], %rs1165;
add.s32 %r8962, %r8962, 1;
// A stored byte greater than 0x8F (143) withholds one bit of the next byte
// (presumably to avoid forbidden marker patterns when the stream is read
// back to front - TODO confirm against the HTJ2K spec).
setp.gt.u16 %p636, %rs719, 143;
selp.u32 %r8960, 1, 0, %p636;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_557:
// Repeat while codeword bits remain; the normal exit sets %r9219 = %r9070.
setp.ne.s32 %p637, %r9208, 0;
mov.u32 %r9219, %r9070;
@%p637 bra $L__BB1_553;
$L__BB1_558:
setp.ne.s32 %p638, %r1373, 0;
@%p638 bra $L__BB1_606;
setp.eq.s32 %p639, %r9189, 0;
add.s32 %r5411, %r8514, 17477;
cvt.u64.u32 %rd385, %r5411;
add.s64 %rd386, %rd385, %rd4;
add.s64 %rd20, %rd1, %rd386;
@%p639 bra $L__BB1_598;
shl.b16 %rs1096, %rs1096, 1;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p640, %r8520, 0;
mov.u32 %r9253, %r8723;
@%p640 bra $L__BB1_563;
setp.gt.u32 %p641, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r9253, 1;
@%p641 bra $L__BB1_563;
st.global.u8 [%rd20], %rs1096;
add.s32 %r8514, %r8514, 1;
mov.u32 %r8520, 8;
mov.u16 %rs1096, 0;
mov.u32 %r9253, %r8723;
$L__BB1_563:
setp.lt.u32 %p642, %r8725, 3;
mov.u32 %r9223, 0;
@%p642 bra $L__BB1_566;
setp.lt.u32 %p643, %r8725, 6;
mov.u32 %r9223, 1;
@%p643 bra $L__BB1_566;
setp.lt.u32 %p644, %r8725, 9;
setp.eq.s32 %p645, %r8725, 11;
selp.b32 %r5417, 4, 5, %p645;
setp.lt.u32 %p646, %r8725, 11;
selp.b32 %r5418, 3, %r5417, %p646;
selp.b32 %r9223, 2, %r5418, %p644;
$L__BB1_566:
setp.eq.s32 %p647, %r9223, 0;
@%p647 bra $L__BB1_594;
add.s32 %r1404, %r9223, -1;
and.b32 %r1405, %r9223, 3;
setp.eq.s32 %p648, %r1405, 0;
mov.u32 %r9233, %r9223;
mov.u32 %r9236, %r9253;
@%p648 bra $L__BB1_579;
mov.u32 %r5420, 1;
shl.b32 %r5421, %r5420, %r1404;
and.b32 %r5422, %r5421, %r8726;
setp.ne.s32 %p649, %r5422, 0;
selp.u32 %r5423, 1, 0, %p649;
cvt.u32.u16 %r5424, %rs1096;
bfi.b32 %r5425, %r5424, %r5423, 1, 8;
cvt.u16.u32 %rs1096, %r5425;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p650, %r8520, 0;
mov.u32 %r9236, %r9253;
@%p650 bra $L__BB1_571;
setp.gt.u32 %p651, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r9236, %r5420;
@%p651 bra $L__BB1_571;
add.s32 %r5429, %r8514, 17477;
cvt.u64.u32 %rd387, %r5429;
add.s64 %rd388, %rd387, %rd4;
add.s64 %rd389, %rd1, %rd388;
st.global.u8 [%rd389], %rs1096;
add.s32 %r8514, %r8514, 1;
mov.u32 %r8520, 8;
mov.u16 %rs1096, 0;
mov.u32 %r9236, %r9253;
$L__BB1_571:
setp.eq.s32 %p652, %r1405, 1;
mov.u32 %r9253, %r9236;
mov.u32 %r9233, %r1404;
@%p652 bra $L__BB1_579;
add.s32 %r9233, %r9223, -2;
mov.u32 %r5430, 1;
shl.b32 %r5431, %r5430, %r9233;
and.b32 %r5432, %r5431, %r8726;
setp.ne.s32 %p653, %r5432, 0;
selp.u32 %r5433, 1, 0, %p653;
cvt.u32.u16 %r5434, %rs1096;
bfi.b32 %r5435, %r5434, %r5433, 1, 8;
cvt.u16.u32 %rs1096, %r5435;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p654, %r8520, 0;
mov.u32 %r9227, %r9236;
@%p654 bra $L__BB1_575;
setp.gt.u32 %p655, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r9227, %r5430;
@%p655 bra $L__BB1_575;
add.s32 %r5438, %r8514, 17477;
cvt.u64.u32 %rd390, %r5438;
add.s64 %rd391, %rd390, %rd4;
add.s64 %rd392, %rd1, %rd391;
and.b16 %rs726, %rs1096, 255;
st.global.u8 [%rd392], %rs1096;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p656, %rs726, 255;
selp.b32 %r8520, 7, 8, %p656;
mov.u16 %rs1096, 0;
mov.u32 %r9227, %r9236;
$L__BB1_575:
setp.eq.s32 %p657, %r1405, 2;
mov.u32 %r9253, %r9227;
mov.u32 %r9236, %r9227;
@%p657 bra $L__BB1_579;
add.s32 %r9233, %r9223, -3;
mov.u32 %r5439, 1;
shl.b32 %r5440, %r5439, %r9233;
and.b32 %r5441, %r5440, %r8726;
setp.ne.s32 %p658, %r5441, 0;
selp.u32 %r5442, 1, 0, %p658;
cvt.u32.u16 %r5443, %rs1096;
bfi.b32 %r5444, %r5443, %r5442, 1, 8;
cvt.u16.u32 %rs1096, %r5444;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p659, %r8520, 0;
mov.u32 %r9253, %r9227;
mov.u32 %r9236, %r9227;
@%p659 bra $L__BB1_579;
setp.gt.u32 %p660, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r9253, %r5439;
mov.u32 %r9236, %r5439;
@%p660 bra $L__BB1_579;
add.s32 %r5449, %r8514, 17477;
cvt.u64.u32 %rd393, %r5449;
add.s64 %rd394, %rd393, %rd4;
add.s64 %rd395, %rd1, %rd394;
and.b16 %rs729, %rs1096, 255;
st.global.u8 [%rd395], %rs1096;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p661, %rs729, 255;
selp.b32 %r8520, 7, 8, %p661;
mov.u16 %rs1096, 0;
mov.u32 %r9253, %r9227;
mov.u32 %r9236, %r9227;
$L__BB1_579:
setp.lt.u32 %p662, %r1404, 3;
@%p662 bra $L__BB1_594;
mov.u32 %r9253, %r9236;
$L__BB1_581:
add.s32 %r5450, %r9233, -1;
mov.u32 %r5451, 1;
shl.b32 %r5452, %r5451, %r5450;
and.b32 %r5453, %r5452, %r8726;
setp.ne.s32 %p663, %r5453, 0;
selp.u32 %r5454, 1, 0, %p663;
cvt.u32.u16 %r5455, %rs1096;
bfi.b32 %r9242, %r5455, %r5454, 1, 8;
add.s32 %r9243, %r8520, -1;
setp.ne.s32 %p664, %r9243, 0;
mov.u32 %r9241, %r9253;
@%p664 bra $L__BB1_584;
setp.gt.u32 %p665, %r8514, 191;
mov.u32 %r9243, 0;
mov.u32 %r9241, %r5451;
@%p665 bra $L__BB1_584;
cvt.u16.u32 %rs730, %r9242;
and.b16 %rs731, %rs730, 255;
add.s32 %r5459, %r8514, 17477;
cvt.u64.u32 %rd396, %r5459;
add.s64 %rd397, %rd396, %rd4;
add.s64 %rd398, %rd1, %rd397;
st.global.u8 [%rd398], %rs730;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p666, %rs731, 255;
selp.b32 %r9243, 7, 8, %p666;
mov.u32 %r9242, 0;
mov.u32 %r9241, %r9253;
$L__BB1_584:
add.s32 %r5460, %r9233, -2;
shl.b32 %r5462, %r5451, %r5460;
and.b32 %r5463, %r5462, %r8726;
setp.ne.s32 %p667, %r5463, 0;
and.b32 %r5464, %r9242, 127;
selp.u32 %r5465, 1, 0, %p667;
bfi.b32 %r9246, %r5464, %r5465, 1, 7;
add.s32 %r9247, %r9243, -1;
setp.ne.s32 %p668, %r9247, 0;
mov.u32 %r9245, %r9241;
@%p668 bra $L__BB1_587;
setp.gt.u32 %p669, %r8514, 191;
mov.u32 %r9247, 0;
mov.u32 %r9245, 1;
@%p669 bra $L__BB1_587;
cvt.u16.u32 %rs732, %r9246;
and.b16 %rs733, %rs732, 255;
add.s32 %r5469, %r8514, 17477;
cvt.u64.u32 %rd399, %r5469;
add.s64 %rd400, %rd399, %rd4;
add.s64 %rd401, %rd1, %rd400;
st.global.u8 [%rd401], %rs732;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p670, %rs733, 255;
selp.b32 %r9247, 7, 8, %p670;
mov.u32 %r9246, 0;
mov.u32 %r9245, %r9241;
$L__BB1_587:
add.s32 %r5470, %r9233, -3;
mov.u32 %r5471, 1;
shl.b32 %r5472, %r5471, %r5470;
and.b32 %r5473, %r5472, %r8726;
setp.ne.s32 %p671, %r5473, 0;
and.b32 %r5474, %r9246, 127;
selp.u32 %r5475, 1, 0, %p671;
bfi.b32 %r9250, %r5474, %r5475, 1, 7;
add.s32 %r9251, %r9247, -1;
setp.ne.s32 %p672, %r9251, 0;
mov.u32 %r9249, %r9245;
@%p672 bra $L__BB1_590;
setp.gt.u32 %p673, %r8514, 191;
mov.u32 %r9251, 0;
mov.u32 %r9249, %r5471;
@%p673 bra $L__BB1_590;
cvt.u16.u32 %rs734, %r9250;
and.b16 %rs735, %rs734, 255;
add.s32 %r5479, %r8514, 17477;
cvt.u64.u32 %rd402, %r5479;
add.s64 %rd403, %rd402, %rd4;
add.s64 %rd404, %rd1, %rd403;
st.global.u8 [%rd404], %rs734;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p674, %rs735, 255;
selp.b32 %r9251, 7, 8, %p674;
mov.u32 %r9250, 0;
mov.u32 %r9249, %r9245;
$L__BB1_590:
add.s32 %r9233, %r9233, -4;
shl.b32 %r5481, %r5471, %r9233;
and.b32 %r5482, %r5481, %r8726;
setp.ne.s32 %p675, %r5482, 0;
and.b32 %r5483, %r9250, 127;
selp.u32 %r5484, 1, 0, %p675;
bfi.b32 %r5485, %r5483, %r5484, 1, 15;
cvt.u16.u32 %rs1096, %r5485;
add.s32 %r8520, %r9251, -1;
setp.ne.s32 %p676, %r8520, 0;
mov.u32 %r9253, %r9249;
@%p676 bra $L__BB1_593;
setp.gt.u32 %p677, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r9253, 1;
@%p677 bra $L__BB1_593;
add.s32 %r5488, %r8514, 17477;
cvt.u64.u32 %rd405, %r5488;
add.s64 %rd406, %rd405, %rd4;
add.s64 %rd407, %rd1, %rd406;
and.b16 %rs737, %rs1096, 255;
st.global.u8 [%rd407], %rs1096;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p678, %rs737, 255;
selp.b32 %r8520, 7, 8, %p678;
mov.u16 %rs1096, 0;
mov.u32 %r9253, %r9249;
$L__BB1_593:
setp.ne.s32 %p679, %r9233, 0;
@%p679 bra $L__BB1_581;
$L__BB1_594:
add.s32 %r5490, %r8725, -1;
setp.eq.s32 %p680, %r8725, 0;
mov.u32 %r8726, 0;
selp.b32 %r8725, 0, %r5490, %p680;
setp.lt.u32 %p681, %r8725, 3;
mov.u32 %r9259, %r8726;
@%p681 bra $L__BB1_597;
setp.lt.u32 %p682, %r8725, 6;
mov.u32 %r9259, 1;
@%p682 bra $L__BB1_597;
setp.lt.u32 %p683, %r8725, 9;
setp.eq.s32 %p684, %r8725, 11;
selp.b32 %r5492, 4, 5, %p684;
setp.lt.u32 %p685, %r8725, 11;
selp.b32 %r5493, 3, %r5492, %p685;
selp.b32 %r9259, 2, %r5493, %p683;
$L__BB1_597:
mov.u32 %r5495, 1;
shl.b32 %r8724, %r5495, %r9259;
mov.u32 %r8723, %r9253;
bra.uni $L__BB1_606;
$L__BB1_598:
// Reached when the significance mask %r9189 is 0 (see the branch at the
// setp.eq on %r9189 above). Run counter with an adaptive threshold:
//   %r8726 = current run count, %r8724 = threshold (1 << exponent),
//   %r8725 = state index in 0..12, %rs1096 = bit accumulator (MSB first),
//   %r8520 = bits still free in %rs1096, %r8514 = bytes written so far.
// NOTE(review): this matches the shape of an HTJ2K MEL coder - confirm.
add.s32 %r8726, %r8726, 1;
setp.lt.u32 %p686, %r8726, %r8724;
@%p686 bra $L__BB1_606;
// Run reached the threshold: shift a 1 bit into the accumulator.
shl.b16 %rs738, %rs1096, 1;
or.b16 %rs1096, %rs738, 1;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p687, %r8520, 0;
mov.u32 %r9260, %r8723;
@%p687 bra $L__BB1_602;
// Accumulator full. With more than 191 bytes already written, set the flag
// %r9260 = 1 and skip the store.
setp.gt.u32 %p688, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r9260, 1;
@%p688 bra $L__BB1_602;
// Store the byte at %rd20 (address computed before the branch to this
// label); the next byte holds 7 bits after a 0xFF, otherwise 8.
and.b16 %rs740, %rs1096, 255;
st.global.u8 [%rd20], %rs1096;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p689, %rs740, 255;
selp.b32 %r8520, 7, 8, %p689;
mov.u16 %rs1096, 0;
mov.u32 %r9260, %r8723;
$L__BB1_602:
// Advance the state (saturating at 12) and reset the run count.
add.s32 %r5499, %r8725, 1;
min.u32 %r8725, %r5499, 12;
// Exponent by state: 0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3,
// 11 -> 4, 12 -> 5.
setp.lt.u32 %p690, %r8725, 3;
mov.u32 %r8726, 0;
mov.u32 %r9263, %r8726;
@%p690 bra $L__BB1_605;
setp.lt.u32 %p691, %r8725, 6;
mov.u32 %r9263, 1;
@%p691 bra $L__BB1_605;
setp.lt.u32 %p692, %r8725, 9;
setp.eq.s32 %p693, %r8725, 11;
selp.b32 %r5501, 4, 5, %p693;
setp.lt.u32 %p694, %r8725, 11;
selp.b32 %r5502, 3, %r5501, %p694;
selp.b32 %r9263, 2, %r5502, %p692;
$L__BB1_605:
// New threshold = 1 << exponent; carry the flag forward in %r8723.
mov.u32 %r5504, 1;
shl.b32 %r8724, %r5504, %r9263;
mov.u32 %r8723, %r9260;
$L__BB1_606:
and.b16 %rs741, %rs181, 15;
cvt.u32.u16 %r1488, %rs741;
and.b32 %r5505, %r9189, 1;
setp.eq.b32 %p695, %r5505, 1;
mov.pred %p696, 0;
xor.pred %p697, %p695, %p696;
not.pred %p698, %p697;
mov.u32 %r9280, %r9176;
@%p698 bra $L__BB1_613;
and.b32 %r5506, %r1488, 1;
sub.s32 %r9270, %r1374, %r5506;
setp.eq.s32 %p699, %r9270, 0;
mov.u32 %r9280, %r9176;
@%p699 bra $L__BB1_613;
mov.u32 %r5507, -1;
shl.b32 %r5508, %r5507, %r9270;
not.b32 %r5509, %r5508;
and.b32 %r9271, %r9183, %r5509;
$L__BB1_609:
setp.gt.u32 %p700, %r9150, 17476;
mov.u32 %r9280, 1;
@%p700 bra $L__BB1_613;
sub.s32 %r5511, %r9149, %r9148;
min.u32 %r5512, %r5511, %r9270;
setp.eq.s32 %p701, %r5512, 32;
mov.u32 %r5513, -1;
shl.b32 %r5514, %r5513, %r5512;
not.b32 %r5515, %r5514;
selp.b32 %r5516, -1, %r5515, %p701;
and.b32 %r5517, %r5516, %r9271;
shl.b32 %r5518, %r5517, %r9148;
or.b32 %r9147, %r5518, %r9147;
add.s32 %r9148, %r5512, %r9148;
shr.u32 %r9271, %r9271, %r5512;
sub.s32 %r9270, %r9270, %r5512;
setp.lt.u32 %p702, %r9148, %r9149;
@%p702 bra $L__BB1_612;
cvt.u64.u32 %rd408, %r9150;
add.s64 %rd409, %rd408, %rd4;
add.s64 %rd410, %rd1, %rd409;
st.global.u8 [%rd410], %r9147;
add.s32 %r9150, %r9150, 1;
setp.eq.s32 %p703, %r9147, 255;
selp.b32 %r9149, 7, 8, %p703;
mov.u32 %r9147, 0;
mov.u32 %r9148, %r9147;
$L__BB1_612:
setp.ne.s32 %p704, %r9270, 0;
mov.u32 %r9280, %r9176;
@%p704 bra $L__BB1_609;
$L__BB1_613:
and.b32 %r1512, %r9189, 2;
setp.eq.s32 %p705, %r1512, 0;
mov.u32 %r9295, %r9280;
@%p705 bra $L__BB1_620;
shr.u32 %r5521, %r1488, 1;
and.b32 %r5522, %r5521, 1;
sub.s32 %r9285, %r1374, %r5522;
setp.eq.s32 %p706, %r9285, 0;
mov.u32 %r9295, %r9280;
@%p706 bra $L__BB1_620;
mov.u32 %r5523, -1;
shl.b32 %r5524, %r5523, %r9285;
not.b32 %r5525, %r5524;
and.b32 %r9286, %r9187, %r5525;
$L__BB1_616:
setp.gt.u32 %p707, %r9150, 17476;
mov.u32 %r9295, 1;
@%p707 bra $L__BB1_620;
sub.s32 %r5527, %r9149, %r9148;
min.u32 %r5528, %r5527, %r9285;
setp.eq.s32 %p708, %r5528, 32;
mov.u32 %r5529, -1;
shl.b32 %r5530, %r5529, %r5528;
not.b32 %r5531, %r5530;
selp.b32 %r5532, -1, %r5531, %p708;
and.b32 %r5533, %r5532, %r9286;
shl.b32 %r5534, %r5533, %r9148;
or.b32 %r9147, %r5534, %r9147;
add.s32 %r9148, %r5528, %r9148;
shr.u32 %r9286, %r9286, %r5528;
sub.s32 %r9285, %r9285, %r5528;
setp.lt.u32 %p709, %r9148, %r9149;
@%p709 bra $L__BB1_619;
cvt.u64.u32 %rd411, %r9150;
add.s64 %rd412, %rd411, %rd4;
add.s64 %rd413, %rd1, %rd412;
st.global.u8 [%rd413], %r9147;
add.s32 %r9150, %r9150, 1;
setp.eq.s32 %p710, %r9147, 255;
selp.b32 %r9149, 7, 8, %p710;
mov.u32 %r9147, 0;
mov.u32 %r9148, %r9147;
$L__BB1_619:
setp.ne.s32 %p711, %r9285, 0;
mov.u32 %r9295, %r9280;
@%p711 bra $L__BB1_616;
$L__BB1_620:
and.b32 %r1536, %r9189, 4;
setp.eq.s32 %p712, %r1536, 0;
mov.u32 %r9310, %r9295;
@%p712 bra $L__BB1_627;
shr.u32 %r5537, %r1488, 2;
and.b32 %r5538, %r5537, 1;
sub.s32 %r9300, %r1374, %r5538;
setp.eq.s32 %p713, %r9300, 0;
mov.u32 %r9310, %r9295;
@%p713 bra $L__BB1_627;
mov.u32 %r5539, -1;
shl.b32 %r5540, %r5539, %r9300;
not.b32 %r5541, %r5540;
and.b32 %r9301, %r9203, %r5541;
$L__BB1_623:
setp.gt.u32 %p714, %r9150, 17476;
mov.u32 %r9310, 1;
@%p714 bra $L__BB1_627;
sub.s32 %r5543, %r9149, %r9148;
min.u32 %r5544, %r5543, %r9300;
setp.eq.s32 %p715, %r5544, 32;
mov.u32 %r5545, -1;
shl.b32 %r5546, %r5545, %r5544;
not.b32 %r5547, %r5546;
selp.b32 %r5548, -1, %r5547, %p715;
and.b32 %r5549, %r5548, %r9301;
shl.b32 %r5550, %r5549, %r9148;
or.b32 %r9147, %r5550, %r9147;
add.s32 %r9148, %r5544, %r9148;
shr.u32 %r9301, %r9301, %r5544;
sub.s32 %r9300, %r9300, %r5544;
setp.lt.u32 %p716, %r9148, %r9149;
@%p716 bra $L__BB1_626;
cvt.u64.u32 %rd414, %r9150;
add.s64 %rd415, %rd414, %rd4;
add.s64 %rd416, %rd1, %rd415;
st.global.u8 [%rd416], %r9147;
add.s32 %r9150, %r9150, 1;
setp.eq.s32 %p717, %r9147, 255;
selp.b32 %r9149, 7, 8, %p717;
mov.u32 %r9147, 0;
mov.u32 %r9148, %r9147;
$L__BB1_626:
setp.ne.s32 %p718, %r9300, 0;
mov.u32 %r9310, %r9295;
@%p718 bra $L__BB1_623;
$L__BB1_627:
and.b32 %r1560, %r9189, 8;
setp.eq.s32 %p719, %r1560, 0;
mov.u32 %r9176, %r9310;
@%p719 bra $L__BB1_634;
shr.u32 %r5553, %r1488, 3;
sub.s32 %r9315, %r1374, %r5553;
setp.eq.s32 %p720, %r9315, 0;
mov.u32 %r9176, %r9310;
@%p720 bra $L__BB1_634;
mov.u32 %r5554, -1;
shl.b32 %r5555, %r5554, %r9315;
not.b32 %r5556, %r5555;
and.b32 %r9316, %r9202, %r5556;
$L__BB1_630:
setp.gt.u32 %p721, %r9150, 17476;
mov.u32 %r9176, 1;
@%p721 bra $L__BB1_634;
sub.s32 %r5558, %r9149, %r9148;
min.u32 %r5559, %r5558, %r9315;
setp.eq.s32 %p722, %r5559, 32;
mov.u32 %r5560, -1;
shl.b32 %r5561, %r5560, %r5559;
not.b32 %r5562, %r5561;
selp.b32 %r5563, -1, %r5562, %p722;
and.b32 %r5564, %r5563, %r9316;
shl.b32 %r5565, %r5564, %r9148;
or.b32 %r9147, %r5565, %r9147;
add.s32 %r9148, %r5559, %r9148;
shr.u32 %r9316, %r9316, %r5559;
sub.s32 %r9315, %r9315, %r5559;
setp.lt.u32 %p723, %r9148, %r9149;
@%p723 bra $L__BB1_633;
cvt.u64.u32 %rd417, %r9150;
add.s64 %rd418, %rd417, %rd4;
add.s64 %rd419, %rd1, %rd418;
st.global.u8 [%rd419], %r9147;
add.s32 %r9150, %r9150, 1;
setp.eq.s32 %p724, %r9147, 255;
selp.b32 %r9149, 7, 8, %p724;
mov.u32 %r9147, 0;
mov.u32 %r9148, %r9147;
$L__BB1_633:
setp.ne.s32 %p725, %r9315, 0;
mov.u32 %r9176, %r9310;
@%p725 bra $L__BB1_630;
$L__BB1_634:
and.b32 %r5568, %r9186, 255;
and.b32 %r5569, %r9051, 255;
setp.lt.u32 %p726, %r5568, %r5569;
cvt.u16.u32 %rs742, %r9186;
selp.b16 %rs743, %rs179, %rs742, %p726;
st.shared.u8 [%r1312+1], %rs743;
ld.shared.u8 %rs744, [%r1312+3];
setp.gt.u16 %p727, %rs177, %rs744;
add.s32 %r9346, %r9346, 1;
add.s32 %r5570, %r9014, 3;
selp.b32 %r5571, %r9346, %r5570, %p727;
add.s32 %r5573, %r4095, %r5571;
ld.shared.u8 %r5574, [%r5573];
add.s32 %r9013, %r5574, -1;
shr.u32 %r5575, %r1512, 1;
or.b32 %r5576, %r1318, %r5575;
st.shared.u8 [%r1312+2], %r9200;
st.shared.u8 [%r1315+1], %r5576;
ld.shared.u8 %rs745, [%r1315+3];
mul.wide.u16 %r5577, %rs745, 4;
add.s32 %r5578, %r5577, %r1317;
shr.u32 %r5579, %r1560, 3;
st.shared.u8 [%r1315+2], %r5579;
shr.u32 %r5580, %r1560, 2;
shr.u32 %r5581, %r1536, 1;
or.b32 %r5582, %r5580, %r5581;
or.b32 %r9016, %r5582, %r5578;
add.s32 %r9012, %r9012, 1;
mov.u32 %r9070, %r9219;
$L__BB1_635:
// Commit the updated indices, then fetch two rows of a byte table at %rd17
// with a row stride of 6 bytes: row max(%r1103, 0) and row max(%r9350, 0).
// From each row bytes 0..3 are read as two (value, length) pairs; the
// "length" bytes (offsets 1 and 3) are used as bit counts by the emit loops
// that follow, and the "value" bytes (offsets 0 and 2) as the bits to emit.
// NOTE(review): the meaning of the table and of %r1103 is not visible here.
mov.u32 %r9014, %r9346;
mov.u32 %r9015, %r9345;
max.s32 %r5583, %r9350, 0;
// %r5585 = 6 * %r1103 when %r1103 > 0, else 0.
mul.lo.s32 %r5584, %r1103, 6;
setp.gt.s32 %p728, %r1103, 0;
selp.b32 %r5585, %r5584, 0, %p728;
cvt.u64.u32 %rd420, %r5585;
add.s64 %rd21, %rd17, %rd420;
// First row: length at +1, second pair (value, length) at +2 / +3.
ld.global.u8 %rs205, [%rd21+1];
add.s32 %r5586, %r5585, 2;
cvt.u64.u32 %rd421, %r5586;
add.s64 %rd422, %rd17, %rd421;
ld.global.u8 %rs206, [%rd422];
ld.global.u8 %rs207, [%rd422+1];
// Second row (index max(%r9350, 0)): pairs at +0 / +1 and +2 / +3.
mul.lo.s32 %r5587, %r5583, 6;
cvt.u64.u32 %rd423, %r5587;
add.s64 %rd424, %rd17, %rd423;
ld.global.u8 %rs208, [%rd424];
ld.global.u8 %rs209, [%rd424+1];
add.s32 %r5588, %r5587, 2;
cvt.u64.u32 %rd425, %r5588;
add.s64 %rd426, %rd17, %rd425;
ld.global.u8 %rs210, [%rd426];
ld.global.u8 %rs211, [%rd426+1];
// Skip the first emit loop when its length byte is 0; %r9362 then
// inherits %r9070.
setp.eq.s16 %p729, %rs205, 0;
mov.u32 %r9362, %r9070;
@%p729 bra $L__BB1_642;
// The first row's value byte (+0) is only loaded when it is needed.
ld.global.u8 %r9352, [%rd21];
cvt.u32.u16 %r9351, %rs205;
$L__BB1_637:
mov.u32 %r1611, %r9351;
setp.gt.u32 %p730, %r8962, 2879;
mov.u32 %r9362, 1;
@%p730 bra $L__BB1_642;
mov.u32 %r5590, 8;
sub.s32 %r5591, %r5590, %r8960;
sub.s32 %r5592, %r5591, %r8961;
min.u32 %r5593, %r5592, %r1611;
setp.eq.s32 %p731, %r5593, 32;
mov.u32 %r5594, -1;
shl.b32 %r5595, %r5594, %r5593;
not.b32 %r5596, %r5595;
selp.b32 %r5597, -1, %r5596, %p731;
and.b32 %r5598, %r5597, %r9352;
shl.b32 %r5599, %r5598, %r8961;
cvt.u16.u32 %rs746, %r5599;
or.b16 %rs1165, %rs1165, %rs746;
add.s32 %r8961, %r5593, %r8961;
sub.s32 %r9351, %r1611, %r5593;
shr.u32 %r9352, %r9352, %r5593;
setp.gt.u32 %p732, %r5592, %r1611;
@%p732 bra $L__BB1_641;
setp.ne.s32 %p733, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs747, %rs1165, 255;
setp.ne.s16 %p734, %rs747, 127;
and.pred %p735, %p733, %p734;
@%p735 bra $L__BB1_641;
mov.u32 %r5602, 20548;
sub.s32 %r5603, %r5602, %r8962;
cvt.u64.u32 %rd427, %r5603;
add.s64 %rd428, %rd427, %rd4;
add.s64 %rd429, %rd1, %rd428;
st.global.u8 [%rd429], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p736, %rs747, 143;
selp.u32 %r8960, 1, 0, %p736;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_641:
setp.ne.s32 %p737, %r9351, 0;
mov.u32 %r9362, %r9070;
@%p737 bra $L__BB1_637;
$L__BB1_642:
setp.eq.s16 %p738, %rs209, 0;
mov.u32 %r9374, %r9362;
@%p738 bra $L__BB1_649;
cvt.u32.u16 %r5604, %rs208;
and.b32 %r9364, %r5604, 255;
cvt.u32.u16 %r5605, %rs209;
and.b32 %r9363, %r5605, 255;
$L__BB1_644:
mov.u32 %r1630, %r9363;
setp.gt.u32 %p739, %r8962, 2879;
mov.u32 %r9374, 1;
@%p739 bra $L__BB1_649;
mov.u32 %r5607, 8;
sub.s32 %r5608, %r5607, %r8960;
sub.s32 %r5609, %r5608, %r8961;
min.u32 %r5610, %r5609, %r1630;
setp.eq.s32 %p740, %r5610, 32;
mov.u32 %r5611, -1;
shl.b32 %r5612, %r5611, %r5610;
not.b32 %r5613, %r5612;
selp.b32 %r5614, -1, %r5613, %p740;
and.b32 %r5615, %r5614, %r9364;
shl.b32 %r5616, %r5615, %r8961;
cvt.u16.u32 %rs751, %r5616;
or.b16 %rs1165, %rs1165, %rs751;
add.s32 %r8961, %r5610, %r8961;
sub.s32 %r9363, %r1630, %r5610;
shr.u32 %r9364, %r9364, %r5610;
setp.gt.u32 %p741, %r5609, %r1630;
@%p741 bra $L__BB1_648;
setp.ne.s32 %p742, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs752, %rs1165, 255;
setp.ne.s16 %p743, %rs752, 127;
and.pred %p744, %p742, %p743;
@%p744 bra $L__BB1_648;
mov.u32 %r5619, 20548;
sub.s32 %r5620, %r5619, %r8962;
cvt.u64.u32 %rd430, %r5620;
add.s64 %rd431, %rd430, %rd4;
add.s64 %rd432, %rd1, %rd431;
st.global.u8 [%rd432], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p745, %rs752, 143;
selp.u32 %r8960, 1, 0, %p745;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_648:
setp.ne.s32 %p746, %r9363, 0;
mov.u32 %r9374, %r9362;
@%p746 bra $L__BB1_644;
$L__BB1_649:
setp.eq.s16 %p747, %rs207, 0;
mov.u32 %r9386, %r9374;
@%p747 bra $L__BB1_656;
cvt.u32.u16 %r5621, %rs207;
and.b32 %r9375, %r5621, 255;
cvt.u32.u16 %r5622, %rs206;
and.b32 %r9376, %r5622, 255;
$L__BB1_651:
mov.u32 %r1649, %r9375;
setp.gt.u32 %p748, %r8962, 2879;
mov.u32 %r9386, 1;
@%p748 bra $L__BB1_656;
mov.u32 %r5624, 8;
sub.s32 %r5625, %r5624, %r8960;
sub.s32 %r5626, %r5625, %r8961;
min.u32 %r5627, %r5626, %r1649;
setp.eq.s32 %p749, %r5627, 32;
mov.u32 %r5628, -1;
shl.b32 %r5629, %r5628, %r5627;
not.b32 %r5630, %r5629;
selp.b32 %r5631, -1, %r5630, %p749;
and.b32 %r5632, %r5631, %r9376;
shl.b32 %r5633, %r5632, %r8961;
cvt.u16.u32 %rs756, %r5633;
or.b16 %rs1165, %rs1165, %rs756;
add.s32 %r8961, %r5627, %r8961;
sub.s32 %r9375, %r1649, %r5627;
shr.u32 %r9376, %r9376, %r5627;
setp.gt.u32 %p750, %r5626, %r1649;
@%p750 bra $L__BB1_655;
setp.ne.s32 %p751, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs757, %rs1165, 255;
setp.ne.s16 %p752, %rs757, 127;
and.pred %p753, %p751, %p752;
@%p753 bra $L__BB1_655;
mov.u32 %r5636, 20548;
sub.s32 %r5637, %r5636, %r8962;
cvt.u64.u32 %rd433, %r5637;
add.s64 %rd434, %rd433, %rd4;
add.s64 %rd435, %rd1, %rd434;
st.global.u8 [%rd435], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p754, %rs757, 143;
selp.u32 %r8960, 1, 0, %p754;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_655:
setp.ne.s32 %p755, %r9375, 0;
mov.u32 %r9386, %r9374;
@%p755 bra $L__BB1_651;
$L__BB1_656:
setp.eq.s16 %p756, %rs211, 0;
mov.u32 %r8959, %r9386;
@%p756 bra $L__BB1_663;
cvt.u32.u16 %r5638, %rs210;
and.b32 %r9388, %r5638, 255;
cvt.u32.u16 %r5639, %rs211;
and.b32 %r9387, %r5639, 255;
$L__BB1_658:
mov.u32 %r1668, %r9387;
setp.gt.u32 %p757, %r8962, 2879;
mov.u32 %r8959, 1;
@%p757 bra $L__BB1_663;
mov.u32 %r5641, 8;
sub.s32 %r5642, %r5641, %r8960;
sub.s32 %r5643, %r5642, %r8961;
min.u32 %r5644, %r5643, %r1668;
setp.eq.s32 %p758, %r5644, 32;
mov.u32 %r5645, -1;
shl.b32 %r5646, %r5645, %r5644;
not.b32 %r5647, %r5646;
selp.b32 %r5648, -1, %r5647, %p758;
and.b32 %r5649, %r5648, %r9388;
shl.b32 %r5650, %r5649, %r8961;
cvt.u16.u32 %rs761, %r5650;
or.b16 %rs1165, %rs1165, %rs761;
add.s32 %r8961, %r5644, %r8961;
sub.s32 %r9387, %r1668, %r5644;
shr.u32 %r9388, %r9388, %r5644;
setp.gt.u32 %p759, %r5643, %r1668;
@%p759 bra $L__BB1_662;
setp.ne.s32 %p760, %r8960, 0;
mov.u32 %r8960, 0;
and.b16 %rs762, %rs1165, 255;
setp.ne.s16 %p761, %rs762, 127;
and.pred %p762, %p760, %p761;
@%p762 bra $L__BB1_662;
mov.u32 %r5653, 20548;
sub.s32 %r5654, %r5653, %r8962;
cvt.u64.u32 %rd436, %r5654;
add.s64 %rd437, %rd436, %rd4;
add.s64 %rd438, %rd1, %rd437;
st.global.u8 [%rd438], %rs1165;
add.s32 %r8962, %r8962, 1;
setp.gt.u16 %p763, %rs762, 143;
selp.u32 %r8960, 1, 0, %p763;
mov.u32 %r8961, 0;
mov.u16 %rs1165, 0;
$L__BB1_662:
setp.ne.s32 %p764, %r9387, 0;
mov.u32 %r8959, %r9386;
@%p764 bra $L__BB1_658;
$L__BB1_663:
// Inner loop latch: advance %r9011 by 4 and repeat from $L__BB1_423 while
// it is below %r5 (unsigned compare).
add.s32 %r9011, %r9011, 4;
setp.lt.u32 %p765, %r9011, %r5;
@%p765 bra $L__BB1_423;
$L__BB1_664:
// Outer loop latch: advance %r8995 by 2 and repeat from $L__BB1_421 while
// it is below %r6 (unsigned compare).
add.s32 %r8995, %r8995, 2;
setp.lt.u32 %p766, %r8995, %r6;
@%p766 bra $L__BB1_421;
$L__BB1_665:
// After both loops: if a run is still pending (%r8726 != 0), flush it by
// shifting one extra 1 bit into the accumulator %rs1096.
// %r9426 carries the flag %r8723 forward, or becomes 1 if the byte limit
// is hit.
setp.eq.s32 %p767, %r8726, 0;
mov.u32 %r9426, %r8723;
@%p767 bra $L__BB1_669;
shl.b16 %rs765, %rs1096, 1;
or.b16 %rs1096, %rs765, 1;
add.s32 %r8520, %r8520, -1;
setp.ne.s32 %p768, %r8520, 0;
mov.u32 %r9426, %r8723;
@%p768 bra $L__BB1_669;
// Accumulator full: refuse the store (flag = 1) when more than 191 bytes
// were already written.
setp.gt.u32 %p769, %r8514, 191;
mov.u32 %r8520, 0;
mov.u32 %r9426, 1;
@%p769 bra $L__BB1_669;
// Store the byte at %rd1 + %rd4 + 17477 + %r8514; the next byte holds
// 7 bits after a 0xFF, otherwise 8.
add.s32 %r5657, %r8514, 17477;
cvt.u64.u32 %rd439, %r5657;
add.s64 %rd440, %rd439, %rd4;
add.s64 %rd441, %rd1, %rd440;
and.b16 %rs767, %rs1096, 255;
st.global.u8 [%rd441], %rs1096;
add.s32 %r8514, %r8514, 1;
setp.eq.s16 %p770, %rs767, 255;
selp.b32 %r8520, 7, 8, %p770;
mov.u16 %rs1096, 0;
mov.u32 %r9426, %r8723;
$L__BB1_669:
// Terminate the two streams that grow towards each other: the ascending one
// at offset 17477 + %r8514 (accumulator %rs1096, %r8520 free bits) and the
// descending one at offset 20548 - %r8962 (accumulator %rs1165, %r8961 bits
// used). If the two partial bytes can share one byte without disturbing
// either stream's bits, a single merged byte is written; otherwise both are
// written separately.
// Outputs: %r9429 / %r9431 = flags for the two streams (1 = limit hit).
//
// %rs234 = ascending partial byte, left-aligned: (%rs1096 & 255) << %r8520.
cvt.u32.u16 %r5658, %rs1096;
and.b32 %r5659, %r5658, 255;
shl.b32 %r5660, %r5659, %r8520;
cvt.u16.u32 %rs234, %r5660;
// %r1720 = mask of the low %r8961 bits used by the descending byte
// (0 when %r8961 == 0).
mov.u32 %r5661, -1;
shl.b32 %r5662, %r5661, %r8961;
not.b32 %r5663, %r5662;
mov.u32 %r5664, 255;
and.b32 %r5665, %r5663, 255;
setp.eq.s32 %p771, %r8961, 0;
selp.b32 %r1720, 0, %r5665, %p771;
// %r1721 = 255 << %r8520: mask of the bit positions used by the ascending
// byte (its low 8 bits are 0 when %r8520 == 8, i.e. the byte is empty).
shl.b32 %r1721, %r5664, %r8520;
and.b32 %r5666, %r1721, 255;
or.b32 %r5667, %r5666, %r1720;
// Neither partial byte holds any bits: nothing to write.
setp.eq.s32 %p772, %r5667, 0;
mov.u32 %r9429, %r9426;
mov.u32 %r9431, %r8959;
@%p772 bra $L__BB1_675;
// Candidate merged byte = OR of both partial bytes.
or.b16 %rs235, %rs1165, %rs234;
and.b16 %rs768, %rs235, 255;
// Bits where the merged byte differs from the ascending byte, restricted to
// the ascending stream's mask.
xor.b16 %rs769, %rs235, %rs234;
cvt.u32.u16 %r5668, %rs769;
and.b32 %r5669, %r1721, %r5668;
and.b32 %r5670, %r5669, 255;
// Bits where the merged byte differs from the descending byte, restricted
// to the descending stream's mask.
xor.b16 %rs770, %rs235, %rs1165;
cvt.u32.u16 %r5671, %rs770;
and.b32 %r5672, %r1720, %r5671;
or.b32 %r5673, %r5670, %r5672;
// Merge only if no owned bit changed, the merged byte is not 0xFF, and the
// descending stream has written more than one byte (%r8962 > 1).
setp.eq.s32 %p773, %r5673, 0;
setp.ne.s16 %p774, %rs768, 255;
and.pred %p775, %p774, %p773;
setp.gt.u32 %p776, %r8962, 1;
and.pred %p777, %p776, %p775;
// %rd22 = address of the next ascending byte (offset 17477 + %r8514).
add.s32 %r5674, %r8514, 17477;
cvt.u64.u32 %rd442, %r5674;
add.s64 %rd443, %rd442, %rd4;
add.s64 %rd22, %rd1, %rd443;
@%p777 bra $L__BB1_673;
bra.uni $L__BB1_671;
$L__BB1_673:
// Merged path: one byte is appended to the ascending stream only;
// %r8962 is not advanced. Flag %r9429 = 1 if %r8514 is already above 191.
setp.gt.u32 %p781, %r8514, 191;
mov.u32 %r9429, 1;
mov.u32 %r9431, %r8959;
@%p781 bra $L__BB1_675;
st.global.u8 [%rd22], %rs235;
add.s32 %r8514, %r8514, 1;
mov.u32 %r9429, %r9426;
mov.u32 %r9431, %r8959;
bra.uni $L__BB1_675;
$L__BB1_671:
// Separate path: both streams need room for one more byte; if either limit
// is exceeded, both flags are set to 1 and nothing is written.
setp.gt.u32 %p778, %r8514, 191;
setp.gt.u32 %p779, %r8962, 2879;
or.pred %p780, %p779, %p778;
mov.u32 %r9429, 1;
mov.u32 %r9431, %r9429;
@%p780 bra $L__BB1_675;
// Ascending byte first, then the descending byte at offset 20548 - %r8962.
st.global.u8 [%rd22], %rs234;
add.s32 %r8514, %r8514, 1;
mov.u32 %r5677, 20548;
sub.s32 %r5678, %r5677, %r8962;
cvt.u64.u32 %rd444, %r5678;
add.s64 %rd445, %rd444, %rd4;
add.s64 %rd446, %rd1, %rd445;
st.global.u8 [%rd446], %rs1165;
add.s32 %r8962, %r8962, 1;
mov.u32 %r9429, %r9426;
mov.u32 %r9431, %r8959;
$L__BB1_675:
// Terminate the ascending stream written at %rd1 + %rd4 + %r9150
// (%r9147 = partial byte, %r9148 = bits in it, %r9149 = byte capacity).
// %r9433 leaves as %r9176, or as 1 if the byte limit is hit.
setp.eq.s32 %p782, %r9148, 0;
@%p782 bra $L__BB1_679;
// Pending bits: fill the unused positions [%r9148, %r9149) with ones.
sub.s32 %r5680, %r9149, %r9148;
mov.u32 %r5681, -1;
shl.b32 %r5682, %r5681, %r5680;
not.b32 %r5683, %r5682;
and.b32 %r5684, %r5683, 255;
shl.b32 %r5685, %r5684, %r9148;
or.b32 %r1729, %r5685, %r9147;
// A padded byte equal to 0xFF is not written at all.
setp.eq.s32 %p783, %r1729, 255;
mov.u32 %r9433, %r9176;
@%p783 bra $L__BB1_681;
// Otherwise store it unless the index already exceeds 17476.
setp.gt.u32 %p784, %r9150, 17476;
mov.u32 %r9433, 1;
@%p784 bra $L__BB1_681;
cvt.u64.u32 %rd447, %r9150;
add.s64 %rd448, %rd447, %rd4;
add.s64 %rd449, %rd1, %rd448;
st.global.u8 [%rd449], %r1729;
add.s32 %r9150, %r9150, 1;
mov.u32 %r9433, %r9176;
bra.uni $L__BB1_681;
$L__BB1_679:
// No pending bits. A capacity of 7 means the last stored byte was 0xFF
// (capacity is set to 7 only after storing 255); in that case the byte
// count is reduced by one, clamped at 0, which drops that trailing byte.
setp.ne.s32 %p785, %r9149, 7;
mov.u32 %r9433, %r9176;
@%p785 bra $L__BB1_681;
setp.eq.s32 %p786, %r9150, 0;
add.s32 %r5687, %r9150, -1;
selp.b32 %r9150, 0, %r5687, %p786;
mov.u32 %r9433, %r9176;
$L__BB1_681:
// Combine the three flags %r9431, %r9429 and %r9433. If all are zero continue
// at $L__BB1_683; otherwise write an 8-word record at [%rd6] and leave via
// $L__BB1_1905.
or.b32 %r5688, %r9431, %r9429;
or.b32 %r5689, %r5688, %r9433;
setp.eq.s32 %p787, %r5689, 0;
@%p787 bra $L__BB1_683;
// Record layout written here: word 0 = 1, word 1 = 3, words 2..7 = 0.
// NOTE(review): presumably a per-block status/error record -- the meaning of
// the values 1 and 3 is not visible in this section.
mov.u32 %r5690, 1;
st.global.u32 [%rd6], %r5690;
mov.u32 %r5691, 3;
st.global.u32 [%rd6+4], %r5691;
mov.u32 %r5692, 0;
st.global.u32 [%rd6+8], %r5692;
st.global.u32 [%rd6+12], %r5692;
st.global.u32 [%rd6+16], %r5692;
st.global.u32 [%rd6+20], %r5692;
st.global.u32 [%rd6+24], %r5692;
st.global.u32 [%rd6+28], %r5692;
bra.uni $L__BB1_1905;
$L__BB1_683:
// %r1735 = %r9150 + %r8514 + %r8962, the sum of the three running byte
// counters updated above.
add.s32 %r1734, %r9150, %r8514;
add.s32 %r1735, %r1734, %r8962;
// %rd23 = %SPL + 0; it is the base address of the st.local/ld.local scratch
// array used below.
add.u64 %rd23, %SPL, 0;
// Default values consumed at $L__BB1_932 when %p38 is set.
mov.u32 %r9553, 1;
mov.u32 %r9551, 0;
mov.u32 %r9552, %r9551;
@%p38 bra $L__BB1_932;
// Only the case %r4 == 3 continues here; any other value goes to $L__BB1_931.
setp.ne.s32 %p789, %r4, 3;
@%p789 bra $L__BB1_931;
// %r5699 = ((%r5 + 3) >> 2) + 8. If that exceeds 513 the scratch array is not
// used: branch to $L__BB1_928 with %r9550 = 0 and %p2370 = true.
add.s32 %r5697, %r5, 3;
shr.u32 %r5698, %r5697, 2;
add.s32 %r5699, %r5698, 8;
setp.gt.u32 %p791, %r5699, 513;
mov.pred %p790, -1;
mov.u32 %r9550, 0;
mov.pred %p2370, %p790;
@%p791 bra $L__BB1_928;
// Fall-through: the straight-line stores that follow zero 513 consecutive
// u16 entries of the local scratch array (byte offsets 0..1024 from %rd23).
mov.u16 %rs1224, 0;
st.local.u16 [%rd23], %rs1224;
st.local.u16 [%rd23+2], %rs1224;
st.local.u16 [%rd23+4], %rs1224;
st.local.u16 [%rd23+6], %rs1224;
st.local.u16 [%rd23+8], %rs1224;
st.local.u16 [%rd23+10], %rs1224;
st.local.u16 [%rd23+12], %rs1224;
st.local.u16 [%rd23+14], %rs1224;
st.local.u16 [%rd23+16], %rs1224;
st.local.u16 [%rd23+18], %rs1224;
st.local.u16 [%rd23+20], %rs1224;
st.local.u16 [%rd23+22], %rs1224;
st.local.u16 [%rd23+24], %rs1224;
st.local.u16 [%rd23+26], %rs1224;
st.local.u16 [%rd23+28], %rs1224;
st.local.u16 [%rd23+30], %rs1224;
st.local.u16 [%rd23+32], %rs1224;
st.local.u16 [%rd23+34], %rs1224;
st.local.u16 [%rd23+36], %rs1224;
st.local.u16 [%rd23+38], %rs1224;
st.local.u16 [%rd23+40], %rs1224;
st.local.u16 [%rd23+42], %rs1224;
st.local.u16 [%rd23+44], %rs1224;
st.local.u16 [%rd23+46], %rs1224;
st.local.u16 [%rd23+48], %rs1224;
st.local.u16 [%rd23+50], %rs1224;
st.local.u16 [%rd23+52], %rs1224;
st.local.u16 [%rd23+54], %rs1224;
st.local.u16 [%rd23+56], %rs1224;
st.local.u16 [%rd23+58], %rs1224;
st.local.u16 [%rd23+60], %rs1224;
st.local.u16 [%rd23+62], %rs1224;
st.local.u16 [%rd23+64], %rs1224;
st.local.u16 [%rd23+66], %rs1224;
st.local.u16 [%rd23+68], %rs1224;
st.local.u16 [%rd23+70], %rs1224;
st.local.u16 [%rd23+72], %rs1224;
st.local.u16 [%rd23+74], %rs1224;
st.local.u16 [%rd23+76], %rs1224;
st.local.u16 [%rd23+78], %rs1224;
st.local.u16 [%rd23+80], %rs1224;
st.local.u16 [%rd23+82], %rs1224;
st.local.u16 [%rd23+84], %rs1224;
st.local.u16 [%rd23+86], %rs1224;
st.local.u16 [%rd23+88], %rs1224;
st.local.u16 [%rd23+90], %rs1224;
st.local.u16 [%rd23+92], %rs1224;
st.local.u16 [%rd23+94], %rs1224;
st.local.u16 [%rd23+96], %rs1224;
st.local.u16 [%rd23+98], %rs1224;
st.local.u16 [%rd23+100], %rs1224;
st.local.u16 [%rd23+102], %rs1224;
st.local.u16 [%rd23+104], %rs1224;
st.local.u16 [%rd23+106], %rs1224;
st.local.u16 [%rd23+108], %rs1224;
st.local.u16 [%rd23+110], %rs1224;
st.local.u16 [%rd23+112], %rs1224;
st.local.u16 [%rd23+114], %rs1224;
st.local.u16 [%rd23+116], %rs1224;
st.local.u16 [%rd23+118], %rs1224;
st.local.u16 [%rd23+120], %rs1224;
st.local.u16 [%rd23+122], %rs1224;
st.local.u16 [%rd23+124], %rs1224;
st.local.u16 [%rd23+126], %rs1224;
st.local.u16 [%rd23+128], %rs1224;
st.local.u16 [%rd23+130], %rs1224;
st.local.u16 [%rd23+132], %rs1224;
st.local.u16 [%rd23+134], %rs1224;
st.local.u16 [%rd23+136], %rs1224;
st.local.u16 [%rd23+138], %rs1224;
st.local.u16 [%rd23+140], %rs1224;
st.local.u16 [%rd23+142], %rs1224;
st.local.u16 [%rd23+144], %rs1224;
st.local.u16 [%rd23+146], %rs1224;
st.local.u16 [%rd23+148], %rs1224;
st.local.u16 [%rd23+150], %rs1224;
st.local.u16 [%rd23+152], %rs1224;
st.local.u16 [%rd23+154], %rs1224;
st.local.u16 [%rd23+156], %rs1224;
st.local.u16 [%rd23+158], %rs1224;
st.local.u16 [%rd23+160], %rs1224;
st.local.u16 [%rd23+162], %rs1224;
st.local.u16 [%rd23+164], %rs1224;
st.local.u16 [%rd23+166], %rs1224;
st.local.u16 [%rd23+168], %rs1224;
st.local.u16 [%rd23+170], %rs1224;
st.local.u16 [%rd23+172], %rs1224;
st.local.u16 [%rd23+174], %rs1224;
st.local.u16 [%rd23+176], %rs1224;
st.local.u16 [%rd23+178], %rs1224;
st.local.u16 [%rd23+180], %rs1224;
st.local.u16 [%rd23+182], %rs1224;
st.local.u16 [%rd23+184], %rs1224;
st.local.u16 [%rd23+186], %rs1224;
st.local.u16 [%rd23+188], %rs1224;
st.local.u16 [%rd23+190], %rs1224;
st.local.u16 [%rd23+192], %rs1224;
st.local.u16 [%rd23+194], %rs1224;
st.local.u16 [%rd23+196], %rs1224;
st.local.u16 [%rd23+198], %rs1224;
st.local.u16 [%rd23+200], %rs1224;
st.local.u16 [%rd23+202], %rs1224;
st.local.u16 [%rd23+204], %rs1224;
st.local.u16 [%rd23+206], %rs1224;
st.local.u16 [%rd23+208], %rs1224;
st.local.u16 [%rd23+210], %rs1224;
st.local.u16 [%rd23+212], %rs1224;
st.local.u16 [%rd23+214], %rs1224;
st.local.u16 [%rd23+216], %rs1224;
st.local.u16 [%rd23+218], %rs1224;
st.local.u16 [%rd23+220], %rs1224;
st.local.u16 [%rd23+222], %rs1224;
st.local.u16 [%rd23+224], %rs1224;
st.local.u16 [%rd23+226], %rs1224;
st.local.u16 [%rd23+228], %rs1224;
st.local.u16 [%rd23+230], %rs1224;
st.local.u16 [%rd23+232], %rs1224;
st.local.u16 [%rd23+234], %rs1224;
st.local.u16 [%rd23+236], %rs1224;
st.local.u16 [%rd23+238], %rs1224;
st.local.u16 [%rd23+240], %rs1224;
st.local.u16 [%rd23+242], %rs1224;
st.local.u16 [%rd23+244], %rs1224;
st.local.u16 [%rd23+246], %rs1224;
st.local.u16 [%rd23+248], %rs1224;
st.local.u16 [%rd23+250], %rs1224;
st.local.u16 [%rd23+252], %rs1224;
st.local.u16 [%rd23+254], %rs1224;
st.local.u16 [%rd23+256], %rs1224;
st.local.u16 [%rd23+258], %rs1224;
st.local.u16 [%rd23+260], %rs1224;
st.local.u16 [%rd23+262], %rs1224;
st.local.u16 [%rd23+264], %rs1224;
st.local.u16 [%rd23+266], %rs1224;
st.local.u16 [%rd23+268], %rs1224;
st.local.u16 [%rd23+270], %rs1224;
st.local.u16 [%rd23+272], %rs1224;
st.local.u16 [%rd23+274], %rs1224;
st.local.u16 [%rd23+276], %rs1224;
st.local.u16 [%rd23+278], %rs1224;
st.local.u16 [%rd23+280], %rs1224;
st.local.u16 [%rd23+282], %rs1224;
st.local.u16 [%rd23+284], %rs1224;
st.local.u16 [%rd23+286], %rs1224;
st.local.u16 [%rd23+288], %rs1224;
st.local.u16 [%rd23+290], %rs1224;
st.local.u16 [%rd23+292], %rs1224;
st.local.u16 [%rd23+294], %rs1224;
st.local.u16 [%rd23+296], %rs1224;
st.local.u16 [%rd23+298], %rs1224;
st.local.u16 [%rd23+300], %rs1224;
st.local.u16 [%rd23+302], %rs1224;
st.local.u16 [%rd23+304], %rs1224;
st.local.u16 [%rd23+306], %rs1224;
st.local.u16 [%rd23+308], %rs1224;
st.local.u16 [%rd23+310], %rs1224;
st.local.u16 [%rd23+312], %rs1224;
st.local.u16 [%rd23+314], %rs1224;
st.local.u16 [%rd23+316], %rs1224;
st.local.u16 [%rd23+318], %rs1224;
st.local.u16 [%rd23+320], %rs1224;
st.local.u16 [%rd23+322], %rs1224;
st.local.u16 [%rd23+324], %rs1224;
st.local.u16 [%rd23+326], %rs1224;
st.local.u16 [%rd23+328], %rs1224;
st.local.u16 [%rd23+330], %rs1224;
st.local.u16 [%rd23+332], %rs1224;
st.local.u16 [%rd23+334], %rs1224;
st.local.u16 [%rd23+336], %rs1224;
st.local.u16 [%rd23+338], %rs1224;
st.local.u16 [%rd23+340], %rs1224;
st.local.u16 [%rd23+342], %rs1224;
st.local.u16 [%rd23+344], %rs1224;
st.local.u16 [%rd23+346], %rs1224;
st.local.u16 [%rd23+348], %rs1224;
st.local.u16 [%rd23+350], %rs1224;
st.local.u16 [%rd23+352], %rs1224;
st.local.u16 [%rd23+354], %rs1224;
st.local.u16 [%rd23+356], %rs1224;
st.local.u16 [%rd23+358], %rs1224;
st.local.u16 [%rd23+360], %rs1224;
st.local.u16 [%rd23+362], %rs1224;
st.local.u16 [%rd23+364], %rs1224;
st.local.u16 [%rd23+366], %rs1224;
st.local.u16 [%rd23+368], %rs1224;
st.local.u16 [%rd23+370], %rs1224;
st.local.u16 [%rd23+372], %rs1224;
st.local.u16 [%rd23+374], %rs1224;
st.local.u16 [%rd23+376], %rs1224;
st.local.u16 [%rd23+378], %rs1224;
st.local.u16 [%rd23+380], %rs1224;
st.local.u16 [%rd23+382], %rs1224;
st.local.u16 [%rd23+384], %rs1224;
st.local.u16 [%rd23+386], %rs1224;
st.local.u16 [%rd23+388], %rs1224;
st.local.u16 [%rd23+390], %rs1224;
st.local.u16 [%rd23+392], %rs1224;
st.local.u16 [%rd23+394], %rs1224;
st.local.u16 [%rd23+396], %rs1224;
st.local.u16 [%rd23+398], %rs1224;
st.local.u16 [%rd23+400], %rs1224;
st.local.u16 [%rd23+402], %rs1224;
st.local.u16 [%rd23+404], %rs1224;
st.local.u16 [%rd23+406], %rs1224;
st.local.u16 [%rd23+408], %rs1224;
st.local.u16 [%rd23+410], %rs1224;
st.local.u16 [%rd23+412], %rs1224;
st.local.u16 [%rd23+414], %rs1224;
st.local.u16 [%rd23+416], %rs1224;
st.local.u16 [%rd23+418], %rs1224;
st.local.u16 [%rd23+420], %rs1224;
st.local.u16 [%rd23+422], %rs1224;
st.local.u16 [%rd23+424], %rs1224;
st.local.u16 [%rd23+426], %rs1224;
st.local.u16 [%rd23+428], %rs1224;
st.local.u16 [%rd23+430], %rs1224;
st.local.u16 [%rd23+432], %rs1224;
st.local.u16 [%rd23+434], %rs1224;
st.local.u16 [%rd23+436], %rs1224;
st.local.u16 [%rd23+438], %rs1224;
st.local.u16 [%rd23+440], %rs1224;
st.local.u16 [%rd23+442], %rs1224;
st.local.u16 [%rd23+444], %rs1224;
st.local.u16 [%rd23+446], %rs1224;
st.local.u16 [%rd23+448], %rs1224;
st.local.u16 [%rd23+450], %rs1224;
st.local.u16 [%rd23+452], %rs1224;
st.local.u16 [%rd23+454], %rs1224;
st.local.u16 [%rd23+456], %rs1224;
st.local.u16 [%rd23+458], %rs1224;
st.local.u16 [%rd23+460], %rs1224;
st.local.u16 [%rd23+462], %rs1224;
st.local.u16 [%rd23+464], %rs1224;
st.local.u16 [%rd23+466], %rs1224;
st.local.u16 [%rd23+468], %rs1224;
st.local.u16 [%rd23+470], %rs1224;
st.local.u16 [%rd23+472], %rs1224;
st.local.u16 [%rd23+474], %rs1224;
st.local.u16 [%rd23+476], %rs1224;
st.local.u16 [%rd23+478], %rs1224;
st.local.u16 [%rd23+480], %rs1224;
st.local.u16 [%rd23+482], %rs1224;
st.local.u16 [%rd23+484], %rs1224;
st.local.u16 [%rd23+486], %rs1224;
st.local.u16 [%rd23+488], %rs1224;
st.local.u16 [%rd23+490], %rs1224;
st.local.u16 [%rd23+492], %rs1224;
st.local.u16 [%rd23+494], %rs1224;
st.local.u16 [%rd23+496], %rs1224;
st.local.u16 [%rd23+498], %rs1224;
st.local.u16 [%rd23+500], %rs1224;
st.local.u16 [%rd23+502], %rs1224;
st.local.u16 [%rd23+504], %rs1224;
st.local.u16 [%rd23+506], %rs1224;
st.local.u16 [%rd23+508], %rs1224;
st.local.u16 [%rd23+510], %rs1224;
st.local.u16 [%rd23+512], %rs1224;
st.local.u16 [%rd23+514], %rs1224;
st.local.u16 [%rd23+516], %rs1224;
st.local.u16 [%rd23+518], %rs1224;
st.local.u16 [%rd23+520], %rs1224;
st.local.u16 [%rd23+522], %rs1224;
st.local.u16 [%rd23+524], %rs1224;
st.local.u16 [%rd23+526], %rs1224;
st.local.u16 [%rd23+528], %rs1224;
st.local.u16 [%rd23+530], %rs1224;
st.local.u16 [%rd23+532], %rs1224;
st.local.u16 [%rd23+534], %rs1224;
st.local.u16 [%rd23+536], %rs1224;
st.local.u16 [%rd23+538], %rs1224;
st.local.u16 [%rd23+540], %rs1224;
st.local.u16 [%rd23+542], %rs1224;
st.local.u16 [%rd23+544], %rs1224;
st.local.u16 [%rd23+546], %rs1224;
st.local.u16 [%rd23+548], %rs1224;
st.local.u16 [%rd23+550], %rs1224;
st.local.u16 [%rd23+552], %rs1224;
st.local.u16 [%rd23+554], %rs1224;
st.local.u16 [%rd23+556], %rs1224;
st.local.u16 [%rd23+558], %rs1224;
st.local.u16 [%rd23+560], %rs1224;
st.local.u16 [%rd23+562], %rs1224;
st.local.u16 [%rd23+564], %rs1224;
st.local.u16 [%rd23+566], %rs1224;
st.local.u16 [%rd23+568], %rs1224;
st.local.u16 [%rd23+570], %rs1224;
st.local.u16 [%rd23+572], %rs1224;
st.local.u16 [%rd23+574], %rs1224;
st.local.u16 [%rd23+576], %rs1224;
st.local.u16 [%rd23+578], %rs1224;
st.local.u16 [%rd23+580], %rs1224;
st.local.u16 [%rd23+582], %rs1224;
st.local.u16 [%rd23+584], %rs1224;
st.local.u16 [%rd23+586], %rs1224;
st.local.u16 [%rd23+588], %rs1224;
st.local.u16 [%rd23+590], %rs1224;
st.local.u16 [%rd23+592], %rs1224;
st.local.u16 [%rd23+594], %rs1224;
st.local.u16 [%rd23+596], %rs1224;
st.local.u16 [%rd23+598], %rs1224;
st.local.u16 [%rd23+600], %rs1224;
st.local.u16 [%rd23+602], %rs1224;
st.local.u16 [%rd23+604], %rs1224;
st.local.u16 [%rd23+606], %rs1224;
st.local.u16 [%rd23+608], %rs1224;
st.local.u16 [%rd23+610], %rs1224;
st.local.u16 [%rd23+612], %rs1224;
st.local.u16 [%rd23+614], %rs1224;
st.local.u16 [%rd23+616], %rs1224;
st.local.u16 [%rd23+618], %rs1224;
st.local.u16 [%rd23+620], %rs1224;
st.local.u16 [%rd23+622], %rs1224;
st.local.u16 [%rd23+624], %rs1224;
st.local.u16 [%rd23+626], %rs1224;
st.local.u16 [%rd23+628], %rs1224;
st.local.u16 [%rd23+630], %rs1224;
st.local.u16 [%rd23+632], %rs1224;
st.local.u16 [%rd23+634], %rs1224;
st.local.u16 [%rd23+636], %rs1224;
st.local.u16 [%rd23+638], %rs1224;
st.local.u16 [%rd23+640], %rs1224;
st.local.u16 [%rd23+642], %rs1224;
st.local.u16 [%rd23+644], %rs1224;
st.local.u16 [%rd23+646], %rs1224;
st.local.u16 [%rd23+648], %rs1224;
st.local.u16 [%rd23+650], %rs1224;
st.local.u16 [%rd23+652], %rs1224;
st.local.u16 [%rd23+654], %rs1224;
st.local.u16 [%rd23+656], %rs1224;
st.local.u16 [%rd23+658], %rs1224;
st.local.u16 [%rd23+660], %rs1224;
st.local.u16 [%rd23+662], %rs1224;
st.local.u16 [%rd23+664], %rs1224;
st.local.u16 [%rd23+666], %rs1224;
st.local.u16 [%rd23+668], %rs1224;
st.local.u16 [%rd23+670], %rs1224;
st.local.u16 [%rd23+672], %rs1224;
st.local.u16 [%rd23+674], %rs1224;
st.local.u16 [%rd23+676], %rs1224;
st.local.u16 [%rd23+678], %rs1224;
st.local.u16 [%rd23+680], %rs1224;
st.local.u16 [%rd23+682], %rs1224;
st.local.u16 [%rd23+684], %rs1224;
st.local.u16 [%rd23+686], %rs1224;
st.local.u16 [%rd23+688], %rs1224;
st.local.u16 [%rd23+690], %rs1224;
st.local.u16 [%rd23+692], %rs1224;
st.local.u16 [%rd23+694], %rs1224;
st.local.u16 [%rd23+696], %rs1224;
st.local.u16 [%rd23+698], %rs1224;
st.local.u16 [%rd23+700], %rs1224;
st.local.u16 [%rd23+702], %rs1224;
st.local.u16 [%rd23+704], %rs1224;
st.local.u16 [%rd23+706], %rs1224;
st.local.u16 [%rd23+708], %rs1224;
st.local.u16 [%rd23+710], %rs1224;
st.local.u16 [%rd23+712], %rs1224;
st.local.u16 [%rd23+714], %rs1224;
st.local.u16 [%rd23+716], %rs1224;
st.local.u16 [%rd23+718], %rs1224;
st.local.u16 [%rd23+720], %rs1224;
st.local.u16 [%rd23+722], %rs1224;
st.local.u16 [%rd23+724], %rs1224;
st.local.u16 [%rd23+726], %rs1224;
st.local.u16 [%rd23+728], %rs1224;
st.local.u16 [%rd23+730], %rs1224;
st.local.u16 [%rd23+732], %rs1224;
st.local.u16 [%rd23+734], %rs1224;
st.local.u16 [%rd23+736], %rs1224;
st.local.u16 [%rd23+738], %rs1224;
st.local.u16 [%rd23+740], %rs1224;
st.local.u16 [%rd23+742], %rs1224;
st.local.u16 [%rd23+744], %rs1224;
st.local.u16 [%rd23+746], %rs1224;
st.local.u16 [%rd23+748], %rs1224;
st.local.u16 [%rd23+750], %rs1224;
st.local.u16 [%rd23+752], %rs1224;
st.local.u16 [%rd23+754], %rs1224;
st.local.u16 [%rd23+756], %rs1224;
st.local.u16 [%rd23+758], %rs1224;
st.local.u16 [%rd23+760], %rs1224;
st.local.u16 [%rd23+762], %rs1224;
st.local.u16 [%rd23+764], %rs1224;
st.local.u16 [%rd23+766], %rs1224;
st.local.u16 [%rd23+768], %rs1224;
st.local.u16 [%rd23+770], %rs1224;
st.local.u16 [%rd23+772], %rs1224;
st.local.u16 [%rd23+774], %rs1224;
st.local.u16 [%rd23+776], %rs1224;
st.local.u16 [%rd23+778], %rs1224;
st.local.u16 [%rd23+780], %rs1224;
st.local.u16 [%rd23+782], %rs1224;
st.local.u16 [%rd23+784], %rs1224;
st.local.u16 [%rd23+786], %rs1224;
st.local.u16 [%rd23+788], %rs1224;
st.local.u16 [%rd23+790], %rs1224;
st.local.u16 [%rd23+792], %rs1224;
st.local.u16 [%rd23+794], %rs1224;
st.local.u16 [%rd23+796], %rs1224;
st.local.u16 [%rd23+798], %rs1224;
st.local.u16 [%rd23+800], %rs1224;
st.local.u16 [%rd23+802], %rs1224;
st.local.u16 [%rd23+804], %rs1224;
st.local.u16 [%rd23+806], %rs1224;
st.local.u16 [%rd23+808], %rs1224;
st.local.u16 [%rd23+810], %rs1224;
st.local.u16 [%rd23+812], %rs1224;
st.local.u16 [%rd23+814], %rs1224;
st.local.u16 [%rd23+816], %rs1224;
st.local.u16 [%rd23+818], %rs1224;
st.local.u16 [%rd23+820], %rs1224;
st.local.u16 [%rd23+822], %rs1224;
st.local.u16 [%rd23+824], %rs1224;
st.local.u16 [%rd23+826], %rs1224;
st.local.u16 [%rd23+828], %rs1224;
st.local.u16 [%rd23+830], %rs1224;
st.local.u16 [%rd23+832], %rs1224;
st.local.u16 [%rd23+834], %rs1224;
st.local.u16 [%rd23+836], %rs1224;
st.local.u16 [%rd23+838], %rs1224;
st.local.u16 [%rd23+840], %rs1224;
st.local.u16 [%rd23+842], %rs1224;
st.local.u16 [%rd23+844], %rs1224;
st.local.u16 [%rd23+846], %rs1224;
st.local.u16 [%rd23+848], %rs1224;
st.local.u16 [%rd23+850], %rs1224;
st.local.u16 [%rd23+852], %rs1224;
st.local.u16 [%rd23+854], %rs1224;
st.local.u16 [%rd23+856], %rs1224;
st.local.u16 [%rd23+858], %rs1224;
st.local.u16 [%rd23+860], %rs1224;
st.local.u16 [%rd23+862], %rs1224;
st.local.u16 [%rd23+864], %rs1224;
st.local.u16 [%rd23+866], %rs1224;
st.local.u16 [%rd23+868], %rs1224;
st.local.u16 [%rd23+870], %rs1224;
st.local.u16 [%rd23+872], %rs1224;
st.local.u16 [%rd23+874], %rs1224;
st.local.u16 [%rd23+876], %rs1224;
st.local.u16 [%rd23+878], %rs1224;
st.local.u16 [%rd23+880], %rs1224;
st.local.u16 [%rd23+882], %rs1224;
st.local.u16 [%rd23+884], %rs1224;
st.local.u16 [%rd23+886], %rs1224;
st.local.u16 [%rd23+888], %rs1224;
st.local.u16 [%rd23+890], %rs1224;
st.local.u16 [%rd23+892], %rs1224;
st.local.u16 [%rd23+894], %rs1224;
st.local.u16 [%rd23+896], %rs1224;
st.local.u16 [%rd23+898], %rs1224;
st.local.u16 [%rd23+900], %rs1224;
st.local.u16 [%rd23+902], %rs1224;
st.local.u16 [%rd23+904], %rs1224;
st.local.u16 [%rd23+906], %rs1224;
st.local.u16 [%rd23+908], %rs1224;
st.local.u16 [%rd23+910], %rs1224;
st.local.u16 [%rd23+912], %rs1224;
st.local.u16 [%rd23+914], %rs1224;
st.local.u16 [%rd23+916], %rs1224;
st.local.u16 [%rd23+918], %rs1224;
st.local.u16 [%rd23+920], %rs1224;
st.local.u16 [%rd23+922], %rs1224;
st.local.u16 [%rd23+924], %rs1224;
st.local.u16 [%rd23+926], %rs1224;
st.local.u16 [%rd23+928], %rs1224;
st.local.u16 [%rd23+930], %rs1224;
st.local.u16 [%rd23+932], %rs1224;
st.local.u16 [%rd23+934], %rs1224;
st.local.u16 [%rd23+936], %rs1224;
st.local.u16 [%rd23+938], %rs1224;
st.local.u16 [%rd23+940], %rs1224;
st.local.u16 [%rd23+942], %rs1224;
st.local.u16 [%rd23+944], %rs1224;
st.local.u16 [%rd23+946], %rs1224;
st.local.u16 [%rd23+948], %rs1224;
st.local.u16 [%rd23+950], %rs1224;
st.local.u16 [%rd23+952], %rs1224;
st.local.u16 [%rd23+954], %rs1224;
st.local.u16 [%rd23+956], %rs1224;
st.local.u16 [%rd23+958], %rs1224;
st.local.u16 [%rd23+960], %rs1224;
st.local.u16 [%rd23+962], %rs1224;
st.local.u16 [%rd23+964], %rs1224;
st.local.u16 [%rd23+966], %rs1224;
st.local.u16 [%rd23+968], %rs1224;
st.local.u16 [%rd23+970], %rs1224;
st.local.u16 [%rd23+972], %rs1224;
st.local.u16 [%rd23+974], %rs1224;
st.local.u16 [%rd23+976], %rs1224;
st.local.u16 [%rd23+978], %rs1224;
st.local.u16 [%rd23+980], %rs1224;
st.local.u16 [%rd23+982], %rs1224;
st.local.u16 [%rd23+984], %rs1224;
st.local.u16 [%rd23+986], %rs1224;
st.local.u16 [%rd23+988], %rs1224;
st.local.u16 [%rd23+990], %rs1224;
st.local.u16 [%rd23+992], %rs1224;
st.local.u16 [%rd23+994], %rs1224;
st.local.u16 [%rd23+996], %rs1224;
st.local.u16 [%rd23+998], %rs1224;
st.local.u16 [%rd23+1000], %rs1224;
st.local.u16 [%rd23+1002], %rs1224;
st.local.u16 [%rd23+1004], %rs1224;
st.local.u16 [%rd23+1006], %rs1224;
st.local.u16 [%rd23+1008], %rs1224;
st.local.u16 [%rd23+1010], %rs1224;
st.local.u16 [%rd23+1012], %rs1224;
st.local.u16 [%rd23+1014], %rs1224;
st.local.u16 [%rd23+1016], %rs1224;
st.local.u16 [%rd23+1018], %rs1224;
st.local.u16 [%rd23+1020], %rs1224;
st.local.u16 [%rd23+1022], %rs1224;
st.local.u16 [%rd23+1024], %rs1224;
// Zero the row counter %r9435 and three further values before entering
// $L__BB1_687.
mov.u32 %r9435, 0;
mov.u32 %r9545, %r9435;
mov.u32 %r9541, %r9435;
mov.u32 %r9543, %r9435;
$L__BB1_687:
// NOTE(review): presumably the head of a loop over 4-row stripes with %r9435
// as the first row of the stripe -- the back-edge is not visible in this
// section. %r1 is used as the row pitch and %r6 as the row limit.
@%p10 bra $L__BB1_926;
// %r5706 = rows left from %r9435 up to %r6.
sub.s32 %r5706, %r6, %r9435;
// Row numbers %r9435+4 .. %r9435+7 (%r1740, %r1742, %r1744, %r1746) and their
// element offsets row * %r1 (%r1741, %r1743, %r1745, %r1747).
add.s32 %r1740, %r9435, 4;
mul.lo.s32 %r1741, %r1740, %r1;
add.s32 %r1742, %r9435, 5;
add.s32 %r1743, %r1741, %r1;
add.s32 %r1744, %r9435, 6;
shl.b32 %r5707, %r1, 1;
add.s32 %r1745, %r1741, %r5707;
add.s32 %r1746, %r9435, 7;
mul.lo.s32 %r5708, %r1, 3;
add.s32 %r1747, %r1741, %r5708;
// Row numbers %r9435+1 .. %r9435+3 and the element offsets of rows
// %r9435 (%r1751), +1 (%r1754), +2 (%r1753) and +3 (%r1752).
add.s32 %r1748, %r9435, 1;
add.s32 %r1749, %r9435, 2;
add.s32 %r1750, %r9435, 3;
mul.lo.s32 %r1751, %r9435, %r1;
add.s32 %r1752, %r1751, %r5708;
sub.s32 %r1753, %r1752, %r1;
sub.s32 %r1754, %r1753, %r1;
// %r1755 = 16-bit mask, one nibble per column, selected by rows left:
//   < 2 -> 0x1111, 2 -> 0x3333, 3 -> 0x7777, >= 4 -> 0xFFFF.
setp.lt.u32 %p793, %r5706, 2;
selp.b32 %r5709, 4369, 13107, %p793;
setp.lt.u32 %p794, %r5706, 3;
selp.b32 %r5710, %r5709, 30583, %p794;
setp.lt.u32 %p795, %r5706, 4;
selp.b32 %r1755, %r5710, 65535, %p795;
// Column counter %r9439 and %r9440 both start at 0 for the code at
// $L__BB1_689.
mov.u32 %r5705, 0;
mov.u32 %r9439, %r5705;
mov.u32 %r9440, %r5705;
$L__BB1_689:
// Load two consecutive u16 entries of the local scratch array, at index
// (%r9439 >> 2) and the one after it.
shr.u32 %r5712, %r9439, 2;
mul.wide.u32 %rd451, %r5712, 2;
add.s64 %rd25, %rd23, %rd451;
ld.local.u16 %rs238, [%rd25];
ld.local.u16 %rs239, [%rd25+2];
// Build the 16-bit mask %r9451 from rows %r9435+4 .. %r9435+7 and columns
// %r9439 .. %r9439+3. Bit (4*c + r) is set when the 32-bit sample v at row
// offset r, column offset c satisfies |v| > 4 and |v| is odd. Samples are read
// from %rd3 + 4 * (%rd5 + row * %r1 + column). Rows >= %r6 and columns >= %r5
// contribute no bits.
// Column +0 (bits 0..3) follows; %r5705 holds 0.
setp.ge.u32 %p796, %r9439, %r5;
mov.u32 %r9451, %r5705;
@%p796 bra $L__BB1_698;
setp.ge.u32 %p797, %r1740, %r6;
mov.u32 %r9451, 0;
@%p797 bra $L__BB1_692;
// Row +4 -> bit 0.
add.s32 %r5714, %r1741, %r9439;
cvt.u64.u32 %rd452, %r5714;
add.s64 %rd453, %rd452, %rd5;
shl.b64 %rd454, %rd453, 2;
add.s64 %rd455, %rd3, %rd454;
ld.global.u32 %r5715, [%rd455];
abs.s32 %r5716, %r5715;
setp.gt.u32 %p798, %r5716, 4;
and.b32 %r5717, %r5716, 1;
setp.eq.b32 %p799, %r5717, 1;
and.pred %p800, %p798, %p799;
selp.u32 %r9451, 1, 0, %p800;
$L__BB1_692:
setp.ge.u32 %p801, %r1742, %r6;
@%p801 bra $L__BB1_694;
// Row +5 -> bit 1.
add.s32 %r5718, %r1743, %r9439;
cvt.u64.u32 %rd456, %r5718;
add.s64 %rd457, %rd456, %rd5;
shl.b64 %rd458, %rd457, 2;
add.s64 %rd459, %rd3, %rd458;
ld.global.u32 %r5719, [%rd459];
abs.s32 %r5720, %r5719;
setp.gt.u32 %p802, %r5720, 4;
and.b32 %r5721, %r5720, 1;
setp.eq.b32 %p803, %r5721, 1;
and.pred %p804, %p802, %p803;
selp.b32 %r5722, 2, 0, %p804;
or.b32 %r9451, %r5722, %r9451;
$L__BB1_694:
setp.ge.u32 %p805, %r1744, %r6;
@%p805 bra $L__BB1_696;
// Row +6 -> bit 2.
add.s32 %r5723, %r1745, %r9439;
cvt.u64.u32 %rd460, %r5723;
add.s64 %rd461, %rd460, %rd5;
shl.b64 %rd462, %rd461, 2;
add.s64 %rd463, %rd3, %rd462;
ld.global.u32 %r5724, [%rd463];
abs.s32 %r5725, %r5724;
setp.gt.u32 %p806, %r5725, 4;
and.b32 %r5726, %r5725, 1;
setp.eq.b32 %p807, %r5726, 1;
and.pred %p808, %p806, %p807;
selp.b32 %r5727, 4, 0, %p808;
or.b32 %r9451, %r5727, %r9451;
$L__BB1_696:
setp.ge.u32 %p809, %r1746, %r6;
@%p809 bra $L__BB1_698;
// Row +7 -> bit 3.
add.s32 %r5728, %r1747, %r9439;
cvt.u64.u32 %rd464, %r5728;
add.s64 %rd465, %rd464, %rd5;
shl.b64 %rd466, %rd465, 2;
add.s64 %rd467, %rd3, %rd466;
ld.global.u32 %r5729, [%rd467];
abs.s32 %r5730, %r5729;
setp.gt.u32 %p810, %r5730, 4;
and.b32 %r5731, %r5730, 1;
setp.eq.b32 %p811, %r5731, 1;
and.pred %p812, %p810, %p811;
selp.b32 %r5732, 8, 0, %p812;
or.b32 %r9451, %r5732, %r9451;
$L__BB1_698:
// Column %r9439 + 1 of mask %r9451 (bits 4..7). Skipped entirely when the
// column is >= %r5. A bit is set when the sample at
// %rd3 + 4 * (%rd5 + row * %r1 + column) has |v| > 4 and |v| odd; rows >= %r6
// are skipped.
add.s32 %r1769, %r9439, 1;
setp.ge.u32 %p813, %r1769, %r5;
@%p813 bra $L__BB1_707;
setp.ge.u32 %p814, %r1740, %r6;
@%p814 bra $L__BB1_701;
// Row +4 -> bit 4.
add.s32 %r5733, %r1741, %r1769;
cvt.u64.u32 %rd468, %r5733;
add.s64 %rd469, %rd468, %rd5;
shl.b64 %rd470, %rd469, 2;
add.s64 %rd471, %rd3, %rd470;
ld.global.u32 %r5734, [%rd471];
abs.s32 %r5735, %r5734;
setp.gt.u32 %p815, %r5735, 4;
and.b32 %r5736, %r5735, 1;
setp.eq.b32 %p816, %r5736, 1;
and.pred %p817, %p815, %p816;
selp.b32 %r5737, 16, 0, %p817;
or.b32 %r9451, %r5737, %r9451;
$L__BB1_701:
setp.ge.u32 %p818, %r1742, %r6;
@%p818 bra $L__BB1_703;
// Row +5 -> bit 5.
add.s32 %r5738, %r1743, %r1769;
cvt.u64.u32 %rd472, %r5738;
add.s64 %rd473, %rd472, %rd5;
shl.b64 %rd474, %rd473, 2;
add.s64 %rd475, %rd3, %rd474;
ld.global.u32 %r5739, [%rd475];
abs.s32 %r5740, %r5739;
setp.gt.u32 %p819, %r5740, 4;
and.b32 %r5741, %r5740, 1;
setp.eq.b32 %p820, %r5741, 1;
and.pred %p821, %p819, %p820;
selp.b32 %r5742, 32, 0, %p821;
or.b32 %r9451, %r5742, %r9451;
$L__BB1_703:
setp.ge.u32 %p822, %r1744, %r6;
@%p822 bra $L__BB1_705;
// Row +6 -> bit 6.
add.s32 %r5743, %r1745, %r1769;
cvt.u64.u32 %rd476, %r5743;
add.s64 %rd477, %rd476, %rd5;
shl.b64 %rd478, %rd477, 2;
add.s64 %rd479, %rd3, %rd478;
ld.global.u32 %r5744, [%rd479];
abs.s32 %r5745, %r5744;
setp.gt.u32 %p823, %r5745, 4;
and.b32 %r5746, %r5745, 1;
setp.eq.b32 %p824, %r5746, 1;
and.pred %p825, %p823, %p824;
selp.b32 %r5747, 64, 0, %p825;
or.b32 %r9451, %r5747, %r9451;
$L__BB1_705:
setp.ge.u32 %p826, %r1746, %r6;
@%p826 bra $L__BB1_707;
// Row +7 -> bit 7.
add.s32 %r5748, %r1747, %r1769;
cvt.u64.u32 %rd480, %r5748;
add.s64 %rd481, %rd480, %rd5;
shl.b64 %rd482, %rd481, 2;
add.s64 %rd483, %rd3, %rd482;
ld.global.u32 %r5749, [%rd483];
abs.s32 %r5750, %r5749;
setp.gt.u32 %p827, %r5750, 4;
and.b32 %r5751, %r5750, 1;
setp.eq.b32 %p828, %r5751, 1;
and.pred %p829, %p827, %p828;
selp.b32 %r5752, 128, 0, %p829;
or.b32 %r9451, %r5752, %r9451;
$L__BB1_707:
// Column %r9439 + 2 of mask %r9451 (bits 8..11). Skipped entirely when the
// column is >= %r5. A bit is set when the sample at
// %rd3 + 4 * (%rd5 + row * %r1 + column) has |v| > 4 and |v| odd; rows >= %r6
// are skipped.
add.s32 %r1778, %r9439, 2;
setp.ge.u32 %p830, %r1778, %r5;
@%p830 bra $L__BB1_716;
setp.ge.u32 %p831, %r1740, %r6;
@%p831 bra $L__BB1_710;
// Row +4 -> bit 8.
add.s32 %r5753, %r1741, %r1778;
cvt.u64.u32 %rd484, %r5753;
add.s64 %rd485, %rd484, %rd5;
shl.b64 %rd486, %rd485, 2;
add.s64 %rd487, %rd3, %rd486;
ld.global.u32 %r5754, [%rd487];
abs.s32 %r5755, %r5754;
setp.gt.u32 %p832, %r5755, 4;
and.b32 %r5756, %r5755, 1;
setp.eq.b32 %p833, %r5756, 1;
and.pred %p834, %p832, %p833;
selp.b32 %r5757, 256, 0, %p834;
or.b32 %r9451, %r5757, %r9451;
$L__BB1_710:
setp.ge.u32 %p835, %r1742, %r6;
@%p835 bra $L__BB1_712;
// Row +5 -> bit 9.
add.s32 %r5758, %r1743, %r1778;
cvt.u64.u32 %rd488, %r5758;
add.s64 %rd489, %rd488, %rd5;
shl.b64 %rd490, %rd489, 2;
add.s64 %rd491, %rd3, %rd490;
ld.global.u32 %r5759, [%rd491];
abs.s32 %r5760, %r5759;
setp.gt.u32 %p836, %r5760, 4;
and.b32 %r5761, %r5760, 1;
setp.eq.b32 %p837, %r5761, 1;
and.pred %p838, %p836, %p837;
selp.b32 %r5762, 512, 0, %p838;
or.b32 %r9451, %r5762, %r9451;
$L__BB1_712:
setp.ge.u32 %p839, %r1744, %r6;
@%p839 bra $L__BB1_714;
// Row +6 -> bit 10.
add.s32 %r5763, %r1745, %r1778;
cvt.u64.u32 %rd492, %r5763;
add.s64 %rd493, %rd492, %rd5;
shl.b64 %rd494, %rd493, 2;
add.s64 %rd495, %rd3, %rd494;
ld.global.u32 %r5764, [%rd495];
abs.s32 %r5765, %r5764;
setp.gt.u32 %p840, %r5765, 4;
and.b32 %r5766, %r5765, 1;
setp.eq.b32 %p841, %r5766, 1;
and.pred %p842, %p840, %p841;
selp.b32 %r5767, 1024, 0, %p842;
or.b32 %r9451, %r5767, %r9451;
$L__BB1_714:
setp.ge.u32 %p843, %r1746, %r6;
@%p843 bra $L__BB1_716;
// Row +7 -> bit 11.
add.s32 %r5768, %r1747, %r1778;
cvt.u64.u32 %rd496, %r5768;
add.s64 %rd497, %rd496, %rd5;
shl.b64 %rd498, %rd497, 2;
add.s64 %rd499, %rd3, %rd498;
ld.global.u32 %r5769, [%rd499];
abs.s32 %r5770, %r5769;
setp.gt.u32 %p844, %r5770, 4;
and.b32 %r5771, %r5770, 1;
setp.eq.b32 %p845, %r5771, 1;
and.pred %p846, %p844, %p845;
selp.b32 %r5772, 2048, 0, %p846;
or.b32 %r9451, %r5772, %r9451;
$L__BB1_716:
// Column %r9439 + 3 of mask %r9451 (bits 12..15). Skipped entirely when the
// column is >= %r5. A bit is set when the sample at
// %rd3 + 4 * (%rd5 + row * %r1 + column) has |v| > 4 and |v| odd; rows >= %r6
// are skipped.
add.s32 %r1787, %r9439, 3;
setp.ge.u32 %p847, %r1787, %r5;
@%p847 bra $L__BB1_725;
setp.ge.u32 %p848, %r1740, %r6;
@%p848 bra $L__BB1_719;
// Row +4 -> bit 12.
add.s32 %r5773, %r1741, %r1787;
cvt.u64.u32 %rd500, %r5773;
add.s64 %rd501, %rd500, %rd5;
shl.b64 %rd502, %rd501, 2;
add.s64 %rd503, %rd3, %rd502;
ld.global.u32 %r5774, [%rd503];
abs.s32 %r5775, %r5774;
setp.gt.u32 %p849, %r5775, 4;
and.b32 %r5776, %r5775, 1;
setp.eq.b32 %p850, %r5776, 1;
and.pred %p851, %p849, %p850;
selp.b32 %r5777, 4096, 0, %p851;
or.b32 %r9451, %r5777, %r9451;
$L__BB1_719:
setp.ge.u32 %p852, %r1742, %r6;
@%p852 bra $L__BB1_721;
// Row +5 -> bit 13.
add.s32 %r5778, %r1743, %r1787;
cvt.u64.u32 %rd504, %r5778;
add.s64 %rd505, %rd504, %rd5;
shl.b64 %rd506, %rd505, 2;
add.s64 %rd507, %rd3, %rd506;
ld.global.u32 %r5779, [%rd507];
abs.s32 %r5780, %r5779;
setp.gt.u32 %p853, %r5780, 4;
and.b32 %r5781, %r5780, 1;
setp.eq.b32 %p854, %r5781, 1;
and.pred %p855, %p853, %p854;
selp.b32 %r5782, 8192, 0, %p855;
or.b32 %r9451, %r5782, %r9451;
$L__BB1_721:
setp.ge.u32 %p856, %r1744, %r6;
@%p856 bra $L__BB1_723;
// Row +6 -> bit 14.
add.s32 %r5783, %r1745, %r1787;
cvt.u64.u32 %rd508, %r5783;
add.s64 %rd509, %rd508, %rd5;
shl.b64 %rd510, %rd509, 2;
add.s64 %rd511, %rd3, %rd510;
ld.global.u32 %r5784, [%rd511];
abs.s32 %r5785, %r5784;
setp.gt.u32 %p857, %r5785, 4;
and.b32 %r5786, %r5785, 1;
setp.eq.b32 %p858, %r5786, 1;
and.pred %p859, %p857, %p858;
selp.b32 %r5787, 16384, 0, %p859;
or.b32 %r9451, %r5787, %r9451;
$L__BB1_723:
setp.ge.u32 %p860, %r1746, %r6;
@%p860 bra $L__BB1_725;
// Row +7 -> bit 15.
add.s32 %r5788, %r1747, %r1787;
cvt.u64.u32 %rd512, %r5788;
add.s64 %rd513, %rd512, %rd5;
shl.b64 %rd514, %rd513, 2;
add.s64 %rd515, %rd3, %rd514;
ld.global.u32 %r5789, [%rd515];
abs.s32 %r5790, %r5789;
setp.gt.u32 %p861, %r5790, 4;
and.b32 %r5791, %r5790, 1;
setp.eq.b32 %p862, %r5791, 1;
and.pred %p863, %p861, %p862;
selp.b32 %r5792, 32768, 0, %p863;
or.b32 %r9451, %r5792, %r9451;
$L__BB1_725:
// Start a second 16-bit mask, %r9467, with the same bit layout and the same
// sample test (|v| > 4 and |v| odd) as %r9451, for the columns starting at
// %r9439 + 4. This block handles column %r9439 + 4 (bits 0..3). The whole
// column is skipped when it is >= %r5; rows >= %r6 are skipped individually.
add.s32 %r5794, %r9439, 4;
setp.ge.u32 %p864, %r5794, %r5;
mov.u32 %r9467, 0;
@%p864 bra $L__BB1_734;
setp.ge.u32 %p865, %r1740, %r6;
mov.u32 %r9467, 0;
@%p865 bra $L__BB1_728;
// Row +4 -> bit 0.
add.s32 %r5796, %r1741, %r9439;
add.s32 %r5797, %r5796, 4;
cvt.u64.u32 %rd516, %r5797;
add.s64 %rd517, %rd516, %rd5;
shl.b64 %rd518, %rd517, 2;
add.s64 %rd519, %rd3, %rd518;
ld.global.u32 %r5798, [%rd519];
abs.s32 %r5799, %r5798;
setp.gt.u32 %p866, %r5799, 4;
and.b32 %r5800, %r5799, 1;
setp.eq.b32 %p867, %r5800, 1;
and.pred %p868, %p866, %p867;
selp.u32 %r9467, 1, 0, %p868;
$L__BB1_728:
setp.ge.u32 %p869, %r1742, %r6;
@%p869 bra $L__BB1_730;
// Row +5 -> bit 1.
add.s32 %r5801, %r1743, %r9439;
add.s32 %r5802, %r5801, 4;
cvt.u64.u32 %rd520, %r5802;
add.s64 %rd521, %rd520, %rd5;
shl.b64 %rd522, %rd521, 2;
add.s64 %rd523, %rd3, %rd522;
ld.global.u32 %r5803, [%rd523];
abs.s32 %r5804, %r5803;
setp.gt.u32 %p870, %r5804, 4;
and.b32 %r5805, %r5804, 1;
setp.eq.b32 %p871, %r5805, 1;
and.pred %p872, %p870, %p871;
selp.b32 %r5806, 2, 0, %p872;
or.b32 %r9467, %r5806, %r9467;
$L__BB1_730:
setp.ge.u32 %p873, %r1744, %r6;
@%p873 bra $L__BB1_732;
// Row +6 -> bit 2.
add.s32 %r5807, %r1745, %r9439;
add.s32 %r5808, %r5807, 4;
cvt.u64.u32 %rd524, %r5808;
add.s64 %rd525, %rd524, %rd5;
shl.b64 %rd526, %rd525, 2;
add.s64 %rd527, %rd3, %rd526;
ld.global.u32 %r5809, [%rd527];
abs.s32 %r5810, %r5809;
setp.gt.u32 %p874, %r5810, 4;
and.b32 %r5811, %r5810, 1;
setp.eq.b32 %p875, %r5811, 1;
and.pred %p876, %p874, %p875;
selp.b32 %r5812, 4, 0, %p876;
or.b32 %r9467, %r5812, %r9467;
$L__BB1_732:
setp.ge.u32 %p877, %r1746, %r6;
@%p877 bra $L__BB1_734;
// Row +7 -> bit 3.
add.s32 %r5813, %r1747, %r9439;
add.s32 %r5814, %r5813, 4;
cvt.u64.u32 %rd528, %r5814;
add.s64 %rd529, %rd528, %rd5;
shl.b64 %rd530, %rd529, 2;
add.s64 %rd531, %rd3, %rd530;
ld.global.u32 %r5815, [%rd531];
abs.s32 %r5816, %r5815;
setp.gt.u32 %p878, %r5816, 4;
and.b32 %r5817, %r5816, 1;
setp.eq.b32 %p879, %r5817, 1;
and.pred %p880, %p878, %p879;
selp.b32 %r5818, 8, 0, %p880;
or.b32 %r9467, %r5818, %r9467;
$L__BB1_734:
// Column %r9439 + 5 of mask %r9467 (bits 4..7). Skipped entirely when the
// column is >= %r5. A bit is set when the sample at
// %rd3 + 4 * (%rd5 + row * %r1 + column) has |v| > 4 and |v| odd; rows >= %r6
// are skipped.
add.s32 %r1804, %r9439, 5;
setp.ge.u32 %p881, %r1804, %r5;
@%p881 bra $L__BB1_743;
setp.ge.u32 %p882, %r1740, %r6;
@%p882 bra $L__BB1_737;
// Row +4 -> bit 4.
add.s32 %r5819, %r1741, %r1804;
cvt.u64.u32 %rd532, %r5819;
add.s64 %rd533, %rd532, %rd5;
shl.b64 %rd534, %rd533, 2;
add.s64 %rd535, %rd3, %rd534;
ld.global.u32 %r5820, [%rd535];
abs.s32 %r5821, %r5820;
setp.gt.u32 %p883, %r5821, 4;
and.b32 %r5822, %r5821, 1;
setp.eq.b32 %p884, %r5822, 1;
and.pred %p885, %p883, %p884;
selp.b32 %r5823, 16, 0, %p885;
or.b32 %r9467, %r5823, %r9467;
$L__BB1_737:
setp.ge.u32 %p886, %r1742, %r6;
@%p886 bra $L__BB1_739;
// Row +5 -> bit 5.
add.s32 %r5824, %r1743, %r1804;
cvt.u64.u32 %rd536, %r5824;
add.s64 %rd537, %rd536, %rd5;
shl.b64 %rd538, %rd537, 2;
add.s64 %rd539, %rd3, %rd538;
ld.global.u32 %r5825, [%rd539];
abs.s32 %r5826, %r5825;
setp.gt.u32 %p887, %r5826, 4;
and.b32 %r5827, %r5826, 1;
setp.eq.b32 %p888, %r5827, 1;
and.pred %p889, %p887, %p888;
selp.b32 %r5828, 32, 0, %p889;
or.b32 %r9467, %r5828, %r9467;
$L__BB1_739:
setp.ge.u32 %p890, %r1744, %r6;
@%p890 bra $L__BB1_741;
// Row +6 -> bit 6.
add.s32 %r5829, %r1745, %r1804;
cvt.u64.u32 %rd540, %r5829;
add.s64 %rd541, %rd540, %rd5;
shl.b64 %rd542, %rd541, 2;
add.s64 %rd543, %rd3, %rd542;
ld.global.u32 %r5830, [%rd543];
abs.s32 %r5831, %r5830;
setp.gt.u32 %p891, %r5831, 4;
and.b32 %r5832, %r5831, 1;
setp.eq.b32 %p892, %r5832, 1;
and.pred %p893, %p891, %p892;
selp.b32 %r5833, 64, 0, %p893;
or.b32 %r9467, %r5833, %r9467;
$L__BB1_741:
setp.ge.u32 %p894, %r1746, %r6;
@%p894 bra $L__BB1_743;
// Row +7 -> bit 7.
add.s32 %r5834, %r1747, %r1804;
cvt.u64.u32 %rd544, %r5834;
add.s64 %rd545, %rd544, %rd5;
shl.b64 %rd546, %rd545, 2;
add.s64 %rd547, %rd3, %rd546;
ld.global.u32 %r5835, [%rd547];
abs.s32 %r5836, %r5835;
setp.gt.u32 %p895, %r5836, 4;
and.b32 %r5837, %r5836, 1;
setp.eq.b32 %p896, %r5837, 1;
and.pred %p897, %p895, %p896;
selp.b32 %r5838, 128, 0, %p897;
or.b32 %r9467, %r5838, %r9467;
$L__BB1_743:
// Column %r9439 + 6 of mask %r9467 (bits 8..11). Skipped entirely when the
// column is >= %r5. A bit is set when the sample at
// %rd3 + 4 * (%rd5 + row * %r1 + column) has |v| > 4 and |v| odd; rows >= %r6
// are skipped.
add.s32 %r1813, %r9439, 6;
setp.ge.u32 %p898, %r1813, %r5;
@%p898 bra $L__BB1_752;
setp.ge.u32 %p899, %r1740, %r6;
@%p899 bra $L__BB1_746;
// Row +4 -> bit 8.
add.s32 %r5839, %r1741, %r1813;
cvt.u64.u32 %rd548, %r5839;
add.s64 %rd549, %rd548, %rd5;
shl.b64 %rd550, %rd549, 2;
add.s64 %rd551, %rd3, %rd550;
ld.global.u32 %r5840, [%rd551];
abs.s32 %r5841, %r5840;
setp.gt.u32 %p900, %r5841, 4;
and.b32 %r5842, %r5841, 1;
setp.eq.b32 %p901, %r5842, 1;
and.pred %p902, %p900, %p901;
selp.b32 %r5843, 256, 0, %p902;
or.b32 %r9467, %r5843, %r9467;
$L__BB1_746:
setp.ge.u32 %p903, %r1742, %r6;
@%p903 bra $L__BB1_748;
// Row +5 -> bit 9.
add.s32 %r5844, %r1743, %r1813;
cvt.u64.u32 %rd552, %r5844;
add.s64 %rd553, %rd552, %rd5;
shl.b64 %rd554, %rd553, 2;
add.s64 %rd555, %rd3, %rd554;
ld.global.u32 %r5845, [%rd555];
abs.s32 %r5846, %r5845;
setp.gt.u32 %p904, %r5846, 4;
and.b32 %r5847, %r5846, 1;
setp.eq.b32 %p905, %r5847, 1;
and.pred %p906, %p904, %p905;
selp.b32 %r5848, 512, 0, %p906;
or.b32 %r9467, %r5848, %r9467;
$L__BB1_748:
setp.ge.u32 %p907, %r1744, %r6;
@%p907 bra $L__BB1_750;
// Row +6 -> bit 10.
add.s32 %r5849, %r1745, %r1813;
cvt.u64.u32 %rd556, %r5849;
add.s64 %rd557, %rd556, %rd5;
shl.b64 %rd558, %rd557, 2;
add.s64 %rd559, %rd3, %rd558;
ld.global.u32 %r5850, [%rd559];
abs.s32 %r5851, %r5850;
setp.gt.u32 %p908, %r5851, 4;
and.b32 %r5852, %r5851, 1;
setp.eq.b32 %p909, %r5852, 1;
and.pred %p910, %p908, %p909;
selp.b32 %r5853, 1024, 0, %p910;
or.b32 %r9467, %r5853, %r9467;
$L__BB1_750:
setp.ge.u32 %p911, %r1746, %r6;
@%p911 bra $L__BB1_752;
// Row +7 -> bit 11.
add.s32 %r5854, %r1747, %r1813;
cvt.u64.u32 %rd560, %r5854;
add.s64 %rd561, %rd560, %rd5;
shl.b64 %rd562, %rd561, 2;
add.s64 %rd563, %rd3, %rd562;
ld.global.u32 %r5855, [%rd563];
abs.s32 %r5856, %r5855;
setp.gt.u32 %p912, %r5856, 4;
and.b32 %r5857, %r5856, 1;
setp.eq.b32 %p913, %r5857, 1;
and.pred %p914, %p912, %p913;
selp.b32 %r5858, 2048, 0, %p914;
or.b32 %r9467, %r5858, %r9467;
$L__BB1_752:
add.s32 %r1822, %r9439, 7;
setp.ge.u32 %p915, %r1822, %r5;
@%p915 bra $L__BB1_761;
setp.ge.u32 %p916, %r1740, %r6;
@%p916 bra $L__BB1_755;
add.s32 %r5859, %r1741, %r1822;
cvt.u64.u32 %rd564, %r5859;
add.s64 %rd565, %rd564, %rd5;
shl.b64 %rd566, %rd565, 2;
add.s64 %rd567, %rd3, %rd566;
ld.global.u32 %r5860, [%rd567];
abs.s32 %r5861, %r5860;
setp.gt.u32 %p917, %r5861, 4;
and.b32 %r5862, %r5861, 1;
setp.eq.b32 %p918, %r5862, 1;
and.pred %p919, %p917, %p918;
selp.b32 %r5863, 4096, 0, %p919;
or.b32 %r9467, %r5863, %r9467;
$L__BB1_755:
setp.ge.u32 %p920, %r1742, %r6;
@%p920 bra $L__BB1_757;
add.s32 %r5864, %r1743, %r1822;
cvt.u64.u32 %rd568, %r5864;
add.s64 %rd569, %rd568, %rd5;
shl.b64 %rd570, %rd569, 2;
add.s64 %rd571, %rd3, %rd570;
ld.global.u32 %r5865, [%rd571];
abs.s32 %r5866, %r5865;
setp.gt.u32 %p921, %r5866, 4;
and.b32 %r5867, %r5866, 1;
setp.eq.b32 %p922, %r5867, 1;
and.pred %p923, %p921, %p922;
selp.b32 %r5868, 8192, 0, %p923;
or.b32 %r9467, %r5868, %r9467;
$L__BB1_757:
setp.ge.u32 %p924, %r1744, %r6;
@%p924 bra $L__BB1_759;
add.s32 %r5869, %r1745, %r1822;
cvt.u64.u32 %rd572, %r5869;
add.s64 %rd573, %rd572, %rd5;
shl.b64 %rd574, %rd573, 2;
add.s64 %rd575, %rd3, %rd574;
ld.global.u32 %r5870, [%rd575];
abs.s32 %r5871, %r5870;
setp.gt.u32 %p925, %r5871, 4;
and.b32 %r5872, %r5871, 1;
setp.eq.b32 %p926, %r5872, 1;
and.pred %p927, %p925, %p926;
selp.b32 %r5873, 16384, 0, %p927;
or.b32 %r9467, %r5873, %r9467;
$L__BB1_759:
setp.ge.u32 %p928, %r1746, %r6;
@%p928 bra $L__BB1_761;
add.s32 %r5874, %r1747, %r1822;
cvt.u64.u32 %rd576, %r5874;
add.s64 %rd577, %rd576, %rd5;
shl.b64 %rd578, %rd577, 2;
add.s64 %rd579, %rd3, %rd578;
ld.global.u32 %r5875, [%rd579];
abs.s32 %r5876, %r5875;
setp.gt.u32 %p929, %r5876, 4;
and.b32 %r5877, %r5876, 1;
setp.eq.b32 %p930, %r5877, 1;
and.pred %p931, %p929, %p930;
selp.b32 %r5878, 32768, 0, %p931;
or.b32 %r9467, %r5878, %r9467;
$L__BB1_761:
mov.b32 %r1831, {%rs238, %rs239};
add.s32 %r5880, %r1751, %r9439;
cvt.u64.u32 %rd580, %r5880;
add.s64 %rd581, %rd580, %rd5;
shl.b64 %rd582, %rd581, 2;
add.s64 %rd26, %rd3, %rd582;
add.s32 %r5881, %r1754, %r9439;
cvt.u64.u32 %rd583, %r5881;
add.s64 %rd584, %rd583, %rd5;
shl.b64 %rd585, %rd584, 2;
add.s64 %rd27, %rd3, %rd585;
add.s32 %r5882, %r1753, %r9439;
cvt.u64.u32 %rd586, %r5882;
add.s64 %rd587, %rd586, %rd5;
shl.b64 %rd588, %rd587, 2;
add.s64 %rd28, %rd3, %rd588;
add.s32 %r5883, %r1752, %r9439;
cvt.u64.u32 %rd589, %r5883;
add.s64 %rd590, %rd589, %rd5;
shl.b64 %rd591, %rd590, 2;
add.s64 %rd29, %rd3, %rd591;
mov.u32 %r9483, 0;
@%p796 bra $L__BB1_770;
setp.le.u32 %p933, %r6, %r9435;
mov.u32 %r9483, 0;
@%p933 bra $L__BB1_764;
ld.global.u32 %r5885, [%rd26];
abs.s32 %r5886, %r5885;
setp.gt.u32 %p934, %r5886, 4;
and.b32 %r5887, %r5886, 1;
setp.eq.b32 %p935, %r5887, 1;
and.pred %p936, %p934, %p935;
selp.u32 %r9483, 1, 0, %p936;
$L__BB1_764:
setp.ge.u32 %p937, %r1748, %r6;
@%p937 bra $L__BB1_766;
ld.global.u32 %r5888, [%rd27];
abs.s32 %r5889, %r5888;
setp.gt.u32 %p938, %r5889, 4;
and.b32 %r5890, %r5889, 1;
setp.eq.b32 %p939, %r5890, 1;
and.pred %p940, %p938, %p939;
selp.b32 %r5891, 2, 0, %p940;
or.b32 %r9483, %r5891, %r9483;
$L__BB1_766:
setp.ge.u32 %p941, %r1749, %r6;
@%p941 bra $L__BB1_768;
ld.global.u32 %r5892, [%rd28];
abs.s32 %r5893, %r5892;
setp.gt.u32 %p942, %r5893, 4;
and.b32 %r5894, %r5893, 1;
setp.eq.b32 %p943, %r5894, 1;
and.pred %p944, %p942, %p943;
selp.b32 %r5895, 4, 0, %p944;
or.b32 %r9483, %r5895, %r9483;
$L__BB1_768:
setp.ge.u32 %p945, %r1750, %r6;
@%p945 bra $L__BB1_770;
ld.global.u32 %r5896, [%rd29];
abs.s32 %r5897, %r5896;
setp.gt.u32 %p946, %r5897, 4;
and.b32 %r5898, %r5897, 1;
setp.eq.b32 %p947, %r5898, 1;
and.pred %p948, %p946, %p947;
selp.b32 %r5899, 8, 0, %p948;
or.b32 %r9483, %r5899, %r9483;
$L__BB1_770:
add.s32 %r5900, %r1751, %r1769;
cvt.u64.u32 %rd592, %r5900;
add.s64 %rd593, %rd592, %rd5;
shl.b64 %rd594, %rd593, 2;
add.s64 %rd30, %rd3, %rd594;
add.s32 %r5901, %r1754, %r1769;
cvt.u64.u32 %rd595, %r5901;
add.s64 %rd596, %rd595, %rd5;
shl.b64 %rd597, %rd596, 2;
add.s64 %rd31, %rd3, %rd597;
add.s32 %r5902, %r1753, %r1769;
cvt.u64.u32 %rd598, %r5902;
add.s64 %rd599, %rd598, %rd5;
shl.b64 %rd600, %rd599, 2;
add.s64 %rd32, %rd3, %rd600;
add.s32 %r5903, %r1752, %r1769;
cvt.u64.u32 %rd601, %r5903;
add.s64 %rd602, %rd601, %rd5;
shl.b64 %rd603, %rd602, 2;
add.s64 %rd33, %rd3, %rd603;
shl.b32 %r5904, %r9467, 16;
or.b32 %r1840, %r5904, %r9451;
@%p813 bra $L__BB1_779;
setp.le.u32 %p950, %r6, %r9435;
@%p950 bra $L__BB1_773;
ld.global.u32 %r5905, [%rd30];
abs.s32 %r5906, %r5905;
setp.gt.u32 %p951, %r5906, 4;
and.b32 %r5907, %r5906, 1;
setp.eq.b32 %p952, %r5907, 1;
and.pred %p953, %p951, %p952;
selp.b32 %r5908, 16, 0, %p953;
or.b32 %r9483, %r5908, %r9483;
$L__BB1_773:
setp.ge.u32 %p954, %r1748, %r6;
@%p954 bra $L__BB1_775;
ld.global.u32 %r5909, [%rd31];
abs.s32 %r5910, %r5909;
setp.gt.u32 %p955, %r5910, 4;
and.b32 %r5911, %r5910, 1;
setp.eq.b32 %p956, %r5911, 1;
and.pred %p957, %p955, %p956;
selp.b32 %r5912, 32, 0, %p957;
or.b32 %r9483, %r5912, %r9483;
$L__BB1_775:
setp.ge.u32 %p958, %r1749, %r6;
@%p958 bra $L__BB1_777;
ld.global.u32 %r5913, [%rd32];
abs.s32 %r5914, %r5913;
setp.gt.u32 %p959, %r5914, 4;
and.b32 %r5915, %r5914, 1;
setp.eq.b32 %p960, %r5915, 1;
and.pred %p961, %p959, %p960;
selp.b32 %r5916, 64, 0, %p961;
or.b32 %r9483, %r5916, %r9483;
$L__BB1_777:
setp.ge.u32 %p962, %r1750, %r6;
@%p962 bra $L__BB1_779;
ld.global.u32 %r5917, [%rd33];
abs.s32 %r5918, %r5917;
setp.gt.u32 %p963, %r5918, 4;
and.b32 %r5919, %r5918, 1;
setp.eq.b32 %p964, %r5919, 1;
and.pred %p965, %p963, %p964;
selp.b32 %r5920, 128, 0, %p965;
or.b32 %r9483, %r5920, %r9483;
$L__BB1_779:
add.s32 %r5921, %r1751, %r1778;
cvt.u64.u32 %rd604, %r5921;
add.s64 %rd605, %rd604, %rd5;
shl.b64 %rd606, %rd605, 2;
add.s64 %rd34, %rd3, %rd606;
add.s32 %r5922, %r1754, %r1778;
cvt.u64.u32 %rd607, %r5922;
add.s64 %rd608, %rd607, %rd5;
shl.b64 %rd609, %rd608, 2;
add.s64 %rd35, %rd3, %rd609;
add.s32 %r5923, %r1753, %r1778;
cvt.u64.u32 %rd610, %r5923;
add.s64 %rd611, %rd610, %rd5;
shl.b64 %rd612, %rd611, 2;
add.s64 %rd36, %rd3, %rd612;
add.s32 %r5924, %r1752, %r1778;
cvt.u64.u32 %rd613, %r5924;
add.s64 %rd614, %rd613, %rd5;
shl.b64 %rd615, %rd614, 2;
add.s64 %rd37, %rd3, %rd615;
@%p830 bra $L__BB1_788;
setp.le.u32 %p967, %r6, %r9435;
@%p967 bra $L__BB1_782;
ld.global.u32 %r5925, [%rd34];
abs.s32 %r5926, %r5925;
setp.gt.u32 %p968, %r5926, 4;
and.b32 %r5927, %r5926, 1;
setp.eq.b32 %p969, %r5927, 1;
and.pred %p970, %p968, %p969;
selp.b32 %r5928, 256, 0, %p970;
or.b32 %r9483, %r5928, %r9483;
$L__BB1_782:
setp.ge.u32 %p971, %r1748, %r6;
@%p971 bra $L__BB1_784;
ld.global.u32 %r5929, [%rd35];
abs.s32 %r5930, %r5929;
setp.gt.u32 %p972, %r5930, 4;
and.b32 %r5931, %r5930, 1;
setp.eq.b32 %p973, %r5931, 1;
and.pred %p974, %p972, %p973;
selp.b32 %r5932, 512, 0, %p974;
or.b32 %r9483, %r5932, %r9483;
$L__BB1_784:
setp.ge.u32 %p975, %r1749, %r6;
@%p975 bra $L__BB1_786;
ld.global.u32 %r5933, [%rd36];
abs.s32 %r5934, %r5933;
setp.gt.u32 %p976, %r5934, 4;
and.b32 %r5935, %r5934, 1;
setp.eq.b32 %p977, %r5935, 1;
and.pred %p978, %p976, %p977;
selp.b32 %r5936, 1024, 0, %p978;
or.b32 %r9483, %r5936, %r9483;
$L__BB1_786:
setp.ge.u32 %p979, %r1750, %r6;
@%p979 bra $L__BB1_788;
ld.global.u32 %r5937, [%rd37];
abs.s32 %r5938, %r5937;
setp.gt.u32 %p980, %r5938, 4;
and.b32 %r5939, %r5938, 1;
setp.eq.b32 %p981, %r5939, 1;
and.pred %p982, %p980, %p981;
selp.b32 %r5940, 2048, 0, %p982;
or.b32 %r9483, %r5940, %r9483;
$L__BB1_788:
add.s32 %r5941, %r1751, %r1787;
cvt.u64.u32 %rd616, %r5941;
add.s64 %rd617, %rd616, %rd5;
shl.b64 %rd618, %rd617, 2;
add.s64 %rd38, %rd3, %rd618;
add.s32 %r5942, %r1754, %r1787;
cvt.u64.u32 %rd619, %r5942;
add.s64 %rd620, %rd619, %rd5;
shl.b64 %rd621, %rd620, 2;
add.s64 %rd39, %rd3, %rd621;
add.s32 %r5943, %r1753, %r1787;
cvt.u64.u32 %rd622, %r5943;
add.s64 %rd623, %rd622, %rd5;
shl.b64 %rd624, %rd623, 2;
add.s64 %rd40, %rd3, %rd624;
add.s32 %r5944, %r1752, %r1787;
cvt.u64.u32 %rd625, %r5944;
add.s64 %rd626, %rd625, %rd5;
shl.b64 %rd627, %rd626, 2;
add.s64 %rd41, %rd3, %rd627;
@%p847 bra $L__BB1_797;
setp.le.u32 %p984, %r6, %r9435;
@%p984 bra $L__BB1_791;
ld.global.u32 %r5945, [%rd38];
abs.s32 %r5946, %r5945;
setp.gt.u32 %p985, %r5946, 4;
and.b32 %r5947, %r5946, 1;
setp.eq.b32 %p986, %r5947, 1;
and.pred %p987, %p985, %p986;
selp.b32 %r5948, 4096, 0, %p987;
or.b32 %r9483, %r5948, %r9483;
$L__BB1_791:
setp.ge.u32 %p988, %r1748, %r6;
@%p988 bra $L__BB1_793;
ld.global.u32 %r5949, [%rd39];
abs.s32 %r5950, %r5949;
setp.gt.u32 %p989, %r5950, 4;
and.b32 %r5951, %r5950, 1;
setp.eq.b32 %p990, %r5951, 1;
and.pred %p991, %p989, %p990;
selp.b32 %r5952, 8192, 0, %p991;
or.b32 %r9483, %r5952, %r9483;
$L__BB1_793:
setp.ge.u32 %p992, %r1749, %r6;
@%p992 bra $L__BB1_795;
ld.global.u32 %r5953, [%rd40];
abs.s32 %r5954, %r5953;
setp.gt.u32 %p993, %r5954, 4;
and.b32 %r5955, %r5954, 1;
setp.eq.b32 %p994, %r5955, 1;
and.pred %p995, %p993, %p994;
selp.b32 %r5956, 16384, 0, %p995;
or.b32 %r9483, %r5956, %r9483;
$L__BB1_795:
setp.ge.u32 %p996, %r1750, %r6;
@%p996 bra $L__BB1_797;
ld.global.u32 %r5957, [%rd41];
abs.s32 %r5958, %r5957;
setp.gt.u32 %p997, %r5958, 4;
and.b32 %r5959, %r5958, 1;
setp.eq.b32 %p998, %r5959, 1;
and.pred %p999, %p997, %p998;
selp.b32 %r5960, 32768, 0, %p999;
or.b32 %r9483, %r5960, %r9483;
$L__BB1_797:
mov.u32 %r9499, 0;
@%p864 bra $L__BB1_806;
setp.le.u32 %p1001, %r6, %r9435;
mov.u32 %r9499, 0;
@%p1001 bra $L__BB1_800;
add.s32 %r5965, %r5880, 4;
cvt.u64.u32 %rd628, %r5965;
add.s64 %rd629, %rd628, %rd5;
shl.b64 %rd630, %rd629, 2;
add.s64 %rd631, %rd3, %rd630;
ld.global.u32 %r5966, [%rd631];
abs.s32 %r5967, %r5966;
setp.gt.u32 %p1002, %r5967, 4;
and.b32 %r5968, %r5967, 1;
setp.eq.b32 %p1003, %r5968, 1;
and.pred %p1004, %p1002, %p1003;
selp.u32 %r9499, 1, 0, %p1004;
$L__BB1_800:
setp.ge.u32 %p1005, %r1748, %r6;
@%p1005 bra $L__BB1_802;
add.s32 %r5970, %r5881, 4;
cvt.u64.u32 %rd632, %r5970;
add.s64 %rd633, %rd632, %rd5;
shl.b64 %rd634, %rd633, 2;
add.s64 %rd635, %rd3, %rd634;
ld.global.u32 %r5971, [%rd635];
abs.s32 %r5972, %r5971;
setp.gt.u32 %p1006, %r5972, 4;
and.b32 %r5973, %r5972, 1;
setp.eq.b32 %p1007, %r5973, 1;
and.pred %p1008, %p1006, %p1007;
selp.b32 %r5974, 2, 0, %p1008;
or.b32 %r9499, %r5974, %r9499;
$L__BB1_802:
setp.ge.u32 %p1009, %r1749, %r6;
@%p1009 bra $L__BB1_804;
add.s32 %r5976, %r5882, 4;
cvt.u64.u32 %rd636, %r5976;
add.s64 %rd637, %rd636, %rd5;
shl.b64 %rd638, %rd637, 2;
add.s64 %rd639, %rd3, %rd638;
ld.global.u32 %r5977, [%rd639];
abs.s32 %r5978, %r5977;
setp.gt.u32 %p1010, %r5978, 4;
and.b32 %r5979, %r5978, 1;
setp.eq.b32 %p1011, %r5979, 1;
and.pred %p1012, %p1010, %p1011;
selp.b32 %r5980, 4, 0, %p1012;
or.b32 %r9499, %r5980, %r9499;
$L__BB1_804:
setp.ge.u32 %p1013, %r1750, %r6;
@%p1013 bra $L__BB1_806;
add.s32 %r5982, %r5883, 4;
cvt.u64.u32 %rd640, %r5982;
add.s64 %rd641, %rd640, %rd5;
shl.b64 %rd642, %rd641, 2;
add.s64 %rd643, %rd3, %rd642;
ld.global.u32 %r5983, [%rd643];
abs.s32 %r5984, %r5983;
setp.gt.u32 %p1014, %r5984, 4;
and.b32 %r5985, %r5984, 1;
setp.eq.b32 %p1015, %r5985, 1;
and.pred %p1016, %p1014, %p1015;
selp.b32 %r5986, 8, 0, %p1016;
or.b32 %r9499, %r5986, %r9499;
$L__BB1_806:
@%p881 bra $L__BB1_815;
setp.le.u32 %p1018, %r6, %r9435;
@%p1018 bra $L__BB1_809;
add.s32 %r5987, %r1751, %r1804;
cvt.u64.u32 %rd644, %r5987;
add.s64 %rd645, %rd644, %rd5;
shl.b64 %rd646, %rd645, 2;
add.s64 %rd647, %rd3, %rd646;
ld.global.u32 %r5988, [%rd647];
abs.s32 %r5989, %r5988;
setp.gt.u32 %p1019, %r5989, 4;
and.b32 %r5990, %r5989, 1;
setp.eq.b32 %p1020, %r5990, 1;
and.pred %p1021, %p1019, %p1020;
selp.b32 %r5991, 16, 0, %p1021;
or.b32 %r9499, %r5991, %r9499;
$L__BB1_809:
setp.ge.u32 %p1022, %r1748, %r6;
@%p1022 bra $L__BB1_811;
add.s32 %r5992, %r1754, %r1804;
cvt.u64.u32 %rd648, %r5992;
add.s64 %rd649, %rd648, %rd5;
shl.b64 %rd650, %rd649, 2;
add.s64 %rd651, %rd3, %rd650;
ld.global.u32 %r5993, [%rd651];
abs.s32 %r5994, %r5993;
setp.gt.u32 %p1023, %r5994, 4;
and.b32 %r5995, %r5994, 1;
setp.eq.b32 %p1024, %r5995, 1;
and.pred %p1025, %p1023, %p1024;
selp.b32 %r5996, 32, 0, %p1025;
or.b32 %r9499, %r5996, %r9499;
$L__BB1_811:
setp.ge.u32 %p1026, %r1749, %r6;
@%p1026 bra $L__BB1_813;
add.s32 %r5997, %r1753, %r1804;
cvt.u64.u32 %rd652, %r5997;
add.s64 %rd653, %rd652, %rd5;
shl.b64 %rd654, %rd653, 2;
add.s64 %rd655, %rd3, %rd654;
ld.global.u32 %r5998, [%rd655];
abs.s32 %r5999, %r5998;
setp.gt.u32 %p1027, %r5999, 4;
and.b32 %r6000, %r5999, 1;
setp.eq.b32 %p1028, %r6000, 1;
and.pred %p1029, %p1027, %p1028;
selp.b32 %r6001, 64, 0, %p1029;
or.b32 %r9499, %r6001, %r9499;
$L__BB1_813:
setp.ge.u32 %p1030, %r1750, %r6;
@%p1030 bra $L__BB1_815;
add.s32 %r6002, %r1752, %r1804;
cvt.u64.u32 %rd656, %r6002;
add.s64 %rd657, %rd656, %rd5;
shl.b64 %rd658, %rd657, 2;
add.s64 %rd659, %rd3, %rd658;
ld.global.u32 %r6003, [%rd659];
abs.s32 %r6004, %r6003;
setp.gt.u32 %p1031, %r6004, 4;
and.b32 %r6005, %r6004, 1;
setp.eq.b32 %p1032, %r6005, 1;
and.pred %p1033, %p1031, %p1032;
selp.b32 %r6006, 128, 0, %p1033;
or.b32 %r9499, %r6006, %r9499;
$L__BB1_815:
@%p898 bra $L__BB1_824;
setp.le.u32 %p1035, %r6, %r9435;
@%p1035 bra $L__BB1_818;
add.s32 %r6007, %r1751, %r1813;
cvt.u64.u32 %rd660, %r6007;
add.s64 %rd661, %rd660, %rd5;
shl.b64 %rd662, %rd661, 2;
add.s64 %rd663, %rd3, %rd662;
ld.global.u32 %r6008, [%rd663];
abs.s32 %r6009, %r6008;
setp.gt.u32 %p1036, %r6009, 4;
and.b32 %r6010, %r6009, 1;
setp.eq.b32 %p1037, %r6010, 1;
and.pred %p1038, %p1036, %p1037;
selp.b32 %r6011, 256, 0, %p1038;
or.b32 %r9499, %r6011, %r9499;
$L__BB1_818:
setp.ge.u32 %p1039, %r1748, %r6;
@%p1039 bra $L__BB1_820;
add.s32 %r6012, %r1754, %r1813;
cvt.u64.u32 %rd664, %r6012;
add.s64 %rd665, %rd664, %rd5;
shl.b64 %rd666, %rd665, 2;
add.s64 %rd667, %rd3, %rd666;
ld.global.u32 %r6013, [%rd667];
abs.s32 %r6014, %r6013;
setp.gt.u32 %p1040, %r6014, 4;
and.b32 %r6015, %r6014, 1;
setp.eq.b32 %p1041, %r6015, 1;
and.pred %p1042, %p1040, %p1041;
selp.b32 %r6016, 512, 0, %p1042;
or.b32 %r9499, %r6016, %r9499;
$L__BB1_820:
setp.ge.u32 %p1043, %r1749, %r6;
@%p1043 bra $L__BB1_822;
add.s32 %r6017, %r1753, %r1813;
cvt.u64.u32 %rd668, %r6017;
add.s64 %rd669, %rd668, %rd5;
shl.b64 %rd670, %rd669, 2;
add.s64 %rd671, %rd3, %rd670;
ld.global.u32 %r6018, [%rd671];
abs.s32 %r6019, %r6018;
setp.gt.u32 %p1044, %r6019, 4;
and.b32 %r6020, %r6019, 1;
setp.eq.b32 %p1045, %r6020, 1;
and.pred %p1046, %p1044, %p1045;
selp.b32 %r6021, 1024, 0, %p1046;
or.b32 %r9499, %r6021, %r9499;
$L__BB1_822:
setp.ge.u32 %p1047, %r1750, %r6;
@%p1047 bra $L__BB1_824;
add.s32 %r6022, %r1752, %r1813;
cvt.u64.u32 %rd672, %r6022;
add.s64 %rd673, %rd672, %rd5;
shl.b64 %rd674, %rd673, 2;
add.s64 %rd675, %rd3, %rd674;
ld.global.u32 %r6023, [%rd675];
abs.s32 %r6024, %r6023;
setp.gt.u32 %p1048, %r6024, 4;
and.b32 %r6025, %r6024, 1;
setp.eq.b32 %p1049, %r6025, 1;
and.pred %p1050, %p1048, %p1049;
selp.b32 %r6026, 2048, 0, %p1050;
or.b32 %r9499, %r6026, %r9499;
$L__BB1_824:
@%p915 bra $L__BB1_833;
setp.le.u32 %p1052, %r6, %r9435;
@%p1052 bra $L__BB1_827;
add.s32 %r6027, %r1751, %r1822;
cvt.u64.u32 %rd676, %r6027;
add.s64 %rd677, %rd676, %rd5;
shl.b64 %rd678, %rd677, 2;
add.s64 %rd679, %rd3, %rd678;
ld.global.u32 %r6028, [%rd679];
abs.s32 %r6029, %r6028;
setp.gt.u32 %p1053, %r6029, 4;
and.b32 %r6030, %r6029, 1;
setp.eq.b32 %p1054, %r6030, 1;
and.pred %p1055, %p1053, %p1054;
selp.b32 %r6031, 4096, 0, %p1055;
or.b32 %r9499, %r6031, %r9499;
$L__BB1_827:
setp.ge.u32 %p1056, %r1748, %r6;
@%p1056 bra $L__BB1_829;
add.s32 %r6032, %r1754, %r1822;
cvt.u64.u32 %rd680, %r6032;
add.s64 %rd681, %rd680, %rd5;
shl.b64 %rd682, %rd681, 2;
add.s64 %rd683, %rd3, %rd682;
ld.global.u32 %r6033, [%rd683];
abs.s32 %r6034, %r6033;
setp.gt.u32 %p1057, %r6034, 4;
and.b32 %r6035, %r6034, 1;
setp.eq.b32 %p1058, %r6035, 1;
and.pred %p1059, %p1057, %p1058;
selp.b32 %r6036, 8192, 0, %p1059;
or.b32 %r9499, %r6036, %r9499;
$L__BB1_829:
setp.ge.u32 %p1060, %r1749, %r6;
@%p1060 bra $L__BB1_831;
add.s32 %r6037, %r1753, %r1822;
cvt.u64.u32 %rd684, %r6037;
add.s64 %rd685, %rd684, %rd5;
shl.b64 %rd686, %rd685, 2;
add.s64 %rd687, %rd3, %rd686;
ld.global.u32 %r6038, [%rd687];
abs.s32 %r6039, %r6038;
setp.gt.u32 %p1061, %r6039, 4;
and.b32 %r6040, %r6039, 1;
setp.eq.b32 %p1062, %r6040, 1;
and.pred %p1063, %p1061, %p1062;
selp.b32 %r6041, 16384, 0, %p1063;
or.b32 %r9499, %r6041, %r9499;
$L__BB1_831:
setp.ge.u32 %p1064, %r1750, %r6;
@%p1064 bra $L__BB1_833;
add.s32 %r6042, %r1752, %r1822;
cvt.u64.u32 %rd688, %r6042;
add.s64 %rd689, %rd688, %rd5;
shl.b64 %rd690, %rd689, 2;
add.s64 %rd691, %rd3, %rd690;
ld.global.u32 %r6043, [%rd691];
abs.s32 %r6044, %r6043;
setp.gt.u32 %p1065, %r6044, 4;
and.b32 %r6045, %r6044, 1;
setp.eq.b32 %p1066, %r6045, 1;
and.pred %p1067, %p1065, %p1066;
selp.b32 %r6046, 32768, 0, %p1067;
or.b32 %r9499, %r6046, %r9499;
$L__BB1_833:
// Combine the 16-bit masks gathered above and prepare the per-iteration masks.
//   %r6049 = %r5794 - %r5 (signed difference, used below as an overshoot count)
//   %r1897 = (%r9499 << 16) | %r9483
sub.s32 %r6049, %r5794, %r5;
shl.b32 %r6050, %r9499, 16;
or.b32 %r1897, %r6050, %r9483;
// -2004318072 == 0x88888888 (bit 3 of every nibble).
// %r1898 = bit 3 of each nibble of %r1831 moved to bit 0 of the same nibble,
//          OR bit 0 of each nibble of %r1840 moved to bit 3 of the same nibble.
and.b32 %r6051, %r1831, -2004318072;
shr.u32 %r6052, %r6051, 3;
shl.b32 %r6053, %r1840, 3;
and.b32 %r6054, %r6053, -2004318072;
or.b32 %r1898, %r6054, %r6052;
not.b32 %r6055, %r1897;
// If %r6049 > 0, %r1755 is shifted right by 4 * %r6049 bits (one nibble per
// unit of overshoot); otherwise it is used unshifted.
// %r1755 is defined outside this span.
setp.gt.s32 %p1068, %r6049, 0;
mov.u32 %r9515, 0;
shl.b32 %r6056, %r6049, 2;
selp.b32 %r6057, %r6056, 0, %p1068;
shr.u32 %r1899, %r1755, %r6057;
// %r1900 = %r1899 with every bit already present in %r1897 removed.
and.b32 %r1900, %r1899, %r6055;
// Build %r9515: one bit per guarded load, set when abs.s32 of the loaded
// 32-bit word equals 3. Four groups of four loads follow. Each group is
// skipped as a whole by a predicate computed before this span
// (%p796, %p813, %p830, %p847), and each load inside a group is skipped when
// its index (%r9435, %r1748, %r1749, %r1750) is not below %r6.
// Group 1: addresses %rd26..%rd29, bit values 1, 2, 4, 8.
@%p796 bra $L__BB1_842;
setp.le.u32 %p1070, %r6, %r9435;
mov.u32 %r9515, 0;
@%p1070 bra $L__BB1_836;
ld.global.u32 %r6059, [%rd26];
abs.s32 %r6060, %r6059;
setp.eq.s32 %p1071, %r6060, 3;
selp.u32 %r9515, 1, 0, %p1071;
$L__BB1_836:
setp.ge.u32 %p1072, %r1748, %r6;
@%p1072 bra $L__BB1_838;
ld.global.u32 %r6061, [%rd27];
abs.s32 %r6062, %r6061;
setp.eq.s32 %p1073, %r6062, 3;
selp.b32 %r6063, 2, 0, %p1073;
or.b32 %r9515, %r6063, %r9515;
$L__BB1_838:
setp.ge.u32 %p1074, %r1749, %r6;
@%p1074 bra $L__BB1_840;
ld.global.u32 %r6064, [%rd28];
abs.s32 %r6065, %r6064;
setp.eq.s32 %p1075, %r6065, 3;
selp.b32 %r6066, 4, 0, %p1075;
or.b32 %r9515, %r6066, %r9515;
$L__BB1_840:
setp.ge.u32 %p1076, %r1750, %r6;
@%p1076 bra $L__BB1_842;
ld.global.u32 %r6067, [%rd29];
abs.s32 %r6068, %r6067;
setp.eq.s32 %p1077, %r6068, 3;
selp.b32 %r6069, 8, 0, %p1077;
or.b32 %r9515, %r6069, %r9515;
$L__BB1_842:
// Group 2: addresses %rd30..%rd33, bit values 16, 32, 64, 128.
@%p813 bra $L__BB1_851;
setp.le.u32 %p1079, %r6, %r9435;
@%p1079 bra $L__BB1_845;
ld.global.u32 %r6070, [%rd30];
abs.s32 %r6071, %r6070;
setp.eq.s32 %p1080, %r6071, 3;
selp.b32 %r6072, 16, 0, %p1080;
or.b32 %r9515, %r6072, %r9515;
$L__BB1_845:
setp.ge.u32 %p1081, %r1748, %r6;
@%p1081 bra $L__BB1_847;
ld.global.u32 %r6073, [%rd31];
abs.s32 %r6074, %r6073;
setp.eq.s32 %p1082, %r6074, 3;
selp.b32 %r6075, 32, 0, %p1082;
or.b32 %r9515, %r6075, %r9515;
$L__BB1_847:
setp.ge.u32 %p1083, %r1749, %r6;
@%p1083 bra $L__BB1_849;
ld.global.u32 %r6076, [%rd32];
abs.s32 %r6077, %r6076;
setp.eq.s32 %p1084, %r6077, 3;
selp.b32 %r6078, 64, 0, %p1084;
or.b32 %r9515, %r6078, %r9515;
$L__BB1_849:
setp.ge.u32 %p1085, %r1750, %r6;
@%p1085 bra $L__BB1_851;
ld.global.u32 %r6079, [%rd33];
abs.s32 %r6080, %r6079;
setp.eq.s32 %p1086, %r6080, 3;
selp.b32 %r6081, 128, 0, %p1086;
or.b32 %r9515, %r6081, %r9515;
$L__BB1_851:
// Group 3: addresses %rd34..%rd37, bit values 256, 512, 1024, 2048.
@%p830 bra $L__BB1_860;
setp.le.u32 %p1088, %r6, %r9435;
@%p1088 bra $L__BB1_854;
ld.global.u32 %r6082, [%rd34];
abs.s32 %r6083, %r6082;
setp.eq.s32 %p1089, %r6083, 3;
selp.b32 %r6084, 256, 0, %p1089;
or.b32 %r9515, %r6084, %r9515;
$L__BB1_854:
setp.ge.u32 %p1090, %r1748, %r6;
@%p1090 bra $L__BB1_856;
ld.global.u32 %r6085, [%rd35];
abs.s32 %r6086, %r6085;
setp.eq.s32 %p1091, %r6086, 3;
selp.b32 %r6087, 512, 0, %p1091;
or.b32 %r9515, %r6087, %r9515;
$L__BB1_856:
setp.ge.u32 %p1092, %r1749, %r6;
@%p1092 bra $L__BB1_858;
ld.global.u32 %r6088, [%rd36];
abs.s32 %r6089, %r6088;
setp.eq.s32 %p1093, %r6089, 3;
selp.b32 %r6090, 1024, 0, %p1093;
or.b32 %r9515, %r6090, %r9515;
$L__BB1_858:
setp.ge.u32 %p1094, %r1750, %r6;
@%p1094 bra $L__BB1_860;
ld.global.u32 %r6091, [%rd37];
abs.s32 %r6092, %r6091;
setp.eq.s32 %p1095, %r6092, 3;
selp.b32 %r6093, 2048, 0, %p1095;
or.b32 %r9515, %r6093, %r9515;
$L__BB1_860:
// Group 4: addresses %rd38..%rd41, bit values 4096, 8192, 16384, 32768.
@%p847 bra $L__BB1_869;
setp.le.u32 %p1097, %r6, %r9435;
@%p1097 bra $L__BB1_863;
ld.global.u32 %r6094, [%rd38];
abs.s32 %r6095, %r6094;
setp.eq.s32 %p1098, %r6095, 3;
selp.b32 %r6096, 4096, 0, %p1098;
or.b32 %r9515, %r6096, %r9515;
$L__BB1_863:
setp.ge.u32 %p1099, %r1748, %r6;
@%p1099 bra $L__BB1_865;
ld.global.u32 %r6097, [%rd39];
abs.s32 %r6098, %r6097;
setp.eq.s32 %p1100, %r6098, 3;
selp.b32 %r6099, 8192, 0, %p1100;
or.b32 %r9515, %r6099, %r9515;
$L__BB1_865:
setp.ge.u32 %p1101, %r1749, %r6;
@%p1101 bra $L__BB1_867;
ld.global.u32 %r6100, [%rd40];
abs.s32 %r6101, %r6100;
setp.eq.s32 %p1102, %r6101, 3;
selp.b32 %r6102, 16384, 0, %p1102;
or.b32 %r9515, %r6102, %r9515;
$L__BB1_867:
setp.ge.u32 %p1103, %r1750, %r6;
@%p1103 bra $L__BB1_869;
ld.global.u32 %r6103, [%rd41];
abs.s32 %r6104, %r6103;
setp.eq.s32 %p1104, %r6104, 3;
selp.b32 %r6105, 32768, 0, %p1104;
or.b32 %r9515, %r6105, %r9515;
$L__BB1_869:
// Spread %r1897 to neighbouring bit positions and derive the initial work
// set %r9525.
// -286331154 == 0xEEEEEEEE (bits 1..3 of every nibble), so the two shifts
// below move bits by one position without crossing a nibble boundary:
//   %r6108 = (%r1897 & 0xEEEEEEEE) >> 1
//   %r6110 = (%r1897 << 1) & 0xEEEEEEEE
and.b32 %r6107, %r1897, -286331154;
shr.u32 %r6108, %r6107, 1;
shl.b32 %r6109, %r1897, 1;
and.b32 %r6110, %r6109, -286331154;
// %r6113 = %r1897 | %r1898 | %r6110 | %r6108
or.b32 %r6111, %r1897, %r1898;
or.b32 %r6112, %r6111, %r6110;
or.b32 %r6113, %r6112, %r6108;
// %r1933 = %r9515 restricted to the positions enabled in %r1899.
and.b32 %r1933, %r9515, %r1899;
// Spread %r6113 by one whole nibble in both directions, and OR in
// %r9440 >> 12 (%r9440 is carried in from the previous pass of the
// enclosing loop).
shr.u32 %r6114, %r6113, 4;
shl.b32 %r6115, %r6113, 4;
shr.u32 %r6116, %r9440, 12;
or.b32 %r6117, %r6113, %r6116;
or.b32 %r6118, %r6117, %r6115;
or.b32 %r6119, %r6118, %r6114;
// Work set = spread mask restricted to %r1900 (enabled and not in %r1897).
and.b32 %r9525, %r1900, %r6119;
setp.eq.s32 %p1105, %r9525, 0;
mov.u32 %r6106, 0;
mov.u32 %r9546, %r6106;
// Empty work set: go straight to the iteration tail with %r9546 == 0.
@%p1105 bra $L__BB1_924;
// Loop state: %r9524 = bits already popped, %r9526 = popped bits that were
// also set in %r1933, %r9527 = copy of the byte counter %r9545.
mov.u32 %r9524, 0;
mov.u32 %r9526, %r9524;
mov.u32 %r9527, %r9545;
$L__BB1_871:
// Loop head: pop the lowest set bit of the work set %r9525 and append one
// bit to the bit accumulator.
// brev followed by bfind.shiftamt gives the index of the lowest set bit of
// %r9525 (the caller guarantees %r9525 != 0 here).
brev.b32 %r6122, %r9525;
bfind.shiftamt.u32 %r1941, %r6122;
mov.pred %p2370, -1;
mov.u32 %r6123, 1;
shl.b32 %r1942, %r6123, %r1941;
// Funnel-shifting -2 with itself rotates 0xFFFFFFFE left by %r1941, giving a
// mask with only bit %r1941 clear; the AND removes that bit from the work set.
mov.u32 %r6124, -2;
shf.l.wrap.b32 %r6125, %r6124, %r6124, %r1941;
and.b32 %r9525, %r9525, %r6125;
or.b32 %r9524, %r1942, %r9524;
// The appended bit is 1 when the popped position is set in %r1933.
and.b32 %r1945, %r1942, %r1933;
setp.ne.s32 %p1107, %r1945, 0;
selp.u32 %r6126, 1, 0, %p1107;
// The accumulator holds 8 bits when %r9543 == 0 and 7 bits otherwise.
setp.eq.s32 %p1108, %r9543, 0;
selp.b32 %r6127, 8, 7, %p1108;
// Insert the bit at position %r9541 of %rs1224 and advance the position.
shl.b32 %r6128, %r6126, %r9541;
cvt.u16.u32 %rs772, %r6128;
or.b16 %rs1224, %rs1224, %rs772;
add.s32 %r9541, %r9541, 1;
setp.lt.u32 %p1109, %r9541, %r6127;
mov.pred %p2368, %p2370;
// Accumulator not yet full: continue with %p2368 == true.
@%p1109 bra $L__BB1_874;
// Accumulator full. A byte counter of -1 is treated as a failure:
// %r9545 stays -1 and %p2368 becomes false.
setp.eq.s32 %p1111, %r9527, -1;
mov.u32 %r9545, -1;
mov.pred %p2368, 0;
@%p1111 bra $L__BB1_874;
// Otherwise count one more byte, set %r9543 to 1 when the low byte of the
// accumulator is 0xFF (else 0), and reset the accumulator and bit position.
// No store of the accumulated byte is visible in this span.
and.b16 %rs774, %rs1224, 255;
setp.eq.s16 %p1113, %rs774, 255;
selp.u32 %r9543, 1, 0, %p1113;
add.s32 %r9545, %r9527, 1;
mov.u32 %r9541, 0;
mov.u16 %rs1224, 0;
mov.pred %p2368, %p2370;
$L__BB1_874:
// On failure leave through $L__BB1_928 with %r9550 == 0 and %p2370 == true.
mov.u32 %r9550, 0;
not.pred %p1115, %p2368;
@%p1115 bra $L__BB1_928;
// If the popped position was not set in %r1933 (%r1945 == 0) nothing is
// added to the work set; skip to the loop test.
setp.eq.s32 %p1116, %r1945, 0;
@%p1116 bra $L__BB1_916;
// Record the position in %r9526, then select a constant mask %r9533 from the
// position index %r1941 via a compiler-generated binary decision tree.
// The constants for indices 0..15 equal base[i & 3] << (4 * (i >> 2)) with
// base = {51 (0x33), 118 (0x76), 236 (0xEC), 200 (0xC8)}; any other index
// yields 0 ($L__BB1_914). The preloaded 51 is the value for index 0.
or.b32 %r9526, %r1942, %r9526;
mov.u32 %r9533, 51;
setp.gt.s32 %p1117, %r1941, 7;
@%p1117 bra $L__BB1_892;
setp.gt.s32 %p1129, %r1941, 3;
@%p1129 bra $L__BB1_885;
setp.gt.s32 %p1135, %r1941, 1;
@%p1135 bra $L__BB1_882;
setp.eq.s32 %p1138, %r1941, 0;
@%p1138 bra $L__BB1_915;
setp.eq.s32 %p1139, %r1941, 1;
@%p1139 bra $L__BB1_881;
bra.uni $L__BB1_914;
$L__BB1_881:
// index 1
mov.u32 %r9533, 118;
bra.uni $L__BB1_915;
$L__BB1_892:
setp.gt.s32 %p1118, %r1941, 11;
@%p1118 bra $L__BB1_900;
setp.gt.s32 %p1124, %r1941, 9;
@%p1124 bra $L__BB1_897;
setp.eq.s32 %p1127, %r1941, 8;
@%p1127 bra $L__BB1_910;
setp.eq.s32 %p1128, %r1941, 9;
@%p1128 bra $L__BB1_896;
bra.uni $L__BB1_914;
$L__BB1_896:
// index 9
mov.u32 %r9533, 30208;
bra.uni $L__BB1_915;
$L__BB1_885:
setp.gt.s32 %p1130, %r1941, 5;
@%p1130 bra $L__BB1_889;
setp.eq.s32 %p1133, %r1941, 4;
@%p1133 bra $L__BB1_912;
setp.eq.s32 %p1134, %r1941, 5;
@%p1134 bra $L__BB1_888;
bra.uni $L__BB1_914;
$L__BB1_888:
// index 5
mov.u32 %r9533, 1888;
bra.uni $L__BB1_915;
$L__BB1_900:
setp.gt.s32 %p1119, %r1941, 13;
@%p1119 bra $L__BB1_904;
setp.eq.s32 %p1122, %r1941, 12;
@%p1122 bra $L__BB1_908;
setp.eq.s32 %p1123, %r1941, 13;
@%p1123 bra $L__BB1_903;
bra.uni $L__BB1_914;
$L__BB1_903:
// index 13
mov.u32 %r9533, 483328;
bra.uni $L__BB1_915;
$L__BB1_882:
setp.eq.s32 %p1136, %r1941, 2;
@%p1136 bra $L__BB1_913;
setp.eq.s32 %p1137, %r1941, 3;
@%p1137 bra $L__BB1_884;
bra.uni $L__BB1_914;
$L__BB1_884:
// index 3
mov.u32 %r9533, 200;
bra.uni $L__BB1_915;
$L__BB1_897:
setp.eq.s32 %p1125, %r1941, 10;
@%p1125 bra $L__BB1_909;
setp.eq.s32 %p1126, %r1941, 11;
@%p1126 bra $L__BB1_899;
bra.uni $L__BB1_914;
$L__BB1_899:
// index 11
mov.u32 %r9533, 51200;
bra.uni $L__BB1_915;
$L__BB1_889:
setp.eq.s32 %p1131, %r1941, 6;
@%p1131 bra $L__BB1_911;
setp.eq.s32 %p1132, %r1941, 7;
@%p1132 bra $L__BB1_891;
bra.uni $L__BB1_914;
$L__BB1_891:
// index 7
mov.u32 %r9533, 3200;
bra.uni $L__BB1_915;
$L__BB1_904:
setp.eq.s32 %p1120, %r1941, 14;
@%p1120 bra $L__BB1_907;
setp.ne.s32 %p1121, %r1941, 15;
@%p1121 bra $L__BB1_914;
// index 15
mov.u32 %r9533, 819200;
bra.uni $L__BB1_915;
$L__BB1_910:
// index 8
mov.u32 %r9533, 13056;
bra.uni $L__BB1_915;
$L__BB1_912:
// index 4
mov.u32 %r9533, 816;
bra.uni $L__BB1_915;
$L__BB1_908:
// index 12
mov.u32 %r9533, 208896;
bra.uni $L__BB1_915;
$L__BB1_913:
// index 2
mov.u32 %r9533, 236;
bra.uni $L__BB1_915;
$L__BB1_909:
// index 10
mov.u32 %r9533, 60416;
bra.uni $L__BB1_915;
$L__BB1_911:
// index 6
mov.u32 %r9533, 3776;
bra.uni $L__BB1_915;
$L__BB1_907:
// index 14
mov.u32 %r9533, 966656;
bra.uni $L__BB1_915;
$L__BB1_914:
// index outside 0..15
mov.u32 %r9533, 0;
$L__BB1_915:
// Add to the work set every position that is in the selected mask, is
// enabled in %r1900, and has not been popped yet (not in %r9524).
not.b32 %r6149, %r9524;
and.b32 %r6150, %r1900, %r6149;
and.b32 %r6151, %r6150, %r9533;
or.b32 %r9525, %r6151, %r9525;
$L__BB1_916:
// Repeat while the work set is non-empty, carrying the byte counter in %r9527.
setp.ne.s32 %p1140, %r9525, 0;
mov.u32 %r9527, %r9545;
@%p1140 bra $L__BB1_871;
// Second pass: for every position recorded in %r9526, append bit 31 of the
// corresponding 32-bit word to the bit accumulator.
// If %r9526 is empty, go to the iteration tail with %r9546 == 0.
setp.eq.s32 %p1141, %r9526, 0;
mov.u32 %r9546, 0;
@%p1141 bra $L__BB1_924;
// %r9542 carries the byte counter across iterations; %r9539 is the set of
// positions still to process.
mov.u32 %r9542, %r9545;
mov.u32 %r9539, %r9526;
$L__BB1_919:
mov.u32 %r9545, %r9542;
// All positions processed: continue at the tail with %r9546 = %r9526.
setp.eq.s32 %p1142, %r9539, 0;
mov.u32 %r9546, %r9526;
@%p1142 bra $L__BB1_924;
// Pop the lowest set bit of %r9539 (same brev/bfind/rotate idiom as the
// first loop).
brev.b32 %r6153, %r9539;
bfind.shiftamt.u32 %r6154, %r6153;
mov.pred %p2370, -1;
mov.u32 %r6155, -2;
shf.l.wrap.b32 %r6156, %r6155, %r6155, %r6154;
and.b32 %r9539, %r9539, %r6156;
// Decode the position: the low two bits are added to %r9435, the remaining
// bits to %r9439. Element index = (%r9435 + (pos & 3)) * %r1 + %r9439 + (pos >> 2).
shr.u32 %r6157, %r6154, 2;
and.b32 %r6158, %r6154, 3;
add.s32 %r6159, %r6158, %r9435;
add.s32 %r6160, %r6157, %r9439;
mad.lo.s32 %r6161, %r6159, %r1, %r6160;
// Byte address = %rd3 + 4 * (%rd5 + zero-extended element index).
cvt.u64.u32 %rd692, %r6161;
add.s64 %rd693, %rd692, %rd5;
shl.b64 %rd694, %rd693, 2;
add.s64 %rd695, %rd3, %rd694;
ld.global.u32 %r6162, [%rd695];
// The appended bit is bit 31 of the loaded word.
shr.u32 %r6163, %r6162, 31;
// Same accumulator handling as the first loop: capacity 8 bits when
// %r9543 == 0, otherwise 7.
setp.eq.s32 %p1144, %r9543, 0;
selp.b32 %r6164, 8, 7, %p1144;
shl.b32 %r6165, %r6163, %r9541;
cvt.u16.u32 %rs775, %r6165;
or.b16 %rs1224, %rs1224, %rs775;
add.s32 %r9541, %r9541, 1;
setp.lt.u32 %p1145, %r9541, %r6164;
mov.pred %p2369, %p2370;
mov.u32 %r9542, %r9545;
@%p1145 bra $L__BB1_923;
// Accumulator full: a byte counter of -1 is a failure (%p2369 = false).
setp.eq.s32 %p1147, %r9545, -1;
mov.u32 %r9542, -1;
mov.pred %p2369, 0;
@%p1147 bra $L__BB1_923;
// Otherwise count the byte, set %r9543 to 1 when its low 8 bits are 0xFF
// (else 0), and reset the accumulator and bit position.
and.b16 %rs777, %rs1224, 255;
setp.eq.s16 %p1149, %rs777, 255;
selp.u32 %r9543, 1, 0, %p1149;
add.s32 %r9542, %r9545, 1;
mov.u32 %r9541, 0;
mov.u16 %rs1224, 0;
mov.pred %p2369, %p2370;
$L__BB1_923:
// Continue while successful; on failure leave through $L__BB1_928 with
// %r9550 == 0 and %p2370 == true.
mov.u32 %r9550, 0;
@%p2369 bra $L__BB1_919;
bra.uni $L__BB1_928;
$L__BB1_924:
// Iteration tail. If any position set in %r1933 is missing from %r9546,
// leave through $L__BB1_928 with %r9550 = %r6106 (0 on the paths visible in
// this span) and %p2370 = %p790 (%p790 is computed outside this span).
not.b32 %r6170, %r9546;
and.b32 %r6171, %r1933, %r6170;
setp.ne.s32 %p1152, %r6171, 0;
mov.u32 %r9550, %r6106;
mov.pred %p2370, %p790;
@%p1152 bra $L__BB1_928;
setp.lt.u32 %p1153, %r5794, %r5;
// Store %r9546 | %r1897 to local memory as two 16-bit halves at [%rd25] and
// [%rd25+2] (each st.local.u16 writes the low 16 bits of its source).
or.b32 %r6172, %r9546, %r1897;
st.local.u16 [%rd25], %r6172;
shr.u32 %r6173, %r6172, 16;
st.local.u16 [%rd25+2], %r6173;
// Build %r9440 for the next pass from bits 12..15 of the stored mask:
//   ((mask | %r1898) & 0xF000) | ((mask << 1) & 0xE000) | ((mask & 0xE000) >> 1)
shl.b32 %r6174, %r6172, 1;
and.b32 %r6175, %r6174, 57344;
and.b32 %r6176, %r6172, 57344;
shr.u32 %r6177, %r6176, 1;
or.b32 %r6178, %r6172, %r1898;
and.b32 %r6179, %r6178, 61440;
or.b32 %r6180, %r6179, %r6175;
or.b32 %r9440, %r6180, %r6177;
// Inner loop back-edge: continue at $L__BB1_689 with %r9439 = %r5794 while
// %r5794 < %r5.
mov.u32 %r9439, %r5794;
@%p1153 bra $L__BB1_689;
$L__BB1_926:
// Outer loop back-edge: advance %r9435 by 4 and continue at $L__BB1_687
// while it is below %r6.
add.s32 %r9435, %r9435, 4;
setp.gt.u32 %p1154, %r6, %r9435;
@%p1154 bra $L__BB1_687;
// Both loops finished; derive the final count %r9550 from the byte counter
// %r9545 and the pending bit position %r9541:
//   %r9541 == 0                  -> %r9550 = %r9545,     %p2370 = false
//   %r9541 != 0, %r9545 != -1    -> %r9550 = %r9545 + 1, %p2370 = false
//   %r9541 != 0, %r9545 == -1    -> %r9550 = 0,          %p2370 = true
setp.eq.s32 %p1155, %r9541, 0;
add.s32 %r6181, %r9545, 1;
setp.eq.s32 %p1156, %r9545, -1;
selp.b32 %r6182, -1, %r6181, %p1156;
selp.b32 %r6183, %r9545, %r6182, %p1155;
setp.ne.s32 %p1157, %r9545, -1;
or.pred %p1158, %p1155, %p1157;
selp.b32 %r9550, %r6183, 0, %p1158;
not.pred %p2370, %p1158;
$L__BB1_928:
// Dispatch on %p2370: true -> write the record below, false -> $L__BB1_929.
@%p2370 bra $L__BB1_930;
bra.uni $L__BB1_929;
$L__BB1_930:
// Write eight 32-bit words at [%rd6]: 2, 6, then six zeros, and jump to
// $L__BB1_1905.
// NOTE(review): 2 and 6 look like a status/error code pair; their meaning is
// not visible in this span - confirm against the host-side definition.
mov.u32 %r6191, 2;
st.global.u32 [%rd6], %r6191;
mov.u32 %r6192, 6;
st.global.u32 [%rd6+4], %r6192;
mov.u32 %r6193, 0;
st.global.u32 [%rd6+8], %r6193;
st.global.u32 [%rd6+12], %r6193;
st.global.u32 [%rd6+16], %r6193;
st.global.u32 [%rd6+20], %r6193;
st.global.u32 [%rd6+24], %r6193;
st.global.u32 [%rd6+28], %r6193;
bra.uni $L__BB1_1905;
$L__BB1_931:
// Alternate entry (branched to from outside this span): all three sizes
// %r9551, %r9552 and %r9553 are zero.
mov.u32 %r9551, 0;
mov.u32 %r9552, %r9551;
mov.u32 %r9553, %r9551;
bra.uni $L__BB1_932;
$L__BB1_929:
// %r9551 = max((%r6 * %r5 + 7) >> 3, %r9550), compared as unsigned.
mad.lo.s32 %r6184, %r6, %r5, 7;
shr.u32 %r6185, %r6184, 3;
max.u32 %r9551, %r6185, %r9550;
// %r9552 = (%r8423 + 6) / 7, unsigned, computed by multiplication:
//   q = high 32 bits of n * 613566757 (0x24924925)
//   result = (((n - q) >> 1) + q) >> 2
add.s32 %r6186, %r8423, 6;
mul.wide.u32 %rd696, %r6186, 613566757;
shr.u64 %rd697, %rd696, 32;
cvt.u32.u64 %r6187, %rd697;
sub.s32 %r6188, %r6186, %r6187;
shr.u32 %r6189, %r6188, 1;
add.s32 %r6190, %r6189, %r6187;
shr.u32 %r9552, %r6190, 2;
// %r9553 = %r9551 + %r9552.
add.s32 %r9553, %r9551, %r9552;
$L__BB1_932:
// Bounds check: take the record path when %r1735 < 2 or when
// %r9553 + %r1735 exceeds %r3 (unsigned compares); otherwise continue at
// $L__BB1_933.
add.s32 %r1986, %r9553, %r1735;
setp.gt.u32 %p1159, %r1986, %r3;
setp.lt.u32 %p1160, %r1735, 2;
or.pred %p1161, %p1160, %p1159;
@%p1161 bra $L__BB1_1247;
bra.uni $L__BB1_933;
$L__BB1_1247:
// Write eight 32-bit words at [%rd6]: 1, 4, then six zeros, and jump to
// $L__BB1_1905.
// NOTE(review): 1 and 4 look like a status/error code pair; their meaning is
// not visible in this span - confirm against the host-side definition.
mov.u32 %r6772, 1;
st.global.u32 [%rd6], %r6772;
mov.u32 %r6773, 4;
st.global.u32 [%rd6+4], %r6773;
mov.u32 %r6774, 0;
st.global.u32 [%rd6+8], %r6774;
st.global.u32 [%rd6+12], %r6774;
st.global.u32 [%rd6+16], %r6774;
st.global.u32 [%rd6+20], %r6774;
st.global.u32 [%rd6+24], %r6774;
st.global.u32 [%rd6+28], %r6774;
bra.uni $L__BB1_1905;
$L__BB1_933:
// Copy %r8514 bytes inside the global buffer addressed as %rd1 + %rd4 + index:
// source index 17477 + i, destination index %r9150 + i, for i = 0..%r8514-1.
// Indices are formed in 32 bits and zero-extended before the 64-bit add.
// Nothing to do when the count is zero.
setp.eq.s32 %p1162, %r8514, 0;
@%p1162 bra $L__BB1_939;
// %r9558 = count & 3 (remainder bytes). Skip the unrolled loop when the
// count is below 4.
add.s32 %r6198, %r8514, -1;
and.b32 %r9558, %r8514, 3;
setp.lt.u32 %p1163, %r6198, 3;
mov.u32 %r9556, 0;
@%p1163 bra $L__BB1_937;
// %r9555 = count rounded down to a multiple of 4; %r9556 = running offset i.
sub.s32 %r9555, %r8514, %r9558;
mov.u32 %r9556, 0;
$L__BB1_936:
// Unrolled body: four bytes per iteration. The first three source bytes are
// read at %rd700, %rd700+1 and %rd700+2; the fourth address is recomputed
// from index 17480 + i.
add.s32 %r6200, %r9556, 17477;
cvt.u64.u32 %rd698, %r6200;
add.s64 %rd699, %rd698, %rd4;
add.s64 %rd700, %rd1, %rd699;
ld.global.u8 %rs778, [%rd700];
add.s32 %r6201, %r9556, %r9150;
cvt.u64.u32 %rd701, %r6201;
add.s64 %rd702, %rd701, %rd4;
add.s64 %rd703, %rd1, %rd702;
st.global.u8 [%rd703], %rs778;
ld.global.u8 %rs779, [%rd700+1];
add.s32 %r6202, %r6201, 1;
cvt.u64.u32 %rd704, %r6202;
add.s64 %rd705, %rd704, %rd4;
add.s64 %rd706, %rd1, %rd705;
st.global.u8 [%rd706], %rs779;
ld.global.u8 %rs780, [%rd700+2];
add.s32 %r6203, %r6201, 2;
cvt.u64.u32 %rd707, %r6203;
add.s64 %rd708, %rd707, %rd4;
add.s64 %rd709, %rd1, %rd708;
st.global.u8 [%rd709], %rs780;
add.s32 %r6204, %r9556, 17480;
cvt.u64.u32 %rd710, %r6204;
add.s64 %rd711, %rd710, %rd4;
add.s64 %rd712, %rd1, %rd711;
ld.global.u8 %rs781, [%rd712];
add.s32 %r6205, %r6201, 3;
cvt.u64.u32 %rd713, %r6205;
add.s64 %rd714, %rd713, %rd4;
add.s64 %rd715, %rd1, %rd714;
st.global.u8 [%rd715], %rs781;
add.s32 %r9556, %r9556, 4;
add.s32 %r9555, %r9555, -4;
setp.ne.s32 %p1164, %r9555, 0;
@%p1164 bra $L__BB1_936;
$L__BB1_937:
// Remainder: copy the last (count & 3) bytes one at a time.
setp.eq.s32 %p1165, %r9558, 0;
@%p1165 bra $L__BB1_939;
$L__BB1_938:
.pragma "nounroll";
add.s32 %r6206, %r9556, 17477;
cvt.u64.u32 %rd716, %r6206;
add.s64 %rd717, %rd716, %rd4;
add.s64 %rd718, %rd1, %rd717;
ld.global.u8 %rs782, [%rd718];
add.s32 %r6207, %r9556, %r9150;
cvt.u64.u32 %rd719, %r6207;
add.s64 %rd720, %rd719, %rd4;
add.s64 %rd721, %rd1, %rd720;
st.global.u8 [%rd721], %rs782;
add.s32 %r9556, %r9556, 1;
add.s32 %r9558, %r9558, -1;
setp.ne.s32 %p1166, %r9558, 0;
@%p1166 bra $L__BB1_938;
// ---------------------------------------------------------------------------
// Byte copy #2 (global -> global).
// Copies %r8962 bytes from
//   %rd1 + %rd4 + (%r1998 + i)   to   %rd1 + %rd4 + (%r1734 + i),  i ascending,
// where %r1998 = 20549 - %r8962, i.e. the source is the %r8962 bytes that end
// just below offset 20549.
// NOTE(review): what lives at offset 20549 and what %r1734 denotes is defined
// outside this part of the kernel - confirm against the CUDA source.
// ---------------------------------------------------------------------------
$L__BB1_939:
// Nothing to copy when the length is zero.
setp.eq.s32 %p1167, %r8962, 0;
@%p1167 bra $L__BB1_945;
// %r1998 = first source offset; %r9563 = length mod 4.
// (length - 1) < 3 unsigned means length is 1..3: skip the 4x unrolled loop.
mov.u32 %r6209, 20549;
sub.s32 %r1998, %r6209, %r8962;
and.b32 %r9563, %r8962, 3;
add.s32 %r6210, %r8962, -1;
setp.lt.u32 %p1168, %r6210, 3;
mov.u32 %r9561, 0;
@%p1168 bra $L__BB1_943;
// %r9560 = length rounded down to a multiple of 4; %r9561 = byte index i.
sub.s32 %r9560, %r8962, %r9563;
mov.u32 %r9561, 0;
// Unrolled body: four bytes per iteration, each with fully recomputed
// source and destination addresses.
$L__BB1_942:
// byte i
add.s32 %r6212, %r1998, %r9561;
cvt.u64.u32 %rd722, %r6212;
add.s64 %rd723, %rd722, %rd4;
add.s64 %rd724, %rd1, %rd723;
ld.global.u8 %rs783, [%rd724];
add.s32 %r6213, %r9561, %r1734;
cvt.u64.u32 %rd725, %r6213;
add.s64 %rd726, %rd725, %rd4;
add.s64 %rd727, %rd1, %rd726;
st.global.u8 [%rd727], %rs783;
// byte i+1
add.s32 %r6214, %r9561, 1;
add.s32 %r6215, %r1998, %r6214;
cvt.u64.u32 %rd728, %r6215;
add.s64 %rd729, %rd728, %rd4;
add.s64 %rd730, %rd1, %rd729;
ld.global.u8 %rs784, [%rd730];
add.s32 %r6216, %r6214, %r1734;
cvt.u64.u32 %rd731, %r6216;
add.s64 %rd732, %rd731, %rd4;
add.s64 %rd733, %rd1, %rd732;
st.global.u8 [%rd733], %rs784;
// byte i+2
add.s32 %r6217, %r9561, 2;
add.s32 %r6218, %r1998, %r6217;
cvt.u64.u32 %rd734, %r6218;
add.s64 %rd735, %rd734, %rd4;
add.s64 %rd736, %rd1, %rd735;
ld.global.u8 %rs785, [%rd736];
add.s32 %r6219, %r6217, %r1734;
cvt.u64.u32 %rd737, %r6219;
add.s64 %rd738, %rd737, %rd4;
add.s64 %rd739, %rd1, %rd738;
st.global.u8 [%rd739], %rs785;
// byte i+3
add.s32 %r6220, %r9561, 3;
add.s32 %r6221, %r1998, %r6220;
cvt.u64.u32 %rd740, %r6221;
add.s64 %rd741, %rd740, %rd4;
add.s64 %rd742, %rd1, %rd741;
ld.global.u8 %rs786, [%rd742];
add.s32 %r6222, %r6220, %r1734;
cvt.u64.u32 %rd743, %r6222;
add.s64 %rd744, %rd743, %rd4;
add.s64 %rd745, %rd1, %rd744;
st.global.u8 [%rd745], %rs786;
// Advance i by 4 and loop until the multiple-of-4 part is consumed.
add.s32 %r9561, %r9561, 4;
add.s32 %r9560, %r9560, -4;
setp.ne.s32 %p1169, %r9560, 0;
@%p1169 bra $L__BB1_942;
// Remainder: 0..3 bytes left, copied one at a time.
$L__BB1_943:
setp.eq.s32 %p1170, %r9563, 0;
@%p1170 bra $L__BB1_945;
$L__BB1_944:
.pragma "nounroll";
add.s32 %r6223, %r1998, %r9561;
cvt.u64.u32 %rd746, %r6223;
add.s64 %rd747, %rd746, %rd4;
add.s64 %rd748, %rd1, %rd747;
ld.global.u8 %rs787, [%rd748];
add.s32 %r6224, %r9561, %r1734;
cvt.u64.u32 %rd749, %r6224;
add.s64 %rd750, %rd749, %rd4;
add.s64 %rd751, %rd1, %rd750;
st.global.u8 [%rd751], %rs787;
add.s32 %r9561, %r9561, 1;
add.s32 %r9563, %r9563, -1;
setp.ne.s32 %p1171, %r9563, 0;
@%p1171 bra $L__BB1_944;
// ---------------------------------------------------------------------------
// Store the combined length L = %r8962 + %r8514 into the two bytes that sit
// immediately below offset %r1735 in the global buffer (%rd1 + %rd4 + ...):
//   byte[%r1735 - 1] = low 8 bits of (L >> 4)
//   byte[%r1735 - 2] = (old byte & 0xF0) | (L & 0x0F)   (read-modify-write)
// NOTE(review): this looks like a length field split 8 + 4 bits across the
// last two bytes of a segment (as in an HTJ2K cleanup-pass trailer) -
// confirm against the CUDA source; nothing here bounds L to 12 bits.
// ---------------------------------------------------------------------------
$L__BB1_945:
add.s32 %r6225, %r8962, %r8514;
shr.u32 %r6226, %r6225, 4;
// Upper part: a 32-bit register is the source of a .u8 store, so only its
// least-significant byte is written.
add.s32 %r6227, %r1735, -1;
cvt.u64.u32 %rd752, %r6227;
add.s64 %rd753, %rd752, %rd4;
add.s64 %rd754, %rd1, %rd753;
st.global.u8 [%rd754], %r6226;
// Lower part: keep the existing high nibble (mask 240 = 0xF0) and replace
// the low nibble with L & 15.
add.s32 %r6228, %r1735, -2;
cvt.u64.u32 %rd755, %r6228;
add.s64 %rd756, %rd755, %rd4;
add.s64 %rd757, %rd1, %rd756;
ld.global.u8 %rs788, [%rd757];
and.b16 %rs789, %rs788, 240;
cvt.u16.u32 %rs790, %r6225;
and.b16 %rs791, %rs790, 15;
or.b16 %rs792, %rs789, %rs791;
st.global.u8 [%rd757], %rs792;
// ---------------------------------------------------------------------------
// Zero fill (global).
// Writes %r9553 zero bytes to %rd1 + %rd4 + (%r1735 + i), i ascending.
// Same shape as the copies above: 4x unrolled loop plus a 0..3 byte
// remainder loop. Skipped entirely when %r9553 == 0.
// NOTE(review): the role of %r9553 (padding length?) is set outside this
// part of the kernel - confirm against the CUDA source.
// ---------------------------------------------------------------------------
setp.eq.s32 %p1172, %r9553, 0;
@%p1172 bra $L__BB1_951;
// %r9568 = count mod 4; (count - 1) < 3 unsigned means count is 1..3.
add.s32 %r6230, %r9553, -1;
and.b32 %r9568, %r9553, 3;
setp.lt.u32 %p1173, %r6230, 3;
mov.u32 %r9566, 0;
@%p1173 bra $L__BB1_949;
// %r9565 = count rounded down to a multiple of 4; %r9566 = byte index i.
sub.s32 %r9565, %r9553, %r9568;
mov.u32 %r9566, 0;
// Unrolled body: four zero bytes per iteration.
$L__BB1_948:
add.s32 %r6232, %r9566, %r1735;
cvt.u64.u32 %rd758, %r6232;
add.s64 %rd759, %rd758, %rd4;
add.s64 %rd760, %rd1, %rd759;
mov.u16 %rs793, 0;
st.global.u8 [%rd760], %rs793;
add.s32 %r6233, %r6232, 1;
cvt.u64.u32 %rd761, %r6233;
add.s64 %rd762, %rd761, %rd4;
add.s64 %rd763, %rd1, %rd762;
st.global.u8 [%rd763], %rs793;
add.s32 %r6234, %r6232, 2;
cvt.u64.u32 %rd764, %r6234;
add.s64 %rd765, %rd764, %rd4;
add.s64 %rd766, %rd1, %rd765;
st.global.u8 [%rd766], %rs793;
add.s32 %r6235, %r6232, 3;
cvt.u64.u32 %rd767, %r6235;
add.s64 %rd768, %rd767, %rd4;
add.s64 %rd769, %rd1, %rd768;
st.global.u8 [%rd769], %rs793;
add.s32 %r9566, %r9566, 4;
add.s32 %r9565, %r9565, -4;
setp.ne.s32 %p1174, %r9565, 0;
@%p1174 bra $L__BB1_948;
// Remainder: 0..3 bytes left, cleared one at a time.
$L__BB1_949:
setp.eq.s32 %p1175, %r9568, 0;
@%p1175 bra $L__BB1_951;
$L__BB1_950:
.pragma "nounroll";
add.s32 %r6236, %r9566, %r1735;
cvt.u64.u32 %rd770, %r6236;
add.s64 %rd771, %rd770, %rd4;
add.s64 %rd772, %rd1, %rd771;
mov.u16 %rs794, 0;
st.global.u8 [%rd772], %rs794;
add.s32 %r9566, %r9566, 1;
add.s32 %r9568, %r9568, -1;
setp.ne.s32 %p1176, %r9568, 0;
@%p1176 bra $L__BB1_950;
// ---------------------------------------------------------------------------
// Gate for the next stage: it only runs when %r4 == 3; any other value
// branches to $L__BB1_1243.
// NOTE(review): %r4 is loaded outside this part of the kernel; the meaning
// of the value 3 (pass count / mode?) must be confirmed in the CUDA source.
// ---------------------------------------------------------------------------
$L__BB1_951:
setp.ne.s32 %p1177, %r4, 3;
@%p1177 bra $L__BB1_1243;
// %rd42 = %r1735 (zero-extended) + %rd4;  %rd43 = kernel parameter 1 + %rd42.
// Both are computed here for use later in the kernel.
ld.param.u64 %rd1412, [ j2k_htj2k_encode_codeblocks_param_1];
cvt.u64.u32 %rd773, %r1735;
add.s64 %rd42, %rd773, %rd4;
add.s64 %rd43, %rd1412, %rd42;
// Capacity check: ((%r5 + 3) >> 2) + 8 must not exceed 513, which is the
// number of 16-bit local slots zeroed right after this check.
// %p2373 is preset to true before the overflow branch to $L__BB1_1202.
add.s32 %r6237, %r5, 3;
shr.u32 %r6238, %r6237, 2;
add.s32 %r6239, %r6238, 8;
setp.gt.u32 %p1179, %r6239, 513;
mov.pred %p1178, -1;
mov.pred %p2373, %p1178;
@%p1179 bra $L__BB1_1202;
// ---------------------------------------------------------------------------
// Clear the per-thread 16-bit table in .local memory at %rd23.
// The compiler fully unrolled the clear: this store and the stores that
// follow write %rs1232 (= 0) to byte offsets 0, 2, 4, ..., 1024, i.e. 513
// consecutive u16 slots (1026 bytes).
// ---------------------------------------------------------------------------
mov.u16 %rs1232, 0;
st.local.u16 [%rd23], %rs1232;
st.local.u16 [%rd23+2], %rs1232;
st.local.u16 [%rd23+4], %rs1232;
st.local.u16 [%rd23+6], %rs1232;
st.local.u16 [%rd23+8], %rs1232;
st.local.u16 [%rd23+10], %rs1232;
st.local.u16 [%rd23+12], %rs1232;
st.local.u16 [%rd23+14], %rs1232;
st.local.u16 [%rd23+16], %rs1232;
st.local.u16 [%rd23+18], %rs1232;
st.local.u16 [%rd23+20], %rs1232;
st.local.u16 [%rd23+22], %rs1232;
st.local.u16 [%rd23+24], %rs1232;
st.local.u16 [%rd23+26], %rs1232;
st.local.u16 [%rd23+28], %rs1232;
st.local.u16 [%rd23+30], %rs1232;
st.local.u16 [%rd23+32], %rs1232;
st.local.u16 [%rd23+34], %rs1232;
st.local.u16 [%rd23+36], %rs1232;
st.local.u16 [%rd23+38], %rs1232;
st.local.u16 [%rd23+40], %rs1232;
st.local.u16 [%rd23+42], %rs1232;
st.local.u16 [%rd23+44], %rs1232;
st.local.u16 [%rd23+46], %rs1232;
st.local.u16 [%rd23+48], %rs1232;
st.local.u16 [%rd23+50], %rs1232;
st.local.u16 [%rd23+52], %rs1232;
st.local.u16 [%rd23+54], %rs1232;
st.local.u16 [%rd23+56], %rs1232;
st.local.u16 [%rd23+58], %rs1232;
st.local.u16 [%rd23+60], %rs1232;
st.local.u16 [%rd23+62], %rs1232;
st.local.u16 [%rd23+64], %rs1232;
st.local.u16 [%rd23+66], %rs1232;
st.local.u16 [%rd23+68], %rs1232;
st.local.u16 [%rd23+70], %rs1232;
st.local.u16 [%rd23+72], %rs1232;
st.local.u16 [%rd23+74], %rs1232;
st.local.u16 [%rd23+76], %rs1232;
st.local.u16 [%rd23+78], %rs1232;
st.local.u16 [%rd23+80], %rs1232;
st.local.u16 [%rd23+82], %rs1232;
st.local.u16 [%rd23+84], %rs1232;
st.local.u16 [%rd23+86], %rs1232;
st.local.u16 [%rd23+88], %rs1232;
st.local.u16 [%rd23+90], %rs1232;
st.local.u16 [%rd23+92], %rs1232;
st.local.u16 [%rd23+94], %rs1232;
st.local.u16 [%rd23+96], %rs1232;
st.local.u16 [%rd23+98], %rs1232;
st.local.u16 [%rd23+100], %rs1232;
st.local.u16 [%rd23+102], %rs1232;
st.local.u16 [%rd23+104], %rs1232;
st.local.u16 [%rd23+106], %rs1232;
st.local.u16 [%rd23+108], %rs1232;
st.local.u16 [%rd23+110], %rs1232;
st.local.u16 [%rd23+112], %rs1232;
st.local.u16 [%rd23+114], %rs1232;
st.local.u16 [%rd23+116], %rs1232;
st.local.u16 [%rd23+118], %rs1232;
st.local.u16 [%rd23+120], %rs1232;
st.local.u16 [%rd23+122], %rs1232;
st.local.u16 [%rd23+124], %rs1232;
st.local.u16 [%rd23+126], %rs1232;
st.local.u16 [%rd23+128], %rs1232;
st.local.u16 [%rd23+130], %rs1232;
st.local.u16 [%rd23+132], %rs1232;
st.local.u16 [%rd23+134], %rs1232;
st.local.u16 [%rd23+136], %rs1232;
st.local.u16 [%rd23+138], %rs1232;
st.local.u16 [%rd23+140], %rs1232;
st.local.u16 [%rd23+142], %rs1232;
st.local.u16 [%rd23+144], %rs1232;
st.local.u16 [%rd23+146], %rs1232;
st.local.u16 [%rd23+148], %rs1232;
st.local.u16 [%rd23+150], %rs1232;
st.local.u16 [%rd23+152], %rs1232;
st.local.u16 [%rd23+154], %rs1232;
st.local.u16 [%rd23+156], %rs1232;
st.local.u16 [%rd23+158], %rs1232;
st.local.u16 [%rd23+160], %rs1232;
st.local.u16 [%rd23+162], %rs1232;
st.local.u16 [%rd23+164], %rs1232;
st.local.u16 [%rd23+166], %rs1232;
st.local.u16 [%rd23+168], %rs1232;
st.local.u16 [%rd23+170], %rs1232;
st.local.u16 [%rd23+172], %rs1232;
st.local.u16 [%rd23+174], %rs1232;
st.local.u16 [%rd23+176], %rs1232;
st.local.u16 [%rd23+178], %rs1232;
st.local.u16 [%rd23+180], %rs1232;
st.local.u16 [%rd23+182], %rs1232;
st.local.u16 [%rd23+184], %rs1232;
st.local.u16 [%rd23+186], %rs1232;
st.local.u16 [%rd23+188], %rs1232;
st.local.u16 [%rd23+190], %rs1232;
st.local.u16 [%rd23+192], %rs1232;
st.local.u16 [%rd23+194], %rs1232;
st.local.u16 [%rd23+196], %rs1232;
st.local.u16 [%rd23+198], %rs1232;
st.local.u16 [%rd23+200], %rs1232;
st.local.u16 [%rd23+202], %rs1232;
st.local.u16 [%rd23+204], %rs1232;
st.local.u16 [%rd23+206], %rs1232;
st.local.u16 [%rd23+208], %rs1232;
st.local.u16 [%rd23+210], %rs1232;
st.local.u16 [%rd23+212], %rs1232;
st.local.u16 [%rd23+214], %rs1232;
st.local.u16 [%rd23+216], %rs1232;
st.local.u16 [%rd23+218], %rs1232;
st.local.u16 [%rd23+220], %rs1232;
st.local.u16 [%rd23+222], %rs1232;
st.local.u16 [%rd23+224], %rs1232;
st.local.u16 [%rd23+226], %rs1232;
st.local.u16 [%rd23+228], %rs1232;
st.local.u16 [%rd23+230], %rs1232;
st.local.u16 [%rd23+232], %rs1232;
st.local.u16 [%rd23+234], %rs1232;
st.local.u16 [%rd23+236], %rs1232;
st.local.u16 [%rd23+238], %rs1232;
st.local.u16 [%rd23+240], %rs1232;
st.local.u16 [%rd23+242], %rs1232;
st.local.u16 [%rd23+244], %rs1232;
st.local.u16 [%rd23+246], %rs1232;
st.local.u16 [%rd23+248], %rs1232;
st.local.u16 [%rd23+250], %rs1232;
st.local.u16 [%rd23+252], %rs1232;
st.local.u16 [%rd23+254], %rs1232;
st.local.u16 [%rd23+256], %rs1232;
st.local.u16 [%rd23+258], %rs1232;
st.local.u16 [%rd23+260], %rs1232;
st.local.u16 [%rd23+262], %rs1232;
st.local.u16 [%rd23+264], %rs1232;
st.local.u16 [%rd23+266], %rs1232;
st.local.u16 [%rd23+268], %rs1232;
st.local.u16 [%rd23+270], %rs1232;
st.local.u16 [%rd23+272], %rs1232;
st.local.u16 [%rd23+274], %rs1232;
st.local.u16 [%rd23+276], %rs1232;
st.local.u16 [%rd23+278], %rs1232;
st.local.u16 [%rd23+280], %rs1232;
st.local.u16 [%rd23+282], %rs1232;
st.local.u16 [%rd23+284], %rs1232;
st.local.u16 [%rd23+286], %rs1232;
st.local.u16 [%rd23+288], %rs1232;
st.local.u16 [%rd23+290], %rs1232;
st.local.u16 [%rd23+292], %rs1232;
st.local.u16 [%rd23+294], %rs1232;
st.local.u16 [%rd23+296], %rs1232;
st.local.u16 [%rd23+298], %rs1232;
st.local.u16 [%rd23+300], %rs1232;
st.local.u16 [%rd23+302], %rs1232;
st.local.u16 [%rd23+304], %rs1232;
st.local.u16 [%rd23+306], %rs1232;
st.local.u16 [%rd23+308], %rs1232;
st.local.u16 [%rd23+310], %rs1232;
st.local.u16 [%rd23+312], %rs1232;
st.local.u16 [%rd23+314], %rs1232;
st.local.u16 [%rd23+316], %rs1232;
st.local.u16 [%rd23+318], %rs1232;
st.local.u16 [%rd23+320], %rs1232;
st.local.u16 [%rd23+322], %rs1232;
st.local.u16 [%rd23+324], %rs1232;
st.local.u16 [%rd23+326], %rs1232;
st.local.u16 [%rd23+328], %rs1232;
st.local.u16 [%rd23+330], %rs1232;
st.local.u16 [%rd23+332], %rs1232;
st.local.u16 [%rd23+334], %rs1232;
st.local.u16 [%rd23+336], %rs1232;
st.local.u16 [%rd23+338], %rs1232;
st.local.u16 [%rd23+340], %rs1232;
st.local.u16 [%rd23+342], %rs1232;
st.local.u16 [%rd23+344], %rs1232;
st.local.u16 [%rd23+346], %rs1232;
st.local.u16 [%rd23+348], %rs1232;
st.local.u16 [%rd23+350], %rs1232;
st.local.u16 [%rd23+352], %rs1232;
st.local.u16 [%rd23+354], %rs1232;
st.local.u16 [%rd23+356], %rs1232;
st.local.u16 [%rd23+358], %rs1232;
st.local.u16 [%rd23+360], %rs1232;
st.local.u16 [%rd23+362], %rs1232;
st.local.u16 [%rd23+364], %rs1232;
st.local.u16 [%rd23+366], %rs1232;
st.local.u16 [%rd23+368], %rs1232;
st.local.u16 [%rd23+370], %rs1232;
st.local.u16 [%rd23+372], %rs1232;
st.local.u16 [%rd23+374], %rs1232;
st.local.u16 [%rd23+376], %rs1232;
st.local.u16 [%rd23+378], %rs1232;
st.local.u16 [%rd23+380], %rs1232;
st.local.u16 [%rd23+382], %rs1232;
st.local.u16 [%rd23+384], %rs1232;
st.local.u16 [%rd23+386], %rs1232;
st.local.u16 [%rd23+388], %rs1232;
st.local.u16 [%rd23+390], %rs1232;
st.local.u16 [%rd23+392], %rs1232;
st.local.u16 [%rd23+394], %rs1232;
st.local.u16 [%rd23+396], %rs1232;
st.local.u16 [%rd23+398], %rs1232;
st.local.u16 [%rd23+400], %rs1232;
st.local.u16 [%rd23+402], %rs1232;
st.local.u16 [%rd23+404], %rs1232;
st.local.u16 [%rd23+406], %rs1232;
st.local.u16 [%rd23+408], %rs1232;
st.local.u16 [%rd23+410], %rs1232;
st.local.u16 [%rd23+412], %rs1232;
st.local.u16 [%rd23+414], %rs1232;
st.local.u16 [%rd23+416], %rs1232;
st.local.u16 [%rd23+418], %rs1232;
st.local.u16 [%rd23+420], %rs1232;
st.local.u16 [%rd23+422], %rs1232;
st.local.u16 [%rd23+424], %rs1232;
st.local.u16 [%rd23+426], %rs1232;
st.local.u16 [%rd23+428], %rs1232;
st.local.u16 [%rd23+430], %rs1232;
st.local.u16 [%rd23+432], %rs1232;
st.local.u16 [%rd23+434], %rs1232;
st.local.u16 [%rd23+436], %rs1232;
st.local.u16 [%rd23+438], %rs1232;
st.local.u16 [%rd23+440], %rs1232;
st.local.u16 [%rd23+442], %rs1232;
st.local.u16 [%rd23+444], %rs1232;
st.local.u16 [%rd23+446], %rs1232;
st.local.u16 [%rd23+448], %rs1232;
st.local.u16 [%rd23+450], %rs1232;
st.local.u16 [%rd23+452], %rs1232;
st.local.u16 [%rd23+454], %rs1232;
st.local.u16 [%rd23+456], %rs1232;
st.local.u16 [%rd23+458], %rs1232;
st.local.u16 [%rd23+460], %rs1232;
st.local.u16 [%rd23+462], %rs1232;
st.local.u16 [%rd23+464], %rs1232;
st.local.u16 [%rd23+466], %rs1232;
st.local.u16 [%rd23+468], %rs1232;
st.local.u16 [%rd23+470], %rs1232;
st.local.u16 [%rd23+472], %rs1232;
st.local.u16 [%rd23+474], %rs1232;
st.local.u16 [%rd23+476], %rs1232;
st.local.u16 [%rd23+478], %rs1232;
st.local.u16 [%rd23+480], %rs1232;
st.local.u16 [%rd23+482], %rs1232;
st.local.u16 [%rd23+484], %rs1232;
st.local.u16 [%rd23+486], %rs1232;
st.local.u16 [%rd23+488], %rs1232;
st.local.u16 [%rd23+490], %rs1232;
st.local.u16 [%rd23+492], %rs1232;
st.local.u16 [%rd23+494], %rs1232;
st.local.u16 [%rd23+496], %rs1232;
st.local.u16 [%rd23+498], %rs1232;
st.local.u16 [%rd23+500], %rs1232;
st.local.u16 [%rd23+502], %rs1232;
st.local.u16 [%rd23+504], %rs1232;
st.local.u16 [%rd23+506], %rs1232;
st.local.u16 [%rd23+508], %rs1232;
st.local.u16 [%rd23+510], %rs1232;
st.local.u16 [%rd23+512], %rs1232;
st.local.u16 [%rd23+514], %rs1232;
st.local.u16 [%rd23+516], %rs1232;
st.local.u16 [%rd23+518], %rs1232;
st.local.u16 [%rd23+520], %rs1232;
st.local.u16 [%rd23+522], %rs1232;
st.local.u16 [%rd23+524], %rs1232;
st.local.u16 [%rd23+526], %rs1232;
st.local.u16 [%rd23+528], %rs1232;
st.local.u16 [%rd23+530], %rs1232;
st.local.u16 [%rd23+532], %rs1232;
st.local.u16 [%rd23+534], %rs1232;
st.local.u16 [%rd23+536], %rs1232;
st.local.u16 [%rd23+538], %rs1232;
st.local.u16 [%rd23+540], %rs1232;
st.local.u16 [%rd23+542], %rs1232;
st.local.u16 [%rd23+544], %rs1232;
st.local.u16 [%rd23+546], %rs1232;
st.local.u16 [%rd23+548], %rs1232;
st.local.u16 [%rd23+550], %rs1232;
st.local.u16 [%rd23+552], %rs1232;
st.local.u16 [%rd23+554], %rs1232;
st.local.u16 [%rd23+556], %rs1232;
st.local.u16 [%rd23+558], %rs1232;
st.local.u16 [%rd23+560], %rs1232;
st.local.u16 [%rd23+562], %rs1232;
st.local.u16 [%rd23+564], %rs1232;
st.local.u16 [%rd23+566], %rs1232;
st.local.u16 [%rd23+568], %rs1232;
st.local.u16 [%rd23+570], %rs1232;
st.local.u16 [%rd23+572], %rs1232;
st.local.u16 [%rd23+574], %rs1232;
st.local.u16 [%rd23+576], %rs1232;
st.local.u16 [%rd23+578], %rs1232;
st.local.u16 [%rd23+580], %rs1232;
st.local.u16 [%rd23+582], %rs1232;
st.local.u16 [%rd23+584], %rs1232;
st.local.u16 [%rd23+586], %rs1232;
st.local.u16 [%rd23+588], %rs1232;
st.local.u16 [%rd23+590], %rs1232;
st.local.u16 [%rd23+592], %rs1232;
st.local.u16 [%rd23+594], %rs1232;
st.local.u16 [%rd23+596], %rs1232;
st.local.u16 [%rd23+598], %rs1232;
st.local.u16 [%rd23+600], %rs1232;
st.local.u16 [%rd23+602], %rs1232;
st.local.u16 [%rd23+604], %rs1232;
st.local.u16 [%rd23+606], %rs1232;
st.local.u16 [%rd23+608], %rs1232;
st.local.u16 [%rd23+610], %rs1232;
st.local.u16 [%rd23+612], %rs1232;
st.local.u16 [%rd23+614], %rs1232;
st.local.u16 [%rd23+616], %rs1232;
st.local.u16 [%rd23+618], %rs1232;
st.local.u16 [%rd23+620], %rs1232;
st.local.u16 [%rd23+622], %rs1232;
st.local.u16 [%rd23+624], %rs1232;
st.local.u16 [%rd23+626], %rs1232;
st.local.u16 [%rd23+628], %rs1232;
st.local.u16 [%rd23+630], %rs1232;
st.local.u16 [%rd23+632], %rs1232;
st.local.u16 [%rd23+634], %rs1232;
st.local.u16 [%rd23+636], %rs1232;
st.local.u16 [%rd23+638], %rs1232;
st.local.u16 [%rd23+640], %rs1232;
st.local.u16 [%rd23+642], %rs1232;
st.local.u16 [%rd23+644], %rs1232;
st.local.u16 [%rd23+646], %rs1232;
st.local.u16 [%rd23+648], %rs1232;
st.local.u16 [%rd23+650], %rs1232;
st.local.u16 [%rd23+652], %rs1232;
st.local.u16 [%rd23+654], %rs1232;
st.local.u16 [%rd23+656], %rs1232;
st.local.u16 [%rd23+658], %rs1232;
st.local.u16 [%rd23+660], %rs1232;
st.local.u16 [%rd23+662], %rs1232;
st.local.u16 [%rd23+664], %rs1232;
st.local.u16 [%rd23+666], %rs1232;
st.local.u16 [%rd23+668], %rs1232;
st.local.u16 [%rd23+670], %rs1232;
st.local.u16 [%rd23+672], %rs1232;
st.local.u16 [%rd23+674], %rs1232;
st.local.u16 [%rd23+676], %rs1232;
st.local.u16 [%rd23+678], %rs1232;
st.local.u16 [%rd23+680], %rs1232;
st.local.u16 [%rd23+682], %rs1232;
st.local.u16 [%rd23+684], %rs1232;
st.local.u16 [%rd23+686], %rs1232;
st.local.u16 [%rd23+688], %rs1232;
st.local.u16 [%rd23+690], %rs1232;
st.local.u16 [%rd23+692], %rs1232;
st.local.u16 [%rd23+694], %rs1232;
st.local.u16 [%rd23+696], %rs1232;
st.local.u16 [%rd23+698], %rs1232;
st.local.u16 [%rd23+700], %rs1232;
st.local.u16 [%rd23+702], %rs1232;
st.local.u16 [%rd23+704], %rs1232;
st.local.u16 [%rd23+706], %rs1232;
st.local.u16 [%rd23+708], %rs1232;
st.local.u16 [%rd23+710], %rs1232;
st.local.u16 [%rd23+712], %rs1232;
st.local.u16 [%rd23+714], %rs1232;
st.local.u16 [%rd23+716], %rs1232;
st.local.u16 [%rd23+718], %rs1232;
st.local.u16 [%rd23+720], %rs1232;
st.local.u16 [%rd23+722], %rs1232;
st.local.u16 [%rd23+724], %rs1232;
st.local.u16 [%rd23+726], %rs1232;
st.local.u16 [%rd23+728], %rs1232;
st.local.u16 [%rd23+730], %rs1232;
st.local.u16 [%rd23+732], %rs1232;
st.local.u16 [%rd23+734], %rs1232;
st.local.u16 [%rd23+736], %rs1232;
st.local.u16 [%rd23+738], %rs1232;
st.local.u16 [%rd23+740], %rs1232;
st.local.u16 [%rd23+742], %rs1232;
st.local.u16 [%rd23+744], %rs1232;
st.local.u16 [%rd23+746], %rs1232;
st.local.u16 [%rd23+748], %rs1232;
st.local.u16 [%rd23+750], %rs1232;
st.local.u16 [%rd23+752], %rs1232;
st.local.u16 [%rd23+754], %rs1232;
st.local.u16 [%rd23+756], %rs1232;
st.local.u16 [%rd23+758], %rs1232;
st.local.u16 [%rd23+760], %rs1232;
st.local.u16 [%rd23+762], %rs1232;
st.local.u16 [%rd23+764], %rs1232;
st.local.u16 [%rd23+766], %rs1232;
st.local.u16 [%rd23+768], %rs1232;
st.local.u16 [%rd23+770], %rs1232;
st.local.u16 [%rd23+772], %rs1232;
st.local.u16 [%rd23+774], %rs1232;
st.local.u16 [%rd23+776], %rs1232;
st.local.u16 [%rd23+778], %rs1232;
st.local.u16 [%rd23+780], %rs1232;
st.local.u16 [%rd23+782], %rs1232;
st.local.u16 [%rd23+784], %rs1232;
st.local.u16 [%rd23+786], %rs1232;
st.local.u16 [%rd23+788], %rs1232;
st.local.u16 [%rd23+790], %rs1232;
st.local.u16 [%rd23+792], %rs1232;
st.local.u16 [%rd23+794], %rs1232;
st.local.u16 [%rd23+796], %rs1232;
st.local.u16 [%rd23+798], %rs1232;
st.local.u16 [%rd23+800], %rs1232;
st.local.u16 [%rd23+802], %rs1232;
st.local.u16 [%rd23+804], %rs1232;
st.local.u16 [%rd23+806], %rs1232;
st.local.u16 [%rd23+808], %rs1232;
st.local.u16 [%rd23+810], %rs1232;
st.local.u16 [%rd23+812], %rs1232;
st.local.u16 [%rd23+814], %rs1232;
st.local.u16 [%rd23+816], %rs1232;
st.local.u16 [%rd23+818], %rs1232;
st.local.u16 [%rd23+820], %rs1232;
st.local.u16 [%rd23+822], %rs1232;
st.local.u16 [%rd23+824], %rs1232;
st.local.u16 [%rd23+826], %rs1232;
st.local.u16 [%rd23+828], %rs1232;
st.local.u16 [%rd23+830], %rs1232;
st.local.u16 [%rd23+832], %rs1232;
st.local.u16 [%rd23+834], %rs1232;
st.local.u16 [%rd23+836], %rs1232;
st.local.u16 [%rd23+838], %rs1232;
st.local.u16 [%rd23+840], %rs1232;
st.local.u16 [%rd23+842], %rs1232;
st.local.u16 [%rd23+844], %rs1232;
st.local.u16 [%rd23+846], %rs1232;
st.local.u16 [%rd23+848], %rs1232;
st.local.u16 [%rd23+850], %rs1232;
st.local.u16 [%rd23+852], %rs1232;
st.local.u16 [%rd23+854], %rs1232;
st.local.u16 [%rd23+856], %rs1232;
st.local.u16 [%rd23+858], %rs1232;
st.local.u16 [%rd23+860], %rs1232;
st.local.u16 [%rd23+862], %rs1232;
st.local.u16 [%rd23+864], %rs1232;
st.local.u16 [%rd23+866], %rs1232;
st.local.u16 [%rd23+868], %rs1232;
st.local.u16 [%rd23+870], %rs1232;
st.local.u16 [%rd23+872], %rs1232;
st.local.u16 [%rd23+874], %rs1232;
st.local.u16 [%rd23+876], %rs1232;
st.local.u16 [%rd23+878], %rs1232;
st.local.u16 [%rd23+880], %rs1232;
st.local.u16 [%rd23+882], %rs1232;
st.local.u16 [%rd23+884], %rs1232;
st.local.u16 [%rd23+886], %rs1232;
st.local.u16 [%rd23+888], %rs1232;
st.local.u16 [%rd23+890], %rs1232;
st.local.u16 [%rd23+892], %rs1232;
st.local.u16 [%rd23+894], %rs1232;
st.local.u16 [%rd23+896], %rs1232;
st.local.u16 [%rd23+898], %rs1232;
st.local.u16 [%rd23+900], %rs1232;
st.local.u16 [%rd23+902], %rs1232;
st.local.u16 [%rd23+904], %rs1232;
st.local.u16 [%rd23+906], %rs1232;
st.local.u16 [%rd23+908], %rs1232;
st.local.u16 [%rd23+910], %rs1232;
st.local.u16 [%rd23+912], %rs1232;
st.local.u16 [%rd23+914], %rs1232;
st.local.u16 [%rd23+916], %rs1232;
st.local.u16 [%rd23+918], %rs1232;
st.local.u16 [%rd23+920], %rs1232;
st.local.u16 [%rd23+922], %rs1232;
st.local.u16 [%rd23+924], %rs1232;
st.local.u16 [%rd23+926], %rs1232;
st.local.u16 [%rd23+928], %rs1232;
st.local.u16 [%rd23+930], %rs1232;
st.local.u16 [%rd23+932], %rs1232;
st.local.u16 [%rd23+934], %rs1232;
st.local.u16 [%rd23+936], %rs1232;
st.local.u16 [%rd23+938], %rs1232;
st.local.u16 [%rd23+940], %rs1232;
st.local.u16 [%rd23+942], %rs1232;
st.local.u16 [%rd23+944], %rs1232;
st.local.u16 [%rd23+946], %rs1232;
st.local.u16 [%rd23+948], %rs1232;
st.local.u16 [%rd23+950], %rs1232;
st.local.u16 [%rd23+952], %rs1232;
st.local.u16 [%rd23+954], %rs1232;
st.local.u16 [%rd23+956], %rs1232;
st.local.u16 [%rd23+958], %rs1232;
st.local.u16 [%rd23+960], %rs1232;
st.local.u16 [%rd23+962], %rs1232;
st.local.u16 [%rd23+964], %rs1232;
st.local.u16 [%rd23+966], %rs1232;
st.local.u16 [%rd23+968], %rs1232;
st.local.u16 [%rd23+970], %rs1232;
st.local.u16 [%rd23+972], %rs1232;
st.local.u16 [%rd23+974], %rs1232;
st.local.u16 [%rd23+976], %rs1232;
st.local.u16 [%rd23+978], %rs1232;
st.local.u16 [%rd23+980], %rs1232;
st.local.u16 [%rd23+982], %rs1232;
st.local.u16 [%rd23+984], %rs1232;
st.local.u16 [%rd23+986], %rs1232;
st.local.u16 [%rd23+988], %rs1232;
st.local.u16 [%rd23+990], %rs1232;
st.local.u16 [%rd23+992], %rs1232;
st.local.u16 [%rd23+994], %rs1232;
st.local.u16 [%rd23+996], %rs1232;
st.local.u16 [%rd23+998], %rs1232;
st.local.u16 [%rd23+1000], %rs1232;
st.local.u16 [%rd23+1002], %rs1232;
st.local.u16 [%rd23+1004], %rs1232;
st.local.u16 [%rd23+1006], %rs1232;
st.local.u16 [%rd23+1008], %rs1232;
st.local.u16 [%rd23+1010], %rs1232;
st.local.u16 [%rd23+1012], %rs1232;
st.local.u16 [%rd23+1014], %rs1232;
st.local.u16 [%rd23+1016], %rs1232;
st.local.u16 [%rd23+1018], %rs1232;
st.local.u16 [%rd23+1020], %rs1232;
st.local.u16 [%rd23+1022], %rs1232;
st.local.u16 [%rd23+1024], %rs1232;
// ---------------------------------------------------------------------------
// Outer-loop setup. %r9569 is the outer counter (starts at 0); it is used as
// a row index below (multiplied by %r1 and compared, with small offsets,
// against %r6). %r9679, %r9675 and %r9677 are also cleared here.
// ---------------------------------------------------------------------------
mov.u32 %r9569, 0;
mov.u32 %r9679, %r9569;
mov.u32 %r9675, %r9569;
mov.u32 %r9677, %r9569;
// Top of the outer loop. %p10 is computed outside this part of the kernel;
// when it is set the per-column work is skipped ($L__BB1_1197).
$L__BB1_954:
@%p10 bra $L__BB1_1197;
// %r6246 = %r6 - %r9569 (rows remaining from the current row).
sub.s32 %r6246, %r6, %r9569;
// Row numbers and row-start offsets (row * %r1) for rows %r9569+4 .. +7:
//   %r2025/%r2026 -> row+4,  %r2027/%r2028 -> row+5,
//   %r2029/%r2030 -> row+6,  %r2031/%r2032 -> row+7.
add.s32 %r2025, %r9569, 4;
mul.lo.s32 %r2026, %r2025, %r1;
add.s32 %r2027, %r9569, 5;
add.s32 %r2028, %r2026, %r1;
add.s32 %r2029, %r9569, 6;
shl.b32 %r6247, %r1, 1;
add.s32 %r2030, %r2026, %r6247;
add.s32 %r2031, %r9569, 7;
mul.lo.s32 %r6248, %r1, 3;
add.s32 %r2032, %r2026, %r6248;
// Row numbers %r9569+1 .. +3 and row-start offsets for the current rows:
//   %r2036 -> row+0, %r2039 -> row+1, %r2038 -> row+2, %r2037 -> row+3.
add.s32 %r2033, %r9569, 1;
add.s32 %r2034, %r9569, 2;
add.s32 %r2035, %r9569, 3;
mul.lo.s32 %r2036, %r9569, %r1;
add.s32 %r2037, %r2036, %r6248;
sub.s32 %r2038, %r2037, %r1;
sub.s32 %r2039, %r2038, %r1;
// %r2040 = 16-bit mask chosen from the rows remaining (%r6246):
//   < 2 -> 4369 (0x1111), == 2 -> 13107 (0x3333),
//   == 3 -> 30583 (0x7777), >= 4 -> 65535 (0xFFFF).
// Each nibble keeps one bit per available row; the same bit-per-row,
// nibble-per-column layout is used for the masks built in the inner loop.
setp.lt.u32 %p1181, %r6246, 2;
selp.b32 %r6249, 4369, 13107, %p1181;
setp.lt.u32 %p1182, %r6246, 3;
selp.b32 %r6250, %r6249, 30583, %p1182;
setp.lt.u32 %p1183, %r6246, 4;
selp.b32 %r2040, %r6250, 65535, %p1183;
// Inner-loop state: %r9573 (column index, compared against %r5) and %r9574
// start at 0. %r6245 stays 0 and is reused as a zero source in the loop.
mov.u32 %r6245, 0;
mov.u32 %r9573, %r6245;
mov.u32 %r9574, %r6245;
// ---------------------------------------------------------------------------
// Top of the inner (column) loop; %r9573 is the column index.
// This section (a) fetches two adjacent u16 entries of the local table and
// (b) builds bits 0..3 of the 16-bit mask %r9585 for column %r9573.
//
// Bit k (k = 0..3) is set when row %r9569+4+k is below %r6 and the 32-bit
// value v at  %rd3 + 4 * (row * %r1 + column + %rd5)  satisfies
//   |v| > 4  and  |v| is odd.
// The sections that follow repeat the same test for columns +1, +2 and +3,
// filling bits 4..7, 8..11 and 12..15.
// NOTE(review): the significance of the "magnitude > 4 and odd" test is not
// stated in this part of the kernel - confirm against the CUDA source.
// ---------------------------------------------------------------------------
$L__BB1_956:
// %rs250 / %rs251 = local table entries [%r9573 >> 2] and the next one.
shr.u32 %r6252, %r9573, 2;
mul.wide.u32 %rd774, %r6252, 2;
add.s64 %rd44, %rd23, %rd774;
ld.local.u16 %rs250, [%rd44];
ld.local.u16 %rs251, [%rd44+2];
// Column out of range (>= %r5): mask is 0, skip all four rows.
setp.ge.u32 %p1184, %r9573, %r5;
mov.u32 %r9585, %r6245;
@%p1184 bra $L__BB1_965;
// Row +4 -> bit 0 (value 1).
setp.ge.u32 %p1185, %r2025, %r6;
mov.u32 %r9585, 0;
@%p1185 bra $L__BB1_959;
add.s32 %r6254, %r2026, %r9573;
cvt.u64.u32 %rd775, %r6254;
add.s64 %rd776, %rd775, %rd5;
shl.b64 %rd777, %rd776, 2;
add.s64 %rd778, %rd3, %rd777;
ld.global.u32 %r6255, [%rd778];
abs.s32 %r6256, %r6255;
setp.gt.u32 %p1186, %r6256, 4;
and.b32 %r6257, %r6256, 1;
setp.eq.b32 %p1187, %r6257, 1;
and.pred %p1188, %p1186, %p1187;
selp.u32 %r9585, 1, 0, %p1188;
// Row +5 -> bit 1 (value 2).
$L__BB1_959:
setp.ge.u32 %p1189, %r2027, %r6;
@%p1189 bra $L__BB1_961;
add.s32 %r6258, %r2028, %r9573;
cvt.u64.u32 %rd779, %r6258;
add.s64 %rd780, %rd779, %rd5;
shl.b64 %rd781, %rd780, 2;
add.s64 %rd782, %rd3, %rd781;
ld.global.u32 %r6259, [%rd782];
abs.s32 %r6260, %r6259;
setp.gt.u32 %p1190, %r6260, 4;
and.b32 %r6261, %r6260, 1;
setp.eq.b32 %p1191, %r6261, 1;
and.pred %p1192, %p1190, %p1191;
selp.b32 %r6262, 2, 0, %p1192;
or.b32 %r9585, %r6262, %r9585;
// Row +6 -> bit 2 (value 4).
$L__BB1_961:
setp.ge.u32 %p1193, %r2029, %r6;
@%p1193 bra $L__BB1_963;
add.s32 %r6263, %r2030, %r9573;
cvt.u64.u32 %rd783, %r6263;
add.s64 %rd784, %rd783, %rd5;
shl.b64 %rd785, %rd784, 2;
add.s64 %rd786, %rd3, %rd785;
ld.global.u32 %r6264, [%rd786];
abs.s32 %r6265, %r6264;
setp.gt.u32 %p1194, %r6265, 4;
and.b32 %r6266, %r6265, 1;
setp.eq.b32 %p1195, %r6266, 1;
and.pred %p1196, %p1194, %p1195;
selp.b32 %r6267, 4, 0, %p1196;
or.b32 %r9585, %r6267, %r9585;
// Row +7 -> bit 3 (value 8).
$L__BB1_963:
setp.ge.u32 %p1197, %r2031, %r6;
@%p1197 bra $L__BB1_965;
add.s32 %r6268, %r2032, %r9573;
cvt.u64.u32 %rd787, %r6268;
add.s64 %rd788, %rd787, %rd5;
shl.b64 %rd789, %rd788, 2;
add.s64 %rd790, %rd3, %rd789;
ld.global.u32 %r6269, [%rd790];
abs.s32 %r6270, %r6269;
setp.gt.u32 %p1198, %r6270, 4;
and.b32 %r6271, %r6270, 1;
setp.eq.b32 %p1199, %r6271, 1;
and.pred %p1200, %p1198, %p1199;
selp.b32 %r6272, 8, 0, %p1200;
or.b32 %r9585, %r6272, %r9585;
add.s32 %r2054, %r9573, 1;
setp.ge.u32 %p1201, %r2054, %r5;
@%p1201 bra $L__BB1_974;
setp.ge.u32 %p1202, %r2025, %r6;
@%p1202 bra $L__BB1_968;
add.s32 %r6273, %r2026, %r2054;
cvt.u64.u32 %rd791, %r6273;
add.s64 %rd792, %rd791, %rd5;
shl.b64 %rd793, %rd792, 2;
add.s64 %rd794, %rd3, %rd793;
ld.global.u32 %r6274, [%rd794];
abs.s32 %r6275, %r6274;
setp.gt.u32 %p1203, %r6275, 4;
and.b32 %r6276, %r6275, 1;
setp.eq.b32 %p1204, %r6276, 1;
and.pred %p1205, %p1203, %p1204;
selp.b32 %r6277, 16, 0, %p1205;
or.b32 %r9585, %r6277, %r9585;
$L__BB1_968:
setp.ge.u32 %p1206, %r2027, %r6;
@%p1206 bra $L__BB1_970;
add.s32 %r6278, %r2028, %r2054;
cvt.u64.u32 %rd795, %r6278;
add.s64 %rd796, %rd795, %rd5;
shl.b64 %rd797, %rd796, 2;
add.s64 %rd798, %rd3, %rd797;
ld.global.u32 %r6279, [%rd798];
abs.s32 %r6280, %r6279;
setp.gt.u32 %p1207, %r6280, 4;
and.b32 %r6281, %r6280, 1;
setp.eq.b32 %p1208, %r6281, 1;
and.pred %p1209, %p1207, %p1208;
selp.b32 %r6282, 32, 0, %p1209;
or.b32 %r9585, %r6282, %r9585;
$L__BB1_970:
setp.ge.u32 %p1210, %r2029, %r6;
@%p1210 bra $L__BB1_972;
add.s32 %r6283, %r2030, %r2054;
cvt.u64.u32 %rd799, %r6283;
add.s64 %rd800, %rd799, %rd5;
shl.b64 %rd801, %rd800, 2;
add.s64 %rd802, %rd3, %rd801;
ld.global.u32 %r6284, [%rd802];
abs.s32 %r6285, %r6284;
setp.gt.u32 %p1211, %r6285, 4;
and.b32 %r6286, %r6285, 1;
setp.eq.b32 %p1212, %r6286, 1;
and.pred %p1213, %p1211, %p1212;
selp.b32 %r6287, 64, 0, %p1213;
or.b32 %r9585, %r6287, %r9585;
$L__BB1_972:
setp.ge.u32 %p1214, %r2031, %r6;
@%p1214 bra $L__BB1_974;
add.s32 %r6288, %r2032, %r2054;
cvt.u64.u32 %rd803, %r6288;
add.s64 %rd804, %rd803, %rd5;
shl.b64 %rd805, %rd804, 2;
add.s64 %rd806, %rd3, %rd805;
ld.global.u32 %r6289, [%rd806];
abs.s32 %r6290, %r6289;
setp.gt.u32 %p1215, %r6290, 4;
and.b32 %r6291, %r6290, 1;
setp.eq.b32 %p1216, %r6291, 1;
and.pred %p1217, %p1215, %p1216;
selp.b32 %r6292, 128, 0, %p1217;
or.b32 %r9585, %r6292, %r9585;
$L__BB1_974:
add.s32 %r2063, %r9573, 2;
setp.ge.u32 %p1218, %r2063, %r5;
@%p1218 bra $L__BB1_983;
setp.ge.u32 %p1219, %r2025, %r6;
@%p1219 bra $L__BB1_977;
add.s32 %r6293, %r2026, %r2063;
cvt.u64.u32 %rd807, %r6293;
add.s64 %rd808, %rd807, %rd5;
shl.b64 %rd809, %rd808, 2;
add.s64 %rd810, %rd3, %rd809;
ld.global.u32 %r6294, [%rd810];
abs.s32 %r6295, %r6294;
setp.gt.u32 %p1220, %r6295, 4;
and.b32 %r6296, %r6295, 1;
setp.eq.b32 %p1221, %r6296, 1;
and.pred %p1222, %p1220, %p1221;
selp.b32 %r6297, 256, 0, %p1222;
or.b32 %r9585, %r6297, %r9585;
$L__BB1_977:
setp.ge.u32 %p1223, %r2027, %r6;
@%p1223 bra $L__BB1_979;
add.s32 %r6298, %r2028, %r2063;
cvt.u64.u32 %rd811, %r6298;
add.s64 %rd812, %rd811, %rd5;
shl.b64 %rd813, %rd812, 2;
add.s64 %rd814, %rd3, %rd813;
ld.global.u32 %r6299, [%rd814];
abs.s32 %r6300, %r6299;
setp.gt.u32 %p1224, %r6300, 4;
and.b32 %r6301, %r6300, 1;
setp.eq.b32 %p1225, %r6301, 1;
and.pred %p1226, %p1224, %p1225;
selp.b32 %r6302, 512, 0, %p1226;
or.b32 %r9585, %r6302, %r9585;
$L__BB1_979:
setp.ge.u32 %p1227, %r2029, %r6;
@%p1227 bra $L__BB1_981;
add.s32 %r6303, %r2030, %r2063;
cvt.u64.u32 %rd815, %r6303;
add.s64 %rd816, %rd815, %rd5;
shl.b64 %rd817, %rd816, 2;
add.s64 %rd818, %rd3, %rd817;
ld.global.u32 %r6304, [%rd818];
abs.s32 %r6305, %r6304;
setp.gt.u32 %p1228, %r6305, 4;
and.b32 %r6306, %r6305, 1;
setp.eq.b32 %p1229, %r6306, 1;
and.pred %p1230, %p1228, %p1229;
selp.b32 %r6307, 1024, 0, %p1230;
or.b32 %r9585, %r6307, %r9585;
$L__BB1_981:
setp.ge.u32 %p1231, %r2031, %r6;
@%p1231 bra $L__BB1_983;
add.s32 %r6308, %r2032, %r2063;
cvt.u64.u32 %rd819, %r6308;
add.s64 %rd820, %rd819, %rd5;
shl.b64 %rd821, %rd820, 2;
add.s64 %rd822, %rd3, %rd821;
ld.global.u32 %r6309, [%rd822];
abs.s32 %r6310, %r6309;
setp.gt.u32 %p1232, %r6310, 4;
and.b32 %r6311, %r6310, 1;
setp.eq.b32 %p1233, %r6311, 1;
and.pred %p1234, %p1232, %p1233;
selp.b32 %r6312, 2048, 0, %p1234;
or.b32 %r9585, %r6312, %r9585;
$L__BB1_983:
add.s32 %r2072, %r9573, 3;
setp.ge.u32 %p1235, %r2072, %r5;
@%p1235 bra $L__BB1_992;
setp.ge.u32 %p1236, %r2025, %r6;
@%p1236 bra $L__BB1_986;
add.s32 %r6313, %r2026, %r2072;
cvt.u64.u32 %rd823, %r6313;
add.s64 %rd824, %rd823, %rd5;
shl.b64 %rd825, %rd824, 2;
add.s64 %rd826, %rd3, %rd825;
ld.global.u32 %r6314, [%rd826];
abs.s32 %r6315, %r6314;
setp.gt.u32 %p1237, %r6315, 4;
and.b32 %r6316, %r6315, 1;
setp.eq.b32 %p1238, %r6316, 1;
and.pred %p1239, %p1237, %p1238;
selp.b32 %r6317, 4096, 0, %p1239;
or.b32 %r9585, %r6317, %r9585;
$L__BB1_986:
setp.ge.u32 %p1240, %r2027, %r6;
@%p1240 bra $L__BB1_988;
add.s32 %r6318, %r2028, %r2072;
cvt.u64.u32 %rd827, %r6318;
add.s64 %rd828, %rd827, %rd5;
shl.b64 %rd829, %rd828, 2;
add.s64 %rd830, %rd3, %rd829;
ld.global.u32 %r6319, [%rd830];
abs.s32 %r6320, %r6319;
setp.gt.u32 %p1241, %r6320, 4;
and.b32 %r6321, %r6320, 1;
setp.eq.b32 %p1242, %r6321, 1;
and.pred %p1243, %p1241, %p1242;
selp.b32 %r6322, 8192, 0, %p1243;
or.b32 %r9585, %r6322, %r9585;
$L__BB1_988:
setp.ge.u32 %p1244, %r2029, %r6;
@%p1244 bra $L__BB1_990;
add.s32 %r6323, %r2030, %r2072;
cvt.u64.u32 %rd831, %r6323;
add.s64 %rd832, %rd831, %rd5;
shl.b64 %rd833, %rd832, 2;
add.s64 %rd834, %rd3, %rd833;
ld.global.u32 %r6324, [%rd834];
abs.s32 %r6325, %r6324;
setp.gt.u32 %p1245, %r6325, 4;
and.b32 %r6326, %r6325, 1;
setp.eq.b32 %p1246, %r6326, 1;
and.pred %p1247, %p1245, %p1246;
selp.b32 %r6327, 16384, 0, %p1247;
or.b32 %r9585, %r6327, %r9585;
$L__BB1_990:
setp.ge.u32 %p1248, %r2031, %r6;
@%p1248 bra $L__BB1_992;
add.s32 %r6328, %r2032, %r2072;
cvt.u64.u32 %rd835, %r6328;
add.s64 %rd836, %rd835, %rd5;
shl.b64 %rd837, %rd836, 2;
add.s64 %rd838, %rd3, %rd837;
ld.global.u32 %r6329, [%rd838];
abs.s32 %r6330, %r6329;
setp.gt.u32 %p1249, %r6330, 4;
and.b32 %r6331, %r6330, 1;
setp.eq.b32 %p1250, %r6331, 1;
and.pred %p1251, %p1249, %p1250;
selp.b32 %r6332, 32768, 0, %p1251;
or.b32 %r9585, %r6332, %r9585;
$L__BB1_992:
add.s32 %r6334, %r9573, 4;
setp.ge.u32 %p1252, %r6334, %r5;
mov.u32 %r9601, 0;
@%p1252 bra $L__BB1_1001;
setp.ge.u32 %p1253, %r2025, %r6;
mov.u32 %r9601, 0;
@%p1253 bra $L__BB1_995;
add.s32 %r6336, %r2026, %r9573;
add.s32 %r6337, %r6336, 4;
cvt.u64.u32 %rd839, %r6337;
add.s64 %rd840, %rd839, %rd5;
shl.b64 %rd841, %rd840, 2;
add.s64 %rd842, %rd3, %rd841;
ld.global.u32 %r6338, [%rd842];
abs.s32 %r6339, %r6338;
setp.gt.u32 %p1254, %r6339, 4;
and.b32 %r6340, %r6339, 1;
setp.eq.b32 %p1255, %r6340, 1;
and.pred %p1256, %p1254, %p1255;
selp.u32 %r9601, 1, 0, %p1256;
$L__BB1_995:
setp.ge.u32 %p1257, %r2027, %r6;
@%p1257 bra $L__BB1_997;
add.s32 %r6341, %r2028, %r9573;
add.s32 %r6342, %r6341, 4;
cvt.u64.u32 %rd843, %r6342;
add.s64 %rd844, %rd843, %rd5;
shl.b64 %rd845, %rd844, 2;
add.s64 %rd846, %rd3, %rd845;
ld.global.u32 %r6343, [%rd846];
abs.s32 %r6344, %r6343;
setp.gt.u32 %p1258, %r6344, 4;
and.b32 %r6345, %r6344, 1;
setp.eq.b32 %p1259, %r6345, 1;
and.pred %p1260, %p1258, %p1259;
selp.b32 %r6346, 2, 0, %p1260;
or.b32 %r9601, %r6346, %r9601;
$L__BB1_997:
setp.ge.u32 %p1261, %r2029, %r6;
@%p1261 bra $L__BB1_999;
add.s32 %r6347, %r2030, %r9573;
add.s32 %r6348, %r6347, 4;
cvt.u64.u32 %rd847, %r6348;
add.s64 %rd848, %rd847, %rd5;
shl.b64 %rd849, %rd848, 2;
add.s64 %rd850, %rd3, %rd849;
ld.global.u32 %r6349, [%rd850];
abs.s32 %r6350, %r6349;
setp.gt.u32 %p1262, %r6350, 4;
and.b32 %r6351, %r6350, 1;
setp.eq.b32 %p1263, %r6351, 1;
and.pred %p1264, %p1262, %p1263;
selp.b32 %r6352, 4, 0, %p1264;
or.b32 %r9601, %r6352, %r9601;
$L__BB1_999:
setp.ge.u32 %p1265, %r2031, %r6;
@%p1265 bra $L__BB1_1001;
add.s32 %r6353, %r2032, %r9573;
add.s32 %r6354, %r6353, 4;
cvt.u64.u32 %rd851, %r6354;
add.s64 %rd852, %rd851, %rd5;
shl.b64 %rd853, %rd852, 2;
add.s64 %rd854, %rd3, %rd853;
ld.global.u32 %r6355, [%rd854];
abs.s32 %r6356, %r6355;
setp.gt.u32 %p1266, %r6356, 4;
and.b32 %r6357, %r6356, 1;
setp.eq.b32 %p1267, %r6357, 1;
and.pred %p1268, %p1266, %p1267;
selp.b32 %r6358, 8, 0, %p1268;
or.b32 %r9601, %r6358, %r9601;
$L__BB1_1001:
add.s32 %r2089, %r9573, 5;
setp.ge.u32 %p1269, %r2089, %r5;
@%p1269 bra $L__BB1_1010;
setp.ge.u32 %p1270, %r2025, %r6;
@%p1270 bra $L__BB1_1004;
add.s32 %r6359, %r2026, %r2089;
cvt.u64.u32 %rd855, %r6359;
add.s64 %rd856, %rd855, %rd5;
shl.b64 %rd857, %rd856, 2;
add.s64 %rd858, %rd3, %rd857;
ld.global.u32 %r6360, [%rd858];
abs.s32 %r6361, %r6360;
setp.gt.u32 %p1271, %r6361, 4;
and.b32 %r6362, %r6361, 1;
setp.eq.b32 %p1272, %r6362, 1;
and.pred %p1273, %p1271, %p1272;
selp.b32 %r6363, 16, 0, %p1273;
or.b32 %r9601, %r6363, %r9601;
$L__BB1_1004:
setp.ge.u32 %p1274, %r2027, %r6;
@%p1274 bra $L__BB1_1006;
add.s32 %r6364, %r2028, %r2089;
cvt.u64.u32 %rd859, %r6364;
add.s64 %rd860, %rd859, %rd5;
shl.b64 %rd861, %rd860, 2;
add.s64 %rd862, %rd3, %rd861;
ld.global.u32 %r6365, [%rd862];
abs.s32 %r6366, %r6365;
setp.gt.u32 %p1275, %r6366, 4;
and.b32 %r6367, %r6366, 1;
setp.eq.b32 %p1276, %r6367, 1;
and.pred %p1277, %p1275, %p1276;
selp.b32 %r6368, 32, 0, %p1277;
or.b32 %r9601, %r6368, %r9601;
$L__BB1_1006:
setp.ge.u32 %p1278, %r2029, %r6;
@%p1278 bra $L__BB1_1008;
add.s32 %r6369, %r2030, %r2089;
cvt.u64.u32 %rd863, %r6369;
add.s64 %rd864, %rd863, %rd5;
shl.b64 %rd865, %rd864, 2;
add.s64 %rd866, %rd3, %rd865;
ld.global.u32 %r6370, [%rd866];
abs.s32 %r6371, %r6370;
setp.gt.u32 %p1279, %r6371, 4;
and.b32 %r6372, %r6371, 1;
setp.eq.b32 %p1280, %r6372, 1;
and.pred %p1281, %p1279, %p1280;
selp.b32 %r6373, 64, 0, %p1281;
or.b32 %r9601, %r6373, %r9601;
$L__BB1_1008:
setp.ge.u32 %p1282, %r2031, %r6;
@%p1282 bra $L__BB1_1010;
add.s32 %r6374, %r2032, %r2089;
cvt.u64.u32 %rd867, %r6374;
add.s64 %rd868, %rd867, %rd5;
shl.b64 %rd869, %rd868, 2;
add.s64 %rd870, %rd3, %rd869;
ld.global.u32 %r6375, [%rd870];
abs.s32 %r6376, %r6375;
setp.gt.u32 %p1283, %r6376, 4;
and.b32 %r6377, %r6376, 1;
setp.eq.b32 %p1284, %r6377, 1;
and.pred %p1285, %p1283, %p1284;
selp.b32 %r6378, 128, 0, %p1285;
or.b32 %r9601, %r6378, %r9601;
$L__BB1_1010:
add.s32 %r2098, %r9573, 6;
setp.ge.u32 %p1286, %r2098, %r5;
@%p1286 bra $L__BB1_1019;
setp.ge.u32 %p1287, %r2025, %r6;
@%p1287 bra $L__BB1_1013;
add.s32 %r6379, %r2026, %r2098;
cvt.u64.u32 %rd871, %r6379;
add.s64 %rd872, %rd871, %rd5;
shl.b64 %rd873, %rd872, 2;
add.s64 %rd874, %rd3, %rd873;
ld.global.u32 %r6380, [%rd874];
abs.s32 %r6381, %r6380;
setp.gt.u32 %p1288, %r6381, 4;
and.b32 %r6382, %r6381, 1;
setp.eq.b32 %p1289, %r6382, 1;
and.pred %p1290, %p1288, %p1289;
selp.b32 %r6383, 256, 0, %p1290;
or.b32 %r9601, %r6383, %r9601;
$L__BB1_1013:
setp.ge.u32 %p1291, %r2027, %r6;
@%p1291 bra $L__BB1_1015;
add.s32 %r6384, %r2028, %r2098;
cvt.u64.u32 %rd875, %r6384;
add.s64 %rd876, %rd875, %rd5;
shl.b64 %rd877, %rd876, 2;
add.s64 %rd878, %rd3, %rd877;
ld.global.u32 %r6385, [%rd878];
abs.s32 %r6386, %r6385;
setp.gt.u32 %p1292, %r6386, 4;
and.b32 %r6387, %r6386, 1;
setp.eq.b32 %p1293, %r6387, 1;
and.pred %p1294, %p1292, %p1293;
selp.b32 %r6388, 512, 0, %p1294;
or.b32 %r9601, %r6388, %r9601;
$L__BB1_1015:
setp.ge.u32 %p1295, %r2029, %r6;
@%p1295 bra $L__BB1_1017;
add.s32 %r6389, %r2030, %r2098;
cvt.u64.u32 %rd879, %r6389;
add.s64 %rd880, %rd879, %rd5;
shl.b64 %rd881, %rd880, 2;
add.s64 %rd882, %rd3, %rd881;
ld.global.u32 %r6390, [%rd882];
abs.s32 %r6391, %r6390;
setp.gt.u32 %p1296, %r6391, 4;
and.b32 %r6392, %r6391, 1;
setp.eq.b32 %p1297, %r6392, 1;
and.pred %p1298, %p1296, %p1297;
selp.b32 %r6393, 1024, 0, %p1298;
or.b32 %r9601, %r6393, %r9601;
$L__BB1_1017:
setp.ge.u32 %p1299, %r2031, %r6;
@%p1299 bra $L__BB1_1019;
add.s32 %r6394, %r2032, %r2098;
cvt.u64.u32 %rd883, %r6394;
add.s64 %rd884, %rd883, %rd5;
shl.b64 %rd885, %rd884, 2;
add.s64 %rd886, %rd3, %rd885;
ld.global.u32 %r6395, [%rd886];
abs.s32 %r6396, %r6395;
setp.gt.u32 %p1300, %r6396, 4;
and.b32 %r6397, %r6396, 1;
setp.eq.b32 %p1301, %r6397, 1;
and.pred %p1302, %p1300, %p1301;
selp.b32 %r6398, 2048, 0, %p1302;
or.b32 %r9601, %r6398, %r9601;
$L__BB1_1019:
add.s32 %r2107, %r9573, 7;
setp.ge.u32 %p1303, %r2107, %r5;
@%p1303 bra $L__BB1_1028;
setp.ge.u32 %p1304, %r2025, %r6;
@%p1304 bra $L__BB1_1022;
add.s32 %r6399, %r2026, %r2107;
cvt.u64.u32 %rd887, %r6399;
add.s64 %rd888, %rd887, %rd5;
shl.b64 %rd889, %rd888, 2;
add.s64 %rd890, %rd3, %rd889;
ld.global.u32 %r6400, [%rd890];
abs.s32 %r6401, %r6400;
setp.gt.u32 %p1305, %r6401, 4;
and.b32 %r6402, %r6401, 1;
setp.eq.b32 %p1306, %r6402, 1;
and.pred %p1307, %p1305, %p1306;
selp.b32 %r6403, 4096, 0, %p1307;
or.b32 %r9601, %r6403, %r9601;
$L__BB1_1022:
setp.ge.u32 %p1308, %r2027, %r6;
@%p1308 bra $L__BB1_1024;
add.s32 %r6404, %r2028, %r2107;
cvt.u64.u32 %rd891, %r6404;
add.s64 %rd892, %rd891, %rd5;
shl.b64 %rd893, %rd892, 2;
add.s64 %rd894, %rd3, %rd893;
ld.global.u32 %r6405, [%rd894];
abs.s32 %r6406, %r6405;
setp.gt.u32 %p1309, %r6406, 4;
and.b32 %r6407, %r6406, 1;
setp.eq.b32 %p1310, %r6407, 1;
and.pred %p1311, %p1309, %p1310;
selp.b32 %r6408, 8192, 0, %p1311;
or.b32 %r9601, %r6408, %r9601;
$L__BB1_1024:
setp.ge.u32 %p1312, %r2029, %r6;
@%p1312 bra $L__BB1_1026;
add.s32 %r6409, %r2030, %r2107;
cvt.u64.u32 %rd895, %r6409;
add.s64 %rd896, %rd895, %rd5;
shl.b64 %rd897, %rd896, 2;
add.s64 %rd898, %rd3, %rd897;
ld.global.u32 %r6410, [%rd898];
abs.s32 %r6411, %r6410;
setp.gt.u32 %p1313, %r6411, 4;
and.b32 %r6412, %r6411, 1;
setp.eq.b32 %p1314, %r6412, 1;
and.pred %p1315, %p1313, %p1314;
selp.b32 %r6413, 16384, 0, %p1315;
or.b32 %r9601, %r6413, %r9601;
$L__BB1_1026:
setp.ge.u32 %p1316, %r2031, %r6;
@%p1316 bra $L__BB1_1028;
add.s32 %r6414, %r2032, %r2107;
cvt.u64.u32 %rd899, %r6414;
add.s64 %rd900, %rd899, %rd5;
shl.b64 %rd901, %rd900, 2;
add.s64 %rd902, %rd3, %rd901;
ld.global.u32 %r6415, [%rd902];
abs.s32 %r6416, %r6415;
setp.gt.u32 %p1317, %r6416, 4;
and.b32 %r6417, %r6416, 1;
setp.eq.b32 %p1318, %r6417, 1;
and.pred %p1319, %p1317, %p1318;
selp.b32 %r6418, 32768, 0, %p1319;
or.b32 %r9601, %r6418, %r9601;
$L__BB1_1028:
mov.b32 %r2116, {%rs250, %rs251};
add.s32 %r6420, %r2036, %r9573;
cvt.u64.u32 %rd903, %r6420;
add.s64 %rd904, %rd903, %rd5;
shl.b64 %rd905, %rd904, 2;
add.s64 %rd45, %rd3, %rd905;
add.s32 %r6421, %r2039, %r9573;
cvt.u64.u32 %rd906, %r6421;
add.s64 %rd907, %rd906, %rd5;
shl.b64 %rd908, %rd907, 2;
add.s64 %rd46, %rd3, %rd908;
add.s32 %r6422, %r2038, %r9573;
cvt.u64.u32 %rd909, %r6422;
add.s64 %rd910, %rd909, %rd5;
shl.b64 %rd911, %rd910, 2;
add.s64 %rd47, %rd3, %rd911;
add.s32 %r6423, %r2037, %r9573;
cvt.u64.u32 %rd912, %r6423;
add.s64 %rd913, %rd912, %rd5;
shl.b64 %rd914, %rd913, 2;
add.s64 %rd48, %rd3, %rd914;
mov.u32 %r9617, 0;
@%p1184 bra $L__BB1_1037;
setp.le.u32 %p1321, %r6, %r9569;
mov.u32 %r9617, 0;
@%p1321 bra $L__BB1_1031;
ld.global.u32 %r6425, [%rd45];
abs.s32 %r6426, %r6425;
setp.gt.u32 %p1322, %r6426, 4;
and.b32 %r6427, %r6426, 1;
setp.eq.b32 %p1323, %r6427, 1;
and.pred %p1324, %p1322, %p1323;
selp.u32 %r9617, 1, 0, %p1324;
$L__BB1_1031:
setp.ge.u32 %p1325, %r2033, %r6;
@%p1325 bra $L__BB1_1033;
ld.global.u32 %r6428, [%rd46];
abs.s32 %r6429, %r6428;
setp.gt.u32 %p1326, %r6429, 4;
and.b32 %r6430, %r6429, 1;
setp.eq.b32 %p1327, %r6430, 1;
and.pred %p1328, %p1326, %p1327;
selp.b32 %r6431, 2, 0, %p1328;
or.b32 %r9617, %r6431, %r9617;
$L__BB1_1033:
setp.ge.u32 %p1329, %r2034, %r6;
@%p1329 bra $L__BB1_1035;
ld.global.u32 %r6432, [%rd47];
abs.s32 %r6433, %r6432;
setp.gt.u32 %p1330, %r6433, 4;
and.b32 %r6434, %r6433, 1;
setp.eq.b32 %p1331, %r6434, 1;
and.pred %p1332, %p1330, %p1331;
selp.b32 %r6435, 4, 0, %p1332;
or.b32 %r9617, %r6435, %r9617;
$L__BB1_1035:
setp.ge.u32 %p1333, %r2035, %r6;
@%p1333 bra $L__BB1_1037;
ld.global.u32 %r6436, [%rd48];
abs.s32 %r6437, %r6436;
setp.gt.u32 %p1334, %r6437, 4;
and.b32 %r6438, %r6437, 1;
setp.eq.b32 %p1335, %r6438, 1;
and.pred %p1336, %p1334, %p1335;
selp.b32 %r6439, 8, 0, %p1336;
or.b32 %r9617, %r6439, %r9617;
$L__BB1_1037:
add.s32 %r6440, %r2036, %r2054;
cvt.u64.u32 %rd915, %r6440;
add.s64 %rd916, %rd915, %rd5;
shl.b64 %rd917, %rd916, 2;
add.s64 %rd49, %rd3, %rd917;
add.s32 %r6441, %r2039, %r2054;
cvt.u64.u32 %rd918, %r6441;
add.s64 %rd919, %rd918, %rd5;
shl.b64 %rd920, %rd919, 2;
add.s64 %rd50, %rd3, %rd920;
add.s32 %r6442, %r2038, %r2054;
cvt.u64.u32 %rd921, %r6442;
add.s64 %rd922, %rd921, %rd5;
shl.b64 %rd923, %rd922, 2;
add.s64 %rd51, %rd3, %rd923;
add.s32 %r6443, %r2037, %r2054;
cvt.u64.u32 %rd924, %r6443;
add.s64 %rd925, %rd924, %rd5;
shl.b64 %rd926, %rd925, 2;
add.s64 %rd52, %rd3, %rd926;
shl.b32 %r6444, %r9601, 16;
or.b32 %r2125, %r6444, %r9585;
@%p1201 bra $L__BB1_1046;
setp.le.u32 %p1338, %r6, %r9569;
@%p1338 bra $L__BB1_1040;
ld.global.u32 %r6445, [%rd49];
abs.s32 %r6446, %r6445;
setp.gt.u32 %p1339, %r6446, 4;
and.b32 %r6447, %r6446, 1;
setp.eq.b32 %p1340, %r6447, 1;
and.pred %p1341, %p1339, %p1340;
selp.b32 %r6448, 16, 0, %p1341;
or.b32 %r9617, %r6448, %r9617;
$L__BB1_1040:
setp.ge.u32 %p1342, %r2033, %r6;
@%p1342 bra $L__BB1_1042;
ld.global.u32 %r6449, [%rd50];
abs.s32 %r6450, %r6449;
setp.gt.u32 %p1343, %r6450, 4;
and.b32 %r6451, %r6450, 1;
setp.eq.b32 %p1344, %r6451, 1;
and.pred %p1345, %p1343, %p1344;
selp.b32 %r6452, 32, 0, %p1345;
or.b32 %r9617, %r6452, %r9617;
$L__BB1_1042:
setp.ge.u32 %p1346, %r2034, %r6;
@%p1346 bra $L__BB1_1044;
ld.global.u32 %r6453, [%rd51];
abs.s32 %r6454, %r6453;
setp.gt.u32 %p1347, %r6454, 4;
and.b32 %r6455, %r6454, 1;
setp.eq.b32 %p1348, %r6455, 1;
and.pred %p1349, %p1347, %p1348;
selp.b32 %r6456, 64, 0, %p1349;
or.b32 %r9617, %r6456, %r9617;
$L__BB1_1044:
setp.ge.u32 %p1350, %r2035, %r6;
@%p1350 bra $L__BB1_1046;
ld.global.u32 %r6457, [%rd52];
abs.s32 %r6458, %r6457;
setp.gt.u32 %p1351, %r6458, 4;
and.b32 %r6459, %r6458, 1;
setp.eq.b32 %p1352, %r6459, 1;
and.pred %p1353, %p1351, %p1352;
selp.b32 %r6460, 128, 0, %p1353;
or.b32 %r9617, %r6460, %r9617;
$L__BB1_1046:
add.s32 %r6461, %r2036, %r2063;
cvt.u64.u32 %rd927, %r6461;
add.s64 %rd928, %rd927, %rd5;
shl.b64 %rd929, %rd928, 2;
add.s64 %rd53, %rd3, %rd929;
add.s32 %r6462, %r2039, %r2063;
cvt.u64.u32 %rd930, %r6462;
add.s64 %rd931, %rd930, %rd5;
shl.b64 %rd932, %rd931, 2;
add.s64 %rd54, %rd3, %rd932;
add.s32 %r6463, %r2038, %r2063;
cvt.u64.u32 %rd933, %r6463;
add.s64 %rd934, %rd933, %rd5;
shl.b64 %rd935, %rd934, 2;
add.s64 %rd55, %rd3, %rd935;
add.s32 %r6464, %r2037, %r2063;
cvt.u64.u32 %rd936, %r6464;
add.s64 %rd937, %rd936, %rd5;
shl.b64 %rd938, %rd937, 2;
add.s64 %rd56, %rd3, %rd938;
@%p1218 bra $L__BB1_1055;
setp.le.u32 %p1355, %r6, %r9569;
@%p1355 bra $L__BB1_1049;
ld.global.u32 %r6465, [%rd53];
abs.s32 %r6466, %r6465;
setp.gt.u32 %p1356, %r6466, 4;
and.b32 %r6467, %r6466, 1;
setp.eq.b32 %p1357, %r6467, 1;
and.pred %p1358, %p1356, %p1357;
selp.b32 %r6468, 256, 0, %p1358;
or.b32 %r9617, %r6468, %r9617;
$L__BB1_1049:
setp.ge.u32 %p1359, %r2033, %r6;
@%p1359 bra $L__BB1_1051;
ld.global.u32 %r6469, [%rd54];
abs.s32 %r6470, %r6469;
setp.gt.u32 %p1360, %r6470, 4;
and.b32 %r6471, %r6470, 1;
setp.eq.b32 %p1361, %r6471, 1;
and.pred %p1362, %p1360, %p1361;
selp.b32 %r6472, 512, 0, %p1362;
or.b32 %r9617, %r6472, %r9617;
$L__BB1_1051:
setp.ge.u32 %p1363, %r2034, %r6;
@%p1363 bra $L__BB1_1053;
ld.global.u32 %r6473, [%rd55];
abs.s32 %r6474, %r6473;
setp.gt.u32 %p1364, %r6474, 4;
and.b32 %r6475, %r6474, 1;
setp.eq.b32 %p1365, %r6475, 1;
and.pred %p1366, %p1364, %p1365;
selp.b32 %r6476, 1024, 0, %p1366;
or.b32 %r9617, %r6476, %r9617;
$L__BB1_1053:
setp.ge.u32 %p1367, %r2035, %r6;
@%p1367 bra $L__BB1_1055;
ld.global.u32 %r6477, [%rd56];
abs.s32 %r6478, %r6477;
setp.gt.u32 %p1368, %r6478, 4;
and.b32 %r6479, %r6478, 1;
setp.eq.b32 %p1369, %r6479, 1;
and.pred %p1370, %p1368, %p1369;
selp.b32 %r6480, 2048, 0, %p1370;
or.b32 %r9617, %r6480, %r9617;
$L__BB1_1055:
add.s32 %r6481, %r2036, %r2072;
cvt.u64.u32 %rd939, %r6481;
add.s64 %rd940, %rd939, %rd5;
shl.b64 %rd941, %rd940, 2;
add.s64 %rd57, %rd3, %rd941;
add.s32 %r6482, %r2039, %r2072;
cvt.u64.u32 %rd942, %r6482;
add.s64 %rd943, %rd942, %rd5;
shl.b64 %rd944, %rd943, 2;
add.s64 %rd58, %rd3, %rd944;
add.s32 %r6483, %r2038, %r2072;
cvt.u64.u32 %rd945, %r6483;
add.s64 %rd946, %rd945, %rd5;
shl.b64 %rd947, %rd946, 2;
add.s64 %rd59, %rd3, %rd947;
add.s32 %r6484, %r2037, %r2072;
cvt.u64.u32 %rd948, %r6484;
add.s64 %rd949, %rd948, %rd5;
shl.b64 %rd950, %rd949, 2;
add.s64 %rd60, %rd3, %rd950;
@%p1235 bra $L__BB1_1064;
setp.le.u32 %p1372, %r6, %r9569;
@%p1372 bra $L__BB1_1058;
ld.global.u32 %r6485, [%rd57];
abs.s32 %r6486, %r6485;
setp.gt.u32 %p1373, %r6486, 4;
and.b32 %r6487, %r6486, 1;
setp.eq.b32 %p1374, %r6487, 1;
and.pred %p1375, %p1373, %p1374;
selp.b32 %r6488, 4096, 0, %p1375;
or.b32 %r9617, %r6488, %r9617;
$L__BB1_1058:
setp.ge.u32 %p1376, %r2033, %r6;
@%p1376 bra $L__BB1_1060;
ld.global.u32 %r6489, [%rd58];
abs.s32 %r6490, %r6489;
setp.gt.u32 %p1377, %r6490, 4;
and.b32 %r6491, %r6490, 1;
setp.eq.b32 %p1378, %r6491, 1;
and.pred %p1379, %p1377, %p1378;
selp.b32 %r6492, 8192, 0, %p1379;
or.b32 %r9617, %r6492, %r9617;
$L__BB1_1060:
setp.ge.u32 %p1380, %r2034, %r6;
@%p1380 bra $L__BB1_1062;
ld.global.u32 %r6493, [%rd59];
abs.s32 %r6494, %r6493;
setp.gt.u32 %p1381, %r6494, 4;
and.b32 %r6495, %r6494, 1;
setp.eq.b32 %p1382, %r6495, 1;
and.pred %p1383, %p1381, %p1382;
selp.b32 %r6496, 16384, 0, %p1383;
or.b32 %r9617, %r6496, %r9617;
$L__BB1_1062:
setp.ge.u32 %p1384, %r2035, %r6;
@%p1384 bra $L__BB1_1064;
ld.global.u32 %r6497, [%rd60];
abs.s32 %r6498, %r6497;
setp.gt.u32 %p1385, %r6498, 4;
and.b32 %r6499, %r6498, 1;
setp.eq.b32 %p1386, %r6499, 1;
and.pred %p1387, %p1385, %p1386;
selp.b32 %r6500, 32768, 0, %p1387;
or.b32 %r9617, %r6500, %r9617;
$L__BB1_1064:
mov.u32 %r9633, 0;
@%p1252 bra $L__BB1_1073;
setp.le.u32 %p1389, %r6, %r9569;
mov.u32 %r9633, 0;
@%p1389 bra $L__BB1_1067;
add.s32 %r6505, %r6420, 4;
cvt.u64.u32 %rd951, %r6505;
add.s64 %rd952, %rd951, %rd5;
shl.b64 %rd953, %rd952, 2;
add.s64 %rd954, %rd3, %rd953;
ld.global.u32 %r6506, [%rd954];
abs.s32 %r6507, %r6506;
setp.gt.u32 %p1390, %r6507, 4;
and.b32 %r6508, %r6507, 1;
setp.eq.b32 %p1391, %r6508, 1;
and.pred %p1392, %p1390, %p1391;
selp.u32 %r9633, 1, 0, %p1392;
$L__BB1_1067:
setp.ge.u32 %p1393, %r2033, %r6;
@%p1393 bra $L__BB1_1069;
add.s32 %r6510, %r6421, 4;
cvt.u64.u32 %rd955, %r6510;
add.s64 %rd956, %rd955, %rd5;
shl.b64 %rd957, %rd956, 2;
add.s64 %rd958, %rd3, %rd957;
ld.global.u32 %r6511, [%rd958];
abs.s32 %r6512, %r6511;
setp.gt.u32 %p1394, %r6512, 4;
and.b32 %r6513, %r6512, 1;
setp.eq.b32 %p1395, %r6513, 1;
and.pred %p1396, %p1394, %p1395;
selp.b32 %r6514, 2, 0, %p1396;
or.b32 %r9633, %r6514, %r9633;
$L__BB1_1069:
setp.ge.u32 %p1397, %r2034, %r6;
@%p1397 bra $L__BB1_1071;
add.s32 %r6516, %r6422, 4;
cvt.u64.u32 %rd959, %r6516;
add.s64 %rd960, %rd959, %rd5;
shl.b64 %rd961, %rd960, 2;
add.s64 %rd962, %rd3, %rd961;
ld.global.u32 %r6517, [%rd962];
abs.s32 %r6518, %r6517;
setp.gt.u32 %p1398, %r6518, 4;
and.b32 %r6519, %r6518, 1;
setp.eq.b32 %p1399, %r6519, 1;
and.pred %p1400, %p1398, %p1399;
selp.b32 %r6520, 4, 0, %p1400;
or.b32 %r9633, %r6520, %r9633;
$L__BB1_1071:
setp.ge.u32 %p1401, %r2035, %r6;
@%p1401 bra $L__BB1_1073;
add.s32 %r6522, %r6423, 4;
cvt.u64.u32 %rd963, %r6522;
add.s64 %rd964, %rd963, %rd5;
shl.b64 %rd965, %rd964, 2;
add.s64 %rd966, %rd3, %rd965;
ld.global.u32 %r6523, [%rd966];
abs.s32 %r6524, %r6523;
setp.gt.u32 %p1402, %r6524, 4;
and.b32 %r6525, %r6524, 1;
setp.eq.b32 %p1403, %r6525, 1;
and.pred %p1404, %p1402, %p1403;
selp.b32 %r6526, 8, 0, %p1404;
or.b32 %r9633, %r6526, %r9633;
$L__BB1_1073:
@%p1269 bra $L__BB1_1082;
setp.le.u32 %p1406, %r6, %r9569;
@%p1406 bra $L__BB1_1076;
add.s32 %r6527, %r2036, %r2089;
cvt.u64.u32 %rd967, %r6527;
add.s64 %rd968, %rd967, %rd5;
shl.b64 %rd969, %rd968, 2;
add.s64 %rd970, %rd3, %rd969;
ld.global.u32 %r6528, [%rd970];
abs.s32 %r6529, %r6528;
setp.gt.u32 %p1407, %r6529, 4;
and.b32 %r6530, %r6529, 1;
setp.eq.b32 %p1408, %r6530, 1;
and.pred %p1409, %p1407, %p1408;
selp.b32 %r6531, 16, 0, %p1409;
or.b32 %r9633, %r6531, %r9633;
$L__BB1_1076:
setp.ge.u32 %p1410, %r2033, %r6;
@%p1410 bra $L__BB1_1078;
add.s32 %r6532, %r2039, %r2089;
cvt.u64.u32 %rd971, %r6532;
add.s64 %rd972, %rd971, %rd5;
shl.b64 %rd973, %rd972, 2;
add.s64 %rd974, %rd3, %rd973;
ld.global.u32 %r6533, [%rd974];
abs.s32 %r6534, %r6533;
setp.gt.u32 %p1411, %r6534, 4;
and.b32 %r6535, %r6534, 1;
setp.eq.b32 %p1412, %r6535, 1;
and.pred %p1413, %p1411, %p1412;
selp.b32 %r6536, 32, 0, %p1413;
or.b32 %r9633, %r6536, %r9633;
$L__BB1_1078:
setp.ge.u32 %p1414, %r2034, %r6;
@%p1414 bra $L__BB1_1080;
add.s32 %r6537, %r2038, %r2089;
cvt.u64.u32 %rd975, %r6537;
add.s64 %rd976, %rd975, %rd5;
shl.b64 %rd977, %rd976, 2;
add.s64 %rd978, %rd3, %rd977;
ld.global.u32 %r6538, [%rd978];
abs.s32 %r6539, %r6538;
setp.gt.u32 %p1415, %r6539, 4;
and.b32 %r6540, %r6539, 1;
setp.eq.b32 %p1416, %r6540, 1;
and.pred %p1417, %p1415, %p1416;
selp.b32 %r6541, 64, 0, %p1417;
or.b32 %r9633, %r6541, %r9633;
$L__BB1_1080:
setp.ge.u32 %p1418, %r2035, %r6;
@%p1418 bra $L__BB1_1082;
add.s32 %r6542, %r2037, %r2089;
cvt.u64.u32 %rd979, %r6542;
add.s64 %rd980, %rd979, %rd5;
shl.b64 %rd981, %rd980, 2;
add.s64 %rd982, %rd3, %rd981;
ld.global.u32 %r6543, [%rd982];
abs.s32 %r6544, %r6543;
setp.gt.u32 %p1419, %r6544, 4;
and.b32 %r6545, %r6544, 1;
setp.eq.b32 %p1420, %r6545, 1;
and.pred %p1421, %p1419, %p1420;
selp.b32 %r6546, 128, 0, %p1421;
or.b32 %r9633, %r6546, %r9633;
$L__BB1_1082:
@%p1286 bra $L__BB1_1091;
setp.le.u32 %p1423, %r6, %r9569;
@%p1423 bra $L__BB1_1085;
add.s32 %r6547, %r2036, %r2098;
cvt.u64.u32 %rd983, %r6547;
add.s64 %rd984, %rd983, %rd5;
shl.b64 %rd985, %rd984, 2;
add.s64 %rd986, %rd3, %rd985;
ld.global.u32 %r6548, [%rd986];
abs.s32 %r6549, %r6548;
setp.gt.u32 %p1424, %r6549, 4;
and.b32 %r6550, %r6549, 1;
setp.eq.b32 %p1425, %r6550, 1;
and.pred %p1426, %p1424, %p1425;
selp.b32 %r6551, 256, 0, %p1426;
or.b32 %r9633, %r6551, %r9633;
$L__BB1_1085:
setp.ge.u32 %p1427, %r2033, %r6;
@%p1427 bra $L__BB1_1087;
add.s32 %r6552, %r2039, %r2098;
cvt.u64.u32 %rd987, %r6552;
add.s64 %rd988, %rd987, %rd5;
shl.b64 %rd989, %rd988, 2;
add.s64 %rd990, %rd3, %rd989;
ld.global.u32 %r6553, [%rd990];
abs.s32 %r6554, %r6553;
setp.gt.u32 %p1428, %r6554, 4;
and.b32 %r6555, %r6554, 1;
setp.eq.b32 %p1429, %r6555, 1;
and.pred %p1430, %p1428, %p1429;
selp.b32 %r6556, 512, 0, %p1430;
or.b32 %r9633, %r6556, %r9633;
$L__BB1_1087:
setp.ge.u32 %p1431, %r2034, %r6;
@%p1431 bra $L__BB1_1089;
add.s32 %r6557, %r2038, %r2098;
cvt.u64.u32 %rd991, %r6557;
add.s64 %rd992, %rd991, %rd5;
shl.b64 %rd993, %rd992, 2;
add.s64 %rd994, %rd3, %rd993;
ld.global.u32 %r6558, [%rd994];
abs.s32 %r6559, %r6558;
setp.gt.u32 %p1432, %r6559, 4;
and.b32 %r6560, %r6559, 1;
setp.eq.b32 %p1433, %r6560, 1;
and.pred %p1434, %p1432, %p1433;
selp.b32 %r6561, 1024, 0, %p1434;
or.b32 %r9633, %r6561, %r9633;
$L__BB1_1089:
setp.ge.u32 %p1435, %r2035, %r6;
@%p1435 bra $L__BB1_1091;
add.s32 %r6562, %r2037, %r2098;
cvt.u64.u32 %rd995, %r6562;
add.s64 %rd996, %rd995, %rd5;
shl.b64 %rd997, %rd996, 2;
add.s64 %rd998, %rd3, %rd997;
ld.global.u32 %r6563, [%rd998];
abs.s32 %r6564, %r6563;
setp.gt.u32 %p1436, %r6564, 4;
and.b32 %r6565, %r6564, 1;
setp.eq.b32 %p1437, %r6565, 1;
and.pred %p1438, %p1436, %p1437;
selp.b32 %r6566, 2048, 0, %p1438;
or.b32 %r9633, %r6566, %r9633;
$L__BB1_1091:
@%p1303 bra $L__BB1_1100;
setp.le.u32 %p1440, %r6, %r9569;
@%p1440 bra $L__BB1_1094;
add.s32 %r6567, %r2036, %r2107;
cvt.u64.u32 %rd999, %r6567;
add.s64 %rd1000, %rd999, %rd5;
shl.b64 %rd1001, %rd1000, 2;
add.s64 %rd1002, %rd3, %rd1001;
ld.global.u32 %r6568, [%rd1002];
abs.s32 %r6569, %r6568;
setp.gt.u32 %p1441, %r6569, 4;
and.b32 %r6570, %r6569, 1;
setp.eq.b32 %p1442, %r6570, 1;
and.pred %p1443, %p1441, %p1442;
selp.b32 %r6571, 4096, 0, %p1443;
or.b32 %r9633, %r6571, %r9633;
$L__BB1_1094:
setp.ge.u32 %p1444, %r2033, %r6;
@%p1444 bra $L__BB1_1096;
add.s32 %r6572, %r2039, %r2107;
cvt.u64.u32 %rd1003, %r6572;
add.s64 %rd1004, %rd1003, %rd5;
shl.b64 %rd1005, %rd1004, 2;
add.s64 %rd1006, %rd3, %rd1005;
ld.global.u32 %r6573, [%rd1006];
abs.s32 %r6574, %r6573;
setp.gt.u32 %p1445, %r6574, 4;
and.b32 %r6575, %r6574, 1;
setp.eq.b32 %p1446, %r6575, 1;
and.pred %p1447, %p1445, %p1446;
selp.b32 %r6576, 8192, 0, %p1447;
or.b32 %r9633, %r6576, %r9633;
$L__BB1_1096:
setp.ge.u32 %p1448, %r2034, %r6;
@%p1448 bra $L__BB1_1098;
add.s32 %r6577, %r2038, %r2107;
cvt.u64.u32 %rd1007, %r6577;
add.s64 %rd1008, %rd1007, %rd5;
shl.b64 %rd1009, %rd1008, 2;
add.s64 %rd1010, %rd3, %rd1009;
ld.global.u32 %r6578, [%rd1010];
abs.s32 %r6579, %r6578;
setp.gt.u32 %p1449, %r6579, 4;
and.b32 %r6580, %r6579, 1;
setp.eq.b32 %p1450, %r6580, 1;
and.pred %p1451, %p1449, %p1450;
selp.b32 %r6581, 16384, 0, %p1451;
or.b32 %r9633, %r6581, %r9633;
$L__BB1_1098:
setp.ge.u32 %p1452, %r2035, %r6;
@%p1452 bra $L__BB1_1100;
add.s32 %r6582, %r2037, %r2107;
cvt.u64.u32 %rd1011, %r6582;
add.s64 %rd1012, %rd1011, %rd5;
shl.b64 %rd1013, %rd1012, 2;
add.s64 %rd1014, %rd3, %rd1013;
ld.global.u32 %r6583, [%rd1014];
abs.s32 %r6584, %r6583;
setp.gt.u32 %p1453, %r6584, 4;
and.b32 %r6585, %r6584, 1;
setp.eq.b32 %p1454, %r6585, 1;
and.pred %p1455, %p1453, %p1454;
selp.b32 %r6586, 32768, 0, %p1455;
or.b32 %r9633, %r6586, %r9633;
// ---------------------------------------------------------------------
// $L__BB1_1100: derive the working bitmasks for the current group of
// samples, then build %r9649 = 16-bit mask of samples with |value| == 3.
//
//   %r2182 = (%r9633 << 16) | %r9617
//   %r2183 = bit 3 of every nibble of %r2116 moved down to bit 0, OR'ed
//            with bit 0 of every nibble of %r2125 moved up to bit 3
//            (-2004318072 == 0x88888888)
//   %r2184 = %r2040 >> (4 * max(%r6334 - %r5, 0))   (signed compare)
//   %r2185 = %r2184 & ~%r2182
//
// %r9649 layout as built below: bits 0-3 come from pointers %rd45..%rd48,
// bits 4-7 from %rd49..%rd52, bits 8-11 from %rd53..%rd56 and bits 12-15
// from %rd57..%rd60 (pointers computed earlier in the kernel). Each group
// of four is skipped entirely when its guard predicate (%p1184, %p1201,
// %p1218, %p1235 -- set before this region; presumably per-column bounds
// tests, confirm) is true. Inside a group a sample is only loaded when
// its row index (%r9569, %r2033, %r2034, %r2035) is below %r6.
// ---------------------------------------------------------------------
$L__BB1_1100:
sub.s32 %r6589, %r6334, %r5;
shl.b32 %r6590, %r9633, 16;
or.b32 %r2182, %r6590, %r9617;
and.b32 %r6591, %r2116, -2004318072;
shr.u32 %r6592, %r6591, 3;
shl.b32 %r6593, %r2125, 3;
and.b32 %r6594, %r6593, -2004318072;
or.b32 %r2183, %r6594, %r6592;
not.b32 %r6595, %r2182;
setp.gt.s32 %p1456, %r6589, 0;
mov.u32 %r9649, 0;
shl.b32 %r6596, %r6589, 2;
selp.b32 %r6597, %r6596, 0, %p1456;
shr.u32 %r2184, %r2040, %r6597;
and.b32 %r2185, %r2184, %r6595;
// ---- group 0: bits 0..3 of %r9649 (guard %p1184) ----
@%p1184 bra $L__BB1_1109;
setp.le.u32 %p1458, %r6, %r9569;
mov.u32 %r9649, 0;
@%p1458 bra $L__BB1_1103;
ld.global.u32 %r6599, [%rd45];
abs.s32 %r6600, %r6599;
setp.eq.s32 %p1459, %r6600, 3;
selp.u32 %r9649, 1, 0, %p1459;
$L__BB1_1103:
setp.ge.u32 %p1460, %r2033, %r6;
@%p1460 bra $L__BB1_1105;
ld.global.u32 %r6601, [%rd46];
abs.s32 %r6602, %r6601;
setp.eq.s32 %p1461, %r6602, 3;
selp.b32 %r6603, 2, 0, %p1461;
or.b32 %r9649, %r6603, %r9649;
$L__BB1_1105:
setp.ge.u32 %p1462, %r2034, %r6;
@%p1462 bra $L__BB1_1107;
ld.global.u32 %r6604, [%rd47];
abs.s32 %r6605, %r6604;
setp.eq.s32 %p1463, %r6605, 3;
selp.b32 %r6606, 4, 0, %p1463;
or.b32 %r9649, %r6606, %r9649;
$L__BB1_1107:
setp.ge.u32 %p1464, %r2035, %r6;
@%p1464 bra $L__BB1_1109;
ld.global.u32 %r6607, [%rd48];
abs.s32 %r6608, %r6607;
setp.eq.s32 %p1465, %r6608, 3;
selp.b32 %r6609, 8, 0, %p1465;
or.b32 %r9649, %r6609, %r9649;
// ---- group 1: bits 4..7 of %r9649 (guard %p1201) ----
$L__BB1_1109:
@%p1201 bra $L__BB1_1118;
setp.le.u32 %p1467, %r6, %r9569;
@%p1467 bra $L__BB1_1112;
ld.global.u32 %r6610, [%rd49];
abs.s32 %r6611, %r6610;
setp.eq.s32 %p1468, %r6611, 3;
selp.b32 %r6612, 16, 0, %p1468;
or.b32 %r9649, %r6612, %r9649;
$L__BB1_1112:
setp.ge.u32 %p1469, %r2033, %r6;
@%p1469 bra $L__BB1_1114;
ld.global.u32 %r6613, [%rd50];
abs.s32 %r6614, %r6613;
setp.eq.s32 %p1470, %r6614, 3;
selp.b32 %r6615, 32, 0, %p1470;
or.b32 %r9649, %r6615, %r9649;
$L__BB1_1114:
setp.ge.u32 %p1471, %r2034, %r6;
@%p1471 bra $L__BB1_1116;
ld.global.u32 %r6616, [%rd51];
abs.s32 %r6617, %r6616;
setp.eq.s32 %p1472, %r6617, 3;
selp.b32 %r6618, 64, 0, %p1472;
or.b32 %r9649, %r6618, %r9649;
$L__BB1_1116:
setp.ge.u32 %p1473, %r2035, %r6;
@%p1473 bra $L__BB1_1118;
ld.global.u32 %r6619, [%rd52];
abs.s32 %r6620, %r6619;
setp.eq.s32 %p1474, %r6620, 3;
selp.b32 %r6621, 128, 0, %p1474;
or.b32 %r9649, %r6621, %r9649;
// ---- group 2: bits 8..11 of %r9649 (guard %p1218) ----
$L__BB1_1118:
@%p1218 bra $L__BB1_1127;
setp.le.u32 %p1476, %r6, %r9569;
@%p1476 bra $L__BB1_1121;
ld.global.u32 %r6622, [%rd53];
abs.s32 %r6623, %r6622;
setp.eq.s32 %p1477, %r6623, 3;
selp.b32 %r6624, 256, 0, %p1477;
or.b32 %r9649, %r6624, %r9649;
$L__BB1_1121:
setp.ge.u32 %p1478, %r2033, %r6;
@%p1478 bra $L__BB1_1123;
ld.global.u32 %r6625, [%rd54];
abs.s32 %r6626, %r6625;
setp.eq.s32 %p1479, %r6626, 3;
selp.b32 %r6627, 512, 0, %p1479;
or.b32 %r9649, %r6627, %r9649;
$L__BB1_1123:
setp.ge.u32 %p1480, %r2034, %r6;
@%p1480 bra $L__BB1_1125;
ld.global.u32 %r6628, [%rd55];
abs.s32 %r6629, %r6628;
setp.eq.s32 %p1481, %r6629, 3;
selp.b32 %r6630, 1024, 0, %p1481;
or.b32 %r9649, %r6630, %r9649;
$L__BB1_1125:
setp.ge.u32 %p1482, %r2035, %r6;
@%p1482 bra $L__BB1_1127;
ld.global.u32 %r6631, [%rd56];
abs.s32 %r6632, %r6631;
setp.eq.s32 %p1483, %r6632, 3;
selp.b32 %r6633, 2048, 0, %p1483;
or.b32 %r9649, %r6633, %r9649;
// ---- group 3: bits 12..15 of %r9649 (guard %p1235) ----
$L__BB1_1127:
@%p1235 bra $L__BB1_1136;
setp.le.u32 %p1485, %r6, %r9569;
@%p1485 bra $L__BB1_1130;
ld.global.u32 %r6634, [%rd57];
abs.s32 %r6635, %r6634;
setp.eq.s32 %p1486, %r6635, 3;
selp.b32 %r6636, 4096, 0, %p1486;
or.b32 %r9649, %r6636, %r9649;
$L__BB1_1130:
setp.ge.u32 %p1487, %r2033, %r6;
@%p1487 bra $L__BB1_1132;
ld.global.u32 %r6637, [%rd58];
abs.s32 %r6638, %r6637;
setp.eq.s32 %p1488, %r6638, 3;
selp.b32 %r6639, 8192, 0, %p1488;
or.b32 %r9649, %r6639, %r9649;
$L__BB1_1132:
setp.ge.u32 %p1489, %r2034, %r6;
@%p1489 bra $L__BB1_1134;
ld.global.u32 %r6640, [%rd59];
abs.s32 %r6641, %r6640;
setp.eq.s32 %p1490, %r6641, 3;
selp.b32 %r6642, 16384, 0, %p1490;
or.b32 %r9649, %r6642, %r9649;
$L__BB1_1134:
setp.ge.u32 %p1491, %r2035, %r6;
@%p1491 bra $L__BB1_1136;
ld.global.u32 %r6643, [%rd60];
abs.s32 %r6644, %r6643;
setp.eq.s32 %p1492, %r6644, 3;
selp.b32 %r6645, 32768, 0, %p1492;
or.b32 %r9649, %r6645, %r9649;
// ---------------------------------------------------------------------
// $L__BB1_1136: compute the initial work set %r9659 for the bit-by-bit
// loop that follows.
//
//   %r6653 = %r2182 | %r2183
//          | ((%r2182 << 1) & 0xEEEEEEEE)   // +1 inside each nibble
//          | ((%r2182 & 0xEEEEEEEE) >> 1)   // -1 inside each nibble
//            (-286331154 == 0xEEEEEEEE keeps the shifts from crossing
//             a 4-bit group boundary)
//   %r6659 = %r6653 | (%r6653 << 4) | (%r6653 >> 4) | (%r9574 >> 12)
//   %r9659 = %r2185 & %r6659
//   %r2218 = %r9649 & %r2184
//
// If %r9659 is zero there is nothing to visit: %r9680 is left at 0 and
// control goes straight to $L__BB1_1195. Otherwise the two accumulators
// %r9658 and %r9660 are cleared and execution falls into the loop.
// ---------------------------------------------------------------------
$L__BB1_1136:
and.b32 %r6647, %r2182, -286331154;
shr.u32 %r6648, %r6647, 1;
shl.b32 %r6649, %r2182, 1;
and.b32 %r6650, %r6649, -286331154;
or.b32 %r6651, %r2182, %r2183;
or.b32 %r6652, %r6651, %r6650;
or.b32 %r6653, %r6652, %r6648;
and.b32 %r2218, %r9649, %r2184;
shr.u32 %r6654, %r6653, 4;
shl.b32 %r6655, %r6653, 4;
shr.u32 %r6656, %r9574, 12;
or.b32 %r6657, %r6653, %r6656;
or.b32 %r6658, %r6657, %r6655;
or.b32 %r6659, %r6658, %r6654;
and.b32 %r9659, %r2185, %r6659;
setp.eq.s32 %p1493, %r9659, 0;
mov.u32 %r9680, 0;
@%p1493 bra $L__BB1_1195;
mov.u32 %r9658, 0;
mov.u32 %r9660, %r9658;
// ---------------------------------------------------------------------
// $L__BB1_1138: loop head. Pops the lowest set bit of the work set
// %r9659 and appends one bit to the output byte stream.
//
// Emitted bit  : 1 if the popped bit is also set in %r2218, else 0.
// Byte packing : the bit is OR'ed into %rs1232 at position %r9675.
//                A byte holds 8 bits when %r9677 == 0 and 7 bits
//                otherwise; %r9677 is set to 1 right after a byte equal
//                to 0xFF has been completed (looks like 0xFF bit
//                stuffing -- confirm against the encoder source).
// Flush        : when the byte is full it is stored to
//                %rd1 + %rd42 + %r9679 (store skipped if %rd43 == 0),
//                then %r9679 is advanced and %r9675 / %rs1232 reset.
// Result       : %p2371 is true on success and false when the byte was
//                full but %r9679 >= %r9551 (no room left).
// ---------------------------------------------------------------------
$L__BB1_1138:
// brev + bfind.shiftamt yields the index of the lowest set bit.
brev.b32 %r6662, %r9659;
bfind.shiftamt.u32 %r2226, %r6662;
mov.pred %p2373, -1;
mov.u32 %r6663, 1;
shl.b32 %r2227, %r6663, %r2226;
// Rotating 0xFFFFFFFE left by the index gives ~(1 << index).
mov.u32 %r6664, -2;
shf.l.wrap.b32 %r6665, %r6664, %r6664, %r2226;
// Remove the bit from the work set and remember it as visited.
and.b32 %r9659, %r9659, %r6665;
or.b32 %r9658, %r2227, %r9658;
and.b32 %r2230, %r2227, %r2218;
setp.ne.s32 %p1495, %r2230, 0;
selp.u32 %r6666, 1, 0, %p1495;
// Capacity of the current byte: 8 bits, or 7 when %r9677 != 0.
setp.eq.s32 %p1496, %r9677, 0;
selp.b32 %r6667, 8, 7, %p1496;
shl.b32 %r6668, %r6666, %r9675;
cvt.u16.u32 %rs796, %r6668;
or.b16 %rs1232, %rs1232, %rs796;
add.s32 %r9675, %r9675, 1;
setp.lt.u32 %p1497, %r9675, %r6667;
mov.pred %p2371, %p2373;
// Byte not full yet: done, %p2371 == true.
@%p1497 bra $L__BB1_1143;
// Byte full: fail (%p2371 = false) if the write index hit the limit.
setp.ge.u32 %p1499, %r9679, %r9551;
mov.pred %p2371, 0;
@%p1499 bra $L__BB1_1143;
setp.eq.s64 %p1500, %rd43, 0;
@%p1500 bra $L__BB1_1142;
cvt.u64.u32 %rd1015, %r9679;
add.s64 %rd1016, %rd42, %rd1015;
add.s64 %rd1017, %rd1, %rd1016;
st.global.u8 [%rd1017], %rs1232;
$L__BB1_1142:
// Remember whether the completed byte was 0xFF, then start a new byte.
and.b16 %rs798, %rs1232, 255;
setp.eq.s16 %p1502, %rs798, 255;
selp.u32 %r9677, 1, 0, %p1502;
add.s32 %r9679, %r9679, 1;
mov.u32 %r9675, 0;
mov.u16 %rs1232, 0;
mov.pred %p2371, %p2373;
// ---------------------------------------------------------------------
// $L__BB1_1143: second half of the loop body.
//
// 1. If the preceding emit step failed (%p2371 false) leave through
//    $L__BB1_1202; %p2373 is still true at that point.
// 2. If the popped bit was not in %r2218 (%r2230 == 0) nothing more is
//    done for it.
// 3. Otherwise the bit is added to %r9660 and a neighbour mask %r9667 is
//    selected from the bit index %r2226. The compare tree below is a
//    16-way switch equivalent to
//        base  = {0x33, 0x76, 0xEC, 0xC8}[index & 3]
//        %r9667 = base << (4 * (index >> 2))        for index 0..15
//        %r9667 = 0                                 for any other index
//    i.e. the bit itself, its immediate neighbour(s) inside the same
//    4-bit group and the adjacent bits of the next 4-bit group.
//    Candidates from %r2185 that fall inside the mask and have not been
//    visited yet (~%r9658) are added to the work set %r9659.
// 4. Loop back to $L__BB1_1138 while the work set is non-empty. On exit
//    %r9673 is seeded with %r9660 for the loop at $L__BB1_1188, or, if
//    %r9660 is zero, control jumps to $L__BB1_1195 with %r9680 = 0.
// ---------------------------------------------------------------------
$L__BB1_1143:
not.pred %p1504, %p2371;
@%p1504 bra $L__BB1_1202;
setp.eq.s32 %p1505, %r2230, 0;
@%p1505 bra $L__BB1_1185;
or.b32 %r9660, %r2227, %r9660;
// Default value 51 (0x33) is the mask for index 0.
mov.u32 %r9667, 51;
setp.gt.s32 %p1506, %r2226, 7;
@%p1506 bra $L__BB1_1161;
setp.gt.s32 %p1518, %r2226, 3;
@%p1518 bra $L__BB1_1154;
setp.gt.s32 %p1524, %r2226, 1;
@%p1524 bra $L__BB1_1151;
setp.eq.s32 %p1527, %r2226, 0;
@%p1527 bra $L__BB1_1184;
setp.eq.s32 %p1528, %r2226, 1;
@%p1528 bra $L__BB1_1150;
bra.uni $L__BB1_1183;
// index 1 -> 0x76
$L__BB1_1150:
mov.u32 %r9667, 118;
bra.uni $L__BB1_1184;
// indices 8..15
$L__BB1_1161:
setp.gt.s32 %p1507, %r2226, 11;
@%p1507 bra $L__BB1_1169;
setp.gt.s32 %p1513, %r2226, 9;
@%p1513 bra $L__BB1_1166;
setp.eq.s32 %p1516, %r2226, 8;
@%p1516 bra $L__BB1_1179;
setp.eq.s32 %p1517, %r2226, 9;
@%p1517 bra $L__BB1_1165;
bra.uni $L__BB1_1183;
// index 9 -> 0x7600
$L__BB1_1165:
mov.u32 %r9667, 30208;
bra.uni $L__BB1_1184;
// indices 4..7
$L__BB1_1154:
setp.gt.s32 %p1519, %r2226, 5;
@%p1519 bra $L__BB1_1158;
setp.eq.s32 %p1522, %r2226, 4;
@%p1522 bra $L__BB1_1181;
setp.eq.s32 %p1523, %r2226, 5;
@%p1523 bra $L__BB1_1157;
bra.uni $L__BB1_1183;
// index 5 -> 0x760
$L__BB1_1157:
mov.u32 %r9667, 1888;
bra.uni $L__BB1_1184;
// indices 12..15 (and anything larger)
$L__BB1_1169:
setp.gt.s32 %p1508, %r2226, 13;
@%p1508 bra $L__BB1_1173;
setp.eq.s32 %p1511, %r2226, 12;
@%p1511 bra $L__BB1_1177;
setp.eq.s32 %p1512, %r2226, 13;
@%p1512 bra $L__BB1_1172;
bra.uni $L__BB1_1183;
// index 13 -> 0x76000
$L__BB1_1172:
mov.u32 %r9667, 483328;
bra.uni $L__BB1_1184;
// indices 2..3
$L__BB1_1151:
setp.eq.s32 %p1525, %r2226, 2;
@%p1525 bra $L__BB1_1182;
setp.eq.s32 %p1526, %r2226, 3;
@%p1526 bra $L__BB1_1153;
bra.uni $L__BB1_1183;
// index 3 -> 0xC8
$L__BB1_1153:
mov.u32 %r9667, 200;
bra.uni $L__BB1_1184;
// indices 10..11
$L__BB1_1166:
setp.eq.s32 %p1514, %r2226, 10;
@%p1514 bra $L__BB1_1178;
setp.eq.s32 %p1515, %r2226, 11;
@%p1515 bra $L__BB1_1168;
bra.uni $L__BB1_1183;
// index 11 -> 0xC800
$L__BB1_1168:
mov.u32 %r9667, 51200;
bra.uni $L__BB1_1184;
// indices 6..7
$L__BB1_1158:
setp.eq.s32 %p1520, %r2226, 6;
@%p1520 bra $L__BB1_1180;
setp.eq.s32 %p1521, %r2226, 7;
@%p1521 bra $L__BB1_1160;
bra.uni $L__BB1_1183;
// index 7 -> 0xC80
$L__BB1_1160:
mov.u32 %r9667, 3200;
bra.uni $L__BB1_1184;
// indices 14..15 (anything above 15 falls to the zero mask)
$L__BB1_1173:
setp.eq.s32 %p1509, %r2226, 14;
@%p1509 bra $L__BB1_1176;
setp.ne.s32 %p1510, %r2226, 15;
@%p1510 bra $L__BB1_1183;
// index 15 -> 0xC8000
mov.u32 %r9667, 819200;
bra.uni $L__BB1_1184;
// index 8 -> 0x3300
$L__BB1_1179:
mov.u32 %r9667, 13056;
bra.uni $L__BB1_1184;
// index 4 -> 0x330
$L__BB1_1181:
mov.u32 %r9667, 816;
bra.uni $L__BB1_1184;
// index 12 -> 0x33000
$L__BB1_1177:
mov.u32 %r9667, 208896;
bra.uni $L__BB1_1184;
// index 2 -> 0xEC
$L__BB1_1182:
mov.u32 %r9667, 236;
bra.uni $L__BB1_1184;
// index 10 -> 0xEC00
$L__BB1_1178:
mov.u32 %r9667, 60416;
bra.uni $L__BB1_1184;
// index 6 -> 0xEC0
$L__BB1_1180:
mov.u32 %r9667, 3776;
bra.uni $L__BB1_1184;
// index 14 -> 0xEC000
$L__BB1_1176:
mov.u32 %r9667, 966656;
bra.uni $L__BB1_1184;
// no match: empty neighbour mask
$L__BB1_1183:
mov.u32 %r9667, 0;
$L__BB1_1184:
// work set |= candidates & ~visited & neighbour mask
not.b32 %r6687, %r9658;
and.b32 %r6688, %r2185, %r6687;
and.b32 %r6689, %r6688, %r9667;
or.b32 %r9659, %r6689, %r9659;
$L__BB1_1185:
setp.ne.s32 %p1529, %r9659, 0;
@%p1529 bra $L__BB1_1138;
// Work set exhausted. Nothing collected in %r9660 -> skip the next loop.
setp.eq.s32 %p1530, %r9660, 0;
mov.u32 %r9680, 0;
@%p1530 bra $L__BB1_1195;
mov.u32 %r9673, %r9660;
// ---------------------------------------------------------------------
// $L__BB1_1188: for every bit set in %r9673 (a copy of %r9660), lowest
// index first, load the corresponding 32-bit sample and append its bit 31
// (the sign bit of a two's-complement value) to the output byte stream.
//
// Index decoding: row    = %r9569 + (index & 3)
//                 column = %r9573 + (index >> 2)
// Sample address: %rd3 + 4 * (%rd5 + row * %r1 + column)
//
// The byte packing is the same as in the loop at $L__BB1_1138: 8 bits
// per byte, or 7 when %r9677 != 0; a full byte is stored to
// %rd1 + %rd42 + %r9679 unless %rd43 == 0. If a full byte cannot be
// written because %r9679 >= %r9551, %p2372 is false and control leaves
// through $L__BB1_1202 with %p2373 == true.
// When %r9673 becomes empty, %r9680 = %r9660 and control continues at
// $L__BB1_1195.
// ---------------------------------------------------------------------
$L__BB1_1188:
setp.eq.s32 %p1531, %r9673, 0;
mov.u32 %r9680, %r9660;
@%p1531 bra $L__BB1_1195;
// Index of the lowest set bit, then clear it (same idiom as above).
brev.b32 %r6691, %r9673;
bfind.shiftamt.u32 %r6692, %r6691;
mov.pred %p2373, -1;
mov.u32 %r6693, -2;
shf.l.wrap.b32 %r6694, %r6693, %r6693, %r6692;
and.b32 %r9673, %r9673, %r6694;
shr.u32 %r6695, %r6692, 2;
and.b32 %r6696, %r6692, 3;
add.s32 %r6697, %r6696, %r9569;
add.s32 %r6698, %r6695, %r9573;
mad.lo.s32 %r6699, %r6697, %r1, %r6698;
cvt.u64.u32 %rd1018, %r6699;
add.s64 %rd1019, %rd1018, %rd5;
shl.b64 %rd1020, %rd1019, 2;
add.s64 %rd1021, %rd3, %rd1020;
ld.global.u32 %r6700, [%rd1021];
// Bit to emit = bit 31 of the loaded word.
shr.u32 %r6701, %r6700, 31;
setp.eq.s32 %p1533, %r9677, 0;
selp.b32 %r6702, 8, 7, %p1533;
shl.b32 %r6703, %r6701, %r9675;
cvt.u16.u32 %rs799, %r6703;
or.b16 %rs1232, %rs1232, %rs799;
add.s32 %r9675, %r9675, 1;
setp.lt.u32 %p1534, %r9675, %r6702;
mov.pred %p2372, %p2373;
@%p1534 bra $L__BB1_1194;
// Byte full: fail (%p2372 = false) if there is no room left.
setp.ge.u32 %p1536, %r9679, %r9551;
mov.pred %p2372, 0;
@%p1536 bra $L__BB1_1194;
setp.eq.s64 %p1537, %rd43, 0;
@%p1537 bra $L__BB1_1193;
cvt.u64.u32 %rd1022, %r9679;
add.s64 %rd1023, %rd42, %rd1022;
add.s64 %rd1024, %rd1, %rd1023;
st.global.u8 [%rd1024], %rs1232;
$L__BB1_1193:
and.b16 %rs801, %rs1232, 255;
setp.eq.s16 %p1539, %rs801, 255;
selp.u32 %r9677, 1, 0, %p1539;
add.s32 %r9679, %r9679, 1;
mov.u32 %r9675, 0;
mov.u16 %rs1232, 0;
mov.pred %p2372, %p2373;
$L__BB1_1194:
@%p2372 bra $L__BB1_1188;
bra.uni $L__BB1_1202;
$L__BB1_1195:
not.b32 %r6705, %r9680;
and.b32 %r6706, %r2218, %r6705;
setp.ne.s32 %p1542, %r6706, 0;
mov.pred %p2373, %p1178;
@%p1542 bra $L__BB1_1202;
setp.lt.u32 %p1543, %r6334, %r5;
or.b32 %r6707, %r9680, %r2182;
st.local.u16 [%rd44], %r6707;
shr.u32 %r6708, %r6707, 16;
st.local.u16 [%rd44+2], %r6708;
shl.b32 %r6709, %r6707, 1;
and.b32 %r6710, %r6709, 57344;
and.b32 %r6711, %r6707, 57344;
shr.u32 %r6712, %r6711, 1;
or.b32 %r6713, %r6707, %r2183;
and.b32 %r6714, %r6713, 61440;
or.b32 %r6715, %r6714, %r6710;
or.b32 %r9574, %r6715, %r6712;
mov.u32 %r9573, %r6334;
@%p1543 bra $L__BB1_956;
$L__BB1_1197:
add.s32 %r9569, %r9569, 4;
setp.gt.u32 %p1544, %r6, %r9569;
@%p1544 bra $L__BB1_954;
setp.eq.s32 %p1546, %r9675, 0;
mov.pred %p1545, 0;
mov.pred %p2373, %p1545;
@%p1546 bra $L__BB1_1202;
setp.ge.u32 %p1548, %r9679, %r9551;
mov.pred %p2373, %p1178;
@%p1548 bra $L__BB1_1202;
setp.eq.s64 %p1550, %rd43, 0;
mov.pred %p2373, %p1545;
@%p1550 bra $L__BB1_1202;
cvt.u64.u32 %rd1025, %r9679;
add.s64 %rd1026, %rd42, %rd1025;
add.s64 %rd1027, %rd1, %rd1026;
st.global.u8 [%rd1027], %rs1232;
mov.pred %p2373, %p1545;
$L__BB1_1202:
@%p2373 bra $L__BB1_1246;
bra.uni $L__BB1_1203;
$L__BB1_1246:
mov.u32 %r6769, 2;
st.global.u32 [%rd6], %r6769;
mov.u32 %r6770, 6;
st.global.u32 [%rd6+4], %r6770;
mov.u32 %r6771, 0;
st.global.u32 [%rd6+8], %r6771;
st.global.u32 [%rd6+12], %r6771;
st.global.u32 [%rd6+16], %r6771;
st.global.u32 [%rd6+20], %r6771;
st.global.u32 [%rd6+24], %r6771;
st.global.u32 [%rd6+28], %r6771;
bra.uni $L__BB1_1905;
$L__BB1_1203:
cvt.u64.u32 %rd1028, %r9551;
add.s64 %rd61, %rd42, %rd1028;
setp.eq.s32 %p1552, %r9552, 0;
@%p1552 bra $L__BB1_1244;
add.s32 %r6717, %r9552, -1;
and.b32 %r9688, %r9552, 3;
setp.lt.u32 %p1553, %r6717, 3;
mov.u32 %r9686, 0;
@%p1553 bra $L__BB1_1207;
sub.s32 %r9685, %r9552, %r9688;
mov.u32 %r9686, 0;
$L__BB1_1206:
cvt.u64.u32 %rd1029, %r9686;
add.s64 %rd1030, %rd61, %rd1029;
add.s64 %rd1031, %rd1, %rd1030;
mov.u16 %rs802, 0;
st.global.u8 [%rd1031], %rs802;
st.global.u8 [%rd1031+1], %rs802;
st.global.u8 [%rd1031+2], %rs802;
st.global.u8 [%rd1031+3], %rs802;
add.s32 %r9686, %r9686, 4;
add.s32 %r9685, %r9685, -4;
setp.ne.s32 %p1554, %r9685, 0;
@%p1554 bra $L__BB1_1206;
$L__BB1_1207:
setp.eq.s32 %p1555, %r9688, 0;
@%p1555 bra $L__BB1_1209;
$L__BB1_1208:
.pragma "nounroll";
cvt.u64.u32 %rd1032, %r9686;
add.s64 %rd1033, %rd61, %rd1032;
add.s64 %rd1034, %rd1, %rd1033;
mov.u16 %rs803, 0;
st.global.u8 [%rd1034], %rs803;
add.s32 %r9686, %r9686, 1;
add.s32 %r9688, %r9688, -1;
setp.ne.s32 %p1556, %r9688, 0;
@%p1556 bra $L__BB1_1208;
$L__BB1_1209:
@%p10 bra $L__BB1_1237;
mov.u32 %r6723, 0;
mov.u32 %r9715, 1;
mov.u16 %rs1239, 0;
mov.u32 %r9689, %r6723;
mov.u32 %r9714, %r6723;
mov.u32 %r9713, %r6723;
mov.u32 %r9712, %r6723;
$L__BB1_1211:
mul.lo.s32 %r2279, %r9689, %r1;
add.s32 %r2280, %r9689, 3;
mul.lo.s32 %r2281, %r1, %r2280;
add.s32 %r2282, %r9689, 2;
mul.lo.s32 %r2283, %r1, %r2282;
add.s32 %r2284, %r9689, 1;
mul.lo.s32 %r2285, %r1, %r2284;
mov.u32 %r9694, %r6723;
$L__BB1_1212:
add.s32 %r9702, %r2281, %r9694;
add.s32 %r9701, %r2283, %r9694;
add.s32 %r9700, %r2285, %r9694;
add.s32 %r9699, %r2279, %r9694;
mov.u32 %r9703, 0;
$L__BB1_1213:
add.s32 %r6726, %r9694, %r9703;
setp.ge.u32 %p1558, %r6726, %r5;
@%p1558 bra $L__BB1_1234;
setp.ge.u32 %p1559, %r9689, %r6;
@%p1559 bra $L__BB1_1219;
cvt.u64.u32 %rd1035, %r9699;
add.s64 %rd1036, %rd1035, %rd5;
shl.b64 %rd1037, %rd1036, 2;
add.s64 %rd1038, %rd3, %rd1037;
ld.global.u32 %r6727, [%rd1038];
abs.s32 %r2304, %r6727;
setp.lt.u32 %p1560, %r2304, 5;
and.b32 %r6728, %r2304, 1;
setp.eq.b32 %p1561, %r6728, 1;
not.pred %p1562, %p1561;
or.pred %p1563, %p1560, %p1562;
@%p1563 bra $L__BB1_1219;
shr.u32 %r6729, %r2304, 1;
and.b32 %r6730, %r6729, 1;
shl.b32 %r6731, %r6730, %r9712;
cvt.u16.u32 %rs805, %r6731;
or.b16 %rs1239, %rs1239, %rs805;
add.s32 %r9714, %r9714, 1;
add.s32 %r9712, %r9712, 1;
setp.ne.s32 %p1564, %r9712, 7;
setp.eq.s32 %p1565, %r9715, 0;
or.pred %p1566, %p1564, %p1565;
and.b16 %rs806, %rs1239, 127;
setp.ne.s16 %p1567, %rs806, 127;
or.pred %p1568, %p1566, %p1567;
setp.ne.s32 %p1569, %r9712, 8;
and.pred %p1570, %p1569, %p1568;
@%p1570 bra $L__BB1_1219;
setp.ge.u32 %p1571, %r9713, %r9552;
@%p1571 bra $L__BB1_1245;
not.b32 %r6733, %r9713;
add.s32 %r6734, %r9552, %r6733;
cvt.u64.u32 %rd1039, %r6734;
add.s64 %rd1040, %rd61, %rd1039;
add.s64 %rd1041, %rd1, %rd1040;
and.b16 %rs808, %rs1239, 255;
st.global.u8 [%rd1041], %rs1239;
add.s32 %r9713, %r9713, 1;
setp.gt.u16 %p1572, %rs808, 143;
selp.u32 %r9715, 1, 0, %p1572;
mov.u16 %rs1239, 0;
mov.u32 %r9712, 0;
$L__BB1_1219:
setp.ge.u32 %p1573, %r2284, %r6;
@%p1573 bra $L__BB1_1224;
cvt.u64.u32 %rd1042, %r9700;
add.s64 %rd1043, %rd1042, %rd5;
shl.b64 %rd1044, %rd1043, 2;
add.s64 %rd1045, %rd3, %rd1044;
ld.global.u32 %r6735, [%rd1045];
abs.s32 %r2313, %r6735;
setp.lt.u32 %p1574, %r2313, 5;
and.b32 %r6736, %r2313, 1;
setp.eq.b32 %p1575, %r6736, 1;
not.pred %p1576, %p1575;
or.pred %p1577, %p1574, %p1576;
@%p1577 bra $L__BB1_1224;
shr.u32 %r6737, %r2313, 1;
and.b32 %r6738, %r6737, 1;
shl.b32 %r6739, %r6738, %r9712;
cvt.u16.u32 %rs809, %r6739;
or.b16 %rs1239, %rs1239, %rs809;
add.s32 %r9714, %r9714, 1;
add.s32 %r9712, %r9712, 1;
setp.ne.s32 %p1578, %r9712, 7;
setp.eq.s32 %p1579, %r9715, 0;
or.pred %p1580, %p1578, %p1579;
and.b16 %rs810, %rs1239, 127;
setp.ne.s16 %p1581, %rs810, 127;
or.pred %p1582, %p1580, %p1581;
setp.ne.s32 %p1583, %r9712, 8;
and.pred %p1584, %p1583, %p1582;
@%p1584 bra $L__BB1_1224;
setp.ge.u32 %p1585, %r9713, %r9552;
@%p1585 bra $L__BB1_1245;
not.b32 %r6741, %r9713;
add.s32 %r6742, %r9552, %r6741;
cvt.u64.u32 %rd1046, %r6742;
add.s64 %rd1047, %rd61, %rd1046;
add.s64 %rd1048, %rd1, %rd1047;
and.b16 %rs812, %rs1239, 255;
st.global.u8 [%rd1048], %rs1239;
add.s32 %r9713, %r9713, 1;
setp.gt.u16 %p1586, %rs812, 143;
selp.u32 %r9715, 1, 0, %p1586;
mov.u16 %rs1239, 0;
mov.u32 %r9712, 0;
$L__BB1_1224:
setp.ge.u32 %p1587, %r2282, %r6;
@%p1587 bra $L__BB1_1229;
cvt.u64.u32 %rd1049, %r9701;
add.s64 %rd1050, %rd1049, %rd5;
shl.b64 %rd1051, %rd1050, 2;
add.s64 %rd1052, %rd3, %rd1051;
ld.global.u32 %r6743, [%rd1052];
abs.s32 %r2322, %r6743;
setp.lt.u32 %p1588, %r2322, 5;
and.b32 %r6744, %r2322, 1;
setp.eq.b32 %p1589, %r6744, 1;
not.pred %p1590, %p1589;
or.pred %p1591, %p1588, %p1590;
@%p1591 bra $L__BB1_1229;
shr.u32 %r6745, %r2322, 1;
and.b32 %r6746, %r6745, 1;
shl.b32 %r6747, %r6746, %r9712;
cvt.u16.u32 %rs813, %r6747;
or.b16 %rs1239, %rs1239, %rs813;
add.s32 %r9714, %r9714, 1;
add.s32 %r9712, %r9712, 1;
setp.ne.s32 %p1592, %r9712, 7;
setp.eq.s32 %p1593, %r9715, 0;
or.pred %p1594, %p1592, %p1593;
and.b16 %rs814, %rs1239, 127;
setp.ne.s16 %p1595, %rs814, 127;
or.pred %p1596, %p1594, %p1595;
setp.ne.s32 %p1597, %r9712, 8;
and.pred %p1598, %p1597, %p1596;
@%p1598 bra $L__BB1_1229;
setp.ge.u32 %p1599, %r9713, %r9552;
@%p1599 bra $L__BB1_1245;
not.b32 %r6749, %r9713;
add.s32 %r6750, %r9552, %r6749;
cvt.u64.u32 %rd1053, %r6750;
add.s64 %rd1054, %rd61, %rd1053;
add.s64 %rd1055, %rd1, %rd1054;
and.b16 %rs816, %rs1239, 255;
st.global.u8 [%rd1055], %rs1239;
add.s32 %r9713, %r9713, 1;
setp.gt.u16 %p1600, %rs816, 143;
selp.u32 %r9715, 1, 0, %p1600;
mov.u16 %rs1239, 0;
mov.u32 %r9712, 0;
$L__BB1_1229:
setp.ge.u32 %p1601, %r2280, %r6;
@%p1601 bra $L__BB1_1234;
cvt.u64.u32 %rd1056, %r9702;
add.s64 %rd1057, %rd1056, %rd5;
shl.b64 %rd1058, %rd1057, 2;
add.s64 %rd1059, %rd3, %rd1058;
ld.global.u32 %r6751, [%rd1059];
abs.s32 %r2331, %r6751;
setp.lt.u32 %p1602, %r2331, 5;
and.b32 %r6752, %r2331, 1;
setp.eq.b32 %p1603, %r6752, 1;
not.pred %p1604, %p1603;
or.pred %p1605, %p1602, %p1604;
@%p1605 bra $L__BB1_1234;
shr.u32 %r6753, %r2331, 1;
and.b32 %r6754, %r6753, 1;
shl.b32 %r6755, %r6754, %r9712;
cvt.u16.u32 %rs817, %r6755;
or.b16 %rs1239, %rs1239, %rs817;
add.s32 %r9714, %r9714, 1;
add.s32 %r9712, %r9712, 1;
setp.ne.s32 %p1606, %r9712, 7;
setp.eq.s32 %p1607, %r9715, 0;
or.pred %p1608, %p1606, %p1607;
and.b16 %rs818, %rs1239, 127;
setp.ne.s16 %p1609, %rs818, 127;
or.pred %p1610, %p1608, %p1609;
setp.ne.s32 %p1611, %r9712, 8;
and.pred %p1612, %p1611, %p1610;
@%p1612 bra $L__BB1_1234;
setp.ge.u32 %p1613, %r9713, %r9552;
@%p1613 bra $L__BB1_1245;
not.b32 %r6757, %r9713;
add.s32 %r6758, %r9552, %r6757;
cvt.u64.u32 %rd1060, %r6758;
add.s64 %rd1061, %rd61, %rd1060;
add.s64 %rd1062, %rd1, %rd1061;
and.b16 %rs820, %rs1239, 255;
st.global.u8 [%rd1062], %rs1239;
add.s32 %r9713, %r9713, 1;
setp.gt.u16 %p1614, %rs820, 143;
selp.u32 %r9715, 1, 0, %p1614;
mov.u16 %rs1239, 0;
mov.u32 %r9712, 0;
$L__BB1_1234:
add.s32 %r9702, %r9702, 1;
add.s32 %r9701, %r9701, 1;
add.s32 %r9700, %r9700, 1;
add.s32 %r9699, %r9699, 1;
add.s32 %r9703, %r9703, 1;
setp.lt.u32 %p1615, %r9703, 8;
@%p1615 bra $L__BB1_1213;
add.s32 %r9694, %r9694, 8;
setp.lt.u32 %p1616, %r9694, %r5;
@%p1616 bra $L__BB1_1212;
add.s32 %r9689, %r9689, 4;
setp.lt.u32 %p1617, %r9689, %r6;
@%p1617 bra $L__BB1_1211;
bra.uni $L__BB1_1239;
$L__BB1_1244:
setp.eq.s32 %p1624, %r8423, 0;
@%p1624 bra $L__BB1_1243;
bra.uni $L__BB1_1245;
$L__BB1_1237:
mov.u32 %r9712, 0;
mov.u32 %r9724, %r9712;
$L__BB1_1238:
add.s32 %r9724, %r9724, 4;
setp.lt.u32 %p1618, %r9724, %r6;
mov.u16 %rs1239, 0;
mov.u32 %r9713, %r9712;
mov.u32 %r9714, %r9712;
@%p1618 bra $L__BB1_1238;
$L__BB1_1239:
setp.eq.s32 %p1619, %r9712, 0;
@%p1619 bra $L__BB1_1242;
setp.ge.u32 %p1620, %r9713, %r9552;
@%p1620 bra $L__BB1_1245;
not.b32 %r6763, %r9713;
add.s32 %r6764, %r9552, %r6763;
cvt.u64.u32 %rd1063, %r6764;
add.s64 %rd1064, %rd61, %rd1063;
add.s64 %rd1065, %rd1, %rd1064;
st.global.u8 [%rd1065], %rs1239;
add.s32 %r9713, %r9713, 1;
$L__BB1_1242:
setp.le.u32 %p1621, %r9713, %r9552;
setp.eq.s32 %p1622, %r9714, %r8423;
and.pred %p1623, %p1622, %p1621;
@%p1623 bra $L__BB1_1243;
bra.uni $L__BB1_1245;
$L__BB1_1243:
mov.u32 %r6768, 0;
st.global.u32 [%rd6], %r6768;
st.global.u32 [%rd6+4], %r6768;
st.global.u32 [%rd6+8], %r1986;
st.global.u32 [%rd6+12], %r4;
st.global.u32 [%rd6+16], %r42;
st.global.u32 [%rd6+20], %r1735;
st.global.u32 [%rd6+24], %r9553;
st.global.u32 [%rd6+28], %r6768;
bra.uni $L__BB1_1905;
$L__BB1_1245:
mov.u32 %r6765, 2;
st.global.u32 [%rd6], %r6765;
mov.u32 %r6766, 7;
st.global.u32 [%rd6+4], %r6766;
mov.u32 %r6767, 0;
st.global.u32 [%rd6+8], %r6767;
st.global.u32 [%rd6+12], %r6767;
st.global.u32 [%rd6+16], %r6767;
st.global.u32 [%rd6+20], %r6767;
st.global.u32 [%rd6+24], %r6767;
st.global.u32 [%rd6+28], %r6767;
bra.uni $L__BB1_1905;
}
// .globl j2k_htj2k_encode_codeblocks_multi_input
.visible .entry j2k_htj2k_encode_codeblocks_multi_input(
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_param_0,
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_param_1,
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_param_2,
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_param_3,
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_param_4,
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_param_5,
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_param_6
)
.maxntid 128, 1, 1
{
.local .align 2 .b8 __local_depot2[1026];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<2374>;
.reg .b16 %rs<1376>;
.reg .b32 %r<10754>;
.reg .b64 %rd<1420>;
// demoted variable
.shared .align 4 .b8 _ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE9block_max[512];
// demoted variable
.shared .align 1 .b8 _ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE13cleanup_e_val[513];
// demoted variable
.shared .align 1 .b8 _ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE14cleanup_cx_val[513];
mov.u64 %SPL, __local_depot2;
ld.param.u64 %rd77, [ j2k_htj2k_encode_codeblocks_multi_input_param_0];
ld.param.u64 %rd83, [ j2k_htj2k_encode_codeblocks_multi_input_param_6];
cvta.to.global.u64 %rd1, %rd77;
mov.u32 %r4053, %ctaid.x;
cvt.u64.u32 %rd2, %r4053;
setp.ge.u64 %p9, %rd2, %rd83;
@%p9 bra $L__BB2_1905;
ld.param.u64 %rd1419, [ j2k_htj2k_encode_codeblocks_multi_input_param_1];
cvta.to.global.u64 %rd84, %rd1419;
mul.lo.s64 %rd85, %rd2, 40;
add.s64 %rd86, %rd84, %rd85;
ld.global.u64 %rd87, [%rd86];
cvta.to.global.u64 %rd3, %rd87;
ld.global.v2.u32 {%r4054, %r4055}, [%rd86+8];
ld.global.v2.u32 {%r4057, %r4058}, [%rd86+16];
ld.global.v2.u32 {%r4059, %r4060}, [%rd86+24];
ld.global.v2.u32 {%r4061, %r4062}, [%rd86+32];
cvt.u64.u32 %rd4, %r4054;
setp.eq.s32 %p10, %r4057, 0;
setp.eq.s32 %p11, %r4058, 0;
or.pred %p12, %p10, %p11;
@%p12 bra $L__BB2_14;
bra.uni $L__BB2_2;
$L__BB2_14:
mov.u32 %r8425, 0;
bra.uni $L__BB2_15;
$L__BB2_2:
mul.lo.s32 %r8, %r4058, %r4057;
setp.eq.s32 %p13, %r4055, %r4057;
@%p13 bra $L__BB2_6;
bra.uni $L__BB2_3;
$L__BB2_6:
mov.u32 %r8421, %tid.x;
setp.ge.u32 %p16, %r8421, %r8;
mov.u32 %r8423, 0;
@%p16 bra $L__BB2_9;
mov.u32 %r8423, 0;
$L__BB2_8:
cvt.u64.u32 %rd92, %r8421;
add.s64 %rd93, %rd92, %rd4;
shl.b64 %rd94, %rd93, 2;
add.s64 %rd95, %rd3, %rd94;
ld.global.u32 %r4074, [%rd95];
abs.s32 %r4075, %r4074;
max.u32 %r8423, %r8423, %r4075;
mov.u32 %r4076, %ntid.x;
add.s32 %r8421, %r8421, %r4076;
setp.lt.u32 %p17, %r8421, %r8;
@%p17 bra $L__BB2_8;
bra.uni $L__BB2_9;
$L__BB2_3:
mov.u32 %r8419, %tid.x;
setp.ge.u32 %p14, %r8419, %r8;
mov.u32 %r8423, 0;
@%p14 bra $L__BB2_9;
sub.s32 %r9, %r4055, %r4057;
mov.u32 %r8423, 0;
$L__BB2_5:
div.u32 %r4066, %r8419, %r4057;
mad.lo.s32 %r4067, %r9, %r4066, %r8419;
cvt.u64.u32 %rd88, %r4067;
add.s64 %rd89, %rd88, %rd4;
shl.b64 %rd90, %rd89, 2;
add.s64 %rd91, %rd3, %rd90;
ld.global.u32 %r4068, [%rd91];
abs.s32 %r4069, %r4068;
max.u32 %r8423, %r8423, %r4069;
mov.u32 %r4070, %ntid.x;
add.s32 %r8419, %r8419, %r4070;
setp.lt.u32 %p15, %r8419, %r8;
@%p15 bra $L__BB2_5;
$L__BB2_9:
mov.u32 %r4077, %tid.x;
shl.b32 %r4078, %r4077, 2;
mov.u32 %r4079, _ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE9block_max;
add.s32 %r4080, %r4079, %r4078;
st.shared.u32 [%r4080], %r8423;
bar.sync 0;
mov.u32 %r4081, %ntid.x;
shr.u32 %r8424, %r4081, 1;
setp.eq.s32 %p18, %r8424, 0;
@%p18 bra $L__BB2_13;
$L__BB2_10:
setp.ge.u32 %p19, %r4077, %r8424;
@%p19 bra $L__BB2_12;
add.s32 %r4087, %r8424, %r4077;
shl.b32 %r4088, %r8424, 2;
add.s32 %r4089, %r4080, %r4088;
ld.shared.u32 %r4090, [%r4089];
ld.shared.u32 %r4091, [%r4080];
setp.gt.u32 %p20, %r4091, %r4090;
selp.b32 %r4092, %r4077, %r4087, %p20;
shl.b32 %r4093, %r4092, 2;
add.s32 %r4094, %r4079, %r4093;
ld.shared.u32 %r4095, [%r4094];
st.shared.u32 [%r4080], %r4095;
$L__BB2_12:
bar.sync 0;
shr.u32 %r8424, %r8424, 1;
setp.ne.s32 %p21, %r8424, 0;
@%p21 bra $L__BB2_10;
$L__BB2_13:
ld.shared.u32 %r8425, [_ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE9block_max];
$L__BB2_15:
mov.u32 %r4097, %tid.x;
setp.ne.s32 %p22, %r4097, 0;
@%p22 bra $L__BB2_1905;
setp.eq.s32 %p2367, %r4058, 0;
mov.u32 %r8410, %ctaid.x;
ld.param.u64 %rd1414, [ j2k_htj2k_encode_codeblocks_multi_input_param_5];
mov.u32 %r4098, 0;
mov.u32 %r4099, 1;
cvt.u64.u32 %rd5, %r4060;
setp.eq.s32 %p24, %r4062, 1;
cvta.to.global.u64 %rd96, %rd1414;
mul.wide.u32 %rd97, %r8410, 32;
add.s64 %rd6, %rd96, %rd97;
st.global.u32 [%rd6], %r4099;
st.global.u32 [%rd6+4], %r4098;
st.global.u32 [%rd6+8], %r4098;
st.global.u32 [%rd6+12], %r4098;
st.global.u32 [%rd6+16], %r4098;
st.global.u32 [%rd6+20], %r4098;
st.global.u32 [%rd6+24], %r4098;
st.global.u32 [%rd6+28], %r4098;
add.s32 %r4101, %r4057, -1;
setp.ge.u32 %p25, %r4101, %r4055;
or.pred %p26, %p25, %p2367;
setp.gt.u32 %p27, %r4057, 1024;
or.pred %p1, %p27, %p26;
add.s64 %rd98, %rd1, %rd5;
add.s64 %rd7, %rd98, 20548;
mov.u32 %r4103, _ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE13cleanup_e_val;
@%p24 bra $L__BB2_1254;
bra.uni $L__BB2_17;
$L__BB2_1254:
@%p1 bra $L__BB2_1903;
cvt.u16.u32 %rs822, %r4057;
mov.u16 %rs823, 4096;
div.u16 %rs824, %rs823, %rs822;
cvt.u32.u16 %r6801, %rs824;
setp.gt.u32 %p1625, %r4058, %r6801;
add.s32 %r10752, %r4059, -1;
setp.gt.u32 %p1626, %r10752, 29;
or.pred %p1627, %p1626, %p1625;
setp.lt.u32 %p1628, %r4061, 20549;
or.pred %p1629, %p1628, %p1627;
@%p1629 bra $L__BB2_1903;
bra.uni $L__BB2_1256;
$L__BB2_1903:
mov.u32 %r8399, 2;
st.global.u32 [%rd6], %r8399;
mov.u32 %r8400, 1;
st.global.u32 [%rd6+4], %r8400;
mov.u32 %r10750, 0;
mov.u32 %r10751, %r10750;
mov.u32 %r10752, %r10750;
mov.u32 %r10753, %r10750;
$L__BB2_1904:
st.global.u32 [%rd6+8], %r10751;
st.global.u32 [%rd6+12], %r10753;
st.global.u32 [%rd6+16], %r10752;
st.global.u32 [%rd6+20], %r10751;
mov.u32 %r8401, 0;
st.global.u32 [%rd6+24], %r8401;
st.global.u32 [%rd6+28], %r10750;
bra.uni $L__BB2_1905;
$L__BB2_17:
@%p1 bra $L__BB2_1253;
cvt.u16.u32 %rs507, %r4057;
mov.u16 %rs508, 4096;
div.u16 %rs509, %rs508, %rs507;
cvt.u32.u16 %r4105, %rs509;
setp.gt.u32 %p28, %r4058, %r4105;
add.s32 %r4106, %r4059, -1;
setp.gt.u32 %p29, %r4106, 29;
or.pred %p30, %p29, %p28;
setp.lt.u32 %p31, %r4061, 20549;
or.pred %p32, %p31, %p30;
@%p32 bra $L__BB2_1253;
bra.uni $L__BB2_19;
$L__BB2_1253:
mov.u32 %r6798, 2;
st.global.u32 [%rd6], %r6798;
mov.u32 %r6799, 1;
st.global.u32 [%rd6+4], %r6799;
mov.u32 %r6800, 0;
st.global.u32 [%rd6+8], %r6800;
st.global.u32 [%rd6+12], %r6800;
st.global.u32 [%rd6+16], %r6800;
st.global.u32 [%rd6+20], %r6800;
st.global.u32 [%rd6+24], %r6800;
st.global.u32 [%rd6+28], %r6800;
$L__BB2_1905:
ret;
$L__BB2_1256:
setp.eq.s32 %p1630, %r8425, 0;
@%p1630 bra $L__BB2_1902;
clz.b32 %r6802, %r8425;
mov.u32 %r6803, 32;
sub.s32 %r6804, %r6803, %r6802;
setp.gt.u32 %p1631, %r6804, %r4059;
@%p1631 bra $L__BB2_1901;
bra.uni $L__BB2_1258;
$L__BB2_1901:
mov.u32 %r8390, 1;
st.global.u32 [%rd6], %r8390;
mov.u32 %r8391, 2;
st.global.u32 [%rd6+4], %r8391;
mov.u32 %r10750, 0;
mov.u32 %r10751, %r10750;
mov.u32 %r10752, %r10750;
mov.u32 %r10753, %r10750;
bra.uni $L__BB2_1904;
$L__BB2_19:
add.s32 %r4107, %r4062, -1;
setp.gt.u32 %p33, %r4107, 163;
@%p33 bra $L__BB2_1252;
bra.uni $L__BB2_20;
$L__BB2_1252:
mov.u32 %r6795, 2;
st.global.u32 [%rd6], %r6795;
mov.u32 %r6796, 5;
st.global.u32 [%rd6+4], %r6796;
mov.u32 %r6797, 0;
st.global.u32 [%rd6+8], %r6797;
st.global.u32 [%rd6+12], %r6797;
st.global.u32 [%rd6+16], %r6797;
st.global.u32 [%rd6+20], %r6797;
st.global.u32 [%rd6+24], %r6797;
st.global.u32 [%rd6+28], %r6797;
bra.uni $L__BB2_1905;
$L__BB2_20:
setp.gt.u32 %p34, %r4062, 3;
@%p34 bra $L__BB2_1251;
bra.uni $L__BB2_21;
$L__BB2_1251:
mov.u32 %r6792, 2;
st.global.u32 [%rd6], %r6792;
mov.u32 %r6793, 5;
st.global.u32 [%rd6+4], %r6793;
mov.u32 %r6794, 0;
st.global.u32 [%rd6+8], %r6794;
st.global.u32 [%rd6+12], %r6794;
st.global.u32 [%rd6+16], %r6794;
st.global.u32 [%rd6+20], %r6794;
st.global.u32 [%rd6+24], %r6794;
st.global.u32 [%rd6+28], %r6794;
bra.uni $L__BB2_1905;
$L__BB2_1902:
mov.u32 %r10750, 0;
st.global.u32 [%rd6], %r10750;
st.global.u32 [%rd6+4], %r10750;
mov.u32 %r10751, %r10750;
mov.u32 %r10752, %r4059;
mov.u32 %r10753, %r10750;
bra.uni $L__BB2_1904;
$L__BB2_1258:
add.s32 %r8416, %r4057, 1;
shr.u32 %r8415, %r8416, 1;
mov.u32 %r9741, 0;
mov.u32 %r6806, 31;
sub.s32 %r2358, %r6806, %r4059;
mov.u16 %rs825, 255;
st.global.u8 [%rd7], %rs825;
add.s32 %r6807, %r8415, 2;
min.u32 %r2359, %r6807, 513;
mov.u32 %r6808, -3;
sub.s32 %r6809, %r6808, %r8415;
max.u32 %r6810, %r6809, -514;
mov.u32 %r6811, -2;
sub.s32 %r6812, %r6811, %r6810;
and.b32 %r9743, %r2359, 3;
setp.lt.u32 %p1632, %r6812, 3;
@%p1632 bra $L__BB2_1261;
sub.s32 %r9740, %r2359, %r9743;
mov.u32 %r9741, 0;
$L__BB2_1260:
add.s32 %r6815, %r4103, %r9741;
mov.u16 %rs826, 0;
st.shared.u8 [%r6815], %rs826;
mov.u32 %r6816, _ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE14cleanup_cx_val;
add.s32 %r6817, %r6816, %r9741;
st.shared.u8 [%r6817], %rs826;
st.shared.u8 [%r6815+1], %rs826;
st.shared.u8 [%r6817+1], %rs826;
st.shared.u8 [%r6815+2], %rs826;
st.shared.u8 [%r6817+2], %rs826;
st.shared.u8 [%r6815+3], %rs826;
st.shared.u8 [%r6817+3], %rs826;
add.s32 %r9741, %r9741, 4;
add.s32 %r9740, %r9740, -4;
setp.ne.s32 %p1633, %r9740, 0;
@%p1633 bra $L__BB2_1260;
$L__BB2_1261:
setp.eq.s32 %p1634, %r9743, 0;
@%p1634 bra $L__BB2_1264;
mov.u32 %r6820, _ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE14cleanup_cx_val;
$L__BB2_1263:
.pragma "nounroll";
add.s32 %r6819, %r4103, %r9741;
mov.u16 %rs827, 0;
st.shared.u8 [%r6819], %rs827;
add.s32 %r6821, %r6820, %r9741;
st.shared.u8 [%r6821], %rs827;
add.s32 %r9741, %r9741, 1;
add.s32 %r9743, %r9743, -1;
setp.ne.s32 %p1635, %r9743, 0;
@%p1635 bra $L__BB2_1263;
$L__BB2_1264:
mov.u32 %r10461, 0;
mov.u32 %r10274, 1;
mov.u16 %rs1253, 0;
mov.u32 %r10462, 8;
mov.u16 %rs1322, 15;
mov.u32 %r10275, 4;
mov.u32 %r10463, %r10461;
mov.u32 %r10464, %r10461;
mov.u32 %r10495, %r10461;
mov.u32 %r10276, %r10274;
mov.u32 %r10277, %r10461;
mov.u32 %r9826, %r10461;
mov.u32 %r9835, %r10462;
mov.u32 %r10040, %r10461;
mov.u32 %r10041, %r10461;
mov.u32 %r10042, %r10274;
mov.u32 %r10043, %r10461;
@%p10 bra $L__BB2_1632;
ld.param.u64 %rd1417, [ j2k_htj2k_encode_codeblocks_multi_input_param_4];
ld.param.u64 %rd1412, [ j2k_htj2k_encode_codeblocks_multi_input_param_2];
cvta.to.global.u64 %rd62, %rd1412;
cvta.to.global.u64 %rd63, %rd1417;
mov.u32 %r6854, 0;
mov.u32 %r9835, 8;
mov.u32 %r10042, 1;
mov.u32 %r10275, 4;
mov.u16 %rs1322, 15;
mov.u16 %rs1253, 0;
mov.u32 %r9744, %r6854;
mov.u32 %r10043, %r6854;
mov.u32 %r10041, %r6854;
mov.u32 %r10040, %r6854;
mov.u32 %r9826, %r6854;
mov.u32 %r10277, %r6854;
mov.u32 %r10276, %r10042;
mov.u32 %r10274, %r10042;
mov.u32 %r10495, %r6854;
mov.u32 %r10464, %r6854;
mov.u32 %r10463, %r6854;
mov.u32 %r10462, %r9835;
mov.u32 %r10461, %r6854;
mov.u32 %r9760, %r6854;
mov.u32 %r9761, %r6854;
bra.uni $L__BB2_1266;
$L__BB2_21:
setp.eq.s32 %p35, %r8425, 0;
@%p35 bra $L__BB2_1250;
clz.b32 %r4108, %r8425;
mov.u32 %r4109, 32;
sub.s32 %r4110, %r4109, %r4108;
setp.gt.u32 %p36, %r4110, %r4059;
@%p36 bra $L__BB2_1249;
bra.uni $L__BB2_23;
$L__BB2_1249:
mov.u32 %r6788, 1;
st.global.u32 [%rd6], %r6788;
mov.u32 %r6789, 2;
st.global.u32 [%rd6+4], %r6789;
mov.u32 %r6790, 0;
st.global.u32 [%rd6+8], %r6790;
st.global.u32 [%rd6+12], %r6790;
st.global.u32 [%rd6+16], %r6790;
st.global.u32 [%rd6+20], %r6790;
st.global.u32 [%rd6+24], %r6790;
st.global.u32 [%rd6+28], %r6790;
bra.uni $L__BB2_1905;
$L__BB2_1432:
setp.gt.u32 %p1816, %r9826, 191;
mov.u32 %r10029, 1;
mov.u32 %r9835, 0;
@%p1816 bra $L__BB2_1434;
st.global.u8 [%rd67], %rs1253;
add.s32 %r9826, %r9826, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9835, 8;
mov.u32 %r10029, %r10043;
bra.uni $L__BB2_1434;
$L__BB2_1337:
setp.gt.u32 %p1714, %r9826, 191;
mov.u32 %r9843, 1;
mov.u32 %r9835, 0;
@%p1714 bra $L__BB2_1339;
and.b16 %rs865, %rs1253, 255;
st.global.u8 [%rd64], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1715, %rs865, 255;
selp.b32 %r9835, 7, 8, %p1715;
mov.u16 %rs1253, 0;
mov.u32 %r9843, %r10043;
bra.uni $L__BB2_1339;
$L__BB2_1471:
setp.gt.u32 %p1863, %r9826, 191;
mov.u32 %r10036, 1;
mov.u32 %r9835, 0;
@%p1863 bra $L__BB2_1473;
and.b16 %rs914, %rs1253, 255;
st.global.u8 [%rd67], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1864, %rs914, 255;
selp.b32 %r9835, 7, 8, %p1864;
mov.u16 %rs1253, 0;
mov.u32 %r10036, %r10043;
bra.uni $L__BB2_1473;
$L__BB2_1266:
cvt.u64.u32 %rd1066, %r9761;
add.s64 %rd1067, %rd1066, %rd4;
shl.b64 %rd1068, %rd1067, 2;
add.s64 %rd1069, %rd3, %rd1068;
ld.global.u32 %r2389, [%rd1069];
setp.eq.s32 %p1637, %r2389, 0;
mov.u32 %r9762, %r6854;
@%p1637 bra $L__BB2_1268;
and.b32 %r6856, %r2389, -2147483648;
abs.s32 %r6857, %r2389;
shl.b32 %r6858, %r6857, %r2358;
or.b32 %r9762, %r6858, %r6856;
$L__BB2_1268:
shl.b32 %r6862, %r9762, 1;
shr.u32 %r6863, %r6862, %r2358;
and.b32 %r2392, %r6863, -2;
setp.eq.s32 %p1638, %r2392, 0;
mov.u32 %r9766, 0;
mov.u32 %r9763, %r9766;
mov.u32 %r9764, %r9766;
mov.u32 %r9770, %r9766;
@%p1638 bra $L__BB2_1270;
add.s32 %r6865, %r2392, -1;
clz.b32 %r6866, %r6865;
mov.u32 %r6867, 32;
sub.s32 %r9763, %r6867, %r6866;
shr.u32 %r6868, %r9762, 31;
add.s32 %r6869, %r6868, %r2392;
add.s32 %r9764, %r6869, -2;
mov.u32 %r9770, 1;
$L__BB2_1270:
setp.lt.u32 %p1639, %r4058, 2;
@%p1639 bra $L__BB2_1273;
add.s32 %r6872, %r9761, %r4055;
cvt.u64.u32 %rd1070, %r6872;
add.s64 %rd1071, %rd1070, %rd4;
shl.b64 %rd1072, %rd1071, 2;
add.s64 %rd1073, %rd3, %rd1072;
ld.global.u32 %r2398, [%rd1073];
setp.eq.s32 %p1640, %r2398, 0;
@%p1640 bra $L__BB2_1273;
and.b32 %r6873, %r2398, -2147483648;
abs.s32 %r6874, %r2398;
shl.b32 %r6875, %r6874, %r2358;
or.b32 %r9766, %r6875, %r6873;
$L__BB2_1273:
shl.b32 %r6878, %r9766, 1;
shr.u32 %r6879, %r6878, %r2358;
and.b32 %r2401, %r6879, -2;
setp.eq.s32 %p1641, %r2401, 0;
mov.u32 %r9781, 0;
mov.u32 %r9767, %r9781;
mov.u32 %r9768, %r9781;
mov.u32 %r9786, %r9763;
@%p1641 bra $L__BB2_1275;
or.b32 %r9770, %r9770, 2;
add.s32 %r6880, %r2401, -1;
clz.b32 %r6881, %r6880;
mov.u32 %r6882, 32;
sub.s32 %r9767, %r6882, %r6881;
max.s32 %r9786, %r9763, %r9767;
shr.u32 %r6883, %r9766, 31;
add.s32 %r6884, %r6883, %r2401;
add.s32 %r9768, %r6884, -2;
$L__BB2_1275:
add.s32 %r9785, %r9761, 1;
add.s32 %r6889, %r9744, 1;
setp.ge.u32 %p1642, %r6889, %r4057;
mov.u32 %r9782, %r9781;
mov.u32 %r9783, %r9781;
mov.u32 %r9784, %r9781;
@%p1642 bra $L__BB2_1286;
cvt.u64.u32 %rd1074, %r9785;
add.s64 %rd1075, %rd1074, %rd4;
shl.b64 %rd1076, %rd1075, 2;
add.s64 %rd1077, %rd3, %rd1076;
ld.global.u32 %r2411, [%rd1077];
setp.eq.s32 %p1643, %r2411, 0;
mov.u32 %r9782, 0;
mov.u32 %r9771, %r9782;
@%p1643 bra $L__BB2_1278;
and.b32 %r6891, %r2411, -2147483648;
abs.s32 %r6892, %r2411;
shl.b32 %r6893, %r6892, %r2358;
or.b32 %r9771, %r6893, %r6891;
$L__BB2_1278:
shl.b32 %r6896, %r9771, 1;
shr.u32 %r6897, %r6896, %r2358;
and.b32 %r2414, %r6897, -2;
setp.eq.s32 %p1644, %r2414, 0;
mov.u32 %r9784, %r9782;
@%p1644 bra $L__BB2_1280;
or.b32 %r9770, %r9770, 4;
add.s32 %r6898, %r2414, -1;
clz.b32 %r6899, %r6898;
mov.u32 %r6900, 32;
sub.s32 %r9782, %r6900, %r6899;
max.s32 %r9786, %r9786, %r9782;
shr.u32 %r6901, %r9771, 31;
add.s32 %r6902, %r6901, %r2414;
add.s32 %r9784, %r6902, -2;
$L__BB2_1280:
mov.u32 %r9781, 0;
mov.u32 %r9776, %r9781;
@%p1639 bra $L__BB2_1283;
add.s32 %r6905, %r9785, %r4055;
cvt.u64.u32 %rd1078, %r6905;
add.s64 %rd1079, %rd1078, %rd4;
shl.b64 %rd1080, %rd1079, 2;
add.s64 %rd1081, %rd3, %rd1080;
ld.global.u32 %r2423, [%rd1081];
setp.eq.s32 %p1646, %r2423, 0;
@%p1646 bra $L__BB2_1283;
and.b32 %r6906, %r2423, -2147483648;
abs.s32 %r6907, %r2423;
shl.b32 %r6908, %r6907, %r2358;
or.b32 %r9776, %r6908, %r6906;
$L__BB2_1283:
shl.b32 %r6911, %r9776, 1;
shr.u32 %r6912, %r6911, %r2358;
and.b32 %r2426, %r6912, -2;
setp.eq.s32 %p1647, %r2426, 0;
mov.u32 %r9783, %r9781;
@%p1647 bra $L__BB2_1285;
or.b32 %r9770, %r9770, 8;
add.s32 %r6913, %r2426, -1;
clz.b32 %r6914, %r6913;
mov.u32 %r6915, 32;
sub.s32 %r9781, %r6915, %r6914;
max.s32 %r9786, %r9786, %r9781;
shr.u32 %r6916, %r9776, 31;
add.s32 %r6917, %r6916, %r2426;
add.s32 %r9783, %r6917, -2;
$L__BB2_1285:
add.s32 %r9785, %r9761, 2;
$L__BB2_1286:
mov.u32 %r9761, %r9785;
add.s32 %r6919, %r9786, -1;
setp.lt.s32 %p1648, %r9786, 2;
setp.gt.s32 %p1649, %r9786, 1;
selp.b32 %r2443, %r6919, 0, %p1649;
mov.u32 %r9788, 0;
@%p1648 bra $L__BB2_1288;
setp.eq.s32 %p1650, %r9763, %r9786;
selp.u32 %r6920, 1, 0, %p1650;
setp.eq.s32 %p1651, %r9767, %r9786;
selp.u32 %r6921, -1, 0, %p1651;
bfi.b32 %r6922, %r6921, %r6920, 1, 1;
setp.eq.s32 %p1652, %r9782, %r9786;
selp.u16 %rs832, 1, 0, %p1652;
mul.wide.u16 %r6923, %rs832, 4;
or.b32 %r6924, %r6922, %r6923;
setp.eq.s32 %p1653, %r9781, %r9786;
selp.u16 %rs833, 1, 0, %p1653;
mul.wide.u16 %r6925, %rs833, 8;
or.b32 %r9788, %r6924, %r6925;
$L__BB2_1288:
shr.u32 %r6926, %r9744, 1;
add.s32 %r2446, %r4103, %r6926;
ld.shared.u8 %rs834, [%r2446];
cvt.u32.u16 %r6928, %rs834;
and.b32 %r6929, %r6928, 255;
and.b32 %r6930, %r9767, 255;
setp.lt.u32 %p1654, %r6930, %r6929;
cvt.u16.u32 %rs835, %r9767;
selp.b16 %rs836, %rs834, %rs835, %p1654;
st.shared.u8 [%r2446], %rs836;
cvt.u16.u32 %rs274, %r9781;
st.shared.u8 [%r2446+1], %rs274;
and.b32 %r2447, %r9770, 2;
cvt.u16.u32 %rs837, %r2447;
shr.u16 %rs838, %rs837, 1;
mov.u32 %r6931, _ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE14cleanup_cx_val;
add.s32 %r2448, %r6931, %r6926;
ld.shared.u8 %rs839, [%r2448];
or.b16 %rs840, %rs839, %rs838;
st.shared.u8 [%r2448], %rs840;
and.b32 %r2449, %r9770, 8;
shr.u32 %r2450, %r2449, 3;
st.shared.u8 [%r2448+1], %r2450;
shl.b32 %r6932, %r9770, 4;
shl.b32 %r6933, %r9760, 8;
or.b32 %r6934, %r6932, %r6933;
or.b32 %r6935, %r6934, %r9788;
mul.wide.u32 %rd1082, %r6935, 2;
add.s64 %rd1083, %rd62, %rd1082;
ld.global.u16 %rs275, [%rd1083];
shr.u16 %rs841, %rs275, 4;
and.b16 %rs276, %rs841, 7;
setp.eq.s16 %p1655, %rs276, 0;
mov.u32 %r9800, %r10277;
@%p1655 bra $L__BB2_1295;
cvt.u32.u16 %r9789, %rs276;
shr.u16 %rs842, %rs275, 8;
cvt.u32.u16 %r9790, %rs842;
$L__BB2_1290:
mov.u32 %r2453, %r9789;
setp.gt.u32 %p1656, %r10274, 2879;
mov.u32 %r9800, 1;
@%p1656 bra $L__BB2_1295;
mov.u32 %r6937, 8;
sub.s32 %r6938, %r6937, %r10276;
sub.s32 %r6939, %r6938, %r10275;
min.u32 %r6940, %r6939, %r2453;
setp.eq.s32 %p1657, %r6940, 32;
mov.u32 %r6941, -1;
shl.b32 %r6942, %r6941, %r6940;
not.b32 %r6943, %r6942;
selp.b32 %r6944, -1, %r6943, %p1657;
and.b32 %r6945, %r6944, %r9790;
shl.b32 %r6946, %r6945, %r10275;
cvt.u16.u32 %rs843, %r6946;
or.b16 %rs1322, %rs1322, %rs843;
add.s32 %r10275, %r6940, %r10275;
sub.s32 %r9789, %r2453, %r6940;
shr.u32 %r9790, %r9790, %r6940;
setp.gt.u32 %p1658, %r6939, %r2453;
@%p1658 bra $L__BB2_1294;
setp.ne.s32 %p1659, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs844, %rs1322, 255;
setp.ne.s16 %p1660, %rs844, 127;
and.pred %p1661, %p1659, %p1660;
@%p1661 bra $L__BB2_1294;
mov.u32 %r6949, 20548;
sub.s32 %r6950, %r6949, %r10274;
cvt.u64.u32 %rd1084, %r6950;
add.s64 %rd1085, %rd1084, %rd5;
add.s64 %rd1086, %rd1, %rd1085;
st.global.u8 [%rd1086], %rs1322;
add.s32 %r10274, %r10274, 1;
setp.gt.u16 %p1662, %rs844, 143;
selp.u32 %r10276, 1, 0, %p1662;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1294:
setp.ne.s32 %p1663, %r9789, 0;
mov.u32 %r9800, %r10277;
@%p1663 bra $L__BB2_1290;
$L__BB2_1295:
setp.ne.s32 %p1664, %r9760, 0;
@%p1664 bra $L__BB2_1343;
setp.eq.s32 %p1665, %r9770, 0;
add.s32 %r6951, %r9826, 17477;
cvt.u64.u32 %rd1087, %r6951;
add.s64 %rd1088, %rd1087, %rd5;
add.s64 %rd64, %rd1, %rd1088;
@%p1665 bra $L__BB2_1335;
shl.b16 %rs1253, %rs1253, 1;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p1666, %r9835, 0;
mov.u32 %r9836, %r10043;
@%p1666 bra $L__BB2_1300;
setp.gt.u32 %p1667, %r9826, 191;
mov.u32 %r9836, 1;
mov.u32 %r9835, 0;
@%p1667 bra $L__BB2_1300;
st.global.u8 [%rd64], %rs1253;
add.s32 %r9826, %r9826, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9835, 8;
mov.u32 %r9836, %r10043;
$L__BB2_1300:
setp.lt.u32 %p1668, %r10041, 3;
mov.u32 %r9804, 0;
@%p1668 bra $L__BB2_1303;
setp.lt.u32 %p1669, %r10041, 6;
mov.u32 %r9804, 1;
@%p1669 bra $L__BB2_1303;
setp.lt.u32 %p1670, %r10041, 9;
setp.eq.s32 %p1671, %r10041, 11;
selp.b32 %r6957, 4, 5, %p1671;
setp.lt.u32 %p1672, %r10041, 11;
selp.b32 %r6958, 3, %r6957, %p1672;
selp.b32 %r9804, 2, %r6958, %p1670;
$L__BB2_1303:
setp.eq.s32 %p1673, %r9804, 0;
@%p1673 bra $L__BB2_1331;
add.s32 %r2477, %r9804, -1;
and.b32 %r2478, %r9804, 3;
setp.eq.s32 %p1674, %r2478, 0;
mov.u32 %r9814, %r9804;
mov.u32 %r9815, %r9836;
@%p1674 bra $L__BB2_1316;
mov.u32 %r6960, 1;
shl.b32 %r6961, %r6960, %r2477;
and.b32 %r6962, %r6961, %r10040;
setp.ne.s32 %p1675, %r6962, 0;
selp.u32 %r6963, 1, 0, %p1675;
cvt.u32.u16 %r6964, %rs1253;
bfi.b32 %r6965, %r6964, %r6963, 1, 8;
cvt.u16.u32 %rs1253, %r6965;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p1676, %r9835, 0;
mov.u32 %r9815, %r9836;
@%p1676 bra $L__BB2_1308;
setp.gt.u32 %p1677, %r9826, 191;
mov.u32 %r9835, 0;
mov.u32 %r9815, %r6960;
@%p1677 bra $L__BB2_1308;
add.s32 %r6969, %r9826, 17477;
cvt.u64.u32 %rd1089, %r6969;
add.s64 %rd1090, %rd1089, %rd5;
add.s64 %rd1091, %rd1, %rd1090;
st.global.u8 [%rd1091], %rs1253;
add.s32 %r9826, %r9826, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9835, 8;
mov.u32 %r9815, %r9836;
$L__BB2_1308:
setp.eq.s32 %p1678, %r2478, 1;
mov.u32 %r9836, %r9815;
mov.u32 %r9814, %r2477;
@%p1678 bra $L__BB2_1316;
add.s32 %r9814, %r9804, -2;
mov.u32 %r6970, 1;
shl.b32 %r6971, %r6970, %r9814;
and.b32 %r6972, %r6971, %r10040;
setp.ne.s32 %p1679, %r6972, 0;
selp.u32 %r6973, 1, 0, %p1679;
cvt.u32.u16 %r6974, %rs1253;
bfi.b32 %r6975, %r6974, %r6973, 1, 8;
cvt.u16.u32 %rs1253, %r6975;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p1680, %r9835, 0;
mov.u32 %r9810, %r9815;
@%p1680 bra $L__BB2_1312;
setp.gt.u32 %p1681, %r9826, 191;
mov.u32 %r9835, 0;
mov.u32 %r9810, %r6970;
@%p1681 bra $L__BB2_1312;
add.s32 %r6978, %r9826, 17477;
cvt.u64.u32 %rd1092, %r6978;
add.s64 %rd1093, %rd1092, %rd5;
add.s64 %rd1094, %rd1, %rd1093;
and.b16 %rs851, %rs1253, 255;
st.global.u8 [%rd1094], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1682, %rs851, 255;
selp.b32 %r9835, 7, 8, %p1682;
mov.u16 %rs1253, 0;
mov.u32 %r9810, %r9815;
$L__BB2_1312:
setp.eq.s32 %p1683, %r2478, 2;
mov.u32 %r9836, %r9810;
mov.u32 %r9815, %r9810;
@%p1683 bra $L__BB2_1316;
add.s32 %r9814, %r9804, -3;
mov.u32 %r6979, 1;
shl.b32 %r6980, %r6979, %r9814;
and.b32 %r6981, %r6980, %r10040;
setp.ne.s32 %p1684, %r6981, 0;
selp.u32 %r6982, 1, 0, %p1684;
cvt.u32.u16 %r6983, %rs1253;
bfi.b32 %r6984, %r6983, %r6982, 1, 8;
cvt.u16.u32 %rs1253, %r6984;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p1685, %r9835, 0;
mov.u32 %r9836, %r9810;
mov.u32 %r9815, %r9810;
@%p1685 bra $L__BB2_1316;
setp.gt.u32 %p1686, %r9826, 191;
mov.u32 %r9835, 0;
mov.u32 %r9836, %r6979;
mov.u32 %r9815, %r6979;
@%p1686 bra $L__BB2_1316;
add.s32 %r6989, %r9826, 17477;
cvt.u64.u32 %rd1095, %r6989;
add.s64 %rd1096, %rd1095, %rd5;
add.s64 %rd1097, %rd1, %rd1096;
and.b16 %rs854, %rs1253, 255;
st.global.u8 [%rd1097], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1687, %rs854, 255;
selp.b32 %r9835, 7, 8, %p1687;
mov.u16 %rs1253, 0;
mov.u32 %r9836, %r9810;
mov.u32 %r9815, %r9810;
$L__BB2_1316:
setp.lt.u32 %p1688, %r2477, 3;
@%p1688 bra $L__BB2_1331;
mov.u32 %r9836, %r9815;
$L__BB2_1318:
add.s32 %r6990, %r9814, -1;
mov.u32 %r6991, 1;
shl.b32 %r6992, %r6991, %r6990;
and.b32 %r6993, %r6992, %r10040;
setp.ne.s32 %p1689, %r6993, 0;
selp.u32 %r6994, 1, 0, %p1689;
cvt.u32.u16 %r6995, %rs1253;
bfi.b32 %r9824, %r6995, %r6994, 1, 8;
add.s32 %r9823, %r9835, -1;
setp.ne.s32 %p1690, %r9823, 0;
mov.u32 %r9825, %r9836;
@%p1690 bra $L__BB2_1321;
setp.gt.u32 %p1691, %r9826, 191;
mov.u32 %r9823, 0;
mov.u32 %r9825, %r6991;
@%p1691 bra $L__BB2_1321;
cvt.u16.u32 %rs855, %r9824;
and.b16 %rs856, %rs855, 255;
add.s32 %r6999, %r9826, 17477;
cvt.u64.u32 %rd1098, %r6999;
add.s64 %rd1099, %rd1098, %rd5;
add.s64 %rd1100, %rd1, %rd1099;
st.global.u8 [%rd1100], %rs855;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1692, %rs856, 255;
selp.b32 %r9823, 7, 8, %p1692;
mov.u32 %r9824, 0;
mov.u32 %r9825, %r9836;
$L__BB2_1321:
add.s32 %r7000, %r9814, -2;
shl.b32 %r7002, %r6991, %r7000;
and.b32 %r7003, %r7002, %r10040;
setp.ne.s32 %p1693, %r7003, 0;
and.b32 %r7004, %r9824, 127;
selp.u32 %r7005, 1, 0, %p1693;
bfi.b32 %r9828, %r7004, %r7005, 1, 7;
add.s32 %r9827, %r9823, -1;
setp.ne.s32 %p1694, %r9827, 0;
mov.u32 %r9829, %r9825;
@%p1694 bra $L__BB2_1324;
setp.gt.u32 %p1695, %r9826, 191;
mov.u32 %r9829, 1;
mov.u32 %r9827, 0;
@%p1695 bra $L__BB2_1324;
cvt.u16.u32 %rs857, %r9828;
and.b16 %rs858, %rs857, 255;
add.s32 %r7009, %r9826, 17477;
cvt.u64.u32 %rd1101, %r7009;
add.s64 %rd1102, %rd1101, %rd5;
add.s64 %rd1103, %rd1, %rd1102;
st.global.u8 [%rd1103], %rs857;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1696, %rs858, 255;
selp.b32 %r9827, 7, 8, %p1696;
mov.u32 %r9828, 0;
mov.u32 %r9829, %r9825;
$L__BB2_1324:
add.s32 %r7010, %r9814, -3;
mov.u32 %r7011, 1;
shl.b32 %r7012, %r7011, %r7010;
and.b32 %r7013, %r7012, %r10040;
setp.ne.s32 %p1697, %r7013, 0;
and.b32 %r7014, %r9828, 127;
selp.u32 %r7015, 1, 0, %p1697;
bfi.b32 %r9832, %r7014, %r7015, 1, 7;
add.s32 %r9831, %r9827, -1;
setp.ne.s32 %p1698, %r9831, 0;
mov.u32 %r9833, %r9829;
@%p1698 bra $L__BB2_1327;
setp.gt.u32 %p1699, %r9826, 191;
mov.u32 %r9831, 0;
mov.u32 %r9833, %r7011;
@%p1699 bra $L__BB2_1327;
cvt.u16.u32 %rs859, %r9832;
and.b16 %rs860, %rs859, 255;
add.s32 %r7019, %r9826, 17477;
cvt.u64.u32 %rd1104, %r7019;
add.s64 %rd1105, %rd1104, %rd5;
add.s64 %rd1106, %rd1, %rd1105;
st.global.u8 [%rd1106], %rs859;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1700, %rs860, 255;
selp.b32 %r9831, 7, 8, %p1700;
mov.u32 %r9832, 0;
mov.u32 %r9833, %r9829;
$L__BB2_1327:
add.s32 %r9814, %r9814, -4;
shl.b32 %r7021, %r7011, %r9814;
and.b32 %r7022, %r7021, %r10040;
setp.ne.s32 %p1701, %r7022, 0;
and.b32 %r7023, %r9832, 127;
selp.u32 %r7024, 1, 0, %p1701;
bfi.b32 %r7025, %r7023, %r7024, 1, 15;
cvt.u16.u32 %rs1253, %r7025;
add.s32 %r9835, %r9831, -1;
setp.ne.s32 %p1702, %r9835, 0;
mov.u32 %r9836, %r9833;
@%p1702 bra $L__BB2_1330;
setp.gt.u32 %p1703, %r9826, 191;
mov.u32 %r9836, 1;
mov.u32 %r9835, 0;
@%p1703 bra $L__BB2_1330;
add.s32 %r7028, %r9826, 17477;
cvt.u64.u32 %rd1107, %r7028;
add.s64 %rd1108, %rd1107, %rd5;
add.s64 %rd1109, %rd1, %rd1108;
and.b16 %rs862, %rs1253, 255;
st.global.u8 [%rd1109], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1704, %rs862, 255;
selp.b32 %r9835, 7, 8, %p1704;
mov.u16 %rs1253, 0;
mov.u32 %r9836, %r9833;
$L__BB2_1330:
setp.ne.s32 %p1705, %r9814, 0;
@%p1705 bra $L__BB2_1318;
// Step the state %r10041 down by one (saturating at 0), clear the counter
// %r10040 and recompute the threshold %r10042 = 1 << exponent(new state).
// Exponent ladder: 0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4,
// larger -> 5.
$L__BB2_1331:
add.s32 %r7030, %r10041, -1;
setp.eq.s32 %p1706, %r10041, 0;
mov.u32 %r10040, 0;
selp.b32 %r10041, 0, %r7030, %p1706;
setp.lt.u32 %p1707, %r10041, 3;
// %r9840 = exponent for the new state (defaults to 0 via %r10040).
mov.u32 %r9840, %r10040;
@%p1707 bra $L__BB2_1334;
setp.lt.u32 %p1708, %r10041, 6;
mov.u32 %r9840, 1;
@%p1708 bra $L__BB2_1334;
setp.lt.u32 %p1709, %r10041, 9;
setp.eq.s32 %p1710, %r10041, 11;
selp.b32 %r7032, 4, 5, %p1710;
setp.lt.u32 %p1711, %r10041, 11;
selp.b32 %r7033, 3, %r7032, %p1711;
selp.b32 %r9840, 2, %r7033, %p1709;
$L__BB2_1334:
mov.u32 %r7035, 1;
shl.b32 %r10042, %r7035, %r9840;
// Publish the flag accumulated while emitting bits (%r9836) as %r10043.
mov.u32 %r10043, %r9836;
bra.uni $L__BB2_1343;
// %r9770 == 0 path: bump the counter %r10040. While it stays below the
// threshold %r10042 nothing is emitted.
$L__BB2_1335:
add.s32 %r10040, %r10040, 1;
setp.lt.u32 %p1712, %r10040, %r10042;
@%p1712 bra $L__BB2_1343;
// Threshold reached: append a 1 bit to the accumulator %rs1253.
shl.b16 %rs863, %rs1253, 1;
or.b16 %rs1253, %rs863, 1;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p1713, %r9835, 0;
mov.u32 %r9843, %r10043;
@%p1713 bra $L__BB2_1339;
// Accumulator full: continue at $L__BB2_1337 (defined elsewhere in this
// kernel).
bra.uni $L__BB2_1337;
// Step the state %r10041 up by one (saturating at 12), clear the counter
// %r10040 and recompute the threshold %r10042 = 1 << exponent(state), with
// exponent ladder 0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4,
// larger -> 5.
$L__BB2_1339:
add.s32 %r7039, %r10041, 1;
min.u32 %r10041, %r7039, 12;
setp.lt.u32 %p1716, %r10041, 3;
mov.u32 %r10040, 0;
mov.u32 %r9844, %r10040;
@%p1716 bra $L__BB2_1342;
setp.lt.u32 %p1717, %r10041, 6;
mov.u32 %r9844, 1;
@%p1717 bra $L__BB2_1342;
setp.lt.u32 %p1718, %r10041, 9;
setp.eq.s32 %p1719, %r10041, 11;
selp.b32 %r7041, 4, 5, %p1719;
setp.lt.u32 %p1720, %r10041, 11;
selp.b32 %r7042, 3, %r7041, %p1720;
selp.b32 %r9844, 2, %r7042, %p1718;
$L__BB2_1342:
mov.u32 %r7044, 1;
shl.b32 %r10042, %r7044, %r9844;
mov.u32 %r10043, %r9843;
// Emit the first variable-width field into the forward byte stream.
//   %r2561 = max(%r9786, 1)      : base field width in bits
//   %r2562 = %rs275 & 15         : bit 0 reduces this field's width by one
//   %r2563 = %r9770 & 1          : the field is emitted only when this is set
//   %r9865 = resulting flag      : %r10495 carried through, or 1 on overflow
// NOTE(review): LSB-first packing with a 7-bit byte after 0xFF resembles the
// HTJ2K MagSgn stream - confirm against the CUDA source.
$L__BB2_1343:
max.s32 %r2561, %r9786, 1;
and.b16 %rs866, %rs275, 15;
cvt.u32.u16 %r2562, %rs866;
and.b32 %r2563, %r9770, 1;
setp.eq.s32 %p1721, %r2563, 0;
mov.u32 %r9865, %r10495;
@%p1721 bra $L__BB2_1350;
// %r9851 = number of bits to emit; nothing to do when it is zero.
and.b32 %r7045, %r2562, 1;
sub.s32 %r9851, %r2561, %r7045;
setp.eq.s32 %p1722, %r9851, 0;
mov.u32 %r9865, %r10495;
@%p1722 bra $L__BB2_1350;
// %r9852 = low %r9851 bits of %r9764 (the value to emit).
mov.u32 %r7046, -1;
shl.b32 %r7047, %r7046, %r9851;
not.b32 %r7048, %r7047;
and.b32 %r9852, %r9764, %r7048;
// Loop: move as many bits as fit into the current byte, flush when full.
//   %r10461 = bytes written so far (writes stop once it exceeds 17476)
//   %r10462 = bit capacity of the current byte (8, or 7 after a 0xFF byte)
//   %r10463 = bits already placed in the current byte
//   %r10464 = current byte under construction
$L__BB2_1346:
setp.gt.u32 %p1723, %r10461, 17476;
mov.u32 %r9865, 1;
@%p1723 bra $L__BB2_1350;
// %r7051 = min(free bits in byte, bits remaining); mask = low %r7051 bits.
sub.s32 %r7050, %r10462, %r10463;
min.u32 %r7051, %r7050, %r9851;
setp.eq.s32 %p1724, %r7051, 32;
mov.u32 %r7052, -1;
shl.b32 %r7053, %r7052, %r7051;
not.b32 %r7054, %r7053;
selp.b32 %r7055, -1, %r7054, %p1724;
and.b32 %r7056, %r7055, %r9852;
shl.b32 %r7057, %r7056, %r10463;
or.b32 %r10464, %r7057, %r10464;
add.s32 %r10463, %r7051, %r10463;
shr.u32 %r9852, %r9852, %r7051;
sub.s32 %r9851, %r9851, %r7051;
setp.lt.u32 %p1725, %r10463, %r10462;
@%p1725 bra $L__BB2_1349;
// Byte is full: store it at %rd1 + %rd5 + %r10461 and start a new one. The
// next byte holds only 7 bits when the byte just written equals 255.
cvt.u64.u32 %rd1110, %r10461;
add.s64 %rd1111, %rd1110, %rd5;
add.s64 %rd1112, %rd1, %rd1111;
st.global.u8 [%rd1112], %r10464;
add.s32 %r10461, %r10461, 1;
setp.eq.s32 %p1726, %r10464, 255;
selp.b32 %r10462, 7, 8, %p1726;
mov.u32 %r10463, 0;
mov.u32 %r10464, %r10463;
$L__BB2_1349:
setp.ne.s32 %p1727, %r9851, 0;
mov.u32 %r9865, %r10495;
@%p1727 bra $L__BB2_1346;
$L__BB2_1350:
setp.eq.s32 %p1728, %r2447, 0;
mov.u32 %r9880, %r9865;
@%p1728 bra $L__BB2_1357;
shr.u32 %r7060, %r2562, 1;
and.b32 %r7061, %r7060, 1;
sub.s32 %r9866, %r2561, %r7061;
setp.eq.s32 %p1729, %r9866, 0;
mov.u32 %r9880, %r9865;
@%p1729 bra $L__BB2_1357;
mov.u32 %r7062, -1;
shl.b32 %r7063, %r7062, %r9866;
not.b32 %r7064, %r7063;
and.b32 %r9867, %r9768, %r7064;
$L__BB2_1353:
setp.gt.u32 %p1730, %r10461, 17476;
mov.u32 %r9880, 1;
@%p1730 bra $L__BB2_1357;
sub.s32 %r7066, %r10462, %r10463;
min.u32 %r7067, %r7066, %r9866;
setp.eq.s32 %p1731, %r7067, 32;
mov.u32 %r7068, -1;
shl.b32 %r7069, %r7068, %r7067;
not.b32 %r7070, %r7069;
selp.b32 %r7071, -1, %r7070, %p1731;
and.b32 %r7072, %r7071, %r9867;
shl.b32 %r7073, %r7072, %r10463;
or.b32 %r10464, %r7073, %r10464;
add.s32 %r10463, %r7067, %r10463;
shr.u32 %r9867, %r9867, %r7067;
sub.s32 %r9866, %r9866, %r7067;
setp.lt.u32 %p1732, %r10463, %r10462;
@%p1732 bra $L__BB2_1356;
cvt.u64.u32 %rd1113, %r10461;
add.s64 %rd1114, %rd1113, %rd5;
add.s64 %rd1115, %rd1, %rd1114;
st.global.u8 [%rd1115], %r10464;
add.s32 %r10461, %r10461, 1;
setp.eq.s32 %p1733, %r10464, 255;
selp.b32 %r10462, 7, 8, %p1733;
mov.u32 %r10463, 0;
mov.u32 %r10464, %r10463;
$L__BB2_1356:
setp.ne.s32 %p1734, %r9866, 0;
mov.u32 %r9880, %r9865;
@%p1734 bra $L__BB2_1353;
$L__BB2_1357:
and.b32 %r7076, %r9770, 4;
setp.eq.s32 %p1735, %r7076, 0;
mov.u32 %r9895, %r9880;
@%p1735 bra $L__BB2_1364;
shr.u32 %r7077, %r2562, 2;
and.b32 %r7078, %r7077, 1;
sub.s32 %r9881, %r2561, %r7078;
setp.eq.s32 %p1736, %r9881, 0;
mov.u32 %r9895, %r9880;
@%p1736 bra $L__BB2_1364;
mov.u32 %r7079, -1;
shl.b32 %r7080, %r7079, %r9881;
not.b32 %r7081, %r7080;
and.b32 %r9882, %r9784, %r7081;
$L__BB2_1360:
setp.gt.u32 %p1737, %r10461, 17476;
mov.u32 %r9895, 1;
@%p1737 bra $L__BB2_1364;
sub.s32 %r7083, %r10462, %r10463;
min.u32 %r7084, %r7083, %r9881;
setp.eq.s32 %p1738, %r7084, 32;
mov.u32 %r7085, -1;
shl.b32 %r7086, %r7085, %r7084;
not.b32 %r7087, %r7086;
selp.b32 %r7088, -1, %r7087, %p1738;
and.b32 %r7089, %r7088, %r9882;
shl.b32 %r7090, %r7089, %r10463;
or.b32 %r10464, %r7090, %r10464;
add.s32 %r10463, %r7084, %r10463;
shr.u32 %r9882, %r9882, %r7084;
sub.s32 %r9881, %r9881, %r7084;
setp.lt.u32 %p1739, %r10463, %r10462;
@%p1739 bra $L__BB2_1363;
cvt.u64.u32 %rd1116, %r10461;
add.s64 %rd1117, %rd1116, %rd5;
add.s64 %rd1118, %rd1, %rd1117;
st.global.u8 [%rd1118], %r10464;
add.s32 %r10461, %r10461, 1;
setp.eq.s32 %p1740, %r10464, 255;
selp.b32 %r10462, 7, 8, %p1740;
mov.u32 %r10463, 0;
mov.u32 %r10464, %r10463;
$L__BB2_1363:
setp.ne.s32 %p1741, %r9881, 0;
mov.u32 %r9895, %r9880;
@%p1741 bra $L__BB2_1360;
$L__BB2_1364:
setp.eq.s32 %p1742, %r2449, 0;
mov.u32 %r10495, %r9895;
@%p1742 bra $L__BB2_1371;
shr.u32 %r7093, %r2562, 3;
sub.s32 %r9896, %r2561, %r7093;
setp.eq.s32 %p1743, %r9896, 0;
mov.u32 %r10495, %r9895;
@%p1743 bra $L__BB2_1371;
mov.u32 %r7094, -1;
shl.b32 %r7095, %r7094, %r9896;
not.b32 %r7096, %r7095;
and.b32 %r9897, %r9783, %r7096;
$L__BB2_1367:
setp.gt.u32 %p1744, %r10461, 17476;
mov.u32 %r10495, 1;
@%p1744 bra $L__BB2_1371;
sub.s32 %r7098, %r10462, %r10463;
min.u32 %r7099, %r7098, %r9896;
setp.eq.s32 %p1745, %r7099, 32;
mov.u32 %r7100, -1;
shl.b32 %r7101, %r7100, %r7099;
not.b32 %r7102, %r7101;
selp.b32 %r7103, -1, %r7102, %p1745;
and.b32 %r7104, %r7103, %r9897;
shl.b32 %r7105, %r7104, %r10463;
or.b32 %r10464, %r7105, %r10464;
add.s32 %r10463, %r7099, %r10463;
shr.u32 %r9897, %r9897, %r7099;
sub.s32 %r9896, %r9896, %r7099;
setp.lt.u32 %p1746, %r10463, %r10462;
@%p1746 bra $L__BB2_1370;
cvt.u64.u32 %rd1119, %r10461;
add.s64 %rd1120, %rd1119, %rd5;
add.s64 %rd1121, %rd1, %rd1120;
st.global.u8 [%rd1121], %r10464;
add.s32 %r10461, %r10461, 1;
setp.eq.s32 %p1747, %r10464, 255;
selp.b32 %r10462, 7, 8, %p1747;
mov.u32 %r10463, 0;
mov.u32 %r10464, %r10463;
$L__BB2_1370:
setp.ne.s32 %p1748, %r9896, 0;
mov.u32 %r10495, %r9895;
@%p1748 bra $L__BB2_1367;
// Dispatch: when %r9744 + 2 < %r4057 continue at $L__BB2_1400, otherwise go
// to $L__BB2_1372. Both byte addresses used by $L__BB2_1372 are prepared
// here: %rd65 = %rd63 + 6 * %r2443 and %rd66 = %rd65 + 2.
$L__BB2_1371:
add.s32 %r7108, %r9744, 2;
setp.lt.u32 %p1749, %r7108, %r4057;
mul.lo.s32 %r2656, %r2443, 6;
cvt.u64.u32 %rd1122, %r2656;
add.s64 %rd65, %rd63, %rd1122;
add.s32 %r7109, %r2656, 2;
cvt.u64.u32 %rd1123, %r7109;
add.s64 %rd66, %rd63, %rd1123;
@%p1749 bra $L__BB2_1400;
bra.uni $L__BB2_1372;
// Load one 32-bit value from %rd3 + 4 * (%r9761 + %rd4) and convert it to
// sign-magnitude form: %r9955 = (|v| << %r2358) | (v & 0x80000000), or 0
// when v == 0.
$L__BB2_1400:
cvt.u64.u32 %rd1137, %r9761;
add.s64 %rd1138, %rd1137, %rd4;
shl.b64 %rd1139, %rd1138, 2;
add.s64 %rd1140, %rd3, %rd1139;
ld.global.u32 %r2729, [%rd1140];
setp.eq.s32 %p1786, %r2729, 0;
mov.u32 %r9956, 0;
mov.u32 %r9955, %r9956;
@%p1786 bra $L__BB2_1402;
and.b32 %r7180, %r2729, -2147483648;
abs.s32 %r7181, %r2729;
shl.b32 %r7182, %r7181, %r2358;
or.b32 %r9955, %r7182, %r7180;
// %r2732 = ((%r9955 << 1) >> %r2358) & ~1 : the sign bit is shifted out, the
// magnitude is shifted back down and bit 0 is cleared. A zero result leaves
// %r9956, %r9957 and %r9963 all at 0.
$L__BB2_1402:
shl.b32 %r7186, %r9955, 1;
shr.u32 %r7187, %r7186, %r2358;
and.b32 %r2732, %r7187, -2;
setp.eq.s32 %p1787, %r2732, 0;
mov.u32 %r9957, %r9956;
mov.u32 %r9963, %r9956;
@%p1787 bra $L__BB2_1404;
// Non-zero case:
//   %r9956 = 32 - clz(%r2732 - 1)          (bit length of %r2732 - 1)
//   %r9957 = %r2732 - 2 + sign bit of %r9955
//   %r9963 = 1                              (bit 0 marks this value non-zero)
add.s32 %r7189, %r2732, -1;
clz.b32 %r7190, %r7189;
mov.u32 %r7191, 32;
sub.s32 %r9956, %r7191, %r7190;
shr.u32 %r7192, %r9955, 31;
add.s32 %r7193, %r7192, %r2732;
add.s32 %r9957, %r7193, -2;
mov.u32 %r9963, 1;
$L__BB2_1404:
mov.u32 %r9960, 0;
mov.u32 %r9959, %r9960;
@%p1639 bra $L__BB2_1407;
add.s32 %r7196, %r9761, %r4055;
cvt.u64.u32 %rd1141, %r7196;
add.s64 %rd1142, %rd1141, %rd4;
shl.b64 %rd1143, %rd1142, 2;
add.s64 %rd1144, %rd3, %rd1143;
ld.global.u32 %r2738, [%rd1144];
setp.eq.s32 %p1789, %r2738, 0;
@%p1789 bra $L__BB2_1407;
and.b32 %r7197, %r2738, -2147483648;
abs.s32 %r7198, %r2738;
shl.b32 %r7199, %r7198, %r2358;
or.b32 %r9959, %r7199, %r7197;
$L__BB2_1407:
shl.b32 %r7202, %r9959, 1;
shr.u32 %r7203, %r7202, %r2358;
and.b32 %r2741, %r7203, -2;
setp.eq.s32 %p1790, %r2741, 0;
mov.u32 %r9961, %r9960;
mov.u32 %r9979, %r9956;
@%p1790 bra $L__BB2_1409;
or.b32 %r9963, %r9963, 2;
add.s32 %r7204, %r2741, -1;
clz.b32 %r7205, %r7204;
mov.u32 %r7206, 32;
sub.s32 %r9960, %r7206, %r7205;
max.s32 %r9979, %r9956, %r9960;
shr.u32 %r7207, %r9959, 31;
add.s32 %r7208, %r7207, %r2741;
add.s32 %r9961, %r7208, -2;
$L__BB2_1409:
add.s32 %r9978, %r9761, 1;
add.s32 %r7213, %r9744, 3;
setp.ge.u32 %p1791, %r7213, %r4057;
mov.u32 %r9981, 0;
mov.u32 %r9974, %r9981;
mov.u32 %r9975, %r9981;
mov.u32 %r9976, %r9981;
mov.u32 %r9977, %r9981;
@%p1791 bra $L__BB2_1420;
cvt.u64.u32 %rd1145, %r9978;
add.s64 %rd1146, %rd1145, %rd4;
shl.b64 %rd1147, %rd1146, 2;
add.s64 %rd1148, %rd3, %rd1147;
ld.global.u32 %r2751, [%rd1148];
setp.eq.s32 %p1792, %r2751, 0;
mov.u32 %r9975, 0;
mov.u32 %r9964, %r9975;
@%p1792 bra $L__BB2_1412;
and.b32 %r7215, %r2751, -2147483648;
abs.s32 %r7216, %r2751;
shl.b32 %r7217, %r7216, %r2358;
or.b32 %r9964, %r7217, %r7215;
$L__BB2_1412:
shl.b32 %r7220, %r9964, 1;
shr.u32 %r7221, %r7220, %r2358;
and.b32 %r2754, %r7221, -2;
setp.eq.s32 %p1793, %r2754, 0;
mov.u32 %r9977, %r9975;
@%p1793 bra $L__BB2_1414;
or.b32 %r9963, %r9963, 4;
add.s32 %r7222, %r2754, -1;
clz.b32 %r7223, %r7222;
mov.u32 %r7224, 32;
sub.s32 %r9975, %r7224, %r7223;
max.s32 %r9979, %r9979, %r9975;
shr.u32 %r7225, %r9964, 31;
add.s32 %r7226, %r7225, %r2754;
add.s32 %r9977, %r7226, -2;
$L__BB2_1414:
mov.u32 %r9974, 0;
mov.u32 %r9969, %r9974;
@%p1639 bra $L__BB2_1417;
add.s32 %r7229, %r9978, %r4055;
cvt.u64.u32 %rd1149, %r7229;
add.s64 %rd1150, %rd1149, %rd4;
shl.b64 %rd1151, %rd1150, 2;
add.s64 %rd1152, %rd3, %rd1151;
ld.global.u32 %r2763, [%rd1152];
setp.eq.s32 %p1795, %r2763, 0;
@%p1795 bra $L__BB2_1417;
and.b32 %r7230, %r2763, -2147483648;
abs.s32 %r7231, %r2763;
shl.b32 %r7232, %r7231, %r2358;
or.b32 %r9969, %r7232, %r7230;
$L__BB2_1417:
shl.b32 %r7235, %r9969, 1;
shr.u32 %r7236, %r7235, %r2358;
and.b32 %r2766, %r7236, -2;
setp.eq.s32 %p1796, %r2766, 0;
mov.u32 %r9976, %r9974;
@%p1796 bra $L__BB2_1419;
or.b32 %r9963, %r9963, 8;
add.s32 %r7237, %r2766, -1;
clz.b32 %r7238, %r7237;
mov.u32 %r7239, 32;
sub.s32 %r9974, %r7239, %r7238;
max.s32 %r9979, %r9979, %r9974;
shr.u32 %r7240, %r9969, 31;
add.s32 %r7241, %r7240, %r2766;
add.s32 %r9976, %r7241, -2;
$L__BB2_1419:
add.s32 %r9978, %r9761, 2;
// Combine the four per-value results into a table index and fetch a code.
//   %r9963 = 4-bit non-zero mask of the four values
//   %r9956 / %r9960 / %r9975 / %r9974 = per-value bit lengths
//   %r9979 = largest of those bit lengths
//   %r2783 = (%r9770 >> 1) | %r2563
$L__BB2_1420:
mov.u32 %r9761, %r9978;
shr.u32 %r7243, %r9770, 1;
or.b32 %r2783, %r7243, %r2563;
add.s32 %r7244, %r9979, -1;
setp.lt.s32 %p1797, %r9979, 2;
setp.gt.s32 %p1798, %r9979, 1;
// %r2784 = max(%r9979 - 1, 0) for %r9979 > 1, else 0.
selp.b32 %r2784, %r7244, 0, %p1798;
@%p1797 bra $L__BB2_1422;
// %r9979 >= 2: build %r9981, where bit i is set when value i's bit length
// equals the maximum %r9979 (bit 0: %r9956, 1: %r9960, 2: %r9975, 3: %r9974).
setp.eq.s32 %p1799, %r9956, %r9979;
selp.u32 %r7245, 1, 0, %p1799;
setp.eq.s32 %p1800, %r9960, %r9979;
selp.u32 %r7246, -1, 0, %p1800;
bfi.b32 %r7247, %r7246, %r7245, 1, 1;
setp.eq.s32 %p1801, %r9975, %r9979;
selp.u16 %rs886, 1, 0, %p1801;
mul.wide.u16 %r7248, %rs886, 4;
or.b32 %r7249, %r7247, %r7248;
setp.eq.s32 %p1802, %r9974, %r9979;
selp.u16 %rs887, 1, 0, %p1802;
mul.wide.u16 %r7250, %rs887, 8;
or.b32 %r9981, %r7249, %r7250;
$L__BB2_1422:
// Shared-memory bookkeeping at %r2446: byte +1 receives %rs274 when the low
// byte of %r9960 is below the low byte of %r9781, otherwise %r9960; byte +2
// receives %r9974.
and.b32 %r7251, %r9960, 255;
and.b32 %r7252, %r9781, 255;
setp.lt.u32 %p1803, %r7251, %r7252;
cvt.u16.u32 %rs888, %r9960;
selp.b16 %rs889, %rs274, %rs888, %p1803;
st.shared.u8 [%r2446+1], %rs889;
st.shared.u8 [%r2446+2], %r9974;
// Shared-memory bookkeeping at %r2448: byte +1 = %r2450 | bit 1 of %r9963,
// byte +2 = bit 3 of %r9963.
and.b32 %r2787, %r9963, 2;
shr.u32 %r7253, %r2787, 1;
or.b32 %r7254, %r2450, %r7253;
st.shared.u8 [%r2448+1], %r7254;
and.b32 %r2788, %r9963, 8;
shr.u32 %r7255, %r2788, 3;
st.shared.u8 [%r2448+2], %r7255;
// Table index = (%r2783 << 8) | (%r9963 << 4) | %r9981; entries are 16 bits
// wide and the table base is %rd62.
shl.b32 %r7256, %r9963, 4;
shl.b32 %r7257, %r2783, 8;
or.b32 %r7258, %r7256, %r7257;
or.b32 %r7259, %r7258, %r9981;
mul.wide.u32 %rd1154, %r7259, 2;
add.s64 %rd1155, %rd62, %rd1154;
ld.global.u16 %rs319, [%rd1155];
// Entry layout as used here: bits 4..6 = code length (%rs320), bits 8..15 =
// code bits (%r9983). A zero length means there is nothing to emit.
shr.u16 %rs890, %rs319, 4;
and.b16 %rs320, %rs890, 7;
setp.eq.s16 %p1804, %rs320, 0;
mov.u32 %r9993, %r9800;
@%p1804 bra $L__BB2_1429;
cvt.u32.u16 %r9982, %rs320;
shr.u16 %rs891, %rs319, 8;
cvt.u32.u16 %r9983, %rs891;
// Emit %r9982 bits of %r9983 (LSB first) into a byte stream that grows
// downward from byte offset 20548.
//   %r10274 = bytes written so far (writes stop once it exceeds 2879)
//   %r10275 = bits already placed in the current byte %rs1322
//   %r10276 = 1 when the previously written byte was > 143 (0x8F); the
//             current byte then offers one bit less
//   %r9993  = resulting flag: %r9800 carried through, or 1 on overflow
// NOTE(review): the 0x8F / 0x7F handling resembles the HTJ2K VLC stream
// bit-stuffing rule - confirm against the CUDA source.
$L__BB2_1424:
mov.u32 %r2791, %r9982;
setp.gt.u32 %p1805, %r10274, 2879;
mov.u32 %r9993, 1;
@%p1805 bra $L__BB2_1429;
// %r7263 = free bits in the current byte = 8 - %r10276 - %r10275;
// %r7264 = min(free bits, bits remaining); mask = low %r7264 bits.
mov.u32 %r7261, 8;
sub.s32 %r7262, %r7261, %r10276;
sub.s32 %r7263, %r7262, %r10275;
min.u32 %r7264, %r7263, %r2791;
setp.eq.s32 %p1806, %r7264, 32;
mov.u32 %r7265, -1;
shl.b32 %r7266, %r7265, %r7264;
not.b32 %r7267, %r7266;
selp.b32 %r7268, -1, %r7267, %p1806;
and.b32 %r7269, %r7268, %r9983;
shl.b32 %r7270, %r7269, %r10275;
cvt.u16.u32 %rs892, %r7270;
or.b16 %rs1322, %rs1322, %rs892;
add.s32 %r10275, %r7264, %r10275;
sub.s32 %r9982, %r2791, %r7264;
shr.u32 %r9983, %r9983, %r7264;
// Free bits remain in the byte: no flush yet.
setp.gt.u32 %p1807, %r7263, %r2791;
@%p1807 bra $L__BB2_1428;
// The byte is full under the current limit. If the limit was reduced
// (%r10276 != 0) and the byte is not 127 (0x7F), only clear %r10276 so the
// withheld bit becomes usable, and keep filling the same byte.
setp.ne.s32 %p1808, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs893, %rs1322, 255;
setp.ne.s16 %p1809, %rs893, 127;
and.pred %p1810, %p1808, %p1809;
@%p1810 bra $L__BB2_1428;
// Otherwise store the byte at %rd1 + %rd5 + (20548 - %r10274), record
// whether it exceeds 143 for the next byte, and reset the accumulator.
mov.u32 %r7273, 20548;
sub.s32 %r7274, %r7273, %r10274;
cvt.u64.u32 %rd1156, %r7274;
add.s64 %rd1157, %rd1156, %rd5;
add.s64 %rd1158, %rd1, %rd1157;
st.global.u8 [%rd1158], %rs1322;
add.s32 %r10274, %r10274, 1;
setp.gt.u16 %p1811, %rs893, 143;
selp.u32 %r10276, 1, 0, %p1811;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1428:
setp.ne.s32 %p1812, %r9982, 0;
mov.u32 %r9993, %r9800;
@%p1812 bra $L__BB2_1424;
$L__BB2_1429:
setp.ne.s32 %p1813, %r2783, 0;
@%p1813 bra $L__BB2_1477;
setp.eq.s32 %p1814, %r9963, 0;
add.s32 %r7275, %r9826, 17477;
cvt.u64.u32 %rd1159, %r7275;
add.s64 %rd1160, %rd1159, %rd5;
add.s64 %rd67, %rd1, %rd1160;
@%p1814 bra $L__BB2_1469;
shl.b16 %rs1253, %rs1253, 1;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p1815, %r9835, 0;
mov.u32 %r10029, %r10043;
@%p1815 bra $L__BB2_1434;
bra.uni $L__BB2_1432;
$L__BB2_1434:
setp.lt.u32 %p1817, %r10041, 3;
mov.u32 %r9997, 0;
@%p1817 bra $L__BB2_1437;
setp.lt.u32 %p1818, %r10041, 6;
mov.u32 %r9997, 1;
@%p1818 bra $L__BB2_1437;
setp.lt.u32 %p1819, %r10041, 9;
setp.eq.s32 %p1820, %r10041, 11;
selp.b32 %r7281, 4, 5, %p1820;
setp.lt.u32 %p1821, %r10041, 11;
selp.b32 %r7282, 3, %r7281, %p1821;
selp.b32 %r9997, 2, %r7282, %p1819;
$L__BB2_1437:
setp.eq.s32 %p1822, %r9997, 0;
@%p1822 bra $L__BB2_1465;
add.s32 %r2815, %r9997, -1;
and.b32 %r2816, %r9997, 3;
setp.eq.s32 %p1823, %r2816, 0;
mov.u32 %r10007, %r9997;
mov.u32 %r10008, %r10029;
@%p1823 bra $L__BB2_1450;
mov.u32 %r7284, 1;
shl.b32 %r7285, %r7284, %r2815;
and.b32 %r7286, %r7285, %r10040;
setp.ne.s32 %p1824, %r7286, 0;
selp.u32 %r7287, 1, 0, %p1824;
cvt.u32.u16 %r7288, %rs1253;
bfi.b32 %r7289, %r7288, %r7287, 1, 8;
cvt.u16.u32 %rs1253, %r7289;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p1825, %r9835, 0;
mov.u32 %r10008, %r10029;
@%p1825 bra $L__BB2_1442;
setp.gt.u32 %p1826, %r9826, 191;
mov.u32 %r9835, 0;
mov.u32 %r10008, %r7284;
@%p1826 bra $L__BB2_1442;
add.s32 %r7293, %r9826, 17477;
cvt.u64.u32 %rd1161, %r7293;
add.s64 %rd1162, %rd1161, %rd5;
add.s64 %rd1163, %rd1, %rd1162;
st.global.u8 [%rd1163], %rs1253;
add.s32 %r9826, %r9826, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9835, 8;
mov.u32 %r10008, %r10029;
$L__BB2_1442:
setp.eq.s32 %p1827, %r2816, 1;
mov.u32 %r10029, %r10008;
mov.u32 %r10007, %r2815;
@%p1827 bra $L__BB2_1450;
add.s32 %r10007, %r9997, -2;
mov.u32 %r7294, 1;
shl.b32 %r7295, %r7294, %r10007;
and.b32 %r7296, %r7295, %r10040;
setp.ne.s32 %p1828, %r7296, 0;
selp.u32 %r7297, 1, 0, %p1828;
cvt.u32.u16 %r7298, %rs1253;
bfi.b32 %r7299, %r7298, %r7297, 1, 8;
cvt.u16.u32 %rs1253, %r7299;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p1829, %r9835, 0;
mov.u32 %r10003, %r10008;
@%p1829 bra $L__BB2_1446;
setp.gt.u32 %p1830, %r9826, 191;
mov.u32 %r9835, 0;
mov.u32 %r10003, %r7294;
@%p1830 bra $L__BB2_1446;
add.s32 %r7302, %r9826, 17477;
cvt.u64.u32 %rd1164, %r7302;
add.s64 %rd1165, %rd1164, %rd5;
add.s64 %rd1166, %rd1, %rd1165;
and.b16 %rs900, %rs1253, 255;
st.global.u8 [%rd1166], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1831, %rs900, 255;
selp.b32 %r9835, 7, 8, %p1831;
mov.u16 %rs1253, 0;
mov.u32 %r10003, %r10008;
$L__BB2_1446:
setp.eq.s32 %p1832, %r2816, 2;
mov.u32 %r10029, %r10003;
mov.u32 %r10008, %r10003;
@%p1832 bra $L__BB2_1450;
add.s32 %r10007, %r9997, -3;
mov.u32 %r7303, 1;
shl.b32 %r7304, %r7303, %r10007;
and.b32 %r7305, %r7304, %r10040;
setp.ne.s32 %p1833, %r7305, 0;
selp.u32 %r7306, 1, 0, %p1833;
cvt.u32.u16 %r7307, %rs1253;
bfi.b32 %r7308, %r7307, %r7306, 1, 8;
cvt.u16.u32 %rs1253, %r7308;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p1834, %r9835, 0;
mov.u32 %r10029, %r10003;
mov.u32 %r10008, %r10003;
@%p1834 bra $L__BB2_1450;
setp.gt.u32 %p1835, %r9826, 191;
mov.u32 %r9835, 0;
mov.u32 %r10029, %r7303;
mov.u32 %r10008, %r7303;
@%p1835 bra $L__BB2_1450;
add.s32 %r7313, %r9826, 17477;
cvt.u64.u32 %rd1167, %r7313;
add.s64 %rd1168, %rd1167, %rd5;
add.s64 %rd1169, %rd1, %rd1168;
and.b16 %rs903, %rs1253, 255;
st.global.u8 [%rd1169], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1836, %rs903, 255;
selp.b32 %r9835, 7, 8, %p1836;
mov.u16 %rs1253, 0;
mov.u32 %r10029, %r10003;
mov.u32 %r10008, %r10003;
$L__BB2_1450:
setp.lt.u32 %p1837, %r2815, 3;
@%p1837 bra $L__BB2_1465;
mov.u32 %r10029, %r10008;
$L__BB2_1452:
add.s32 %r7314, %r10007, -1;
mov.u32 %r7315, 1;
shl.b32 %r7316, %r7315, %r7314;
and.b32 %r7317, %r7316, %r10040;
setp.ne.s32 %p1838, %r7317, 0;
selp.u32 %r7318, 1, 0, %p1838;
cvt.u32.u16 %r7319, %rs1253;
bfi.b32 %r10017, %r7319, %r7318, 1, 8;
add.s32 %r10016, %r9835, -1;
setp.ne.s32 %p1839, %r10016, 0;
mov.u32 %r10018, %r10029;
@%p1839 bra $L__BB2_1455;
setp.gt.u32 %p1840, %r9826, 191;
mov.u32 %r10016, 0;
mov.u32 %r10018, %r7315;
@%p1840 bra $L__BB2_1455;
cvt.u16.u32 %rs904, %r10017;
and.b16 %rs905, %rs904, 255;
add.s32 %r7323, %r9826, 17477;
cvt.u64.u32 %rd1170, %r7323;
add.s64 %rd1171, %rd1170, %rd5;
add.s64 %rd1172, %rd1, %rd1171;
st.global.u8 [%rd1172], %rs904;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1841, %rs905, 255;
selp.b32 %r10016, 7, 8, %p1841;
mov.u32 %r10017, 0;
mov.u32 %r10018, %r10029;
$L__BB2_1455:
add.s32 %r7324, %r10007, -2;
shl.b32 %r7326, %r7315, %r7324;
and.b32 %r7327, %r7326, %r10040;
setp.ne.s32 %p1842, %r7327, 0;
and.b32 %r7328, %r10017, 127;
selp.u32 %r7329, 1, 0, %p1842;
bfi.b32 %r10021, %r7328, %r7329, 1, 7;
add.s32 %r10020, %r10016, -1;
setp.ne.s32 %p1843, %r10020, 0;
mov.u32 %r10022, %r10018;
@%p1843 bra $L__BB2_1458;
setp.gt.u32 %p1844, %r9826, 191;
mov.u32 %r10022, 1;
mov.u32 %r10020, 0;
@%p1844 bra $L__BB2_1458;
cvt.u16.u32 %rs906, %r10021;
and.b16 %rs907, %rs906, 255;
add.s32 %r7333, %r9826, 17477;
cvt.u64.u32 %rd1173, %r7333;
add.s64 %rd1174, %rd1173, %rd5;
add.s64 %rd1175, %rd1, %rd1174;
st.global.u8 [%rd1175], %rs906;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1845, %rs907, 255;
selp.b32 %r10020, 7, 8, %p1845;
mov.u32 %r10021, 0;
mov.u32 %r10022, %r10018;
$L__BB2_1458:
add.s32 %r7334, %r10007, -3;
mov.u32 %r7335, 1;
shl.b32 %r7336, %r7335, %r7334;
and.b32 %r7337, %r7336, %r10040;
setp.ne.s32 %p1846, %r7337, 0;
and.b32 %r7338, %r10021, 127;
selp.u32 %r7339, 1, 0, %p1846;
bfi.b32 %r10025, %r7338, %r7339, 1, 7;
add.s32 %r10024, %r10020, -1;
setp.ne.s32 %p1847, %r10024, 0;
mov.u32 %r10026, %r10022;
@%p1847 bra $L__BB2_1461;
setp.gt.u32 %p1848, %r9826, 191;
mov.u32 %r10024, 0;
mov.u32 %r10026, %r7335;
@%p1848 bra $L__BB2_1461;
cvt.u16.u32 %rs908, %r10025;
and.b16 %rs909, %rs908, 255;
add.s32 %r7343, %r9826, 17477;
cvt.u64.u32 %rd1176, %r7343;
add.s64 %rd1177, %rd1176, %rd5;
add.s64 %rd1178, %rd1, %rd1177;
st.global.u8 [%rd1178], %rs908;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1849, %rs909, 255;
selp.b32 %r10024, 7, 8, %p1849;
mov.u32 %r10025, 0;
mov.u32 %r10026, %r10022;
$L__BB2_1461:
add.s32 %r10007, %r10007, -4;
shl.b32 %r7345, %r7335, %r10007;
and.b32 %r7346, %r7345, %r10040;
setp.ne.s32 %p1850, %r7346, 0;
and.b32 %r7347, %r10025, 127;
selp.u32 %r7348, 1, 0, %p1850;
bfi.b32 %r7349, %r7347, %r7348, 1, 15;
cvt.u16.u32 %rs1253, %r7349;
add.s32 %r9835, %r10024, -1;
setp.ne.s32 %p1851, %r9835, 0;
mov.u32 %r10029, %r10026;
@%p1851 bra $L__BB2_1464;
setp.gt.u32 %p1852, %r9826, 191;
mov.u32 %r10029, 1;
mov.u32 %r9835, 0;
@%p1852 bra $L__BB2_1464;
add.s32 %r7352, %r9826, 17477;
cvt.u64.u32 %rd1179, %r7352;
add.s64 %rd1180, %rd1179, %rd5;
add.s64 %rd1181, %rd1, %rd1180;
and.b16 %rs911, %rs1253, 255;
st.global.u8 [%rd1181], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1853, %rs911, 255;
selp.b32 %r9835, 7, 8, %p1853;
mov.u16 %rs1253, 0;
mov.u32 %r10029, %r10026;
$L__BB2_1464:
setp.ne.s32 %p1854, %r10007, 0;
@%p1854 bra $L__BB2_1452;
$L__BB2_1465:
add.s32 %r7354, %r10041, -1;
setp.eq.s32 %p1855, %r10041, 0;
mov.u32 %r10040, 0;
selp.b32 %r10041, 0, %r7354, %p1855;
setp.lt.u32 %p1856, %r10041, 3;
mov.u32 %r10033, %r10040;
@%p1856 bra $L__BB2_1468;
setp.lt.u32 %p1857, %r10041, 6;
mov.u32 %r10033, 1;
@%p1857 bra $L__BB2_1468;
setp.lt.u32 %p1858, %r10041, 9;
setp.eq.s32 %p1859, %r10041, 11;
selp.b32 %r7356, 4, 5, %p1859;
setp.lt.u32 %p1860, %r10041, 11;
selp.b32 %r7357, 3, %r7356, %p1860;
selp.b32 %r10033, 2, %r7357, %p1858;
$L__BB2_1468:
mov.u32 %r7359, 1;
shl.b32 %r10042, %r7359, %r10033;
mov.u32 %r10043, %r10029;
bra.uni $L__BB2_1477;
// Tail path: load four (bits, length) byte pairs up front and emit the first
// one into the downward-growing byte stream.
//   [%rd65]   / [%rd65+1] -> bits / length of the first pair  (%r9912, %rs297)
//   [%rd66]   / [%rd66+1] -> %rs298 / %rs299
//   [%rd63]   / [%rd63+1] -> %rs300 / %rs301
//   [%rd63+2] / [%rd63+3] -> %rs302 / %rs303
// The last three pairs are consumed by the blocks that follow.
$L__BB2_1372:
ld.global.u8 %rs297, [%rd65+1];
ld.global.u8 %rs298, [%rd66];
ld.global.u8 %rs299, [%rd66+1];
ld.global.u8 %rs300, [%rd63];
ld.global.u8 %rs301, [%rd63+1];
ld.global.u8 %rs302, [%rd63+2];
ld.global.u8 %rs303, [%rd63+3];
// A zero length means the first pair emits nothing.
setp.eq.s16 %p1750, %rs297, 0;
mov.u32 %r9922, %r9800;
@%p1750 bra $L__BB2_1379;
ld.global.u8 %r9912, [%rd65];
cvt.u32.u16 %r9911, %rs297;
// Emit %r9911 bits of %r9912, LSB first.
//   %r10274 = bytes written so far (writes stop once it exceeds 2879)
//   %r10275 = bits already placed in the current byte %rs1322
//   %r10276 = 1 when the previously written byte was > 143 (0x8F)
//   %r9922  = resulting flag: %r9800 carried through, or 1 on overflow
$L__BB2_1374:
mov.u32 %r2659, %r9911;
setp.gt.u32 %p1751, %r10274, 2879;
mov.u32 %r9922, 1;
@%p1751 bra $L__BB2_1379;
// %r7113 = free bits in the current byte = 8 - %r10276 - %r10275;
// %r7114 = min(free bits, bits remaining); mask = low %r7114 bits.
mov.u32 %r7111, 8;
sub.s32 %r7112, %r7111, %r10276;
sub.s32 %r7113, %r7112, %r10275;
min.u32 %r7114, %r7113, %r2659;
setp.eq.s32 %p1752, %r7114, 32;
mov.u32 %r7115, -1;
shl.b32 %r7116, %r7115, %r7114;
not.b32 %r7117, %r7116;
selp.b32 %r7118, -1, %r7117, %p1752;
and.b32 %r7119, %r7118, %r9912;
shl.b32 %r7120, %r7119, %r10275;
cvt.u16.u32 %rs867, %r7120;
or.b16 %rs1322, %rs1322, %rs867;
add.s32 %r10275, %r7114, %r10275;
sub.s32 %r9911, %r2659, %r7114;
shr.u32 %r9912, %r9912, %r7114;
// Free bits remain in the byte: no flush yet.
setp.gt.u32 %p1753, %r7113, %r2659;
@%p1753 bra $L__BB2_1378;
// Byte full under the current limit. If the limit was reduced
// (%r10276 != 0) and the byte is not 127 (0x7F), only clear %r10276 and
// keep filling the same byte.
setp.ne.s32 %p1754, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs868, %rs1322, 255;
setp.ne.s16 %p1755, %rs868, 127;
and.pred %p1756, %p1754, %p1755;
@%p1756 bra $L__BB2_1378;
// Otherwise store the byte at %rd1 + %rd5 + (20548 - %r10274), record
// whether it exceeds 143 for the next byte, and reset the accumulator.
mov.u32 %r7123, 20548;
sub.s32 %r7124, %r7123, %r10274;
cvt.u64.u32 %rd1125, %r7124;
add.s64 %rd1126, %rd1125, %rd5;
add.s64 %rd1127, %rd1, %rd1126;
st.global.u8 [%rd1127], %rs1322;
add.s32 %r10274, %r10274, 1;
setp.gt.u16 %p1757, %rs868, 143;
selp.u32 %r10276, 1, 0, %p1757;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1378:
setp.ne.s32 %p1758, %r9911, 0;
mov.u32 %r9922, %r9800;
@%p1758 bra $L__BB2_1374;
$L__BB2_1379:
setp.eq.s16 %p1759, %rs301, 0;
mov.u32 %r9934, %r9922;
@%p1759 bra $L__BB2_1386;
cvt.u32.u16 %r7125, %rs300;
and.b32 %r9924, %r7125, 255;
cvt.u32.u16 %r7126, %rs301;
and.b32 %r9923, %r7126, 255;
$L__BB2_1381:
mov.u32 %r2678, %r9923;
setp.gt.u32 %p1760, %r10274, 2879;
mov.u32 %r9934, 1;
@%p1760 bra $L__BB2_1386;
mov.u32 %r7128, 8;
sub.s32 %r7129, %r7128, %r10276;
sub.s32 %r7130, %r7129, %r10275;
min.u32 %r7131, %r7130, %r2678;
setp.eq.s32 %p1761, %r7131, 32;
mov.u32 %r7132, -1;
shl.b32 %r7133, %r7132, %r7131;
not.b32 %r7134, %r7133;
selp.b32 %r7135, -1, %r7134, %p1761;
and.b32 %r7136, %r7135, %r9924;
shl.b32 %r7137, %r7136, %r10275;
cvt.u16.u32 %rs872, %r7137;
or.b16 %rs1322, %rs1322, %rs872;
add.s32 %r10275, %r7131, %r10275;
sub.s32 %r9923, %r2678, %r7131;
shr.u32 %r9924, %r9924, %r7131;
setp.gt.u32 %p1762, %r7130, %r2678;
@%p1762 bra $L__BB2_1385;
setp.ne.s32 %p1763, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs873, %rs1322, 255;
setp.ne.s16 %p1764, %rs873, 127;
and.pred %p1765, %p1763, %p1764;
@%p1765 bra $L__BB2_1385;
mov.u32 %r7140, 20548;
sub.s32 %r7141, %r7140, %r10274;
cvt.u64.u32 %rd1128, %r7141;
add.s64 %rd1129, %rd1128, %rd5;
add.s64 %rd1130, %rd1, %rd1129;
st.global.u8 [%rd1130], %rs1322;
add.s32 %r10274, %r10274, 1;
setp.gt.u16 %p1766, %rs873, 143;
selp.u32 %r10276, 1, 0, %p1766;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1385:
setp.ne.s32 %p1767, %r9923, 0;
mov.u32 %r9934, %r9922;
@%p1767 bra $L__BB2_1381;
$L__BB2_1386:
setp.eq.s16 %p1768, %rs299, 0;
mov.u32 %r9946, %r9934;
@%p1768 bra $L__BB2_1393;
cvt.u32.u16 %r7142, %rs299;
and.b32 %r9935, %r7142, 255;
cvt.u32.u16 %r7143, %rs298;
and.b32 %r9936, %r7143, 255;
$L__BB2_1388:
mov.u32 %r2697, %r9935;
setp.gt.u32 %p1769, %r10274, 2879;
mov.u32 %r9946, 1;
@%p1769 bra $L__BB2_1393;
mov.u32 %r7145, 8;
sub.s32 %r7146, %r7145, %r10276;
sub.s32 %r7147, %r7146, %r10275;
min.u32 %r7148, %r7147, %r2697;
setp.eq.s32 %p1770, %r7148, 32;
mov.u32 %r7149, -1;
shl.b32 %r7150, %r7149, %r7148;
not.b32 %r7151, %r7150;
selp.b32 %r7152, -1, %r7151, %p1770;
and.b32 %r7153, %r7152, %r9936;
shl.b32 %r7154, %r7153, %r10275;
cvt.u16.u32 %rs877, %r7154;
or.b16 %rs1322, %rs1322, %rs877;
add.s32 %r10275, %r7148, %r10275;
sub.s32 %r9935, %r2697, %r7148;
shr.u32 %r9936, %r9936, %r7148;
setp.gt.u32 %p1771, %r7147, %r2697;
@%p1771 bra $L__BB2_1392;
setp.ne.s32 %p1772, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs878, %rs1322, 255;
setp.ne.s16 %p1773, %rs878, 127;
and.pred %p1774, %p1772, %p1773;
@%p1774 bra $L__BB2_1392;
mov.u32 %r7157, 20548;
sub.s32 %r7158, %r7157, %r10274;
cvt.u64.u32 %rd1131, %r7158;
add.s64 %rd1132, %rd1131, %rd5;
add.s64 %rd1133, %rd1, %rd1132;
st.global.u8 [%rd1133], %rs1322;
add.s32 %r10274, %r10274, 1;
setp.gt.u16 %p1775, %rs878, 143;
selp.u32 %r10276, 1, 0, %p1775;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1392:
setp.ne.s32 %p1776, %r9935, 0;
mov.u32 %r9946, %r9934;
@%p1776 bra $L__BB2_1388;
// Append one (value, length) bit field to a byte stream that grows towards
// lower addresses in global memory.
//   value  = low 8 bits of %rs302, length = low 8 bits of %rs303
//   %rs1322 = byte being assembled, %r10275 = bits already placed in it
//   %r10276 = 0/1, subtracted from the 8-bit capacity of the current byte
//   %r10274 = number of bytes stored so far
//   %r10277 = result flag: 1 if the byte limit was hit, else incoming %r9946
// NOTE(review): the 0x7F / >0x8F rule below looks like the HTJ2K VLC
// bit-stuffing convention -- confirm against the CUDA source.
$L__BB2_1393:
// Zero-length field: nothing to emit, leave with %r9760 = 0.
setp.eq.s16 %p1777, %rs303, 0;
mov.u32 %r9760, 0;
mov.u32 %r10277, %r9946;
@%p1777 bra $L__BB2_1631;
// %r9948 = remaining value bits, %r9947 = remaining bit count.
cvt.u32.u16 %r7160, %rs302;
and.b32 %r9948, %r7160, 255;
cvt.u32.u16 %r7161, %rs303;
and.b32 %r9947, %r7161, 255;
$L__BB2_1395:
mov.u32 %r2716, %r9947;
// Stop with %r10277 = 1 once more than 2879 bytes have been stored.
setp.gt.u32 %p1778, %r10274, 2879;
mov.u32 %r10277, 1;
@%p1778 bra $L__BB2_1631;
// free = 8 - %r10276 - %r10275; take = min(free, remaining).
mov.u32 %r7164, 8;
sub.s32 %r7165, %r7164, %r10276;
sub.s32 %r7166, %r7165, %r10275;
min.u32 %r7167, %r7166, %r2716;
// mask = low `take` bits set (all ones when take == 32).
setp.eq.s32 %p1779, %r7167, 32;
mov.u32 %r7168, -1;
shl.b32 %r7169, %r7168, %r7167;
not.b32 %r7170, %r7169;
selp.b32 %r7171, -1, %r7170, %p1779;
// OR the masked bits into the byte at the current fill position.
and.b32 %r7172, %r7171, %r9948;
shl.b32 %r7173, %r7172, %r10275;
cvt.u16.u32 %rs882, %r7173;
or.b16 %rs1322, %rs1322, %rs882;
// Advance fill position; consume `take` bits from the field.
add.s32 %r10275, %r7167, %r10275;
sub.s32 %r9947, %r2716, %r7167;
shr.u32 %r9948, %r9948, %r7167;
// free > remaining: the byte is not full yet, no store.
setp.gt.u32 %p1780, %r7166, %r2716;
@%p1780 bra $L__BB2_1399;
// Byte reached its capacity. If %r10276 was set and the byte is not 0x7F,
// clear %r10276 and keep filling the same byte instead of storing it.
setp.ne.s32 %p1781, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs883, %rs1322, 255;
setp.ne.s16 %p1782, %rs883, 127;
and.pred %p1783, %p1781, %p1782;
@%p1783 bra $L__BB2_1399;
// Store address = %rd1 + %rd5 + (20548 - %r10274): each byte lands one
// address below the previous one.
mov.u32 %r7176, 20548;
sub.s32 %r7177, %r7176, %r10274;
cvt.u64.u32 %rd1134, %r7177;
add.s64 %rd1135, %rd1134, %rd5;
add.s64 %rd1136, %rd1, %rd1135;
st.global.u8 [%rd1136], %rs1322;
add.s32 %r10274, %r10274, 1;
// %r10276 = 1 for the next byte when the stored byte was > 0x8F (143).
setp.gt.u16 %p1784, %rs883, 143;
selp.u32 %r10276, 1, 0, %p1784;
// Start a fresh, empty byte.
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1399:
// Loop until every bit of the field is consumed; normal exit passes the
// incoming status %r9946 through in %r10277.
setp.eq.s32 %p1785, %r9947, 0;
mov.u32 %r10277, %r9946;
@%p1785 bra $L__BB2_1631;
bra.uni $L__BB2_1395;
// Run-counter step of an adaptive run-length state machine.
//   %r10040 = current run count, %r10042 = run threshold (1 << exponent)
//   %r10041 = state index (capped at 12), %rs1253 = bit accumulator
//   %r9835  = countdown of bits left before the accumulator must be flushed
// NOTE(review): index cap 12 with exponents 0,0,0,1,1,1,2,2,2,3,3,4,5 matches
// the HTJ2K MEL coder table -- confirm against the CUDA source.
$L__BB2_1469:
// Count one more run event; below the threshold nothing is emitted.
add.s32 %r10040, %r10040, 1;
setp.lt.u32 %p1861, %r10040, %r10042;
@%p1861 bra $L__BB2_1477;
// Threshold reached: shift a 1 bit into the accumulator.
shl.b16 %rs912, %rs1253, 1;
or.b16 %rs1253, %rs912, 1;
// One fewer bit of room; when it reaches 0 control goes to $L__BB2_1471
// (outside this excerpt).
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p1862, %r9835, 0;
mov.u32 %r10036, %r10043;
@%p1862 bra $L__BB2_1473;
bra.uni $L__BB2_1471;
$L__BB2_1473:
// State index = min(index + 1, 12); run count restarts at 0.
add.s32 %r7363, %r10041, 1;
min.u32 %r10041, %r7363, 12;
// Exponent lookup on the new index: 0..2 -> 0, 3..5 -> 1, 6..8 -> 2,
// 9..10 -> 3, 11 -> 4, otherwise 5.
setp.lt.u32 %p1865, %r10041, 3;
mov.u32 %r10040, 0;
mov.u32 %r10037, %r10040;
@%p1865 bra $L__BB2_1476;
setp.lt.u32 %p1866, %r10041, 6;
mov.u32 %r10037, 1;
@%p1866 bra $L__BB2_1476;
setp.lt.u32 %p1867, %r10041, 9;
setp.eq.s32 %p1868, %r10041, 11;
selp.b32 %r7365, 4, 5, %p1868;
setp.lt.u32 %p1869, %r10041, 11;
selp.b32 %r7366, 3, %r7365, %p1869;
selp.b32 %r10037, 2, %r7366, %p1867;
$L__BB2_1476:
// New threshold = 1 << exponent; %r10043 takes the value saved in %r10036.
mov.u32 %r7368, 1;
shl.b32 %r10042, %r7368, %r10037;
mov.u32 %r10043, %r10036;
// Emit the low bits of %r9957 into a byte stream that grows towards higher
// addresses in global memory, but only when bit 0 of %r9963 is set.
//   %r10464 = byte being assembled, %r10463 = bits already placed in it
//   %r10462 = capacity of the current byte (8, or 7 after a 0xFF byte)
//   %r10461 = number of bytes stored so far
//   %r10058 = result flag: 1 if the byte limit was hit, else incoming %r10495
// NOTE(review): the "7 bits after 0xFF" rule looks like the HTJ2K MagSgn
// bit-stuffing convention -- confirm against the CUDA source.
$L__BB2_1477:
// %r2899 = max(%r9979, 1); %r2900 = low nibble of %rs319; %r2901 = bit 0 of
// %r9963. These three values are reused by the blocks that follow.
max.s32 %r2899, %r9979, 1;
and.b16 %rs915, %rs319, 15;
cvt.u32.u16 %r2900, %rs915;
and.b32 %r2901, %r9963, 1;
// Bit 0 of %r9963 clear: skip this emission.
setp.eq.s32 %p1870, %r2901, 0;
mov.u32 %r10058, %r10495;
@%p1870 bra $L__BB2_1484;
// Bit count = %r2899 - (bit 0 of %r2900); zero bits means nothing to emit.
and.b32 %r7369, %r2900, 1;
sub.s32 %r10044, %r2899, %r7369;
setp.eq.s32 %p1871, %r10044, 0;
mov.u32 %r10058, %r10495;
@%p1871 bra $L__BB2_1484;
// %r10045 = %r9957 with everything above the low %r10044 bits cleared.
mov.u32 %r7370, -1;
shl.b32 %r7371, %r7370, %r10044;
not.b32 %r7372, %r7371;
and.b32 %r10045, %r9957, %r7372;
$L__BB2_1480:
// Stop with %r10058 = 1 once more than 17476 bytes have been stored.
setp.gt.u32 %p1872, %r10461, 17476;
mov.u32 %r10058, 1;
@%p1872 bra $L__BB2_1484;
// take = min(capacity - filled, remaining bits).
sub.s32 %r7374, %r10462, %r10463;
min.u32 %r7375, %r7374, %r10044;
// mask = low `take` bits set (all ones when take == 32).
setp.eq.s32 %p1873, %r7375, 32;
mov.u32 %r7376, -1;
shl.b32 %r7377, %r7376, %r7375;
not.b32 %r7378, %r7377;
selp.b32 %r7379, -1, %r7378, %p1873;
// OR the masked bits into the byte at the current fill position.
and.b32 %r7380, %r7379, %r10045;
shl.b32 %r7381, %r7380, %r10463;
or.b32 %r10464, %r7381, %r10464;
// Advance fill position; consume `take` bits.
add.s32 %r10463, %r7375, %r10463;
shr.u32 %r10045, %r10045, %r7375;
sub.s32 %r10044, %r10044, %r7375;
// Byte not yet at capacity: no store.
setp.lt.u32 %p1874, %r10463, %r10462;
@%p1874 bra $L__BB2_1483;
// Store address = %rd1 + %rd5 + %r10461 (st.global.u8 writes the low byte).
cvt.u64.u32 %rd1182, %r10461;
add.s64 %rd1183, %rd1182, %rd5;
add.s64 %rd1184, %rd1, %rd1183;
st.global.u8 [%rd1184], %r10464;
add.s32 %r10461, %r10461, 1;
// The byte after a 0xFF byte only takes 7 bits.
setp.eq.s32 %p1875, %r10464, 255;
selp.b32 %r10462, 7, 8, %p1875;
// Start a fresh, empty byte.
mov.u32 %r10463, 0;
mov.u32 %r10464, %r10463;
$L__BB2_1483:
// Loop until all bits are consumed; normal exit passes %r10495 through.
setp.ne.s32 %p1876, %r10044, 0;
mov.u32 %r10058, %r10495;
@%p1876 bra $L__BB2_1480;
$L__BB2_1484:
setp.eq.s32 %p1877, %r2787, 0;
mov.u32 %r10073, %r10058;
@%p1877 bra $L__BB2_1491;
shr.u32 %r7384, %r2900, 1;
and.b32 %r7385, %r7384, 1;
sub.s32 %r10059, %r2899, %r7385;
setp.eq.s32 %p1878, %r10059, 0;
mov.u32 %r10073, %r10058;
@%p1878 bra $L__BB2_1491;
mov.u32 %r7386, -1;
shl.b32 %r7387, %r7386, %r10059;
not.b32 %r7388, %r7387;
and.b32 %r10060, %r9961, %r7388;
$L__BB2_1487:
setp.gt.u32 %p1879, %r10461, 17476;
mov.u32 %r10073, 1;
@%p1879 bra $L__BB2_1491;
sub.s32 %r7390, %r10462, %r10463;
min.u32 %r7391, %r7390, %r10059;
setp.eq.s32 %p1880, %r7391, 32;
mov.u32 %r7392, -1;
shl.b32 %r7393, %r7392, %r7391;
not.b32 %r7394, %r7393;
selp.b32 %r7395, -1, %r7394, %p1880;
and.b32 %r7396, %r7395, %r10060;
shl.b32 %r7397, %r7396, %r10463;
or.b32 %r10464, %r7397, %r10464;
add.s32 %r10463, %r7391, %r10463;
shr.u32 %r10060, %r10060, %r7391;
sub.s32 %r10059, %r10059, %r7391;
setp.lt.u32 %p1881, %r10463, %r10462;
@%p1881 bra $L__BB2_1490;
cvt.u64.u32 %rd1185, %r10461;
add.s64 %rd1186, %rd1185, %rd5;
add.s64 %rd1187, %rd1, %rd1186;
st.global.u8 [%rd1187], %r10464;
add.s32 %r10461, %r10461, 1;
setp.eq.s32 %p1882, %r10464, 255;
selp.b32 %r10462, 7, 8, %p1882;
mov.u32 %r10463, 0;
mov.u32 %r10464, %r10463;
$L__BB2_1490:
setp.ne.s32 %p1883, %r10059, 0;
mov.u32 %r10073, %r10058;
@%p1883 bra $L__BB2_1487;
$L__BB2_1491:
and.b32 %r7400, %r9963, 4;
setp.eq.s32 %p1884, %r7400, 0;
mov.u32 %r10088, %r10073;
@%p1884 bra $L__BB2_1498;
shr.u32 %r7401, %r2900, 2;
and.b32 %r7402, %r7401, 1;
sub.s32 %r10074, %r2899, %r7402;
setp.eq.s32 %p1885, %r10074, 0;
mov.u32 %r10088, %r10073;
@%p1885 bra $L__BB2_1498;
mov.u32 %r7403, -1;
shl.b32 %r7404, %r7403, %r10074;
not.b32 %r7405, %r7404;
and.b32 %r10075, %r9977, %r7405;
$L__BB2_1494:
setp.gt.u32 %p1886, %r10461, 17476;
mov.u32 %r10088, 1;
@%p1886 bra $L__BB2_1498;
sub.s32 %r7407, %r10462, %r10463;
min.u32 %r7408, %r7407, %r10074;
setp.eq.s32 %p1887, %r7408, 32;
mov.u32 %r7409, -1;
shl.b32 %r7410, %r7409, %r7408;
not.b32 %r7411, %r7410;
selp.b32 %r7412, -1, %r7411, %p1887;
and.b32 %r7413, %r7412, %r10075;
shl.b32 %r7414, %r7413, %r10463;
or.b32 %r10464, %r7414, %r10464;
add.s32 %r10463, %r7408, %r10463;
shr.u32 %r10075, %r10075, %r7408;
sub.s32 %r10074, %r10074, %r7408;
setp.lt.u32 %p1888, %r10463, %r10462;
@%p1888 bra $L__BB2_1497;
cvt.u64.u32 %rd1188, %r10461;
add.s64 %rd1189, %rd1188, %rd5;
add.s64 %rd1190, %rd1, %rd1189;
st.global.u8 [%rd1190], %r10464;
add.s32 %r10461, %r10461, 1;
setp.eq.s32 %p1889, %r10464, 255;
selp.b32 %r10462, 7, 8, %p1889;
mov.u32 %r10463, 0;
mov.u32 %r10464, %r10463;
$L__BB2_1497:
setp.ne.s32 %p1890, %r10074, 0;
mov.u32 %r10088, %r10073;
@%p1890 bra $L__BB2_1494;
$L__BB2_1498:
setp.eq.s32 %p1891, %r2788, 0;
mov.u32 %r10495, %r10088;
@%p1891 bra $L__BB2_1505;
shr.u32 %r7417, %r2900, 3;
sub.s32 %r10089, %r2899, %r7417;
setp.eq.s32 %p1892, %r10089, 0;
mov.u32 %r10495, %r10088;
@%p1892 bra $L__BB2_1505;
mov.u32 %r7418, -1;
shl.b32 %r7419, %r7418, %r10089;
not.b32 %r7420, %r7419;
and.b32 %r10090, %r9976, %r7420;
$L__BB2_1501:
setp.gt.u32 %p1893, %r10461, 17476;
mov.u32 %r10495, 1;
@%p1893 bra $L__BB2_1505;
sub.s32 %r7422, %r10462, %r10463;
min.u32 %r7423, %r7422, %r10089;
setp.eq.s32 %p1894, %r7423, 32;
mov.u32 %r7424, -1;
shl.b32 %r7425, %r7424, %r7423;
not.b32 %r7426, %r7425;
selp.b32 %r7427, -1, %r7426, %p1894;
and.b32 %r7428, %r7427, %r10090;
shl.b32 %r7429, %r7428, %r10463;
or.b32 %r10464, %r7429, %r10464;
add.s32 %r10463, %r7423, %r10463;
shr.u32 %r10090, %r10090, %r7423;
sub.s32 %r10089, %r10089, %r7423;
setp.lt.u32 %p1895, %r10463, %r10462;
@%p1895 bra $L__BB2_1504;
cvt.u64.u32 %rd1191, %r10461;
add.s64 %rd1192, %rd1191, %rd5;
add.s64 %rd1193, %rd1, %rd1192;
st.global.u8 [%rd1193], %r10464;
add.s32 %r10461, %r10461, 1;
setp.eq.s32 %p1896, %r10464, 255;
selp.b32 %r10462, 7, 8, %p1896;
mov.u32 %r10463, 0;
mov.u32 %r10464, %r10463;
$L__BB2_1504:
setp.ne.s32 %p1897, %r10089, 0;
mov.u32 %r10495, %r10088;
@%p1897 bra $L__BB2_1501;
// Dispatch on %r2443 and %r2784, then (at $L__BB2_1545) perform one
// run-counter step of the adaptive run-length state machine, flushing the
// bit accumulator %rs1253 to global memory when it fills.
//   %r10040 = run count, %r10042 = threshold, %r10041 = state index
//   %r9835  = bits of room left in %rs1253, %r9826 = bytes stored so far
//   %r10043 = status flag (becomes 1 when the byte limit blocks a flush)
// NOTE(review): presumably the HTJ2K MEL coder -- confirm against the CUDA
// source.
$L__BB2_1505:
// Either value < 1: skip the whole step.
setp.lt.s32 %p1898, %r2784, 1;
setp.lt.s32 %p1899, %r2443, 1;
or.pred %p1900, %p1899, %p1898;
@%p1900 bra $L__BB2_1553;
// min(%r2443, %r2784) < 3 selects the run-count path ($L__BB2_1545);
// otherwise the path at $L__BB2_1507 is taken.
min.s32 %r7432, %r2443, %r2784;
setp.lt.s32 %p1901, %r7432, 3;
// %rd68 = %rd1 + %rd5 + (%r9826 + 17477): address of the next output byte,
// computed once here for both paths.
add.s32 %r7433, %r9826, 17477;
cvt.u64.u32 %rd1194, %r7433;
add.s64 %rd1195, %rd1194, %rd5;
add.s64 %rd68, %rd1, %rd1195;
@%p1901 bra $L__BB2_1545;
bra.uni $L__BB2_1507;
$L__BB2_1545:
// Count one more run event; below the threshold nothing is emitted.
add.s32 %r10040, %r10040, 1;
setp.lt.u32 %p1948, %r10040, %r10042;
@%p1948 bra $L__BB2_1553;
// Threshold reached: shift a 1 bit into the accumulator.
shl.b16 %rs932, %rs1253, 1;
or.b16 %rs1253, %rs932, 1;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p1949, %r9835, 0;
mov.u32 %r10146, %r10043;
@%p1949 bra $L__BB2_1549;
// Accumulator full. If more than 191 bytes are already stored, set the
// status to 1 and skip the store (%r9835 stays 0).
setp.gt.u32 %p1950, %r9826, 191;
mov.u32 %r10146, 1;
mov.u32 %r9835, 0;
@%p1950 bra $L__BB2_1549;
// Store the byte; the next byte has room for 7 bits after 0xFF, else 8.
and.b16 %rs934, %rs1253, 255;
st.global.u8 [%rd68], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1951, %rs934, 255;
selp.b32 %r9835, 7, 8, %p1951;
mov.u16 %rs1253, 0;
mov.u32 %r10146, %r10043;
$L__BB2_1549:
// State index = min(index + 1, 12); run count restarts at 0.
add.s32 %r7521, %r10041, 1;
min.u32 %r10041, %r7521, 12;
// Exponent lookup on the new index: 0..2 -> 0, 3..5 -> 1, 6..8 -> 2,
// 9..10 -> 3, 11 -> 4, otherwise 5.
setp.lt.u32 %p1952, %r10041, 3;
mov.u32 %r10040, 0;
mov.u32 %r10147, %r10040;
@%p1952 bra $L__BB2_1552;
setp.lt.u32 %p1953, %r10041, 6;
mov.u32 %r10147, 1;
@%p1953 bra $L__BB2_1552;
setp.lt.u32 %p1954, %r10041, 9;
setp.eq.s32 %p1955, %r10041, 11;
selp.b32 %r7523, 4, 5, %p1955;
setp.lt.u32 %p1956, %r10041, 11;
selp.b32 %r7524, 3, %r7523, %p1956;
selp.b32 %r10147, 2, %r7524, %p1954;
$L__BB2_1552:
// New threshold = 1 << exponent; publish the status flag.
mov.u32 %r7526, 1;
shl.b32 %r10042, %r7526, %r10147;
mov.u32 %r10043, %r10146;
bra.uni $L__BB2_1553;
// Shift a 0 bit into the bit accumulator %rs1253 (flushing it to [%rd68] if
// that fills it), then look up the exponent for the current state index
// %r10041. A zero exponent skips straight to $L__BB2_1541; otherwise the
// code that follows this block runs.
//   %r9835 = bits of room left in %rs1253, %r9826 = bytes stored so far
//   %r10139 = status flag (1 when the byte limit blocks a flush)
// NOTE(review): presumably the "terminate run" branch of the HTJ2K MEL
// coder -- confirm against the CUDA source.
$L__BB2_1507:
// Append a 0 bit.
shl.b16 %rs1253, %rs1253, 1;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p1902, %r9835, 0;
mov.u32 %r10139, %r10043;
@%p1902 bra $L__BB2_1510;
// Accumulator full. If more than 191 bytes are already stored, set the
// status to 1 and skip the store.
setp.gt.u32 %p1903, %r9826, 191;
mov.u32 %r10139, 1;
mov.u32 %r9835, 0;
@%p1903 bra $L__BB2_1510;
// Store the byte. Room is reset to 8 with no 0xFF test: bit 0 of the byte
// was just cleared by the shl above, so it cannot equal 0xFF.
st.global.u8 [%rd68], %rs1253;
add.s32 %r9826, %r9826, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9835, 8;
mov.u32 %r10139, %r10043;
$L__BB2_1510:
// Exponent lookup on the state index: 0..2 -> 0, 3..5 -> 1, 6..8 -> 2,
// 9..10 -> 3, 11 -> 4, otherwise 5.
setp.lt.u32 %p1904, %r10041, 3;
mov.u32 %r10107, 0;
@%p1904 bra $L__BB2_1513;
setp.lt.u32 %p1905, %r10041, 6;
mov.u32 %r10107, 1;
@%p1905 bra $L__BB2_1513;
setp.lt.u32 %p1906, %r10041, 9;
setp.eq.s32 %p1907, %r10041, 11;
selp.b32 %r7439, 4, 5, %p1907;
setp.lt.u32 %p1908, %r10041, 11;
selp.b32 %r7440, 3, %r7439, %p1908;
selp.b32 %r10107, 2, %r7440, %p1906;
$L__BB2_1513:
// Exponent 0: skip to the state update at $L__BB2_1541.
setp.eq.s32 %p1909, %r10107, 0;
@%p1909 bra $L__BB2_1541;
add.s32 %r3001, %r10107, -1;
and.b32 %r3002, %r10107, 3;
setp.eq.s32 %p1910, %r3002, 0;
mov.u32 %r10117, %r10107;
mov.u32 %r10118, %r10139;
@%p1910 bra $L__BB2_1526;
mov.u32 %r7442, 1;
shl.b32 %r7443, %r7442, %r3001;
and.b32 %r7444, %r7443, %r10040;
setp.ne.s32 %p1911, %r7444, 0;
selp.u32 %r7445, 1, 0, %p1911;
cvt.u32.u16 %r7446, %rs1253;
bfi.b32 %r7447, %r7446, %r7445, 1, 8;
cvt.u16.u32 %rs1253, %r7447;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p1912, %r9835, 0;
mov.u32 %r10118, %r10139;
@%p1912 bra $L__BB2_1518;
setp.gt.u32 %p1913, %r9826, 191;
mov.u32 %r9835, 0;
mov.u32 %r10118, %r7442;
@%p1913 bra $L__BB2_1518;
add.s32 %r7451, %r9826, 17477;
cvt.u64.u32 %rd1196, %r7451;
add.s64 %rd1197, %rd1196, %rd5;
add.s64 %rd1198, %rd1, %rd1197;
st.global.u8 [%rd1198], %rs1253;
add.s32 %r9826, %r9826, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9835, 8;
mov.u32 %r10118, %r10139;
$L__BB2_1518:
setp.eq.s32 %p1914, %r3002, 1;
mov.u32 %r10139, %r10118;
mov.u32 %r10117, %r3001;
@%p1914 bra $L__BB2_1526;
add.s32 %r10117, %r10107, -2;
mov.u32 %r7452, 1;
shl.b32 %r7453, %r7452, %r10117;
and.b32 %r7454, %r7453, %r10040;
setp.ne.s32 %p1915, %r7454, 0;
selp.u32 %r7455, 1, 0, %p1915;
cvt.u32.u16 %r7456, %rs1253;
bfi.b32 %r7457, %r7456, %r7455, 1, 8;
cvt.u16.u32 %rs1253, %r7457;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p1916, %r9835, 0;
mov.u32 %r10113, %r10118;
@%p1916 bra $L__BB2_1522;
setp.gt.u32 %p1917, %r9826, 191;
mov.u32 %r9835, 0;
mov.u32 %r10113, %r7452;
@%p1917 bra $L__BB2_1522;
add.s32 %r7460, %r9826, 17477;
cvt.u64.u32 %rd1199, %r7460;
add.s64 %rd1200, %rd1199, %rd5;
add.s64 %rd1201, %rd1, %rd1200;
and.b16 %rs920, %rs1253, 255;
st.global.u8 [%rd1201], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1918, %rs920, 255;
selp.b32 %r9835, 7, 8, %p1918;
mov.u16 %rs1253, 0;
mov.u32 %r10113, %r10118;
$L__BB2_1522:
setp.eq.s32 %p1919, %r3002, 2;
mov.u32 %r10139, %r10113;
mov.u32 %r10118, %r10113;
@%p1919 bra $L__BB2_1526;
add.s32 %r10117, %r10107, -3;
mov.u32 %r7461, 1;
shl.b32 %r7462, %r7461, %r10117;
and.b32 %r7463, %r7462, %r10040;
setp.ne.s32 %p1920, %r7463, 0;
selp.u32 %r7464, 1, 0, %p1920;
cvt.u32.u16 %r7465, %rs1253;
bfi.b32 %r7466, %r7465, %r7464, 1, 8;
cvt.u16.u32 %rs1253, %r7466;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p1921, %r9835, 0;
mov.u32 %r10139, %r10113;
mov.u32 %r10118, %r10113;
@%p1921 bra $L__BB2_1526;
setp.gt.u32 %p1922, %r9826, 191;
mov.u32 %r9835, 0;
mov.u32 %r10139, %r7461;
mov.u32 %r10118, %r7461;
@%p1922 bra $L__BB2_1526;
add.s32 %r7471, %r9826, 17477;
cvt.u64.u32 %rd1202, %r7471;
add.s64 %rd1203, %rd1202, %rd5;
add.s64 %rd1204, %rd1, %rd1203;
and.b16 %rs923, %rs1253, 255;
st.global.u8 [%rd1204], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1923, %rs923, 255;
selp.b32 %r9835, 7, 8, %p1923;
mov.u16 %rs1253, 0;
mov.u32 %r10139, %r10113;
mov.u32 %r10118, %r10113;
$L__BB2_1526:
setp.lt.u32 %p1924, %r3001, 3;
@%p1924 bra $L__BB2_1541;
mov.u32 %r10139, %r10118;
$L__BB2_1528:
add.s32 %r7472, %r10117, -1;
mov.u32 %r7473, 1;
shl.b32 %r7474, %r7473, %r7472;
and.b32 %r7475, %r7474, %r10040;
setp.ne.s32 %p1925, %r7475, 0;
selp.u32 %r7476, 1, 0, %p1925;
cvt.u32.u16 %r7477, %rs1253;
bfi.b32 %r10127, %r7477, %r7476, 1, 8;
add.s32 %r10126, %r9835, -1;
setp.ne.s32 %p1926, %r10126, 0;
mov.u32 %r10128, %r10139;
@%p1926 bra $L__BB2_1531;
setp.gt.u32 %p1927, %r9826, 191;
mov.u32 %r10126, 0;
mov.u32 %r10128, %r7473;
@%p1927 bra $L__BB2_1531;
cvt.u16.u32 %rs924, %r10127;
and.b16 %rs925, %rs924, 255;
add.s32 %r7481, %r9826, 17477;
cvt.u64.u32 %rd1205, %r7481;
add.s64 %rd1206, %rd1205, %rd5;
add.s64 %rd1207, %rd1, %rd1206;
st.global.u8 [%rd1207], %rs924;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1928, %rs925, 255;
selp.b32 %r10126, 7, 8, %p1928;
mov.u32 %r10127, 0;
mov.u32 %r10128, %r10139;
$L__BB2_1531:
add.s32 %r7482, %r10117, -2;
shl.b32 %r7484, %r7473, %r7482;
and.b32 %r7485, %r7484, %r10040;
setp.ne.s32 %p1929, %r7485, 0;
and.b32 %r7486, %r10127, 127;
selp.u32 %r7487, 1, 0, %p1929;
bfi.b32 %r10131, %r7486, %r7487, 1, 7;
add.s32 %r10130, %r10126, -1;
setp.ne.s32 %p1930, %r10130, 0;
mov.u32 %r10132, %r10128;
@%p1930 bra $L__BB2_1534;
setp.gt.u32 %p1931, %r9826, 191;
mov.u32 %r10132, 1;
mov.u32 %r10130, 0;
@%p1931 bra $L__BB2_1534;
cvt.u16.u32 %rs926, %r10131;
and.b16 %rs927, %rs926, 255;
add.s32 %r7491, %r9826, 17477;
cvt.u64.u32 %rd1208, %r7491;
add.s64 %rd1209, %rd1208, %rd5;
add.s64 %rd1210, %rd1, %rd1209;
st.global.u8 [%rd1210], %rs926;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1932, %rs927, 255;
selp.b32 %r10130, 7, 8, %p1932;
mov.u32 %r10131, 0;
mov.u32 %r10132, %r10128;
$L__BB2_1534:
add.s32 %r7492, %r10117, -3;
mov.u32 %r7493, 1;
shl.b32 %r7494, %r7493, %r7492;
and.b32 %r7495, %r7494, %r10040;
setp.ne.s32 %p1933, %r7495, 0;
and.b32 %r7496, %r10131, 127;
selp.u32 %r7497, 1, 0, %p1933;
bfi.b32 %r10135, %r7496, %r7497, 1, 7;
add.s32 %r10134, %r10130, -1;
setp.ne.s32 %p1934, %r10134, 0;
mov.u32 %r10136, %r10132;
@%p1934 bra $L__BB2_1537;
setp.gt.u32 %p1935, %r9826, 191;
mov.u32 %r10134, 0;
mov.u32 %r10136, %r7493;
@%p1935 bra $L__BB2_1537;
cvt.u16.u32 %rs928, %r10135;
and.b16 %rs929, %rs928, 255;
add.s32 %r7501, %r9826, 17477;
cvt.u64.u32 %rd1211, %r7501;
add.s64 %rd1212, %rd1211, %rd5;
add.s64 %rd1213, %rd1, %rd1212;
st.global.u8 [%rd1213], %rs928;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1936, %rs929, 255;
selp.b32 %r10134, 7, 8, %p1936;
mov.u32 %r10135, 0;
mov.u32 %r10136, %r10132;
$L__BB2_1537:
add.s32 %r10117, %r10117, -4;
shl.b32 %r7503, %r7493, %r10117;
and.b32 %r7504, %r7503, %r10040;
setp.ne.s32 %p1937, %r7504, 0;
and.b32 %r7505, %r10135, 127;
selp.u32 %r7506, 1, 0, %p1937;
bfi.b32 %r7507, %r7505, %r7506, 1, 15;
cvt.u16.u32 %rs1253, %r7507;
add.s32 %r9835, %r10134, -1;
setp.ne.s32 %p1938, %r9835, 0;
mov.u32 %r10139, %r10136;
@%p1938 bra $L__BB2_1540;
setp.gt.u32 %p1939, %r9826, 191;
mov.u32 %r10139, 1;
mov.u32 %r9835, 0;
@%p1939 bra $L__BB2_1540;
add.s32 %r7510, %r9826, 17477;
cvt.u64.u32 %rd1214, %r7510;
add.s64 %rd1215, %rd1214, %rd5;
add.s64 %rd1216, %rd1, %rd1215;
and.b16 %rs931, %rs1253, 255;
st.global.u8 [%rd1216], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p1940, %rs931, 255;
selp.b32 %r9835, 7, 8, %p1940;
mov.u16 %rs1253, 0;
mov.u32 %r10139, %r10136;
$L__BB2_1540:
setp.ne.s32 %p1941, %r10117, 0;
@%p1941 bra $L__BB2_1528;
// State update of the run-length state machine on this path: step the state
// index %r10041 down by one (never below 0), reset the run count %r10040 to
// 0 and recompute the threshold %r10042 = 1 << exponent(index).
// NOTE(review): matches the MEL "k = max(k - 1, 0)" adaptation step of
// HTJ2K -- confirm against the CUDA source.
$L__BB2_1541:
// index = (index == 0) ? 0 : index - 1; run count = 0.
add.s32 %r7512, %r10041, -1;
setp.eq.s32 %p1942, %r10041, 0;
mov.u32 %r10040, 0;
selp.b32 %r10041, 0, %r7512, %p1942;
// Exponent lookup on the new index: 0..2 -> 0, 3..5 -> 1, 6..8 -> 2,
// 9..10 -> 3, 11 -> 4, otherwise 5.
setp.lt.u32 %p1943, %r10041, 3;
mov.u32 %r10143, %r10040;
@%p1943 bra $L__BB2_1544;
setp.lt.u32 %p1944, %r10041, 6;
mov.u32 %r10143, 1;
@%p1944 bra $L__BB2_1544;
setp.lt.u32 %p1945, %r10041, 9;
setp.eq.s32 %p1946, %r10041, 11;
selp.b32 %r7514, 4, 5, %p1946;
setp.lt.u32 %p1947, %r10041, 11;
selp.b32 %r7515, 3, %r7514, %p1947;
selp.b32 %r10143, 2, %r7515, %p1945;
$L__BB2_1544:
// New threshold = 1 << exponent; publish the status flag from %r10139.
mov.u32 %r7517, 1;
shl.b32 %r10042, %r7517, %r10143;
mov.u32 %r10043, %r10139;
$L__BB2_1553:
setp.gt.s32 %p1957, %r2784, 2;
setp.gt.s32 %p1958, %r2443, 2;
and.pred %p1959, %p1958, %p1957;
@%p1959 bra $L__BB2_1602;
bra.uni $L__BB2_1554;
$L__BB2_1602:
add.s32 %r7647, %r2656, -11;
cvt.u64.u32 %rd1246, %r7647;
add.s64 %rd70, %rd63, %rd1246;
ld.global.u8 %rs393, [%rd70];
add.s32 %r7648, %r2656, -10;
cvt.u64.u32 %rd1248, %r7648;
add.s64 %rd1249, %rd63, %rd1248;
ld.global.u8 %rs394, [%rd1249];
ld.global.u8 %rs395, [%rd1249+1];
mul.lo.s32 %r7649, %r2784, 6;
add.s32 %r7650, %r7649, -12;
cvt.u64.u32 %rd1250, %r7650;
add.s64 %rd1251, %rd63, %rd1250;
ld.global.u8 %rs396, [%rd1251];
ld.global.u8 %rs397, [%rd1251+1];
add.s32 %r7651, %r7649, -10;
cvt.u64.u32 %rd1252, %r7651;
add.s64 %rd1253, %rd63, %rd1252;
ld.global.u8 %rs398, [%rd1253];
ld.global.u8 %rs399, [%rd1253+1];
setp.eq.s16 %p2027, %rs393, 0;
mov.u32 %r10241, %r9993;
@%p2027 bra $L__BB2_1609;
ld.global.u8 %r10231, [%rd70+-1];
cvt.u32.u16 %r10230, %rs393;
$L__BB2_1604:
mov.u32 %r3211, %r10230;
setp.gt.u32 %p2028, %r10274, 2879;
mov.u32 %r10241, 1;
@%p2028 bra $L__BB2_1609;
mov.u32 %r7653, 8;
sub.s32 %r7654, %r7653, %r10276;
sub.s32 %r7655, %r7654, %r10275;
min.u32 %r7656, %r7655, %r3211;
setp.eq.s32 %p2029, %r7656, 32;
mov.u32 %r7657, -1;
shl.b32 %r7658, %r7657, %r7656;
not.b32 %r7659, %r7658;
selp.b32 %r7660, -1, %r7659, %p2029;
and.b32 %r7661, %r7660, %r10231;
shl.b32 %r7662, %r7661, %r10275;
cvt.u16.u32 %rs967, %r7662;
or.b16 %rs1322, %rs1322, %rs967;
add.s32 %r10275, %r7656, %r10275;
sub.s32 %r10230, %r3211, %r7656;
shr.u32 %r10231, %r10231, %r7656;
setp.gt.u32 %p2030, %r7655, %r3211;
@%p2030 bra $L__BB2_1608;
setp.ne.s32 %p2031, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs968, %rs1322, 255;
setp.ne.s16 %p2032, %rs968, 127;
and.pred %p2033, %p2031, %p2032;
@%p2033 bra $L__BB2_1608;
mov.u32 %r7665, 20548;
sub.s32 %r7666, %r7665, %r10274;
cvt.u64.u32 %rd1254, %r7666;
add.s64 %rd1255, %rd1254, %rd5;
add.s64 %rd1256, %rd1, %rd1255;
st.global.u8 [%rd1256], %rs1322;
add.s32 %r10274, %r10274, 1;
setp.gt.u16 %p2034, %rs968, 143;
selp.u32 %r10276, 1, 0, %p2034;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1608:
setp.ne.s32 %p2035, %r10230, 0;
mov.u32 %r10241, %r9993;
@%p2035 bra $L__BB2_1604;
$L__BB2_1609:
setp.eq.s16 %p2036, %rs397, 0;
mov.u32 %r10253, %r10241;
@%p2036 bra $L__BB2_1616;
cvt.u32.u16 %r7667, %rs396;
and.b32 %r10243, %r7667, 255;
cvt.u32.u16 %r7668, %rs397;
and.b32 %r10242, %r7668, 255;
$L__BB2_1611:
mov.u32 %r3230, %r10242;
setp.gt.u32 %p2037, %r10274, 2879;
mov.u32 %r10253, 1;
@%p2037 bra $L__BB2_1616;
mov.u32 %r7670, 8;
sub.s32 %r7671, %r7670, %r10276;
sub.s32 %r7672, %r7671, %r10275;
min.u32 %r7673, %r7672, %r3230;
setp.eq.s32 %p2038, %r7673, 32;
mov.u32 %r7674, -1;
shl.b32 %r7675, %r7674, %r7673;
not.b32 %r7676, %r7675;
selp.b32 %r7677, -1, %r7676, %p2038;
and.b32 %r7678, %r7677, %r10243;
shl.b32 %r7679, %r7678, %r10275;
cvt.u16.u32 %rs972, %r7679;
or.b16 %rs1322, %rs1322, %rs972;
add.s32 %r10275, %r7673, %r10275;
sub.s32 %r10242, %r3230, %r7673;
shr.u32 %r10243, %r10243, %r7673;
setp.gt.u32 %p2039, %r7672, %r3230;
@%p2039 bra $L__BB2_1615;
setp.ne.s32 %p2040, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs973, %rs1322, 255;
setp.ne.s16 %p2041, %rs973, 127;
and.pred %p2042, %p2040, %p2041;
@%p2042 bra $L__BB2_1615;
mov.u32 %r7682, 20548;
sub.s32 %r7683, %r7682, %r10274;
cvt.u64.u32 %rd1257, %r7683;
add.s64 %rd1258, %rd1257, %rd5;
add.s64 %rd1259, %rd1, %rd1258;
st.global.u8 [%rd1259], %rs1322;
add.s32 %r10274, %r10274, 1;
setp.gt.u16 %p2043, %rs973, 143;
selp.u32 %r10276, 1, 0, %p2043;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1615:
setp.ne.s32 %p2044, %r10242, 0;
mov.u32 %r10253, %r10241;
@%p2044 bra $L__BB2_1611;
$L__BB2_1616:
setp.eq.s16 %p2045, %rs395, 0;
mov.u32 %r10265, %r10253;
@%p2045 bra $L__BB2_1623;
cvt.u32.u16 %r7684, %rs395;
and.b32 %r10254, %r7684, 255;
cvt.u32.u16 %r7685, %rs394;
and.b32 %r10255, %r7685, 255;
$L__BB2_1618:
mov.u32 %r3249, %r10254;
setp.gt.u32 %p2046, %r10274, 2879;
mov.u32 %r10265, 1;
@%p2046 bra $L__BB2_1623;
mov.u32 %r7687, 8;
sub.s32 %r7688, %r7687, %r10276;
sub.s32 %r7689, %r7688, %r10275;
min.u32 %r7690, %r7689, %r3249;
setp.eq.s32 %p2047, %r7690, 32;
mov.u32 %r7691, -1;
shl.b32 %r7692, %r7691, %r7690;
not.b32 %r7693, %r7692;
selp.b32 %r7694, -1, %r7693, %p2047;
and.b32 %r7695, %r7694, %r10255;
shl.b32 %r7696, %r7695, %r10275;
cvt.u16.u32 %rs977, %r7696;
or.b16 %rs1322, %rs1322, %rs977;
add.s32 %r10275, %r7690, %r10275;
sub.s32 %r10254, %r3249, %r7690;
shr.u32 %r10255, %r10255, %r7690;
setp.gt.u32 %p2048, %r7689, %r3249;
@%p2048 bra $L__BB2_1622;
setp.ne.s32 %p2049, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs978, %rs1322, 255;
setp.ne.s16 %p2050, %rs978, 127;
and.pred %p2051, %p2049, %p2050;
@%p2051 bra $L__BB2_1622;
mov.u32 %r7699, 20548;
sub.s32 %r7700, %r7699, %r10274;
cvt.u64.u32 %rd1260, %r7700;
add.s64 %rd1261, %rd1260, %rd5;
add.s64 %rd1262, %rd1, %rd1261;
st.global.u8 [%rd1262], %rs1322;
add.s32 %r10274, %r10274, 1;
setp.gt.u16 %p2052, %rs978, 143;
selp.u32 %r10276, 1, 0, %p2052;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1622:
setp.ne.s32 %p2053, %r10254, 0;
mov.u32 %r10265, %r10253;
@%p2053 bra $L__BB2_1618;
$L__BB2_1623:
setp.eq.s16 %p2054, %rs399, 0;
mov.u32 %r10277, %r10265;
@%p2054 bra $L__BB2_1630;
cvt.u32.u16 %r7701, %rs398;
and.b32 %r10267, %r7701, 255;
cvt.u32.u16 %r7702, %rs399;
and.b32 %r10266, %r7702, 255;
$L__BB2_1625:
mov.u32 %r3268, %r10266;
setp.gt.u32 %p2055, %r10274, 2879;
mov.u32 %r10277, 1;
@%p2055 bra $L__BB2_1630;
mov.u32 %r7704, 8;
sub.s32 %r7705, %r7704, %r10276;
sub.s32 %r7706, %r7705, %r10275;
min.u32 %r7707, %r7706, %r3268;
setp.eq.s32 %p2056, %r7707, 32;
mov.u32 %r7708, -1;
shl.b32 %r7709, %r7708, %r7707;
not.b32 %r7710, %r7709;
selp.b32 %r7711, -1, %r7710, %p2056;
and.b32 %r7712, %r7711, %r10267;
shl.b32 %r7713, %r7712, %r10275;
cvt.u16.u32 %rs982, %r7713;
or.b16 %rs1322, %rs1322, %rs982;
add.s32 %r10275, %r7707, %r10275;
sub.s32 %r10266, %r3268, %r7707;
shr.u32 %r10267, %r10267, %r7707;
setp.gt.u32 %p2057, %r7706, %r3268;
@%p2057 bra $L__BB2_1629;
setp.ne.s32 %p2058, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs983, %rs1322, 255;
setp.ne.s16 %p2059, %rs983, 127;
and.pred %p2060, %p2058, %p2059;
@%p2060 bra $L__BB2_1629;
mov.u32 %r7716, 20548;
sub.s32 %r7717, %r7716, %r10274;
cvt.u64.u32 %rd1263, %r7717;
add.s64 %rd1264, %rd1263, %rd5;
add.s64 %rd1265, %rd1, %rd1264;
st.global.u8 [%rd1265], %rs1322;
add.s32 %r10274, %r10274, 1;
setp.gt.u16 %p2061, %rs983, 143;
selp.u32 %r10276, 1, 0, %p2061;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1629:
setp.ne.s32 %p2062, %r10266, 0;
mov.u32 %r10277, %r10265;
@%p2062 bra $L__BB2_1625;
bra.uni $L__BB2_1630;
$L__BB2_1554:
setp.gt.s32 %p1960, %r2784, 0;
and.pred %p1962, %p1958, %p1960;
@%p1962 bra $L__BB2_1583;
bra.uni $L__BB2_1555;
$L__BB2_1583:
ld.global.u8 %rs379, [%rd65+1];
ld.global.u8 %rs380, [%rd66];
ld.global.u8 %rs381, [%rd66+1];
setp.eq.s16 %p2001, %rs379, 0;
mov.u32 %r10209, %r9993;
@%p2001 bra $L__BB2_1590;
ld.global.u8 %r10199, [%rd65];
cvt.u32.u16 %r10198, %rs379;
$L__BB2_1585:
mov.u32 %r3159, %r10198;
setp.gt.u32 %p2002, %r10274, 2879;
mov.u32 %r10209, 1;
@%p2002 bra $L__BB2_1590;
mov.u32 %r7599, 8;
sub.s32 %r7600, %r7599, %r10276;
sub.s32 %r7601, %r7600, %r10275;
min.u32 %r7602, %r7601, %r3159;
setp.eq.s32 %p2003, %r7602, 32;
mov.u32 %r7603, -1;
shl.b32 %r7604, %r7603, %r7602;
not.b32 %r7605, %r7604;
selp.b32 %r7606, -1, %r7605, %p2003;
and.b32 %r7607, %r7606, %r10199;
shl.b32 %r7608, %r7607, %r10275;
cvt.u16.u32 %rs954, %r7608;
or.b16 %rs1322, %rs1322, %rs954;
add.s32 %r10275, %r7602, %r10275;
sub.s32 %r10198, %r3159, %r7602;
shr.u32 %r10199, %r10199, %r7602;
setp.gt.u32 %p2004, %r7601, %r3159;
@%p2004 bra $L__BB2_1589;
setp.ne.s32 %p2005, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs955, %rs1322, 255;
setp.ne.s16 %p2006, %rs955, 127;
and.pred %p2007, %p2005, %p2006;
@%p2007 bra $L__BB2_1589;
mov.u32 %r7611, 20548;
sub.s32 %r7612, %r7611, %r10274;
cvt.u64.u32 %rd1237, %r7612;
add.s64 %rd1238, %rd1237, %rd5;
add.s64 %rd1239, %rd1, %rd1238;
st.global.u8 [%rd1239], %rs1322;
add.s32 %r10274, %r10274, 1;
setp.gt.u16 %p2008, %rs955, 143;
selp.u32 %r10276, 1, 0, %p2008;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1589:
setp.ne.s32 %p2009, %r10198, 0;
mov.u32 %r10209, %r9993;
@%p2009 bra $L__BB2_1585;
$L__BB2_1590:
add.s32 %r10211, %r2784, -1;
cvt.u32.u16 %r7614, %rs381;
and.b32 %r10222, %r7614, 255;
cvt.u32.u16 %r7615, %rs380;
and.b32 %r10223, %r7615, 255;
mov.u32 %r7613, 1;
mov.u32 %r10210, %r7613;
$L__BB2_1591:
mov.u32 %r3179, %r10210;
setp.gt.u32 %p2010, %r10274, 2879;
mov.u32 %r10221, %r7613;
@%p2010 bra $L__BB2_1596;
mov.u32 %r7617, 8;
sub.s32 %r7618, %r7617, %r10276;
sub.s32 %r7619, %r7618, %r10275;
min.u32 %r7620, %r7619, %r3179;
setp.eq.s32 %p2011, %r7620, 32;
mov.u32 %r7621, -1;
shl.b32 %r7622, %r7621, %r7620;
not.b32 %r7623, %r7622;
selp.b32 %r7624, -1, %r7623, %p2011;
and.b32 %r7625, %r7624, %r10211;
shl.b32 %r7626, %r7625, %r10275;
cvt.u16.u32 %rs958, %r7626;
or.b16 %rs1322, %rs1322, %rs958;
add.s32 %r10275, %r7620, %r10275;
sub.s32 %r10210, %r3179, %r7620;
shr.u32 %r10211, %r10211, %r7620;
setp.gt.u32 %p2012, %r7619, %r3179;
@%p2012 bra $L__BB2_1595;
setp.ne.s32 %p2013, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs959, %rs1322, 255;
setp.ne.s16 %p2014, %rs959, 127;
and.pred %p2015, %p2013, %p2014;
@%p2015 bra $L__BB2_1595;
mov.u32 %r7629, 20548;
sub.s32 %r7630, %r7629, %r10274;
cvt.u64.u32 %rd1240, %r7630;
add.s64 %rd1241, %rd1240, %rd5;
add.s64 %rd1242, %rd1, %rd1241;
st.global.u8 [%rd1242], %rs1322;
add.s32 %r10274, %r10274, 1;
setp.gt.u16 %p2016, %rs959, 143;
selp.u32 %r10276, 1, 0, %p2016;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1595:
setp.ne.s32 %p2017, %r10210, 0;
mov.u32 %r10221, %r10209;
@%p2017 bra $L__BB2_1591;
$L__BB2_1596:
setp.eq.s16 %p2018, %rs381, 0;
mov.u32 %r10277, %r10221;
@%p2018 bra $L__BB2_1630;
$L__BB2_1597:
mov.u32 %r3196, %r10222;
setp.gt.u32 %p2019, %r10274, 2879;
mov.u32 %r10277, 1;
@%p2019 bra $L__BB2_1630;
mov.u32 %r7632, 8;
sub.s32 %r7633, %r7632, %r10276;
sub.s32 %r7634, %r7633, %r10275;
min.u32 %r7635, %r7634, %r3196;
setp.eq.s32 %p2020, %r7635, 32;
mov.u32 %r7636, -1;
shl.b32 %r7637, %r7636, %r7635;
not.b32 %r7638, %r7637;
selp.b32 %r7639, -1, %r7638, %p2020;
and.b32 %r7640, %r7639, %r10223;
shl.b32 %r7641, %r7640, %r10275;
cvt.u16.u32 %rs963, %r7641;
or.b16 %rs1322, %rs1322, %rs963;
add.s32 %r10275, %r7635, %r10275;
sub.s32 %r10222, %r3196, %r7635;
shr.u32 %r10223, %r10223, %r7635;
setp.gt.u32 %p2021, %r7634, %r3196;
@%p2021 bra $L__BB2_1601;
setp.ne.s32 %p2022, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs964, %rs1322, 255;
setp.ne.s16 %p2023, %rs964, 127;
and.pred %p2024, %p2022, %p2023;
@%p2024 bra $L__BB2_1601;
mov.u32 %r7644, 20548;
sub.s32 %r7645, %r7644, %r10274;
cvt.u64.u32 %rd1243, %r7645;
add.s64 %rd1244, %rd1243, %rd5;
add.s64 %rd1245, %rd1, %rd1244;
st.global.u8 [%rd1245], %rs1322;
add.s32 %r10274, %r10274, 1;
setp.gt.u16 %p2025, %rs964, 143;
selp.u32 %r10276, 1, 0, %p2025;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1601:
setp.eq.s32 %p2026, %r10222, 0;
mov.u32 %r10277, %r10221;
@%p2026 bra $L__BB2_1630;
bra.uni $L__BB2_1597;
$L__BB2_1555:
setp.gt.s32 %p1964, %r2443, 0;
selp.b32 %r7527, %r2656, 0, %p1964;
cvt.u64.u32 %rd1217, %r7527;
add.s64 %rd69, %rd63, %rd1217;
ld.global.u8 %rs357, [%rd69+1];
add.s32 %r7528, %r7527, 2;
cvt.u64.u32 %rd1219, %r7528;
add.s64 %rd1220, %rd63, %rd1219;
ld.global.u8 %rs358, [%rd1220];
ld.global.u8 %rs359, [%rd1220+1];
mul.lo.s32 %r7529, %r2784, 6;
selp.b32 %r7530, %r7529, 0, %p1960;
cvt.u64.u32 %rd1221, %r7530;
add.s64 %rd1222, %rd63, %rd1221;
ld.global.u8 %rs360, [%rd1222];
ld.global.u8 %rs361, [%rd1222+1];
add.s32 %r7531, %r7530, 2;
cvt.u64.u32 %rd1223, %r7531;
add.s64 %rd1224, %rd63, %rd1223;
ld.global.u8 %rs362, [%rd1224];
ld.global.u8 %rs363, [%rd1224+1];
setp.eq.s16 %p1965, %rs357, 0;
mov.u32 %r10165, %r9993;
@%p1965 bra $L__BB2_1562;
ld.global.u8 %r10155, [%rd69];
cvt.u32.u16 %r10154, %rs357;
$L__BB2_1557:
mov.u32 %r3087, %r10154;
setp.gt.u32 %p1966, %r10274, 2879;
mov.u32 %r10165, 1;
@%p1966 bra $L__BB2_1562;
mov.u32 %r7533, 8;
sub.s32 %r7534, %r7533, %r10276;
sub.s32 %r7535, %r7534, %r10275;
min.u32 %r7536, %r7535, %r3087;
setp.eq.s32 %p1967, %r7536, 32;
mov.u32 %r7537, -1;
shl.b32 %r7538, %r7537, %r7536;
not.b32 %r7539, %r7538;
selp.b32 %r7540, -1, %r7539, %p1967;
and.b32 %r7541, %r7540, %r10155;
shl.b32 %r7542, %r7541, %r10275;
cvt.u16.u32 %rs935, %r7542;
or.b16 %rs1322, %rs1322, %rs935;
add.s32 %r10275, %r7536, %r10275;
sub.s32 %r10154, %r3087, %r7536;
shr.u32 %r10155, %r10155, %r7536;
setp.gt.u32 %p1968, %r7535, %r3087;
@%p1968 bra $L__BB2_1561;
setp.ne.s32 %p1969, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs936, %rs1322, 255;
setp.ne.s16 %p1970, %rs936, 127;
and.pred %p1971, %p1969, %p1970;
@%p1971 bra $L__BB2_1561;
mov.u32 %r7545, 20548;
sub.s32 %r7546, %r7545, %r10274;
cvt.u64.u32 %rd1225, %r7546;
add.s64 %rd1226, %rd1225, %rd5;
add.s64 %rd1227, %rd1, %rd1226;
st.global.u8 [%rd1227], %rs1322;
add.s32 %r10274, %r10274, 1;
setp.gt.u16 %p1972, %rs936, 143;
selp.u32 %r10276, 1, 0, %p1972;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1561:
setp.ne.s32 %p1973, %r10154, 0;
mov.u32 %r10165, %r9993;
@%p1973 bra $L__BB2_1557;
$L__BB2_1562:
setp.eq.s16 %p1974, %rs361, 0;
mov.u32 %r10177, %r10165;
@%p1974 bra $L__BB2_1569;
cvt.u32.u16 %r7547, %rs360;
and.b32 %r10167, %r7547, 255;
cvt.u32.u16 %r7548, %rs361;
and.b32 %r10166, %r7548, 255;
$L__BB2_1564:
mov.u32 %r3106, %r10166;
setp.gt.u32 %p1975, %r10274, 2879;
mov.u32 %r10177, 1;
@%p1975 bra $L__BB2_1569;
mov.u32 %r7550, 8;
sub.s32 %r7551, %r7550, %r10276;
sub.s32 %r7552, %r7551, %r10275;
min.u32 %r7553, %r7552, %r3106;
setp.eq.s32 %p1976, %r7553, 32;
mov.u32 %r7554, -1;
shl.b32 %r7555, %r7554, %r7553;
not.b32 %r7556, %r7555;
selp.b32 %r7557, -1, %r7556, %p1976;
and.b32 %r7558, %r7557, %r10167;
shl.b32 %r7559, %r7558, %r10275;
cvt.u16.u32 %rs940, %r7559;
or.b16 %rs1322, %rs1322, %rs940;
add.s32 %r10275, %r7553, %r10275;
sub.s32 %r10166, %r3106, %r7553;
shr.u32 %r10167, %r10167, %r7553;
setp.gt.u32 %p1977, %r7552, %r3106;
@%p1977 bra $L__BB2_1568;
setp.ne.s32 %p1978, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs941, %rs1322, 255;
setp.ne.s16 %p1979, %rs941, 127;
and.pred %p1980, %p1978, %p1979;
@%p1980 bra $L__BB2_1568;
mov.u32 %r7562, 20548;
sub.s32 %r7563, %r7562, %r10274;
cvt.u64.u32 %rd1228, %r7563;
add.s64 %rd1229, %rd1228, %rd5;
add.s64 %rd1230, %rd1, %rd1229;
st.global.u8 [%rd1230], %rs1322;
add.s32 %r10274, %r10274, 1;
setp.gt.u16 %p1981, %rs941, 143;
selp.u32 %r10276, 1, 0, %p1981;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1568:
setp.ne.s32 %p1982, %r10166, 0;
mov.u32 %r10177, %r10165;
@%p1982 bra $L__BB2_1564;
$L__BB2_1569:
setp.eq.s16 %p1983, %rs359, 0;
mov.u32 %r10189, %r10177;
@%p1983 bra $L__BB2_1576;
cvt.u32.u16 %r7564, %rs359;
and.b32 %r10178, %r7564, 255;
cvt.u32.u16 %r7565, %rs358;
and.b32 %r10179, %r7565, 255;
$L__BB2_1571:
mov.u32 %r3125, %r10178;
setp.gt.u32 %p1984, %r10274, 2879;
mov.u32 %r10189, 1;
@%p1984 bra $L__BB2_1576;
mov.u32 %r7567, 8;
sub.s32 %r7568, %r7567, %r10276;
sub.s32 %r7569, %r7568, %r10275;
min.u32 %r7570, %r7569, %r3125;
setp.eq.s32 %p1985, %r7570, 32;
mov.u32 %r7571, -1;
shl.b32 %r7572, %r7571, %r7570;
not.b32 %r7573, %r7572;
selp.b32 %r7574, -1, %r7573, %p1985;
and.b32 %r7575, %r7574, %r10179;
shl.b32 %r7576, %r7575, %r10275;
cvt.u16.u32 %rs945, %r7576;
or.b16 %rs1322, %rs1322, %rs945;
add.s32 %r10275, %r7570, %r10275;
sub.s32 %r10178, %r3125, %r7570;
shr.u32 %r10179, %r10179, %r7570;
setp.gt.u32 %p1986, %r7569, %r3125;
@%p1986 bra $L__BB2_1575;
setp.ne.s32 %p1987, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs946, %rs1322, 255;
setp.ne.s16 %p1988, %rs946, 127;
and.pred %p1989, %p1987, %p1988;
@%p1989 bra $L__BB2_1575;
mov.u32 %r7579, 20548;
sub.s32 %r7580, %r7579, %r10274;
cvt.u64.u32 %rd1231, %r7580;
add.s64 %rd1232, %rd1231, %rd5;
add.s64 %rd1233, %rd1, %rd1232;
st.global.u8 [%rd1233], %rs1322;
add.s32 %r10274, %r10274, 1;
setp.gt.u16 %p1990, %rs946, 143;
selp.u32 %r10276, 1, 0, %p1990;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1575:
setp.ne.s32 %p1991, %r10178, 0;
mov.u32 %r10189, %r10177;
@%p1991 bra $L__BB2_1571;
// Same bit-emission loop as the preceding one, for a second code: the bit
// count is (%rs363 & 255) and the bit pattern is (%rs362 & 255). Skipped when
// %rs363 == 0. On exit %r10277 carries %r10189, or 1 if the byte counter
// %r10274 exceeded 2879 (presumably an overflow flag -- confirm).
$L__BB2_1576:
setp.eq.s16 %p1992, %rs363, 0;
mov.u32 %r10277, %r10189;
@%p1992 bra $L__BB2_1630;
// %r10191 = remaining bit pattern (LSB first), %r10190 = bits still to emit.
cvt.u32.u16 %r7581, %rs362;
and.b32 %r10191, %r7581, 255;
cvt.u32.u16 %r7582, %rs363;
and.b32 %r10190, %r7582, 255;
$L__BB2_1578:
mov.u32 %r3144, %r10190;
// Stop with %r10277 = 1 once more than 2879 bytes have been written.
setp.gt.u32 %p1993, %r10274, 2879;
mov.u32 %r10277, 1;
@%p1993 bra $L__BB2_1630;
// Free bits in the current byte = 8 - %r10276 (reserve flag) - %r10275 (fill).
mov.u32 %r7584, 8;
sub.s32 %r7585, %r7584, %r10276;
sub.s32 %r7586, %r7585, %r10275;
// n = min(free bits, bits remaining).
min.u32 %r7587, %r7586, %r3144;
// mask = (1 << n) - 1, with n == 32 special-cased to all ones.
setp.eq.s32 %p1994, %r7587, 32;
mov.u32 %r7588, -1;
shl.b32 %r7589, %r7588, %r7587;
not.b32 %r7590, %r7589;
selp.b32 %r7591, -1, %r7590, %p1994;
// Merge the next n bits into the accumulator above the bits already present.
and.b32 %r7592, %r7591, %r10191;
shl.b32 %r7593, %r7592, %r10275;
cvt.u16.u32 %rs950, %r7593;
or.b16 %rs1322, %rs1322, %rs950;
add.s32 %r10275, %r7587, %r10275;
sub.s32 %r10190, %r3144, %r7587;
shr.u32 %r10191, %r10191, %r7587;
// Byte not yet full: nothing to flush.
setp.gt.u32 %p1995, %r7586, %r3144;
@%p1995 bra $L__BB2_1582;
// Clear the reserve flag; if it was set and the byte is not 127, defer the
// store so the freed bit can still be filled.
setp.ne.s32 %p1996, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs951, %rs1322, 255;
setp.ne.s16 %p1997, %rs951, 127;
and.pred %p1998, %p1996, %p1997;
@%p1998 bra $L__BB2_1582;
// Store at %rd1 + %rd5 + (20548 - %r10274): bytes land at descending addresses.
mov.u32 %r7596, 20548;
sub.s32 %r7597, %r7596, %r10274;
cvt.u64.u32 %rd1234, %r7597;
add.s64 %rd1235, %rd1234, %rd5;
add.s64 %rd1236, %rd1, %rd1235;
st.global.u8 [%rd1236], %rs1322;
add.s32 %r10274, %r10274, 1;
// Reserve one bit in the next byte when the stored byte is greater than 143.
setp.gt.u16 %p1999, %rs951, 143;
selp.u32 %r10276, 1, 0, %p1999;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1582:
// Leave when no bits remain (restoring %r10277 = %r10189), otherwise iterate.
setp.eq.s32 %p2000, %r10190, 0;
mov.u32 %r10277, %r10189;
@%p2000 bra $L__BB2_1630;
bra.uni $L__BB2_1578;
// Loop tail: carry state for the next iteration, %r9760 = (%r9963 >> 1) | %r2901.
$L__BB2_1630:
shr.u32 %r7718, %r9963, 1;
or.b32 %r9760, %r7718, %r2901;
$L__BB2_1631:
// Advance the loop counter %r9744 by 4 and repeat from $L__BB2_1266 while it
// is below %r4057 (unsigned compare).
add.s32 %r9744, %r9744, 4;
setp.lt.u32 %p2063, %r9744, %r4057;
@%p2063 bra $L__BB2_1266;
$L__BB2_1632:
// idx = ((%r4057 + 1) >> 1) + 1. When idx <= 512, write a zero byte to the
// shared-memory location %r4103 + idx; otherwise skip the store.
add.s32 %r8418, %r4057, 1;
shr.u32 %r8417, %r8418, 1;
add.s32 %r7719, %r8417, 1;
setp.gt.u32 %p2064, %r7719, 512;
@%p2064 bra $L__BB2_1634;
// The same (%r4057 + 1) >> 1 is recomputed here rather than reused.
add.s32 %r8409, %r4057, 1;
shr.u32 %r8408, %r8409, 1;
add.s32 %r8407, %r4103, %r8408;
mov.u16 %rs986, 0;
add.s32 %r8403, %r8407, 1;
st.shared.u8 [%r8403], %rs986;
// Nothing further to do here when %r4058 < 3: jump to $L__BB2_1880.
$L__BB2_1634:
setp.lt.u32 %p2065, %r4058, 3;
@%p2065 bra $L__BB2_1880;
// Load kernel parameters 4 and 3 and convert them to global-space addresses
// (%rd71 and %rd72). %r10310 starts at 2.
ld.param.u64 %rd1418, [ j2k_htj2k_encode_codeblocks_multi_input_param_4];
ld.param.u64 %rd1413, [ j2k_htj2k_encode_codeblocks_multi_input_param_3];
mov.u32 %r10310, 2;
cvta.to.global.u64 %rd71, %rd1418;
cvta.to.global.u64 %rd72, %rd1413;
$L__BB2_1636:
// Read byte 0 of the shared arrays cleanup_e_val and cleanup_cx_val into
// %rs422 / %rs423, then reset both bytes to zero.
ld.shared.u8 %rs422, [_ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE13cleanup_e_val];
mov.u16 %rs987, 0;
st.shared.u8 [_ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE13cleanup_e_val], %rs987;
ld.shared.u8 %rs423, [_ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE14cleanup_cx_val];
st.shared.u8 [_ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE14cleanup_cx_val], %rs987;
// %p10 is computed outside this region; when set, the work below is skipped.
@%p10 bra $L__BB2_1879;
mov.u32 %r7723, 0;
// Byte 1 of each shared array.
ld.shared.u8 %rs988, [_ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE13cleanup_e_val+1];
ld.shared.u8 %rs989, [_ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE14cleanup_cx_val+1];
// %r10345 = max(e_val[0] as read above, e_val[1]) - 1.
max.u16 %rs991, %rs422, %rs988;
cvt.u32.u16 %r7724, %rs991;
add.s32 %r10345, %r7724, -1;
// %r3336 = %r10310 + 1; %r10343 = %r10310 * %r4055 (presumably a row index
// times a row stride -- confirm against the CUDA source).
add.s32 %r3336, %r10310, 1;
mul.lo.s32 %r10343, %r10310, %r4055;
// %r10342 = cx_val[1] * 4 + cx_val[0] (the value read before the reset).
mul.wide.u16 %r7725, %rs989, 4;
cvt.u32.u16 %r7726, %rs423;
and.b32 %r7727, %r7726, 255;
add.s32 %r10342, %r7725, %r7727;
// Zero the three counters used by the loop entered at $L__BB2_1638.
mov.u32 %r10326, %r7723;
mov.u32 %r10344, %r7723;
mov.u32 %r10346, %r7723;
bra.uni $L__BB2_1638;
// Out-of-line flush of the byte accumulator %rs1253, reached when the
// remaining-bit counter %r9835 has just dropped to zero.
// If the byte counter %r9826 is already above 191 nothing is stored and
// %r10428 is set to 1 with %r9835 left at 0. Otherwise the byte is written to
// [%rd73], %r9826 is incremented, the accumulator is cleared, %r10428 takes
// %r10043, and the next byte's capacity %r9835 becomes 7 if the stored byte
// was 255, else 8.
$L__BB2_1709:
setp.gt.u32 %p2145, %r9826, 191;
mov.u32 %r10428, 1;
mov.u32 %r9835, 0;
@%p2145 bra $L__BB2_1711;
and.b16 %rs1018, %rs1253, 255;
st.global.u8 [%rd73], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p2146, %rs1018, 255;
selp.b32 %r9835, 7, 8, %p2146;
mov.u16 %rs1253, 0;
mov.u32 %r10428, %r10043;
bra.uni $L__BB2_1711;
// Same flush sequence for the second copy of the code path: stores through
// %rd74, reports through %r10577 and resumes at $L__BB2_1817.
$L__BB2_1815:
setp.gt.u32 %p2264, %r9826, 191;
mov.u32 %r10577, 1;
mov.u32 %r9835, 0;
@%p2264 bra $L__BB2_1817;
and.b16 %rs1055, %rs1253, 255;
st.global.u8 [%rd74], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p2265, %rs1055, 255;
selp.b32 %r9835, 7, 8, %p2265;
mov.u16 %rs1253, 0;
mov.u32 %r10577, %r10043;
bra.uni $L__BB2_1817;
// Load two 32-bit words from global memory and derive, for each, a bit length,
// an adjusted value and a presence bit in %r10355.
// First word: element index %r10343 + %rd4 in the array based at %rd3.
$L__BB2_1638:
cvt.u64.u32 %rd1266, %r10343;
add.s64 %rd1267, %rd1266, %rd4;
shl.b64 %rd1268, %rd1267, 2;
add.s64 %rd1269, %rd3, %rd1268;
ld.global.u32 %r3360, [%rd1269];
setp.eq.s32 %p2067, %r3360, 0;
// Default for a zero input (%r7723 is set to 0 in the $L__BB2_1636 setup).
mov.u32 %r10347, %r7723;
@%p2067 bra $L__BB2_1640;
// Non-zero input: %r10347 = sign bit | (abs(x) << %r2358).
and.b32 %r7729, %r3360, -2147483648;
abs.s32 %r7730, %r3360;
shl.b32 %r7731, %r7730, %r2358;
or.b32 %r10347, %r7731, %r7729;
$L__BB2_1640:
// %r3363 = ((%r10347 << 1) >> %r2358) with bit 0 cleared; the left shift
// discards the sign bit.
shl.b32 %r7735, %r10347, 1;
shr.u32 %r7736, %r7735, %r2358;
and.b32 %r3363, %r7736, -2;
setp.eq.s32 %p2068, %r3363, 0;
// Defaults when %r3363 == 0: length %r10348 = 0, value %r10349 = 0, mask
// %r10355 = 0. %r10351 is also zeroed for the second word below.
mov.u32 %r10351, 0;
mov.u32 %r10348, %r10351;
mov.u32 %r10349, %r10351;
mov.u32 %r10355, %r10351;
@%p2068 bra $L__BB2_1642;
// %r10348 = 32 - clz(%r3363 - 1), i.e. the bit length of %r3363 - 1.
add.s32 %r7738, %r3363, -1;
clz.b32 %r7739, %r7738;
mov.u32 %r7740, 32;
sub.s32 %r10348, %r7740, %r7739;
// %r10349 = %r3363 + sign bit - 2; mark presence in bit 0 of %r10355.
shr.u32 %r7741, %r10347, 31;
add.s32 %r7742, %r7741, %r3363;
add.s32 %r10349, %r7742, -2;
mov.u32 %r10355, 1;
$L__BB2_1642:
// Second word: index %r10343 + %r4055 (presumably the next row -- confirm).
// Not loaded when %r3336 >= %r4058; %r10351 then stays 0. %p2069 is reused by
// later guards of the same kind.
setp.ge.u32 %p2069, %r3336, %r4058;
@%p2069 bra $L__BB2_1645;
add.s32 %r7745, %r10343, %r4055;
cvt.u64.u32 %rd1270, %r7745;
add.s64 %rd1271, %rd1270, %rd4;
shl.b64 %rd1272, %rd1271, 2;
add.s64 %rd1273, %rd3, %rd1272;
ld.global.u32 %r3369, [%rd1273];
setp.eq.s32 %p2070, %r3369, 0;
@%p2070 bra $L__BB2_1645;
// Non-zero input: %r10351 = sign bit | (abs(x) << %r2358).
and.b32 %r7746, %r3369, -2147483648;
abs.s32 %r7747, %r3369;
shl.b32 %r7748, %r7747, %r2358;
or.b32 %r10351, %r7748, %r7746;
$L__BB2_1645:
shl.b32 %r7751, %r10351, 1;
shr.u32 %r7752, %r7751, %r2358;
and.b32 %r3372, %r7752, -2;
setp.eq.s32 %p2071, %r3372, 0;
// Defaults: length %r10352 = 0, value %r10353 = 0, and the running maximum
// %r10371 is just the first word's length.
mov.u32 %r10366, 0;
mov.u32 %r10352, %r10366;
mov.u32 %r10353, %r10366;
mov.u32 %r10371, %r10348;
@%p2071 bra $L__BB2_1647;
// Non-zero: set bit 1 of %r10355, compute the length and value as above and
// fold the length into the running maximum %r10371.
or.b32 %r10355, %r10355, 2;
add.s32 %r7753, %r3372, -1;
clz.b32 %r7754, %r7753;
mov.u32 %r7755, 32;
sub.s32 %r10352, %r7755, %r7754;
max.s32 %r10371, %r10348, %r10352;
shr.u32 %r7756, %r10351, 31;
add.s32 %r7757, %r7756, %r3372;
add.s32 %r10353, %r7757, -2;
$L__BB2_1647:
add.s32 %r10648, %r10343, 1;
add.s32 %r7762, %r10326, 1;
setp.ge.u32 %p2072, %r7762, %r4057;
mov.u32 %r10367, %r10366;
mov.u32 %r10368, %r10366;
mov.u32 %r10369, %r10366;
@%p2072 bra $L__BB2_1658;
cvt.u64.u32 %rd1274, %r10648;
add.s64 %rd1275, %rd1274, %rd4;
shl.b64 %rd1276, %rd1275, 2;
add.s64 %rd1277, %rd3, %rd1276;
ld.global.u32 %r3382, [%rd1277];
setp.eq.s32 %p2073, %r3382, 0;
mov.u32 %r10367, 0;
mov.u32 %r10356, %r10367;
@%p2073 bra $L__BB2_1650;
and.b32 %r7764, %r3382, -2147483648;
abs.s32 %r7765, %r3382;
shl.b32 %r7766, %r7765, %r2358;
or.b32 %r10356, %r7766, %r7764;
$L__BB2_1650:
shl.b32 %r7769, %r10356, 1;
shr.u32 %r7770, %r7769, %r2358;
and.b32 %r3385, %r7770, -2;
setp.eq.s32 %p2074, %r3385, 0;
mov.u32 %r10369, %r10367;
@%p2074 bra $L__BB2_1652;
or.b32 %r10355, %r10355, 4;
add.s32 %r7771, %r3385, -1;
clz.b32 %r7772, %r7771;
mov.u32 %r7773, 32;
sub.s32 %r10367, %r7773, %r7772;
max.s32 %r10371, %r10371, %r10367;
shr.u32 %r7774, %r10356, 31;
add.s32 %r7775, %r7774, %r3385;
add.s32 %r10369, %r7775, -2;
$L__BB2_1652:
mov.u32 %r10366, 0;
mov.u32 %r10361, %r10366;
@%p2069 bra $L__BB2_1655;
add.s32 %r7778, %r10648, %r4055;
cvt.u64.u32 %rd1278, %r7778;
add.s64 %rd1279, %rd1278, %rd4;
shl.b64 %rd1280, %rd1279, 2;
add.s64 %rd1281, %rd3, %rd1280;
ld.global.u32 %r3394, [%rd1281];
setp.eq.s32 %p2076, %r3394, 0;
@%p2076 bra $L__BB2_1655;
and.b32 %r7779, %r3394, -2147483648;
abs.s32 %r7780, %r3394;
shl.b32 %r7781, %r7780, %r2358;
or.b32 %r10361, %r7781, %r7779;
$L__BB2_1655:
shl.b32 %r7784, %r10361, 1;
shr.u32 %r7785, %r7784, %r2358;
and.b32 %r3397, %r7785, -2;
setp.eq.s32 %p2077, %r3397, 0;
mov.u32 %r10368, %r10366;
@%p2077 bra $L__BB2_1657;
or.b32 %r10355, %r10355, 8;
add.s32 %r7786, %r3397, -1;
clz.b32 %r7787, %r7786;
mov.u32 %r7788, 32;
sub.s32 %r10366, %r7788, %r7787;
max.s32 %r10371, %r10371, %r10366;
shr.u32 %r7789, %r10361, 31;
add.s32 %r7790, %r7789, %r3397;
add.s32 %r10368, %r7790, -2;
$L__BB2_1657:
add.s32 %r10648, %r10343, 2;
// Combine the four per-sample lengths (%r10348, %r10352, %r10367, %r10366),
// their running maximum %r10371 and the presence mask %r10355.
$L__BB2_1658:
// (%r10355 - 1) & %r10355 is non-zero exactly when more than one bit of the
// presence mask is set.
add.s32 %r7792, %r10355, -1;
and.b32 %r7793, %r7792, %r10355;
setp.ne.s32 %p2078, %r7793, 0;
mov.u32 %r10373, 0;
// Threshold %r7794 = %r10345 when %r10345 > 1 and several mask bits are set,
// otherwise 1.
setp.gt.s32 %p2079, %r10345, 1;
and.pred %p2080, %p2079, %p2078;
selp.b32 %r7794, %r10345, 1, %p2080;
// %r3414 = max(threshold, maximum length); %r3415 = excess over the threshold.
max.s32 %r3414, %r7794, %r10371;
sub.s32 %r3415, %r3414, %r7794;
// With no excess (%r3415 < 1) the mask %r10373 stays 0.
setp.lt.s32 %p2081, %r3415, 1;
@%p2081 bra $L__BB2_1660;
// Otherwise build a 4-bit mask in %r10373: bit k is set when the k-th length
// equals the maximum. Bit 0 <- %r10348, bit 1 <- %r10352 (inserted with bfi),
// bit 2 <- %r10367, bit 3 <- %r10366.
setp.eq.s32 %p2082, %r10348, %r10371;
selp.u32 %r7795, 1, 0, %p2082;
setp.eq.s32 %p2083, %r10352, %r10371;
selp.u32 %r7796, -1, 0, %p2083;
bfi.b32 %r7797, %r7796, %r7795, 1, 1;
setp.eq.s32 %p2084, %r10367, %r10371;
selp.u16 %rs992, 1, 0, %p2084;
mul.wide.u16 %r7798, %rs992, 4;
or.b32 %r7799, %r7797, %r7798;
setp.eq.s32 %p2085, %r10366, %r10371;
selp.u16 %rs993, 1, 0, %p2085;
mul.wide.u16 %r7800, %rs993, 8;
or.b32 %r10373, %r7799, %r7800;
// Look up a 16-bit table entry and emit the code it describes.
// Table index = (%r10342 << 8) | (%r10355 << 4) | %r10373; the table is the
// global array at %rd72 with 2-byte entries.
$L__BB2_1660:
shl.b32 %r7801, %r10355, 4;
shl.b32 %r7802, %r10342, 8;
or.b32 %r7803, %r7801, %r7802;
or.b32 %r7804, %r7803, %r10373;
mul.wide.u32 %rd1282, %r7804, 2;
add.s64 %rd1283, %rd72, %rd1282;
ld.global.u16 %rs426, [%rd1283];
// Entry bits 4..6 give the number of bits to emit; zero means emit nothing.
shr.u16 %rs994, %rs426, 4;
and.b16 %rs427, %rs994, 7;
setp.eq.s16 %p2086, %rs427, 0;
mov.u32 %r10385, %r10277;
@%p2086 bra $L__BB2_1667;
// %r10374 = bits still to emit; %r10375 = bit pattern from entry bits 8..15.
cvt.u32.u16 %r10374, %rs427;
shr.u16 %rs995, %rs426, 8;
cvt.u32.u16 %r10375, %rs995;
$L__BB2_1662:
mov.u32 %r3420, %r10374;
// Stop with %r10385 = 1 once the byte counter %r10274 exceeds 2879
// (presumably an overflow flag -- confirm).
setp.gt.u32 %p2087, %r10274, 2879;
mov.u32 %r10385, 1;
@%p2087 bra $L__BB2_1667;
// Free bits in the current byte = 8 - %r10276 (reserve flag) - %r10275 (fill).
mov.u32 %r7806, 8;
sub.s32 %r7807, %r7806, %r10276;
sub.s32 %r7808, %r7807, %r10275;
// n = min(free bits, bits remaining); mask = (1 << n) - 1 (n == 32 -> all ones).
min.u32 %r7809, %r7808, %r3420;
setp.eq.s32 %p2088, %r7809, 32;
mov.u32 %r7810, -1;
shl.b32 %r7811, %r7810, %r7809;
not.b32 %r7812, %r7811;
selp.b32 %r7813, -1, %r7812, %p2088;
// Merge the next n bits into the accumulator %rs1322 above the existing bits.
and.b32 %r7814, %r7813, %r10375;
shl.b32 %r7815, %r7814, %r10275;
cvt.u16.u32 %rs996, %r7815;
or.b16 %rs1322, %rs1322, %rs996;
add.s32 %r10275, %r7809, %r10275;
sub.s32 %r10374, %r3420, %r7809;
shr.u32 %r10375, %r10375, %r7809;
// Byte not yet full: nothing to flush.
setp.gt.u32 %p2089, %r7808, %r3420;
@%p2089 bra $L__BB2_1666;
// Clear the reserve flag; if it was set and the byte is not 127, defer the
// store so the freed bit can still be filled.
setp.ne.s32 %p2090, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs997, %rs1322, 255;
setp.ne.s16 %p2091, %rs997, 127;
and.pred %p2092, %p2090, %p2091;
@%p2092 bra $L__BB2_1666;
// Store at %rd1 + %rd5 + (20548 - %r10274): bytes land at descending addresses.
mov.u32 %r7818, 20548;
sub.s32 %r7819, %r7818, %r10274;
cvt.u64.u32 %rd1284, %r7819;
add.s64 %rd1285, %rd1284, %rd5;
add.s64 %rd1286, %rd1, %rd1285;
st.global.u8 [%rd1286], %rs1322;
add.s32 %r10274, %r10274, 1;
// Reserve one bit in the next byte when the stored byte is greater than 143.
setp.gt.u16 %p2093, %rs997, 143;
selp.u32 %r10276, 1, 0, %p2093;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1666:
// Loop while bits remain; a normal exit restores %r10385 = %r10277.
setp.ne.s32 %p2094, %r10374, 0;
mov.u32 %r10385, %r10277;
@%p2094 bra $L__BB2_1662;
$L__BB2_1667:
setp.ne.s32 %p2095, %r10342, 0;
@%p2095 bra $L__BB2_1715;
setp.eq.s32 %p2096, %r10355, 0;
add.s32 %r7820, %r9826, 17477;
cvt.u64.u32 %rd1287, %r7820;
add.s64 %rd1288, %rd1287, %rd5;
add.s64 %rd73, %rd1, %rd1288;
@%p2096 bra $L__BB2_1707;
shl.b16 %rs1253, %rs1253, 1;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p2097, %r9835, 0;
mov.u32 %r10421, %r10043;
@%p2097 bra $L__BB2_1672;
setp.gt.u32 %p2098, %r9826, 191;
mov.u32 %r10421, 1;
mov.u32 %r9835, 0;
@%p2098 bra $L__BB2_1672;
st.global.u8 [%rd73], %rs1253;
add.s32 %r9826, %r9826, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9835, 8;
mov.u32 %r10421, %r10043;
$L__BB2_1672:
setp.lt.u32 %p2099, %r10041, 3;
mov.u32 %r10389, 0;
@%p2099 bra $L__BB2_1675;
setp.lt.u32 %p2100, %r10041, 6;
mov.u32 %r10389, 1;
@%p2100 bra $L__BB2_1675;
setp.lt.u32 %p2101, %r10041, 9;
setp.eq.s32 %p2102, %r10041, 11;
selp.b32 %r7826, 4, 5, %p2102;
setp.lt.u32 %p2103, %r10041, 11;
selp.b32 %r7827, 3, %r7826, %p2103;
selp.b32 %r10389, 2, %r7827, %p2101;
$L__BB2_1675:
setp.eq.s32 %p2104, %r10389, 0;
@%p2104 bra $L__BB2_1703;
add.s32 %r3444, %r10389, -1;
and.b32 %r3445, %r10389, 3;
setp.eq.s32 %p2105, %r3445, 0;
mov.u32 %r10399, %r10389;
mov.u32 %r10400, %r10421;
@%p2105 bra $L__BB2_1688;
mov.u32 %r7829, 1;
shl.b32 %r7830, %r7829, %r3444;
and.b32 %r7831, %r7830, %r10040;
setp.ne.s32 %p2106, %r7831, 0;
selp.u32 %r7832, 1, 0, %p2106;
cvt.u32.u16 %r7833, %rs1253;
bfi.b32 %r7834, %r7833, %r7832, 1, 8;
cvt.u16.u32 %rs1253, %r7834;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p2107, %r9835, 0;
mov.u32 %r10400, %r10421;
@%p2107 bra $L__BB2_1680;
setp.gt.u32 %p2108, %r9826, 191;
mov.u32 %r9835, 0;
mov.u32 %r10400, %r7829;
@%p2108 bra $L__BB2_1680;
add.s32 %r7838, %r9826, 17477;
cvt.u64.u32 %rd1289, %r7838;
add.s64 %rd1290, %rd1289, %rd5;
add.s64 %rd1291, %rd1, %rd1290;
st.global.u8 [%rd1291], %rs1253;
add.s32 %r9826, %r9826, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9835, 8;
mov.u32 %r10400, %r10421;
$L__BB2_1680:
setp.eq.s32 %p2109, %r3445, 1;
mov.u32 %r10421, %r10400;
mov.u32 %r10399, %r3444;
@%p2109 bra $L__BB2_1688;
add.s32 %r10399, %r10389, -2;
mov.u32 %r7839, 1;
shl.b32 %r7840, %r7839, %r10399;
and.b32 %r7841, %r7840, %r10040;
setp.ne.s32 %p2110, %r7841, 0;
selp.u32 %r7842, 1, 0, %p2110;
cvt.u32.u16 %r7843, %rs1253;
bfi.b32 %r7844, %r7843, %r7842, 1, 8;
cvt.u16.u32 %rs1253, %r7844;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p2111, %r9835, 0;
mov.u32 %r10395, %r10400;
@%p2111 bra $L__BB2_1684;
setp.gt.u32 %p2112, %r9826, 191;
mov.u32 %r9835, 0;
mov.u32 %r10395, %r7839;
@%p2112 bra $L__BB2_1684;
add.s32 %r7847, %r9826, 17477;
cvt.u64.u32 %rd1292, %r7847;
add.s64 %rd1293, %rd1292, %rd5;
add.s64 %rd1294, %rd1, %rd1293;
and.b16 %rs1004, %rs1253, 255;
st.global.u8 [%rd1294], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p2113, %rs1004, 255;
selp.b32 %r9835, 7, 8, %p2113;
mov.u16 %rs1253, 0;
mov.u32 %r10395, %r10400;
$L__BB2_1684:
setp.eq.s32 %p2114, %r3445, 2;
mov.u32 %r10421, %r10395;
mov.u32 %r10400, %r10395;
@%p2114 bra $L__BB2_1688;
add.s32 %r10399, %r10389, -3;
mov.u32 %r7848, 1;
shl.b32 %r7849, %r7848, %r10399;
and.b32 %r7850, %r7849, %r10040;
setp.ne.s32 %p2115, %r7850, 0;
selp.u32 %r7851, 1, 0, %p2115;
cvt.u32.u16 %r7852, %rs1253;
bfi.b32 %r7853, %r7852, %r7851, 1, 8;
cvt.u16.u32 %rs1253, %r7853;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p2116, %r9835, 0;
mov.u32 %r10421, %r10395;
mov.u32 %r10400, %r10395;
@%p2116 bra $L__BB2_1688;
setp.gt.u32 %p2117, %r9826, 191;
mov.u32 %r9835, 0;
mov.u32 %r10421, %r7848;
mov.u32 %r10400, %r7848;
@%p2117 bra $L__BB2_1688;
add.s32 %r7858, %r9826, 17477;
cvt.u64.u32 %rd1295, %r7858;
add.s64 %rd1296, %rd1295, %rd5;
add.s64 %rd1297, %rd1, %rd1296;
and.b16 %rs1007, %rs1253, 255;
st.global.u8 [%rd1297], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p2118, %rs1007, 255;
selp.b32 %r9835, 7, 8, %p2118;
mov.u16 %rs1253, 0;
mov.u32 %r10421, %r10395;
mov.u32 %r10400, %r10395;
$L__BB2_1688:
setp.lt.u32 %p2119, %r3444, 3;
@%p2119 bra $L__BB2_1703;
mov.u32 %r10421, %r10400;
$L__BB2_1690:
add.s32 %r7859, %r10399, -1;
mov.u32 %r7860, 1;
shl.b32 %r7861, %r7860, %r7859;
and.b32 %r7862, %r7861, %r10040;
setp.ne.s32 %p2120, %r7862, 0;
selp.u32 %r7863, 1, 0, %p2120;
cvt.u32.u16 %r7864, %rs1253;
bfi.b32 %r10409, %r7864, %r7863, 1, 8;
add.s32 %r10408, %r9835, -1;
setp.ne.s32 %p2121, %r10408, 0;
mov.u32 %r10410, %r10421;
@%p2121 bra $L__BB2_1693;
setp.gt.u32 %p2122, %r9826, 191;
mov.u32 %r10408, 0;
mov.u32 %r10410, %r7860;
@%p2122 bra $L__BB2_1693;
cvt.u16.u32 %rs1008, %r10409;
and.b16 %rs1009, %rs1008, 255;
add.s32 %r7868, %r9826, 17477;
cvt.u64.u32 %rd1298, %r7868;
add.s64 %rd1299, %rd1298, %rd5;
add.s64 %rd1300, %rd1, %rd1299;
st.global.u8 [%rd1300], %rs1008;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p2123, %rs1009, 255;
selp.b32 %r10408, 7, 8, %p2123;
mov.u32 %r10409, 0;
mov.u32 %r10410, %r10421;
$L__BB2_1693:
add.s32 %r7869, %r10399, -2;
shl.b32 %r7871, %r7860, %r7869;
and.b32 %r7872, %r7871, %r10040;
setp.ne.s32 %p2124, %r7872, 0;
and.b32 %r7873, %r10409, 127;
selp.u32 %r7874, 1, 0, %p2124;
bfi.b32 %r10413, %r7873, %r7874, 1, 7;
add.s32 %r10412, %r10408, -1;
setp.ne.s32 %p2125, %r10412, 0;
mov.u32 %r10414, %r10410;
@%p2125 bra $L__BB2_1696;
setp.gt.u32 %p2126, %r9826, 191;
mov.u32 %r10414, 1;
mov.u32 %r10412, 0;
@%p2126 bra $L__BB2_1696;
cvt.u16.u32 %rs1010, %r10413;
and.b16 %rs1011, %rs1010, 255;
add.s32 %r7878, %r9826, 17477;
cvt.u64.u32 %rd1301, %r7878;
add.s64 %rd1302, %rd1301, %rd5;
add.s64 %rd1303, %rd1, %rd1302;
st.global.u8 [%rd1303], %rs1010;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p2127, %rs1011, 255;
selp.b32 %r10412, 7, 8, %p2127;
mov.u32 %r10413, 0;
mov.u32 %r10414, %r10410;
$L__BB2_1696:
add.s32 %r7879, %r10399, -3;
mov.u32 %r7880, 1;
shl.b32 %r7881, %r7880, %r7879;
and.b32 %r7882, %r7881, %r10040;
setp.ne.s32 %p2128, %r7882, 0;
and.b32 %r7883, %r10413, 127;
selp.u32 %r7884, 1, 0, %p2128;
bfi.b32 %r10417, %r7883, %r7884, 1, 7;
add.s32 %r10416, %r10412, -1;
setp.ne.s32 %p2129, %r10416, 0;
mov.u32 %r10418, %r10414;
@%p2129 bra $L__BB2_1699;
setp.gt.u32 %p2130, %r9826, 191;
mov.u32 %r10416, 0;
mov.u32 %r10418, %r7880;
@%p2130 bra $L__BB2_1699;
cvt.u16.u32 %rs1012, %r10417;
and.b16 %rs1013, %rs1012, 255;
add.s32 %r7888, %r9826, 17477;
cvt.u64.u32 %rd1304, %r7888;
add.s64 %rd1305, %rd1304, %rd5;
add.s64 %rd1306, %rd1, %rd1305;
st.global.u8 [%rd1306], %rs1012;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p2131, %rs1013, 255;
selp.b32 %r10416, 7, 8, %p2131;
mov.u32 %r10417, 0;
mov.u32 %r10418, %r10414;
$L__BB2_1699:
add.s32 %r10399, %r10399, -4;
shl.b32 %r7890, %r7880, %r10399;
and.b32 %r7891, %r7890, %r10040;
setp.ne.s32 %p2132, %r7891, 0;
and.b32 %r7892, %r10417, 127;
selp.u32 %r7893, 1, 0, %p2132;
bfi.b32 %r7894, %r7892, %r7893, 1, 15;
cvt.u16.u32 %rs1253, %r7894;
add.s32 %r9835, %r10416, -1;
setp.ne.s32 %p2133, %r9835, 0;
mov.u32 %r10421, %r10418;
@%p2133 bra $L__BB2_1702;
setp.gt.u32 %p2134, %r9826, 191;
mov.u32 %r10421, 1;
mov.u32 %r9835, 0;
@%p2134 bra $L__BB2_1702;
add.s32 %r7897, %r9826, 17477;
cvt.u64.u32 %rd1307, %r7897;
add.s64 %rd1308, %rd1307, %rd5;
add.s64 %rd1309, %rd1, %rd1308;
and.b16 %rs1015, %rs1253, 255;
st.global.u8 [%rd1309], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p2135, %rs1015, 255;
selp.b32 %r9835, 7, 8, %p2135;
mov.u16 %rs1253, 0;
mov.u32 %r10421, %r10418;
$L__BB2_1702:
setp.ne.s32 %p2136, %r10399, 0;
@%p2136 bra $L__BB2_1690;
$L__BB2_1703:
add.s32 %r7899, %r10041, -1;
setp.eq.s32 %p2137, %r10041, 0;
mov.u32 %r10040, 0;
selp.b32 %r10041, 0, %r7899, %p2137;
setp.lt.u32 %p2138, %r10041, 3;
mov.u32 %r10425, %r10040;
@%p2138 bra $L__BB2_1706;
setp.lt.u32 %p2139, %r10041, 6;
mov.u32 %r10425, 1;
@%p2139 bra $L__BB2_1706;
setp.lt.u32 %p2140, %r10041, 9;
setp.eq.s32 %p2141, %r10041, 11;
selp.b32 %r7901, 4, 5, %p2141;
setp.lt.u32 %p2142, %r10041, 11;
selp.b32 %r7902, 3, %r7901, %p2142;
selp.b32 %r10425, 2, %r7902, %p2140;
$L__BB2_1706:
mov.u32 %r7904, 1;
shl.b32 %r10042, %r7904, %r10425;
mov.u32 %r10043, %r10421;
bra.uni $L__BB2_1715;
$L__BB2_1707:
add.s32 %r10040, %r10040, 1;
setp.lt.u32 %p2143, %r10040, %r10042;
@%p2143 bra $L__BB2_1715;
shl.b16 %rs1016, %rs1253, 1;
or.b16 %rs1253, %rs1016, 1;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p2144, %r9835, 0;
mov.u32 %r10428, %r10043;
@%p2144 bra $L__BB2_1711;
bra.uni $L__BB2_1709;
$L__BB2_1711:
add.s32 %r7908, %r10041, 1;
min.u32 %r10041, %r7908, 12;
setp.lt.u32 %p2147, %r10041, 3;
mov.u32 %r10040, 0;
mov.u32 %r10429, %r10040;
@%p2147 bra $L__BB2_1714;
setp.lt.u32 %p2148, %r10041, 6;
mov.u32 %r10429, 1;
@%p2148 bra $L__BB2_1714;
setp.lt.u32 %p2149, %r10041, 9;
setp.eq.s32 %p2150, %r10041, 11;
selp.b32 %r7910, 4, 5, %p2150;
setp.lt.u32 %p2151, %r10041, 11;
selp.b32 %r7911, 3, %r7910, %p2151;
selp.b32 %r10429, 2, %r7911, %p2149;
$L__BB2_1714:
mov.u32 %r7913, 1;
shl.b32 %r10042, %r7913, %r10429;
mov.u32 %r10043, %r10428;
// Emit the value bits for the first sample, using the low 4 bits of the table
// entry %rs426 (kept in %r3528 for the following samples as well).
$L__BB2_1715:
and.b16 %rs1019, %rs426, 15;
cvt.u32.u16 %r3528, %rs1019;
// Skip unless bit 0 of the presence mask %r10355 is set. The xor with a
// constant-false predicate is a no-op emitted by the compiler.
and.b32 %r7914, %r10355, 1;
setp.eq.b32 %p2152, %r7914, 1;
mov.pred %p2153, 0;
xor.pred %p2154, %p2152, %p2153;
not.pred %p2155, %p2154;
mov.u32 %r10450, %r10495;
@%p2155 bra $L__BB2_1722;
// Bit count %r10436 = %r3414 - (entry bit 0); nothing to emit when it is 0.
and.b32 %r7915, %r3528, 1;
sub.s32 %r10436, %r3414, %r7915;
setp.eq.s32 %p2156, %r10436, 0;
mov.u32 %r10450, %r10495;
@%p2156 bra $L__BB2_1722;
// %r10437 = low %r10436 bits of the sample value %r10349.
mov.u32 %r7916, -1;
shl.b32 %r7917, %r7916, %r10436;
not.b32 %r7918, %r7917;
and.b32 %r10437, %r10349, %r7918;
$L__BB2_1718:
// Stop with %r10450 = 1 once the write position %r10461 exceeds 17476
// (presumably an overflow flag -- confirm).
setp.gt.u32 %p2157, %r10461, 17476;
mov.u32 %r10450, 1;
@%p2157 bra $L__BB2_1722;
// n = min(capacity %r10462 - fill %r10463, bits remaining).
sub.s32 %r7920, %r10462, %r10463;
min.u32 %r7921, %r7920, %r10436;
// mask = (1 << n) - 1, with n == 32 special-cased to all ones.
setp.eq.s32 %p2158, %r7921, 32;
mov.u32 %r7922, -1;
shl.b32 %r7923, %r7922, %r7921;
not.b32 %r7924, %r7923;
selp.b32 %r7925, -1, %r7924, %p2158;
// Merge the next n bits into the accumulator %r10464 above the existing bits.
and.b32 %r7926, %r7925, %r10437;
shl.b32 %r7927, %r7926, %r10463;
or.b32 %r10464, %r7927, %r10464;
add.s32 %r10463, %r7921, %r10463;
shr.u32 %r10437, %r10437, %r7921;
sub.s32 %r10436, %r10436, %r7921;
// Flush only when the fill has reached the current capacity.
setp.lt.u32 %p2159, %r10463, %r10462;
@%p2159 bra $L__BB2_1721;
// Store the low byte at %rd1 + %rd5 + %r10461 (ascending addresses).
cvt.u64.u32 %rd1310, %r10461;
add.s64 %rd1311, %rd1310, %rd5;
add.s64 %rd1312, %rd1, %rd1311;
st.global.u8 [%rd1312], %r10464;
add.s32 %r10461, %r10461, 1;
// The next byte holds only 7 bits when the byte just stored was 255.
setp.eq.s32 %p2160, %r10464, 255;
selp.b32 %r10462, 7, 8, %p2160;
mov.u32 %r10463, 0;
mov.u32 %r10464, %r10463;
$L__BB2_1721:
// Loop while bits remain; a normal exit restores %r10450 = %r10495.
setp.ne.s32 %p2161, %r10436, 0;
mov.u32 %r10450, %r10495;
@%p2161 bra $L__BB2_1718;
$L__BB2_1722:
and.b32 %r3552, %r10355, 2;
setp.eq.s32 %p2162, %r3552, 0;
mov.u32 %r10465, %r10450;
@%p2162 bra $L__BB2_1729;
shr.u32 %r7930, %r3528, 1;
and.b32 %r7931, %r7930, 1;
sub.s32 %r10451, %r3414, %r7931;
setp.eq.s32 %p2163, %r10451, 0;
mov.u32 %r10465, %r10450;
@%p2163 bra $L__BB2_1729;
mov.u32 %r7932, -1;
shl.b32 %r7933, %r7932, %r10451;
not.b32 %r7934, %r7933;
and.b32 %r10452, %r10353, %r7934;
$L__BB2_1725:
setp.gt.u32 %p2164, %r10461, 17476;
mov.u32 %r10465, 1;
@%p2164 bra $L__BB2_1729;
sub.s32 %r7936, %r10462, %r10463;
min.u32 %r7937, %r7936, %r10451;
setp.eq.s32 %p2165, %r7937, 32;
mov.u32 %r7938, -1;
shl.b32 %r7939, %r7938, %r7937;
not.b32 %r7940, %r7939;
selp.b32 %r7941, -1, %r7940, %p2165;
and.b32 %r7942, %r7941, %r10452;
shl.b32 %r7943, %r7942, %r10463;
or.b32 %r10464, %r7943, %r10464;
add.s32 %r10463, %r7937, %r10463;
shr.u32 %r10452, %r10452, %r7937;
sub.s32 %r10451, %r10451, %r7937;
setp.lt.u32 %p2166, %r10463, %r10462;
@%p2166 bra $L__BB2_1728;
cvt.u64.u32 %rd1313, %r10461;
add.s64 %rd1314, %rd1313, %rd5;
add.s64 %rd1315, %rd1, %rd1314;
st.global.u8 [%rd1315], %r10464;
add.s32 %r10461, %r10461, 1;
setp.eq.s32 %p2167, %r10464, 255;
selp.b32 %r10462, 7, 8, %p2167;
mov.u32 %r10463, 0;
mov.u32 %r10464, %r10463;
$L__BB2_1728:
setp.ne.s32 %p2168, %r10451, 0;
mov.u32 %r10465, %r10450;
@%p2168 bra $L__BB2_1725;
$L__BB2_1729:
and.b32 %r3576, %r10355, 4;
setp.eq.s32 %p2169, %r3576, 0;
mov.u32 %r10480, %r10465;
@%p2169 bra $L__BB2_1736;
shr.u32 %r7946, %r3528, 2;
and.b32 %r7947, %r7946, 1;
sub.s32 %r10466, %r3414, %r7947;
setp.eq.s32 %p2170, %r10466, 0;
mov.u32 %r10480, %r10465;
@%p2170 bra $L__BB2_1736;
mov.u32 %r7948, -1;
shl.b32 %r7949, %r7948, %r10466;
not.b32 %r7950, %r7949;
and.b32 %r10467, %r10369, %r7950;
$L__BB2_1732:
setp.gt.u32 %p2171, %r10461, 17476;
mov.u32 %r10480, 1;
@%p2171 bra $L__BB2_1736;
sub.s32 %r7952, %r10462, %r10463;
min.u32 %r7953, %r7952, %r10466;
setp.eq.s32 %p2172, %r7953, 32;
mov.u32 %r7954, -1;
shl.b32 %r7955, %r7954, %r7953;
not.b32 %r7956, %r7955;
selp.b32 %r7957, -1, %r7956, %p2172;
and.b32 %r7958, %r7957, %r10467;
shl.b32 %r7959, %r7958, %r10463;
or.b32 %r10464, %r7959, %r10464;
add.s32 %r10463, %r7953, %r10463;
shr.u32 %r10467, %r10467, %r7953;
sub.s32 %r10466, %r10466, %r7953;
setp.lt.u32 %p2173, %r10463, %r10462;
@%p2173 bra $L__BB2_1735;
cvt.u64.u32 %rd1316, %r10461;
add.s64 %rd1317, %rd1316, %rd5;
add.s64 %rd1318, %rd1, %rd1317;
st.global.u8 [%rd1318], %r10464;
add.s32 %r10461, %r10461, 1;
setp.eq.s32 %p2174, %r10464, 255;
selp.b32 %r10462, 7, 8, %p2174;
mov.u32 %r10463, 0;
mov.u32 %r10464, %r10463;
$L__BB2_1735:
setp.ne.s32 %p2175, %r10466, 0;
mov.u32 %r10480, %r10465;
@%p2175 bra $L__BB2_1732;
$L__BB2_1736:
and.b32 %r3600, %r10355, 8;
setp.eq.s32 %p2176, %r3600, 0;
mov.u32 %r10495, %r10480;
@%p2176 bra $L__BB2_1743;
shr.u32 %r7962, %r3528, 3;
sub.s32 %r10481, %r3414, %r7962;
setp.eq.s32 %p2177, %r10481, 0;
mov.u32 %r10495, %r10480;
@%p2177 bra $L__BB2_1743;
mov.u32 %r7963, -1;
shl.b32 %r7964, %r7963, %r10481;
not.b32 %r7965, %r7964;
and.b32 %r10482, %r10368, %r7965;
$L__BB2_1739:
setp.gt.u32 %p2178, %r10461, 17476;
mov.u32 %r10495, 1;
@%p2178 bra $L__BB2_1743;
sub.s32 %r7967, %r10462, %r10463;
min.u32 %r7968, %r7967, %r10481;
setp.eq.s32 %p2179, %r7968, 32;
mov.u32 %r7969, -1;
shl.b32 %r7970, %r7969, %r7968;
not.b32 %r7971, %r7970;
selp.b32 %r7972, -1, %r7971, %p2179;
and.b32 %r7973, %r7972, %r10482;
shl.b32 %r7974, %r7973, %r10463;
or.b32 %r10464, %r7974, %r10464;
add.s32 %r10463, %r7968, %r10463;
shr.u32 %r10482, %r10482, %r7968;
sub.s32 %r10481, %r10481, %r7968;
setp.lt.u32 %p2180, %r10463, %r10462;
@%p2180 bra $L__BB2_1742;
cvt.u64.u32 %rd1319, %r10461;
add.s64 %rd1320, %rd1319, %rd5;
add.s64 %rd1321, %rd1, %rd1320;
st.global.u8 [%rd1321], %r10464;
add.s32 %r10461, %r10461, 1;
setp.eq.s32 %p2181, %r10464, 255;
selp.b32 %r10462, 7, 8, %p2181;
mov.u32 %r10463, 0;
mov.u32 %r10464, %r10463;
$L__BB2_1742:
setp.ne.s32 %p2182, %r10481, 0;
mov.u32 %r10495, %r10480;
@%p2182 bra $L__BB2_1739;
$L__BB2_1743:
add.s32 %r3624, %r4103, %r10344;
ld.shared.u8 %rs1020, [%r3624];
mov.u32 %r10342, 0;
cvt.u32.u16 %r7980, %rs1020;
and.b32 %r7981, %r7980, 255;
and.b32 %r7982, %r10352, 255;
setp.lt.u32 %p2183, %r7982, %r7981;
cvt.u16.u32 %rs1021, %r10352;
selp.b16 %rs1022, %rs1020, %rs1021, %p2183;
st.shared.u8 [%r3624], %rs1022;
ld.shared.u8 %rs448, [%r3624+2];
ld.shared.u8 %rs1023, [%r3624+1];
setp.gt.u16 %p2184, %rs1023, %rs448;
add.s32 %r10647, %r10344, 1;
add.s32 %r7983, %r10344, 2;
selp.b32 %r7984, %r10647, %r7983, %p2184;
add.s32 %r7985, %r4103, %r7984;
ld.shared.u8 %rs449, [%r7985];
cvt.u32.u16 %r7986, %rs449;
and.b32 %r7987, %r7986, 255;
add.s32 %r10345, %r7987, -1;
cvt.u16.u32 %rs450, %r10366;
cvt.u16.u32 %rs1024, %r3552;
shr.u16 %rs1025, %rs1024, 1;
mov.u32 %r7988, _ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE14cleanup_cx_val;
add.s32 %r3627, %r7988, %r10346;
st.shared.u8 [%r3624+1], %r10366;
ld.shared.u8 %rs1026, [%r3627];
or.b16 %rs1027, %rs1026, %rs1025;
st.shared.u8 [%r3627], %rs1027;
add.s32 %r10346, %r10346, 1;
ld.shared.u8 %rs451, [%r3627+1];
ld.shared.u8 %r3629, [%r3627+2];
shr.u32 %r3630, %r3600, 3;
st.shared.u8 [%r3627+1], %r3630;
add.s32 %r7989, %r10326, 2;
setp.ge.u32 %p2185, %r7989, %r4057;
mov.u32 %r10665, %r10342;
@%p2185 bra $L__BB2_1850;
cvt.u64.u32 %rd1322, %r10648;
add.s64 %rd1323, %rd1322, %rd4;
shl.b64 %rd1324, %rd1323, 2;
add.s64 %rd1325, %rd3, %rd1324;
ld.global.u32 %r3631, [%rd1325];
setp.eq.s32 %p2186, %r3631, 0;
mov.u32 %r10497, 0;
mov.u32 %r10496, %r10497;
@%p2186 bra $L__BB2_1746;
and.b32 %r7991, %r3631, -2147483648;
abs.s32 %r7992, %r3631;
shl.b32 %r7993, %r7992, %r2358;
or.b32 %r10496, %r7993, %r7991;
$L__BB2_1746:
shl.b32 %r7997, %r10496, 1;
shr.u32 %r7998, %r7997, %r2358;
and.b32 %r3634, %r7998, -2;
setp.eq.s32 %p2187, %r3634, 0;
mov.u32 %r10498, %r10497;
mov.u32 %r10504, %r10497;
@%p2187 bra $L__BB2_1748;
add.s32 %r8000, %r3634, -1;
clz.b32 %r8001, %r8000;
mov.u32 %r8002, 32;
sub.s32 %r10497, %r8002, %r8001;
shr.u32 %r8003, %r10496, 31;
add.s32 %r8004, %r8003, %r3634;
add.s32 %r10498, %r8004, -2;
mov.u32 %r10504, 1;
$L__BB2_1748:
mov.u32 %r10501, 0;
mov.u32 %r10500, %r10501;
@%p2069 bra $L__BB2_1751;
add.s32 %r8007, %r10648, %r4055;
cvt.u64.u32 %rd1326, %r8007;
add.s64 %rd1327, %rd1326, %rd4;
shl.b64 %rd1328, %rd1327, 2;
add.s64 %rd1329, %rd3, %rd1328;
ld.global.u32 %r3640, [%rd1329];
setp.eq.s32 %p2189, %r3640, 0;
@%p2189 bra $L__BB2_1751;
and.b32 %r8008, %r3640, -2147483648;
abs.s32 %r8009, %r3640;
shl.b32 %r8010, %r8009, %r2358;
or.b32 %r10500, %r8010, %r8008;
$L__BB2_1751:
shl.b32 %r8013, %r10500, 1;
shr.u32 %r8014, %r8013, %r2358;
and.b32 %r3643, %r8014, -2;
setp.eq.s32 %p2190, %r3643, 0;
mov.u32 %r10502, %r10501;
mov.u32 %r10520, %r10497;
@%p2190 bra $L__BB2_1753;
or.b32 %r10504, %r10504, 2;
add.s32 %r8015, %r3643, -1;
clz.b32 %r8016, %r8015;
mov.u32 %r8017, 32;
sub.s32 %r10501, %r8017, %r8016;
max.s32 %r10520, %r10497, %r10501;
shr.u32 %r8018, %r10500, 31;
add.s32 %r8019, %r8018, %r3643;
add.s32 %r10502, %r8019, -2;
$L__BB2_1753:
add.s32 %r10519, %r10648, 1;
add.s32 %r8024, %r10326, 3;
setp.ge.u32 %p2191, %r8024, %r4057;
mov.u32 %r10522, 0;
mov.u32 %r10515, %r10522;
mov.u32 %r10516, %r10522;
mov.u32 %r10517, %r10522;
mov.u32 %r10518, %r10522;
@%p2191 bra $L__BB2_1764;
cvt.u64.u32 %rd1330, %r10519;
add.s64 %rd1331, %rd1330, %rd4;
shl.b64 %rd1332, %rd1331, 2;
add.s64 %rd1333, %rd3, %rd1332;
ld.global.u32 %r3653, [%rd1333];
setp.eq.s32 %p2192, %r3653, 0;
mov.u32 %r10516, 0;
mov.u32 %r10505, %r10516;
@%p2192 bra $L__BB2_1756;
and.b32 %r8026, %r3653, -2147483648;
abs.s32 %r8027, %r3653;
shl.b32 %r8028, %r8027, %r2358;
or.b32 %r10505, %r8028, %r8026;
$L__BB2_1756:
shl.b32 %r8031, %r10505, 1;
shr.u32 %r8032, %r8031, %r2358;
and.b32 %r3656, %r8032, -2;
setp.eq.s32 %p2193, %r3656, 0;
mov.u32 %r10518, %r10516;
@%p2193 bra $L__BB2_1758;
or.b32 %r10504, %r10504, 4;
add.s32 %r8033, %r3656, -1;
clz.b32 %r8034, %r8033;
mov.u32 %r8035, 32;
sub.s32 %r10516, %r8035, %r8034;
max.s32 %r10520, %r10520, %r10516;
shr.u32 %r8036, %r10505, 31;
add.s32 %r8037, %r8036, %r3656;
add.s32 %r10518, %r8037, -2;
$L__BB2_1758:
mov.u32 %r10515, 0;
mov.u32 %r10510, %r10515;
@%p2069 bra $L__BB2_1761;
add.s32 %r8040, %r10519, %r4055;
cvt.u64.u32 %rd1334, %r8040;
add.s64 %rd1335, %rd1334, %rd4;
shl.b64 %rd1336, %rd1335, 2;
add.s64 %rd1337, %rd3, %rd1336;
ld.global.u32 %r3665, [%rd1337];
setp.eq.s32 %p2195, %r3665, 0;
@%p2195 bra $L__BB2_1761;
and.b32 %r8041, %r3665, -2147483648;
abs.s32 %r8042, %r3665;
shl.b32 %r8043, %r8042, %r2358;
or.b32 %r10510, %r8043, %r8041;
$L__BB2_1761:
shl.b32 %r8046, %r10510, 1;
shr.u32 %r8047, %r8046, %r2358;
and.b32 %r3668, %r8047, -2;
setp.eq.s32 %p2196, %r3668, 0;
mov.u32 %r10517, %r10515;
@%p2196 bra $L__BB2_1763;
or.b32 %r10504, %r10504, 8;
add.s32 %r8048, %r3668, -1;
clz.b32 %r8049, %r8048;
mov.u32 %r8050, 32;
sub.s32 %r10515, %r8050, %r8049;
max.s32 %r10520, %r10520, %r10515;
shr.u32 %r8051, %r10510, 31;
add.s32 %r8052, %r8051, %r3668;
add.s32 %r10517, %r8052, -2;
$L__BB2_1763:
add.s32 %r10519, %r10648, 2;
$L__BB2_1764:
mov.u32 %r10648, %r10519;
shr.u32 %r8054, %r3600, 2;
shr.u32 %r8055, %r3576, 1;
or.b32 %r8056, %r8054, %r8055;
cvt.u32.u16 %r8057, %rs451;
and.b32 %r8058, %r8057, 255;
shl.b32 %r8059, %r3629, 2;
add.s32 %r8060, %r8059, %r8058;
or.b32 %r3685, %r8056, %r8060;
add.s32 %r8061, %r10504, -1;
and.b32 %r8062, %r8061, %r10504;
setp.ne.s32 %p2197, %r8062, 0;
setp.gt.u16 %p2198, %rs449, 2;
and.pred %p2199, %p2198, %p2197;
selp.b32 %r8063, %r10345, 1, %p2199;
max.s32 %r3686, %r8063, %r10520;
sub.s32 %r10665, %r3686, %r8063;
setp.lt.s32 %p2200, %r10665, 1;
@%p2200 bra $L__BB2_1766;
setp.eq.s32 %p2201, %r10497, %r10520;
selp.u32 %r8064, 1, 0, %p2201;
setp.eq.s32 %p2202, %r10501, %r10520;
selp.u32 %r8065, -1, 0, %p2202;
bfi.b32 %r8066, %r8065, %r8064, 1, 1;
setp.eq.s32 %p2203, %r10516, %r10520;
selp.u16 %rs1029, 1, 0, %p2203;
mul.wide.u16 %r8067, %rs1029, 4;
or.b32 %r8068, %r8066, %r8067;
setp.eq.s32 %p2204, %r10515, %r10520;
selp.u16 %rs1030, 1, 0, %p2204;
mul.wide.u16 %r8069, %rs1030, 8;
or.b32 %r10522, %r8068, %r8069;
// ---- Table lookup followed by a descending-address bit writer ----------
// Table index = (%r3685 << 8) | (%r10504 << 4) | %r10522; each entry of the
// table at %rd72 is a u16 (index is scaled by 2).
// Fields of the loaded entry %rs452 as consumed by this kernel:
//   bits 4..6  -> number of bits to emit (%rs453)
//   bits 8..15 -> the bits themselves    (%r10524)
//   bits 0..3  -> read later as %rs452 & 15
// NOTE(review): presumably the HTJ2K VLC encode table - confirm against the CUDA source.
$L__BB2_1766:
shl.b32 %r8070, %r10504, 4;
shl.b32 %r8071, %r3685, 8;
or.b32 %r8072, %r8070, %r8071;
or.b32 %r8073, %r8072, %r10522;
mul.wide.u32 %rd1339, %r8073, 2;
add.s64 %rd1340, %rd72, %rd1339;
ld.global.u16 %rs452, [%rd1340];
shr.u16 %rs1031, %rs452, 4;
and.b16 %rs453, %rs1031, 7;
setp.eq.s16 %p2205, %rs453, 0;
// %r10534 carries the overflow flag out of this region; default = incoming %r10385.
mov.u32 %r10534, %r10385;
// Zero-length entry: nothing to emit.
@%p2205 bra $L__BB2_1773;
cvt.u32.u16 %r10523, %rs453;
shr.u16 %rs1032, %rs452, 8;
cvt.u32.u16 %r10524, %rs1032;
// Loop: %r10523 = bits still to emit, %r10524 = remaining bit pattern (LSB first).
$L__BB2_1768:
mov.u32 %r3692, %r10523;
// Capacity check: once %r10274 exceeds 2879 bytes the flag is forced to 1 and we bail out.
setp.gt.u32 %p2206, %r10274, 2879;
mov.u32 %r10534, 1;
@%p2206 bra $L__BB2_1773;
// avail = 8 - %r10276 - %r10275  (%r10275 = bits already in the byte, %r10276 = 0/1 reserved bit)
mov.u32 %r8075, 8;
sub.s32 %r8076, %r8075, %r10276;
sub.s32 %r8077, %r8076, %r10275;
// take = min(avail, remaining); mask = low 'take' bits (all ones when take == 32).
min.u32 %r8078, %r8077, %r3692;
setp.eq.s32 %p2207, %r8078, 32;
mov.u32 %r8079, -1;
shl.b32 %r8080, %r8079, %r8078;
not.b32 %r8081, %r8080;
selp.b32 %r8082, -1, %r8081, %p2207;
and.b32 %r8083, %r8082, %r10524;
// Place the bits above those already accumulated in %rs1322.
shl.b32 %r8084, %r8083, %r10275;
cvt.u16.u32 %rs1033, %r8084;
or.b16 %rs1322, %rs1322, %rs1033;
add.s32 %r10275, %r8078, %r10275;
sub.s32 %r10523, %r3692, %r8078;
shr.u32 %r10524, %r10524, %r8078;
// avail > remaining: the byte is not full yet, keep accumulating.
setp.gt.u32 %p2208, %r8077, %r3692;
@%p2208 bra $L__BB2_1772;
// Byte is full under the current limit. If a bit was reserved (%r10276 != 0) and the
// low byte is not 127, release the reservation instead of storing (one more bit fits).
setp.ne.s32 %p2209, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs1034, %rs1322, 255;
setp.ne.s16 %p2210, %rs1034, 127;
and.pred %p2211, %p2209, %p2210;
@%p2211 bra $L__BB2_1772;
// Store the byte at %rd1 + %rd5 + (20548 - %r10274): addresses decrease as %r10274 grows.
mov.u32 %r8087, 20548;
sub.s32 %r8088, %r8087, %r10274;
cvt.u64.u32 %rd1341, %r8088;
add.s64 %rd1342, %rd1341, %rd5;
add.s64 %rd1343, %rd1, %rd1342;
st.global.u8 [%rd1343], %rs1322;
add.s32 %r10274, %r10274, 1;
// Reserve one bit in the next byte when the byte just written is > 143 (0x8F).
setp.gt.u16 %p2212, %rs1034, 143;
selp.u32 %r10276, 1, 0, %p2212;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1772:
setp.ne.s32 %p2213, %r10523, 0;
// No overflow on this iteration: restore the incoming flag value.
mov.u32 %r10534, %r10385;
@%p2213 bra $L__BB2_1768;
// ---- Event coder entry ------------------------------------------------------
// Only runs when %r3685 == 0. %r10504 == 0 takes the "run continues" path at
// $L__BB2_1813; otherwise a 0 bit is appended to the byte accumulator %rs1253.
// NOTE(review): the structure (run counter, 13-state exponent table, 0xFF-aware
// byte flush) looks like the HTJ2K MEL coder - confirm against the CUDA source.
$L__BB2_1773:
setp.ne.s32 %p2214, %r3685, 0;
@%p2214 bra $L__BB2_1821;
setp.eq.s32 %p2215, %r10504, 0;
// %rd74 = %rd1 + %rd5 + 17477 + %r9826 : next free byte of this stream.
add.s32 %r8089, %r9826, 17477;
cvt.u64.u32 %rd1344, %r8089;
add.s64 %rd1345, %rd1344, %rd5;
add.s64 %rd74, %rd1, %rd1345;
@%p2215 bra $L__BB2_1813;
// Append a 0 bit (shift left by one) and consume one free bit slot (%r9835).
shl.b16 %rs1253, %rs1253, 1;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p2216, %r9835, 0;
// %r10570 = overflow flag for this stream; defaults to the incoming %r10043.
mov.u32 %r10570, %r10043;
@%p2216 bra $L__BB2_1778;
// Byte complete. More than 191 bytes already written -> set the flag, do not store.
setp.gt.u32 %p2217, %r9826, 191;
mov.u32 %r10570, 1;
mov.u32 %r9835, 0;
@%p2217 bra $L__BB2_1778;
st.global.u8 [%rd74], %rs1253;
add.s32 %r9826, %r9826, 1;
mov.u16 %rs1253, 0;
// Unlike the other flush sites in this kernel, this one resets to 8 free bits unconditionally.
mov.u32 %r9835, 8;
mov.u32 %r10570, %r10043;
// Map the state index %r10041 to an exponent %r10538:
//   0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, anything else -> 5.
$L__BB2_1778:
setp.lt.u32 %p2218, %r10041, 3;
mov.u32 %r10538, 0;
@%p2218 bra $L__BB2_1781;
setp.lt.u32 %p2219, %r10041, 6;
mov.u32 %r10538, 1;
@%p2219 bra $L__BB2_1781;
setp.lt.u32 %p2220, %r10041, 9;
setp.eq.s32 %p2221, %r10041, 11;
selp.b32 %r8095, 4, 5, %p2221;
setp.lt.u32 %p2222, %r10041, 11;
selp.b32 %r8096, 3, %r8095, %p2222;
selp.b32 %r10538, 2, %r8096, %p2220;
// ---- Emit the low %r10538 bits of the run counter %r10040, MSB first ----------
// This region is the peeled remainder (%r10538 & 3 iterations) of a loop the
// compiler unrolled by four; the unrolled body follows at $L__BB2_1794.
// Per bit: shift it into %rs1253, decrement the free-bit count %r9835, and when
// that reaches 0 store the byte at %rd1 + %rd5 + 17477 + %r9826 (limit: 192 bytes).
// %r10548 tracks how many bits are still to be emitted; %r10549/%r10570/%r10544
// carry the overflow flag (forced to 1 when %r9826 > 191).
$L__BB2_1781:
setp.eq.s32 %p2223, %r10538, 0;
// Exponent 0: no bits to emit.
@%p2223 bra $L__BB2_1809;
add.s32 %r3716, %r10538, -1;
and.b32 %r3717, %r10538, 3;
setp.eq.s32 %p2224, %r3717, 0;
mov.u32 %r10548, %r10538;
mov.u32 %r10549, %r10570;
// Count is a multiple of four: go straight to the unrolled loop.
@%p2224 bra $L__BB2_1794;
// -- peeled iteration 1: bit (%r10538 - 1) of %r10040 --
mov.u32 %r8098, 1;
shl.b32 %r8099, %r8098, %r3716;
and.b32 %r8100, %r8099, %r10040;
setp.ne.s32 %p2225, %r8100, 0;
selp.u32 %r8101, 1, 0, %p2225;
cvt.u32.u16 %r8102, %rs1253;
// bfi: (%rs1253 << 1) | bit
bfi.b32 %r8103, %r8102, %r8101, 1, 8;
cvt.u16.u32 %rs1253, %r8103;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p2226, %r9835, 0;
mov.u32 %r10549, %r10570;
@%p2226 bra $L__BB2_1786;
setp.gt.u32 %p2227, %r9826, 191;
mov.u32 %r9835, 0;
// %r8098 holds the constant 1 here: mark overflow.
mov.u32 %r10549, %r8098;
@%p2227 bra $L__BB2_1786;
add.s32 %r8107, %r9826, 17477;
cvt.u64.u32 %rd1346, %r8107;
add.s64 %rd1347, %rd1346, %rd5;
add.s64 %rd1348, %rd1, %rd1347;
st.global.u8 [%rd1348], %rs1253;
add.s32 %r9826, %r9826, 1;
mov.u16 %rs1253, 0;
mov.u32 %r9835, 8;
mov.u32 %r10549, %r10570;
$L__BB2_1786:
setp.eq.s32 %p2228, %r3717, 1;
mov.u32 %r10570, %r10549;
mov.u32 %r10548, %r3716;
@%p2228 bra $L__BB2_1794;
// -- peeled iteration 2: bit (%r10538 - 2) of %r10040 --
add.s32 %r10548, %r10538, -2;
mov.u32 %r8108, 1;
shl.b32 %r8109, %r8108, %r10548;
and.b32 %r8110, %r8109, %r10040;
setp.ne.s32 %p2229, %r8110, 0;
selp.u32 %r8111, 1, 0, %p2229;
cvt.u32.u16 %r8112, %rs1253;
bfi.b32 %r8113, %r8112, %r8111, 1, 8;
cvt.u16.u32 %rs1253, %r8113;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p2230, %r9835, 0;
mov.u32 %r10544, %r10549;
@%p2230 bra $L__BB2_1790;
setp.gt.u32 %p2231, %r9826, 191;
mov.u32 %r9835, 0;
mov.u32 %r10544, %r8108;
@%p2231 bra $L__BB2_1790;
add.s32 %r8116, %r9826, 17477;
cvt.u64.u32 %rd1349, %r8116;
add.s64 %rd1350, %rd1349, %rd5;
add.s64 %rd1351, %rd1, %rd1350;
and.b16 %rs1041, %rs1253, 255;
st.global.u8 [%rd1351], %rs1253;
add.s32 %r9826, %r9826, 1;
// After writing 0xFF the next byte only gets 7 usable bits, otherwise 8.
setp.eq.s16 %p2232, %rs1041, 255;
selp.b32 %r9835, 7, 8, %p2232;
mov.u16 %rs1253, 0;
mov.u32 %r10544, %r10549;
$L__BB2_1790:
setp.eq.s32 %p2233, %r3717, 2;
mov.u32 %r10570, %r10544;
mov.u32 %r10549, %r10544;
@%p2233 bra $L__BB2_1794;
// -- peeled iteration 3: bit (%r10538 - 3) of %r10040 --
add.s32 %r10548, %r10538, -3;
mov.u32 %r8117, 1;
shl.b32 %r8118, %r8117, %r10548;
and.b32 %r8119, %r8118, %r10040;
setp.ne.s32 %p2234, %r8119, 0;
selp.u32 %r8120, 1, 0, %p2234;
cvt.u32.u16 %r8121, %rs1253;
bfi.b32 %r8122, %r8121, %r8120, 1, 8;
cvt.u16.u32 %rs1253, %r8122;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p2235, %r9835, 0;
mov.u32 %r10570, %r10544;
mov.u32 %r10549, %r10544;
@%p2235 bra $L__BB2_1794;
setp.gt.u32 %p2236, %r9826, 191;
mov.u32 %r9835, 0;
mov.u32 %r10570, %r8117;
mov.u32 %r10549, %r8117;
@%p2236 bra $L__BB2_1794;
add.s32 %r8127, %r9826, 17477;
cvt.u64.u32 %rd1352, %r8127;
add.s64 %rd1353, %rd1352, %rd5;
add.s64 %rd1354, %rd1, %rd1353;
and.b16 %rs1044, %rs1253, 255;
st.global.u8 [%rd1354], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p2237, %rs1044, 255;
selp.b32 %r9835, 7, 8, %p2237;
mov.u16 %rs1253, 0;
mov.u32 %r10570, %r10544;
mov.u32 %r10549, %r10544;
// ---- Unrolled-by-four body of the "emit run-counter bits" loop -----------------
// Entered with %r10548 = number of bits still to emit (a multiple of four here).
// Skipped entirely when the original count minus one (%r3716) is below 3.
// Each of the four stages emits one bit of %r10040 (positions %r10548-1 .. %r10548-4),
// decrements the free-bit count, and on a full byte stores it at
// %rd1 + %rd5 + 17477 + %r9826 unless %r9826 > 191 (then the overflow flag is set to 1).
// After a stored byte equal to 255 the next byte gets 7 free bits instead of 8.
$L__BB2_1794:
setp.lt.u32 %p2238, %r3716, 3;
@%p2238 bra $L__BB2_1809;
mov.u32 %r10570, %r10549;
$L__BB2_1796:
// -- stage 1: bit (%r10548 - 1) --
add.s32 %r8128, %r10548, -1;
mov.u32 %r8129, 1;
shl.b32 %r8130, %r8129, %r8128;
and.b32 %r8131, %r8130, %r10040;
setp.ne.s32 %p2239, %r8131, 0;
selp.u32 %r8132, 1, 0, %p2239;
cvt.u32.u16 %r8133, %rs1253;
// %r10558 = (%rs1253 << 1) | bit  (accumulator kept in a 32-bit register inside the loop)
bfi.b32 %r10558, %r8133, %r8132, 1, 8;
add.s32 %r10557, %r9835, -1;
setp.ne.s32 %p2240, %r10557, 0;
mov.u32 %r10559, %r10570;
@%p2240 bra $L__BB2_1799;
setp.gt.u32 %p2241, %r9826, 191;
mov.u32 %r10557, 0;
mov.u32 %r10559, %r8129;
@%p2241 bra $L__BB2_1799;
cvt.u16.u32 %rs1045, %r10558;
and.b16 %rs1046, %rs1045, 255;
add.s32 %r8137, %r9826, 17477;
cvt.u64.u32 %rd1355, %r8137;
add.s64 %rd1356, %rd1355, %rd5;
add.s64 %rd1357, %rd1, %rd1356;
st.global.u8 [%rd1357], %rs1045;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p2242, %rs1046, 255;
selp.b32 %r10557, 7, 8, %p2242;
mov.u32 %r10558, 0;
mov.u32 %r10559, %r10570;
$L__BB2_1799:
// -- stage 2: bit (%r10548 - 2) --
add.s32 %r8138, %r10548, -2;
shl.b32 %r8140, %r8129, %r8138;
and.b32 %r8141, %r8140, %r10040;
setp.ne.s32 %p2243, %r8141, 0;
and.b32 %r8142, %r10558, 127;
selp.u32 %r8143, 1, 0, %p2243;
bfi.b32 %r10562, %r8142, %r8143, 1, 7;
add.s32 %r10561, %r10557, -1;
setp.ne.s32 %p2244, %r10561, 0;
mov.u32 %r10563, %r10559;
@%p2244 bra $L__BB2_1802;
setp.gt.u32 %p2245, %r9826, 191;
mov.u32 %r10563, 1;
mov.u32 %r10561, 0;
@%p2245 bra $L__BB2_1802;
cvt.u16.u32 %rs1047, %r10562;
and.b16 %rs1048, %rs1047, 255;
add.s32 %r8147, %r9826, 17477;
cvt.u64.u32 %rd1358, %r8147;
add.s64 %rd1359, %rd1358, %rd5;
add.s64 %rd1360, %rd1, %rd1359;
st.global.u8 [%rd1360], %rs1047;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p2246, %rs1048, 255;
selp.b32 %r10561, 7, 8, %p2246;
mov.u32 %r10562, 0;
mov.u32 %r10563, %r10559;
$L__BB2_1802:
// -- stage 3: bit (%r10548 - 3) --
add.s32 %r8148, %r10548, -3;
mov.u32 %r8149, 1;
shl.b32 %r8150, %r8149, %r8148;
and.b32 %r8151, %r8150, %r10040;
setp.ne.s32 %p2247, %r8151, 0;
and.b32 %r8152, %r10562, 127;
selp.u32 %r8153, 1, 0, %p2247;
bfi.b32 %r10566, %r8152, %r8153, 1, 7;
add.s32 %r10565, %r10561, -1;
setp.ne.s32 %p2248, %r10565, 0;
mov.u32 %r10567, %r10563;
@%p2248 bra $L__BB2_1805;
setp.gt.u32 %p2249, %r9826, 191;
mov.u32 %r10565, 0;
mov.u32 %r10567, %r8149;
@%p2249 bra $L__BB2_1805;
cvt.u16.u32 %rs1049, %r10566;
and.b16 %rs1050, %rs1049, 255;
add.s32 %r8157, %r9826, 17477;
cvt.u64.u32 %rd1361, %r8157;
add.s64 %rd1362, %rd1361, %rd5;
add.s64 %rd1363, %rd1, %rd1362;
st.global.u8 [%rd1363], %rs1049;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p2250, %rs1050, 255;
selp.b32 %r10565, 7, 8, %p2250;
mov.u32 %r10566, 0;
mov.u32 %r10567, %r10563;
$L__BB2_1805:
// -- stage 4: bit (%r10548 - 4); also advances the loop counter by four --
add.s32 %r10548, %r10548, -4;
shl.b32 %r8159, %r8149, %r10548;
and.b32 %r8160, %r8159, %r10040;
setp.ne.s32 %p2251, %r8160, 0;
and.b32 %r8161, %r10566, 127;
selp.u32 %r8162, 1, 0, %p2251;
bfi.b32 %r8163, %r8161, %r8162, 1, 15;
// Result goes back into the loop-carried 16-bit accumulator and free-bit counter.
cvt.u16.u32 %rs1253, %r8163;
add.s32 %r9835, %r10565, -1;
setp.ne.s32 %p2252, %r9835, 0;
mov.u32 %r10570, %r10567;
@%p2252 bra $L__BB2_1808;
setp.gt.u32 %p2253, %r9826, 191;
mov.u32 %r10570, 1;
mov.u32 %r9835, 0;
@%p2253 bra $L__BB2_1808;
add.s32 %r8166, %r9826, 17477;
cvt.u64.u32 %rd1364, %r8166;
add.s64 %rd1365, %rd1364, %rd5;
add.s64 %rd1366, %rd1, %rd1365;
and.b16 %rs1052, %rs1253, 255;
st.global.u8 [%rd1366], %rs1253;
add.s32 %r9826, %r9826, 1;
setp.eq.s16 %p2254, %rs1052, 255;
selp.b32 %r9835, 7, 8, %p2254;
mov.u16 %rs1253, 0;
mov.u32 %r10570, %r10567;
$L__BB2_1808:
// Repeat until all bits have been emitted.
setp.ne.s32 %p2255, %r10548, 0;
@%p2255 bra $L__BB2_1796;
// After the "event" path: reset the run counter, step the state index down by one
// (saturating at 0) and recompute the threshold %r10042 = 1 << exponent(state).
$L__BB2_1809:
add.s32 %r8168, %r10041, -1;
setp.eq.s32 %p2256, %r10041, 0;
// run counter := 0
mov.u32 %r10040, 0;
// state := (state == 0) ? 0 : state - 1
selp.b32 %r10041, 0, %r8168, %p2256;
// exponent table: 0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, else 5
setp.lt.u32 %p2257, %r10041, 3;
mov.u32 %r10574, %r10040;
@%p2257 bra $L__BB2_1812;
setp.lt.u32 %p2258, %r10041, 6;
mov.u32 %r10574, 1;
@%p2258 bra $L__BB2_1812;
setp.lt.u32 %p2259, %r10041, 9;
setp.eq.s32 %p2260, %r10041, 11;
selp.b32 %r8170, 4, 5, %p2260;
setp.lt.u32 %p2261, %r10041, 11;
selp.b32 %r8171, 3, %r8170, %p2261;
selp.b32 %r10574, 2, %r8171, %p2259;
$L__BB2_1812:
mov.u32 %r8173, 1;
shl.b32 %r10042, %r8173, %r10574;
// Publish the overflow flag accumulated while emitting bits.
mov.u32 %r10043, %r10570;
bra.uni $L__BB2_1821;
// "No event" path: extend the current run. When the run counter reaches the
// threshold %r10042, append a 1 bit to %rs1253, then step the state index up by
// one (capped at 12) and recompute the threshold.
$L__BB2_1813:
add.s32 %r10040, %r10040, 1;
setp.lt.u32 %p2262, %r10040, %r10042;
// Run still below threshold: nothing to emit.
@%p2262 bra $L__BB2_1821;
shl.b16 %rs1053, %rs1253, 1;
or.b16 %rs1253, %rs1053, 1;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p2263, %r9835, 0;
mov.u32 %r10577, %r10043;
@%p2263 bra $L__BB2_1817;
// Byte is full: the flush code lives at $L__BB2_1815, which is outside this part of the listing.
bra.uni $L__BB2_1815;
$L__BB2_1817:
// state := min(state + 1, 12); run counter := 0
add.s32 %r8177, %r10041, 1;
min.u32 %r10041, %r8177, 12;
setp.lt.u32 %p2266, %r10041, 3;
mov.u32 %r10040, 0;
mov.u32 %r10578, %r10040;
@%p2266 bra $L__BB2_1820;
setp.lt.u32 %p2267, %r10041, 6;
mov.u32 %r10578, 1;
@%p2267 bra $L__BB2_1820;
setp.lt.u32 %p2268, %r10041, 9;
setp.eq.s32 %p2269, %r10041, 11;
selp.b32 %r8179, 4, 5, %p2269;
setp.lt.u32 %p2270, %r10041, 11;
selp.b32 %r8180, 3, %r8179, %p2270;
selp.b32 %r10578, 2, %r8180, %p2268;
$L__BB2_1820:
// threshold := 1 << exponent
mov.u32 %r8182, 1;
shl.b32 %r10042, %r8182, %r10578;
mov.u32 %r10043, %r10577;
// ---- Forward bit writer, sample selected by bit 0 of %r10504 -----------------
// %r3800 = low nibble of the table entry %rs452. If bit 0 of %r10504 is set,
// emit n = %r3686 - (%r3800 & 1) low bits of %r10498 into the byte stream that
// starts at %rd1 + %rd5 (cursor %r10461, capacity 17477 bytes).
// Writer state: %r10464 = byte accumulator, %r10463 = bits used, %r10462 = bit
// limit of the current byte (7 after a 0xFF byte, otherwise 8).
// NOTE(review): presumably the HTJ2K MagSgn stream - confirm against the CUDA source.
$L__BB2_1821:
and.b16 %rs1056, %rs452, 15;
cvt.u32.u16 %r3800, %rs1056;
and.b32 %r8183, %r10504, 1;
setp.eq.b32 %p2271, %r8183, 1;
// xor with constant false is a no-op; net effect: %p2274 = (bit 0 of %r10504 is clear).
mov.pred %p2272, 0;
xor.pred %p2273, %p2271, %p2272;
not.pred %p2274, %p2273;
// %r10599 = overflow flag out of this region; default = incoming %r10495.
mov.u32 %r10599, %r10495;
@%p2274 bra $L__BB2_1828;
and.b32 %r8184, %r3800, 1;
sub.s32 %r10585, %r3686, %r8184;
setp.eq.s32 %p2275, %r10585, 0;
mov.u32 %r10599, %r10495;
// Zero bits requested: nothing to emit.
@%p2275 bra $L__BB2_1828;
// Keep only the low n bits of the value.
mov.u32 %r8185, -1;
shl.b32 %r8186, %r8185, %r10585;
not.b32 %r8187, %r8186;
and.b32 %r10586, %r10498, %r8187;
$L__BB2_1824:
// Capacity check: cursor beyond 17476 -> flag = 1 and stop.
setp.gt.u32 %p2276, %r10461, 17476;
mov.u32 %r10599, 1;
@%p2276 bra $L__BB2_1828;
// take = min(limit - used, remaining)
sub.s32 %r8189, %r10462, %r10463;
min.u32 %r8190, %r8189, %r10585;
setp.eq.s32 %p2277, %r8190, 32;
mov.u32 %r8191, -1;
shl.b32 %r8192, %r8191, %r8190;
not.b32 %r8193, %r8192;
selp.b32 %r8194, -1, %r8193, %p2277;
and.b32 %r8195, %r8194, %r10586;
shl.b32 %r8196, %r8195, %r10463;
or.b32 %r10464, %r8196, %r10464;
add.s32 %r10463, %r8190, %r10463;
shr.u32 %r10586, %r10586, %r8190;
sub.s32 %r10585, %r10585, %r8190;
// Byte not yet full: keep accumulating.
setp.lt.u32 %p2278, %r10463, %r10462;
@%p2278 bra $L__BB2_1827;
// Store the low byte of the accumulator and advance the cursor.
cvt.u64.u32 %rd1367, %r10461;
add.s64 %rd1368, %rd1367, %rd5;
add.s64 %rd1369, %rd1, %rd1368;
st.global.u8 [%rd1369], %r10464;
add.s32 %r10461, %r10461, 1;
// A 0xFF byte limits the following byte to 7 bits.
setp.eq.s32 %p2279, %r10464, 255;
selp.b32 %r10462, 7, 8, %p2279;
mov.u32 %r10463, 0;
mov.u32 %r10464, %r10463;
$L__BB2_1827:
setp.ne.s32 %p2280, %r10585, 0;
mov.u32 %r10599, %r10495;
@%p2280 bra $L__BB2_1824;
// ---- Forward bit writer, sample selected by bit 1 of %r10504 -----------------
// Same writer as the preceding region: emits n = %r3686 - ((%r3800 >> 1) & 1)
// low bits of %r10502. %r3824 (= %r10504 & 2) is reused after these regions.
// Overflow flag chain: %r10599 in, %r10614 out.
$L__BB2_1828:
and.b32 %r3824, %r10504, 2;
setp.eq.s32 %p2281, %r3824, 0;
mov.u32 %r10614, %r10599;
@%p2281 bra $L__BB2_1835;
shr.u32 %r8199, %r3800, 1;
and.b32 %r8200, %r8199, 1;
sub.s32 %r10600, %r3686, %r8200;
setp.eq.s32 %p2282, %r10600, 0;
mov.u32 %r10614, %r10599;
@%p2282 bra $L__BB2_1835;
// Keep only the low n bits of the value.
mov.u32 %r8201, -1;
shl.b32 %r8202, %r8201, %r10600;
not.b32 %r8203, %r8202;
and.b32 %r10601, %r10502, %r8203;
$L__BB2_1831:
// Capacity check: cursor beyond 17476 -> flag = 1 and stop.
setp.gt.u32 %p2283, %r10461, 17476;
mov.u32 %r10614, 1;
@%p2283 bra $L__BB2_1835;
// take = min(limit - used, remaining)
sub.s32 %r8205, %r10462, %r10463;
min.u32 %r8206, %r8205, %r10600;
setp.eq.s32 %p2284, %r8206, 32;
mov.u32 %r8207, -1;
shl.b32 %r8208, %r8207, %r8206;
not.b32 %r8209, %r8208;
selp.b32 %r8210, -1, %r8209, %p2284;
and.b32 %r8211, %r8210, %r10601;
shl.b32 %r8212, %r8211, %r10463;
or.b32 %r10464, %r8212, %r10464;
add.s32 %r10463, %r8206, %r10463;
shr.u32 %r10601, %r10601, %r8206;
sub.s32 %r10600, %r10600, %r8206;
setp.lt.u32 %p2285, %r10463, %r10462;
@%p2285 bra $L__BB2_1834;
// Byte full: store it at %rd1 + %rd5 + %r10461.
cvt.u64.u32 %rd1370, %r10461;
add.s64 %rd1371, %rd1370, %rd5;
add.s64 %rd1372, %rd1, %rd1371;
st.global.u8 [%rd1372], %r10464;
add.s32 %r10461, %r10461, 1;
// A 0xFF byte limits the following byte to 7 bits.
setp.eq.s32 %p2286, %r10464, 255;
selp.b32 %r10462, 7, 8, %p2286;
mov.u32 %r10463, 0;
mov.u32 %r10464, %r10463;
$L__BB2_1834:
setp.ne.s32 %p2287, %r10600, 0;
mov.u32 %r10614, %r10599;
@%p2287 bra $L__BB2_1831;
// ---- Forward bit writer, sample selected by bit 2 of %r10504 -----------------
// Same writer as the two preceding regions: emits n = %r3686 - ((%r3800 >> 2) & 1)
// low bits of %r10518. %r3848 (= %r10504 & 4) is reused after these regions.
// Overflow flag chain: %r10614 in, %r10629 out.
$L__BB2_1835:
and.b32 %r3848, %r10504, 4;
setp.eq.s32 %p2288, %r3848, 0;
mov.u32 %r10629, %r10614;
@%p2288 bra $L__BB2_1842;
shr.u32 %r8215, %r3800, 2;
and.b32 %r8216, %r8215, 1;
sub.s32 %r10615, %r3686, %r8216;
setp.eq.s32 %p2289, %r10615, 0;
mov.u32 %r10629, %r10614;
@%p2289 bra $L__BB2_1842;
// Keep only the low n bits of the value.
mov.u32 %r8217, -1;
shl.b32 %r8218, %r8217, %r10615;
not.b32 %r8219, %r8218;
and.b32 %r10616, %r10518, %r8219;
$L__BB2_1838:
// Capacity check: cursor beyond 17476 -> flag = 1 and stop.
setp.gt.u32 %p2290, %r10461, 17476;
mov.u32 %r10629, 1;
@%p2290 bra $L__BB2_1842;
// take = min(limit - used, remaining)
sub.s32 %r8221, %r10462, %r10463;
min.u32 %r8222, %r8221, %r10615;
setp.eq.s32 %p2291, %r8222, 32;
mov.u32 %r8223, -1;
shl.b32 %r8224, %r8223, %r8222;
not.b32 %r8225, %r8224;
selp.b32 %r8226, -1, %r8225, %p2291;
and.b32 %r8227, %r8226, %r10616;
shl.b32 %r8228, %r8227, %r10463;
or.b32 %r10464, %r8228, %r10464;
add.s32 %r10463, %r8222, %r10463;
shr.u32 %r10616, %r10616, %r8222;
sub.s32 %r10615, %r10615, %r8222;
setp.lt.u32 %p2292, %r10463, %r10462;
@%p2292 bra $L__BB2_1841;
// Byte full: store it at %rd1 + %rd5 + %r10461.
cvt.u64.u32 %rd1373, %r10461;
add.s64 %rd1374, %rd1373, %rd5;
add.s64 %rd1375, %rd1, %rd1374;
st.global.u8 [%rd1375], %r10464;
add.s32 %r10461, %r10461, 1;
// A 0xFF byte limits the following byte to 7 bits.
setp.eq.s32 %p2293, %r10464, 255;
selp.b32 %r10462, 7, 8, %p2293;
mov.u32 %r10463, 0;
mov.u32 %r10464, %r10463;
$L__BB2_1841:
setp.ne.s32 %p2294, %r10615, 0;
mov.u32 %r10629, %r10614;
@%p2294 bra $L__BB2_1838;
// ---- Forward bit writer, sample selected by bit 3 of %r10504 -----------------
// Same writer as the three preceding regions: emits n = %r3686 - (%r3800 >> 3)
// low bits of %r10517 (%r3800 is a 4-bit value, so the shift leaves a single bit).
// %r3872 (= %r10504 & 8) is reused after this region.
// Overflow flag chain: %r10629 in, result written back to the loop-carried %r10495.
$L__BB2_1842:
and.b32 %r3872, %r10504, 8;
setp.eq.s32 %p2295, %r3872, 0;
mov.u32 %r10495, %r10629;
@%p2295 bra $L__BB2_1849;
shr.u32 %r8231, %r3800, 3;
sub.s32 %r10630, %r3686, %r8231;
setp.eq.s32 %p2296, %r10630, 0;
mov.u32 %r10495, %r10629;
@%p2296 bra $L__BB2_1849;
// Keep only the low n bits of the value.
mov.u32 %r8232, -1;
shl.b32 %r8233, %r8232, %r10630;
not.b32 %r8234, %r8233;
and.b32 %r10631, %r10517, %r8234;
$L__BB2_1845:
// Capacity check: cursor beyond 17476 -> flag = 1 and stop.
setp.gt.u32 %p2297, %r10461, 17476;
mov.u32 %r10495, 1;
@%p2297 bra $L__BB2_1849;
// take = min(limit - used, remaining)
sub.s32 %r8236, %r10462, %r10463;
min.u32 %r8237, %r8236, %r10630;
setp.eq.s32 %p2298, %r8237, 32;
mov.u32 %r8238, -1;
shl.b32 %r8239, %r8238, %r8237;
not.b32 %r8240, %r8239;
selp.b32 %r8241, -1, %r8240, %p2298;
and.b32 %r8242, %r8241, %r10631;
shl.b32 %r8243, %r8242, %r10463;
or.b32 %r10464, %r8243, %r10464;
add.s32 %r10463, %r8237, %r10463;
shr.u32 %r10631, %r10631, %r8237;
sub.s32 %r10630, %r10630, %r8237;
setp.lt.u32 %p2299, %r10463, %r10462;
@%p2299 bra $L__BB2_1848;
// Byte full: store it at %rd1 + %rd5 + %r10461.
cvt.u64.u32 %rd1376, %r10461;
add.s64 %rd1377, %rd1376, %rd5;
add.s64 %rd1378, %rd1, %rd1377;
st.global.u8 [%rd1378], %r10464;
add.s32 %r10461, %r10461, 1;
// A 0xFF byte limits the following byte to 7 bits.
setp.eq.s32 %p2300, %r10464, 255;
selp.b32 %r10462, 7, 8, %p2300;
mov.u32 %r10463, 0;
mov.u32 %r10464, %r10463;
$L__BB2_1848:
setp.ne.s32 %p2301, %r10630, 0;
mov.u32 %r10495, %r10629;
@%p2301 bra $L__BB2_1845;
// ---- Per-iteration bookkeeping in shared memory ---------------------------------
// Writes back byte-sized state at %r3624 / %r3627 and derives the values carried
// into the next iteration (%r10345, %r10342, %r10346, %r10385).
// NOTE(review): %r3624, %r3627 and %r4103 are shared-memory addresses computed
// outside this part of the listing; which arrays they point into cannot be told from here.
$L__BB2_1849:
// Byte stored at [%r3624+1]: %rs450 when (%r10501 & 255) < (%r10366 & 255), else low byte of %r10501.
and.b32 %r8246, %r10501, 255;
and.b32 %r8247, %r10366, 255;
setp.lt.u32 %p2302, %r8246, %r8247;
cvt.u16.u32 %rs1057, %r10501;
selp.b16 %rs1058, %rs450, %rs1057, %p2302;
st.shared.u8 [%r3624+1], %rs1058;
ld.shared.u8 %rs1059, [%r3624+3];
setp.gt.u16 %p2303, %rs448, %rs1059;
add.s32 %r10647, %r10647, 1;
// Pick the index of the byte to read next: %r10647 (already incremented) or %r10344 + 3.
add.s32 %r8248, %r10344, 3;
selp.b32 %r8249, %r10647, %r8248, %p2303;
add.s32 %r8251, %r4103, %r8249;
ld.shared.u8 %r8252, [%r8251];
// %r10345 = loaded byte - 1
add.s32 %r10345, %r8252, -1;
shr.u32 %r8253, %r3824, 1;
or.b32 %r8254, %r3630, %r8253;
st.shared.u8 [%r3624+2], %r10515;
st.shared.u8 [%r3627+1], %r8254;
ld.shared.u8 %rs1060, [%r3627+3];
mul.wide.u16 %r8255, %rs1060, 4;
add.s32 %r8256, %r8255, %r3629;
shr.u32 %r8257, %r3872, 3;
st.shared.u8 [%r3627+2], %r8257;
// %r10342 = (%r3872 >> 2) | (%r3848 >> 1) | (4 * [%r3627+3] + %r3629)
shr.u32 %r8258, %r3872, 2;
shr.u32 %r8259, %r3848, 1;
or.b32 %r8260, %r8258, %r8259;
or.b32 %r10342, %r8260, %r8256;
add.s32 %r10346, %r10346, 1;
// Carry the overflow flag of the descending-address bit writer into the next iteration.
mov.u32 %r10385, %r10534;
// ---- Load two 6-byte table rows and emit the first (value, length) pair ---------
// Row A starts at %rd71 + (%r3415 > 0 ? 6 * %r3415 : 0); row B at %rd71 + 6 * max(%r10665, 0).
// Bytes read: row A +0 (value), +1 (%rs476, length), +2 (%rs477), +3 (%rs478);
//             row B +0 (%rs479), +1 (%rs480), +2 (%rs481), +3 (%rs482).
// The pair (row A byte 0, %rs476) is then pushed through the descending-address
// bit writer (accumulator %rs1322, bits used %r10275, reserved bit %r10276,
// bytes written %r10274, capacity 2880, base offset 20548).
$L__BB2_1850:
mov.u32 %r10343, %r10648;
mov.u32 %r10344, %r10647;
max.s32 %r8261, %r10665, 0;
mul.lo.s32 %r8262, %r3415, 6;
setp.gt.s32 %p2304, %r3415, 0;
selp.b32 %r8263, %r8262, 0, %p2304;
cvt.u64.u32 %rd1379, %r8263;
add.s64 %rd75, %rd71, %rd1379;
ld.global.u8 %rs476, [%rd75+1];
add.s32 %r8264, %r8263, 2;
cvt.u64.u32 %rd1380, %r8264;
add.s64 %rd1381, %rd71, %rd1380;
ld.global.u8 %rs477, [%rd1381];
ld.global.u8 %rs478, [%rd1381+1];
mul.lo.s32 %r8265, %r8261, 6;
cvt.u64.u32 %rd1382, %r8265;
add.s64 %rd1383, %rd71, %rd1382;
ld.global.u8 %rs479, [%rd1383];
ld.global.u8 %rs480, [%rd1383+1];
add.s32 %r8266, %r8265, 2;
cvt.u64.u32 %rd1384, %r8266;
add.s64 %rd1385, %rd71, %rd1384;
ld.global.u8 %rs481, [%rd1385];
ld.global.u8 %rs482, [%rd1385+1];
// Zero length: skip the emit. %r10677 = overflow flag out (default: incoming %r10385).
setp.eq.s16 %p2305, %rs476, 0;
mov.u32 %r10677, %r10385;
@%p2305 bra $L__BB2_1857;
ld.global.u8 %r10667, [%rd75];
cvt.u32.u16 %r10666, %rs476;
// Loop: %r10666 = bits still to emit, %r10667 = remaining bit pattern (LSB first).
$L__BB2_1852:
mov.u32 %r3923, %r10666;
setp.gt.u32 %p2306, %r10274, 2879;
mov.u32 %r10677, 1;
@%p2306 bra $L__BB2_1857;
// avail = 8 - reserved - used; take = min(avail, remaining)
mov.u32 %r8268, 8;
sub.s32 %r8269, %r8268, %r10276;
sub.s32 %r8270, %r8269, %r10275;
min.u32 %r8271, %r8270, %r3923;
setp.eq.s32 %p2307, %r8271, 32;
mov.u32 %r8272, -1;
shl.b32 %r8273, %r8272, %r8271;
not.b32 %r8274, %r8273;
selp.b32 %r8275, -1, %r8274, %p2307;
and.b32 %r8276, %r8275, %r10667;
shl.b32 %r8277, %r8276, %r10275;
cvt.u16.u32 %rs1061, %r8277;
or.b16 %rs1322, %rs1322, %rs1061;
add.s32 %r10275, %r8271, %r10275;
sub.s32 %r10666, %r3923, %r8271;
shr.u32 %r10667, %r10667, %r8271;
// Byte not yet full: keep accumulating.
setp.gt.u32 %p2308, %r8270, %r3923;
@%p2308 bra $L__BB2_1856;
// A reserved bit is released (no store) unless the low byte equals 127.
setp.ne.s32 %p2309, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs1062, %rs1322, 255;
setp.ne.s16 %p2310, %rs1062, 127;
and.pred %p2311, %p2309, %p2310;
@%p2311 bra $L__BB2_1856;
// Store at %rd1 + %rd5 + (20548 - %r10274).
mov.u32 %r8280, 20548;
sub.s32 %r8281, %r8280, %r10274;
cvt.u64.u32 %rd1386, %r8281;
add.s64 %rd1387, %rd1386, %rd5;
add.s64 %rd1388, %rd1, %rd1387;
st.global.u8 [%rd1388], %rs1322;
add.s32 %r10274, %r10274, 1;
// Reserve a bit in the next byte when this one is > 143.
setp.gt.u16 %p2312, %rs1062, 143;
selp.u32 %r10276, 1, 0, %p2312;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1856:
setp.ne.s32 %p2313, %r10666, 0;
mov.u32 %r10677, %r10385;
@%p2313 bra $L__BB2_1852;
// Emit the pair (value %rs479, length %rs480) through the descending-address bit
// writer (same procedure as the preceding region). Flag chain: %r10677 in, %r10689 out.
$L__BB2_1857:
setp.eq.s16 %p2314, %rs480, 0;
mov.u32 %r10689, %r10677;
// Zero length: nothing to emit.
@%p2314 bra $L__BB2_1864;
cvt.u32.u16 %r8282, %rs479;
and.b32 %r10679, %r8282, 255;
cvt.u32.u16 %r8283, %rs480;
and.b32 %r10678, %r8283, 255;
$L__BB2_1859:
mov.u32 %r3942, %r10678;
// Capacity check: 2880 bytes.
setp.gt.u32 %p2315, %r10274, 2879;
mov.u32 %r10689, 1;
@%p2315 bra $L__BB2_1864;
// avail = 8 - reserved - used; take = min(avail, remaining)
mov.u32 %r8285, 8;
sub.s32 %r8286, %r8285, %r10276;
sub.s32 %r8287, %r8286, %r10275;
min.u32 %r8288, %r8287, %r3942;
setp.eq.s32 %p2316, %r8288, 32;
mov.u32 %r8289, -1;
shl.b32 %r8290, %r8289, %r8288;
not.b32 %r8291, %r8290;
selp.b32 %r8292, -1, %r8291, %p2316;
and.b32 %r8293, %r8292, %r10679;
shl.b32 %r8294, %r8293, %r10275;
cvt.u16.u32 %rs1066, %r8294;
or.b16 %rs1322, %rs1322, %rs1066;
add.s32 %r10275, %r8288, %r10275;
sub.s32 %r10678, %r3942, %r8288;
shr.u32 %r10679, %r10679, %r8288;
setp.gt.u32 %p2317, %r8287, %r3942;
@%p2317 bra $L__BB2_1863;
// A reserved bit is released (no store) unless the low byte equals 127.
setp.ne.s32 %p2318, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs1067, %rs1322, 255;
setp.ne.s16 %p2319, %rs1067, 127;
and.pred %p2320, %p2318, %p2319;
@%p2320 bra $L__BB2_1863;
// Store at %rd1 + %rd5 + (20548 - %r10274).
mov.u32 %r8297, 20548;
sub.s32 %r8298, %r8297, %r10274;
cvt.u64.u32 %rd1389, %r8298;
add.s64 %rd1390, %rd1389, %rd5;
add.s64 %rd1391, %rd1, %rd1390;
st.global.u8 [%rd1391], %rs1322;
add.s32 %r10274, %r10274, 1;
// Reserve a bit in the next byte when this one is > 143.
setp.gt.u16 %p2321, %rs1067, 143;
selp.u32 %r10276, 1, 0, %p2321;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1863:
setp.ne.s32 %p2322, %r10678, 0;
mov.u32 %r10689, %r10677;
@%p2322 bra $L__BB2_1859;
// Emit the pair (value %rs477, length %rs478) through the descending-address bit
// writer (same procedure as the two preceding regions). Flag chain: %r10689 in, %r10701 out.
$L__BB2_1864:
setp.eq.s16 %p2323, %rs478, 0;
mov.u32 %r10701, %r10689;
// Zero length: nothing to emit.
@%p2323 bra $L__BB2_1871;
cvt.u32.u16 %r8299, %rs478;
and.b32 %r10690, %r8299, 255;
cvt.u32.u16 %r8300, %rs477;
and.b32 %r10691, %r8300, 255;
$L__BB2_1866:
mov.u32 %r3961, %r10690;
// Capacity check: 2880 bytes.
setp.gt.u32 %p2324, %r10274, 2879;
mov.u32 %r10701, 1;
@%p2324 bra $L__BB2_1871;
// avail = 8 - reserved - used; take = min(avail, remaining)
mov.u32 %r8302, 8;
sub.s32 %r8303, %r8302, %r10276;
sub.s32 %r8304, %r8303, %r10275;
min.u32 %r8305, %r8304, %r3961;
setp.eq.s32 %p2325, %r8305, 32;
mov.u32 %r8306, -1;
shl.b32 %r8307, %r8306, %r8305;
not.b32 %r8308, %r8307;
selp.b32 %r8309, -1, %r8308, %p2325;
and.b32 %r8310, %r8309, %r10691;
shl.b32 %r8311, %r8310, %r10275;
cvt.u16.u32 %rs1071, %r8311;
or.b16 %rs1322, %rs1322, %rs1071;
add.s32 %r10275, %r8305, %r10275;
sub.s32 %r10690, %r3961, %r8305;
shr.u32 %r10691, %r10691, %r8305;
setp.gt.u32 %p2326, %r8304, %r3961;
@%p2326 bra $L__BB2_1870;
// A reserved bit is released (no store) unless the low byte equals 127.
setp.ne.s32 %p2327, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs1072, %rs1322, 255;
setp.ne.s16 %p2328, %rs1072, 127;
and.pred %p2329, %p2327, %p2328;
@%p2329 bra $L__BB2_1870;
// Store at %rd1 + %rd5 + (20548 - %r10274).
mov.u32 %r8314, 20548;
sub.s32 %r8315, %r8314, %r10274;
cvt.u64.u32 %rd1392, %r8315;
add.s64 %rd1393, %rd1392, %rd5;
add.s64 %rd1394, %rd1, %rd1393;
st.global.u8 [%rd1394], %rs1322;
add.s32 %r10274, %r10274, 1;
// Reserve a bit in the next byte when this one is > 143.
setp.gt.u16 %p2330, %rs1072, 143;
selp.u32 %r10276, 1, 0, %p2330;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1870:
setp.ne.s32 %p2331, %r10690, 0;
mov.u32 %r10701, %r10689;
@%p2331 bra $L__BB2_1866;
// Emit the pair (value %rs481, length %rs482) through the descending-address bit
// writer (same procedure as the three preceding regions). Flag chain: %r10701 in,
// result written to the loop-carried %r10277.
$L__BB2_1871:
setp.eq.s16 %p2332, %rs482, 0;
mov.u32 %r10277, %r10701;
// Zero length: nothing to emit.
@%p2332 bra $L__BB2_1878;
cvt.u32.u16 %r8316, %rs481;
and.b32 %r10703, %r8316, 255;
cvt.u32.u16 %r8317, %rs482;
and.b32 %r10702, %r8317, 255;
$L__BB2_1873:
mov.u32 %r3980, %r10702;
// Capacity check: 2880 bytes.
setp.gt.u32 %p2333, %r10274, 2879;
mov.u32 %r10277, 1;
@%p2333 bra $L__BB2_1878;
// avail = 8 - reserved - used; take = min(avail, remaining)
mov.u32 %r8319, 8;
sub.s32 %r8320, %r8319, %r10276;
sub.s32 %r8321, %r8320, %r10275;
min.u32 %r8322, %r8321, %r3980;
setp.eq.s32 %p2334, %r8322, 32;
mov.u32 %r8323, -1;
shl.b32 %r8324, %r8323, %r8322;
not.b32 %r8325, %r8324;
selp.b32 %r8326, -1, %r8325, %p2334;
and.b32 %r8327, %r8326, %r10703;
shl.b32 %r8328, %r8327, %r10275;
cvt.u16.u32 %rs1076, %r8328;
or.b16 %rs1322, %rs1322, %rs1076;
add.s32 %r10275, %r8322, %r10275;
sub.s32 %r10702, %r3980, %r8322;
shr.u32 %r10703, %r10703, %r8322;
setp.gt.u32 %p2335, %r8321, %r3980;
@%p2335 bra $L__BB2_1877;
// A reserved bit is released (no store) unless the low byte equals 127.
setp.ne.s32 %p2336, %r10276, 0;
mov.u32 %r10276, 0;
and.b16 %rs1077, %rs1322, 255;
setp.ne.s16 %p2337, %rs1077, 127;
and.pred %p2338, %p2336, %p2337;
@%p2338 bra $L__BB2_1877;
// Store at %rd1 + %rd5 + (20548 - %r10274).
mov.u32 %r8331, 20548;
sub.s32 %r8332, %r8331, %r10274;
cvt.u64.u32 %rd1395, %r8332;
add.s64 %rd1396, %rd1395, %rd5;
add.s64 %rd1397, %rd1, %rd1396;
st.global.u8 [%rd1397], %rs1322;
add.s32 %r10274, %r10274, 1;
// Reserve a bit in the next byte when this one is > 143.
setp.gt.u16 %p2339, %rs1077, 143;
selp.u32 %r10276, 1, 0, %p2339;
mov.u16 %rs1322, 0;
mov.u32 %r10275, 0;
$L__BB2_1877:
setp.ne.s32 %p2340, %r10702, 0;
mov.u32 %r10277, %r10701;
@%p2340 bra $L__BB2_1873;
// Loop back-edges: the inner counter %r10326 advances by 4 up to %r4057, the outer
// counter %r10310 advances by 2 up to %r4058. Both loop heads ($L__BB2_1638,
// $L__BB2_1636) are outside this part of the listing.
$L__BB2_1878:
add.s32 %r10326, %r10326, 4;
setp.lt.u32 %p2341, %r10326, %r4057;
@%p2341 bra $L__BB2_1638;
$L__BB2_1879:
add.s32 %r10310, %r10310, 2;
setp.lt.u32 %p2342, %r10310, %r4058;
@%p2342 bra $L__BB2_1636;
// ---- End of the main loops: flush a pending run -----------------------------------
// If the run counter %r10040 is non-zero, append one final 1 bit to %rs1253 and
// store the byte if it became full. %r10743 = overflow flag out (default %r10043).
$L__BB2_1880:
setp.eq.s32 %p2343, %r10040, 0;
mov.u32 %r10743, %r10043;
// No pending run.
@%p2343 bra $L__BB2_1884;
shl.b16 %rs1080, %rs1253, 1;
or.b16 %rs1253, %rs1080, 1;
add.s32 %r9835, %r9835, -1;
setp.ne.s32 %p2344, %r9835, 0;
mov.u32 %r10743, %r10043;
// Byte not full yet.
@%p2344 bra $L__BB2_1884;
// Capacity check: 192 bytes for this stream.
setp.gt.u32 %p2345, %r9826, 191;
mov.u32 %r10743, 1;
mov.u32 %r9835, 0;
@%p2345 bra $L__BB2_1884;
// Store at %rd1 + %rd5 + 17477 + %r9826.
add.s32 %r8335, %r9826, 17477;
cvt.u64.u32 %rd1398, %r8335;
add.s64 %rd1399, %rd1398, %rd5;
add.s64 %rd1400, %rd1, %rd1399;
and.b16 %rs1082, %rs1253, 255;
st.global.u8 [%rd1400], %rs1253;
add.s32 %r9826, %r9826, 1;
// A 0xFF byte leaves only 7 free bits in the following byte.
setp.eq.s16 %p2346, %rs1082, 255;
selp.b32 %r9835, 7, 8, %p2346;
mov.u16 %rs1253, 0;
mov.u32 %r10743, %r10043;
// ---- Terminate the two byte streams that share the tail of the buffer -------------
// Stream F grows upward from offset 17477 (accumulator %rs1253, free bits %r9835,
// length %r9826); stream B grows downward from offset 20548 (accumulator %rs1322,
// used bits %r10275, length %r10274).
// The pending bits of F are left-aligned (%rs505). If the two partial bytes can be
// OR-ed into one byte without either side changing the other's used bits, the
// result is not 0xFF, and %r10274 > 1, a single fused byte is appended to F only;
// otherwise each stream gets its own final byte.
// NOTE(review): this matches the shape of HTJ2K MEL/VLC termination - confirm against the CUDA source.
$L__BB2_1884:
cvt.u32.u16 %r8336, %rs1253;
and.b32 %r8337, %r8336, 255;
shl.b32 %r8338, %r8337, %r9835;
cvt.u16.u32 %rs505, %r8338;
// %r4032 = mask of the bits already used in %rs1322 (0 when none are used).
mov.u32 %r8339, -1;
shl.b32 %r8340, %r8339, %r10275;
not.b32 %r8341, %r8340;
mov.u32 %r8342, 255;
and.b32 %r8343, %r8341, 255;
setp.eq.s32 %p2347, %r10275, 0;
selp.b32 %r4032, 0, %r8343, %p2347;
// %r4033 = 255 << free bits of F: mask of the bit positions F occupies in its byte.
shl.b32 %r4033, %r8342, %r9835;
and.b32 %r8344, %r4033, 255;
or.b32 %r8345, %r8344, %r4032;
setp.eq.s32 %p2348, %r8345, 0;
mov.u32 %r10745, %r10277;
mov.u32 %r10747, %r10743;
// Neither stream has pending bits: nothing to write.
@%p2348 bra $L__BB2_1890;
// Fused candidate byte and the check that fusing changes no used bit on either side.
or.b16 %rs506, %rs1322, %rs505;
and.b16 %rs1083, %rs506, 255;
xor.b16 %rs1084, %rs506, %rs505;
cvt.u32.u16 %r8346, %rs1084;
and.b32 %r8347, %r4033, %r8346;
and.b32 %r8348, %r8347, 255;
xor.b16 %rs1085, %rs506, %rs1322;
cvt.u32.u16 %r8349, %rs1085;
and.b32 %r8350, %r4032, %r8349;
or.b32 %r8351, %r8348, %r8350;
setp.eq.s32 %p2349, %r8351, 0;
setp.ne.s16 %p2350, %rs1083, 255;
and.pred %p2351, %p2350, %p2349;
setp.gt.u32 %p2352, %r10274, 1;
and.pred %p2353, %p2352, %p2351;
// %rd76 = next free byte of stream F.
add.s32 %r8352, %r9826, 17477;
cvt.u64.u32 %rd1401, %r8352;
add.s64 %rd1402, %rd1401, %rd5;
add.s64 %rd76, %rd1, %rd1402;
@%p2353 bra $L__BB2_1888;
bra.uni $L__BB2_1886;
// Fused case: one byte appended to stream F; stream B length is unchanged.
$L__BB2_1888:
setp.gt.u32 %p2357, %r9826, 191;
mov.u32 %r10747, 1;
mov.u32 %r10745, %r10277;
@%p2357 bra $L__BB2_1890;
st.global.u8 [%rd76], %rs506;
add.s32 %r9826, %r9826, 1;
mov.u32 %r10745, %r10277;
mov.u32 %r10747, %r10743;
bra.uni $L__BB2_1890;
// Separate case: both streams need room; otherwise both overflow flags are set to 1.
$L__BB2_1886:
setp.gt.u32 %p2354, %r9826, 191;
setp.gt.u32 %p2355, %r10274, 2879;
or.pred %p2356, %p2355, %p2354;
mov.u32 %r10745, 1;
mov.u32 %r10747, %r10745;
@%p2356 bra $L__BB2_1890;
st.global.u8 [%rd76], %rs505;
add.s32 %r9826, %r9826, 1;
mov.u32 %r8355, 20548;
sub.s32 %r8356, %r8355, %r10274;
cvt.u64.u32 %rd1403, %r8356;
add.s64 %rd1404, %rd1403, %rd5;
add.s64 %rd1405, %rd1, %rd1404;
st.global.u8 [%rd1405], %rs1322;
add.s32 %r10274, %r10274, 1;
mov.u32 %r10745, %r10277;
mov.u32 %r10747, %r10743;
// ---- Terminate the forward stream that starts at %rd1 + %rd5 ---------------------
// With pending bits (%r10463 != 0): fill the unused bit positions up to the limit
// %r10462 with ones; the padded byte is stored unless it equals 0xFF.
// With no pending bits and a 7-bit limit (%r10462 == 7, i.e. the previous byte was
// 0xFF): drop that last byte by decrementing the length %r10461 (saturating at 0).
// %r10749 = overflow flag out (default: incoming %r10495).
$L__BB2_1890:
setp.eq.s32 %p2358, %r10463, 0;
@%p2358 bra $L__BB2_1894;
// pad = (low (limit - used) bits set) << used
sub.s32 %r8358, %r10462, %r10463;
mov.u32 %r8359, -1;
shl.b32 %r8360, %r8359, %r8358;
not.b32 %r8361, %r8360;
and.b32 %r8362, %r8361, 255;
shl.b32 %r8363, %r8362, %r10463;
or.b32 %r4041, %r8363, %r10464;
// A padded byte of 0xFF is not written.
setp.eq.s32 %p2359, %r4041, 255;
mov.u32 %r10749, %r10495;
@%p2359 bra $L__BB2_1896;
// Capacity check: 17477 bytes.
setp.gt.u32 %p2360, %r10461, 17476;
mov.u32 %r10749, 1;
@%p2360 bra $L__BB2_1896;
cvt.u64.u32 %rd1406, %r10461;
add.s64 %rd1407, %rd1406, %rd5;
add.s64 %rd1408, %rd1, %rd1407;
st.global.u8 [%rd1408], %r4041;
add.s32 %r10461, %r10461, 1;
mov.u32 %r10749, %r10495;
bra.uni $L__BB2_1896;
$L__BB2_1894:
setp.ne.s32 %p2361, %r10462, 7;
mov.u32 %r10749, %r10495;
@%p2361 bra $L__BB2_1896;
// length := (length == 0) ? 0 : length - 1
setp.eq.s32 %p2362, %r10461, 0;
add.s32 %r8365, %r10461, -1;
selp.b32 %r10461, 0, %r8365, %p2362;
mov.u32 %r10749, %r10495;
or.b32 %r8366, %r10747, %r10745;
or.b32 %r8367, %r8366, %r10749;
setp.eq.s32 %p2363, %r8367, 0;
@%p2363 bra $L__BB2_1898;
mov.u32 %r8372, 1;
st.global.u32 [%rd6], %r8372;
mov.u32 %r8373, 3;
st.global.u32 [%rd6+4], %r8373;
mov.u32 %r10750, 0;
mov.u32 %r10751, %r10750;
mov.u32 %r10752, %r10750;
mov.u32 %r10753, %r10750;
bra.uni $L__BB2_1904;
$L__BB2_1898:
add.s32 %r8374, %r9826, %r10274;
add.s32 %r10751, %r8374, %r10461;
setp.lt.u32 %p2364, %r10751, 2;
setp.gt.u32 %p2365, %r10751, %r4061;
or.pred %p2366, %p2364, %p2365;
@%p2366 bra $L__BB2_1900;
bra.uni $L__BB2_1899;
$L__BB2_1900:
mov.u32 %r8384, 1;
st.global.u32 [%rd6], %r8384;
mov.u32 %r8385, 4;
st.global.u32 [%rd6+4], %r8385;
mov.u32 %r10750, 0;
mov.u32 %r10751, %r10750;
mov.u32 %r10752, %r10750;
mov.u32 %r10753, %r10750;
bra.uni $L__BB2_1904;
// Early exit: write an 8-word result record at %rd6 that is all zeros except
// word +16, which receives %r4059, then jump to the common exit $L__BB2_1905.
// NOTE(review): the condition that leads here is outside this part of the listing.
$L__BB2_1250:
mov.u32 %r6791, 0;
st.global.u32 [%rd6], %r6791;
st.global.u32 [%rd6+4], %r6791;
st.global.u32 [%rd6+8], %r6791;
st.global.u32 [%rd6+12], %r6791;
st.global.u32 [%rd6+16], %r4059;
st.global.u32 [%rd6+20], %r6791;
st.global.u32 [%rd6+24], %r6791;
st.global.u32 [%rd6+28], %r6791;
bra.uni $L__BB2_1905;
// Parameter check: %r4059 must not be smaller than %r4062. On failure the result
// record at %rd6 becomes {2, 5, 0, 0, 0, 0, 0, 0} and control goes to the common
// exit $L__BB2_1905; otherwise continue at $L__BB2_24.
$L__BB2_23:
setp.lt.u32 %p37, %r4059, %r4062;
@%p37 bra $L__BB2_1248;
bra.uni $L__BB2_24;
$L__BB2_1248:
mov.u32 %r6785, 2;
st.global.u32 [%rd6], %r6785;
mov.u32 %r6786, 5;
st.global.u32 [%rd6+4], %r6786;
mov.u32 %r6787, 0;
st.global.u32 [%rd6+8], %r6787;
st.global.u32 [%rd6+12], %r6787;
st.global.u32 [%rd6+16], %r6787;
st.global.u32 [%rd6+20], %r6787;
st.global.u32 [%rd6+24], %r6787;
st.global.u32 [%rd6+28], %r6787;
bra.uni $L__BB2_1905;
// Success path: pack the two tail-stream lengths into %r10750
//   bits  0..14 = %r10274 & 32767, bits 15..29 = %r9826 & 32767, bit 31 = 1,
// write 0 to result words [%rd6] and [%rd6+4], set %r10753 = 1 and join $L__BB2_1904.
$L__BB2_1899:
and.b32 %r8376, %r9826, 32767;
and.b32 %r8377, %r10274, 32767;
// bfi inserts the 15-bit %r8376 into %r8377 at bit position 15.
bfi.b32 %r8378, %r8377, %r8376, 15, 15;
or.b32 %r10750, %r8378, -2147483648;
mov.u32 %r8379, 0;
st.global.u32 [%rd6], %r8379;
st.global.u32 [%rd6+4], %r8379;
mov.u32 %r10753, 1;
bra.uni $L__BB2_1904;
// ---- Input validation scans, selected by %r4062 ------------------------------------
// Both scans walk %r4058 rows x %r4057 columns of s32 samples at
// %rd3 + 4 * (%rd4 + row * %r4055 + col) and look at abs(sample).
//   %r4062 == 3: samples 0 and 3 are skipped; every other sample increments the
//                counter %r8433 and must be odd and >= 5, else exit via $L__BB2_34.
//   %r4062 == 2: samples must be 0 or odd and > 2, else exit via $L__BB2_40;
//                %r8433 is left at 0.
//   any other %r4062, or predicate %p10 set: no scan, go to $L__BB2_43.
// NOTE(review): %p10 is computed outside this part of the listing (it looks like an
// "empty block" guard) - confirm.
$L__BB2_24:
mov.u32 %r8433, 0;
setp.eq.s32 %p38, %r4062, 2;
@%p38 bra $L__BB2_35;
setp.ne.s32 %p39, %r4062, 3;
@%p39 bra $L__BB2_43;
@%p10 bra $L__BB2_43;
mov.u32 %r4115, 0;
mov.u32 %r8426, %r4115;
mov.u32 %r8433, %r4115;
// Row loop (%r8426) for the %r4062 == 3 scan.
$L__BB2_28:
mul.lo.s32 %r30, %r8426, %r4055;
mov.u32 %r8428, %r4115;
// Column loop (%r8428).
$L__BB2_29:
add.s32 %r4117, %r8428, %r30;
cvt.u64.u32 %rd99, %r4117;
add.s64 %rd100, %rd99, %rd4;
shl.b64 %rd101, %rd100, 2;
add.s64 %rd102, %rd3, %rd101;
ld.global.u32 %r4118, [%rd102];
abs.s32 %r33, %r4118;
setp.eq.s32 %p41, %r33, 0;
@%p41 bra $L__BB2_32;
setp.eq.s32 %p42, %r33, 3;
@%p42 bra $L__BB2_32;
add.s32 %r8433, %r8433, 1;
// Reject when the magnitude is below 5 or even.
and.b32 %r4119, %r33, 1;
setp.eq.b32 %p43, %r4119, 1;
not.pred %p44, %p43;
setp.lt.u32 %p45, %r33, 5;
or.pred %p46, %p45, %p44;
@%p46 bra $L__BB2_34;
$L__BB2_32:
add.s32 %r8428, %r8428, 1;
setp.lt.u32 %p47, %r8428, %r4057;
@%p47 bra $L__BB2_29;
add.s32 %r8426, %r8426, 1;
setp.lt.u32 %p48, %r8426, %r4058;
@%p48 bra $L__BB2_28;
bra.uni $L__BB2_43;
// %r4062 == 2 scan.
$L__BB2_35:
@%p10 bra $L__BB2_43;
mov.u32 %r4124, 0;
mov.u32 %r8431, %r4124;
// Row loop (%r8431).
$L__BB2_37:
mul.lo.s32 %r39, %r8431, %r4055;
mov.u32 %r8432, %r4124;
// Column loop (%r8432).
$L__BB2_38:
add.s32 %r4126, %r8432, %r39;
cvt.u64.u32 %rd103, %r4126;
add.s64 %rd104, %rd103, %rd4;
shl.b64 %rd105, %rd104, 2;
add.s64 %rd106, %rd3, %rd105;
ld.global.u32 %r4127, [%rd106];
abs.s32 %r41, %r4127;
setp.eq.s32 %p50, %r41, 0;
@%p50 bra $L__BB2_41;
// Accept when the magnitude is > 2 and odd; anything else is rejected.
setp.gt.u32 %p51, %r41, 2;
and.b32 %r4128, %r41, 1;
setp.eq.b32 %p52, %r4128, 1;
and.pred %p53, %p51, %p52;
@%p53 bra $L__BB2_41;
bra.uni $L__BB2_40;
$L__BB2_41:
add.s32 %r8432, %r8432, 1;
setp.lt.u32 %p54, %r8432, %r4057;
@%p54 bra $L__BB2_38;
add.s32 %r8431, %r8431, 1;
setp.lt.u32 %p55, %r8431, %r4058;
mov.u32 %r8433, 0;
@%p55 bra $L__BB2_37;
// ---- Derived constants and shared-array clearing ------------------------------------
//   %r8411 = (%r4057 + 1) >> 1        (half the column count, rounded up)
//   %r45   = %r4059 - %r4062,  %r46 = 30 - %r45
//   byte 0xFF is stored at [%rd7]
//   %r48   = min(%r8411 + 2, 513)     (number of bytes to clear)
// Then the first %r48 bytes of two shared arrays are zeroed: the one based at
// %r4103 and cleanup_cx_val. The compiler split the loop into a 4x-unrolled part
// and a remainder of %r48 & 3 iterations.
// NOTE(review): %r4103 is set outside this part of the listing; presumably the
// cleanup_e_val array - confirm.
$L__BB2_43:
add.s32 %r8412, %r4057, 1;
shr.u32 %r8411, %r8412, 1;
mov.u32 %r8436, 0;
sub.s32 %r45, %r4059, %r4062;
mov.u32 %r4134, 30;
sub.s32 %r46, %r4134, %r45;
mov.u16 %rs510, 255;
st.global.u8 [%rd7], %rs510;
add.s32 %r4136, %r8411, 2;
min.u32 %r48, %r4136, 513;
// %r4141 = -2 - max(-3 - %r8411, -514), which works out to %r48 - 1 when
// %r8411 + 2 does not wrap; fewer than four bytes -> skip the unrolled loop.
mov.u32 %r4137, -3;
sub.s32 %r4138, %r4137, %r8411;
max.u32 %r4139, %r4138, -514;
mov.u32 %r4140, -2;
sub.s32 %r4141, %r4140, %r4139;
and.b32 %r8438, %r48, 3;
setp.lt.u32 %p56, %r4141, 3;
@%p56 bra $L__BB2_46;
sub.s32 %r8435, %r48, %r8438;
mov.u32 %r8436, 0;
// Unrolled loop: clears four consecutive bytes of each array per iteration.
$L__BB2_45:
add.s32 %r4144, %r4103, %r8436;
mov.u16 %rs511, 0;
st.shared.u8 [%r4144], %rs511;
mov.u32 %r4145, _ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE14cleanup_cx_val;
add.s32 %r4146, %r4145, %r8436;
st.shared.u8 [%r4146], %rs511;
st.shared.u8 [%r4144+1], %rs511;
st.shared.u8 [%r4146+1], %rs511;
st.shared.u8 [%r4144+2], %rs511;
st.shared.u8 [%r4146+2], %rs511;
st.shared.u8 [%r4144+3], %rs511;
st.shared.u8 [%r4146+3], %rs511;
add.s32 %r8436, %r8436, 4;
add.s32 %r8435, %r8435, -4;
setp.ne.s32 %p57, %r8435, 0;
@%p57 bra $L__BB2_45;
$L__BB2_46:
setp.eq.s32 %p58, %r8438, 0;
@%p58 bra $L__BB2_49;
mov.u32 %r4149, _ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE14cleanup_cx_val;
// Remainder loop: one byte of each array per iteration.
$L__BB2_48:
.pragma "nounroll";
add.s32 %r4148, %r4103, %r8436;
mov.u16 %rs512, 0;
st.shared.u8 [%r4148], %rs512;
add.s32 %r4150, %r4149, %r8436;
st.shared.u8 [%r4150], %rs512;
add.s32 %r8436, %r8436, 1;
add.s32 %r8438, %r8438, -1;
setp.ne.s32 %p59, %r8438, 0;
@%p59 bra $L__BB2_48;
mov.u32 %r8733, 0;
mov.u32 %r8530, 8;
mov.u32 %r8734, 1;
mov.u32 %r8971, 4;
mov.u16 %rs1165, 15;
mov.u16 %rs1096, 0;
mov.u32 %r8735, %r8733;
mov.u32 %r8736, %r8733;
mov.u32 %r8524, %r8733;
mov.u32 %r8969, %r8733;
mov.u32 %r8970, %r8734;
mov.u32 %r8972, %r8734;
mov.u32 %r9186, %r8733;
mov.u32 %r9157, %r8733;
mov.u32 %r9158, %r8733;
mov.u32 %r9159, %r8530;
mov.u32 %r9160, %r8733;
@%p10 bra $L__BB2_417;
ld.param.u64 %rd1415, [ j2k_htj2k_encode_codeblocks_multi_input_param_4];
ld.param.u64 %rd1409, [ j2k_htj2k_encode_codeblocks_multi_input_param_2];
mov.u32 %r4183, 0;
mov.u32 %r4184, 31;
sub.s32 %r60, %r4184, %r4059;
cvta.to.global.u64 %rd8, %rd1409;
cvta.to.global.u64 %rd9, %rd1415;
mov.u32 %r8972, 1;
mov.u16 %rs1096, 0;
mov.u32 %r9159, 8;
mov.u16 %rs1165, 15;
mov.u32 %r8971, 4;
mov.u32 %r8439, %r4183;
mov.u32 %r8440, %r4183;
mov.u32 %r8441, %r4183;
mov.u32 %r9160, %r4183;
mov.u32 %r9158, %r4183;
mov.u32 %r9157, %r4183;
mov.u32 %r9186, %r4183;
mov.u32 %r8970, %r8972;
mov.u32 %r8969, %r4183;
mov.u32 %r8524, %r4183;
mov.u32 %r8530, %r9159;
mov.u32 %r8736, %r4183;
mov.u32 %r8735, %r4183;
mov.u32 %r8734, %r8972;
mov.u32 %r8733, %r4183;
bra.uni $L__BB2_51;
// Validation failure exits. Both blocks write the same result record at %rd6:
// {2, 6, 0, 0, 0, 0, 0, 0}, then jump to the common exit $L__BB2_1905.
// $L__BB2_40 is reached from the %r4062 == 2 scan, $L__BB2_34 from the %r4062 == 3 scan.
$L__BB2_40:
mov.u32 %r4129, 2;
st.global.u32 [%rd6], %r4129;
mov.u32 %r4130, 6;
st.global.u32 [%rd6+4], %r4130;
mov.u32 %r4131, 0;
st.global.u32 [%rd6+8], %r4131;
st.global.u32 [%rd6+12], %r4131;
st.global.u32 [%rd6+16], %r4131;
st.global.u32 [%rd6+20], %r4131;
st.global.u32 [%rd6+24], %r4131;
st.global.u32 [%rd6+28], %r4131;
bra.uni $L__BB2_1905;
$L__BB2_34:
mov.u32 %r4120, 2;
st.global.u32 [%rd6], %r4120;
mov.u32 %r4121, 6;
st.global.u32 [%rd6+4], %r4121;
mov.u32 %r4122, 0;
st.global.u32 [%rd6+8], %r4122;
st.global.u32 [%rd6+12], %r4122;
st.global.u32 [%rd6+16], %r4122;
st.global.u32 [%rd6+20], %r4122;
st.global.u32 [%rd6+24], %r4122;
st.global.u32 [%rd6+28], %r4122;
bra.uni $L__BB2_1905;
// Out-of-line byte flush for the accumulator %rs1096 (length %r8524, free bits %r8530).
// With more than 191 bytes already written the overflow flag %r8729 is set to 1 and
// nothing is stored; otherwise the byte goes to [%rd13], the length advances, and
// the next byte gets 7 free bits after a 0xFF byte, else 8.
// NOTE(review): %rd13 is computed by the caller block, outside this part of the listing.
$L__BB2_256:
setp.gt.u32 %p287, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r8729, 1;
@%p287 bra $L__BB2_258;
and.b16 %rs599, %rs1096, 255;
st.global.u8 [%rd13], %rs1096;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p288, %rs599, 255;
selp.b32 %r8530, 7, 8, %p288;
mov.u16 %rs1096, 0;
// No overflow: keep the incoming flag value.
mov.u32 %r8729, %r8733;
bra.uni $L__BB2_258;
// $L__BB2_51: loop head. Loads and converts the first column of the current
// 2x2 group of samples:
//   sample 0 at word index (%r8440 + %rd4), sample 1 at (%r8440 + %r4055 + %rd4),
//   both relative to the global base %rd3.
// For each non-zero sample s the code forms
//   v   = (abs(s) << %r60) | (s & 0x80000000)      // sign bit + shifted magnitude
//   m   = ((v << 1) >> %r46) & ~1                  // magnitude field, bit 0 cleared
//   exp = 32 - clz(m - 1)                          // only when m != 0
//   val = (v >> 31) + m - 2                        // only when m != 0
// and sets the matching bit (1 or 2) in the significance mask %r8465.
// NOTE(review): %r4055 looks like the row stride and %r4058 the row count
// (the second sample is skipped when %r4058 < 2) -- confirm with the caller.
$L__BB2_51:
cvt.u64.u32 %rd107, %r8440;
add.s64 %rd108, %rd107, %rd4;
shl.b64 %rd109, %rd108, 2;
add.s64 %rd110, %rd3, %rd109;
ld.global.u32 %r79, [%rd110];
setp.eq.s32 %p61, %r79, 0;
// %r4183 holds 0 (set before the loop): a zero sample stays zero.
mov.u32 %r8457, %r4183;
@%p61 bra $L__BB2_53;
and.b32 %r4186, %r79, -2147483648;
abs.s32 %r4187, %r79;
shl.b32 %r4188, %r4187, %r60;
or.b32 %r8457, %r4188, %r4186;
$L__BB2_53:
shl.b32 %r4192, %r8457, 1;
shr.u32 %r4193, %r4192, %r46;
and.b32 %r82, %r4193, -2;
setp.eq.s32 %p62, %r82, 0;
// Defaults for an insignificant sample 0: exponent, value and mask all 0.
// %r8461 (converted sample 1) is also pre-cleared here.
mov.u32 %r8461, 0;
mov.u32 %r8458, %r8461;
mov.u32 %r8459, %r8461;
mov.u32 %r8465, %r8461;
@%p62 bra $L__BB2_55;
add.s32 %r4195, %r82, -1;
clz.b32 %r4196, %r4195;
mov.u32 %r4197, 32;
sub.s32 %r8458, %r4197, %r4196;
shr.u32 %r4198, %r8457, 31;
add.s32 %r4199, %r4198, %r82;
add.s32 %r8459, %r4199, -2;
mov.u32 %r8465, 1;
$L__BB2_55:
// %p63 (%r4058 < 2) is reused further down to skip the second-row loads.
setp.lt.u32 %p63, %r4058, 2;
@%p63 bra $L__BB2_58;
add.s32 %r4202, %r8440, %r4055;
cvt.u64.u32 %rd111, %r4202;
add.s64 %rd112, %rd111, %rd4;
shl.b64 %rd113, %rd112, 2;
add.s64 %rd114, %rd3, %rd113;
ld.global.u32 %r88, [%rd114];
setp.eq.s32 %p64, %r88, 0;
@%p64 bra $L__BB2_58;
and.b32 %r4203, %r88, -2147483648;
abs.s32 %r4204, %r88;
shl.b32 %r4205, %r4204, %r60;
or.b32 %r8461, %r4205, %r4203;
$L__BB2_58:
shl.b32 %r4208, %r8461, 1;
shr.u32 %r4209, %r4208, %r46;
and.b32 %r91, %r4209, -2;
setp.eq.s32 %p65, %r91, 0;
// Defaults for sample 1 and for the second column (%r8476); the running
// maximum exponent %r8480 starts as sample 0's exponent.
mov.u32 %r8476, 0;
mov.u32 %r8462, %r8476;
mov.u32 %r8463, %r8476;
mov.u32 %r8480, %r8458;
@%p65 bra $L__BB2_60;
or.b32 %r8465, %r8465, 2;
add.s32 %r4210, %r91, -1;
clz.b32 %r4211, %r4210;
mov.u32 %r4212, 32;
sub.s32 %r8462, %r4212, %r4211;
max.s32 %r8480, %r8458, %r8462;
shr.u32 %r4213, %r8461, 31;
add.s32 %r4214, %r4213, %r91;
add.s32 %r8463, %r4214, -2;
// $L__BB2_60: second column of the 2x2 group (samples 2 and 3).
// Skipped entirely, with exponents %r8477/%r8476 and values %r8479/%r8478 left
// at 0, when %r8439 + 1 >= %r4057. In that case the sample index advances by
// one (%r8482 = %r8440 + 1) instead of two.
// Sample 2 is read at index %r8482, sample 3 at %r8482 + %r4055; each uses the
// same sign/magnitude conversion as the first column and sets mask bit 4 or 8
// in %r8465 while folding its exponent into the running maximum %r8480.
// NOTE(review): %r4057 looks like the width in samples -- confirm.
$L__BB2_60:
add.s32 %r8482, %r8440, 1;
add.s32 %r4219, %r8439, 1;
setp.ge.u32 %p66, %r4219, %r4057;
mov.u32 %r8477, %r8476;
mov.u32 %r8478, %r8476;
mov.u32 %r8479, %r8476;
@%p66 bra $L__BB2_71;
cvt.u64.u32 %rd115, %r8482;
add.s64 %rd116, %rd115, %rd4;
shl.b64 %rd117, %rd116, 2;
add.s64 %rd118, %rd3, %rd117;
ld.global.u32 %r101, [%rd118];
setp.eq.s32 %p67, %r101, 0;
mov.u32 %r8477, 0;
mov.u32 %r8466, %r8477;
@%p67 bra $L__BB2_63;
and.b32 %r4221, %r101, -2147483648;
abs.s32 %r4222, %r101;
shl.b32 %r4223, %r4222, %r60;
or.b32 %r8466, %r4223, %r4221;
$L__BB2_63:
shl.b32 %r4226, %r8466, 1;
shr.u32 %r4227, %r4226, %r46;
and.b32 %r104, %r4227, -2;
setp.eq.s32 %p68, %r104, 0;
mov.u32 %r8479, %r8477;
@%p68 bra $L__BB2_65;
or.b32 %r8465, %r8465, 4;
add.s32 %r4228, %r104, -1;
clz.b32 %r4229, %r4228;
mov.u32 %r4230, 32;
sub.s32 %r8477, %r4230, %r4229;
max.s32 %r8480, %r8480, %r8477;
shr.u32 %r4231, %r8466, 31;
add.s32 %r4232, %r4231, %r104;
add.s32 %r8479, %r4232, -2;
$L__BB2_65:
mov.u32 %r8476, 0;
mov.u32 %r8471, %r8476;
// %p63 was set at $L__BB2_55 (%r4058 < 2): no second row, so sample 3 is zero.
@%p63 bra $L__BB2_68;
add.s32 %r4235, %r8482, %r4055;
cvt.u64.u32 %rd119, %r4235;
add.s64 %rd120, %rd119, %rd4;
shl.b64 %rd121, %rd120, 2;
add.s64 %rd122, %rd3, %rd121;
ld.global.u32 %r113, [%rd122];
setp.eq.s32 %p70, %r113, 0;
@%p70 bra $L__BB2_68;
and.b32 %r4236, %r113, -2147483648;
abs.s32 %r4237, %r113;
shl.b32 %r4238, %r4237, %r60;
or.b32 %r8471, %r4238, %r4236;
$L__BB2_68:
shl.b32 %r4241, %r8471, 1;
shr.u32 %r4242, %r4241, %r46;
and.b32 %r116, %r4242, -2;
setp.eq.s32 %p71, %r116, 0;
mov.u32 %r8478, %r8476;
@%p71 bra $L__BB2_70;
or.b32 %r8465, %r8465, 8;
add.s32 %r4243, %r116, -1;
clz.b32 %r4244, %r4243;
mov.u32 %r4245, 32;
sub.s32 %r8476, %r4245, %r4244;
max.s32 %r8480, %r8480, %r8476;
shr.u32 %r4246, %r8471, 31;
add.s32 %r4247, %r4246, %r116;
add.s32 %r8478, %r4247, -2;
$L__BB2_70:
// Both columns consumed: next group starts two samples further on.
add.s32 %r8482, %r8440, 2;
// $L__BB2_71: commit the advanced sample index and derive two values from the
// group's maximum exponent %r8480:
//   %r133  = %r8480 - 1 when %r8480 > 1, else 0 (later scaled by 6 at
//            $L__BB2_156 to index the param_4 table).
//   %r8483 = 0 when %r8480 < 2; otherwise a 4-bit mask whose bit i is set when
//            sample i's exponent (%r8458, %r8462, %r8477, %r8476) equals the
//            maximum.
$L__BB2_71:
mov.u32 %r8440, %r8482;
add.s32 %r4249, %r8480, -1;
setp.lt.s32 %p72, %r8480, 2;
setp.gt.s32 %p73, %r8480, 1;
selp.b32 %r133, %r4249, 0, %p73;
mov.u32 %r8483, 0;
@%p72 bra $L__BB2_73;
setp.eq.s32 %p74, %r8458, %r8480;
selp.u32 %r4250, 1, 0, %p74;
setp.eq.s32 %p75, %r8462, %r8480;
selp.u32 %r4251, -1, 0, %p75;
// bfi copies bit 0 of %r4251 into bit 1 of %r4250 -> bits 0..1 of the mask.
bfi.b32 %r4252, %r4251, %r4250, 1, 1;
setp.eq.s32 %p76, %r8477, %r8480;
selp.u16 %rs517, 1, 0, %p76;
mul.wide.u16 %r4253, %rs517, 4;
or.b32 %r4254, %r4252, %r4253;
setp.eq.s32 %p77, %r8476, %r8480;
selp.u16 %rs518, 1, 0, %p77;
mul.wide.u16 %r4255, %rs518, 8;
or.b32 %r8483, %r4254, %r4255;
// $L__BB2_73: three steps for the current group.
//  1. Update two per-column byte arrays in shared memory at index %r8439 >> 1:
//       [%r136]   = max([%r136], low byte of sample 1's exponent %r8462)
//       [%r136+1] = low byte of sample 3's exponent %r8476
//       [%r138]  |= mask bit 1 (sample 1 significant)
//       [%r138+1] = mask bit 3 (sample 3 significant)
//     %r138 is based on the cleanup_cx_val shared array; %r136 on %r4103.
//     NOTE(review): %r4103 is presumably the cleanup_e_val array -- confirm.
//  2. Look up a 16-bit entry in the param_2 table (%rd8) at index
//       (%r8441 << 8) | (%r8465 << 4) | %r8483
//     Entry layout as used here: bits 0..3 -> %r252 (see $L__BB2_128),
//     bits 4..6 -> code length, bits 8..15 -> code bits.
//  3. Append the code bits (LSB first) to the byte %rs1165 of the stream that
//     is written downward from offset 20548 (loop at $L__BB2_75).
// %r8495 ends as 1 if that stream was already full (%r8972 > 2879), otherwise
// as the incoming %r8969.
$L__BB2_73:
shr.u32 %r4256, %r8439, 1;
add.s32 %r136, %r4103, %r4256;
ld.shared.u8 %rs519, [%r136];
cvt.u32.u16 %r4258, %rs519;
and.b32 %r4259, %r4258, 255;
and.b32 %r4260, %r8462, 255;
setp.lt.u32 %p78, %r4260, %r4259;
cvt.u16.u32 %rs520, %r8462;
selp.b16 %rs521, %rs519, %rs520, %p78;
st.shared.u8 [%r136], %rs521;
cvt.u16.u32 %rs3, %r8476;
st.shared.u8 [%r136+1], %rs3;
and.b32 %r137, %r8465, 2;
cvt.u16.u32 %rs522, %r137;
shr.u16 %rs523, %rs522, 1;
mov.u32 %r4261, _ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE14cleanup_cx_val;
add.s32 %r138, %r4261, %r4256;
ld.shared.u8 %rs524, [%r138];
or.b16 %rs525, %rs524, %rs523;
st.shared.u8 [%r138], %rs525;
and.b32 %r139, %r8465, 8;
shr.u32 %r140, %r139, 3;
st.shared.u8 [%r138+1], %r140;
shl.b32 %r4262, %r8465, 4;
shl.b32 %r4263, %r8441, 8;
or.b32 %r4264, %r4262, %r4263;
or.b32 %r4265, %r4264, %r8483;
mul.wide.u32 %rd123, %r4265, 2;
add.s64 %rd124, %rd8, %rd123;
ld.global.u16 %rs4, [%rd124];
shr.u16 %rs526, %rs4, 4;
and.b16 %rs5, %rs526, 7;
setp.eq.s16 %p79, %rs5, 0;
mov.u32 %r8495, %r8969;
// Zero-length code: nothing to emit.
@%p79 bra $L__BB2_80;
cvt.u32.u16 %r8484, %rs5;
shr.u16 %rs527, %rs4, 8;
cvt.u32.u16 %r8485, %rs527;
$L__BB2_75:
// Loop invariant: %r8484 = code bits still to emit, %r8485 = those bits.
mov.u32 %r143, %r8484;
setp.gt.u32 %p80, %r8972, 2879;
mov.u32 %r8495, 1;
@%p80 bra $L__BB2_80;
// Room left in the pending byte = 8 - %r8970 - bits already used (%r8971).
mov.u32 %r4267, 8;
sub.s32 %r4268, %r4267, %r8970;
sub.s32 %r4269, %r4268, %r8971;
min.u32 %r4270, %r4269, %r143;
// Mask of the low %r4270 bits (the ==32 case guards the shift width).
setp.eq.s32 %p81, %r4270, 32;
mov.u32 %r4271, -1;
shl.b32 %r4272, %r4271, %r4270;
not.b32 %r4273, %r4272;
selp.b32 %r4274, -1, %r4273, %p81;
and.b32 %r4275, %r4274, %r8485;
shl.b32 %r4276, %r4275, %r8971;
cvt.u16.u32 %rs528, %r4276;
or.b16 %rs1165, %rs1165, %rs528;
add.s32 %r8971, %r4270, %r8971;
sub.s32 %r8484, %r143, %r4270;
shr.u32 %r8485, %r8485, %r4270;
// Byte not yet full: go check for remaining bits.
setp.gt.u32 %p82, %r4269, %r143;
@%p82 bra $L__BB2_79;
// Byte reached its current capacity. If a bit was being held back
// (%r8970 != 0) and the byte is not 127, release the held-back bit and keep
// filling the same byte; otherwise store it.
setp.ne.s32 %p83, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs529, %rs1165, 255;
setp.ne.s16 %p84, %rs529, 127;
and.pred %p85, %p83, %p84;
@%p85 bra $L__BB2_79;
// Store at %rd1 + %rd5 + (20548 - %r8972): the stream grows toward lower
// addresses. The next byte holds back one bit when this byte is > 143.
mov.u32 %r4279, 20548;
sub.s32 %r4280, %r4279, %r8972;
cvt.u64.u32 %rd125, %r4280;
add.s64 %rd126, %rd125, %rd5;
add.s64 %rd127, %rd1, %rd126;
st.global.u8 [%rd127], %rs1165;
add.s32 %r8972, %r8972, 1;
setp.gt.u16 %p86, %rs529, 143;
selp.u32 %r8970, 1, 0, %p86;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_79:
setp.ne.s32 %p87, %r8484, 0;
mov.u32 %r8495, %r8969;
@%p87 bra $L__BB2_75;
// $L__BB2_80: entry of the run coder, executed only when the context %r8441
// is 0 (otherwise straight to $L__BB2_128).
//   %rd10 = %rd1 + %rd5 + %r8524 + 17477 : address of the next byte of the
//           stream stored at offset 17477+.
//   Mask %r8465 == 0 (no significant sample): extend the run at $L__BB2_120.
//   Otherwise: append a 0 bit to %rs1096 and consume one bit slot. If the byte
//   is now full, control goes to $L__BB2_83 (defined elsewhere in this kernel),
//   else to $L__BB2_85.
// NOTE(review): this matches the shape of an adaptive run-length (MEL-style)
// coder -- naming inferred, confirm against the CUDA source.
$L__BB2_80:
setp.ne.s32 %p88, %r8441, 0;
@%p88 bra $L__BB2_128;
setp.eq.s32 %p89, %r8465, 0;
add.s32 %r4281, %r8524, 17477;
cvt.u64.u32 %rd128, %r4281;
add.s64 %rd129, %rd128, %rd5;
add.s64 %rd10, %rd1, %rd129;
@%p89 bra $L__BB2_120;
shl.b16 %rs1096, %rs1096, 1;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p90, %r8530, 0;
mov.u32 %r8529, %r8733;
@%p90 bra $L__BB2_85;
bra.uni $L__BB2_83;
// $L__BB2_85: map the coder state index %r8735 to an exponent %r8499:
//   state 0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, otherwise 5.
// %r8499 is the number of run-counter bits emitted by the code that follows.
$L__BB2_85:
setp.lt.u32 %p92, %r8735, 3;
mov.u32 %r8499, 0;
@%p92 bra $L__BB2_88;
setp.lt.u32 %p93, %r8735, 6;
mov.u32 %r8499, 1;
@%p93 bra $L__BB2_88;
setp.lt.u32 %p94, %r8735, 9;
setp.eq.s32 %p95, %r8735, 11;
selp.b32 %r4287, 4, 5, %p95;
setp.lt.u32 %p96, %r8735, 11;
selp.b32 %r4288, 3, %r4287, %p96;
selp.b32 %r8499, 2, %r4288, %p94;
// $L__BB2_88: emit the low %r8499 bits of the run counter %r8736, most
// significant bit first, into the pending byte %rs1096.
// This region is the remainder part of a 4x-unrolled loop: it emits
// (%r8499 & 3) bits one at a time, then falls into the unrolled loop at
// $L__BB2_103 when more than three bits were requested (%r167 >= 3).
// Each step does:
//   %rs1096 = (%rs1096 << 1) | bit;  %r8530 -= 1;
//   if %r8530 == 0: flush the byte to offset %r8524 + 17477 unless 192 bytes
//   are already written (%r8524 > 191), in which case the flag candidate
//   (%r8512 / %r8503 / %r8529) becomes 1 and %r8530 stays 0.
// After a flush the new byte has 7 usable bits if the stored byte was 0xFF,
// else 8. The first step uses a constant 8: its byte cannot be 0xFF because the
// 0 bit appended at $L__BB2_80 is still inside it.
$L__BB2_88:
setp.eq.s32 %p97, %r8499, 0;
@%p97 bra $L__BB2_116;
add.s32 %r167, %r8499, -1;
and.b32 %r168, %r8499, 3;
setp.eq.s32 %p98, %r168, 0;
mov.u32 %r8509, %r8499;
mov.u32 %r8512, %r8529;
@%p98 bra $L__BB2_101;
// Peeled step 1: bit (%r8499 - 1) of the run counter.
mov.u32 %r4290, 1;
shl.b32 %r4291, %r4290, %r167;
and.b32 %r4292, %r4291, %r8736;
setp.ne.s32 %p99, %r4292, 0;
selp.u32 %r4293, 1, 0, %p99;
cvt.u32.u16 %r4294, %rs1096;
bfi.b32 %r4295, %r4294, %r4293, 1, 8;
cvt.u16.u32 %rs1096, %r4295;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p100, %r8530, 0;
mov.u32 %r8512, %r8529;
@%p100 bra $L__BB2_93;
setp.gt.u32 %p101, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r8512, %r4290;
@%p101 bra $L__BB2_93;
add.s32 %r4299, %r8524, 17477;
cvt.u64.u32 %rd130, %r4299;
add.s64 %rd131, %rd130, %rd5;
add.s64 %rd132, %rd1, %rd131;
st.global.u8 [%rd132], %rs1096;
add.s32 %r8524, %r8524, 1;
mov.u32 %r8530, 8;
mov.u16 %rs1096, 0;
mov.u32 %r8512, %r8529;
$L__BB2_93:
setp.eq.s32 %p102, %r168, 1;
mov.u32 %r8529, %r8512;
mov.u32 %r8509, %r167;
@%p102 bra $L__BB2_101;
// Peeled step 2: bit (%r8499 - 2).
add.s32 %r8509, %r8499, -2;
mov.u32 %r4300, 1;
shl.b32 %r4301, %r4300, %r8509;
and.b32 %r4302, %r4301, %r8736;
setp.ne.s32 %p103, %r4302, 0;
selp.u32 %r4303, 1, 0, %p103;
cvt.u32.u16 %r4304, %rs1096;
bfi.b32 %r4305, %r4304, %r4303, 1, 8;
cvt.u16.u32 %rs1096, %r4305;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p104, %r8530, 0;
mov.u32 %r8503, %r8512;
@%p104 bra $L__BB2_97;
setp.gt.u32 %p105, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r8503, %r4300;
@%p105 bra $L__BB2_97;
add.s32 %r4308, %r8524, 17477;
cvt.u64.u32 %rd133, %r4308;
add.s64 %rd134, %rd133, %rd5;
add.s64 %rd135, %rd1, %rd134;
and.b16 %rs536, %rs1096, 255;
st.global.u8 [%rd135], %rs1096;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p106, %rs536, 255;
selp.b32 %r8530, 7, 8, %p106;
mov.u16 %rs1096, 0;
mov.u32 %r8503, %r8512;
$L__BB2_97:
setp.eq.s32 %p107, %r168, 2;
mov.u32 %r8529, %r8503;
mov.u32 %r8512, %r8503;
@%p107 bra $L__BB2_101;
// Peeled step 3: bit (%r8499 - 3).
add.s32 %r8509, %r8499, -3;
mov.u32 %r4309, 1;
shl.b32 %r4310, %r4309, %r8509;
and.b32 %r4311, %r4310, %r8736;
setp.ne.s32 %p108, %r4311, 0;
selp.u32 %r4312, 1, 0, %p108;
cvt.u32.u16 %r4313, %rs1096;
bfi.b32 %r4314, %r4313, %r4312, 1, 8;
cvt.u16.u32 %rs1096, %r4314;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p109, %r8530, 0;
mov.u32 %r8529, %r8503;
mov.u32 %r8512, %r8503;
@%p109 bra $L__BB2_101;
setp.gt.u32 %p110, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r8529, %r4309;
mov.u32 %r8512, %r4309;
@%p110 bra $L__BB2_101;
add.s32 %r4319, %r8524, 17477;
cvt.u64.u32 %rd136, %r4319;
add.s64 %rd137, %rd136, %rd5;
add.s64 %rd138, %rd1, %rd137;
and.b16 %rs539, %rs1096, 255;
st.global.u8 [%rd138], %rs1096;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p111, %rs539, 255;
selp.b32 %r8530, 7, 8, %p111;
mov.u16 %rs1096, 0;
mov.u32 %r8529, %r8503;
mov.u32 %r8512, %r8503;
$L__BB2_101:
// Fewer than four bits requested in total: the peeled steps covered them all.
setp.lt.u32 %p112, %r167, 3;
@%p112 bra $L__BB2_116;
mov.u32 %r8529, %r8512;
// $L__BB2_103: 4x-unrolled body of the run-counter bit emitter.
// Per iteration it emits bits (%r8509-1), (%r8509-2), (%r8509-3), (%r8509-4)
// of %r8736, in that order, then decrements %r8509 by 4 and repeats while
// %r8509 != 0.
// Loop-carried state: %rs1096 (pending byte), %r8530 (bits left), %r8524
// (bytes written), %r8529 (flag, becomes 1 once the 192-byte limit stops a
// flush). The intermediate copies %r8518/%r8519/%r8517, %r8522/%r8523/%r8521
// and %r8526/%r8527/%r8525 carry the same three values between the four steps.
// Each step flushes a full byte to offset %r8524 + 17477 and gives the next
// byte 7 usable bits after a 0xFF byte, else 8.
$L__BB2_103:
// Step 1 of 4.
add.s32 %r4320, %r8509, -1;
mov.u32 %r4321, 1;
shl.b32 %r4322, %r4321, %r4320;
and.b32 %r4323, %r4322, %r8736;
setp.ne.s32 %p113, %r4323, 0;
selp.u32 %r4324, 1, 0, %p113;
cvt.u32.u16 %r4325, %rs1096;
bfi.b32 %r8518, %r4325, %r4324, 1, 8;
add.s32 %r8519, %r8530, -1;
setp.ne.s32 %p114, %r8519, 0;
mov.u32 %r8517, %r8529;
@%p114 bra $L__BB2_106;
setp.gt.u32 %p115, %r8524, 191;
mov.u32 %r8519, 0;
mov.u32 %r8517, %r4321;
@%p115 bra $L__BB2_106;
cvt.u16.u32 %rs540, %r8518;
and.b16 %rs541, %rs540, 255;
add.s32 %r4329, %r8524, 17477;
cvt.u64.u32 %rd139, %r4329;
add.s64 %rd140, %rd139, %rd5;
add.s64 %rd141, %rd1, %rd140;
st.global.u8 [%rd141], %rs540;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p116, %rs541, 255;
selp.b32 %r8519, 7, 8, %p116;
mov.u32 %r8518, 0;
mov.u32 %r8517, %r8529;
$L__BB2_106:
// Step 2 of 4.
add.s32 %r4330, %r8509, -2;
shl.b32 %r4332, %r4321, %r4330;
and.b32 %r4333, %r4332, %r8736;
setp.ne.s32 %p117, %r4333, 0;
and.b32 %r4334, %r8518, 127;
selp.u32 %r4335, 1, 0, %p117;
bfi.b32 %r8522, %r4334, %r4335, 1, 7;
add.s32 %r8523, %r8519, -1;
setp.ne.s32 %p118, %r8523, 0;
mov.u32 %r8521, %r8517;
@%p118 bra $L__BB2_109;
setp.gt.u32 %p119, %r8524, 191;
mov.u32 %r8523, 0;
mov.u32 %r8521, 1;
@%p119 bra $L__BB2_109;
cvt.u16.u32 %rs542, %r8522;
and.b16 %rs543, %rs542, 255;
add.s32 %r4339, %r8524, 17477;
cvt.u64.u32 %rd142, %r4339;
add.s64 %rd143, %rd142, %rd5;
add.s64 %rd144, %rd1, %rd143;
st.global.u8 [%rd144], %rs542;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p120, %rs543, 255;
selp.b32 %r8523, 7, 8, %p120;
mov.u32 %r8522, 0;
mov.u32 %r8521, %r8517;
$L__BB2_109:
// Step 3 of 4.
add.s32 %r4340, %r8509, -3;
mov.u32 %r4341, 1;
shl.b32 %r4342, %r4341, %r4340;
and.b32 %r4343, %r4342, %r8736;
setp.ne.s32 %p121, %r4343, 0;
and.b32 %r4344, %r8522, 127;
selp.u32 %r4345, 1, 0, %p121;
bfi.b32 %r8526, %r4344, %r4345, 1, 7;
add.s32 %r8527, %r8523, -1;
setp.ne.s32 %p122, %r8527, 0;
mov.u32 %r8525, %r8521;
@%p122 bra $L__BB2_112;
setp.gt.u32 %p123, %r8524, 191;
mov.u32 %r8527, 0;
mov.u32 %r8525, %r4341;
@%p123 bra $L__BB2_112;
cvt.u16.u32 %rs544, %r8526;
and.b16 %rs545, %rs544, 255;
add.s32 %r4349, %r8524, 17477;
cvt.u64.u32 %rd145, %r4349;
add.s64 %rd146, %rd145, %rd5;
add.s64 %rd147, %rd1, %rd146;
st.global.u8 [%rd147], %rs544;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p124, %rs545, 255;
selp.b32 %r8527, 7, 8, %p124;
mov.u32 %r8526, 0;
mov.u32 %r8525, %r8521;
$L__BB2_112:
// Step 4 of 4: also commits the loop counter and writes the results back to
// the loop-carried registers %rs1096 / %r8530 / %r8529.
add.s32 %r8509, %r8509, -4;
shl.b32 %r4351, %r4341, %r8509;
and.b32 %r4352, %r4351, %r8736;
setp.ne.s32 %p125, %r4352, 0;
and.b32 %r4353, %r8526, 127;
selp.u32 %r4354, 1, 0, %p125;
bfi.b32 %r4355, %r4353, %r4354, 1, 15;
cvt.u16.u32 %rs1096, %r4355;
add.s32 %r8530, %r8527, -1;
setp.ne.s32 %p126, %r8530, 0;
mov.u32 %r8529, %r8525;
@%p126 bra $L__BB2_115;
setp.gt.u32 %p127, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r8529, 1;
@%p127 bra $L__BB2_115;
add.s32 %r4358, %r8524, 17477;
cvt.u64.u32 %rd148, %r4358;
add.s64 %rd149, %rd148, %rd5;
add.s64 %rd150, %rd1, %rd149;
and.b16 %rs547, %rs1096, 255;
st.global.u8 [%rd150], %rs1096;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p128, %rs547, 255;
selp.b32 %r8530, 7, 8, %p128;
mov.u16 %rs1096, 0;
mov.u32 %r8529, %r8525;
$L__BB2_115:
setp.ne.s32 %p129, %r8509, 0;
@%p129 bra $L__BB2_103;
// $L__BB2_116: after the run-counter bits have been emitted, step the coder
// state down and restart the run:
//   %r8735 = (%r8735 == 0) ? 0 : %r8735 - 1
//   %r8736 = 0                                  // run counter
//   %r8734 = 1 << exponent(%r8735)              // new run threshold
//   %r8733 = %r8529                             // commit the stream-full flag
// The exponent mapping is the same one used at $L__BB2_85
// (0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, otherwise 5).
$L__BB2_116:
add.s32 %r4360, %r8735, -1;
setp.eq.s32 %p130, %r8735, 0;
mov.u32 %r8736, 0;
selp.b32 %r8735, 0, %r4360, %p130;
setp.lt.u32 %p131, %r8735, 3;
mov.u32 %r8535, %r8736;
@%p131 bra $L__BB2_119;
setp.lt.u32 %p132, %r8735, 6;
mov.u32 %r8535, 1;
@%p132 bra $L__BB2_119;
setp.lt.u32 %p133, %r8735, 9;
setp.eq.s32 %p134, %r8735, 11;
selp.b32 %r4362, 4, 5, %p134;
setp.lt.u32 %p135, %r8735, 11;
selp.b32 %r4363, 3, %r4362, %p135;
selp.b32 %r8535, 2, %r4363, %p133;
$L__BB2_119:
mov.u32 %r4365, 1;
shl.b32 %r8734, %r4365, %r8535;
mov.u32 %r8733, %r8529;
bra.uni $L__BB2_128;
// $L__BB2_120: the group has no significant sample (mask %r8465 == 0), so
// extend the current run.
//   %r8736 += 1; while it is still below the threshold %r8734 nothing is
//   emitted. When the threshold is reached:
//     - append a 1 bit to %rs1096 and flush the byte through %rd10 if it is
//       full (same 192-byte limit and 0xFF -> 7-bit rule as the other flushes);
//     - step the state up: %r8735 = min(%r8735 + 1, 12);
//     - reset the run counter and recompute the threshold
//       %r8734 = 1 << exponent(%r8735);
//     - commit the stream-full flag into %r8733.
$L__BB2_120:
add.s32 %r8736, %r8736, 1;
setp.lt.u32 %p136, %r8736, %r8734;
@%p136 bra $L__BB2_128;
shl.b16 %rs548, %rs1096, 1;
or.b16 %rs1096, %rs548, 1;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p137, %r8530, 0;
mov.u32 %r8536, %r8733;
@%p137 bra $L__BB2_124;
setp.gt.u32 %p138, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r8536, 1;
@%p138 bra $L__BB2_124;
and.b16 %rs550, %rs1096, 255;
st.global.u8 [%rd10], %rs1096;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p139, %rs550, 255;
selp.b32 %r8530, 7, 8, %p139;
mov.u16 %rs1096, 0;
mov.u32 %r8536, %r8733;
$L__BB2_124:
add.s32 %r4369, %r8735, 1;
min.u32 %r8735, %r4369, 12;
setp.lt.u32 %p140, %r8735, 3;
mov.u32 %r8736, 0;
mov.u32 %r8539, %r8736;
@%p140 bra $L__BB2_127;
setp.lt.u32 %p141, %r8735, 6;
mov.u32 %r8539, 1;
@%p141 bra $L__BB2_127;
setp.lt.u32 %p142, %r8735, 9;
setp.eq.s32 %p143, %r8735, 11;
selp.b32 %r4371, 4, 5, %p143;
setp.lt.u32 %p144, %r8735, 11;
selp.b32 %r4372, 3, %r4371, %p144;
selp.b32 %r8539, 2, %r4372, %p142;
$L__BB2_127:
mov.u32 %r4374, 1;
shl.b32 %r8734, %r4374, %r8539;
mov.u32 %r8733, %r8536;
// $L__BB2_128: start of the per-sample value emission for the group.
//   %r251 = max(%r8480, 1)        // bit budget derived from the max exponent
//   %r252 = table entry bits 0..3 // one flag per sample
// Sample 0 (mask bit 1 of %r8465): emit the low (%r251 - flag0) bits of
// %r8459, LSB first, into the forward stream stored at %rd1 + %rd5 + %r9160.
// Packer state: %r9157 pending byte, %r9158 bits used, %r9159 byte capacity
// (7 after a stored 0xFF byte, else 8), %r9160 bytes written.
// %r8556 ends as 1 if the stream was already full (%r9160 > 17476), otherwise
// as the incoming flag %r9186.
$L__BB2_128:
max.s32 %r251, %r8480, 1;
and.b16 %rs551, %rs4, 15;
cvt.u32.u16 %r252, %rs551;
and.b32 %r253, %r8465, 1;
setp.eq.s32 %p145, %r253, 0;
mov.u32 %r8556, %r9186;
@%p145 bra $L__BB2_135;
and.b32 %r4375, %r252, 1;
sub.s32 %r8546, %r251, %r4375;
setp.eq.s32 %p146, %r8546, 0;
mov.u32 %r8556, %r9186;
@%p146 bra $L__BB2_135;
// Keep only the low %r8546 bits of the value.
mov.u32 %r4376, -1;
shl.b32 %r4377, %r4376, %r8546;
not.b32 %r4378, %r4377;
and.b32 %r8547, %r8459, %r4378;
$L__BB2_131:
// Loop invariant: %r8546 = bits still to emit, %r8547 = those bits.
setp.gt.u32 %p147, %r9160, 17476;
mov.u32 %r8556, 1;
@%p147 bra $L__BB2_135;
sub.s32 %r4380, %r9159, %r9158;
min.u32 %r4381, %r4380, %r8546;
setp.eq.s32 %p148, %r4381, 32;
mov.u32 %r4382, -1;
shl.b32 %r4383, %r4382, %r4381;
not.b32 %r4384, %r4383;
selp.b32 %r4385, -1, %r4384, %p148;
and.b32 %r4386, %r4385, %r8547;
shl.b32 %r4387, %r4386, %r9158;
or.b32 %r9157, %r4387, %r9157;
add.s32 %r9158, %r4381, %r9158;
shr.u32 %r8547, %r8547, %r4381;
sub.s32 %r8546, %r8546, %r4381;
setp.lt.u32 %p149, %r9158, %r9159;
@%p149 bra $L__BB2_134;
// Byte full: store it and pick the capacity of the next byte.
cvt.u64.u32 %rd151, %r9160;
add.s64 %rd152, %rd151, %rd5;
add.s64 %rd153, %rd1, %rd152;
st.global.u8 [%rd153], %r9157;
add.s32 %r9160, %r9160, 1;
setp.eq.s32 %p150, %r9157, 255;
selp.b32 %r9159, 7, 8, %p150;
mov.u32 %r9157, 0;
mov.u32 %r9158, %r9157;
$L__BB2_134:
setp.ne.s32 %p151, %r8546, 0;
mov.u32 %r8556, %r9186;
@%p151 bra $L__BB2_131;
// $L__BB2_135: value emission for sample 1 (mask bit 2, cached in %r137).
// Emits the low (%r251 - bit 1 of %r252) bits of %r8463, LSB first, with the
// same forward bit-packer (%r9157 / %r9158 / %r9159 / %r9160) used for
// sample 0. %r8571 ends as 1 if the stream was already full
// (%r9160 > 17476), otherwise as the incoming flag %r8556.
$L__BB2_135:
setp.eq.s32 %p152, %r137, 0;
mov.u32 %r8571, %r8556;
@%p152 bra $L__BB2_142;
shr.u32 %r4390, %r252, 1;
and.b32 %r4391, %r4390, 1;
sub.s32 %r8561, %r251, %r4391;
setp.eq.s32 %p153, %r8561, 0;
mov.u32 %r8571, %r8556;
@%p153 bra $L__BB2_142;
mov.u32 %r4392, -1;
shl.b32 %r4393, %r4392, %r8561;
not.b32 %r4394, %r4393;
and.b32 %r8562, %r8463, %r4394;
$L__BB2_138:
// Loop invariant: %r8561 = bits still to emit, %r8562 = those bits.
setp.gt.u32 %p154, %r9160, 17476;
mov.u32 %r8571, 1;
@%p154 bra $L__BB2_142;
sub.s32 %r4396, %r9159, %r9158;
min.u32 %r4397, %r4396, %r8561;
setp.eq.s32 %p155, %r4397, 32;
mov.u32 %r4398, -1;
shl.b32 %r4399, %r4398, %r4397;
not.b32 %r4400, %r4399;
selp.b32 %r4401, -1, %r4400, %p155;
and.b32 %r4402, %r4401, %r8562;
shl.b32 %r4403, %r4402, %r9158;
or.b32 %r9157, %r4403, %r9157;
add.s32 %r9158, %r4397, %r9158;
shr.u32 %r8562, %r8562, %r4397;
sub.s32 %r8561, %r8561, %r4397;
setp.lt.u32 %p156, %r9158, %r9159;
@%p156 bra $L__BB2_141;
// Byte full: store it; the next byte holds 7 bits after 0xFF, else 8.
cvt.u64.u32 %rd154, %r9160;
add.s64 %rd155, %rd154, %rd5;
add.s64 %rd156, %rd1, %rd155;
st.global.u8 [%rd156], %r9157;
add.s32 %r9160, %r9160, 1;
setp.eq.s32 %p157, %r9157, 255;
selp.b32 %r9159, 7, 8, %p157;
mov.u32 %r9157, 0;
mov.u32 %r9158, %r9157;
$L__BB2_141:
setp.ne.s32 %p158, %r8561, 0;
mov.u32 %r8571, %r8556;
@%p158 bra $L__BB2_138;
// $L__BB2_142: value emission for sample 2 (mask bit 4 of %r8465).
// Emits the low (%r251 - bit 2 of %r252) bits of %r8479, LSB first, with the
// same forward bit-packer (%r9157 / %r9158 / %r9159 / %r9160).
// %r8586 ends as 1 if the stream was already full (%r9160 > 17476),
// otherwise as the incoming flag %r8571.
$L__BB2_142:
and.b32 %r4406, %r8465, 4;
setp.eq.s32 %p159, %r4406, 0;
mov.u32 %r8586, %r8571;
@%p159 bra $L__BB2_149;
shr.u32 %r4407, %r252, 2;
and.b32 %r4408, %r4407, 1;
sub.s32 %r8576, %r251, %r4408;
setp.eq.s32 %p160, %r8576, 0;
mov.u32 %r8586, %r8571;
@%p160 bra $L__BB2_149;
mov.u32 %r4409, -1;
shl.b32 %r4410, %r4409, %r8576;
not.b32 %r4411, %r4410;
and.b32 %r8577, %r8479, %r4411;
$L__BB2_145:
// Loop invariant: %r8576 = bits still to emit, %r8577 = those bits.
setp.gt.u32 %p161, %r9160, 17476;
mov.u32 %r8586, 1;
@%p161 bra $L__BB2_149;
sub.s32 %r4413, %r9159, %r9158;
min.u32 %r4414, %r4413, %r8576;
setp.eq.s32 %p162, %r4414, 32;
mov.u32 %r4415, -1;
shl.b32 %r4416, %r4415, %r4414;
not.b32 %r4417, %r4416;
selp.b32 %r4418, -1, %r4417, %p162;
and.b32 %r4419, %r4418, %r8577;
shl.b32 %r4420, %r4419, %r9158;
or.b32 %r9157, %r4420, %r9157;
add.s32 %r9158, %r4414, %r9158;
shr.u32 %r8577, %r8577, %r4414;
sub.s32 %r8576, %r8576, %r4414;
setp.lt.u32 %p163, %r9158, %r9159;
@%p163 bra $L__BB2_148;
// Byte full: store it; the next byte holds 7 bits after 0xFF, else 8.
cvt.u64.u32 %rd157, %r9160;
add.s64 %rd158, %rd157, %rd5;
add.s64 %rd159, %rd1, %rd158;
st.global.u8 [%rd159], %r9157;
add.s32 %r9160, %r9160, 1;
setp.eq.s32 %p164, %r9157, 255;
selp.b32 %r9159, 7, 8, %p164;
mov.u32 %r9157, 0;
mov.u32 %r9158, %r9157;
$L__BB2_148:
setp.ne.s32 %p165, %r8576, 0;
mov.u32 %r8586, %r8571;
@%p165 bra $L__BB2_145;
// $L__BB2_149: value emission for sample 3 (mask bit 8, cached in %r139).
// Emits the low (%r251 - bit 3 of %r252) bits of %r8478, LSB first, with the
// same forward bit-packer (%r9157 / %r9158 / %r9159 / %r9160). %r252 is at
// most 15, so the shift by 3 needs no extra mask.
// %r9186 (the loop-carried overflow flag) ends as 1 if the stream was already
// full (%r9160 > 17476), otherwise as the incoming flag %r8586.
$L__BB2_149:
setp.eq.s32 %p166, %r139, 0;
mov.u32 %r9186, %r8586;
@%p166 bra $L__BB2_156;
shr.u32 %r4423, %r252, 3;
sub.s32 %r8591, %r251, %r4423;
setp.eq.s32 %p167, %r8591, 0;
mov.u32 %r9186, %r8586;
@%p167 bra $L__BB2_156;
mov.u32 %r4424, -1;
shl.b32 %r4425, %r4424, %r8591;
not.b32 %r4426, %r4425;
and.b32 %r8592, %r8478, %r4426;
$L__BB2_152:
// Loop invariant: %r8591 = bits still to emit, %r8592 = those bits.
setp.gt.u32 %p168, %r9160, 17476;
mov.u32 %r9186, 1;
@%p168 bra $L__BB2_156;
sub.s32 %r4428, %r9159, %r9158;
min.u32 %r4429, %r4428, %r8591;
setp.eq.s32 %p169, %r4429, 32;
mov.u32 %r4430, -1;
shl.b32 %r4431, %r4430, %r4429;
not.b32 %r4432, %r4431;
selp.b32 %r4433, -1, %r4432, %p169;
and.b32 %r4434, %r4433, %r8592;
shl.b32 %r4435, %r4434, %r9158;
or.b32 %r9157, %r4435, %r9157;
add.s32 %r9158, %r4429, %r9158;
shr.u32 %r8592, %r8592, %r4429;
sub.s32 %r8591, %r8591, %r4429;
setp.lt.u32 %p170, %r9158, %r9159;
@%p170 bra $L__BB2_155;
// Byte full: store it; the next byte holds 7 bits after 0xFF, else 8.
cvt.u64.u32 %rd160, %r9160;
add.s64 %rd161, %rd160, %rd5;
add.s64 %rd162, %rd1, %rd161;
st.global.u8 [%rd162], %r9157;
add.s32 %r9160, %r9160, 1;
setp.eq.s32 %p171, %r9157, 255;
selp.b32 %r9159, 7, 8, %p171;
mov.u32 %r9157, 0;
mov.u32 %r9158, %r9157;
$L__BB2_155:
setp.ne.s32 %p172, %r8591, 0;
mov.u32 %r9186, %r8586;
@%p172 bra $L__BB2_152;
// $L__BB2_156: first group done. Precompute two byte pointers into the param_4
// table (%rd9), indexed by %r133 (max exponent - 1, clamped at 0):
//   %rd11 = %rd9 + 6 * %r133
//   %rd12 = %rd9 + 6 * %r133 + 2
// Then continue with a second group at $L__BB2_185 when %r8439 + 2 < %r4057,
// otherwise go to $L__BB2_157.
// NOTE(review): the 6-byte stride suggests three 16-bit fields per table row;
// the consumers of %rd11/%rd12 are elsewhere in this kernel -- confirm.
$L__BB2_156:
add.s32 %r4438, %r8439, 2;
setp.lt.u32 %p173, %r4438, %r4057;
mul.lo.s32 %r346, %r133, 6;
cvt.u64.u32 %rd163, %r346;
add.s64 %rd11, %rd9, %rd163;
add.s32 %r4439, %r346, 2;
cvt.u64.u32 %rd164, %r4439;
add.s64 %rd12, %rd9, %rd164;
@%p173 bra $L__BB2_185;
bra.uni $L__BB2_157;
$L__BB2_185:
cvt.u64.u32 %rd178, %r8440;
add.s64 %rd179, %rd178, %rd4;
shl.b64 %rd180, %rd179, 2;
add.s64 %rd181, %rd3, %rd180;
ld.global.u32 %r419, [%rd181];
setp.eq.s32 %p210, %r419, 0;
mov.u32 %r8651, 0;
mov.u32 %r8650, %r8651;
@%p210 bra $L__BB2_187;
and.b32 %r4510, %r419, -2147483648;
abs.s32 %r4511, %r419;
shl.b32 %r4512, %r4511, %r60;
or.b32 %r8650, %r4512, %r4510;
$L__BB2_187:
shl.b32 %r4516, %r8650, 1;
shr.u32 %r4517, %r4516, %r46;
and.b32 %r422, %r4517, -2;
setp.eq.s32 %p211, %r422, 0;
mov.u32 %r8652, %r8651;
mov.u32 %r8658, %r8651;
@%p211 bra $L__BB2_189;
add.s32 %r4519, %r422, -1;
clz.b32 %r4520, %r4519;
mov.u32 %r4521, 32;
sub.s32 %r8651, %r4521, %r4520;
shr.u32 %r4522, %r8650, 31;
add.s32 %r4523, %r4522, %r422;
add.s32 %r8652, %r4523, -2;
mov.u32 %r8658, 1;
$L__BB2_189:
mov.u32 %r8655, 0;
mov.u32 %r8654, %r8655;
@%p63 bra $L__BB2_192;
add.s32 %r4526, %r8440, %r4055;
cvt.u64.u32 %rd182, %r4526;
add.s64 %rd183, %rd182, %rd4;
shl.b64 %rd184, %rd183, 2;
add.s64 %rd185, %rd3, %rd184;
ld.global.u32 %r428, [%rd185];
setp.eq.s32 %p213, %r428, 0;
@%p213 bra $L__BB2_192;
and.b32 %r4527, %r428, -2147483648;
abs.s32 %r4528, %r428;
shl.b32 %r4529, %r4528, %r60;
or.b32 %r8654, %r4529, %r4527;
$L__BB2_192:
shl.b32 %r4532, %r8654, 1;
shr.u32 %r4533, %r4532, %r46;
and.b32 %r431, %r4533, -2;
setp.eq.s32 %p214, %r431, 0;
mov.u32 %r8656, %r8655;
mov.u32 %r8673, %r8651;
@%p214 bra $L__BB2_194;
or.b32 %r8658, %r8658, 2;
add.s32 %r4534, %r431, -1;
clz.b32 %r4535, %r4534;
mov.u32 %r4536, 32;
sub.s32 %r8655, %r4536, %r4535;
max.s32 %r8673, %r8651, %r8655;
shr.u32 %r4537, %r8654, 31;
add.s32 %r4538, %r4537, %r431;
add.s32 %r8656, %r4538, -2;
$L__BB2_194:
add.s32 %r8675, %r8440, 1;
add.s32 %r4543, %r8439, 3;
setp.ge.u32 %p215, %r4543, %r4057;
mov.u32 %r8676, 0;
mov.u32 %r8669, %r8676;
mov.u32 %r8670, %r8676;
mov.u32 %r8671, %r8676;
mov.u32 %r8672, %r8676;
@%p215 bra $L__BB2_205;
cvt.u64.u32 %rd186, %r8675;
add.s64 %rd187, %rd186, %rd4;
shl.b64 %rd188, %rd187, 2;
add.s64 %rd189, %rd3, %rd188;
ld.global.u32 %r441, [%rd189];
setp.eq.s32 %p216, %r441, 0;
mov.u32 %r8670, 0;
mov.u32 %r8659, %r8670;
@%p216 bra $L__BB2_197;
and.b32 %r4545, %r441, -2147483648;
abs.s32 %r4546, %r441;
shl.b32 %r4547, %r4546, %r60;
or.b32 %r8659, %r4547, %r4545;
$L__BB2_197:
shl.b32 %r4550, %r8659, 1;
shr.u32 %r4551, %r4550, %r46;
and.b32 %r444, %r4551, -2;
setp.eq.s32 %p217, %r444, 0;
mov.u32 %r8672, %r8670;
@%p217 bra $L__BB2_199;
or.b32 %r8658, %r8658, 4;
add.s32 %r4552, %r444, -1;
clz.b32 %r4553, %r4552;
mov.u32 %r4554, 32;
sub.s32 %r8670, %r4554, %r4553;
max.s32 %r8673, %r8673, %r8670;
shr.u32 %r4555, %r8659, 31;
add.s32 %r4556, %r4555, %r444;
add.s32 %r8672, %r4556, -2;
$L__BB2_199:
mov.u32 %r8669, 0;
mov.u32 %r8664, %r8669;
@%p63 bra $L__BB2_202;
add.s32 %r4559, %r8675, %r4055;
cvt.u64.u32 %rd190, %r4559;
add.s64 %rd191, %rd190, %rd4;
shl.b64 %rd192, %rd191, 2;
add.s64 %rd193, %rd3, %rd192;
ld.global.u32 %r453, [%rd193];
setp.eq.s32 %p219, %r453, 0;
@%p219 bra $L__BB2_202;
and.b32 %r4560, %r453, -2147483648;
abs.s32 %r4561, %r453;
shl.b32 %r4562, %r4561, %r60;
or.b32 %r8664, %r4562, %r4560;
$L__BB2_202:
shl.b32 %r4565, %r8664, 1;
shr.u32 %r4566, %r4565, %r46;
and.b32 %r456, %r4566, -2;
setp.eq.s32 %p220, %r456, 0;
mov.u32 %r8671, %r8669;
@%p220 bra $L__BB2_204;
or.b32 %r8658, %r8658, 8;
add.s32 %r4567, %r456, -1;
clz.b32 %r4568, %r4567;
mov.u32 %r4569, 32;
sub.s32 %r8669, %r4569, %r4568;
max.s32 %r8673, %r8673, %r8669;
shr.u32 %r4570, %r8664, 31;
add.s32 %r4571, %r4570, %r456;
add.s32 %r8671, %r4571, -2;
$L__BB2_204:
add.s32 %r8675, %r8440, 2;
$L__BB2_205:
mov.u32 %r8440, %r8675;
shr.u32 %r4573, %r8465, 1;
or.b32 %r473, %r4573, %r253;
add.s32 %r4574, %r8673, -1;
setp.lt.s32 %p221, %r8673, 2;
setp.gt.s32 %p222, %r8673, 1;
selp.b32 %r474, %r4574, 0, %p222;
@%p221 bra $L__BB2_207;
setp.eq.s32 %p223, %r8651, %r8673;
selp.u32 %r4575, 1, 0, %p223;
setp.eq.s32 %p224, %r8655, %r8673;
selp.u32 %r4576, -1, 0, %p224;
bfi.b32 %r4577, %r4576, %r4575, 1, 1;
setp.eq.s32 %p225, %r8670, %r8673;
selp.u16 %rs571, 1, 0, %p225;
mul.wide.u16 %r4578, %rs571, 4;
or.b32 %r4579, %r4577, %r4578;
setp.eq.s32 %p226, %r8669, %r8673;
selp.u16 %rs572, 1, 0, %p226;
mul.wide.u16 %r4580, %rs572, 8;
or.b32 %r8676, %r4579, %r4580;
$L__BB2_207:
and.b32 %r4581, %r8655, 255;
and.b32 %r4582, %r8476, 255;
setp.lt.u32 %p227, %r4581, %r4582;
cvt.u16.u32 %rs573, %r8655;
selp.b16 %rs574, %rs3, %rs573, %p227;
st.shared.u8 [%r136+1], %rs574;
st.shared.u8 [%r136+2], %r8669;
and.b32 %r477, %r8658, 2;
shr.u32 %r4583, %r477, 1;
or.b32 %r4584, %r140, %r4583;
st.shared.u8 [%r138+1], %r4584;
and.b32 %r478, %r8658, 8;
shr.u32 %r4585, %r478, 3;
st.shared.u8 [%r138+2], %r4585;
shl.b32 %r4586, %r8658, 4;
shl.b32 %r4587, %r473, 8;
or.b32 %r4588, %r4586, %r4587;
or.b32 %r4589, %r4588, %r8676;
mul.wide.u32 %rd195, %r4589, 2;
add.s64 %rd196, %rd8, %rd195;
ld.global.u16 %rs48, [%rd196];
shr.u16 %rs575, %rs48, 4;
and.b16 %rs49, %rs575, 7;
setp.eq.s16 %p228, %rs49, 0;
mov.u32 %r8688, %r8495;
@%p228 bra $L__BB2_214;
cvt.u32.u16 %r8677, %rs49;
shr.u16 %rs576, %rs48, 8;
cvt.u32.u16 %r8678, %rs576;
$L__BB2_209:
mov.u32 %r481, %r8677;
setp.gt.u32 %p229, %r8972, 2879;
mov.u32 %r8688, 1;
@%p229 bra $L__BB2_214;
mov.u32 %r4591, 8;
sub.s32 %r4592, %r4591, %r8970;
sub.s32 %r4593, %r4592, %r8971;
min.u32 %r4594, %r4593, %r481;
setp.eq.s32 %p230, %r4594, 32;
mov.u32 %r4595, -1;
shl.b32 %r4596, %r4595, %r4594;
not.b32 %r4597, %r4596;
selp.b32 %r4598, -1, %r4597, %p230;
and.b32 %r4599, %r4598, %r8678;
shl.b32 %r4600, %r4599, %r8971;
cvt.u16.u32 %rs577, %r4600;
or.b16 %rs1165, %rs1165, %rs577;
add.s32 %r8971, %r4594, %r8971;
sub.s32 %r8677, %r481, %r4594;
shr.u32 %r8678, %r8678, %r4594;
setp.gt.u32 %p231, %r4593, %r481;
@%p231 bra $L__BB2_213;
setp.ne.s32 %p232, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs578, %rs1165, 255;
setp.ne.s16 %p233, %rs578, 127;
and.pred %p234, %p232, %p233;
@%p234 bra $L__BB2_213;
mov.u32 %r4603, 20548;
sub.s32 %r4604, %r4603, %r8972;
cvt.u64.u32 %rd197, %r4604;
add.s64 %rd198, %rd197, %rd5;
add.s64 %rd199, %rd1, %rd198;
st.global.u8 [%rd199], %rs1165;
add.s32 %r8972, %r8972, 1;
setp.gt.u16 %p235, %rs578, 143;
selp.u32 %r8970, 1, 0, %p235;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_213:
setp.ne.s32 %p236, %r8677, 0;
mov.u32 %r8688, %r8495;
@%p236 bra $L__BB2_209;
$L__BB2_214:
setp.ne.s32 %p237, %r473, 0;
@%p237 bra $L__BB2_262;
setp.eq.s32 %p238, %r8658, 0;
add.s32 %r4605, %r8524, 17477;
cvt.u64.u32 %rd200, %r4605;
add.s64 %rd201, %rd200, %rd5;
add.s64 %rd13, %rd1, %rd201;
@%p238 bra $L__BB2_254;
shl.b16 %rs1096, %rs1096, 1;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p239, %r8530, 0;
mov.u32 %r8722, %r8733;
@%p239 bra $L__BB2_219;
setp.gt.u32 %p240, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r8722, 1;
@%p240 bra $L__BB2_219;
st.global.u8 [%rd13], %rs1096;
add.s32 %r8524, %r8524, 1;
mov.u32 %r8530, 8;
mov.u16 %rs1096, 0;
mov.u32 %r8722, %r8733;
$L__BB2_219:
setp.lt.u32 %p241, %r8735, 3;
mov.u32 %r8692, 0;
@%p241 bra $L__BB2_222;
setp.lt.u32 %p242, %r8735, 6;
mov.u32 %r8692, 1;
@%p242 bra $L__BB2_222;
setp.lt.u32 %p243, %r8735, 9;
setp.eq.s32 %p244, %r8735, 11;
selp.b32 %r4611, 4, 5, %p244;
setp.lt.u32 %p245, %r8735, 11;
selp.b32 %r4612, 3, %r4611, %p245;
selp.b32 %r8692, 2, %r4612, %p243;
$L__BB2_222:
setp.eq.s32 %p246, %r8692, 0;
@%p246 bra $L__BB2_250;
add.s32 %r505, %r8692, -1;
and.b32 %r506, %r8692, 3;
setp.eq.s32 %p247, %r506, 0;
mov.u32 %r8702, %r8692;
mov.u32 %r8705, %r8722;
@%p247 bra $L__BB2_235;
mov.u32 %r4614, 1;
shl.b32 %r4615, %r4614, %r505;
and.b32 %r4616, %r4615, %r8736;
setp.ne.s32 %p248, %r4616, 0;
selp.u32 %r4617, 1, 0, %p248;
cvt.u32.u16 %r4618, %rs1096;
bfi.b32 %r4619, %r4618, %r4617, 1, 8;
cvt.u16.u32 %rs1096, %r4619;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p249, %r8530, 0;
mov.u32 %r8705, %r8722;
@%p249 bra $L__BB2_227;
setp.gt.u32 %p250, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r8705, %r4614;
@%p250 bra $L__BB2_227;
add.s32 %r4623, %r8524, 17477;
cvt.u64.u32 %rd202, %r4623;
add.s64 %rd203, %rd202, %rd5;
add.s64 %rd204, %rd1, %rd203;
st.global.u8 [%rd204], %rs1096;
add.s32 %r8524, %r8524, 1;
mov.u32 %r8530, 8;
mov.u16 %rs1096, 0;
mov.u32 %r8705, %r8722;
$L__BB2_227:
setp.eq.s32 %p251, %r506, 1;
mov.u32 %r8722, %r8705;
mov.u32 %r8702, %r505;
@%p251 bra $L__BB2_235;
add.s32 %r8702, %r8692, -2;
mov.u32 %r4624, 1;
shl.b32 %r4625, %r4624, %r8702;
and.b32 %r4626, %r4625, %r8736;
setp.ne.s32 %p252, %r4626, 0;
selp.u32 %r4627, 1, 0, %p252;
cvt.u32.u16 %r4628, %rs1096;
bfi.b32 %r4629, %r4628, %r4627, 1, 8;
cvt.u16.u32 %rs1096, %r4629;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p253, %r8530, 0;
mov.u32 %r8696, %r8705;
@%p253 bra $L__BB2_231;
setp.gt.u32 %p254, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r8696, %r4624;
@%p254 bra $L__BB2_231;
add.s32 %r4632, %r8524, 17477;
cvt.u64.u32 %rd205, %r4632;
add.s64 %rd206, %rd205, %rd5;
add.s64 %rd207, %rd1, %rd206;
and.b16 %rs585, %rs1096, 255;
st.global.u8 [%rd207], %rs1096;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p255, %rs585, 255;
selp.b32 %r8530, 7, 8, %p255;
mov.u16 %rs1096, 0;
mov.u32 %r8696, %r8705;
$L__BB2_231:
setp.eq.s32 %p256, %r506, 2;
mov.u32 %r8722, %r8696;
mov.u32 %r8705, %r8696;
@%p256 bra $L__BB2_235;
add.s32 %r8702, %r8692, -3;
mov.u32 %r4633, 1;
shl.b32 %r4634, %r4633, %r8702;
and.b32 %r4635, %r4634, %r8736;
setp.ne.s32 %p257, %r4635, 0;
selp.u32 %r4636, 1, 0, %p257;
cvt.u32.u16 %r4637, %rs1096;
bfi.b32 %r4638, %r4637, %r4636, 1, 8;
cvt.u16.u32 %rs1096, %r4638;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p258, %r8530, 0;
mov.u32 %r8722, %r8696;
mov.u32 %r8705, %r8696;
@%p258 bra $L__BB2_235;
setp.gt.u32 %p259, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r8722, %r4633;
mov.u32 %r8705, %r4633;
@%p259 bra $L__BB2_235;
add.s32 %r4643, %r8524, 17477;
cvt.u64.u32 %rd208, %r4643;
add.s64 %rd209, %rd208, %rd5;
add.s64 %rd210, %rd1, %rd209;
and.b16 %rs588, %rs1096, 255;
st.global.u8 [%rd210], %rs1096;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p260, %rs588, 255;
selp.b32 %r8530, 7, 8, %p260;
mov.u16 %rs1096, 0;
mov.u32 %r8722, %r8696;
mov.u32 %r8705, %r8696;
// Bit emitter, main loop (unrolled four times).  Skipped entirely when
// %r505 < 3.  Each of the four steps:
//   1. tests one bit of %r8736 (bit index %r8702-1, -2, -3, then -4),
//   2. shifts it into the low end of the byte accumulator with bfi,
//      i.e. acc = (acc << 1) | bit,
//   3. decrements the "bits left in this byte" counter,
//   4. when the counter reaches 0: if %r8524 > 191 nothing is stored, the
//      counter is left at 0 and the status register is set to 1; otherwise
//      the byte is stored at %rd1 + %rd5 + 17477 + %r8524, %r8524 is
//      incremented, the accumulator is cleared and the counter restarts at
//      7 when the stored byte was 255, else 8.
// The loop repeats until %r8702 has been decremented to 0.
// On exit: %rs1096 = accumulator, %r8530 = bits left, %r8722 = status.
$L__BB2_235:
setp.lt.u32 %p261, %r505, 3;
@%p261 bra $L__BB2_250;
mov.u32 %r8722, %r8705;
$L__BB2_237:
// Step 1: bit (%r8702 - 1) of %r8736.
add.s32 %r4644, %r8702, -1;
mov.u32 %r4645, 1;
shl.b32 %r4646, %r4645, %r4644;
and.b32 %r4647, %r4646, %r8736;
setp.ne.s32 %p262, %r4647, 0;
selp.u32 %r4648, 1, 0, %p262;
cvt.u32.u16 %r4649, %rs1096;
bfi.b32 %r8711, %r4649, %r4648, 1, 8;
add.s32 %r8712, %r8530, -1;
setp.ne.s32 %p263, %r8712, 0;
mov.u32 %r8710, %r8722;
@%p263 bra $L__BB2_240;
// Byte complete: bounds check, then store.
setp.gt.u32 %p264, %r8524, 191;
mov.u32 %r8712, 0;
mov.u32 %r8710, %r4645;
@%p264 bra $L__BB2_240;
cvt.u16.u32 %rs589, %r8711;
and.b16 %rs590, %rs589, 255;
add.s32 %r4653, %r8524, 17477;
cvt.u64.u32 %rd211, %r4653;
add.s64 %rd212, %rd211, %rd5;
add.s64 %rd213, %rd1, %rd212;
st.global.u8 [%rd213], %rs589;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p265, %rs590, 255;
selp.b32 %r8712, 7, 8, %p265;
mov.u32 %r8711, 0;
mov.u32 %r8710, %r8722;
$L__BB2_240:
// Step 2: bit (%r8702 - 2) of %r8736.
add.s32 %r4654, %r8702, -2;
shl.b32 %r4656, %r4645, %r4654;
and.b32 %r4657, %r4656, %r8736;
setp.ne.s32 %p266, %r4657, 0;
and.b32 %r4658, %r8711, 127;
selp.u32 %r4659, 1, 0, %p266;
bfi.b32 %r8715, %r4658, %r4659, 1, 7;
add.s32 %r8716, %r8712, -1;
setp.ne.s32 %p267, %r8716, 0;
mov.u32 %r8714, %r8710;
@%p267 bra $L__BB2_243;
setp.gt.u32 %p268, %r8524, 191;
mov.u32 %r8716, 0;
mov.u32 %r8714, 1;
@%p268 bra $L__BB2_243;
cvt.u16.u32 %rs591, %r8715;
and.b16 %rs592, %rs591, 255;
add.s32 %r4663, %r8524, 17477;
cvt.u64.u32 %rd214, %r4663;
add.s64 %rd215, %rd214, %rd5;
add.s64 %rd216, %rd1, %rd215;
st.global.u8 [%rd216], %rs591;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p269, %rs592, 255;
selp.b32 %r8716, 7, 8, %p269;
mov.u32 %r8715, 0;
mov.u32 %r8714, %r8710;
$L__BB2_243:
// Step 3: bit (%r8702 - 3) of %r8736.
add.s32 %r4664, %r8702, -3;
mov.u32 %r4665, 1;
shl.b32 %r4666, %r4665, %r4664;
and.b32 %r4667, %r4666, %r8736;
setp.ne.s32 %p270, %r4667, 0;
and.b32 %r4668, %r8715, 127;
selp.u32 %r4669, 1, 0, %p270;
bfi.b32 %r8719, %r4668, %r4669, 1, 7;
add.s32 %r8720, %r8716, -1;
setp.ne.s32 %p271, %r8720, 0;
mov.u32 %r8718, %r8714;
@%p271 bra $L__BB2_246;
setp.gt.u32 %p272, %r8524, 191;
mov.u32 %r8720, 0;
mov.u32 %r8718, %r4665;
@%p272 bra $L__BB2_246;
cvt.u16.u32 %rs593, %r8719;
and.b16 %rs594, %rs593, 255;
add.s32 %r4673, %r8524, 17477;
cvt.u64.u32 %rd217, %r4673;
add.s64 %rd218, %rd217, %rd5;
add.s64 %rd219, %rd1, %rd218;
st.global.u8 [%rd219], %rs593;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p273, %rs594, 255;
selp.b32 %r8720, 7, 8, %p273;
mov.u32 %r8719, 0;
mov.u32 %r8718, %r8714;
$L__BB2_246:
// Step 4: %r8702 -= 4, then bit %r8702 of %r8736; result goes back into
// the loop-carried registers %rs1096 / %r8530 / %r8722.
add.s32 %r8702, %r8702, -4;
shl.b32 %r4675, %r4665, %r8702;
and.b32 %r4676, %r4675, %r8736;
setp.ne.s32 %p274, %r4676, 0;
and.b32 %r4677, %r8719, 127;
selp.u32 %r4678, 1, 0, %p274;
bfi.b32 %r4679, %r4677, %r4678, 1, 15;
cvt.u16.u32 %rs1096, %r4679;
add.s32 %r8530, %r8720, -1;
setp.ne.s32 %p275, %r8530, 0;
mov.u32 %r8722, %r8718;
@%p275 bra $L__BB2_249;
setp.gt.u32 %p276, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r8722, 1;
@%p276 bra $L__BB2_249;
add.s32 %r4682, %r8524, 17477;
cvt.u64.u32 %rd220, %r4682;
add.s64 %rd221, %rd220, %rd5;
add.s64 %rd222, %rd1, %rd221;
and.b16 %rs596, %rs1096, 255;
st.global.u8 [%rd222], %rs1096;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p277, %rs596, 255;
selp.b32 %r8530, 7, 8, %p277;
mov.u16 %rs1096, 0;
mov.u32 %r8722, %r8718;
$L__BB2_249:
// Continue while bit positions remain.
setp.ne.s32 %p278, %r8702, 0;
@%p278 bra $L__BB2_237;
// Step the state index %r8735 down by one (saturating at 0), clear the run
// counter %r8736, and recompute the threshold %r8734 = 1 << e, where e is
// chosen from the new %r8735:
//   <3 -> 0,  <6 -> 1,  <9 -> 2,  <11 -> 3,  ==11 -> 4,  otherwise 5.
// NOTE(review): this table looks like the HTJ2K MEL exponent table; confirm
// against the kernel source.
// The status value %r8722 is forwarded in %r8733 to $L__BB2_262.
$L__BB2_250:
add.s32 %r4684, %r8735, -1;
setp.eq.s32 %p279, %r8735, 0;
mov.u32 %r8736, 0;
selp.b32 %r8735, 0, %r4684, %p279;
setp.lt.u32 %p280, %r8735, 3;
mov.u32 %r8728, %r8736;
@%p280 bra $L__BB2_253;
setp.lt.u32 %p281, %r8735, 6;
mov.u32 %r8728, 1;
@%p281 bra $L__BB2_253;
setp.lt.u32 %p282, %r8735, 9;
setp.eq.s32 %p283, %r8735, 11;
selp.b32 %r4686, 4, 5, %p283;
setp.lt.u32 %p284, %r8735, 11;
selp.b32 %r4687, 3, %r4686, %p284;
selp.b32 %r8728, 2, %r4687, %p282;
$L__BB2_253:
mov.u32 %r4689, 1;
shl.b32 %r8734, %r4689, %r8728;
mov.u32 %r8733, %r8722;
bra.uni $L__BB2_262;
// Loads seven bytes ([%rd11+1], [%rd12], [%rd12+1], [%rd9..%rd9+3]) and then
// packs the first (value, length) pair: value = byte at [%rd11], length =
// %rs26.  Nothing is packed when the length is 0.
//
// Packer state shared with the following regions:
//   %rs1165 = byte accumulator (bits are OR-ed in at offset %r8971),
//   %r8971  = bits already in the accumulator,
//   %r8970  = amount by which the current byte's capacity is reduced
//             (capacity = 8 - %r8970 - %r8971),
//   %r8972  = bytes stored so far.
// Bytes are stored at %rd1 + %rd5 + (20548 - %r8972), i.e. at descending
// addresses.  If %r8972 > 2879 the loop stops and the status is set to 1.
// Status flows %r8495 -> %r8617.
$L__BB2_157:
ld.global.u8 %rs26, [%rd11+1];
ld.global.u8 %rs27, [%rd12];
ld.global.u8 %rs28, [%rd12+1];
ld.global.u8 %rs29, [%rd9];
ld.global.u8 %rs30, [%rd9+1];
ld.global.u8 %rs31, [%rd9+2];
ld.global.u8 %rs32, [%rd9+3];
setp.eq.s16 %p174, %rs26, 0;
mov.u32 %r8617, %r8495;
@%p174 bra $L__BB2_164;
ld.global.u8 %r8607, [%rd11];
cvt.u32.u16 %r8606, %rs26;
$L__BB2_159:
// %r349 = bits still to pack; bail out with status 1 when the byte budget
// is exhausted.
mov.u32 %r349, %r8606;
setp.gt.u32 %p175, %r8972, 2879;
mov.u32 %r8617, 1;
@%p175 bra $L__BB2_164;
// n = min(capacity, remaining); mask = (n == 32) ? ~0 : ~(-1 << n).
mov.u32 %r4441, 8;
sub.s32 %r4442, %r4441, %r8970;
sub.s32 %r4443, %r4442, %r8971;
min.u32 %r4444, %r4443, %r349;
setp.eq.s32 %p176, %r4444, 32;
mov.u32 %r4445, -1;
shl.b32 %r4446, %r4445, %r4444;
not.b32 %r4447, %r4446;
selp.b32 %r4448, -1, %r4447, %p176;
and.b32 %r4449, %r4448, %r8607;
shl.b32 %r4450, %r4449, %r8971;
cvt.u16.u32 %rs552, %r4450;
or.b16 %rs1165, %rs1165, %rs552;
add.s32 %r8971, %r4444, %r8971;
sub.s32 %r8606, %r349, %r4444;
shr.u32 %r8607, %r8607, %r4444;
// Byte not yet full -> no store.
setp.gt.u32 %p177, %r4443, %r349;
@%p177 bra $L__BB2_163;
// Byte full.  %r8970 is cleared; if it was non-zero and the low byte is
// not 127 the store is deferred (the byte can take one more bit).
setp.ne.s32 %p178, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs553, %rs1165, 255;
setp.ne.s16 %p179, %rs553, 127;
and.pred %p180, %p178, %p179;
@%p180 bra $L__BB2_163;
mov.u32 %r4453, 20548;
sub.s32 %r4454, %r4453, %r8972;
cvt.u64.u32 %rd166, %r4454;
add.s64 %rd167, %rd166, %rd5;
add.s64 %rd168, %rd1, %rd167;
st.global.u8 [%rd168], %rs1165;
add.s32 %r8972, %r8972, 1;
// Next byte loses one bit of capacity when the stored byte was > 143.
setp.gt.u16 %p181, %rs553, 143;
selp.u32 %r8970, 1, 0, %p181;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_163:
setp.ne.s32 %p182, %r8606, 0;
mov.u32 %r8617, %r8495;
@%p182 bra $L__BB2_159;
// Packs the pair (value = %rs29, length = %rs30); skipped when the length is
// 0.  Bits are OR-ed into %rs1165 at offset %r8971; the current byte holds
// 8 - %r8970 - %r8971 more bits.  Completed bytes are stored at
// %rd1 + %rd5 + (20548 - %r8972) and %r8972 is incremented.  If
// %r8972 > 2879 the loop stops with status 1.  Status flows %r8617 -> %r8629.
$L__BB2_164:
setp.eq.s16 %p183, %rs30, 0;
mov.u32 %r8629, %r8617;
@%p183 bra $L__BB2_171;
cvt.u32.u16 %r4455, %rs29;
and.b32 %r8619, %r4455, 255;
cvt.u32.u16 %r4456, %rs30;
and.b32 %r8618, %r4456, 255;
$L__BB2_166:
mov.u32 %r368, %r8618;
setp.gt.u32 %p184, %r8972, 2879;
mov.u32 %r8629, 1;
@%p184 bra $L__BB2_171;
// n = min(capacity, remaining); mask = (n == 32) ? ~0 : ~(-1 << n).
mov.u32 %r4458, 8;
sub.s32 %r4459, %r4458, %r8970;
sub.s32 %r4460, %r4459, %r8971;
min.u32 %r4461, %r4460, %r368;
setp.eq.s32 %p185, %r4461, 32;
mov.u32 %r4462, -1;
shl.b32 %r4463, %r4462, %r4461;
not.b32 %r4464, %r4463;
selp.b32 %r4465, -1, %r4464, %p185;
and.b32 %r4466, %r4465, %r8619;
shl.b32 %r4467, %r4466, %r8971;
cvt.u16.u32 %rs557, %r4467;
or.b16 %rs1165, %rs1165, %rs557;
add.s32 %r8971, %r4461, %r8971;
sub.s32 %r8618, %r368, %r4461;
shr.u32 %r8619, %r8619, %r4461;
// Byte not yet full -> no store.
setp.gt.u32 %p186, %r4460, %r368;
@%p186 bra $L__BB2_170;
// Byte full: clear %r8970; defer the store if it was non-zero and the low
// byte is not 127.
setp.ne.s32 %p187, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs558, %rs1165, 255;
setp.ne.s16 %p188, %rs558, 127;
and.pred %p189, %p187, %p188;
@%p189 bra $L__BB2_170;
mov.u32 %r4470, 20548;
sub.s32 %r4471, %r4470, %r8972;
cvt.u64.u32 %rd169, %r4471;
add.s64 %rd170, %rd169, %rd5;
add.s64 %rd171, %rd1, %rd170;
st.global.u8 [%rd171], %rs1165;
add.s32 %r8972, %r8972, 1;
// %r8970 = 1 for the next byte when the stored byte was > 143.
setp.gt.u16 %p190, %rs558, 143;
selp.u32 %r8970, 1, 0, %p190;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_170:
setp.ne.s32 %p191, %r8618, 0;
mov.u32 %r8629, %r8617;
@%p191 bra $L__BB2_166;
// Packs the pair (value = %rs27, length = %rs28); skipped when the length is
// 0.  Bits are OR-ed into %rs1165 at offset %r8971; the current byte holds
// 8 - %r8970 - %r8971 more bits.  Completed bytes are stored at
// %rd1 + %rd5 + (20548 - %r8972) and %r8972 is incremented.  If
// %r8972 > 2879 the loop stops with status 1.  Status flows %r8629 -> %r8641.
$L__BB2_171:
setp.eq.s16 %p192, %rs28, 0;
mov.u32 %r8641, %r8629;
@%p192 bra $L__BB2_178;
cvt.u32.u16 %r4472, %rs28;
and.b32 %r8630, %r4472, 255;
cvt.u32.u16 %r4473, %rs27;
and.b32 %r8631, %r4473, 255;
$L__BB2_173:
mov.u32 %r387, %r8630;
setp.gt.u32 %p193, %r8972, 2879;
mov.u32 %r8641, 1;
@%p193 bra $L__BB2_178;
// n = min(capacity, remaining); mask = (n == 32) ? ~0 : ~(-1 << n).
mov.u32 %r4475, 8;
sub.s32 %r4476, %r4475, %r8970;
sub.s32 %r4477, %r4476, %r8971;
min.u32 %r4478, %r4477, %r387;
setp.eq.s32 %p194, %r4478, 32;
mov.u32 %r4479, -1;
shl.b32 %r4480, %r4479, %r4478;
not.b32 %r4481, %r4480;
selp.b32 %r4482, -1, %r4481, %p194;
and.b32 %r4483, %r4482, %r8631;
shl.b32 %r4484, %r4483, %r8971;
cvt.u16.u32 %rs562, %r4484;
or.b16 %rs1165, %rs1165, %rs562;
add.s32 %r8971, %r4478, %r8971;
sub.s32 %r8630, %r387, %r4478;
shr.u32 %r8631, %r8631, %r4478;
// Byte not yet full -> no store.
setp.gt.u32 %p195, %r4477, %r387;
@%p195 bra $L__BB2_177;
// Byte full: clear %r8970; defer the store if it was non-zero and the low
// byte is not 127.
setp.ne.s32 %p196, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs563, %rs1165, 255;
setp.ne.s16 %p197, %rs563, 127;
and.pred %p198, %p196, %p197;
@%p198 bra $L__BB2_177;
mov.u32 %r4487, 20548;
sub.s32 %r4488, %r4487, %r8972;
cvt.u64.u32 %rd172, %r4488;
add.s64 %rd173, %rd172, %rd5;
add.s64 %rd174, %rd1, %rd173;
st.global.u8 [%rd174], %rs1165;
add.s32 %r8972, %r8972, 1;
// %r8970 = 1 for the next byte when the stored byte was > 143.
setp.gt.u16 %p199, %rs563, 143;
selp.u32 %r8970, 1, 0, %p199;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_177:
setp.ne.s32 %p200, %r8630, 0;
mov.u32 %r8641, %r8629;
@%p200 bra $L__BB2_173;
// Packs the pair (value = %rs31, length = %rs32), then continues at
// $L__BB2_416 (also taken directly when the length is 0 or the byte budget
// is exhausted).  %r8441 is set to 0 on entry.
// Bits are OR-ed into %rs1165 at offset %r8971; the current byte holds
// 8 - %r8970 - %r8971 more bits.  Completed bytes are stored at
// %rd1 + %rd5 + (20548 - %r8972) and %r8972 is incremented.  If
// %r8972 > 2879 the loop stops with status 1.  Status flows %r8641 -> %r8969.
$L__BB2_178:
setp.eq.s16 %p201, %rs32, 0;
mov.u32 %r8441, 0;
mov.u32 %r8969, %r8641;
@%p201 bra $L__BB2_416;
cvt.u32.u16 %r4490, %rs31;
and.b32 %r8643, %r4490, 255;
cvt.u32.u16 %r4491, %rs32;
and.b32 %r8642, %r4491, 255;
$L__BB2_180:
mov.u32 %r406, %r8642;
setp.gt.u32 %p202, %r8972, 2879;
mov.u32 %r8969, 1;
@%p202 bra $L__BB2_416;
// n = min(capacity, remaining); mask = (n == 32) ? ~0 : ~(-1 << n).
mov.u32 %r4494, 8;
sub.s32 %r4495, %r4494, %r8970;
sub.s32 %r4496, %r4495, %r8971;
min.u32 %r4497, %r4496, %r406;
setp.eq.s32 %p203, %r4497, 32;
mov.u32 %r4498, -1;
shl.b32 %r4499, %r4498, %r4497;
not.b32 %r4500, %r4499;
selp.b32 %r4501, -1, %r4500, %p203;
and.b32 %r4502, %r4501, %r8643;
shl.b32 %r4503, %r4502, %r8971;
cvt.u16.u32 %rs567, %r4503;
or.b16 %rs1165, %rs1165, %rs567;
add.s32 %r8971, %r4497, %r8971;
sub.s32 %r8642, %r406, %r4497;
shr.u32 %r8643, %r8643, %r4497;
// Byte not yet full -> no store.
setp.gt.u32 %p204, %r4496, %r406;
@%p204 bra $L__BB2_184;
// Byte full: clear %r8970; defer the store if it was non-zero and the low
// byte is not 127.
setp.ne.s32 %p205, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs568, %rs1165, 255;
setp.ne.s16 %p206, %rs568, 127;
and.pred %p207, %p205, %p206;
@%p207 bra $L__BB2_184;
mov.u32 %r4506, 20548;
sub.s32 %r4507, %r4506, %r8972;
cvt.u64.u32 %rd175, %r4507;
add.s64 %rd176, %rd175, %rd5;
add.s64 %rd177, %rd1, %rd176;
st.global.u8 [%rd177], %rs1165;
add.s32 %r8972, %r8972, 1;
// %r8970 = 1 for the next byte when the stored byte was > 143.
setp.gt.u16 %p208, %rs568, 143;
selp.u32 %r8970, 1, 0, %p208;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_184:
setp.eq.s32 %p209, %r8642, 0;
mov.u32 %r8969, %r8641;
@%p209 bra $L__BB2_416;
bra.uni $L__BB2_180;
// Byte flush for the %rs1096 accumulator.  If %r8524 > 191 nothing is
// stored: the bit counter %r8530 is left at 0 and the status %r8529 is set
// to 1.  Otherwise the byte is stored at [%rd10], %r8524 is incremented, the
// accumulator is cleared, the bit counter restarts at 8 and the status is
// taken from %r8733.  Both paths continue at $L__BB2_85.
$L__BB2_83:
setp.gt.u32 %p91, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r8529, 1;
@%p91 bra $L__BB2_85;
st.global.u8 [%rd10], %rs1096;
add.s32 %r8524, %r8524, 1;
mov.u32 %r8530, 8;
mov.u16 %rs1096, 0;
mov.u32 %r8529, %r8733;
bra.uni $L__BB2_85;
// Increment the run counter %r8736.  While it is still below the threshold
// %r8734, go straight to $L__BB2_262.  Otherwise shift a 1 bit into the
// accumulator (%rs1096 = (%rs1096 << 1) | 1) and decrement the bit counter
// %r8530: non-zero continues at $L__BB2_258, zero goes to $L__BB2_256.
$L__BB2_254:
add.s32 %r8736, %r8736, 1;
setp.lt.u32 %p285, %r8736, %r8734;
@%p285 bra $L__BB2_262;
shl.b16 %rs597, %rs1096, 1;
or.b16 %rs1096, %rs597, 1;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p286, %r8530, 0;
mov.u32 %r8729, %r8733;
@%p286 bra $L__BB2_258;
bra.uni $L__BB2_256;
// Step the state index %r8735 up by one (clamped to 12), clear the run
// counter %r8736, and recompute the threshold %r8734 = 1 << e, where e is
// chosen from the new %r8735:
//   <3 -> 0,  <6 -> 1,  <9 -> 2,  <11 -> 3,  ==11 -> 4,  otherwise 5.
// NOTE(review): this table looks like the HTJ2K MEL exponent table; confirm
// against the kernel source.
// The status value %r8729 is forwarded in %r8733; execution falls through
// to the label that follows this region.
$L__BB2_258:
add.s32 %r4693, %r8735, 1;
min.u32 %r8735, %r4693, 12;
setp.lt.u32 %p289, %r8735, 3;
mov.u32 %r8736, 0;
mov.u32 %r8732, %r8736;
@%p289 bra $L__BB2_261;
setp.lt.u32 %p290, %r8735, 6;
mov.u32 %r8732, 1;
@%p290 bra $L__BB2_261;
setp.lt.u32 %p291, %r8735, 9;
setp.eq.s32 %p292, %r8735, 11;
selp.b32 %r4695, 4, 5, %p292;
setp.lt.u32 %p293, %r8735, 11;
selp.b32 %r4696, 3, %r4695, %p293;
selp.b32 %r8732, 2, %r4696, %p291;
$L__BB2_261:
mov.u32 %r4698, 1;
shl.b32 %r8734, %r4698, %r8732;
mov.u32 %r8733, %r8729;
// Computes %r589 = max(%r8673, 1) and %r590 = %rs48 & 15, then conditionally
// packs one variable-length field.  The field is emitted only when bit 0 of
// %r8658 is set; its length is %r589 - (bit 0 of %r590) and its value is the
// low <length> bits of %r8652.
//
// Packer state: %r9157 = byte accumulator, %r9158 = bits already in it,
// %r9159 = capacity of the current byte, %r9160 = output byte index.
// A completed byte is stored at %rd1 + %rd5 + %r9160; the next byte's
// capacity is 7 when the stored accumulator equalled 255, else 8.
// If %r9160 > 17476 the loop stops with status 1.  Status flows
// %r9186 -> %r8749.
$L__BB2_262:
max.s32 %r589, %r8673, 1;
and.b16 %rs600, %rs48, 15;
cvt.u32.u16 %r590, %rs600;
and.b32 %r591, %r8658, 1;
setp.eq.s32 %p294, %r591, 0;
mov.u32 %r8749, %r9186;
@%p294 bra $L__BB2_269;
and.b32 %r4699, %r590, 1;
sub.s32 %r8739, %r589, %r4699;
setp.eq.s32 %p295, %r8739, 0;
mov.u32 %r8749, %r9186;
@%p295 bra $L__BB2_269;
// Keep only the low %r8739 bits of the value.
mov.u32 %r4700, -1;
shl.b32 %r4701, %r4700, %r8739;
not.b32 %r4702, %r4701;
and.b32 %r8740, %r8652, %r4702;
$L__BB2_265:
setp.gt.u32 %p296, %r9160, 17476;
mov.u32 %r8749, 1;
@%p296 bra $L__BB2_269;
// n = min(free bits, remaining); mask = (n == 32) ? ~0 : ~(-1 << n).
sub.s32 %r4704, %r9159, %r9158;
min.u32 %r4705, %r4704, %r8739;
setp.eq.s32 %p297, %r4705, 32;
mov.u32 %r4706, -1;
shl.b32 %r4707, %r4706, %r4705;
not.b32 %r4708, %r4707;
selp.b32 %r4709, -1, %r4708, %p297;
and.b32 %r4710, %r4709, %r8740;
shl.b32 %r4711, %r4710, %r9158;
or.b32 %r9157, %r4711, %r9157;
add.s32 %r9158, %r4705, %r9158;
shr.u32 %r8740, %r8740, %r4705;
sub.s32 %r8739, %r8739, %r4705;
setp.lt.u32 %p298, %r9158, %r9159;
@%p298 bra $L__BB2_268;
// Byte full: store and reset the accumulator.
cvt.u64.u32 %rd223, %r9160;
add.s64 %rd224, %rd223, %rd5;
add.s64 %rd225, %rd1, %rd224;
st.global.u8 [%rd225], %r9157;
add.s32 %r9160, %r9160, 1;
setp.eq.s32 %p299, %r9157, 255;
selp.b32 %r9159, 7, 8, %p299;
mov.u32 %r9157, 0;
mov.u32 %r9158, %r9157;
$L__BB2_268:
setp.ne.s32 %p300, %r8739, 0;
mov.u32 %r8749, %r9186;
@%p300 bra $L__BB2_265;
// Conditionally packs one variable-length field: emitted only when
// %r477 != 0; length = %r589 - (bit 1 of %r590); value = low <length> bits
// of %r8656.
// Packer state: %r9157 = byte accumulator, %r9158 = bits already in it,
// %r9159 = capacity of the current byte, %r9160 = output byte index.
// A completed byte is stored at %rd1 + %rd5 + %r9160; the next byte's
// capacity is 7 when the stored accumulator equalled 255, else 8.
// If %r9160 > 17476 the loop stops with status 1.  Status flows
// %r8749 -> %r8764.
$L__BB2_269:
setp.eq.s32 %p301, %r477, 0;
mov.u32 %r8764, %r8749;
@%p301 bra $L__BB2_276;
shr.u32 %r4714, %r590, 1;
and.b32 %r4715, %r4714, 1;
sub.s32 %r8754, %r589, %r4715;
setp.eq.s32 %p302, %r8754, 0;
mov.u32 %r8764, %r8749;
@%p302 bra $L__BB2_276;
// Keep only the low %r8754 bits of the value.
mov.u32 %r4716, -1;
shl.b32 %r4717, %r4716, %r8754;
not.b32 %r4718, %r4717;
and.b32 %r8755, %r8656, %r4718;
$L__BB2_272:
setp.gt.u32 %p303, %r9160, 17476;
mov.u32 %r8764, 1;
@%p303 bra $L__BB2_276;
// n = min(free bits, remaining); mask = (n == 32) ? ~0 : ~(-1 << n).
sub.s32 %r4720, %r9159, %r9158;
min.u32 %r4721, %r4720, %r8754;
setp.eq.s32 %p304, %r4721, 32;
mov.u32 %r4722, -1;
shl.b32 %r4723, %r4722, %r4721;
not.b32 %r4724, %r4723;
selp.b32 %r4725, -1, %r4724, %p304;
and.b32 %r4726, %r4725, %r8755;
shl.b32 %r4727, %r4726, %r9158;
or.b32 %r9157, %r4727, %r9157;
add.s32 %r9158, %r4721, %r9158;
shr.u32 %r8755, %r8755, %r4721;
sub.s32 %r8754, %r8754, %r4721;
setp.lt.u32 %p305, %r9158, %r9159;
@%p305 bra $L__BB2_275;
// Byte full: store and reset the accumulator.
cvt.u64.u32 %rd226, %r9160;
add.s64 %rd227, %rd226, %rd5;
add.s64 %rd228, %rd1, %rd227;
st.global.u8 [%rd228], %r9157;
add.s32 %r9160, %r9160, 1;
setp.eq.s32 %p306, %r9157, 255;
selp.b32 %r9159, 7, 8, %p306;
mov.u32 %r9157, 0;
mov.u32 %r9158, %r9157;
$L__BB2_275:
setp.ne.s32 %p307, %r8754, 0;
mov.u32 %r8764, %r8749;
@%p307 bra $L__BB2_272;
// Conditionally packs one variable-length field: emitted only when bit 2 of
// %r8658 is set; length = %r589 - (bit 2 of %r590); value = low <length>
// bits of %r8672.
// Packer state: %r9157 = byte accumulator, %r9158 = bits already in it,
// %r9159 = capacity of the current byte, %r9160 = output byte index.
// A completed byte is stored at %rd1 + %rd5 + %r9160; the next byte's
// capacity is 7 when the stored accumulator equalled 255, else 8.
// If %r9160 > 17476 the loop stops with status 1.  Status flows
// %r8764 -> %r8779.
$L__BB2_276:
and.b32 %r4730, %r8658, 4;
setp.eq.s32 %p308, %r4730, 0;
mov.u32 %r8779, %r8764;
@%p308 bra $L__BB2_283;
shr.u32 %r4731, %r590, 2;
and.b32 %r4732, %r4731, 1;
sub.s32 %r8769, %r589, %r4732;
setp.eq.s32 %p309, %r8769, 0;
mov.u32 %r8779, %r8764;
@%p309 bra $L__BB2_283;
// Keep only the low %r8769 bits of the value.
mov.u32 %r4733, -1;
shl.b32 %r4734, %r4733, %r8769;
not.b32 %r4735, %r4734;
and.b32 %r8770, %r8672, %r4735;
$L__BB2_279:
setp.gt.u32 %p310, %r9160, 17476;
mov.u32 %r8779, 1;
@%p310 bra $L__BB2_283;
// n = min(free bits, remaining); mask = (n == 32) ? ~0 : ~(-1 << n).
sub.s32 %r4737, %r9159, %r9158;
min.u32 %r4738, %r4737, %r8769;
setp.eq.s32 %p311, %r4738, 32;
mov.u32 %r4739, -1;
shl.b32 %r4740, %r4739, %r4738;
not.b32 %r4741, %r4740;
selp.b32 %r4742, -1, %r4741, %p311;
and.b32 %r4743, %r4742, %r8770;
shl.b32 %r4744, %r4743, %r9158;
or.b32 %r9157, %r4744, %r9157;
add.s32 %r9158, %r4738, %r9158;
shr.u32 %r8770, %r8770, %r4738;
sub.s32 %r8769, %r8769, %r4738;
setp.lt.u32 %p312, %r9158, %r9159;
@%p312 bra $L__BB2_282;
// Byte full: store and reset the accumulator.
cvt.u64.u32 %rd229, %r9160;
add.s64 %rd230, %rd229, %rd5;
add.s64 %rd231, %rd1, %rd230;
st.global.u8 [%rd231], %r9157;
add.s32 %r9160, %r9160, 1;
setp.eq.s32 %p313, %r9157, 255;
selp.b32 %r9159, 7, 8, %p313;
mov.u32 %r9157, 0;
mov.u32 %r9158, %r9157;
$L__BB2_282:
setp.ne.s32 %p314, %r8769, 0;
mov.u32 %r8779, %r8764;
@%p314 bra $L__BB2_279;
// Conditionally packs one variable-length field: emitted only when
// %r478 != 0; length = %r589 - (%r590 >> 3); value = low <length> bits of
// %r8671.
// Packer state: %r9157 = byte accumulator, %r9158 = bits already in it,
// %r9159 = capacity of the current byte, %r9160 = output byte index.
// A completed byte is stored at %rd1 + %rd5 + %r9160; the next byte's
// capacity is 7 when the stored accumulator equalled 255, else 8.
// If %r9160 > 17476 the loop stops with status 1.  Status flows
// %r8779 -> %r9186.
$L__BB2_283:
setp.eq.s32 %p315, %r478, 0;
mov.u32 %r9186, %r8779;
@%p315 bra $L__BB2_290;
shr.u32 %r4747, %r590, 3;
sub.s32 %r8784, %r589, %r4747;
setp.eq.s32 %p316, %r8784, 0;
mov.u32 %r9186, %r8779;
@%p316 bra $L__BB2_290;
// Keep only the low %r8784 bits of the value.
mov.u32 %r4748, -1;
shl.b32 %r4749, %r4748, %r8784;
not.b32 %r4750, %r4749;
and.b32 %r8785, %r8671, %r4750;
$L__BB2_286:
setp.gt.u32 %p317, %r9160, 17476;
mov.u32 %r9186, 1;
@%p317 bra $L__BB2_290;
// n = min(free bits, remaining); mask = (n == 32) ? ~0 : ~(-1 << n).
sub.s32 %r4752, %r9159, %r9158;
min.u32 %r4753, %r4752, %r8784;
setp.eq.s32 %p318, %r4753, 32;
mov.u32 %r4754, -1;
shl.b32 %r4755, %r4754, %r4753;
not.b32 %r4756, %r4755;
selp.b32 %r4757, -1, %r4756, %p318;
and.b32 %r4758, %r4757, %r8785;
shl.b32 %r4759, %r4758, %r9158;
or.b32 %r9157, %r4759, %r9157;
add.s32 %r9158, %r4753, %r9158;
shr.u32 %r8785, %r8785, %r4753;
sub.s32 %r8784, %r8784, %r4753;
setp.lt.u32 %p319, %r9158, %r9159;
@%p319 bra $L__BB2_289;
// Byte full: store and reset the accumulator.
cvt.u64.u32 %rd232, %r9160;
add.s64 %rd233, %rd232, %rd5;
add.s64 %rd234, %rd1, %rd233;
st.global.u8 [%rd234], %r9157;
add.s32 %r9160, %r9160, 1;
setp.eq.s32 %p320, %r9157, 255;
selp.b32 %r9159, 7, 8, %p320;
mov.u32 %r9157, 0;
mov.u32 %r9158, %r9157;
$L__BB2_289:
setp.ne.s32 %p321, %r8784, 0;
mov.u32 %r9186, %r8779;
@%p321 bra $L__BB2_286;
// Dispatch on %r133 and %r474 (both compared as signed):
//   either < 1          -> $L__BB2_338
//   min(%r133,%r474) < 3 -> $L__BB2_330
//   otherwise            -> $L__BB2_292
// Before the last two branches, %rd14 is set to
// %rd1 + %rd5 + (%r8524 + 17477), the store address used by the byte
// flushes in those targets.
$L__BB2_290:
setp.lt.s32 %p322, %r474, 1;
setp.lt.s32 %p323, %r133, 1;
or.pred %p324, %p323, %p322;
@%p324 bra $L__BB2_338;
min.s32 %r4762, %r133, %r474;
setp.lt.s32 %p325, %r4762, 3;
add.s32 %r4763, %r8524, 17477;
cvt.u64.u32 %rd235, %r4763;
add.s64 %rd236, %rd235, %rd5;
add.s64 %rd14, %rd1, %rd236;
@%p325 bra $L__BB2_330;
bra.uni $L__BB2_292;
// Increment the run counter %r8736; while it is below the threshold %r8734
// nothing else happens ($L__BB2_338).  Once the threshold is reached:
//   - a 1 bit is shifted into the accumulator (%rs1096 = (%rs1096 << 1) | 1)
//     and the bit counter %r8530 is decremented;
//   - if the counter hits 0 the byte is stored at [%rd14] (unless
//     %r8524 > 191, which instead sets the status to 1), %r8524 is
//     incremented and the counter restarts at 7 if the byte was 255, else 8;
//   - the state index %r8735 is incremented (clamped to 12), the run counter
//     is cleared and the threshold becomes 1 << e with e from %r8735:
//     <3 -> 0, <6 -> 1, <9 -> 2, <11 -> 3, ==11 -> 4, otherwise 5.
// The status ends up in %r8733.
$L__BB2_330:
add.s32 %r8736, %r8736, 1;
setp.lt.u32 %p372, %r8736, %r8734;
@%p372 bra $L__BB2_338;
shl.b16 %rs617, %rs1096, 1;
or.b16 %rs1096, %rs617, 1;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p373, %r8530, 0;
mov.u32 %r8839, %r8733;
@%p373 bra $L__BB2_334;
// Byte complete: bounds check, then store.
setp.gt.u32 %p374, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r8839, 1;
@%p374 bra $L__BB2_334;
and.b16 %rs619, %rs1096, 255;
st.global.u8 [%rd14], %rs1096;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p375, %rs619, 255;
selp.b32 %r8530, 7, 8, %p375;
mov.u16 %rs1096, 0;
mov.u32 %r8839, %r8733;
$L__BB2_334:
// State index up by one, clamped to 12; recompute the threshold.
add.s32 %r4851, %r8735, 1;
min.u32 %r8735, %r4851, 12;
setp.lt.u32 %p376, %r8735, 3;
mov.u32 %r8736, 0;
mov.u32 %r8842, %r8736;
@%p376 bra $L__BB2_337;
setp.lt.u32 %p377, %r8735, 6;
mov.u32 %r8842, 1;
@%p377 bra $L__BB2_337;
setp.lt.u32 %p378, %r8735, 9;
setp.eq.s32 %p379, %r8735, 11;
selp.b32 %r4853, 4, 5, %p379;
setp.lt.u32 %p380, %r8735, 11;
selp.b32 %r4854, 3, %r4853, %p380;
selp.b32 %r8842, 2, %r4854, %p378;
$L__BB2_337:
mov.u32 %r4856, 1;
shl.b32 %r8734, %r4856, %r8842;
mov.u32 %r8733, %r8839;
bra.uni $L__BB2_338;
// Shift a 0 bit into the accumulator (%rs1096 <<= 1) and decrement the bit
// counter %r8530.  If the counter hits 0 the byte is stored at [%rd14]
// (unless %r8524 > 191, which instead sets the status %r8832 to 1), %r8524
// is incremented, the accumulator is cleared and the counter restarts at 8.
// Then the exponent %r8802 is looked up from the state index %r8735:
//   <3 -> 0,  <6 -> 1,  <9 -> 2,  <11 -> 3,  ==11 -> 4,  otherwise 5.
$L__BB2_292:
shl.b16 %rs1096, %rs1096, 1;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p326, %r8530, 0;
mov.u32 %r8832, %r8733;
@%p326 bra $L__BB2_295;
setp.gt.u32 %p327, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r8832, 1;
@%p327 bra $L__BB2_295;
st.global.u8 [%rd14], %rs1096;
add.s32 %r8524, %r8524, 1;
mov.u32 %r8530, 8;
mov.u16 %rs1096, 0;
mov.u32 %r8832, %r8733;
$L__BB2_295:
setp.lt.u32 %p328, %r8735, 3;
mov.u32 %r8802, 0;
@%p328 bra $L__BB2_298;
setp.lt.u32 %p329, %r8735, 6;
mov.u32 %r8802, 1;
@%p329 bra $L__BB2_298;
setp.lt.u32 %p330, %r8735, 9;
setp.eq.s32 %p331, %r8735, 11;
selp.b32 %r4769, 4, 5, %p331;
setp.lt.u32 %p332, %r8735, 11;
selp.b32 %r4770, 3, %r4769, %p332;
selp.b32 %r8802, 2, %r4770, %p330;
// Peeled remainder of the bit emitter: writes the top (%r8802 & 3) bits of
// the %r8802-bit value in %r8736, most significant first, before the
// four-way unrolled loop at $L__BB2_311 handles the rest.  Nothing is
// emitted when %r8802 == 0 ($L__BB2_326).
// Each emitted bit is shifted into the accumulator with bfi
// (acc = (acc << 1) | bit) and the bit counter %r8530 is decremented; when
// it hits 0 the byte is stored at %rd1 + %rd5 + 17477 + %r8524 (unless
// %r8524 > 191, which instead sets the status to 1) and %r8524 is
// incremented.
// On exit %r8812 holds the number of bits still to emit and
// %r8815 / %r8832 the status.
$L__BB2_298:
setp.eq.s32 %p333, %r8802, 0;
@%p333 bra $L__BB2_326;
add.s32 %r691, %r8802, -1;
and.b32 %r692, %r8802, 3;
setp.eq.s32 %p334, %r692, 0;
mov.u32 %r8812, %r8802;
mov.u32 %r8815, %r8832;
@%p334 bra $L__BB2_311;
// First peeled bit: bit (%r8802 - 1).
mov.u32 %r4772, 1;
shl.b32 %r4773, %r4772, %r691;
and.b32 %r4774, %r4773, %r8736;
setp.ne.s32 %p335, %r4774, 0;
selp.u32 %r4775, 1, 0, %p335;
cvt.u32.u16 %r4776, %rs1096;
bfi.b32 %r4777, %r4776, %r4775, 1, 8;
cvt.u16.u32 %rs1096, %r4777;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p336, %r8530, 0;
mov.u32 %r8815, %r8832;
@%p336 bra $L__BB2_303;
setp.gt.u32 %p337, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r8815, %r4772;
@%p337 bra $L__BB2_303;
add.s32 %r4781, %r8524, 17477;
cvt.u64.u32 %rd237, %r4781;
add.s64 %rd238, %rd237, %rd5;
add.s64 %rd239, %rd1, %rd238;
st.global.u8 [%rd239], %rs1096;
add.s32 %r8524, %r8524, 1;
// This flush restarts the counter at 8 unconditionally (no 255 test).
mov.u32 %r8530, 8;
mov.u16 %rs1096, 0;
mov.u32 %r8815, %r8832;
$L__BB2_303:
setp.eq.s32 %p338, %r692, 1;
mov.u32 %r8832, %r8815;
mov.u32 %r8812, %r691;
@%p338 bra $L__BB2_311;
// Second peeled bit: bit (%r8802 - 2).
add.s32 %r8812, %r8802, -2;
mov.u32 %r4782, 1;
shl.b32 %r4783, %r4782, %r8812;
and.b32 %r4784, %r4783, %r8736;
setp.ne.s32 %p339, %r4784, 0;
selp.u32 %r4785, 1, 0, %p339;
cvt.u32.u16 %r4786, %rs1096;
bfi.b32 %r4787, %r4786, %r4785, 1, 8;
cvt.u16.u32 %rs1096, %r4787;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p340, %r8530, 0;
mov.u32 %r8806, %r8815;
@%p340 bra $L__BB2_307;
setp.gt.u32 %p341, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r8806, %r4782;
@%p341 bra $L__BB2_307;
add.s32 %r4790, %r8524, 17477;
cvt.u64.u32 %rd240, %r4790;
add.s64 %rd241, %rd240, %rd5;
add.s64 %rd242, %rd1, %rd241;
and.b16 %rs605, %rs1096, 255;
st.global.u8 [%rd242], %rs1096;
add.s32 %r8524, %r8524, 1;
// Counter restarts at 7 after a 255 byte, else 8.
setp.eq.s16 %p342, %rs605, 255;
selp.b32 %r8530, 7, 8, %p342;
mov.u16 %rs1096, 0;
mov.u32 %r8806, %r8815;
$L__BB2_307:
setp.eq.s32 %p343, %r692, 2;
mov.u32 %r8832, %r8806;
mov.u32 %r8815, %r8806;
@%p343 bra $L__BB2_311;
// Third peeled bit: bit (%r8802 - 3).
add.s32 %r8812, %r8802, -3;
mov.u32 %r4791, 1;
shl.b32 %r4792, %r4791, %r8812;
and.b32 %r4793, %r4792, %r8736;
setp.ne.s32 %p344, %r4793, 0;
selp.u32 %r4794, 1, 0, %p344;
cvt.u32.u16 %r4795, %rs1096;
bfi.b32 %r4796, %r4795, %r4794, 1, 8;
cvt.u16.u32 %rs1096, %r4796;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p345, %r8530, 0;
mov.u32 %r8832, %r8806;
mov.u32 %r8815, %r8806;
@%p345 bra $L__BB2_311;
setp.gt.u32 %p346, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r8832, %r4791;
mov.u32 %r8815, %r4791;
@%p346 bra $L__BB2_311;
add.s32 %r4801, %r8524, 17477;
cvt.u64.u32 %rd243, %r4801;
add.s64 %rd244, %rd243, %rd5;
add.s64 %rd245, %rd1, %rd244;
and.b16 %rs608, %rs1096, 255;
st.global.u8 [%rd245], %rs1096;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p347, %rs608, 255;
selp.b32 %r8530, 7, 8, %p347;
mov.u16 %rs1096, 0;
mov.u32 %r8832, %r8806;
mov.u32 %r8815, %r8806;
// Bit emitter, main loop (unrolled four times).  Skipped entirely when
// %r691 < 3.  Each of the four steps:
//   1. tests one bit of %r8736 (bit index %r8812-1, -2, -3, then -4),
//   2. shifts it into the low end of the byte accumulator with bfi,
//      i.e. acc = (acc << 1) | bit,
//   3. decrements the "bits left in this byte" counter,
//   4. when the counter reaches 0: if %r8524 > 191 nothing is stored, the
//      counter is left at 0 and the status register is set to 1; otherwise
//      the byte is stored at %rd1 + %rd5 + 17477 + %r8524, %r8524 is
//      incremented, the accumulator is cleared and the counter restarts at
//      7 when the stored byte was 255, else 8.
// The loop repeats until %r8812 has been decremented to 0.
// On exit: %rs1096 = accumulator, %r8530 = bits left, %r8832 = status.
$L__BB2_311:
setp.lt.u32 %p348, %r691, 3;
@%p348 bra $L__BB2_326;
mov.u32 %r8832, %r8815;
$L__BB2_313:
// Step 1: bit (%r8812 - 1) of %r8736.
add.s32 %r4802, %r8812, -1;
mov.u32 %r4803, 1;
shl.b32 %r4804, %r4803, %r4802;
and.b32 %r4805, %r4804, %r8736;
setp.ne.s32 %p349, %r4805, 0;
selp.u32 %r4806, 1, 0, %p349;
cvt.u32.u16 %r4807, %rs1096;
bfi.b32 %r8821, %r4807, %r4806, 1, 8;
add.s32 %r8822, %r8530, -1;
setp.ne.s32 %p350, %r8822, 0;
mov.u32 %r8820, %r8832;
@%p350 bra $L__BB2_316;
// Byte complete: bounds check, then store.
setp.gt.u32 %p351, %r8524, 191;
mov.u32 %r8822, 0;
mov.u32 %r8820, %r4803;
@%p351 bra $L__BB2_316;
cvt.u16.u32 %rs609, %r8821;
and.b16 %rs610, %rs609, 255;
add.s32 %r4811, %r8524, 17477;
cvt.u64.u32 %rd246, %r4811;
add.s64 %rd247, %rd246, %rd5;
add.s64 %rd248, %rd1, %rd247;
st.global.u8 [%rd248], %rs609;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p352, %rs610, 255;
selp.b32 %r8822, 7, 8, %p352;
mov.u32 %r8821, 0;
mov.u32 %r8820, %r8832;
$L__BB2_316:
// Step 2: bit (%r8812 - 2) of %r8736.
add.s32 %r4812, %r8812, -2;
shl.b32 %r4814, %r4803, %r4812;
and.b32 %r4815, %r4814, %r8736;
setp.ne.s32 %p353, %r4815, 0;
and.b32 %r4816, %r8821, 127;
selp.u32 %r4817, 1, 0, %p353;
bfi.b32 %r8825, %r4816, %r4817, 1, 7;
add.s32 %r8826, %r8822, -1;
setp.ne.s32 %p354, %r8826, 0;
mov.u32 %r8824, %r8820;
@%p354 bra $L__BB2_319;
setp.gt.u32 %p355, %r8524, 191;
mov.u32 %r8826, 0;
mov.u32 %r8824, 1;
@%p355 bra $L__BB2_319;
cvt.u16.u32 %rs611, %r8825;
and.b16 %rs612, %rs611, 255;
add.s32 %r4821, %r8524, 17477;
cvt.u64.u32 %rd249, %r4821;
add.s64 %rd250, %rd249, %rd5;
add.s64 %rd251, %rd1, %rd250;
st.global.u8 [%rd251], %rs611;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p356, %rs612, 255;
selp.b32 %r8826, 7, 8, %p356;
mov.u32 %r8825, 0;
mov.u32 %r8824, %r8820;
$L__BB2_319:
// Step 3: bit (%r8812 - 3) of %r8736.
add.s32 %r4822, %r8812, -3;
mov.u32 %r4823, 1;
shl.b32 %r4824, %r4823, %r4822;
and.b32 %r4825, %r4824, %r8736;
setp.ne.s32 %p357, %r4825, 0;
and.b32 %r4826, %r8825, 127;
selp.u32 %r4827, 1, 0, %p357;
bfi.b32 %r8829, %r4826, %r4827, 1, 7;
add.s32 %r8830, %r8826, -1;
setp.ne.s32 %p358, %r8830, 0;
mov.u32 %r8828, %r8824;
@%p358 bra $L__BB2_322;
setp.gt.u32 %p359, %r8524, 191;
mov.u32 %r8830, 0;
mov.u32 %r8828, %r4823;
@%p359 bra $L__BB2_322;
cvt.u16.u32 %rs613, %r8829;
and.b16 %rs614, %rs613, 255;
add.s32 %r4831, %r8524, 17477;
cvt.u64.u32 %rd252, %r4831;
add.s64 %rd253, %rd252, %rd5;
add.s64 %rd254, %rd1, %rd253;
st.global.u8 [%rd254], %rs613;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p360, %rs614, 255;
selp.b32 %r8830, 7, 8, %p360;
mov.u32 %r8829, 0;
mov.u32 %r8828, %r8824;
$L__BB2_322:
// Step 4: %r8812 -= 4, then bit %r8812 of %r8736; result goes back into
// the loop-carried registers %rs1096 / %r8530 / %r8832.
add.s32 %r8812, %r8812, -4;
shl.b32 %r4833, %r4823, %r8812;
and.b32 %r4834, %r4833, %r8736;
setp.ne.s32 %p361, %r4834, 0;
and.b32 %r4835, %r8829, 127;
selp.u32 %r4836, 1, 0, %p361;
bfi.b32 %r4837, %r4835, %r4836, 1, 15;
cvt.u16.u32 %rs1096, %r4837;
add.s32 %r8530, %r8830, -1;
setp.ne.s32 %p362, %r8530, 0;
mov.u32 %r8832, %r8828;
@%p362 bra $L__BB2_325;
setp.gt.u32 %p363, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r8832, 1;
@%p363 bra $L__BB2_325;
add.s32 %r4840, %r8524, 17477;
cvt.u64.u32 %rd255, %r4840;
add.s64 %rd256, %rd255, %rd5;
add.s64 %rd257, %rd1, %rd256;
and.b16 %rs616, %rs1096, 255;
st.global.u8 [%rd257], %rs1096;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p364, %rs616, 255;
selp.b32 %r8530, 7, 8, %p364;
mov.u16 %rs1096, 0;
mov.u32 %r8832, %r8828;
$L__BB2_325:
// Continue while bit positions remain.
setp.ne.s32 %p365, %r8812, 0;
@%p365 bra $L__BB2_313;
// Step the state index %r8735 down by one (saturating at 0), clear the run
// counter %r8736, and recompute the threshold %r8734 = 1 << e, where e is
// chosen from the new %r8735:
//   <3 -> 0,  <6 -> 1,  <9 -> 2,  <11 -> 3,  ==11 -> 4,  otherwise 5.
// NOTE(review): this table looks like the HTJ2K MEL exponent table; confirm
// against the kernel source.
// The status value %r8832 is forwarded in %r8733; execution falls through
// to the label that follows this region.
$L__BB2_326:
add.s32 %r4842, %r8735, -1;
setp.eq.s32 %p366, %r8735, 0;
mov.u32 %r8736, 0;
selp.b32 %r8735, 0, %r4842, %p366;
setp.lt.u32 %p367, %r8735, 3;
mov.u32 %r8838, %r8736;
@%p367 bra $L__BB2_329;
setp.lt.u32 %p368, %r8735, 6;
mov.u32 %r8838, 1;
@%p368 bra $L__BB2_329;
setp.lt.u32 %p369, %r8735, 9;
setp.eq.s32 %p370, %r8735, 11;
selp.b32 %r4844, 4, 5, %p370;
setp.lt.u32 %p371, %r8735, 11;
selp.b32 %r4845, 3, %r4844, %p371;
selp.b32 %r8838, 2, %r4845, %p369;
$L__BB2_329:
mov.u32 %r4847, 1;
shl.b32 %r8734, %r4847, %r8838;
mov.u32 %r8733, %r8832;
// Dispatch: when both %r133 > 2 and %r474 > 2 (signed) go to $L__BB2_387,
// otherwise to $L__BB2_339.  %p382 (%r133 > 2) is reused by $L__BB2_339.
$L__BB2_338:
setp.gt.s32 %p381, %r474, 2;
setp.gt.s32 %p382, %r133, 2;
and.pred %p383, %p382, %p381;
@%p383 bra $L__BB2_387;
bra.uni $L__BB2_339;
// Loads seven bytes relative to %rd9:
//   %rs122          at offset %r346 - 11   (address kept in %rd16)
//   %rs123, %rs124  at offsets %r346 - 10 and %r346 - 9
//   %rs125, %rs126  at offsets %r474*6 - 12 and %r474*6 - 11
//   %rs127, %rs128  at offsets %r474*6 - 10 and %r474*6 - 9
// and then packs the first (value, length) pair: value = byte at
// [%rd16 - 1], length = %rs122.  Nothing is packed when the length is 0.
//
// Packer state shared with the following regions:
//   %rs1165 = byte accumulator (bits are OR-ed in at offset %r8971),
//   %r8971  = bits already in the accumulator,
//   %r8970  = amount by which the current byte's capacity is reduced
//             (capacity = 8 - %r8970 - %r8971),
//   %r8972  = bytes stored so far.
// Bytes are stored at %rd1 + %rd5 + (20548 - %r8972), i.e. at descending
// addresses.  If %r8972 > 2879 the loop stops and the status is set to 1.
// Status flows %r8688 -> %r8936.
$L__BB2_387:
add.s32 %r4977, %r346, -11;
cvt.u64.u32 %rd287, %r4977;
add.s64 %rd16, %rd9, %rd287;
ld.global.u8 %rs122, [%rd16];
add.s32 %r4978, %r346, -10;
cvt.u64.u32 %rd289, %r4978;
add.s64 %rd290, %rd9, %rd289;
ld.global.u8 %rs123, [%rd290];
ld.global.u8 %rs124, [%rd290+1];
mul.lo.s32 %r4979, %r474, 6;
add.s32 %r4980, %r4979, -12;
cvt.u64.u32 %rd291, %r4980;
add.s64 %rd292, %rd9, %rd291;
ld.global.u8 %rs125, [%rd292];
ld.global.u8 %rs126, [%rd292+1];
add.s32 %r4981, %r4979, -10;
cvt.u64.u32 %rd293, %r4981;
add.s64 %rd294, %rd9, %rd293;
ld.global.u8 %rs127, [%rd294];
ld.global.u8 %rs128, [%rd294+1];
setp.eq.s16 %p451, %rs122, 0;
mov.u32 %r8936, %r8688;
@%p451 bra $L__BB2_394;
ld.global.u8 %r8926, [%rd16+-1];
cvt.u32.u16 %r8925, %rs122;
$L__BB2_389:
// %r901 = bits still to pack; bail out with status 1 when the byte budget
// is exhausted.
mov.u32 %r901, %r8925;
setp.gt.u32 %p452, %r8972, 2879;
mov.u32 %r8936, 1;
@%p452 bra $L__BB2_394;
// n = min(capacity, remaining); mask = (n == 32) ? ~0 : ~(-1 << n).
mov.u32 %r4983, 8;
sub.s32 %r4984, %r4983, %r8970;
sub.s32 %r4985, %r4984, %r8971;
min.u32 %r4986, %r4985, %r901;
setp.eq.s32 %p453, %r4986, 32;
mov.u32 %r4987, -1;
shl.b32 %r4988, %r4987, %r4986;
not.b32 %r4989, %r4988;
selp.b32 %r4990, -1, %r4989, %p453;
and.b32 %r4991, %r4990, %r8926;
shl.b32 %r4992, %r4991, %r8971;
cvt.u16.u32 %rs652, %r4992;
or.b16 %rs1165, %rs1165, %rs652;
add.s32 %r8971, %r4986, %r8971;
sub.s32 %r8925, %r901, %r4986;
shr.u32 %r8926, %r8926, %r4986;
// Byte not yet full -> no store.
setp.gt.u32 %p454, %r4985, %r901;
@%p454 bra $L__BB2_393;
// Byte full.  %r8970 is cleared; if it was non-zero and the low byte is
// not 127 the store is deferred (the byte can take one more bit).
setp.ne.s32 %p455, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs653, %rs1165, 255;
setp.ne.s16 %p456, %rs653, 127;
and.pred %p457, %p455, %p456;
@%p457 bra $L__BB2_393;
mov.u32 %r4995, 20548;
sub.s32 %r4996, %r4995, %r8972;
cvt.u64.u32 %rd295, %r4996;
add.s64 %rd296, %rd295, %rd5;
add.s64 %rd297, %rd1, %rd296;
st.global.u8 [%rd297], %rs1165;
add.s32 %r8972, %r8972, 1;
// Next byte loses one bit of capacity when the stored byte was > 143.
setp.gt.u16 %p458, %rs653, 143;
selp.u32 %r8970, 1, 0, %p458;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_393:
setp.ne.s32 %p459, %r8925, 0;
mov.u32 %r8936, %r8688;
@%p459 bra $L__BB2_389;
// Packs the pair (value = %rs125, length = %rs126); skipped when the length
// is 0.  Bits are OR-ed into %rs1165 at offset %r8971; the current byte
// holds 8 - %r8970 - %r8971 more bits.  Completed bytes are stored at
// %rd1 + %rd5 + (20548 - %r8972) and %r8972 is incremented.  If
// %r8972 > 2879 the loop stops with status 1.  Status flows %r8936 -> %r8948.
$L__BB2_394:
setp.eq.s16 %p460, %rs126, 0;
mov.u32 %r8948, %r8936;
@%p460 bra $L__BB2_401;
cvt.u32.u16 %r4997, %rs125;
and.b32 %r8938, %r4997, 255;
cvt.u32.u16 %r4998, %rs126;
and.b32 %r8937, %r4998, 255;
$L__BB2_396:
mov.u32 %r920, %r8937;
setp.gt.u32 %p461, %r8972, 2879;
mov.u32 %r8948, 1;
@%p461 bra $L__BB2_401;
// n = min(capacity, remaining); mask = (n == 32) ? ~0 : ~(-1 << n).
mov.u32 %r5000, 8;
sub.s32 %r5001, %r5000, %r8970;
sub.s32 %r5002, %r5001, %r8971;
min.u32 %r5003, %r5002, %r920;
setp.eq.s32 %p462, %r5003, 32;
mov.u32 %r5004, -1;
shl.b32 %r5005, %r5004, %r5003;
not.b32 %r5006, %r5005;
selp.b32 %r5007, -1, %r5006, %p462;
and.b32 %r5008, %r5007, %r8938;
shl.b32 %r5009, %r5008, %r8971;
cvt.u16.u32 %rs657, %r5009;
or.b16 %rs1165, %rs1165, %rs657;
add.s32 %r8971, %r5003, %r8971;
sub.s32 %r8937, %r920, %r5003;
shr.u32 %r8938, %r8938, %r5003;
// Byte not yet full -> no store.
setp.gt.u32 %p463, %r5002, %r920;
@%p463 bra $L__BB2_400;
// Byte full: clear %r8970; defer the store if it was non-zero and the low
// byte is not 127.
setp.ne.s32 %p464, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs658, %rs1165, 255;
setp.ne.s16 %p465, %rs658, 127;
and.pred %p466, %p464, %p465;
@%p466 bra $L__BB2_400;
mov.u32 %r5012, 20548;
sub.s32 %r5013, %r5012, %r8972;
cvt.u64.u32 %rd298, %r5013;
add.s64 %rd299, %rd298, %rd5;
add.s64 %rd300, %rd1, %rd299;
st.global.u8 [%rd300], %rs1165;
add.s32 %r8972, %r8972, 1;
// %r8970 = 1 for the next byte when the stored byte was > 143.
setp.gt.u16 %p467, %rs658, 143;
selp.u32 %r8970, 1, 0, %p467;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_400:
setp.ne.s32 %p468, %r8937, 0;
mov.u32 %r8948, %r8936;
@%p468 bra $L__BB2_396;
// Packs the pair (value = %rs123, length = %rs124); skipped when the length
// is 0.  Bits are OR-ed into %rs1165 at offset %r8971; the current byte
// holds 8 - %r8970 - %r8971 more bits.  Completed bytes are stored at
// %rd1 + %rd5 + (20548 - %r8972) and %r8972 is incremented.  If
// %r8972 > 2879 the loop stops with status 1.  Status flows %r8948 -> %r8960.
$L__BB2_401:
setp.eq.s16 %p469, %rs124, 0;
mov.u32 %r8960, %r8948;
@%p469 bra $L__BB2_408;
cvt.u32.u16 %r5014, %rs124;
and.b32 %r8949, %r5014, 255;
cvt.u32.u16 %r5015, %rs123;
and.b32 %r8950, %r5015, 255;
$L__BB2_403:
mov.u32 %r939, %r8949;
setp.gt.u32 %p470, %r8972, 2879;
mov.u32 %r8960, 1;
@%p470 bra $L__BB2_408;
// n = min(capacity, remaining); mask = (n == 32) ? ~0 : ~(-1 << n).
mov.u32 %r5017, 8;
sub.s32 %r5018, %r5017, %r8970;
sub.s32 %r5019, %r5018, %r8971;
min.u32 %r5020, %r5019, %r939;
setp.eq.s32 %p471, %r5020, 32;
mov.u32 %r5021, -1;
shl.b32 %r5022, %r5021, %r5020;
not.b32 %r5023, %r5022;
selp.b32 %r5024, -1, %r5023, %p471;
and.b32 %r5025, %r5024, %r8950;
shl.b32 %r5026, %r5025, %r8971;
cvt.u16.u32 %rs662, %r5026;
or.b16 %rs1165, %rs1165, %rs662;
add.s32 %r8971, %r5020, %r8971;
sub.s32 %r8949, %r939, %r5020;
shr.u32 %r8950, %r8950, %r5020;
// Byte not yet full -> no store.
setp.gt.u32 %p472, %r5019, %r939;
@%p472 bra $L__BB2_407;
// Byte full: clear %r8970; defer the store if it was non-zero and the low
// byte is not 127.
setp.ne.s32 %p473, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs663, %rs1165, 255;
setp.ne.s16 %p474, %rs663, 127;
and.pred %p475, %p473, %p474;
@%p475 bra $L__BB2_407;
mov.u32 %r5029, 20548;
sub.s32 %r5030, %r5029, %r8972;
cvt.u64.u32 %rd301, %r5030;
add.s64 %rd302, %rd301, %rd5;
add.s64 %rd303, %rd1, %rd302;
st.global.u8 [%rd303], %rs1165;
add.s32 %r8972, %r8972, 1;
// %r8970 = 1 for the next byte when the stored byte was > 143.
setp.gt.u16 %p476, %rs663, 143;
selp.u32 %r8970, 1, 0, %p476;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_407:
setp.ne.s32 %p477, %r8949, 0;
mov.u32 %r8960, %r8948;
@%p477 bra $L__BB2_403;
// Packs the pair (value = %rs127, length = %rs128), then continues at
// $L__BB2_415 (also taken directly when the length is 0 or the byte budget
// is exhausted).
// Bits are OR-ed into %rs1165 at offset %r8971; the current byte holds
// 8 - %r8970 - %r8971 more bits.  Completed bytes are stored at
// %rd1 + %rd5 + (20548 - %r8972) and %r8972 is incremented.  If
// %r8972 > 2879 the loop stops with status 1.  Status flows %r8960 -> %r8969.
$L__BB2_408:
setp.eq.s16 %p478, %rs128, 0;
mov.u32 %r8969, %r8960;
@%p478 bra $L__BB2_415;
cvt.u32.u16 %r5031, %rs127;
and.b32 %r8962, %r5031, 255;
cvt.u32.u16 %r5032, %rs128;
and.b32 %r8961, %r5032, 255;
$L__BB2_410:
mov.u32 %r958, %r8961;
setp.gt.u32 %p479, %r8972, 2879;
mov.u32 %r8969, 1;
@%p479 bra $L__BB2_415;
// n = min(capacity, remaining); mask = (n == 32) ? ~0 : ~(-1 << n).
mov.u32 %r5034, 8;
sub.s32 %r5035, %r5034, %r8970;
sub.s32 %r5036, %r5035, %r8971;
min.u32 %r5037, %r5036, %r958;
setp.eq.s32 %p480, %r5037, 32;
mov.u32 %r5038, -1;
shl.b32 %r5039, %r5038, %r5037;
not.b32 %r5040, %r5039;
selp.b32 %r5041, -1, %r5040, %p480;
and.b32 %r5042, %r5041, %r8962;
shl.b32 %r5043, %r5042, %r8971;
cvt.u16.u32 %rs667, %r5043;
or.b16 %rs1165, %rs1165, %rs667;
add.s32 %r8971, %r5037, %r8971;
sub.s32 %r8961, %r958, %r5037;
shr.u32 %r8962, %r8962, %r5037;
// Byte not yet full -> no store.
setp.gt.u32 %p481, %r5036, %r958;
@%p481 bra $L__BB2_414;
// Byte full: clear %r8970; defer the store if it was non-zero and the low
// byte is not 127.
setp.ne.s32 %p482, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs668, %rs1165, 255;
setp.ne.s16 %p483, %rs668, 127;
and.pred %p484, %p482, %p483;
@%p484 bra $L__BB2_414;
mov.u32 %r5046, 20548;
sub.s32 %r5047, %r5046, %r8972;
cvt.u64.u32 %rd304, %r5047;
add.s64 %rd305, %rd304, %rd5;
add.s64 %rd306, %rd1, %rd305;
st.global.u8 [%rd306], %rs1165;
add.s32 %r8972, %r8972, 1;
// %r8970 = 1 for the next byte when the stored byte was > 143.
setp.gt.u16 %p485, %rs668, 143;
selp.u32 %r8970, 1, 0, %p485;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_414:
setp.ne.s32 %p486, %r8961, 0;
mov.u32 %r8969, %r8960;
@%p486 bra $L__BB2_410;
bra.uni $L__BB2_415;
// Dispatch: when %r474 > 0 and %p382 holds go to $L__BB2_368, otherwise to
// $L__BB2_340.  %p382 is not computed here; it is the predicate set by the
// code that branches to this label.
$L__BB2_339:
setp.gt.s32 %p384, %r474, 0;
and.pred %p386, %p382, %p384;
@%p386 bra $L__BB2_368;
bra.uni $L__BB2_340;
// Loads three bytes ([%rd11+1], [%rd12], [%rd12+1]) and then packs the first
// (value, length) pair: value = byte at [%rd11], length = %rs108.  Nothing
// is packed when the length is 0.
//
// Packer state shared with the following regions:
//   %rs1165 = byte accumulator (bits are OR-ed in at offset %r8971),
//   %r8971  = bits already in the accumulator,
//   %r8970  = amount by which the current byte's capacity is reduced
//             (capacity = 8 - %r8970 - %r8971),
//   %r8972  = bytes stored so far.
// Bytes are stored at %rd1 + %rd5 + (20548 - %r8972), i.e. at descending
// addresses.  If %r8972 > 2879 the loop stops and the status is set to 1.
// Status flows %r8688 -> %r8904.
$L__BB2_368:
ld.global.u8 %rs108, [%rd11+1];
ld.global.u8 %rs109, [%rd12];
ld.global.u8 %rs110, [%rd12+1];
setp.eq.s16 %p425, %rs108, 0;
mov.u32 %r8904, %r8688;
@%p425 bra $L__BB2_375;
ld.global.u8 %r8894, [%rd11];
cvt.u32.u16 %r8893, %rs108;
$L__BB2_370:
// %r849 = bits still to pack; bail out with status 1 when the byte budget
// is exhausted.
mov.u32 %r849, %r8893;
setp.gt.u32 %p426, %r8972, 2879;
mov.u32 %r8904, 1;
@%p426 bra $L__BB2_375;
// n = min(capacity, remaining); mask = (n == 32) ? ~0 : ~(-1 << n).
mov.u32 %r4929, 8;
sub.s32 %r4930, %r4929, %r8970;
sub.s32 %r4931, %r4930, %r8971;
min.u32 %r4932, %r4931, %r849;
setp.eq.s32 %p427, %r4932, 32;
mov.u32 %r4933, -1;
shl.b32 %r4934, %r4933, %r4932;
not.b32 %r4935, %r4934;
selp.b32 %r4936, -1, %r4935, %p427;
and.b32 %r4937, %r4936, %r8894;
shl.b32 %r4938, %r4937, %r8971;
cvt.u16.u32 %rs639, %r4938;
or.b16 %rs1165, %rs1165, %rs639;
add.s32 %r8971, %r4932, %r8971;
sub.s32 %r8893, %r849, %r4932;
shr.u32 %r8894, %r8894, %r4932;
// Byte not yet full -> no store.
setp.gt.u32 %p428, %r4931, %r849;
@%p428 bra $L__BB2_374;
// Byte full.  %r8970 is cleared; if it was non-zero and the low byte is
// not 127 the store is deferred (the byte can take one more bit).
setp.ne.s32 %p429, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs640, %rs1165, 255;
setp.ne.s16 %p430, %rs640, 127;
and.pred %p431, %p429, %p430;
@%p431 bra $L__BB2_374;
mov.u32 %r4941, 20548;
sub.s32 %r4942, %r4941, %r8972;
cvt.u64.u32 %rd278, %r4942;
add.s64 %rd279, %rd278, %rd5;
add.s64 %rd280, %rd1, %rd279;
st.global.u8 [%rd280], %rs1165;
add.s32 %r8972, %r8972, 1;
// Next byte loses one bit of capacity when the stored byte was > 143.
setp.gt.u16 %p432, %rs640, 143;
selp.u32 %r8970, 1, 0, %p432;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_374:
setp.ne.s32 %p433, %r8893, 0;
mov.u32 %r8904, %r8688;
@%p433 bra $L__BB2_370;
// Packs a single bit: value = %r474 - 1, length = 1 (only the lowest bit of
// the value is consumed).  Also prepares %r8917 (= %rs110 & 255) and %r8918
// (= %rs109 & 255) for the code at $L__BB2_381.
// Bits are OR-ed into %rs1165 at offset %r8971; the current byte holds
// 8 - %r8970 - %r8971 more bits.  Completed bytes are stored at
// %rd1 + %rd5 + (20548 - %r8972) and %r8972 is incremented.  If
// %r8972 > 2879 the loop stops with status 1.  Status flows %r8904 -> %r8916.
$L__BB2_375:
add.s32 %r8906, %r474, -1;
cvt.u32.u16 %r4944, %rs110;
and.b32 %r8917, %r4944, 255;
cvt.u32.u16 %r4945, %rs109;
and.b32 %r8918, %r4945, 255;
// %r4943 = 1 serves both as the initial bit count and as the overflow
// status value.
mov.u32 %r4943, 1;
mov.u32 %r8905, %r4943;
$L__BB2_376:
mov.u32 %r869, %r8905;
setp.gt.u32 %p434, %r8972, 2879;
mov.u32 %r8916, %r4943;
@%p434 bra $L__BB2_381;
// n = min(capacity, remaining); mask = (n == 32) ? ~0 : ~(-1 << n).
mov.u32 %r4947, 8;
sub.s32 %r4948, %r4947, %r8970;
sub.s32 %r4949, %r4948, %r8971;
min.u32 %r4950, %r4949, %r869;
setp.eq.s32 %p435, %r4950, 32;
mov.u32 %r4951, -1;
shl.b32 %r4952, %r4951, %r4950;
not.b32 %r4953, %r4952;
selp.b32 %r4954, -1, %r4953, %p435;
and.b32 %r4955, %r4954, %r8906;
shl.b32 %r4956, %r4955, %r8971;
cvt.u16.u32 %rs643, %r4956;
or.b16 %rs1165, %rs1165, %rs643;
add.s32 %r8971, %r4950, %r8971;
sub.s32 %r8905, %r869, %r4950;
shr.u32 %r8906, %r8906, %r4950;
// Byte not yet full -> no store.
setp.gt.u32 %p436, %r4949, %r869;
@%p436 bra $L__BB2_380;
// Byte full: clear %r8970; defer the store if it was non-zero and the low
// byte is not 127.
setp.ne.s32 %p437, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs644, %rs1165, 255;
setp.ne.s16 %p438, %rs644, 127;
and.pred %p439, %p437, %p438;
@%p439 bra $L__BB2_380;
mov.u32 %r4959, 20548;
sub.s32 %r4960, %r4959, %r8972;
cvt.u64.u32 %rd281, %r4960;
add.s64 %rd282, %rd281, %rd5;
add.s64 %rd283, %rd1, %rd282;
st.global.u8 [%rd283], %rs1165;
add.s32 %r8972, %r8972, 1;
// %r8970 = 1 for the next byte when the stored byte was > 143.
setp.gt.u16 %p440, %rs644, 143;
selp.u32 %r8970, 1, 0, %p440;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_380:
setp.ne.s32 %p441, %r8905, 0;
mov.u32 %r8916, %r8904;
@%p441 bra $L__BB2_376;
$L__BB2_381:
setp.eq.s16 %p442, %rs110, 0;
mov.u32 %r8969, %r8916;
@%p442 bra $L__BB2_415;
$L__BB2_382:
mov.u32 %r886, %r8917;
setp.gt.u32 %p443, %r8972, 2879;
mov.u32 %r8969, 1;
@%p443 bra $L__BB2_415;
mov.u32 %r4962, 8;
sub.s32 %r4963, %r4962, %r8970;
sub.s32 %r4964, %r4963, %r8971;
min.u32 %r4965, %r4964, %r886;
setp.eq.s32 %p444, %r4965, 32;
mov.u32 %r4966, -1;
shl.b32 %r4967, %r4966, %r4965;
not.b32 %r4968, %r4967;
selp.b32 %r4969, -1, %r4968, %p444;
and.b32 %r4970, %r4969, %r8918;
shl.b32 %r4971, %r4970, %r8971;
cvt.u16.u32 %rs648, %r4971;
or.b16 %rs1165, %rs1165, %rs648;
add.s32 %r8971, %r4965, %r8971;
sub.s32 %r8917, %r886, %r4965;
shr.u32 %r8918, %r8918, %r4965;
setp.gt.u32 %p445, %r4964, %r886;
@%p445 bra $L__BB2_386;
setp.ne.s32 %p446, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs649, %rs1165, 255;
setp.ne.s16 %p447, %rs649, 127;
and.pred %p448, %p446, %p447;
@%p448 bra $L__BB2_386;
mov.u32 %r4974, 20548;
sub.s32 %r4975, %r4974, %r8972;
cvt.u64.u32 %rd284, %r4975;
add.s64 %rd285, %rd284, %rd5;
add.s64 %rd286, %rd1, %rd285;
st.global.u8 [%rd286], %rs1165;
add.s32 %r8972, %r8972, 1;
setp.gt.u16 %p449, %rs649, 143;
selp.u32 %r8970, 1, 0, %p449;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_386:
setp.eq.s32 %p450, %r8917, 0;
mov.u32 %r8969, %r8916;
@%p450 bra $L__BB2_415;
bra.uni $L__BB2_382;
$L__BB2_340:
setp.gt.s32 %p388, %r133, 0;
selp.b32 %r4857, %r346, 0, %p388;
cvt.u64.u32 %rd258, %r4857;
add.s64 %rd15, %rd9, %rd258;
ld.global.u8 %rs86, [%rd15+1];
add.s32 %r4858, %r4857, 2;
cvt.u64.u32 %rd260, %r4858;
add.s64 %rd261, %rd9, %rd260;
ld.global.u8 %rs87, [%rd261];
ld.global.u8 %rs88, [%rd261+1];
mul.lo.s32 %r4859, %r474, 6;
selp.b32 %r4860, %r4859, 0, %p384;
cvt.u64.u32 %rd262, %r4860;
add.s64 %rd263, %rd9, %rd262;
ld.global.u8 %rs89, [%rd263];
ld.global.u8 %rs90, [%rd263+1];
add.s32 %r4861, %r4860, 2;
cvt.u64.u32 %rd264, %r4861;
add.s64 %rd265, %rd9, %rd264;
ld.global.u8 %rs91, [%rd265];
ld.global.u8 %rs92, [%rd265+1];
setp.eq.s16 %p389, %rs86, 0;
mov.u32 %r8860, %r8688;
@%p389 bra $L__BB2_347;
ld.global.u8 %r8850, [%rd15];
cvt.u32.u16 %r8849, %rs86;
// Bit-packing loop: appends the low %r8849 bits of %r8850 (LSB first) to the
// 8-bit accumulator %rs1165 and flushes completed bytes to global memory.
//   %r8849 = bits still to emit      %r8850 = remaining source bits
//   %r8971 = bits already used in the accumulator
//   %r8970 = bits withheld from the current byte (0 or 1)
//   %r8972 = number of bytes stored so far
//   %r8860 = status on exit: 1 if the byte limit was hit, else %r8688
// NOTE(review): the 0x7F / 0x8F thresholds and the descending store address
// look like an HTJ2K VLC-segment writer with bit stuffing -- confirm against
// the CUDA source.
$L__BB2_342:
mov.u32 %r777, %r8849;
// Refuse to write once more than 2879 bytes have been stored; exit with status 1.
setp.gt.u32 %p390, %r8972, 2879;
mov.u32 %r8860, 1;
@%p390 bra $L__BB2_347;
// avail (%r4865) = 8 - %r8970 - %r8971; take (%r4866) = min(avail, remaining).
mov.u32 %r4863, 8;
sub.s32 %r4864, %r4863, %r8970;
sub.s32 %r4865, %r4864, %r8971;
min.u32 %r4866, %r4865, %r777;
// mask = low `take` bits set (all ones when take == 32).
setp.eq.s32 %p391, %r4866, 32;
mov.u32 %r4867, -1;
shl.b32 %r4868, %r4867, %r4866;
not.b32 %r4869, %r4868;
selp.b32 %r4870, -1, %r4869, %p391;
// Insert the masked bits at bit position %r8971 of the accumulator.
and.b32 %r4871, %r4870, %r8850;
shl.b32 %r4872, %r4871, %r8971;
cvt.u16.u32 %rs620, %r4872;
or.b16 %rs1165, %rs1165, %rs620;
// Advance the fill position and consume `take` bits from the source.
add.s32 %r8971, %r4866, %r8971;
sub.s32 %r8849, %r777, %r4866;
shr.u32 %r8850, %r8850, %r4866;
// avail > remaining means the byte is not full yet: nothing to flush.
setp.gt.u32 %p392, %r4865, %r777;
@%p392 bra $L__BB2_346;
// Byte is full. If one bit was being withheld and the byte is not 0x7F,
// release the withheld bit (%r8970 = 0) and keep filling instead of storing.
setp.ne.s32 %p393, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs621, %rs1165, 255;
setp.ne.s16 %p394, %rs621, 127;
and.pred %p395, %p393, %p394;
@%p395 bra $L__BB2_346;
// Store the byte at %rd1 + %rd5 + (20548 - %r8972): the address decreases
// as %r8972 grows, so this stream is written back to front.
mov.u32 %r4875, 20548;
sub.s32 %r4876, %r4875, %r8972;
cvt.u64.u32 %rd266, %r4876;
add.s64 %rd267, %rd266, %rd5;
add.s64 %rd268, %rd1, %rd267;
st.global.u8 [%rd268], %rs1165;
add.s32 %r8972, %r8972, 1;
// After a byte greater than 0x8F, withhold one bit from the next byte.
setp.gt.u16 %p396, %rs621, 143;
selp.u32 %r8970, 1, 0, %p396;
// Start a fresh, empty accumulator.
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_346:
// Repeat until every requested bit is emitted; normal exit carries %r8688.
setp.ne.s32 %p397, %r8849, 0;
mov.u32 %r8860, %r8688;
@%p397 bra $L__BB2_342;
$L__BB2_347:
setp.eq.s16 %p398, %rs90, 0;
mov.u32 %r8872, %r8860;
@%p398 bra $L__BB2_354;
cvt.u32.u16 %r4877, %rs89;
and.b32 %r8862, %r4877, 255;
cvt.u32.u16 %r4878, %rs90;
and.b32 %r8861, %r4878, 255;
$L__BB2_349:
mov.u32 %r796, %r8861;
setp.gt.u32 %p399, %r8972, 2879;
mov.u32 %r8872, 1;
@%p399 bra $L__BB2_354;
mov.u32 %r4880, 8;
sub.s32 %r4881, %r4880, %r8970;
sub.s32 %r4882, %r4881, %r8971;
min.u32 %r4883, %r4882, %r796;
setp.eq.s32 %p400, %r4883, 32;
mov.u32 %r4884, -1;
shl.b32 %r4885, %r4884, %r4883;
not.b32 %r4886, %r4885;
selp.b32 %r4887, -1, %r4886, %p400;
and.b32 %r4888, %r4887, %r8862;
shl.b32 %r4889, %r4888, %r8971;
cvt.u16.u32 %rs625, %r4889;
or.b16 %rs1165, %rs1165, %rs625;
add.s32 %r8971, %r4883, %r8971;
sub.s32 %r8861, %r796, %r4883;
shr.u32 %r8862, %r8862, %r4883;
setp.gt.u32 %p401, %r4882, %r796;
@%p401 bra $L__BB2_353;
setp.ne.s32 %p402, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs626, %rs1165, 255;
setp.ne.s16 %p403, %rs626, 127;
and.pred %p404, %p402, %p403;
@%p404 bra $L__BB2_353;
mov.u32 %r4892, 20548;
sub.s32 %r4893, %r4892, %r8972;
cvt.u64.u32 %rd269, %r4893;
add.s64 %rd270, %rd269, %rd5;
add.s64 %rd271, %rd1, %rd270;
st.global.u8 [%rd271], %rs1165;
add.s32 %r8972, %r8972, 1;
setp.gt.u16 %p405, %rs626, 143;
selp.u32 %r8970, 1, 0, %p405;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_353:
setp.ne.s32 %p406, %r8861, 0;
mov.u32 %r8872, %r8860;
@%p406 bra $L__BB2_349;
$L__BB2_354:
setp.eq.s16 %p407, %rs88, 0;
mov.u32 %r8884, %r8872;
@%p407 bra $L__BB2_361;
cvt.u32.u16 %r4894, %rs88;
and.b32 %r8873, %r4894, 255;
cvt.u32.u16 %r4895, %rs87;
and.b32 %r8874, %r4895, 255;
$L__BB2_356:
mov.u32 %r815, %r8873;
setp.gt.u32 %p408, %r8972, 2879;
mov.u32 %r8884, 1;
@%p408 bra $L__BB2_361;
mov.u32 %r4897, 8;
sub.s32 %r4898, %r4897, %r8970;
sub.s32 %r4899, %r4898, %r8971;
min.u32 %r4900, %r4899, %r815;
setp.eq.s32 %p409, %r4900, 32;
mov.u32 %r4901, -1;
shl.b32 %r4902, %r4901, %r4900;
not.b32 %r4903, %r4902;
selp.b32 %r4904, -1, %r4903, %p409;
and.b32 %r4905, %r4904, %r8874;
shl.b32 %r4906, %r4905, %r8971;
cvt.u16.u32 %rs630, %r4906;
or.b16 %rs1165, %rs1165, %rs630;
add.s32 %r8971, %r4900, %r8971;
sub.s32 %r8873, %r815, %r4900;
shr.u32 %r8874, %r8874, %r4900;
setp.gt.u32 %p410, %r4899, %r815;
@%p410 bra $L__BB2_360;
setp.ne.s32 %p411, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs631, %rs1165, 255;
setp.ne.s16 %p412, %rs631, 127;
and.pred %p413, %p411, %p412;
@%p413 bra $L__BB2_360;
mov.u32 %r4909, 20548;
sub.s32 %r4910, %r4909, %r8972;
cvt.u64.u32 %rd272, %r4910;
add.s64 %rd273, %rd272, %rd5;
add.s64 %rd274, %rd1, %rd273;
st.global.u8 [%rd274], %rs1165;
add.s32 %r8972, %r8972, 1;
setp.gt.u16 %p414, %rs631, 143;
selp.u32 %r8970, 1, 0, %p414;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_360:
setp.ne.s32 %p415, %r8873, 0;
mov.u32 %r8884, %r8872;
@%p415 bra $L__BB2_356;
$L__BB2_361:
setp.eq.s16 %p416, %rs92, 0;
mov.u32 %r8969, %r8884;
@%p416 bra $L__BB2_415;
cvt.u32.u16 %r4911, %rs91;
and.b32 %r8886, %r4911, 255;
cvt.u32.u16 %r4912, %rs92;
and.b32 %r8885, %r4912, 255;
$L__BB2_363:
mov.u32 %r834, %r8885;
setp.gt.u32 %p417, %r8972, 2879;
mov.u32 %r8969, 1;
@%p417 bra $L__BB2_415;
mov.u32 %r4914, 8;
sub.s32 %r4915, %r4914, %r8970;
sub.s32 %r4916, %r4915, %r8971;
min.u32 %r4917, %r4916, %r834;
setp.eq.s32 %p418, %r4917, 32;
mov.u32 %r4918, -1;
shl.b32 %r4919, %r4918, %r4917;
not.b32 %r4920, %r4919;
selp.b32 %r4921, -1, %r4920, %p418;
and.b32 %r4922, %r4921, %r8886;
shl.b32 %r4923, %r4922, %r8971;
cvt.u16.u32 %rs635, %r4923;
or.b16 %rs1165, %rs1165, %rs635;
add.s32 %r8971, %r4917, %r8971;
sub.s32 %r8885, %r834, %r4917;
shr.u32 %r8886, %r8886, %r4917;
setp.gt.u32 %p419, %r4916, %r834;
@%p419 bra $L__BB2_367;
setp.ne.s32 %p420, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs636, %rs1165, 255;
setp.ne.s16 %p421, %rs636, 127;
and.pred %p422, %p420, %p421;
@%p422 bra $L__BB2_367;
mov.u32 %r4926, 20548;
sub.s32 %r4927, %r4926, %r8972;
cvt.u64.u32 %rd275, %r4927;
add.s64 %rd276, %rd275, %rd5;
add.s64 %rd277, %rd1, %rd276;
st.global.u8 [%rd277], %rs1165;
add.s32 %r8972, %r8972, 1;
setp.gt.u16 %p423, %rs636, 143;
selp.u32 %r8970, 1, 0, %p423;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_367:
setp.eq.s32 %p424, %r8885, 0;
mov.u32 %r8969, %r8884;
@%p424 bra $L__BB2_415;
bra.uni $L__BB2_363;
$L__BB2_415:
shr.u32 %r5048, %r8658, 1;
or.b32 %r8441, %r5048, %r591;
$L__BB2_416:
add.s32 %r8439, %r8439, 4;
setp.lt.u32 %p487, %r8439, %r4057;
@%p487 bra $L__BB2_51;
// Writes a single zero byte to shared memory at %r4103 + ((%r4057 + 1) >> 1) + 1,
// but only when that index, ((%r4057 + 1) >> 1) + 1, does not exceed 512.
// NOTE(review): %r4103 is presumably the base of a per-block shared byte
// array and 512 its last valid index -- confirm against the declaration.
$L__BB2_417:
// idx (%r5049) = ((%r4057 + 1) >> 1) + 1
add.s32 %r8414, %r4057, 1;
shr.u32 %r8413, %r8414, 1;
add.s32 %r5049, %r8413, 1;
// Skip the store when idx > 512.
setp.gt.u32 %p488, %r5049, 512;
@%p488 bra $L__BB2_419;
// The same (%r4057 + 1) >> 1 value is recomputed here to form the address.
add.s32 %r8406, %r4057, 1;
shr.u32 %r8405, %r8406, 1;
add.s32 %r8404, %r4103, %r8405;
mov.u16 %rs671, 0;
add.s32 %r8402, %r8404, 1;
st.shared.u8 [%r8402], %rs671;
$L__BB2_419:
setp.lt.u32 %p489, %r4058, 3;
@%p489 bra $L__BB2_665;
ld.param.u64 %rd1416, [ j2k_htj2k_encode_codeblocks_multi_input_param_4];
ld.param.u64 %rd1411, [ j2k_htj2k_encode_codeblocks_multi_input_param_3];
mov.u32 %r5051, 31;
sub.s32 %r1009, %r5051, %r4059;
mov.u32 %r9005, 2;
cvta.to.global.u64 %rd17, %rd1416;
cvta.to.global.u64 %rd18, %rd1411;
$L__BB2_421:
ld.shared.u8 %rs151, [_ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE13cleanup_e_val];
mov.u16 %rs672, 0;
st.shared.u8 [_ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE13cleanup_e_val], %rs672;
ld.shared.u8 %rs152, [_ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE14cleanup_cx_val];
st.shared.u8 [_ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE14cleanup_cx_val], %rs672;
@%p10 bra $L__BB2_664;
mov.u32 %r5054, 0;
ld.shared.u8 %rs673, [_ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE13cleanup_e_val+1];
ld.shared.u8 %rs674, [_ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE14cleanup_cx_val+1];
max.u16 %rs676, %rs151, %rs673;
cvt.u32.u16 %r5055, %rs676;
add.s32 %r9023, %r5055, -1;
add.s32 %r1027, %r9005, 1;
mul.lo.s32 %r9025, %r9005, %r4055;
mul.wide.u16 %r5056, %rs674, 4;
cvt.u32.u16 %r5057, %rs152;
and.b32 %r5058, %r5057, 255;
add.s32 %r9026, %r5056, %r5058;
mov.u32 %r9021, %r5054;
mov.u32 %r9022, %r5054;
mov.u32 %r9024, %r5054;
$L__BB2_423:
cvt.u64.u32 %rd307, %r9025;
add.s64 %rd308, %rd307, %rd4;
shl.b64 %rd309, %rd308, 2;
add.s64 %rd310, %rd3, %rd309;
ld.global.u32 %r1051, [%rd310];
setp.eq.s32 %p491, %r1051, 0;
mov.u32 %r9042, %r5054;
@%p491 bra $L__BB2_425;
and.b32 %r5060, %r1051, -2147483648;
abs.s32 %r5061, %r1051;
shl.b32 %r5062, %r5061, %r1009;
or.b32 %r9042, %r5062, %r5060;
// Derives three per-sample values from %r9042, a word whose bit 31 is read
// as a sign and whose remaining bits are read as a magnitude:
//   %r1054 = ((%r9042 << 1) >> %r46) & ~1   (sign dropped, scaled, bit 0 cleared)
//   %r9043 = 32 - clz(%r1054 - 1)           (0 when %r1054 == 0)
//   %r9044 = %r1054 - 2 + (%r9042 >> 31)    (0 when %r1054 == 0)
//   %r9050 = 1 if %r1054 != 0, else 0
// %r9046 is also cleared here for use by the code that follows.
// NOTE(review): presumably the HTJ2K cleanup-pass exponent, magnitude-sign
// value and significance bit for the first sample of a quad -- confirm.
$L__BB2_425:
shl.b32 %r5066, %r9042, 1;
shr.u32 %r5067, %r5066, %r46;
and.b32 %r1054, %r5067, -2;
setp.eq.s32 %p492, %r1054, 0;
// Defaults for the all-zero case.
mov.u32 %r9046, 0;
mov.u32 %r9043, %r9046;
mov.u32 %r9044, %r9046;
mov.u32 %r9050, %r9046;
@%p492 bra $L__BB2_427;
// Non-zero sample: bit length of (%r1054 - 1).
add.s32 %r5069, %r1054, -1;
clz.b32 %r5070, %r5069;
mov.u32 %r5071, 32;
sub.s32 %r9043, %r5071, %r5070;
// Fold the sign (bit 31 of %r9042) into the low bit of the emitted value.
shr.u32 %r5072, %r9042, 31;
add.s32 %r5073, %r5072, %r1054;
add.s32 %r9044, %r5073, -2;
mov.u32 %r9050, 1;
$L__BB2_427:
setp.ge.u32 %p493, %r1027, %r4058;
@%p493 bra $L__BB2_430;
add.s32 %r5076, %r9025, %r4055;
cvt.u64.u32 %rd311, %r5076;
add.s64 %rd312, %rd311, %rd4;
shl.b64 %rd313, %rd312, 2;
add.s64 %rd314, %rd3, %rd313;
ld.global.u32 %r1060, [%rd314];
setp.eq.s32 %p494, %r1060, 0;
@%p494 bra $L__BB2_430;
and.b32 %r5077, %r1060, -2147483648;
abs.s32 %r5078, %r1060;
shl.b32 %r5079, %r5078, %r1009;
or.b32 %r9046, %r5079, %r5077;
$L__BB2_430:
shl.b32 %r5082, %r9046, 1;
shr.u32 %r5083, %r5082, %r46;
and.b32 %r1063, %r5083, -2;
setp.eq.s32 %p495, %r1063, 0;
mov.u32 %r9061, 0;
mov.u32 %r9047, %r9061;
mov.u32 %r9048, %r9061;
mov.u32 %r9065, %r9043;
@%p495 bra $L__BB2_432;
or.b32 %r9050, %r9050, 2;
add.s32 %r5084, %r1063, -1;
clz.b32 %r5085, %r5084;
mov.u32 %r5086, 32;
sub.s32 %r9047, %r5086, %r5085;
max.s32 %r9065, %r9043, %r9047;
shr.u32 %r5087, %r9046, 31;
add.s32 %r5088, %r5087, %r1063;
add.s32 %r9048, %r5088, -2;
$L__BB2_432:
add.s32 %r9355, %r9025, 1;
add.s32 %r5093, %r9021, 1;
setp.ge.u32 %p496, %r5093, %r4057;
mov.u32 %r9062, %r9061;
mov.u32 %r9063, %r9061;
mov.u32 %r9064, %r9061;
@%p496 bra $L__BB2_443;
cvt.u64.u32 %rd315, %r9355;
add.s64 %rd316, %rd315, %rd4;
shl.b64 %rd317, %rd316, 2;
add.s64 %rd318, %rd3, %rd317;
ld.global.u32 %r1073, [%rd318];
setp.eq.s32 %p497, %r1073, 0;
mov.u32 %r9062, 0;
mov.u32 %r9051, %r9062;
@%p497 bra $L__BB2_435;
and.b32 %r5095, %r1073, -2147483648;
abs.s32 %r5096, %r1073;
shl.b32 %r5097, %r5096, %r1009;
or.b32 %r9051, %r5097, %r5095;
$L__BB2_435:
shl.b32 %r5100, %r9051, 1;
shr.u32 %r5101, %r5100, %r46;
and.b32 %r1076, %r5101, -2;
setp.eq.s32 %p498, %r1076, 0;
mov.u32 %r9064, %r9062;
@%p498 bra $L__BB2_437;
or.b32 %r9050, %r9050, 4;
add.s32 %r5102, %r1076, -1;
clz.b32 %r5103, %r5102;
mov.u32 %r5104, 32;
sub.s32 %r9062, %r5104, %r5103;
max.s32 %r9065, %r9065, %r9062;
shr.u32 %r5105, %r9051, 31;
add.s32 %r5106, %r5105, %r1076;
add.s32 %r9064, %r5106, -2;
$L__BB2_437:
mov.u32 %r9061, 0;
mov.u32 %r9056, %r9061;
@%p493 bra $L__BB2_440;
add.s32 %r5109, %r9355, %r4055;
cvt.u64.u32 %rd319, %r5109;
add.s64 %rd320, %rd319, %rd4;
shl.b64 %rd321, %rd320, 2;
add.s64 %rd322, %rd3, %rd321;
ld.global.u32 %r1085, [%rd322];
setp.eq.s32 %p500, %r1085, 0;
@%p500 bra $L__BB2_440;
and.b32 %r5110, %r1085, -2147483648;
abs.s32 %r5111, %r1085;
shl.b32 %r5112, %r5111, %r1009;
or.b32 %r9056, %r5112, %r5110;
$L__BB2_440:
shl.b32 %r5115, %r9056, 1;
shr.u32 %r5116, %r5115, %r46;
and.b32 %r1088, %r5116, -2;
setp.eq.s32 %p501, %r1088, 0;
mov.u32 %r9063, %r9061;
@%p501 bra $L__BB2_442;
or.b32 %r9050, %r9050, 8;
add.s32 %r5117, %r1088, -1;
clz.b32 %r5118, %r5117;
mov.u32 %r5119, 32;
sub.s32 %r9061, %r5119, %r5118;
max.s32 %r9065, %r9065, %r9061;
shr.u32 %r5120, %r9056, 31;
add.s32 %r5121, %r5120, %r1088;
add.s32 %r9063, %r5121, -2;
$L__BB2_442:
add.s32 %r9355, %r9025, 2;
$L__BB2_443:
add.s32 %r5123, %r9050, -1;
and.b32 %r5124, %r5123, %r9050;
setp.ne.s32 %p502, %r5124, 0;
mov.u32 %r9068, 0;
setp.gt.s32 %p503, %r9023, 1;
and.pred %p504, %p503, %p502;
selp.b32 %r5125, %r9023, 1, %p504;
max.s32 %r1105, %r5125, %r9065;
sub.s32 %r1106, %r1105, %r5125;
setp.lt.s32 %p505, %r1106, 1;
@%p505 bra $L__BB2_445;
setp.eq.s32 %p506, %r9043, %r9065;
selp.u32 %r5126, 1, 0, %p506;
setp.eq.s32 %p507, %r9047, %r9065;
selp.u32 %r5127, -1, 0, %p507;
bfi.b32 %r5128, %r5127, %r5126, 1, 1;
setp.eq.s32 %p508, %r9062, %r9065;
selp.u16 %rs677, 1, 0, %p508;
mul.wide.u16 %r5129, %rs677, 4;
or.b32 %r5130, %r5128, %r5129;
setp.eq.s32 %p509, %r9061, %r9065;
selp.u16 %rs678, 1, 0, %p509;
mul.wide.u16 %r5131, %rs678, 8;
or.b32 %r9068, %r5130, %r5131;
$L__BB2_445:
shl.b32 %r5132, %r9050, 4;
shl.b32 %r5133, %r9026, 8;
or.b32 %r5134, %r5132, %r5133;
or.b32 %r5135, %r5134, %r9068;
mul.wide.u32 %rd323, %r5135, 2;
add.s64 %rd324, %rd18, %rd323;
ld.global.u16 %rs155, [%rd324];
shr.u16 %rs679, %rs155, 4;
and.b16 %rs156, %rs679, 7;
setp.eq.s16 %p510, %rs156, 0;
mov.u32 %r9080, %r8969;
@%p510 bra $L__BB2_452;
cvt.u32.u16 %r9069, %rs156;
shr.u16 %rs680, %rs155, 8;
cvt.u32.u16 %r9070, %rs680;
$L__BB2_447:
mov.u32 %r1111, %r9069;
setp.gt.u32 %p511, %r8972, 2879;
mov.u32 %r9080, 1;
@%p511 bra $L__BB2_452;
mov.u32 %r5137, 8;
sub.s32 %r5138, %r5137, %r8970;
sub.s32 %r5139, %r5138, %r8971;
min.u32 %r5140, %r5139, %r1111;
setp.eq.s32 %p512, %r5140, 32;
mov.u32 %r5141, -1;
shl.b32 %r5142, %r5141, %r5140;
not.b32 %r5143, %r5142;
selp.b32 %r5144, -1, %r5143, %p512;
and.b32 %r5145, %r5144, %r9070;
shl.b32 %r5146, %r5145, %r8971;
cvt.u16.u32 %rs681, %r5146;
or.b16 %rs1165, %rs1165, %rs681;
add.s32 %r8971, %r5140, %r8971;
sub.s32 %r9069, %r1111, %r5140;
shr.u32 %r9070, %r9070, %r5140;
setp.gt.u32 %p513, %r5139, %r1111;
@%p513 bra $L__BB2_451;
setp.ne.s32 %p514, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs682, %rs1165, 255;
setp.ne.s16 %p515, %rs682, 127;
and.pred %p516, %p514, %p515;
@%p516 bra $L__BB2_451;
mov.u32 %r5149, 20548;
sub.s32 %r5150, %r5149, %r8972;
cvt.u64.u32 %rd325, %r5150;
add.s64 %rd326, %rd325, %rd5;
add.s64 %rd327, %rd1, %rd326;
st.global.u8 [%rd327], %rs1165;
add.s32 %r8972, %r8972, 1;
setp.gt.u16 %p517, %rs682, 143;
selp.u32 %r8970, 1, 0, %p517;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_451:
setp.ne.s32 %p518, %r9069, 0;
mov.u32 %r9080, %r8969;
@%p518 bra $L__BB2_447;
$L__BB2_452:
setp.ne.s32 %p519, %r9026, 0;
@%p519 bra $L__BB2_500;
setp.eq.s32 %p520, %r9050, 0;
add.s32 %r5151, %r8524, 17477;
cvt.u64.u32 %rd328, %r5151;
add.s64 %rd329, %rd328, %rd5;
add.s64 %rd19, %rd1, %rd329;
@%p520 bra $L__BB2_492;
shl.b16 %rs1096, %rs1096, 1;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p521, %r8530, 0;
mov.u32 %r9114, %r8733;
@%p521 bra $L__BB2_457;
setp.gt.u32 %p522, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r9114, 1;
@%p522 bra $L__BB2_457;
st.global.u8 [%rd19], %rs1096;
add.s32 %r8524, %r8524, 1;
mov.u32 %r8530, 8;
mov.u16 %rs1096, 0;
mov.u32 %r9114, %r8733;
$L__BB2_457:
setp.lt.u32 %p523, %r8735, 3;
mov.u32 %r9084, 0;
@%p523 bra $L__BB2_460;
setp.lt.u32 %p524, %r8735, 6;
mov.u32 %r9084, 1;
@%p524 bra $L__BB2_460;
setp.lt.u32 %p525, %r8735, 9;
setp.eq.s32 %p526, %r8735, 11;
selp.b32 %r5157, 4, 5, %p526;
setp.lt.u32 %p527, %r8735, 11;
selp.b32 %r5158, 3, %r5157, %p527;
selp.b32 %r9084, 2, %r5158, %p525;
$L__BB2_460:
setp.eq.s32 %p528, %r9084, 0;
@%p528 bra $L__BB2_488;
add.s32 %r1135, %r9084, -1;
and.b32 %r1136, %r9084, 3;
setp.eq.s32 %p529, %r1136, 0;
mov.u32 %r9094, %r9084;
mov.u32 %r9097, %r9114;
@%p529 bra $L__BB2_473;
mov.u32 %r5160, 1;
shl.b32 %r5161, %r5160, %r1135;
and.b32 %r5162, %r5161, %r8736;
setp.ne.s32 %p530, %r5162, 0;
selp.u32 %r5163, 1, 0, %p530;
cvt.u32.u16 %r5164, %rs1096;
bfi.b32 %r5165, %r5164, %r5163, 1, 8;
cvt.u16.u32 %rs1096, %r5165;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p531, %r8530, 0;
mov.u32 %r9097, %r9114;
@%p531 bra $L__BB2_465;
setp.gt.u32 %p532, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r9097, %r5160;
@%p532 bra $L__BB2_465;
add.s32 %r5169, %r8524, 17477;
cvt.u64.u32 %rd330, %r5169;
add.s64 %rd331, %rd330, %rd5;
add.s64 %rd332, %rd1, %rd331;
st.global.u8 [%rd332], %rs1096;
add.s32 %r8524, %r8524, 1;
mov.u32 %r8530, 8;
mov.u16 %rs1096, 0;
mov.u32 %r9097, %r9114;
$L__BB2_465:
setp.eq.s32 %p533, %r1136, 1;
mov.u32 %r9114, %r9097;
mov.u32 %r9094, %r1135;
@%p533 bra $L__BB2_473;
add.s32 %r9094, %r9084, -2;
mov.u32 %r5170, 1;
shl.b32 %r5171, %r5170, %r9094;
and.b32 %r5172, %r5171, %r8736;
setp.ne.s32 %p534, %r5172, 0;
selp.u32 %r5173, 1, 0, %p534;
cvt.u32.u16 %r5174, %rs1096;
bfi.b32 %r5175, %r5174, %r5173, 1, 8;
cvt.u16.u32 %rs1096, %r5175;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p535, %r8530, 0;
mov.u32 %r9088, %r9097;
@%p535 bra $L__BB2_469;
setp.gt.u32 %p536, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r9088, %r5170;
@%p536 bra $L__BB2_469;
add.s32 %r5178, %r8524, 17477;
cvt.u64.u32 %rd333, %r5178;
add.s64 %rd334, %rd333, %rd5;
add.s64 %rd335, %rd1, %rd334;
and.b16 %rs689, %rs1096, 255;
st.global.u8 [%rd335], %rs1096;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p537, %rs689, 255;
selp.b32 %r8530, 7, 8, %p537;
mov.u16 %rs1096, 0;
mov.u32 %r9088, %r9097;
$L__BB2_469:
setp.eq.s32 %p538, %r1136, 2;
mov.u32 %r9114, %r9088;
mov.u32 %r9097, %r9088;
@%p538 bra $L__BB2_473;
add.s32 %r9094, %r9084, -3;
mov.u32 %r5179, 1;
shl.b32 %r5180, %r5179, %r9094;
and.b32 %r5181, %r5180, %r8736;
setp.ne.s32 %p539, %r5181, 0;
selp.u32 %r5182, 1, 0, %p539;
cvt.u32.u16 %r5183, %rs1096;
bfi.b32 %r5184, %r5183, %r5182, 1, 8;
cvt.u16.u32 %rs1096, %r5184;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p540, %r8530, 0;
mov.u32 %r9114, %r9088;
mov.u32 %r9097, %r9088;
@%p540 bra $L__BB2_473;
setp.gt.u32 %p541, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r9114, %r5179;
mov.u32 %r9097, %r5179;
@%p541 bra $L__BB2_473;
add.s32 %r5189, %r8524, 17477;
cvt.u64.u32 %rd336, %r5189;
add.s64 %rd337, %rd336, %rd5;
add.s64 %rd338, %rd1, %rd337;
and.b16 %rs692, %rs1096, 255;
st.global.u8 [%rd338], %rs1096;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p542, %rs692, 255;
selp.b32 %r8530, 7, 8, %p542;
mov.u16 %rs1096, 0;
mov.u32 %r9114, %r9088;
mov.u32 %r9097, %r9088;
$L__BB2_473:
setp.lt.u32 %p543, %r1135, 3;
@%p543 bra $L__BB2_488;
mov.u32 %r9114, %r9097;
$L__BB2_475:
add.s32 %r5190, %r9094, -1;
mov.u32 %r5191, 1;
shl.b32 %r5192, %r5191, %r5190;
and.b32 %r5193, %r5192, %r8736;
setp.ne.s32 %p544, %r5193, 0;
selp.u32 %r5194, 1, 0, %p544;
cvt.u32.u16 %r5195, %rs1096;
bfi.b32 %r9103, %r5195, %r5194, 1, 8;
add.s32 %r9104, %r8530, -1;
setp.ne.s32 %p545, %r9104, 0;
mov.u32 %r9102, %r9114;
@%p545 bra $L__BB2_478;
setp.gt.u32 %p546, %r8524, 191;
mov.u32 %r9104, 0;
mov.u32 %r9102, %r5191;
@%p546 bra $L__BB2_478;
cvt.u16.u32 %rs693, %r9103;
and.b16 %rs694, %rs693, 255;
add.s32 %r5199, %r8524, 17477;
cvt.u64.u32 %rd339, %r5199;
add.s64 %rd340, %rd339, %rd5;
add.s64 %rd341, %rd1, %rd340;
st.global.u8 [%rd341], %rs693;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p547, %rs694, 255;
selp.b32 %r9104, 7, 8, %p547;
mov.u32 %r9103, 0;
mov.u32 %r9102, %r9114;
$L__BB2_478:
add.s32 %r5200, %r9094, -2;
shl.b32 %r5202, %r5191, %r5200;
and.b32 %r5203, %r5202, %r8736;
setp.ne.s32 %p548, %r5203, 0;
and.b32 %r5204, %r9103, 127;
selp.u32 %r5205, 1, 0, %p548;
bfi.b32 %r9107, %r5204, %r5205, 1, 7;
add.s32 %r9108, %r9104, -1;
setp.ne.s32 %p549, %r9108, 0;
mov.u32 %r9106, %r9102;
@%p549 bra $L__BB2_481;
setp.gt.u32 %p550, %r8524, 191;
mov.u32 %r9108, 0;
mov.u32 %r9106, 1;
@%p550 bra $L__BB2_481;
cvt.u16.u32 %rs695, %r9107;
and.b16 %rs696, %rs695, 255;
add.s32 %r5209, %r8524, 17477;
cvt.u64.u32 %rd342, %r5209;
add.s64 %rd343, %rd342, %rd5;
add.s64 %rd344, %rd1, %rd343;
st.global.u8 [%rd344], %rs695;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p551, %rs696, 255;
selp.b32 %r9108, 7, 8, %p551;
mov.u32 %r9107, 0;
mov.u32 %r9106, %r9102;
$L__BB2_481:
add.s32 %r5210, %r9094, -3;
mov.u32 %r5211, 1;
shl.b32 %r5212, %r5211, %r5210;
and.b32 %r5213, %r5212, %r8736;
setp.ne.s32 %p552, %r5213, 0;
and.b32 %r5214, %r9107, 127;
selp.u32 %r5215, 1, 0, %p552;
bfi.b32 %r9111, %r5214, %r5215, 1, 7;
add.s32 %r9112, %r9108, -1;
setp.ne.s32 %p553, %r9112, 0;
mov.u32 %r9110, %r9106;
@%p553 bra $L__BB2_484;
setp.gt.u32 %p554, %r8524, 191;
mov.u32 %r9112, 0;
mov.u32 %r9110, %r5211;
@%p554 bra $L__BB2_484;
cvt.u16.u32 %rs697, %r9111;
and.b16 %rs698, %rs697, 255;
add.s32 %r5219, %r8524, 17477;
cvt.u64.u32 %rd345, %r5219;
add.s64 %rd346, %rd345, %rd5;
add.s64 %rd347, %rd1, %rd346;
st.global.u8 [%rd347], %rs697;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p555, %rs698, 255;
selp.b32 %r9112, 7, 8, %p555;
mov.u32 %r9111, 0;
mov.u32 %r9110, %r9106;
$L__BB2_484:
add.s32 %r9094, %r9094, -4;
shl.b32 %r5221, %r5211, %r9094;
and.b32 %r5222, %r5221, %r8736;
setp.ne.s32 %p556, %r5222, 0;
and.b32 %r5223, %r9111, 127;
selp.u32 %r5224, 1, 0, %p556;
bfi.b32 %r5225, %r5223, %r5224, 1, 15;
cvt.u16.u32 %rs1096, %r5225;
add.s32 %r8530, %r9112, -1;
setp.ne.s32 %p557, %r8530, 0;
mov.u32 %r9114, %r9110;
@%p557 bra $L__BB2_487;
setp.gt.u32 %p558, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r9114, 1;
@%p558 bra $L__BB2_487;
add.s32 %r5228, %r8524, 17477;
cvt.u64.u32 %rd348, %r5228;
add.s64 %rd349, %rd348, %rd5;
add.s64 %rd350, %rd1, %rd349;
and.b16 %rs700, %rs1096, 255;
st.global.u8 [%rd350], %rs1096;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p559, %rs700, 255;
selp.b32 %r8530, 7, 8, %p559;
mov.u16 %rs1096, 0;
mov.u32 %r9114, %r9110;
$L__BB2_487:
setp.ne.s32 %p560, %r9094, 0;
@%p560 bra $L__BB2_475;
$L__BB2_488:
add.s32 %r5230, %r8735, -1;
setp.eq.s32 %p561, %r8735, 0;
mov.u32 %r8736, 0;
selp.b32 %r8735, 0, %r5230, %p561;
setp.lt.u32 %p562, %r8735, 3;
mov.u32 %r9120, %r8736;
@%p562 bra $L__BB2_491;
setp.lt.u32 %p563, %r8735, 6;
mov.u32 %r9120, 1;
@%p563 bra $L__BB2_491;
setp.lt.u32 %p564, %r8735, 9;
setp.eq.s32 %p565, %r8735, 11;
selp.b32 %r5232, 4, 5, %p565;
setp.lt.u32 %p566, %r8735, 11;
selp.b32 %r5233, 3, %r5232, %p566;
selp.b32 %r9120, 2, %r5233, %p564;
$L__BB2_491:
mov.u32 %r5235, 1;
shl.b32 %r8734, %r5235, %r9120;
mov.u32 %r8733, %r9114;
bra.uni $L__BB2_500;
// Run-counter path: increments the run counter %r8736 and, once it reaches
// the threshold %r8734, emits a single 1 bit into the MSB-first byte
// accumulator %rs1096, then raises the state index %r8735 (capped at 12)
// and recomputes the threshold as 1 << f(%r8735).
//   %r8530 = bits still free in %rs1096     %r8524 = bytes stored so far
//   %r8733 = status flag, set to 1 when a store is refused because %r8524 > 191
// NOTE(review): this has the shape of the HTJ2K MEL adaptive run-length
// coder (13 states, exponent table 0,0,0,1,1,1,2,2,2,3,3,4,5) -- confirm.
$L__BB2_492:
add.s32 %r8736, %r8736, 1;
// Run still below the threshold: nothing to emit.
setp.lt.u32 %p567, %r8736, %r8734;
@%p567 bra $L__BB2_500;
// Shift a 1 bit into the accumulator.
shl.b16 %rs701, %rs1096, 1;
or.b16 %rs1096, %rs701, 1;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p568, %r8530, 0;
mov.u32 %r9121, %r8733;
@%p568 bra $L__BB2_496;
// Accumulator full: refuse the store (status 1) once %r8524 exceeds 191.
setp.gt.u32 %p569, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r9121, 1;
@%p569 bra $L__BB2_496;
// Store through %rd19, which is computed before the branch to this label.
and.b16 %rs703, %rs1096, 255;
st.global.u8 [%rd19], %rs1096;
add.s32 %r8524, %r8524, 1;
// After a 0xFF byte only 7 bits of the next byte are usable.
setp.eq.s16 %p570, %rs703, 255;
selp.b32 %r8530, 7, 8, %p570;
mov.u16 %rs1096, 0;
mov.u32 %r9121, %r8733;
$L__BB2_496:
// state = min(state + 1, 12); the run counter restarts at 0.
add.s32 %r5239, %r8735, 1;
min.u32 %r8735, %r5239, 12;
setp.lt.u32 %p571, %r8735, 3;
mov.u32 %r8736, 0;
mov.u32 %r9124, %r8736;
@%p571 bra $L__BB2_499;
// Exponent %r9124 by state: 0-2 -> 0, 3-5 -> 1, 6-8 -> 2, 9-10 -> 3,
// 11 -> 4, otherwise -> 5.
setp.lt.u32 %p572, %r8735, 6;
mov.u32 %r9124, 1;
@%p572 bra $L__BB2_499;
setp.lt.u32 %p573, %r8735, 9;
setp.eq.s32 %p574, %r8735, 11;
selp.b32 %r5241, 4, 5, %p574;
setp.lt.u32 %p575, %r8735, 11;
selp.b32 %r5242, 3, %r5241, %p575;
selp.b32 %r9124, 2, %r5242, %p573;
$L__BB2_499:
// New threshold = 1 << exponent; publish the (possibly updated) status flag.
mov.u32 %r5244, 1;
shl.b32 %r8734, %r5244, %r9124;
mov.u32 %r8733, %r9121;
$L__BB2_500:
and.b16 %rs704, %rs155, 15;
cvt.u32.u16 %r1219, %rs704;
and.b32 %r5245, %r9050, 1;
setp.eq.b32 %p576, %r5245, 1;
mov.pred %p577, 0;
xor.pred %p578, %p576, %p577;
not.pred %p579, %p578;
mov.u32 %r9141, %r9186;
@%p579 bra $L__BB2_507;
and.b32 %r5246, %r1219, 1;
sub.s32 %r9131, %r1105, %r5246;
setp.eq.s32 %p580, %r9131, 0;
mov.u32 %r9141, %r9186;
@%p580 bra $L__BB2_507;
mov.u32 %r5247, -1;
shl.b32 %r5248, %r5247, %r9131;
not.b32 %r5249, %r5248;
and.b32 %r9132, %r9044, %r5249;
// Bit-packing loop: appends the low %r9131 bits of %r9132 (LSB first) to the
// accumulator %r9157 and stores completed bytes at ascending addresses.
//   %r9131 = bits still to emit       %r9132 = remaining source bits
//   %r9158 = bits used in %r9157      %r9159 = capacity of the current byte (7 or 8)
//   %r9160 = bytes stored so far      %r9141 = status: 1 if the limit was hit,
//                                               else %r9186
// NOTE(review): presumably the HTJ2K MagSgn forward stream with a 7-bit byte
// after every 0xFF -- confirm against the CUDA source.
$L__BB2_503:
// Refuse to write once more than 17476 bytes have been stored; exit with status 1.
setp.gt.u32 %p581, %r9160, 17476;
mov.u32 %r9141, 1;
@%p581 bra $L__BB2_507;
// take (%r5252) = min(capacity - used, remaining).
sub.s32 %r5251, %r9159, %r9158;
min.u32 %r5252, %r5251, %r9131;
// mask = low `take` bits set (all ones when take == 32).
setp.eq.s32 %p582, %r5252, 32;
mov.u32 %r5253, -1;
shl.b32 %r5254, %r5253, %r5252;
not.b32 %r5255, %r5254;
selp.b32 %r5256, -1, %r5255, %p582;
// Insert the masked bits at bit position %r9158 of the accumulator.
and.b32 %r5257, %r5256, %r9132;
shl.b32 %r5258, %r5257, %r9158;
or.b32 %r9157, %r5258, %r9157;
add.s32 %r9158, %r5252, %r9158;
shr.u32 %r9132, %r9132, %r5252;
sub.s32 %r9131, %r9131, %r5252;
// Byte not full yet: nothing to flush.
setp.lt.u32 %p583, %r9158, %r9159;
@%p583 bra $L__BB2_506;
// Store the byte at %rd1 + %rd5 + %r9160 and advance the byte counter.
cvt.u64.u32 %rd351, %r9160;
add.s64 %rd352, %rd351, %rd5;
add.s64 %rd353, %rd1, %rd352;
st.global.u8 [%rd353], %r9157;
add.s32 %r9160, %r9160, 1;
// The byte following a 0xFF holds only 7 bits.
setp.eq.s32 %p584, %r9157, 255;
selp.b32 %r9159, 7, 8, %p584;
mov.u32 %r9157, 0;
mov.u32 %r9158, %r9157;
$L__BB2_506:
// Repeat until every requested bit is emitted; normal exit carries %r9186.
setp.ne.s32 %p585, %r9131, 0;
mov.u32 %r9141, %r9186;
@%p585 bra $L__BB2_503;
$L__BB2_507:
and.b32 %r1243, %r9050, 2;
setp.eq.s32 %p586, %r1243, 0;
mov.u32 %r9156, %r9141;
@%p586 bra $L__BB2_514;
shr.u32 %r5261, %r1219, 1;
and.b32 %r5262, %r5261, 1;
sub.s32 %r9146, %r1105, %r5262;
setp.eq.s32 %p587, %r9146, 0;
mov.u32 %r9156, %r9141;
@%p587 bra $L__BB2_514;
mov.u32 %r5263, -1;
shl.b32 %r5264, %r5263, %r9146;
not.b32 %r5265, %r5264;
and.b32 %r9147, %r9048, %r5265;
$L__BB2_510:
setp.gt.u32 %p588, %r9160, 17476;
mov.u32 %r9156, 1;
@%p588 bra $L__BB2_514;
sub.s32 %r5267, %r9159, %r9158;
min.u32 %r5268, %r5267, %r9146;
setp.eq.s32 %p589, %r5268, 32;
mov.u32 %r5269, -1;
shl.b32 %r5270, %r5269, %r5268;
not.b32 %r5271, %r5270;
selp.b32 %r5272, -1, %r5271, %p589;
and.b32 %r5273, %r5272, %r9147;
shl.b32 %r5274, %r5273, %r9158;
or.b32 %r9157, %r5274, %r9157;
add.s32 %r9158, %r5268, %r9158;
shr.u32 %r9147, %r9147, %r5268;
sub.s32 %r9146, %r9146, %r5268;
setp.lt.u32 %p590, %r9158, %r9159;
@%p590 bra $L__BB2_513;
cvt.u64.u32 %rd354, %r9160;
add.s64 %rd355, %rd354, %rd5;
add.s64 %rd356, %rd1, %rd355;
st.global.u8 [%rd356], %r9157;
add.s32 %r9160, %r9160, 1;
setp.eq.s32 %p591, %r9157, 255;
selp.b32 %r9159, 7, 8, %p591;
mov.u32 %r9157, 0;
mov.u32 %r9158, %r9157;
$L__BB2_513:
setp.ne.s32 %p592, %r9146, 0;
mov.u32 %r9156, %r9141;
@%p592 bra $L__BB2_510;
$L__BB2_514:
and.b32 %r1267, %r9050, 4;
setp.eq.s32 %p593, %r1267, 0;
mov.u32 %r9171, %r9156;
@%p593 bra $L__BB2_521;
shr.u32 %r5277, %r1219, 2;
and.b32 %r5278, %r5277, 1;
sub.s32 %r9161, %r1105, %r5278;
setp.eq.s32 %p594, %r9161, 0;
mov.u32 %r9171, %r9156;
@%p594 bra $L__BB2_521;
mov.u32 %r5279, -1;
shl.b32 %r5280, %r5279, %r9161;
not.b32 %r5281, %r5280;
and.b32 %r9162, %r9064, %r5281;
$L__BB2_517:
setp.gt.u32 %p595, %r9160, 17476;
mov.u32 %r9171, 1;
@%p595 bra $L__BB2_521;
sub.s32 %r5283, %r9159, %r9158;
min.u32 %r5284, %r5283, %r9161;
setp.eq.s32 %p596, %r5284, 32;
mov.u32 %r5285, -1;
shl.b32 %r5286, %r5285, %r5284;
not.b32 %r5287, %r5286;
selp.b32 %r5288, -1, %r5287, %p596;
and.b32 %r5289, %r5288, %r9162;
shl.b32 %r5290, %r5289, %r9158;
or.b32 %r9157, %r5290, %r9157;
add.s32 %r9158, %r5284, %r9158;
shr.u32 %r9162, %r9162, %r5284;
sub.s32 %r9161, %r9161, %r5284;
setp.lt.u32 %p597, %r9158, %r9159;
@%p597 bra $L__BB2_520;
cvt.u64.u32 %rd357, %r9160;
add.s64 %rd358, %rd357, %rd5;
add.s64 %rd359, %rd1, %rd358;
st.global.u8 [%rd359], %r9157;
add.s32 %r9160, %r9160, 1;
setp.eq.s32 %p598, %r9157, 255;
selp.b32 %r9159, 7, 8, %p598;
mov.u32 %r9157, 0;
mov.u32 %r9158, %r9157;
$L__BB2_520:
setp.ne.s32 %p599, %r9161, 0;
mov.u32 %r9171, %r9156;
@%p599 bra $L__BB2_517;
$L__BB2_521:
and.b32 %r1291, %r9050, 8;
setp.eq.s32 %p600, %r1291, 0;
mov.u32 %r9186, %r9171;
@%p600 bra $L__BB2_528;
shr.u32 %r5293, %r1219, 3;
sub.s32 %r9176, %r1105, %r5293;
setp.eq.s32 %p601, %r9176, 0;
mov.u32 %r9186, %r9171;
@%p601 bra $L__BB2_528;
mov.u32 %r5294, -1;
shl.b32 %r5295, %r5294, %r9176;
not.b32 %r5296, %r5295;
and.b32 %r9177, %r9063, %r5296;
$L__BB2_524:
setp.gt.u32 %p602, %r9160, 17476;
mov.u32 %r9186, 1;
@%p602 bra $L__BB2_528;
sub.s32 %r5298, %r9159, %r9158;
min.u32 %r5299, %r5298, %r9176;
setp.eq.s32 %p603, %r5299, 32;
mov.u32 %r5300, -1;
shl.b32 %r5301, %r5300, %r5299;
not.b32 %r5302, %r5301;
selp.b32 %r5303, -1, %r5302, %p603;
and.b32 %r5304, %r5303, %r9177;
shl.b32 %r5305, %r5304, %r9158;
or.b32 %r9157, %r5305, %r9157;
add.s32 %r9158, %r5299, %r9158;
shr.u32 %r9177, %r9177, %r5299;
sub.s32 %r9176, %r9176, %r5299;
setp.lt.u32 %p604, %r9158, %r9159;
@%p604 bra $L__BB2_527;
cvt.u64.u32 %rd360, %r9160;
add.s64 %rd361, %rd360, %rd5;
add.s64 %rd362, %rd1, %rd361;
st.global.u8 [%rd362], %r9157;
add.s32 %r9160, %r9160, 1;
setp.eq.s32 %p605, %r9157, 255;
selp.b32 %r9159, 7, 8, %p605;
mov.u32 %r9157, 0;
mov.u32 %r9158, %r9157;
$L__BB2_527:
setp.ne.s32 %p606, %r9176, 0;
mov.u32 %r9186, %r9171;
@%p606 bra $L__BB2_524;
// Shared-memory bookkeeping after one group of four samples has been emitted.
// Two byte arrays are updated: one addressed as %r4103 + %r9024 and the
// cleanup_cx_val array addressed as base + %r9022.
// NOTE(review): %r4103 is presumably the base of cleanup_e_val, and the
// values stored are presumably per-column exponents and significance
// context for the next row pair -- confirm against the CUDA source.
$L__BB2_528:
add.s32 %r1315, %r4103, %r9024;
ld.shared.u8 %rs705, [%r1315];
mov.u32 %r9026, 0;
// slot[0] = max(slot[0], low byte of %r9047), compared as unsigned bytes.
cvt.u32.u16 %r5311, %rs705;
and.b32 %r5312, %r5311, 255;
and.b32 %r5313, %r9047, 255;
setp.lt.u32 %p607, %r5313, %r5312;
cvt.u16.u32 %rs706, %r9047;
selp.b16 %rs707, %rs705, %rs706, %p607;
st.shared.u8 [%r1315], %rs707;
// Read the old slot[1] and slot[2], then reload whichever is larger
// (slot[2] on a tie); %r9023 = that byte - 1.
ld.shared.u8 %rs177, [%r1315+2];
ld.shared.u8 %rs708, [%r1315+1];
setp.gt.u16 %p608, %rs708, %rs177;
add.s32 %r9356, %r9024, 1;
add.s32 %r5314, %r9024, 2;
selp.b32 %r5315, %r9356, %r5314, %p608;
add.s32 %r5316, %r4103, %r5315;
ld.shared.u8 %rs178, [%r5316];
cvt.u32.u16 %r5317, %rs178;
and.b32 %r5318, %r5317, 255;
add.s32 %r9023, %r5318, -1;
cvt.u16.u32 %rs179, %r9061;
// %rs710 = (%r1243 >> 1); %r1243 is %r9050 & 2, so this is that flag as 0/1.
cvt.u16.u32 %rs709, %r1243;
shr.u16 %rs710, %rs709, 1;
mov.u32 %r5319, _ZZ44 j2k_htj2k_encode_codeblocks_multi_inputE14cleanup_cx_val;
add.s32 %r1318, %r5319, %r9022;
// slot[1] is overwritten with the low byte of %r9061 only after the
// loads above have consumed its previous value.
st.shared.u8 [%r1315+1], %r9061;
// cx[0] |= flag derived from bit 1 of %r9050.
ld.shared.u8 %rs711, [%r1318];
or.b16 %rs712, %rs711, %rs710;
st.shared.u8 [%r1318], %rs712;
add.s32 %r9022, %r9022, 1;
// Capture the old cx[1] and cx[2], then set cx[1] = (%r1291 >> 3), i.e.
// bit 3 of %r9050 as 0/1.
ld.shared.u8 %rs180, [%r1318+1];
ld.shared.u8 %r1320, [%r1318+2];
shr.u32 %r1321, %r1291, 3;
st.shared.u8 [%r1318+1], %r1321;
// If column %r9021 + 2 is at or past %r4057, branch to $L__BB2_635 with
// %r9360 = 0.
add.s32 %r5320, %r9021, 2;
setp.ge.u32 %p609, %r5320, %r4057;
mov.u32 %r9360, %r9026;
@%p609 bra $L__BB2_635;
cvt.u64.u32 %rd363, %r9355;
add.s64 %rd364, %rd363, %rd4;
shl.b64 %rd365, %rd364, 2;
add.s64 %rd366, %rd3, %rd365;
ld.global.u32 %r1322, [%rd366];
setp.eq.s32 %p610, %r1322, 0;
mov.u32 %r9192, 0;
mov.u32 %r9191, %r9192;
@%p610 bra $L__BB2_531;
and.b32 %r5322, %r1322, -2147483648;
abs.s32 %r5323, %r1322;
shl.b32 %r5324, %r5323, %r1009;
or.b32 %r9191, %r5324, %r5322;
$L__BB2_531:
shl.b32 %r5328, %r9191, 1;
shr.u32 %r5329, %r5328, %r46;
and.b32 %r1325, %r5329, -2;
setp.eq.s32 %p611, %r1325, 0;
mov.u32 %r9193, %r9192;
mov.u32 %r9199, %r9192;
@%p611 bra $L__BB2_533;
add.s32 %r5331, %r1325, -1;
clz.b32 %r5332, %r5331;
mov.u32 %r5333, 32;
sub.s32 %r9192, %r5333, %r5332;
shr.u32 %r5334, %r9191, 31;
add.s32 %r5335, %r5334, %r1325;
add.s32 %r9193, %r5335, -2;
mov.u32 %r9199, 1;
$L__BB2_533:
mov.u32 %r9196, 0;
mov.u32 %r9195, %r9196;
@%p493 bra $L__BB2_536;
add.s32 %r5338, %r9355, %r4055;
cvt.u64.u32 %rd367, %r5338;
add.s64 %rd368, %rd367, %rd4;
shl.b64 %rd369, %rd368, 2;
add.s64 %rd370, %rd3, %rd369;
ld.global.u32 %r1331, [%rd370];
setp.eq.s32 %p613, %r1331, 0;
@%p613 bra $L__BB2_536;
and.b32 %r5339, %r1331, -2147483648;
abs.s32 %r5340, %r1331;
shl.b32 %r5341, %r5340, %r1009;
or.b32 %r9195, %r5341, %r5339;
$L__BB2_536:
shl.b32 %r5344, %r9195, 1;
shr.u32 %r5345, %r5344, %r46;
and.b32 %r1334, %r5345, -2;
setp.eq.s32 %p614, %r1334, 0;
mov.u32 %r9197, %r9196;
mov.u32 %r9214, %r9192;
@%p614 bra $L__BB2_538;
or.b32 %r9199, %r9199, 2;
add.s32 %r5346, %r1334, -1;
clz.b32 %r5347, %r5346;
mov.u32 %r5348, 32;
sub.s32 %r9196, %r5348, %r5347;
max.s32 %r9214, %r9192, %r9196;
shr.u32 %r5349, %r9195, 31;
add.s32 %r5350, %r5349, %r1334;
add.s32 %r9197, %r5350, -2;
$L__BB2_538:
add.s32 %r9216, %r9355, 1;
add.s32 %r5355, %r9021, 3;
setp.ge.u32 %p615, %r5355, %r4057;
mov.u32 %r9217, 0;
mov.u32 %r9210, %r9217;
mov.u32 %r9211, %r9217;
mov.u32 %r9212, %r9217;
mov.u32 %r9213, %r9217;
@%p615 bra $L__BB2_549;
cvt.u64.u32 %rd371, %r9216;
add.s64 %rd372, %rd371, %rd4;
shl.b64 %rd373, %rd372, 2;
add.s64 %rd374, %rd3, %rd373;
ld.global.u32 %r1344, [%rd374];
setp.eq.s32 %p616, %r1344, 0;
mov.u32 %r9211, 0;
mov.u32 %r9200, %r9211;
@%p616 bra $L__BB2_541;
and.b32 %r5357, %r1344, -2147483648;
abs.s32 %r5358, %r1344;
shl.b32 %r5359, %r5358, %r1009;
or.b32 %r9200, %r5359, %r5357;
$L__BB2_541:
shl.b32 %r5362, %r9200, 1;
shr.u32 %r5363, %r5362, %r46;
and.b32 %r1347, %r5363, -2;
setp.eq.s32 %p617, %r1347, 0;
mov.u32 %r9213, %r9211;
@%p617 bra $L__BB2_543;
or.b32 %r9199, %r9199, 4;
add.s32 %r5364, %r1347, -1;
clz.b32 %r5365, %r5364;
mov.u32 %r5366, 32;
sub.s32 %r9211, %r5366, %r5365;
max.s32 %r9214, %r9214, %r9211;
shr.u32 %r5367, %r9200, 31;
add.s32 %r5368, %r5367, %r1347;
add.s32 %r9213, %r5368, -2;
$L__BB2_543:
mov.u32 %r9210, 0;
mov.u32 %r9205, %r9210;
@%p493 bra $L__BB2_546;
add.s32 %r5371, %r9216, %r4055;
cvt.u64.u32 %rd375, %r5371;
add.s64 %rd376, %rd375, %rd4;
shl.b64 %rd377, %rd376, 2;
add.s64 %rd378, %rd3, %rd377;
ld.global.u32 %r1356, [%rd378];
setp.eq.s32 %p619, %r1356, 0;
@%p619 bra $L__BB2_546;
and.b32 %r5372, %r1356, -2147483648;
abs.s32 %r5373, %r1356;
shl.b32 %r5374, %r5373, %r1009;
or.b32 %r9205, %r5374, %r5372;
$L__BB2_546:
shl.b32 %r5377, %r9205, 1;
shr.u32 %r5378, %r5377, %r46;
and.b32 %r1359, %r5378, -2;
setp.eq.s32 %p620, %r1359, 0;
mov.u32 %r9212, %r9210;
@%p620 bra $L__BB2_548;
or.b32 %r9199, %r9199, 8;
add.s32 %r5379, %r1359, -1;
clz.b32 %r5380, %r5379;
mov.u32 %r5381, 32;
sub.s32 %r9210, %r5381, %r5380;
max.s32 %r9214, %r9214, %r9210;
shr.u32 %r5382, %r9205, 31;
add.s32 %r5383, %r5382, %r1359;
add.s32 %r9212, %r5383, -2;
$L__BB2_548:
add.s32 %r9216, %r9355, 2;
$L__BB2_549:
mov.u32 %r9355, %r9216;
shr.u32 %r5385, %r1291, 2;
shr.u32 %r5386, %r1267, 1;
or.b32 %r5387, %r5385, %r5386;
cvt.u32.u16 %r5388, %rs180;
and.b32 %r5389, %r5388, 255;
shl.b32 %r5390, %r1320, 2;
add.s32 %r5391, %r5390, %r5389;
or.b32 %r1376, %r5387, %r5391;
add.s32 %r5392, %r9199, -1;
and.b32 %r5393, %r5392, %r9199;
setp.ne.s32 %p621, %r5393, 0;
setp.gt.u16 %p622, %rs178, 2;
and.pred %p623, %p622, %p621;
selp.b32 %r5394, %r9023, 1, %p623;
max.s32 %r1377, %r5394, %r9214;
sub.s32 %r9360, %r1377, %r5394;
setp.lt.s32 %p624, %r9360, 1;
@%p624 bra $L__BB2_551;
setp.eq.s32 %p625, %r9192, %r9214;
selp.u32 %r5395, 1, 0, %p625;
setp.eq.s32 %p626, %r9196, %r9214;
selp.u32 %r5396, -1, 0, %p626;
bfi.b32 %r5397, %r5396, %r5395, 1, 1;
setp.eq.s32 %p627, %r9211, %r9214;
selp.u16 %rs714, 1, 0, %p627;
mul.wide.u16 %r5398, %rs714, 4;
or.b32 %r5399, %r5397, %r5398;
setp.eq.s32 %p628, %r9210, %r9214;
selp.u16 %rs715, 1, 0, %p628;
mul.wide.u16 %r5400, %rs715, 8;
or.b32 %r9217, %r5399, %r5400;
$L__BB2_551:
// Look up a 16-bit table entry and append its bit field to a byte stream that
// is written at descending addresses.
// Table index = (%r1376 << 8) | (%r9199 << 4) | %r9217; entries are 2 bytes
// wide and the table base is %rd18.
// NOTE(review): this looks like the HTJ2K cleanup-pass VLC encode table
// (context, significance pattern, EMB pattern) — confirm against the source.
shl.b32 %r5401, %r9199, 4;
shl.b32 %r5402, %r1376, 8;
or.b32 %r5403, %r5401, %r5402;
or.b32 %r5404, %r5403, %r9217;
mul.wide.u32 %rd380, %r5404, 2;
add.s64 %rd381, %rd18, %rd380;
ld.global.u16 %rs181, [%rd381];
// Entry bits 4..6 = number of bits to emit; zero means nothing to write.
shr.u16 %rs716, %rs181, 4;
and.b16 %rs182, %rs716, 7;
setp.eq.s16 %p629, %rs182, 0;
// %r9229 is the status produced by this span: it keeps %r9080 unless the
// capacity check below fails.
mov.u32 %r9229, %r9080;
@%p629 bra $L__BB2_558;
// %r9218 = bits still to emit, %r9219 = the bits themselves (entry bits 8..15).
cvt.u32.u16 %r9218, %rs182;
shr.u16 %rs717, %rs181, 8;
cvt.u32.u16 %r9219, %rs717;
$L__BB2_553:
mov.u32 %r1383, %r9218;
// Capacity check: once %r8972 (bytes written so far) exceeds 2879 the status
// is set to 1 and the loop is abandoned.
setp.gt.u32 %p630, %r8972, 2879;
mov.u32 %r9229, 1;
@%p630 bra $L__BB2_558;
// free = 8 - %r8970 - %r8971: room left in the byte being assembled
// (%r8971 = bits already placed, %r8970 = one bit withheld after certain bytes).
mov.u32 %r5406, 8;
sub.s32 %r5407, %r5406, %r8970;
sub.s32 %r5408, %r5407, %r8971;
// n = min(free, remaining); take the low n bits of %r9219.
min.u32 %r5409, %r5408, %r1383;
setp.eq.s32 %p631, %r5409, 32;
mov.u32 %r5410, -1;
shl.b32 %r5411, %r5410, %r5409;
not.b32 %r5412, %r5411;
selp.b32 %r5413, -1, %r5412, %p631;
and.b32 %r5414, %r5413, %r9219;
// Place those bits at position %r8971 of the accumulator %rs1165.
shl.b32 %r5415, %r5414, %r8971;
cvt.u16.u32 %rs718, %r5415;
or.b16 %rs1165, %rs1165, %rs718;
add.s32 %r8971, %r5409, %r8971;
sub.s32 %r9218, %r1383, %r5409;
shr.u32 %r9219, %r9219, %r5409;
// free > remaining: the byte is not full yet, so nothing is stored.
setp.gt.u32 %p632, %r5408, %r1383;
@%p632 bra $L__BB2_557;
// The byte's available bits are used up. If a bit was being withheld
// (%r8970 != 0) and the low byte is not 127, the withheld bit is released
// (%r8970 = 0) and the store is skipped so the byte can take one more bit.
setp.ne.s32 %p633, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs719, %rs1165, 255;
setp.ne.s16 %p634, %rs719, 127;
and.pred %p635, %p633, %p634;
@%p635 bra $L__BB2_557;
// Store the byte at %rd1 + %rd5 + (20548 - %r8972): each new byte lands one
// address below the previous one.
mov.u32 %r5418, 20548;
sub.s32 %r5419, %r5418, %r8972;
cvt.u64.u32 %rd382, %r5419;
add.s64 %rd383, %rd382, %rd5;
add.s64 %rd384, %rd1, %rd383;
st.global.u8 [%rd384], %rs1165;
add.s32 %r8972, %r8972, 1;
// A stored byte greater than 143 (0x8F) withholds one bit of the next byte.
// NOTE(review): presumably the bit-stuffing rule of the backward-growing
// HTJ2K VLC stream — confirm.
setp.gt.u16 %p636, %rs719, 143;
selp.u32 %r8970, 1, 0, %p636;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_557:
// Repeat until every bit of the entry has been emitted.
setp.ne.s32 %p637, %r9218, 0;
mov.u32 %r9229, %r9080;
@%p637 bra $L__BB2_553;
$L__BB2_558:
setp.ne.s32 %p638, %r1376, 0;
@%p638 bra $L__BB2_606;
setp.eq.s32 %p639, %r9199, 0;
add.s32 %r5420, %r8524, 17477;
cvt.u64.u32 %rd385, %r5420;
add.s64 %rd386, %rd385, %rd5;
add.s64 %rd20, %rd1, %rd386;
@%p639 bra $L__BB2_598;
shl.b16 %rs1096, %rs1096, 1;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p640, %r8530, 0;
mov.u32 %r9263, %r8733;
@%p640 bra $L__BB2_563;
setp.gt.u32 %p641, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r9263, 1;
@%p641 bra $L__BB2_563;
st.global.u8 [%rd20], %rs1096;
add.s32 %r8524, %r8524, 1;
mov.u32 %r8530, 8;
mov.u16 %rs1096, 0;
mov.u32 %r9263, %r8733;
$L__BB2_563:
setp.lt.u32 %p642, %r8735, 3;
mov.u32 %r9233, 0;
@%p642 bra $L__BB2_566;
setp.lt.u32 %p643, %r8735, 6;
mov.u32 %r9233, 1;
@%p643 bra $L__BB2_566;
setp.lt.u32 %p644, %r8735, 9;
setp.eq.s32 %p645, %r8735, 11;
selp.b32 %r5426, 4, 5, %p645;
setp.lt.u32 %p646, %r8735, 11;
selp.b32 %r5427, 3, %r5426, %p646;
selp.b32 %r9233, 2, %r5427, %p644;
$L__BB2_566:
setp.eq.s32 %p647, %r9233, 0;
@%p647 bra $L__BB2_594;
add.s32 %r1407, %r9233, -1;
and.b32 %r1408, %r9233, 3;
setp.eq.s32 %p648, %r1408, 0;
mov.u32 %r9243, %r9233;
mov.u32 %r9246, %r9263;
@%p648 bra $L__BB2_579;
mov.u32 %r5429, 1;
shl.b32 %r5430, %r5429, %r1407;
and.b32 %r5431, %r5430, %r8736;
setp.ne.s32 %p649, %r5431, 0;
selp.u32 %r5432, 1, 0, %p649;
cvt.u32.u16 %r5433, %rs1096;
bfi.b32 %r5434, %r5433, %r5432, 1, 8;
cvt.u16.u32 %rs1096, %r5434;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p650, %r8530, 0;
mov.u32 %r9246, %r9263;
@%p650 bra $L__BB2_571;
setp.gt.u32 %p651, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r9246, %r5429;
@%p651 bra $L__BB2_571;
add.s32 %r5438, %r8524, 17477;
cvt.u64.u32 %rd387, %r5438;
add.s64 %rd388, %rd387, %rd5;
add.s64 %rd389, %rd1, %rd388;
st.global.u8 [%rd389], %rs1096;
add.s32 %r8524, %r8524, 1;
mov.u32 %r8530, 8;
mov.u16 %rs1096, 0;
mov.u32 %r9246, %r9263;
$L__BB2_571:
setp.eq.s32 %p652, %r1408, 1;
mov.u32 %r9263, %r9246;
mov.u32 %r9243, %r1407;
@%p652 bra $L__BB2_579;
add.s32 %r9243, %r9233, -2;
mov.u32 %r5439, 1;
shl.b32 %r5440, %r5439, %r9243;
and.b32 %r5441, %r5440, %r8736;
setp.ne.s32 %p653, %r5441, 0;
selp.u32 %r5442, 1, 0, %p653;
cvt.u32.u16 %r5443, %rs1096;
bfi.b32 %r5444, %r5443, %r5442, 1, 8;
cvt.u16.u32 %rs1096, %r5444;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p654, %r8530, 0;
mov.u32 %r9237, %r9246;
@%p654 bra $L__BB2_575;
setp.gt.u32 %p655, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r9237, %r5439;
@%p655 bra $L__BB2_575;
add.s32 %r5447, %r8524, 17477;
cvt.u64.u32 %rd390, %r5447;
add.s64 %rd391, %rd390, %rd5;
add.s64 %rd392, %rd1, %rd391;
and.b16 %rs726, %rs1096, 255;
st.global.u8 [%rd392], %rs1096;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p656, %rs726, 255;
selp.b32 %r8530, 7, 8, %p656;
mov.u16 %rs1096, 0;
mov.u32 %r9237, %r9246;
$L__BB2_575:
setp.eq.s32 %p657, %r1408, 2;
mov.u32 %r9263, %r9237;
mov.u32 %r9246, %r9237;
@%p657 bra $L__BB2_579;
add.s32 %r9243, %r9233, -3;
mov.u32 %r5448, 1;
shl.b32 %r5449, %r5448, %r9243;
and.b32 %r5450, %r5449, %r8736;
setp.ne.s32 %p658, %r5450, 0;
selp.u32 %r5451, 1, 0, %p658;
cvt.u32.u16 %r5452, %rs1096;
bfi.b32 %r5453, %r5452, %r5451, 1, 8;
cvt.u16.u32 %rs1096, %r5453;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p659, %r8530, 0;
mov.u32 %r9263, %r9237;
mov.u32 %r9246, %r9237;
@%p659 bra $L__BB2_579;
setp.gt.u32 %p660, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r9263, %r5448;
mov.u32 %r9246, %r5448;
@%p660 bra $L__BB2_579;
add.s32 %r5458, %r8524, 17477;
cvt.u64.u32 %rd393, %r5458;
add.s64 %rd394, %rd393, %rd5;
add.s64 %rd395, %rd1, %rd394;
and.b16 %rs729, %rs1096, 255;
st.global.u8 [%rd395], %rs1096;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p661, %rs729, 255;
selp.b32 %r8530, 7, 8, %p661;
mov.u16 %rs1096, 0;
mov.u32 %r9263, %r9237;
mov.u32 %r9246, %r9237;
$L__BB2_579:
setp.lt.u32 %p662, %r1407, 3;
@%p662 bra $L__BB2_594;
mov.u32 %r9263, %r9246;
$L__BB2_581:
add.s32 %r5459, %r9243, -1;
mov.u32 %r5460, 1;
shl.b32 %r5461, %r5460, %r5459;
and.b32 %r5462, %r5461, %r8736;
setp.ne.s32 %p663, %r5462, 0;
selp.u32 %r5463, 1, 0, %p663;
cvt.u32.u16 %r5464, %rs1096;
bfi.b32 %r9252, %r5464, %r5463, 1, 8;
add.s32 %r9253, %r8530, -1;
setp.ne.s32 %p664, %r9253, 0;
mov.u32 %r9251, %r9263;
@%p664 bra $L__BB2_584;
setp.gt.u32 %p665, %r8524, 191;
mov.u32 %r9253, 0;
mov.u32 %r9251, %r5460;
@%p665 bra $L__BB2_584;
cvt.u16.u32 %rs730, %r9252;
and.b16 %rs731, %rs730, 255;
add.s32 %r5468, %r8524, 17477;
cvt.u64.u32 %rd396, %r5468;
add.s64 %rd397, %rd396, %rd5;
add.s64 %rd398, %rd1, %rd397;
st.global.u8 [%rd398], %rs730;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p666, %rs731, 255;
selp.b32 %r9253, 7, 8, %p666;
mov.u32 %r9252, 0;
mov.u32 %r9251, %r9263;
$L__BB2_584:
add.s32 %r5469, %r9243, -2;
shl.b32 %r5471, %r5460, %r5469;
and.b32 %r5472, %r5471, %r8736;
setp.ne.s32 %p667, %r5472, 0;
and.b32 %r5473, %r9252, 127;
selp.u32 %r5474, 1, 0, %p667;
bfi.b32 %r9256, %r5473, %r5474, 1, 7;
add.s32 %r9257, %r9253, -1;
setp.ne.s32 %p668, %r9257, 0;
mov.u32 %r9255, %r9251;
@%p668 bra $L__BB2_587;
setp.gt.u32 %p669, %r8524, 191;
mov.u32 %r9257, 0;
mov.u32 %r9255, 1;
@%p669 bra $L__BB2_587;
cvt.u16.u32 %rs732, %r9256;
and.b16 %rs733, %rs732, 255;
add.s32 %r5478, %r8524, 17477;
cvt.u64.u32 %rd399, %r5478;
add.s64 %rd400, %rd399, %rd5;
add.s64 %rd401, %rd1, %rd400;
st.global.u8 [%rd401], %rs732;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p670, %rs733, 255;
selp.b32 %r9257, 7, 8, %p670;
mov.u32 %r9256, 0;
mov.u32 %r9255, %r9251;
$L__BB2_587:
add.s32 %r5479, %r9243, -3;
mov.u32 %r5480, 1;
shl.b32 %r5481, %r5480, %r5479;
and.b32 %r5482, %r5481, %r8736;
setp.ne.s32 %p671, %r5482, 0;
and.b32 %r5483, %r9256, 127;
selp.u32 %r5484, 1, 0, %p671;
bfi.b32 %r9260, %r5483, %r5484, 1, 7;
add.s32 %r9261, %r9257, -1;
setp.ne.s32 %p672, %r9261, 0;
mov.u32 %r9259, %r9255;
@%p672 bra $L__BB2_590;
setp.gt.u32 %p673, %r8524, 191;
mov.u32 %r9261, 0;
mov.u32 %r9259, %r5480;
@%p673 bra $L__BB2_590;
cvt.u16.u32 %rs734, %r9260;
and.b16 %rs735, %rs734, 255;
add.s32 %r5488, %r8524, 17477;
cvt.u64.u32 %rd402, %r5488;
add.s64 %rd403, %rd402, %rd5;
add.s64 %rd404, %rd1, %rd403;
st.global.u8 [%rd404], %rs734;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p674, %rs735, 255;
selp.b32 %r9261, 7, 8, %p674;
mov.u32 %r9260, 0;
mov.u32 %r9259, %r9255;
$L__BB2_590:
add.s32 %r9243, %r9243, -4;
shl.b32 %r5490, %r5480, %r9243;
and.b32 %r5491, %r5490, %r8736;
setp.ne.s32 %p675, %r5491, 0;
and.b32 %r5492, %r9260, 127;
selp.u32 %r5493, 1, 0, %p675;
bfi.b32 %r5494, %r5492, %r5493, 1, 15;
cvt.u16.u32 %rs1096, %r5494;
add.s32 %r8530, %r9261, -1;
setp.ne.s32 %p676, %r8530, 0;
mov.u32 %r9263, %r9259;
@%p676 bra $L__BB2_593;
setp.gt.u32 %p677, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r9263, 1;
@%p677 bra $L__BB2_593;
add.s32 %r5497, %r8524, 17477;
cvt.u64.u32 %rd405, %r5497;
add.s64 %rd406, %rd405, %rd5;
add.s64 %rd407, %rd1, %rd406;
and.b16 %rs737, %rs1096, 255;
st.global.u8 [%rd407], %rs1096;
add.s32 %r8524, %r8524, 1;
setp.eq.s16 %p678, %rs737, 255;
selp.b32 %r8530, 7, 8, %p678;
mov.u16 %rs1096, 0;
mov.u32 %r9263, %r9259;
$L__BB2_593:
setp.ne.s32 %p679, %r9243, 0;
@%p679 bra $L__BB2_581;
$L__BB2_594:
add.s32 %r5499, %r8735, -1;
setp.eq.s32 %p680, %r8735, 0;
mov.u32 %r8736, 0;
selp.b32 %r8735, 0, %r5499, %p680;
setp.lt.u32 %p681, %r8735, 3;
mov.u32 %r9269, %r8736;
@%p681 bra $L__BB2_597;
setp.lt.u32 %p682, %r8735, 6;
mov.u32 %r9269, 1;
@%p682 bra $L__BB2_597;
setp.lt.u32 %p683, %r8735, 9;
setp.eq.s32 %p684, %r8735, 11;
selp.b32 %r5501, 4, 5, %p684;
setp.lt.u32 %p685, %r8735, 11;
selp.b32 %r5502, 3, %r5501, %p685;
selp.b32 %r9269, 2, %r5502, %p683;
$L__BB2_597:
mov.u32 %r5504, 1;
shl.b32 %r8734, %r5504, %r9269;
mov.u32 %r8733, %r9263;
bra.uni $L__BB2_606;
$L__BB2_598:
// Path taken when %r9199 == 0: advance the run counter %r8736 and, once it
// reaches the threshold %r8734, emit a single 1 bit and raise the state index.
// NOTE(review): presumably the MEL (adaptive run-length) coder of the HTJ2K
// cleanup pass, with %r8735 as the MEL state k — confirm.
add.s32 %r8736, %r8736, 1;
setp.lt.u32 %p686, %r8736, %r8734;
@%p686 bra $L__BB2_606;
// Threshold reached: shift a 1 into the bit accumulator %rs1096 and consume
// one of the %r8530 bits still free in the current byte.
shl.b16 %rs738, %rs1096, 1;
or.b16 %rs1096, %rs738, 1;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p687, %r8530, 0;
// %r9270 carries the status forward: %r8733 unless the capacity check fails.
mov.u32 %r9270, %r8733;
@%p687 bra $L__BB2_602;
// The byte is complete. Capacity check: with more than 191 bytes already
// written (%r8524) the status becomes 1 and the byte is not stored.
setp.gt.u32 %p688, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r9270, 1;
@%p688 bra $L__BB2_602;
// Store at %rd20, which the code before this span sets to
// %rd1 + %rd5 + %r8524 + 17477.
and.b16 %rs740, %rs1096, 255;
st.global.u8 [%rd20], %rs1096;
add.s32 %r8524, %r8524, 1;
// After a 0xFF byte the next byte only gets 7 bits, otherwise 8.
setp.eq.s16 %p689, %rs740, 255;
selp.b32 %r8530, 7, 8, %p689;
mov.u16 %rs1096, 0;
mov.u32 %r9270, %r8733;
$L__BB2_602:
// State index goes up by one, saturating at 12; the run counter restarts at 0.
add.s32 %r5508, %r8735, 1;
min.u32 %r8735, %r5508, 12;
setp.lt.u32 %p690, %r8735, 3;
mov.u32 %r8736, 0;
// Map the state index to an exponent in %r9273:
//   0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, 12 -> 5.
mov.u32 %r9273, %r8736;
@%p690 bra $L__BB2_605;
setp.lt.u32 %p691, %r8735, 6;
mov.u32 %r9273, 1;
@%p691 bra $L__BB2_605;
setp.lt.u32 %p692, %r8735, 9;
setp.eq.s32 %p693, %r8735, 11;
selp.b32 %r5510, 4, 5, %p693;
setp.lt.u32 %p694, %r8735, 11;
selp.b32 %r5511, 3, %r5510, %p694;
selp.b32 %r9273, 2, %r5511, %p692;
$L__BB2_605:
// New threshold = 1 << exponent; publish the status in %r8733.
mov.u32 %r5513, 1;
shl.b32 %r8734, %r5513, %r9273;
mov.u32 %r8733, %r9270;
$L__BB2_606:
// Emit the bits of %r9193 (the value derived for the first word, mask bit 0
// of %r9199) into a byte stream written at ascending addresses. The three
// blocks that follow repeat the same loop for mask bits 1, 2 and 3.
// NOTE(review): presumably the HTJ2K MagSgn bit-stream — confirm.
// %r1491 = low 4 bits of the table entry %rs181, one flag bit per word.
and.b16 %rs741, %rs181, 15;
cvt.u32.u16 %r1491, %rs741;
and.b32 %r5514, %r9199, 1;
setp.eq.b32 %p695, %r5514, 1;
// The xor with a constant false predicate is a no-op; the net effect is
// "skip this word when mask bit 0 is clear".
mov.pred %p696, 0;
xor.pred %p697, %p695, %p696;
not.pred %p698, %p697;
// %r9290 is the status leaving this span: %r9186 unless the capacity check fails.
mov.u32 %r9290, %r9186;
@%p698 bra $L__BB2_613;
// Bit count for this word = %r1377 minus flag bit 0 of the entry.
and.b32 %r5515, %r1491, 1;
sub.s32 %r9280, %r1377, %r5515;
setp.eq.s32 %p699, %r9280, 0;
mov.u32 %r9290, %r9186;
@%p699 bra $L__BB2_613;
// Keep only the low %r9280 bits of the value.
mov.u32 %r5516, -1;
shl.b32 %r5517, %r5516, %r9280;
not.b32 %r5518, %r5517;
and.b32 %r9281, %r9193, %r5518;
$L__BB2_609:
// Capacity check: once the write index %r9160 exceeds 17476 the status is
// set to 1 and the loop is abandoned.
setp.gt.u32 %p700, %r9160, 17476;
mov.u32 %r9290, 1;
@%p700 bra $L__BB2_613;
// n = min(%r9159 - %r9158, remaining): %r9159 is the capacity of the current
// byte (7 or 8 bits) and %r9158 the number of bits already placed in it.
sub.s32 %r5520, %r9159, %r9158;
min.u32 %r5521, %r5520, %r9280;
setp.eq.s32 %p701, %r5521, 32;
mov.u32 %r5522, -1;
shl.b32 %r5523, %r5522, %r5521;
not.b32 %r5524, %r5523;
selp.b32 %r5525, -1, %r5524, %p701;
// OR the low n bits of the value into the accumulator %r9157 at bit %r9158.
and.b32 %r5526, %r5525, %r9281;
shl.b32 %r5527, %r5526, %r9158;
or.b32 %r9157, %r5527, %r9157;
add.s32 %r9158, %r5521, %r9158;
shr.u32 %r9281, %r9281, %r5521;
sub.s32 %r9280, %r9280, %r5521;
// Byte not yet full: skip the store.
setp.lt.u32 %p702, %r9158, %r9159;
@%p702 bra $L__BB2_612;
// Byte full: store it at %rd1 + %rd5 + %r9160 and advance the write index.
cvt.u64.u32 %rd408, %r9160;
add.s64 %rd409, %rd408, %rd5;
add.s64 %rd410, %rd1, %rd409;
st.global.u8 [%rd410], %r9157;
add.s32 %r9160, %r9160, 1;
// After a 0xFF byte the next byte only holds 7 bits, otherwise 8.
setp.eq.s32 %p703, %r9157, 255;
selp.b32 %r9159, 7, 8, %p703;
mov.u32 %r9157, 0;
mov.u32 %r9158, %r9157;
$L__BB2_612:
// Repeat until all bits of this word are written.
setp.ne.s32 %p704, %r9280, 0;
mov.u32 %r9290, %r9186;
@%p704 bra $L__BB2_609;
$L__BB2_613:
and.b32 %r1515, %r9199, 2;
setp.eq.s32 %p705, %r1515, 0;
mov.u32 %r9305, %r9290;
@%p705 bra $L__BB2_620;
shr.u32 %r5530, %r1491, 1;
and.b32 %r5531, %r5530, 1;
sub.s32 %r9295, %r1377, %r5531;
setp.eq.s32 %p706, %r9295, 0;
mov.u32 %r9305, %r9290;
@%p706 bra $L__BB2_620;
mov.u32 %r5532, -1;
shl.b32 %r5533, %r5532, %r9295;
not.b32 %r5534, %r5533;
and.b32 %r9296, %r9197, %r5534;
$L__BB2_616:
setp.gt.u32 %p707, %r9160, 17476;
mov.u32 %r9305, 1;
@%p707 bra $L__BB2_620;
sub.s32 %r5536, %r9159, %r9158;
min.u32 %r5537, %r5536, %r9295;
setp.eq.s32 %p708, %r5537, 32;
mov.u32 %r5538, -1;
shl.b32 %r5539, %r5538, %r5537;
not.b32 %r5540, %r5539;
selp.b32 %r5541, -1, %r5540, %p708;
and.b32 %r5542, %r5541, %r9296;
shl.b32 %r5543, %r5542, %r9158;
or.b32 %r9157, %r5543, %r9157;
add.s32 %r9158, %r5537, %r9158;
shr.u32 %r9296, %r9296, %r5537;
sub.s32 %r9295, %r9295, %r5537;
setp.lt.u32 %p709, %r9158, %r9159;
@%p709 bra $L__BB2_619;
cvt.u64.u32 %rd411, %r9160;
add.s64 %rd412, %rd411, %rd5;
add.s64 %rd413, %rd1, %rd412;
st.global.u8 [%rd413], %r9157;
add.s32 %r9160, %r9160, 1;
setp.eq.s32 %p710, %r9157, 255;
selp.b32 %r9159, 7, 8, %p710;
mov.u32 %r9157, 0;
mov.u32 %r9158, %r9157;
$L__BB2_619:
setp.ne.s32 %p711, %r9295, 0;
mov.u32 %r9305, %r9290;
@%p711 bra $L__BB2_616;
$L__BB2_620:
and.b32 %r1539, %r9199, 4;
setp.eq.s32 %p712, %r1539, 0;
mov.u32 %r9320, %r9305;
@%p712 bra $L__BB2_627;
shr.u32 %r5546, %r1491, 2;
and.b32 %r5547, %r5546, 1;
sub.s32 %r9310, %r1377, %r5547;
setp.eq.s32 %p713, %r9310, 0;
mov.u32 %r9320, %r9305;
@%p713 bra $L__BB2_627;
mov.u32 %r5548, -1;
shl.b32 %r5549, %r5548, %r9310;
not.b32 %r5550, %r5549;
and.b32 %r9311, %r9213, %r5550;
$L__BB2_623:
setp.gt.u32 %p714, %r9160, 17476;
mov.u32 %r9320, 1;
@%p714 bra $L__BB2_627;
sub.s32 %r5552, %r9159, %r9158;
min.u32 %r5553, %r5552, %r9310;
setp.eq.s32 %p715, %r5553, 32;
mov.u32 %r5554, -1;
shl.b32 %r5555, %r5554, %r5553;
not.b32 %r5556, %r5555;
selp.b32 %r5557, -1, %r5556, %p715;
and.b32 %r5558, %r5557, %r9311;
shl.b32 %r5559, %r5558, %r9158;
or.b32 %r9157, %r5559, %r9157;
add.s32 %r9158, %r5553, %r9158;
shr.u32 %r9311, %r9311, %r5553;
sub.s32 %r9310, %r9310, %r5553;
setp.lt.u32 %p716, %r9158, %r9159;
@%p716 bra $L__BB2_626;
cvt.u64.u32 %rd414, %r9160;
add.s64 %rd415, %rd414, %rd5;
add.s64 %rd416, %rd1, %rd415;
st.global.u8 [%rd416], %r9157;
add.s32 %r9160, %r9160, 1;
setp.eq.s32 %p717, %r9157, 255;
selp.b32 %r9159, 7, 8, %p717;
mov.u32 %r9157, 0;
mov.u32 %r9158, %r9157;
$L__BB2_626:
setp.ne.s32 %p718, %r9310, 0;
mov.u32 %r9320, %r9305;
@%p718 bra $L__BB2_623;
$L__BB2_627:
and.b32 %r1563, %r9199, 8;
setp.eq.s32 %p719, %r1563, 0;
mov.u32 %r9186, %r9320;
@%p719 bra $L__BB2_634;
shr.u32 %r5562, %r1491, 3;
sub.s32 %r9325, %r1377, %r5562;
setp.eq.s32 %p720, %r9325, 0;
mov.u32 %r9186, %r9320;
@%p720 bra $L__BB2_634;
mov.u32 %r5563, -1;
shl.b32 %r5564, %r5563, %r9325;
not.b32 %r5565, %r5564;
and.b32 %r9326, %r9212, %r5565;
$L__BB2_630:
setp.gt.u32 %p721, %r9160, 17476;
mov.u32 %r9186, 1;
@%p721 bra $L__BB2_634;
sub.s32 %r5567, %r9159, %r9158;
min.u32 %r5568, %r5567, %r9325;
setp.eq.s32 %p722, %r5568, 32;
mov.u32 %r5569, -1;
shl.b32 %r5570, %r5569, %r5568;
not.b32 %r5571, %r5570;
selp.b32 %r5572, -1, %r5571, %p722;
and.b32 %r5573, %r5572, %r9326;
shl.b32 %r5574, %r5573, %r9158;
or.b32 %r9157, %r5574, %r9157;
add.s32 %r9158, %r5568, %r9158;
shr.u32 %r9326, %r9326, %r5568;
sub.s32 %r9325, %r9325, %r5568;
setp.lt.u32 %p723, %r9158, %r9159;
@%p723 bra $L__BB2_633;
cvt.u64.u32 %rd417, %r9160;
add.s64 %rd418, %rd417, %rd5;
add.s64 %rd419, %rd1, %rd418;
st.global.u8 [%rd419], %r9157;
add.s32 %r9160, %r9160, 1;
setp.eq.s32 %p724, %r9157, 255;
selp.b32 %r9159, 7, 8, %p724;
mov.u32 %r9157, 0;
mov.u32 %r9158, %r9157;
$L__BB2_633:
setp.ne.s32 %p725, %r9325, 0;
mov.u32 %r9186, %r9320;
@%p725 bra $L__BB2_630;
$L__BB2_634:
and.b32 %r5577, %r9196, 255;
and.b32 %r5578, %r9061, 255;
setp.lt.u32 %p726, %r5577, %r5578;
cvt.u16.u32 %rs742, %r9196;
selp.b16 %rs743, %rs179, %rs742, %p726;
st.shared.u8 [%r1315+1], %rs743;
ld.shared.u8 %rs744, [%r1315+3];
setp.gt.u16 %p727, %rs177, %rs744;
add.s32 %r9356, %r9356, 1;
add.s32 %r5579, %r9024, 3;
selp.b32 %r5580, %r9356, %r5579, %p727;
add.s32 %r5582, %r4103, %r5580;
ld.shared.u8 %r5583, [%r5582];
add.s32 %r9023, %r5583, -1;
shr.u32 %r5584, %r1515, 1;
or.b32 %r5585, %r1321, %r5584;
st.shared.u8 [%r1315+2], %r9210;
st.shared.u8 [%r1318+1], %r5585;
ld.shared.u8 %rs745, [%r1318+3];
mul.wide.u16 %r5586, %rs745, 4;
add.s32 %r5587, %r5586, %r1320;
shr.u32 %r5588, %r1563, 3;
st.shared.u8 [%r1318+2], %r5588;
shr.u32 %r5589, %r1563, 2;
shr.u32 %r5590, %r1539, 1;
or.b32 %r5591, %r5589, %r5590;
or.b32 %r9026, %r5591, %r5587;
add.s32 %r9022, %r9022, 1;
mov.u32 %r9080, %r9229;
$L__BB2_635:
mov.u32 %r9024, %r9356;
mov.u32 %r9025, %r9355;
max.s32 %r5592, %r9360, 0;
mul.lo.s32 %r5593, %r1106, 6;
setp.gt.s32 %p728, %r1106, 0;
selp.b32 %r5594, %r5593, 0, %p728;
cvt.u64.u32 %rd420, %r5594;
add.s64 %rd21, %rd17, %rd420;
ld.global.u8 %rs205, [%rd21+1];
add.s32 %r5595, %r5594, 2;
cvt.u64.u32 %rd421, %r5595;
add.s64 %rd422, %rd17, %rd421;
ld.global.u8 %rs206, [%rd422];
ld.global.u8 %rs207, [%rd422+1];
mul.lo.s32 %r5596, %r5592, 6;
cvt.u64.u32 %rd423, %r5596;
add.s64 %rd424, %rd17, %rd423;
ld.global.u8 %rs208, [%rd424];
ld.global.u8 %rs209, [%rd424+1];
add.s32 %r5597, %r5596, 2;
cvt.u64.u32 %rd425, %r5597;
add.s64 %rd426, %rd17, %rd425;
ld.global.u8 %rs210, [%rd426];
ld.global.u8 %rs211, [%rd426+1];
setp.eq.s16 %p729, %rs205, 0;
mov.u32 %r9372, %r9080;
@%p729 bra $L__BB2_642;
ld.global.u8 %r9362, [%rd21];
cvt.u32.u16 %r9361, %rs205;
$L__BB2_637:
mov.u32 %r1614, %r9361;
setp.gt.u32 %p730, %r8972, 2879;
mov.u32 %r9372, 1;
@%p730 bra $L__BB2_642;
mov.u32 %r5599, 8;
sub.s32 %r5600, %r5599, %r8970;
sub.s32 %r5601, %r5600, %r8971;
min.u32 %r5602, %r5601, %r1614;
setp.eq.s32 %p731, %r5602, 32;
mov.u32 %r5603, -1;
shl.b32 %r5604, %r5603, %r5602;
not.b32 %r5605, %r5604;
selp.b32 %r5606, -1, %r5605, %p731;
and.b32 %r5607, %r5606, %r9362;
shl.b32 %r5608, %r5607, %r8971;
cvt.u16.u32 %rs746, %r5608;
or.b16 %rs1165, %rs1165, %rs746;
add.s32 %r8971, %r5602, %r8971;
sub.s32 %r9361, %r1614, %r5602;
shr.u32 %r9362, %r9362, %r5602;
setp.gt.u32 %p732, %r5601, %r1614;
@%p732 bra $L__BB2_641;
setp.ne.s32 %p733, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs747, %rs1165, 255;
setp.ne.s16 %p734, %rs747, 127;
and.pred %p735, %p733, %p734;
@%p735 bra $L__BB2_641;
mov.u32 %r5611, 20548;
sub.s32 %r5612, %r5611, %r8972;
cvt.u64.u32 %rd427, %r5612;
add.s64 %rd428, %rd427, %rd5;
add.s64 %rd429, %rd1, %rd428;
st.global.u8 [%rd429], %rs1165;
add.s32 %r8972, %r8972, 1;
setp.gt.u16 %p736, %rs747, 143;
selp.u32 %r8970, 1, 0, %p736;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_641:
setp.ne.s32 %p737, %r9361, 0;
mov.u32 %r9372, %r9080;
@%p737 bra $L__BB2_637;
$L__BB2_642:
setp.eq.s16 %p738, %rs209, 0;
mov.u32 %r9384, %r9372;
@%p738 bra $L__BB2_649;
cvt.u32.u16 %r5613, %rs208;
and.b32 %r9374, %r5613, 255;
cvt.u32.u16 %r5614, %rs209;
and.b32 %r9373, %r5614, 255;
$L__BB2_644:
mov.u32 %r1633, %r9373;
setp.gt.u32 %p739, %r8972, 2879;
mov.u32 %r9384, 1;
@%p739 bra $L__BB2_649;
mov.u32 %r5616, 8;
sub.s32 %r5617, %r5616, %r8970;
sub.s32 %r5618, %r5617, %r8971;
min.u32 %r5619, %r5618, %r1633;
setp.eq.s32 %p740, %r5619, 32;
mov.u32 %r5620, -1;
shl.b32 %r5621, %r5620, %r5619;
not.b32 %r5622, %r5621;
selp.b32 %r5623, -1, %r5622, %p740;
and.b32 %r5624, %r5623, %r9374;
shl.b32 %r5625, %r5624, %r8971;
cvt.u16.u32 %rs751, %r5625;
or.b16 %rs1165, %rs1165, %rs751;
add.s32 %r8971, %r5619, %r8971;
sub.s32 %r9373, %r1633, %r5619;
shr.u32 %r9374, %r9374, %r5619;
setp.gt.u32 %p741, %r5618, %r1633;
@%p741 bra $L__BB2_648;
setp.ne.s32 %p742, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs752, %rs1165, 255;
setp.ne.s16 %p743, %rs752, 127;
and.pred %p744, %p742, %p743;
@%p744 bra $L__BB2_648;
mov.u32 %r5628, 20548;
sub.s32 %r5629, %r5628, %r8972;
cvt.u64.u32 %rd430, %r5629;
add.s64 %rd431, %rd430, %rd5;
add.s64 %rd432, %rd1, %rd431;
st.global.u8 [%rd432], %rs1165;
add.s32 %r8972, %r8972, 1;
setp.gt.u16 %p745, %rs752, 143;
selp.u32 %r8970, 1, 0, %p745;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_648:
setp.ne.s32 %p746, %r9373, 0;
mov.u32 %r9384, %r9372;
@%p746 bra $L__BB2_644;
$L__BB2_649:
setp.eq.s16 %p747, %rs207, 0;
mov.u32 %r9396, %r9384;
@%p747 bra $L__BB2_656;
cvt.u32.u16 %r5630, %rs207;
and.b32 %r9385, %r5630, 255;
cvt.u32.u16 %r5631, %rs206;
and.b32 %r9386, %r5631, 255;
$L__BB2_651:
mov.u32 %r1652, %r9385;
setp.gt.u32 %p748, %r8972, 2879;
mov.u32 %r9396, 1;
@%p748 bra $L__BB2_656;
mov.u32 %r5633, 8;
sub.s32 %r5634, %r5633, %r8970;
sub.s32 %r5635, %r5634, %r8971;
min.u32 %r5636, %r5635, %r1652;
setp.eq.s32 %p749, %r5636, 32;
mov.u32 %r5637, -1;
shl.b32 %r5638, %r5637, %r5636;
not.b32 %r5639, %r5638;
selp.b32 %r5640, -1, %r5639, %p749;
and.b32 %r5641, %r5640, %r9386;
shl.b32 %r5642, %r5641, %r8971;
cvt.u16.u32 %rs756, %r5642;
or.b16 %rs1165, %rs1165, %rs756;
add.s32 %r8971, %r5636, %r8971;
sub.s32 %r9385, %r1652, %r5636;
shr.u32 %r9386, %r9386, %r5636;
setp.gt.u32 %p750, %r5635, %r1652;
@%p750 bra $L__BB2_655;
setp.ne.s32 %p751, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs757, %rs1165, 255;
setp.ne.s16 %p752, %rs757, 127;
and.pred %p753, %p751, %p752;
@%p753 bra $L__BB2_655;
mov.u32 %r5645, 20548;
sub.s32 %r5646, %r5645, %r8972;
cvt.u64.u32 %rd433, %r5646;
add.s64 %rd434, %rd433, %rd5;
add.s64 %rd435, %rd1, %rd434;
st.global.u8 [%rd435], %rs1165;
add.s32 %r8972, %r8972, 1;
setp.gt.u16 %p754, %rs757, 143;
selp.u32 %r8970, 1, 0, %p754;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_655:
setp.ne.s32 %p755, %r9385, 0;
mov.u32 %r9396, %r9384;
@%p755 bra $L__BB2_651;
$L__BB2_656:
setp.eq.s16 %p756, %rs211, 0;
mov.u32 %r8969, %r9396;
@%p756 bra $L__BB2_663;
cvt.u32.u16 %r5647, %rs210;
and.b32 %r9398, %r5647, 255;
cvt.u32.u16 %r5648, %rs211;
and.b32 %r9397, %r5648, 255;
$L__BB2_658:
mov.u32 %r1671, %r9397;
setp.gt.u32 %p757, %r8972, 2879;
mov.u32 %r8969, 1;
@%p757 bra $L__BB2_663;
mov.u32 %r5650, 8;
sub.s32 %r5651, %r5650, %r8970;
sub.s32 %r5652, %r5651, %r8971;
min.u32 %r5653, %r5652, %r1671;
setp.eq.s32 %p758, %r5653, 32;
mov.u32 %r5654, -1;
shl.b32 %r5655, %r5654, %r5653;
not.b32 %r5656, %r5655;
selp.b32 %r5657, -1, %r5656, %p758;
and.b32 %r5658, %r5657, %r9398;
shl.b32 %r5659, %r5658, %r8971;
cvt.u16.u32 %rs761, %r5659;
or.b16 %rs1165, %rs1165, %rs761;
add.s32 %r8971, %r5653, %r8971;
sub.s32 %r9397, %r1671, %r5653;
shr.u32 %r9398, %r9398, %r5653;
setp.gt.u32 %p759, %r5652, %r1671;
@%p759 bra $L__BB2_662;
setp.ne.s32 %p760, %r8970, 0;
mov.u32 %r8970, 0;
and.b16 %rs762, %rs1165, 255;
setp.ne.s16 %p761, %rs762, 127;
and.pred %p762, %p760, %p761;
@%p762 bra $L__BB2_662;
mov.u32 %r5662, 20548;
sub.s32 %r5663, %r5662, %r8972;
cvt.u64.u32 %rd436, %r5663;
add.s64 %rd437, %rd436, %rd5;
add.s64 %rd438, %rd1, %rd437;
st.global.u8 [%rd438], %rs1165;
add.s32 %r8972, %r8972, 1;
setp.gt.u16 %p763, %rs762, 143;
selp.u32 %r8970, 1, 0, %p763;
mov.u32 %r8971, 0;
mov.u16 %rs1165, 0;
$L__BB2_662:
setp.ne.s32 %p764, %r9397, 0;
mov.u32 %r8969, %r9396;
@%p764 bra $L__BB2_658;
$L__BB2_663:
add.s32 %r9021, %r9021, 4;
setp.lt.u32 %p765, %r9021, %r4057;
@%p765 bra $L__BB2_423;
$L__BB2_664:
add.s32 %r9005, %r9005, 2;
setp.lt.u32 %p766, %r9005, %r4058;
@%p766 bra $L__BB2_421;
$L__BB2_665:
// After the main loops: if the run counter %r8736 is non-zero, emit one final
// 1 bit into the bit accumulator %rs1096. %r9436 receives the status
// (%r8733, or 1 if the capacity check fails).
// NOTE(review): presumably the termination of a pending MEL run — confirm.
setp.eq.s32 %p767, %r8736, 0;
mov.u32 %r9436, %r8733;
@%p767 bra $L__BB2_669;
// Shift in a 1 and consume one of the %r8530 free bits of the current byte.
shl.b16 %rs765, %rs1096, 1;
or.b16 %rs1096, %rs765, 1;
add.s32 %r8530, %r8530, -1;
setp.ne.s32 %p768, %r8530, 0;
mov.u32 %r9436, %r8733;
@%p768 bra $L__BB2_669;
// Byte complete. With more than 191 bytes already written the status becomes
// 1 and the byte is dropped.
setp.gt.u32 %p769, %r8524, 191;
mov.u32 %r8530, 0;
mov.u32 %r9436, 1;
@%p769 bra $L__BB2_669;
// Store at %rd1 + %rd5 + %r8524 + 17477 and advance the byte count.
add.s32 %r5666, %r8524, 17477;
cvt.u64.u32 %rd439, %r5666;
add.s64 %rd440, %rd439, %rd5;
add.s64 %rd441, %rd1, %rd440;
and.b16 %rs767, %rs1096, 255;
st.global.u8 [%rd441], %rs1096;
add.s32 %r8524, %r8524, 1;
// After a 0xFF byte the next byte only gets 7 bits, otherwise 8.
setp.eq.s16 %p770, %rs767, 255;
selp.b32 %r8530, 7, 8, %p770;
mov.u16 %rs1096, 0;
mov.u32 %r9436, %r8733;
$L__BB2_669:
cvt.u32.u16 %r5667, %rs1096;
and.b32 %r5668, %r5667, 255;
shl.b32 %r5669, %r5668, %r8530;
cvt.u16.u32 %rs234, %r5669;
mov.u32 %r5670, -1;
shl.b32 %r5671, %r5670, %r8971;
not.b32 %r5672, %r5671;
mov.u32 %r5673, 255;
and.b32 %r5674, %r5672, 255;
setp.eq.s32 %p771, %r8971, 0;
selp.b32 %r1723, 0, %r5674, %p771;
shl.b32 %r1724, %r5673, %r8530;
and.b32 %r5675, %r1724, 255;
or.b32 %r5676, %r5675, %r1723;
setp.eq.s32 %p772, %r5676, 0;
mov.u32 %r9439, %r9436;
mov.u32 %r9441, %r8969;
@%p772 bra $L__BB2_675;
or.b16 %rs235, %rs1165, %rs234;
and.b16 %rs768, %rs235, 255;
xor.b16 %rs769, %rs235, %rs234;
cvt.u32.u16 %r5677, %rs769;
and.b32 %r5678, %r1724, %r5677;
and.b32 %r5679, %r5678, 255;
xor.b16 %rs770, %rs235, %rs1165;
cvt.u32.u16 %r5680, %rs770;
and.b32 %r5681, %r1723, %r5680;
or.b32 %r5682, %r5679, %r5681;
setp.eq.s32 %p773, %r5682, 0;
setp.ne.s16 %p774, %rs768, 255;
and.pred %p775, %p774, %p773;
setp.gt.u32 %p776, %r8972, 1;
and.pred %p777, %p776, %p775;
add.s32 %r5683, %r8524, 17477;
cvt.u64.u32 %rd442, %r5683;
add.s64 %rd443, %rd442, %rd5;
add.s64 %rd22, %rd1, %rd443;
@%p777 bra $L__BB2_673;
bra.uni $L__BB2_671;
$L__BB2_673:
setp.gt.u32 %p781, %r8524, 191;
mov.u32 %r9439, 1;
mov.u32 %r9441, %r8969;
@%p781 bra $L__BB2_675;
st.global.u8 [%rd22], %rs235;
add.s32 %r8524, %r8524, 1;
mov.u32 %r9439, %r9436;
mov.u32 %r9441, %r8969;
bra.uni $L__BB2_675;
$L__BB2_671:
setp.gt.u32 %p778, %r8524, 191;
setp.gt.u32 %p779, %r8972, 2879;
or.pred %p780, %p779, %p778;
mov.u32 %r9439, 1;
mov.u32 %r9441, %r9439;
@%p780 bra $L__BB2_675;
st.global.u8 [%rd22], %rs234;
add.s32 %r8524, %r8524, 1;
mov.u32 %r5686, 20548;
sub.s32 %r5687, %r5686, %r8972;
cvt.u64.u32 %rd444, %r5687;
add.s64 %rd445, %rd444, %rd5;
add.s64 %rd446, %rd1, %rd445;
st.global.u8 [%rd446], %rs1165;
add.s32 %r8972, %r8972, 1;
mov.u32 %r9439, %r9436;
mov.u32 %r9441, %r8969;
$L__BB2_675:
// Final flush of the ascending byte stream (accumulator %r9157, %r9158 bits
// used out of a capacity of %r9159, write index %r9160). %r9443 receives the
// status: %r9186, or 1 if the capacity check fails.
setp.eq.s32 %p782, %r9158, 0;
@%p782 bra $L__BB2_679;
// Bits are pending: fill the unused (%r9159 - %r9158) positions with 1s.
sub.s32 %r5689, %r9159, %r9158;
mov.u32 %r5690, -1;
shl.b32 %r5691, %r5690, %r5689;
not.b32 %r5692, %r5691;
and.b32 %r5693, %r5692, 255;
shl.b32 %r5694, %r5693, %r9158;
or.b32 %r1732, %r5694, %r9157;
// A padded byte equal to 0xFF is not written at all.
setp.eq.s32 %p783, %r1732, 255;
mov.u32 %r9443, %r9186;
@%p783 bra $L__BB2_681;
// Capacity check: a write index above 17476 sets the status to 1.
setp.gt.u32 %p784, %r9160, 17476;
mov.u32 %r9443, 1;
@%p784 bra $L__BB2_681;
// Store the padded byte at %rd1 + %rd5 + %r9160.
cvt.u64.u32 %rd447, %r9160;
add.s64 %rd448, %rd447, %rd5;
add.s64 %rd449, %rd1, %rd448;
st.global.u8 [%rd449], %r1732;
add.s32 %r9160, %r9160, 1;
mov.u32 %r9443, %r9186;
bra.uni $L__BB2_681;
$L__BB2_679:
// No bits pending. A capacity of 7 means the last stored byte was 0xFF
// (that is the only case in which the emit loops select 7); the write index
// is then stepped back by one, clamped at 0, which drops that trailing byte.
setp.ne.s32 %p785, %r9159, 7;
mov.u32 %r9443, %r9186;
@%p785 bra $L__BB2_681;
setp.eq.s32 %p786, %r9160, 0;
add.s32 %r5696, %r9160, -1;
selp.b32 %r9160, 0, %r5696, %p786;
mov.u32 %r9443, %r9186;
$L__BB2_681:
// Combine the three status values (%r9441, %r9439, %r9443). If all are zero,
// continue at $L__BB2_683; otherwise write a failure record and leave through
// $L__BB2_1905.
or.b32 %r5697, %r9441, %r9439;
or.b32 %r5698, %r5697, %r9443;
setp.eq.s32 %p787, %r5698, 0;
@%p787 bra $L__BB2_683;
// Failure record: eight 32-bit words at %rd6 = { 1, 3, 0, 0, 0, 0, 0, 0 }.
// NOTE(review): word 0 looks like an error flag and word 1 like an error
// code (3); the layout of the record is not visible here — confirm against
// the host-side definition.
mov.u32 %r5699, 1;
st.global.u32 [%rd6], %r5699;
mov.u32 %r5700, 3;
st.global.u32 [%rd6+4], %r5700;
mov.u32 %r5701, 0;
st.global.u32 [%rd6+8], %r5701;
st.global.u32 [%rd6+12], %r5701;
st.global.u32 [%rd6+16], %r5701;
st.global.u32 [%rd6+20], %r5701;
st.global.u32 [%rd6+24], %r5701;
st.global.u32 [%rd6+28], %r5701;
bra.uni $L__BB2_1905;
// Flags were all clear. Sum the three stream positions and prepare the
// next phase, which works on a local (per-thread) buffer based at %rd23.
$L__BB2_683:
// %r1738 = %r9160 + %r8524 + %r8972 (presumably the combined byte count — TODO confirm).
add.s32 %r1737, %r9160, %r8524;
add.s32 %r1738, %r1737, %r8972;
// %rd23 = %SPL + 0: base address used by the st.local/ld.local accesses below.
add.u64 %rd23, %SPL, 0;
// Default values carried to the join points when the phase is skipped.
mov.u32 %r9563, 1;
mov.u32 %r9561, 0;
mov.u32 %r9562, %r9561;
// %p38 set: skip straight to $L__BB2_932.
@%p38 bra $L__BB2_932;
// Only %r4062 == 3 continues here; any other value goes to $L__BB2_931.
setp.ne.s32 %p789, %r4062, 3;
@%p789 bra $L__BB2_931;
// %r5709 = ceil(%r4057 / 4) + 8; more than 513 entries does not fit the
// 513-entry buffer zeroed below, so branch to $L__BB2_928 with %p2370 = true
// and %r9560 = 0.
add.s32 %r5707, %r4057, 3;
shr.u32 %r5708, %r5707, 2;
add.s32 %r5709, %r5708, 8;
setp.gt.u32 %p791, %r5709, 513;
mov.pred %p790, -1;
mov.u32 %r9560, 0;
mov.pred %p2370, %p790;
@%p791 bra $L__BB2_928;
// This store and the run of stores that follows (byte offsets 0, 2, ..., 1024)
// zero 513 consecutive 16-bit slots of the local buffer at %rd23.
mov.u16 %rs1224, 0;
st.local.u16 [%rd23], %rs1224;
st.local.u16 [%rd23+2], %rs1224;
st.local.u16 [%rd23+4], %rs1224;
st.local.u16 [%rd23+6], %rs1224;
st.local.u16 [%rd23+8], %rs1224;
st.local.u16 [%rd23+10], %rs1224;
st.local.u16 [%rd23+12], %rs1224;
st.local.u16 [%rd23+14], %rs1224;
st.local.u16 [%rd23+16], %rs1224;
st.local.u16 [%rd23+18], %rs1224;
st.local.u16 [%rd23+20], %rs1224;
st.local.u16 [%rd23+22], %rs1224;
st.local.u16 [%rd23+24], %rs1224;
st.local.u16 [%rd23+26], %rs1224;
st.local.u16 [%rd23+28], %rs1224;
st.local.u16 [%rd23+30], %rs1224;
st.local.u16 [%rd23+32], %rs1224;
st.local.u16 [%rd23+34], %rs1224;
st.local.u16 [%rd23+36], %rs1224;
st.local.u16 [%rd23+38], %rs1224;
st.local.u16 [%rd23+40], %rs1224;
st.local.u16 [%rd23+42], %rs1224;
st.local.u16 [%rd23+44], %rs1224;
st.local.u16 [%rd23+46], %rs1224;
st.local.u16 [%rd23+48], %rs1224;
st.local.u16 [%rd23+50], %rs1224;
st.local.u16 [%rd23+52], %rs1224;
st.local.u16 [%rd23+54], %rs1224;
st.local.u16 [%rd23+56], %rs1224;
st.local.u16 [%rd23+58], %rs1224;
st.local.u16 [%rd23+60], %rs1224;
st.local.u16 [%rd23+62], %rs1224;
st.local.u16 [%rd23+64], %rs1224;
st.local.u16 [%rd23+66], %rs1224;
st.local.u16 [%rd23+68], %rs1224;
st.local.u16 [%rd23+70], %rs1224;
st.local.u16 [%rd23+72], %rs1224;
st.local.u16 [%rd23+74], %rs1224;
st.local.u16 [%rd23+76], %rs1224;
st.local.u16 [%rd23+78], %rs1224;
st.local.u16 [%rd23+80], %rs1224;
st.local.u16 [%rd23+82], %rs1224;
st.local.u16 [%rd23+84], %rs1224;
st.local.u16 [%rd23+86], %rs1224;
st.local.u16 [%rd23+88], %rs1224;
st.local.u16 [%rd23+90], %rs1224;
st.local.u16 [%rd23+92], %rs1224;
st.local.u16 [%rd23+94], %rs1224;
st.local.u16 [%rd23+96], %rs1224;
st.local.u16 [%rd23+98], %rs1224;
st.local.u16 [%rd23+100], %rs1224;
st.local.u16 [%rd23+102], %rs1224;
st.local.u16 [%rd23+104], %rs1224;
st.local.u16 [%rd23+106], %rs1224;
st.local.u16 [%rd23+108], %rs1224;
st.local.u16 [%rd23+110], %rs1224;
st.local.u16 [%rd23+112], %rs1224;
st.local.u16 [%rd23+114], %rs1224;
st.local.u16 [%rd23+116], %rs1224;
st.local.u16 [%rd23+118], %rs1224;
st.local.u16 [%rd23+120], %rs1224;
st.local.u16 [%rd23+122], %rs1224;
st.local.u16 [%rd23+124], %rs1224;
st.local.u16 [%rd23+126], %rs1224;
st.local.u16 [%rd23+128], %rs1224;
st.local.u16 [%rd23+130], %rs1224;
st.local.u16 [%rd23+132], %rs1224;
st.local.u16 [%rd23+134], %rs1224;
st.local.u16 [%rd23+136], %rs1224;
st.local.u16 [%rd23+138], %rs1224;
st.local.u16 [%rd23+140], %rs1224;
st.local.u16 [%rd23+142], %rs1224;
st.local.u16 [%rd23+144], %rs1224;
st.local.u16 [%rd23+146], %rs1224;
st.local.u16 [%rd23+148], %rs1224;
st.local.u16 [%rd23+150], %rs1224;
st.local.u16 [%rd23+152], %rs1224;
st.local.u16 [%rd23+154], %rs1224;
st.local.u16 [%rd23+156], %rs1224;
st.local.u16 [%rd23+158], %rs1224;
st.local.u16 [%rd23+160], %rs1224;
st.local.u16 [%rd23+162], %rs1224;
st.local.u16 [%rd23+164], %rs1224;
st.local.u16 [%rd23+166], %rs1224;
st.local.u16 [%rd23+168], %rs1224;
st.local.u16 [%rd23+170], %rs1224;
st.local.u16 [%rd23+172], %rs1224;
st.local.u16 [%rd23+174], %rs1224;
st.local.u16 [%rd23+176], %rs1224;
st.local.u16 [%rd23+178], %rs1224;
st.local.u16 [%rd23+180], %rs1224;
st.local.u16 [%rd23+182], %rs1224;
st.local.u16 [%rd23+184], %rs1224;
st.local.u16 [%rd23+186], %rs1224;
st.local.u16 [%rd23+188], %rs1224;
st.local.u16 [%rd23+190], %rs1224;
st.local.u16 [%rd23+192], %rs1224;
st.local.u16 [%rd23+194], %rs1224;
st.local.u16 [%rd23+196], %rs1224;
st.local.u16 [%rd23+198], %rs1224;
st.local.u16 [%rd23+200], %rs1224;
st.local.u16 [%rd23+202], %rs1224;
st.local.u16 [%rd23+204], %rs1224;
st.local.u16 [%rd23+206], %rs1224;
st.local.u16 [%rd23+208], %rs1224;
st.local.u16 [%rd23+210], %rs1224;
st.local.u16 [%rd23+212], %rs1224;
st.local.u16 [%rd23+214], %rs1224;
st.local.u16 [%rd23+216], %rs1224;
st.local.u16 [%rd23+218], %rs1224;
st.local.u16 [%rd23+220], %rs1224;
st.local.u16 [%rd23+222], %rs1224;
st.local.u16 [%rd23+224], %rs1224;
st.local.u16 [%rd23+226], %rs1224;
st.local.u16 [%rd23+228], %rs1224;
st.local.u16 [%rd23+230], %rs1224;
st.local.u16 [%rd23+232], %rs1224;
st.local.u16 [%rd23+234], %rs1224;
st.local.u16 [%rd23+236], %rs1224;
st.local.u16 [%rd23+238], %rs1224;
st.local.u16 [%rd23+240], %rs1224;
st.local.u16 [%rd23+242], %rs1224;
st.local.u16 [%rd23+244], %rs1224;
st.local.u16 [%rd23+246], %rs1224;
st.local.u16 [%rd23+248], %rs1224;
st.local.u16 [%rd23+250], %rs1224;
st.local.u16 [%rd23+252], %rs1224;
st.local.u16 [%rd23+254], %rs1224;
st.local.u16 [%rd23+256], %rs1224;
st.local.u16 [%rd23+258], %rs1224;
st.local.u16 [%rd23+260], %rs1224;
st.local.u16 [%rd23+262], %rs1224;
st.local.u16 [%rd23+264], %rs1224;
st.local.u16 [%rd23+266], %rs1224;
st.local.u16 [%rd23+268], %rs1224;
st.local.u16 [%rd23+270], %rs1224;
st.local.u16 [%rd23+272], %rs1224;
st.local.u16 [%rd23+274], %rs1224;
st.local.u16 [%rd23+276], %rs1224;
st.local.u16 [%rd23+278], %rs1224;
st.local.u16 [%rd23+280], %rs1224;
st.local.u16 [%rd23+282], %rs1224;
st.local.u16 [%rd23+284], %rs1224;
st.local.u16 [%rd23+286], %rs1224;
st.local.u16 [%rd23+288], %rs1224;
st.local.u16 [%rd23+290], %rs1224;
st.local.u16 [%rd23+292], %rs1224;
st.local.u16 [%rd23+294], %rs1224;
st.local.u16 [%rd23+296], %rs1224;
st.local.u16 [%rd23+298], %rs1224;
st.local.u16 [%rd23+300], %rs1224;
st.local.u16 [%rd23+302], %rs1224;
st.local.u16 [%rd23+304], %rs1224;
st.local.u16 [%rd23+306], %rs1224;
st.local.u16 [%rd23+308], %rs1224;
st.local.u16 [%rd23+310], %rs1224;
st.local.u16 [%rd23+312], %rs1224;
st.local.u16 [%rd23+314], %rs1224;
st.local.u16 [%rd23+316], %rs1224;
st.local.u16 [%rd23+318], %rs1224;
st.local.u16 [%rd23+320], %rs1224;
st.local.u16 [%rd23+322], %rs1224;
st.local.u16 [%rd23+324], %rs1224;
st.local.u16 [%rd23+326], %rs1224;
st.local.u16 [%rd23+328], %rs1224;
st.local.u16 [%rd23+330], %rs1224;
st.local.u16 [%rd23+332], %rs1224;
st.local.u16 [%rd23+334], %rs1224;
st.local.u16 [%rd23+336], %rs1224;
st.local.u16 [%rd23+338], %rs1224;
st.local.u16 [%rd23+340], %rs1224;
st.local.u16 [%rd23+342], %rs1224;
st.local.u16 [%rd23+344], %rs1224;
st.local.u16 [%rd23+346], %rs1224;
st.local.u16 [%rd23+348], %rs1224;
st.local.u16 [%rd23+350], %rs1224;
st.local.u16 [%rd23+352], %rs1224;
st.local.u16 [%rd23+354], %rs1224;
st.local.u16 [%rd23+356], %rs1224;
st.local.u16 [%rd23+358], %rs1224;
st.local.u16 [%rd23+360], %rs1224;
st.local.u16 [%rd23+362], %rs1224;
st.local.u16 [%rd23+364], %rs1224;
st.local.u16 [%rd23+366], %rs1224;
st.local.u16 [%rd23+368], %rs1224;
st.local.u16 [%rd23+370], %rs1224;
st.local.u16 [%rd23+372], %rs1224;
st.local.u16 [%rd23+374], %rs1224;
st.local.u16 [%rd23+376], %rs1224;
st.local.u16 [%rd23+378], %rs1224;
st.local.u16 [%rd23+380], %rs1224;
st.local.u16 [%rd23+382], %rs1224;
st.local.u16 [%rd23+384], %rs1224;
st.local.u16 [%rd23+386], %rs1224;
st.local.u16 [%rd23+388], %rs1224;
st.local.u16 [%rd23+390], %rs1224;
st.local.u16 [%rd23+392], %rs1224;
st.local.u16 [%rd23+394], %rs1224;
st.local.u16 [%rd23+396], %rs1224;
st.local.u16 [%rd23+398], %rs1224;
st.local.u16 [%rd23+400], %rs1224;
st.local.u16 [%rd23+402], %rs1224;
st.local.u16 [%rd23+404], %rs1224;
st.local.u16 [%rd23+406], %rs1224;
st.local.u16 [%rd23+408], %rs1224;
st.local.u16 [%rd23+410], %rs1224;
st.local.u16 [%rd23+412], %rs1224;
st.local.u16 [%rd23+414], %rs1224;
st.local.u16 [%rd23+416], %rs1224;
st.local.u16 [%rd23+418], %rs1224;
st.local.u16 [%rd23+420], %rs1224;
st.local.u16 [%rd23+422], %rs1224;
st.local.u16 [%rd23+424], %rs1224;
st.local.u16 [%rd23+426], %rs1224;
st.local.u16 [%rd23+428], %rs1224;
st.local.u16 [%rd23+430], %rs1224;
st.local.u16 [%rd23+432], %rs1224;
st.local.u16 [%rd23+434], %rs1224;
st.local.u16 [%rd23+436], %rs1224;
st.local.u16 [%rd23+438], %rs1224;
st.local.u16 [%rd23+440], %rs1224;
st.local.u16 [%rd23+442], %rs1224;
st.local.u16 [%rd23+444], %rs1224;
st.local.u16 [%rd23+446], %rs1224;
st.local.u16 [%rd23+448], %rs1224;
st.local.u16 [%rd23+450], %rs1224;
st.local.u16 [%rd23+452], %rs1224;
st.local.u16 [%rd23+454], %rs1224;
st.local.u16 [%rd23+456], %rs1224;
st.local.u16 [%rd23+458], %rs1224;
st.local.u16 [%rd23+460], %rs1224;
st.local.u16 [%rd23+462], %rs1224;
st.local.u16 [%rd23+464], %rs1224;
st.local.u16 [%rd23+466], %rs1224;
st.local.u16 [%rd23+468], %rs1224;
st.local.u16 [%rd23+470], %rs1224;
st.local.u16 [%rd23+472], %rs1224;
st.local.u16 [%rd23+474], %rs1224;
st.local.u16 [%rd23+476], %rs1224;
st.local.u16 [%rd23+478], %rs1224;
st.local.u16 [%rd23+480], %rs1224;
st.local.u16 [%rd23+482], %rs1224;
st.local.u16 [%rd23+484], %rs1224;
st.local.u16 [%rd23+486], %rs1224;
st.local.u16 [%rd23+488], %rs1224;
st.local.u16 [%rd23+490], %rs1224;
st.local.u16 [%rd23+492], %rs1224;
st.local.u16 [%rd23+494], %rs1224;
st.local.u16 [%rd23+496], %rs1224;
st.local.u16 [%rd23+498], %rs1224;
st.local.u16 [%rd23+500], %rs1224;
st.local.u16 [%rd23+502], %rs1224;
st.local.u16 [%rd23+504], %rs1224;
st.local.u16 [%rd23+506], %rs1224;
st.local.u16 [%rd23+508], %rs1224;
st.local.u16 [%rd23+510], %rs1224;
st.local.u16 [%rd23+512], %rs1224;
st.local.u16 [%rd23+514], %rs1224;
st.local.u16 [%rd23+516], %rs1224;
st.local.u16 [%rd23+518], %rs1224;
st.local.u16 [%rd23+520], %rs1224;
st.local.u16 [%rd23+522], %rs1224;
st.local.u16 [%rd23+524], %rs1224;
st.local.u16 [%rd23+526], %rs1224;
st.local.u16 [%rd23+528], %rs1224;
st.local.u16 [%rd23+530], %rs1224;
st.local.u16 [%rd23+532], %rs1224;
st.local.u16 [%rd23+534], %rs1224;
st.local.u16 [%rd23+536], %rs1224;
st.local.u16 [%rd23+538], %rs1224;
st.local.u16 [%rd23+540], %rs1224;
st.local.u16 [%rd23+542], %rs1224;
st.local.u16 [%rd23+544], %rs1224;
st.local.u16 [%rd23+546], %rs1224;
st.local.u16 [%rd23+548], %rs1224;
st.local.u16 [%rd23+550], %rs1224;
st.local.u16 [%rd23+552], %rs1224;
st.local.u16 [%rd23+554], %rs1224;
st.local.u16 [%rd23+556], %rs1224;
st.local.u16 [%rd23+558], %rs1224;
st.local.u16 [%rd23+560], %rs1224;
st.local.u16 [%rd23+562], %rs1224;
st.local.u16 [%rd23+564], %rs1224;
st.local.u16 [%rd23+566], %rs1224;
st.local.u16 [%rd23+568], %rs1224;
st.local.u16 [%rd23+570], %rs1224;
st.local.u16 [%rd23+572], %rs1224;
st.local.u16 [%rd23+574], %rs1224;
st.local.u16 [%rd23+576], %rs1224;
st.local.u16 [%rd23+578], %rs1224;
st.local.u16 [%rd23+580], %rs1224;
st.local.u16 [%rd23+582], %rs1224;
st.local.u16 [%rd23+584], %rs1224;
st.local.u16 [%rd23+586], %rs1224;
st.local.u16 [%rd23+588], %rs1224;
st.local.u16 [%rd23+590], %rs1224;
st.local.u16 [%rd23+592], %rs1224;
st.local.u16 [%rd23+594], %rs1224;
st.local.u16 [%rd23+596], %rs1224;
st.local.u16 [%rd23+598], %rs1224;
st.local.u16 [%rd23+600], %rs1224;
st.local.u16 [%rd23+602], %rs1224;
st.local.u16 [%rd23+604], %rs1224;
st.local.u16 [%rd23+606], %rs1224;
st.local.u16 [%rd23+608], %rs1224;
st.local.u16 [%rd23+610], %rs1224;
st.local.u16 [%rd23+612], %rs1224;
st.local.u16 [%rd23+614], %rs1224;
st.local.u16 [%rd23+616], %rs1224;
st.local.u16 [%rd23+618], %rs1224;
st.local.u16 [%rd23+620], %rs1224;
st.local.u16 [%rd23+622], %rs1224;
st.local.u16 [%rd23+624], %rs1224;
st.local.u16 [%rd23+626], %rs1224;
st.local.u16 [%rd23+628], %rs1224;
st.local.u16 [%rd23+630], %rs1224;
st.local.u16 [%rd23+632], %rs1224;
st.local.u16 [%rd23+634], %rs1224;
st.local.u16 [%rd23+636], %rs1224;
st.local.u16 [%rd23+638], %rs1224;
st.local.u16 [%rd23+640], %rs1224;
st.local.u16 [%rd23+642], %rs1224;
st.local.u16 [%rd23+644], %rs1224;
st.local.u16 [%rd23+646], %rs1224;
st.local.u16 [%rd23+648], %rs1224;
st.local.u16 [%rd23+650], %rs1224;
st.local.u16 [%rd23+652], %rs1224;
st.local.u16 [%rd23+654], %rs1224;
st.local.u16 [%rd23+656], %rs1224;
st.local.u16 [%rd23+658], %rs1224;
st.local.u16 [%rd23+660], %rs1224;
st.local.u16 [%rd23+662], %rs1224;
st.local.u16 [%rd23+664], %rs1224;
st.local.u16 [%rd23+666], %rs1224;
st.local.u16 [%rd23+668], %rs1224;
st.local.u16 [%rd23+670], %rs1224;
st.local.u16 [%rd23+672], %rs1224;
st.local.u16 [%rd23+674], %rs1224;
st.local.u16 [%rd23+676], %rs1224;
st.local.u16 [%rd23+678], %rs1224;
st.local.u16 [%rd23+680], %rs1224;
st.local.u16 [%rd23+682], %rs1224;
st.local.u16 [%rd23+684], %rs1224;
st.local.u16 [%rd23+686], %rs1224;
st.local.u16 [%rd23+688], %rs1224;
st.local.u16 [%rd23+690], %rs1224;
st.local.u16 [%rd23+692], %rs1224;
st.local.u16 [%rd23+694], %rs1224;
st.local.u16 [%rd23+696], %rs1224;
st.local.u16 [%rd23+698], %rs1224;
st.local.u16 [%rd23+700], %rs1224;
st.local.u16 [%rd23+702], %rs1224;
st.local.u16 [%rd23+704], %rs1224;
st.local.u16 [%rd23+706], %rs1224;
st.local.u16 [%rd23+708], %rs1224;
st.local.u16 [%rd23+710], %rs1224;
st.local.u16 [%rd23+712], %rs1224;
st.local.u16 [%rd23+714], %rs1224;
st.local.u16 [%rd23+716], %rs1224;
st.local.u16 [%rd23+718], %rs1224;
st.local.u16 [%rd23+720], %rs1224;
st.local.u16 [%rd23+722], %rs1224;
st.local.u16 [%rd23+724], %rs1224;
st.local.u16 [%rd23+726], %rs1224;
st.local.u16 [%rd23+728], %rs1224;
st.local.u16 [%rd23+730], %rs1224;
st.local.u16 [%rd23+732], %rs1224;
st.local.u16 [%rd23+734], %rs1224;
st.local.u16 [%rd23+736], %rs1224;
st.local.u16 [%rd23+738], %rs1224;
st.local.u16 [%rd23+740], %rs1224;
st.local.u16 [%rd23+742], %rs1224;
st.local.u16 [%rd23+744], %rs1224;
st.local.u16 [%rd23+746], %rs1224;
st.local.u16 [%rd23+748], %rs1224;
st.local.u16 [%rd23+750], %rs1224;
st.local.u16 [%rd23+752], %rs1224;
st.local.u16 [%rd23+754], %rs1224;
st.local.u16 [%rd23+756], %rs1224;
st.local.u16 [%rd23+758], %rs1224;
st.local.u16 [%rd23+760], %rs1224;
st.local.u16 [%rd23+762], %rs1224;
st.local.u16 [%rd23+764], %rs1224;
st.local.u16 [%rd23+766], %rs1224;
st.local.u16 [%rd23+768], %rs1224;
st.local.u16 [%rd23+770], %rs1224;
st.local.u16 [%rd23+772], %rs1224;
st.local.u16 [%rd23+774], %rs1224;
st.local.u16 [%rd23+776], %rs1224;
st.local.u16 [%rd23+778], %rs1224;
st.local.u16 [%rd23+780], %rs1224;
st.local.u16 [%rd23+782], %rs1224;
st.local.u16 [%rd23+784], %rs1224;
st.local.u16 [%rd23+786], %rs1224;
st.local.u16 [%rd23+788], %rs1224;
st.local.u16 [%rd23+790], %rs1224;
st.local.u16 [%rd23+792], %rs1224;
st.local.u16 [%rd23+794], %rs1224;
st.local.u16 [%rd23+796], %rs1224;
st.local.u16 [%rd23+798], %rs1224;
st.local.u16 [%rd23+800], %rs1224;
st.local.u16 [%rd23+802], %rs1224;
st.local.u16 [%rd23+804], %rs1224;
st.local.u16 [%rd23+806], %rs1224;
st.local.u16 [%rd23+808], %rs1224;
st.local.u16 [%rd23+810], %rs1224;
st.local.u16 [%rd23+812], %rs1224;
st.local.u16 [%rd23+814], %rs1224;
st.local.u16 [%rd23+816], %rs1224;
st.local.u16 [%rd23+818], %rs1224;
st.local.u16 [%rd23+820], %rs1224;
st.local.u16 [%rd23+822], %rs1224;
st.local.u16 [%rd23+824], %rs1224;
st.local.u16 [%rd23+826], %rs1224;
st.local.u16 [%rd23+828], %rs1224;
st.local.u16 [%rd23+830], %rs1224;
st.local.u16 [%rd23+832], %rs1224;
st.local.u16 [%rd23+834], %rs1224;
st.local.u16 [%rd23+836], %rs1224;
st.local.u16 [%rd23+838], %rs1224;
st.local.u16 [%rd23+840], %rs1224;
st.local.u16 [%rd23+842], %rs1224;
st.local.u16 [%rd23+844], %rs1224;
st.local.u16 [%rd23+846], %rs1224;
st.local.u16 [%rd23+848], %rs1224;
st.local.u16 [%rd23+850], %rs1224;
st.local.u16 [%rd23+852], %rs1224;
st.local.u16 [%rd23+854], %rs1224;
st.local.u16 [%rd23+856], %rs1224;
st.local.u16 [%rd23+858], %rs1224;
st.local.u16 [%rd23+860], %rs1224;
st.local.u16 [%rd23+862], %rs1224;
st.local.u16 [%rd23+864], %rs1224;
st.local.u16 [%rd23+866], %rs1224;
st.local.u16 [%rd23+868], %rs1224;
st.local.u16 [%rd23+870], %rs1224;
st.local.u16 [%rd23+872], %rs1224;
st.local.u16 [%rd23+874], %rs1224;
st.local.u16 [%rd23+876], %rs1224;
st.local.u16 [%rd23+878], %rs1224;
st.local.u16 [%rd23+880], %rs1224;
st.local.u16 [%rd23+882], %rs1224;
st.local.u16 [%rd23+884], %rs1224;
st.local.u16 [%rd23+886], %rs1224;
st.local.u16 [%rd23+888], %rs1224;
st.local.u16 [%rd23+890], %rs1224;
st.local.u16 [%rd23+892], %rs1224;
st.local.u16 [%rd23+894], %rs1224;
st.local.u16 [%rd23+896], %rs1224;
st.local.u16 [%rd23+898], %rs1224;
st.local.u16 [%rd23+900], %rs1224;
st.local.u16 [%rd23+902], %rs1224;
st.local.u16 [%rd23+904], %rs1224;
st.local.u16 [%rd23+906], %rs1224;
st.local.u16 [%rd23+908], %rs1224;
st.local.u16 [%rd23+910], %rs1224;
st.local.u16 [%rd23+912], %rs1224;
st.local.u16 [%rd23+914], %rs1224;
st.local.u16 [%rd23+916], %rs1224;
st.local.u16 [%rd23+918], %rs1224;
st.local.u16 [%rd23+920], %rs1224;
st.local.u16 [%rd23+922], %rs1224;
st.local.u16 [%rd23+924], %rs1224;
st.local.u16 [%rd23+926], %rs1224;
st.local.u16 [%rd23+928], %rs1224;
st.local.u16 [%rd23+930], %rs1224;
st.local.u16 [%rd23+932], %rs1224;
st.local.u16 [%rd23+934], %rs1224;
st.local.u16 [%rd23+936], %rs1224;
st.local.u16 [%rd23+938], %rs1224;
st.local.u16 [%rd23+940], %rs1224;
st.local.u16 [%rd23+942], %rs1224;
st.local.u16 [%rd23+944], %rs1224;
st.local.u16 [%rd23+946], %rs1224;
st.local.u16 [%rd23+948], %rs1224;
st.local.u16 [%rd23+950], %rs1224;
st.local.u16 [%rd23+952], %rs1224;
st.local.u16 [%rd23+954], %rs1224;
st.local.u16 [%rd23+956], %rs1224;
st.local.u16 [%rd23+958], %rs1224;
st.local.u16 [%rd23+960], %rs1224;
st.local.u16 [%rd23+962], %rs1224;
st.local.u16 [%rd23+964], %rs1224;
st.local.u16 [%rd23+966], %rs1224;
st.local.u16 [%rd23+968], %rs1224;
st.local.u16 [%rd23+970], %rs1224;
st.local.u16 [%rd23+972], %rs1224;
st.local.u16 [%rd23+974], %rs1224;
st.local.u16 [%rd23+976], %rs1224;
st.local.u16 [%rd23+978], %rs1224;
st.local.u16 [%rd23+980], %rs1224;
st.local.u16 [%rd23+982], %rs1224;
st.local.u16 [%rd23+984], %rs1224;
st.local.u16 [%rd23+986], %rs1224;
st.local.u16 [%rd23+988], %rs1224;
st.local.u16 [%rd23+990], %rs1224;
st.local.u16 [%rd23+992], %rs1224;
st.local.u16 [%rd23+994], %rs1224;
st.local.u16 [%rd23+996], %rs1224;
st.local.u16 [%rd23+998], %rs1224;
st.local.u16 [%rd23+1000], %rs1224;
st.local.u16 [%rd23+1002], %rs1224;
st.local.u16 [%rd23+1004], %rs1224;
st.local.u16 [%rd23+1006], %rs1224;
st.local.u16 [%rd23+1008], %rs1224;
st.local.u16 [%rd23+1010], %rs1224;
st.local.u16 [%rd23+1012], %rs1224;
st.local.u16 [%rd23+1014], %rs1224;
st.local.u16 [%rd23+1016], %rs1224;
st.local.u16 [%rd23+1018], %rs1224;
st.local.u16 [%rd23+1020], %rs1224;
st.local.u16 [%rd23+1022], %rs1224;
st.local.u16 [%rd23+1024], %rs1224;
// Outer-loop setup: %r9445 is the row counter (starts at 0); %r9555, %r9551
// and %r9553 are further loop-carried values that also start at 0.
mov.u32 %r9445, 0;
mov.u32 %r9555, %r9445;
mov.u32 %r9551, %r9445;
mov.u32 %r9553, %r9445;
// Outer-loop header. With y = %r9445 and stride = %r4055 this precomputes
// row numbers and row start offsets used by the inner loop.
$L__BB2_687:
// %p10 set: skip the inner loop for this iteration.
@%p10 bra $L__BB2_926;
// %r5716 = %r4058 - y (presumably the number of rows left — TODO confirm).
sub.s32 %r5716, %r4058, %r9445;
// Rows y+4 .. y+7 and their offsets: (y+4)*stride + {0, 1, 2, 3}*stride.
add.s32 %r1743, %r9445, 4;
mul.lo.s32 %r1744, %r1743, %r4055;
add.s32 %r1745, %r9445, 5;
add.s32 %r1746, %r1744, %r4055;
add.s32 %r1747, %r9445, 6;
shl.b32 %r5717, %r4055, 1;
add.s32 %r1748, %r1744, %r5717;
add.s32 %r1749, %r9445, 7;
mul.lo.s32 %r5718, %r4055, 3;
add.s32 %r1750, %r1744, %r5718;
// Rows y+1 .. y+3 and offsets for rows y, y+3, y+2, y+1
// (%r1754, %r1755, %r1756, %r1757 respectively).
add.s32 %r1751, %r9445, 1;
add.s32 %r1752, %r9445, 2;
add.s32 %r1753, %r9445, 3;
mul.lo.s32 %r1754, %r9445, %r4055;
add.s32 %r1755, %r1754, %r5718;
sub.s32 %r1756, %r1755, %r4055;
sub.s32 %r1757, %r1756, %r4055;
// %r1758 = 16-bit mask chosen from %r5716:
//   < 2 -> 0x1111, == 2 -> 0x3333, == 3 -> 0x7777, >= 4 -> 0xFFFF
// (one nibble per column; low bits of each nibble select rows — inferred
// from the bit layout built in the inner loop, TODO confirm).
setp.lt.u32 %p793, %r5716, 2;
selp.b32 %r5719, 4369, 13107, %p793;
setp.lt.u32 %p794, %r5716, 3;
selp.b32 %r5720, %r5719, 30583, %p794;
setp.lt.u32 %p795, %r5716, 4;
selp.b32 %r1758, %r5720, 65535, %p795;
// Inner-loop counters: %r9449 (column index) and %r9450 both start at 0.
mov.u32 %r5715, 0;
mov.u32 %r9449, %r5715;
mov.u32 %r9450, %r5715;
// Inner-loop head for column x = %r9449.
// Loads two adjacent 16-bit entries of the local buffer at index x/4 and x/4+1
// (they are not consumed within this span), then starts building the 16-bit
// pattern %r9461: bit (4*c + k) describes column x+c, row (y+4)+k.
// A bit is set when the 32-bit value v loaded from %rd3 + 4*(%rd4 + offset)
// satisfies |v| > 4 and |v| is odd. This span handles c = 0 (bits 0..3).
$L__BB2_689:
shr.u32 %r5722, %r9449, 2;
mul.wide.u32 %rd451, %r5722, 2;
add.s64 %rd25, %rd23, %rd451;
ld.local.u16 %rs238, [%rd25];
ld.local.u16 %rs239, [%rd25+2];
// Column x outside the width %r4057: pattern starts as %r5715 (set to 0
// before the loop label) and column 0 is skipped.
setp.ge.u32 %p796, %r9449, %r4057;
mov.u32 %r9461, %r5715;
@%p796 bra $L__BB2_698;
// Row y+4 (bit 0); skipped when the row number is >= %r4058.
setp.ge.u32 %p797, %r1743, %r4058;
mov.u32 %r9461, 0;
@%p797 bra $L__BB2_692;
add.s32 %r5724, %r1744, %r9449;
cvt.u64.u32 %rd452, %r5724;
add.s64 %rd453, %rd452, %rd4;
shl.b64 %rd454, %rd453, 2;
add.s64 %rd455, %rd3, %rd454;
ld.global.u32 %r5725, [%rd455];
abs.s32 %r5726, %r5725;
setp.gt.u32 %p798, %r5726, 4;
and.b32 %r5727, %r5726, 1;
setp.eq.b32 %p799, %r5727, 1;
and.pred %p800, %p798, %p799;
selp.u32 %r9461, 1, 0, %p800;
// Row y+5 (bit 1).
$L__BB2_692:
setp.ge.u32 %p801, %r1745, %r4058;
@%p801 bra $L__BB2_694;
add.s32 %r5728, %r1746, %r9449;
cvt.u64.u32 %rd456, %r5728;
add.s64 %rd457, %rd456, %rd4;
shl.b64 %rd458, %rd457, 2;
add.s64 %rd459, %rd3, %rd458;
ld.global.u32 %r5729, [%rd459];
abs.s32 %r5730, %r5729;
setp.gt.u32 %p802, %r5730, 4;
and.b32 %r5731, %r5730, 1;
setp.eq.b32 %p803, %r5731, 1;
and.pred %p804, %p802, %p803;
selp.b32 %r5732, 2, 0, %p804;
or.b32 %r9461, %r5732, %r9461;
// Row y+6 (bit 2).
$L__BB2_694:
setp.ge.u32 %p805, %r1747, %r4058;
@%p805 bra $L__BB2_696;
add.s32 %r5733, %r1748, %r9449;
cvt.u64.u32 %rd460, %r5733;
add.s64 %rd461, %rd460, %rd4;
shl.b64 %rd462, %rd461, 2;
add.s64 %rd463, %rd3, %rd462;
ld.global.u32 %r5734, [%rd463];
abs.s32 %r5735, %r5734;
setp.gt.u32 %p806, %r5735, 4;
and.b32 %r5736, %r5735, 1;
setp.eq.b32 %p807, %r5736, 1;
and.pred %p808, %p806, %p807;
selp.b32 %r5737, 4, 0, %p808;
or.b32 %r9461, %r5737, %r9461;
// Row y+7 (bit 3).
$L__BB2_696:
setp.ge.u32 %p809, %r1749, %r4058;
@%p809 bra $L__BB2_698;
add.s32 %r5738, %r1750, %r9449;
cvt.u64.u32 %rd464, %r5738;
add.s64 %rd465, %rd464, %rd4;
shl.b64 %rd466, %rd465, 2;
add.s64 %rd467, %rd3, %rd466;
ld.global.u32 %r5739, [%rd467];
abs.s32 %r5740, %r5739;
setp.gt.u32 %p810, %r5740, 4;
and.b32 %r5741, %r5740, 1;
setp.eq.b32 %p811, %r5741, 1;
and.pred %p812, %p810, %p811;
selp.b32 %r5742, 8, 0, %p812;
or.b32 %r9461, %r5742, %r9461;
// Column x+1 (%r1772 = %r9449 + 1): contributes bits 4..7 of %r9461.
// For each of rows y+4..y+7 (row numbers %r1743/%r1745/%r1747/%r1749, row
// offsets %r1744/%r1746/%r1748/%r1750) that is < %r4058, the bit is set when
// the loaded 32-bit value v has |v| > 4 and |v| odd.
$L__BB2_698:
add.s32 %r1772, %r9449, 1;
// Whole column skipped when x+1 >= %r4057.
setp.ge.u32 %p813, %r1772, %r4057;
@%p813 bra $L__BB2_707;
// Row y+4 -> bit 4 (mask 16).
setp.ge.u32 %p814, %r1743, %r4058;
@%p814 bra $L__BB2_701;
add.s32 %r5743, %r1744, %r1772;
cvt.u64.u32 %rd468, %r5743;
add.s64 %rd469, %rd468, %rd4;
shl.b64 %rd470, %rd469, 2;
add.s64 %rd471, %rd3, %rd470;
ld.global.u32 %r5744, [%rd471];
abs.s32 %r5745, %r5744;
setp.gt.u32 %p815, %r5745, 4;
and.b32 %r5746, %r5745, 1;
setp.eq.b32 %p816, %r5746, 1;
and.pred %p817, %p815, %p816;
selp.b32 %r5747, 16, 0, %p817;
or.b32 %r9461, %r5747, %r9461;
// Row y+5 -> bit 5 (mask 32).
$L__BB2_701:
setp.ge.u32 %p818, %r1745, %r4058;
@%p818 bra $L__BB2_703;
add.s32 %r5748, %r1746, %r1772;
cvt.u64.u32 %rd472, %r5748;
add.s64 %rd473, %rd472, %rd4;
shl.b64 %rd474, %rd473, 2;
add.s64 %rd475, %rd3, %rd474;
ld.global.u32 %r5749, [%rd475];
abs.s32 %r5750, %r5749;
setp.gt.u32 %p819, %r5750, 4;
and.b32 %r5751, %r5750, 1;
setp.eq.b32 %p820, %r5751, 1;
and.pred %p821, %p819, %p820;
selp.b32 %r5752, 32, 0, %p821;
or.b32 %r9461, %r5752, %r9461;
// Row y+6 -> bit 6 (mask 64).
$L__BB2_703:
setp.ge.u32 %p822, %r1747, %r4058;
@%p822 bra $L__BB2_705;
add.s32 %r5753, %r1748, %r1772;
cvt.u64.u32 %rd476, %r5753;
add.s64 %rd477, %rd476, %rd4;
shl.b64 %rd478, %rd477, 2;
add.s64 %rd479, %rd3, %rd478;
ld.global.u32 %r5754, [%rd479];
abs.s32 %r5755, %r5754;
setp.gt.u32 %p823, %r5755, 4;
and.b32 %r5756, %r5755, 1;
setp.eq.b32 %p824, %r5756, 1;
and.pred %p825, %p823, %p824;
selp.b32 %r5757, 64, 0, %p825;
or.b32 %r9461, %r5757, %r9461;
// Row y+7 -> bit 7 (mask 128).
$L__BB2_705:
setp.ge.u32 %p826, %r1749, %r4058;
@%p826 bra $L__BB2_707;
add.s32 %r5758, %r1750, %r1772;
cvt.u64.u32 %rd480, %r5758;
add.s64 %rd481, %rd480, %rd4;
shl.b64 %rd482, %rd481, 2;
add.s64 %rd483, %rd3, %rd482;
ld.global.u32 %r5759, [%rd483];
abs.s32 %r5760, %r5759;
setp.gt.u32 %p827, %r5760, 4;
and.b32 %r5761, %r5760, 1;
setp.eq.b32 %p828, %r5761, 1;
and.pred %p829, %p827, %p828;
selp.b32 %r5762, 128, 0, %p829;
or.b32 %r9461, %r5762, %r9461;
// Column x+2 (%r1781 = %r9449 + 2): contributes bits 8..11 of %r9461.
// Same per-row test as the other columns: row number must be < %r4058, and the
// bit is set when the loaded 32-bit value v has |v| > 4 and |v| odd.
$L__BB2_707:
add.s32 %r1781, %r9449, 2;
// Whole column skipped when x+2 >= %r4057.
setp.ge.u32 %p830, %r1781, %r4057;
@%p830 bra $L__BB2_716;
// Row y+4 -> bit 8 (mask 256).
setp.ge.u32 %p831, %r1743, %r4058;
@%p831 bra $L__BB2_710;
add.s32 %r5763, %r1744, %r1781;
cvt.u64.u32 %rd484, %r5763;
add.s64 %rd485, %rd484, %rd4;
shl.b64 %rd486, %rd485, 2;
add.s64 %rd487, %rd3, %rd486;
ld.global.u32 %r5764, [%rd487];
abs.s32 %r5765, %r5764;
setp.gt.u32 %p832, %r5765, 4;
and.b32 %r5766, %r5765, 1;
setp.eq.b32 %p833, %r5766, 1;
and.pred %p834, %p832, %p833;
selp.b32 %r5767, 256, 0, %p834;
or.b32 %r9461, %r5767, %r9461;
// Row y+5 -> bit 9 (mask 512).
$L__BB2_710:
setp.ge.u32 %p835, %r1745, %r4058;
@%p835 bra $L__BB2_712;
add.s32 %r5768, %r1746, %r1781;
cvt.u64.u32 %rd488, %r5768;
add.s64 %rd489, %rd488, %rd4;
shl.b64 %rd490, %rd489, 2;
add.s64 %rd491, %rd3, %rd490;
ld.global.u32 %r5769, [%rd491];
abs.s32 %r5770, %r5769;
setp.gt.u32 %p836, %r5770, 4;
and.b32 %r5771, %r5770, 1;
setp.eq.b32 %p837, %r5771, 1;
and.pred %p838, %p836, %p837;
selp.b32 %r5772, 512, 0, %p838;
or.b32 %r9461, %r5772, %r9461;
// Row y+6 -> bit 10 (mask 1024).
$L__BB2_712:
setp.ge.u32 %p839, %r1747, %r4058;
@%p839 bra $L__BB2_714;
add.s32 %r5773, %r1748, %r1781;
cvt.u64.u32 %rd492, %r5773;
add.s64 %rd493, %rd492, %rd4;
shl.b64 %rd494, %rd493, 2;
add.s64 %rd495, %rd3, %rd494;
ld.global.u32 %r5774, [%rd495];
abs.s32 %r5775, %r5774;
setp.gt.u32 %p840, %r5775, 4;
and.b32 %r5776, %r5775, 1;
setp.eq.b32 %p841, %r5776, 1;
and.pred %p842, %p840, %p841;
selp.b32 %r5777, 1024, 0, %p842;
or.b32 %r9461, %r5777, %r9461;
// Row y+7 -> bit 11 (mask 2048).
$L__BB2_714:
setp.ge.u32 %p843, %r1749, %r4058;
@%p843 bra $L__BB2_716;
add.s32 %r5778, %r1750, %r1781;
cvt.u64.u32 %rd496, %r5778;
add.s64 %rd497, %rd496, %rd4;
shl.b64 %rd498, %rd497, 2;
add.s64 %rd499, %rd3, %rd498;
ld.global.u32 %r5779, [%rd499];
abs.s32 %r5780, %r5779;
setp.gt.u32 %p844, %r5780, 4;
and.b32 %r5781, %r5780, 1;
setp.eq.b32 %p845, %r5781, 1;
and.pred %p846, %p844, %p845;
selp.b32 %r5782, 2048, 0, %p846;
or.b32 %r9461, %r5782, %r9461;
// Column x+3 (%r1790 = %r9449 + 3): contributes bits 12..15 of %r9461,
// completing the 16-bit pattern for columns x..x+3.
// Same per-row test: row number must be < %r4058, and the bit is set when the
// loaded 32-bit value v has |v| > 4 and |v| odd.
$L__BB2_716:
add.s32 %r1790, %r9449, 3;
// Whole column skipped when x+3 >= %r4057.
setp.ge.u32 %p847, %r1790, %r4057;
@%p847 bra $L__BB2_725;
// Row y+4 -> bit 12 (mask 4096).
setp.ge.u32 %p848, %r1743, %r4058;
@%p848 bra $L__BB2_719;
add.s32 %r5783, %r1744, %r1790;
cvt.u64.u32 %rd500, %r5783;
add.s64 %rd501, %rd500, %rd4;
shl.b64 %rd502, %rd501, 2;
add.s64 %rd503, %rd3, %rd502;
ld.global.u32 %r5784, [%rd503];
abs.s32 %r5785, %r5784;
setp.gt.u32 %p849, %r5785, 4;
and.b32 %r5786, %r5785, 1;
setp.eq.b32 %p850, %r5786, 1;
and.pred %p851, %p849, %p850;
selp.b32 %r5787, 4096, 0, %p851;
or.b32 %r9461, %r5787, %r9461;
// Row y+5 -> bit 13 (mask 8192).
$L__BB2_719:
setp.ge.u32 %p852, %r1745, %r4058;
@%p852 bra $L__BB2_721;
add.s32 %r5788, %r1746, %r1790;
cvt.u64.u32 %rd504, %r5788;
add.s64 %rd505, %rd504, %rd4;
shl.b64 %rd506, %rd505, 2;
add.s64 %rd507, %rd3, %rd506;
ld.global.u32 %r5789, [%rd507];
abs.s32 %r5790, %r5789;
setp.gt.u32 %p853, %r5790, 4;
and.b32 %r5791, %r5790, 1;
setp.eq.b32 %p854, %r5791, 1;
and.pred %p855, %p853, %p854;
selp.b32 %r5792, 8192, 0, %p855;
or.b32 %r9461, %r5792, %r9461;
// Row y+6 -> bit 14 (mask 16384).
$L__BB2_721:
setp.ge.u32 %p856, %r1747, %r4058;
@%p856 bra $L__BB2_723;
add.s32 %r5793, %r1748, %r1790;
cvt.u64.u32 %rd508, %r5793;
add.s64 %rd509, %rd508, %rd4;
shl.b64 %rd510, %rd509, 2;
add.s64 %rd511, %rd3, %rd510;
ld.global.u32 %r5794, [%rd511];
abs.s32 %r5795, %r5794;
setp.gt.u32 %p857, %r5795, 4;
and.b32 %r5796, %r5795, 1;
setp.eq.b32 %p858, %r5796, 1;
and.pred %p859, %p857, %p858;
selp.b32 %r5797, 16384, 0, %p859;
or.b32 %r9461, %r5797, %r9461;
// Row y+7 -> bit 15 (mask 32768).
$L__BB2_723:
setp.ge.u32 %p860, %r1749, %r4058;
@%p860 bra $L__BB2_725;
add.s32 %r5798, %r1750, %r1790;
cvt.u64.u32 %rd512, %r5798;
add.s64 %rd513, %rd512, %rd4;
shl.b64 %rd514, %rd513, 2;
add.s64 %rd515, %rd3, %rd514;
ld.global.u32 %r5799, [%rd515];
abs.s32 %r5800, %r5799;
setp.gt.u32 %p861, %r5800, 4;
and.b32 %r5801, %r5800, 1;
setp.eq.b32 %p862, %r5801, 1;
and.pred %p863, %p861, %p862;
selp.b32 %r5802, 32768, 0, %p863;
or.b32 %r9461, %r5802, %r9461;
// Start of a second 16-bit pattern, %r9477, for columns x+4..x+7 (same rows
// y+4..y+7, same layout: bit 4*c + k for column x+4+c, row (y+4)+k).
// This span handles column x+4 (bits 0..3). The column index is formed as
// (row offset + %r9449) + 4 rather than through a separate column register.
$L__BB2_725:
add.s32 %r5804, %r9449, 4;
// Column x+4 outside the width %r4057: pattern stays 0 and the column is skipped.
setp.ge.u32 %p864, %r5804, %r4057;
mov.u32 %r9477, 0;
@%p864 bra $L__BB2_734;
// Row y+4 -> bit 0; the bit is set when |v| > 4 and |v| is odd.
setp.ge.u32 %p865, %r1743, %r4058;
mov.u32 %r9477, 0;
@%p865 bra $L__BB2_728;
add.s32 %r5806, %r1744, %r9449;
add.s32 %r5807, %r5806, 4;
cvt.u64.u32 %rd516, %r5807;
add.s64 %rd517, %rd516, %rd4;
shl.b64 %rd518, %rd517, 2;
add.s64 %rd519, %rd3, %rd518;
ld.global.u32 %r5808, [%rd519];
abs.s32 %r5809, %r5808;
setp.gt.u32 %p866, %r5809, 4;
and.b32 %r5810, %r5809, 1;
setp.eq.b32 %p867, %r5810, 1;
and.pred %p868, %p866, %p867;
selp.u32 %r9477, 1, 0, %p868;
// Row y+5 -> bit 1 (mask 2).
$L__BB2_728:
setp.ge.u32 %p869, %r1745, %r4058;
@%p869 bra $L__BB2_730;
add.s32 %r5811, %r1746, %r9449;
add.s32 %r5812, %r5811, 4;
cvt.u64.u32 %rd520, %r5812;
add.s64 %rd521, %rd520, %rd4;
shl.b64 %rd522, %rd521, 2;
add.s64 %rd523, %rd3, %rd522;
ld.global.u32 %r5813, [%rd523];
abs.s32 %r5814, %r5813;
setp.gt.u32 %p870, %r5814, 4;
and.b32 %r5815, %r5814, 1;
setp.eq.b32 %p871, %r5815, 1;
and.pred %p872, %p870, %p871;
selp.b32 %r5816, 2, 0, %p872;
or.b32 %r9477, %r5816, %r9477;
// Row y+6 -> bit 2 (mask 4).
$L__BB2_730:
setp.ge.u32 %p873, %r1747, %r4058;
@%p873 bra $L__BB2_732;
add.s32 %r5817, %r1748, %r9449;
add.s32 %r5818, %r5817, 4;
cvt.u64.u32 %rd524, %r5818;
add.s64 %rd525, %rd524, %rd4;
shl.b64 %rd526, %rd525, 2;
add.s64 %rd527, %rd3, %rd526;
ld.global.u32 %r5819, [%rd527];
abs.s32 %r5820, %r5819;
setp.gt.u32 %p874, %r5820, 4;
and.b32 %r5821, %r5820, 1;
setp.eq.b32 %p875, %r5821, 1;
and.pred %p876, %p874, %p875;
selp.b32 %r5822, 4, 0, %p876;
or.b32 %r9477, %r5822, %r9477;
// Row y+7 -> bit 3 (mask 8).
$L__BB2_732:
setp.ge.u32 %p877, %r1749, %r4058;
@%p877 bra $L__BB2_734;
add.s32 %r5823, %r1750, %r9449;
add.s32 %r5824, %r5823, 4;
cvt.u64.u32 %rd528, %r5824;
add.s64 %rd529, %rd528, %rd4;
shl.b64 %rd530, %rd529, 2;
add.s64 %rd531, %rd3, %rd530;
ld.global.u32 %r5825, [%rd531];
abs.s32 %r5826, %r5825;
setp.gt.u32 %p878, %r5826, 4;
and.b32 %r5827, %r5826, 1;
setp.eq.b32 %p879, %r5827, 1;
and.pred %p880, %p878, %p879;
selp.b32 %r5828, 8, 0, %p880;
or.b32 %r9477, %r5828, %r9477;
$L__BB2_734:
add.s32 %r1807, %r9449, 5;
setp.ge.u32 %p881, %r1807, %r4057;
@%p881 bra $L__BB2_743;
setp.ge.u32 %p882, %r1743, %r4058;
@%p882 bra $L__BB2_737;
add.s32 %r5829, %r1744, %r1807;
cvt.u64.u32 %rd532, %r5829;
add.s64 %rd533, %rd532, %rd4;
shl.b64 %rd534, %rd533, 2;
add.s64 %rd535, %rd3, %rd534;
ld.global.u32 %r5830, [%rd535];
abs.s32 %r5831, %r5830;
setp.gt.u32 %p883, %r5831, 4;
and.b32 %r5832, %r5831, 1;
setp.eq.b32 %p884, %r5832, 1;
and.pred %p885, %p883, %p884;
selp.b32 %r5833, 16, 0, %p885;
or.b32 %r9477, %r5833, %r9477;
$L__BB2_737:
setp.ge.u32 %p886, %r1745, %r4058;
@%p886 bra $L__BB2_739;
add.s32 %r5834, %r1746, %r1807;
cvt.u64.u32 %rd536, %r5834;
add.s64 %rd537, %rd536, %rd4;
shl.b64 %rd538, %rd537, 2;
add.s64 %rd539, %rd3, %rd538;
ld.global.u32 %r5835, [%rd539];
abs.s32 %r5836, %r5835;
setp.gt.u32 %p887, %r5836, 4;
and.b32 %r5837, %r5836, 1;
setp.eq.b32 %p888, %r5837, 1;
and.pred %p889, %p887, %p888;
selp.b32 %r5838, 32, 0, %p889;
or.b32 %r9477, %r5838, %r9477;
$L__BB2_739:
setp.ge.u32 %p890, %r1747, %r4058;
@%p890 bra $L__BB2_741;
add.s32 %r5839, %r1748, %r1807;
cvt.u64.u32 %rd540, %r5839;
add.s64 %rd541, %rd540, %rd4;
shl.b64 %rd542, %rd541, 2;
add.s64 %rd543, %rd3, %rd542;
ld.global.u32 %r5840, [%rd543];
abs.s32 %r5841, %r5840;
setp.gt.u32 %p891, %r5841, 4;
and.b32 %r5842, %r5841, 1;
setp.eq.b32 %p892, %r5842, 1;
and.pred %p893, %p891, %p892;
selp.b32 %r5843, 64, 0, %p893;
or.b32 %r9477, %r5843, %r9477;
// Builds bits 7..15 of the flag mask %r9477.
// Every guarded sub-block below has the same shape:
//   idx  = zero-extended 32-bit (base + column) + %rd4
//   v    = 32-bit load from %rd3 + 4*idx
//   flag = (|v| > 4, unsigned compare) AND (|v| is odd)
//   %r9477 |= flag ? bit : 0
// Guards: the four indices %r1743/%r1745/%r1747/%r1749 are tested against
// %r4058, the column index against %r4057; a failed guard leaves the bit clear.
// %r1744/%r1746/%r1748/%r1750 and %r1807 are defined before this chunk
// (presumably row-base offsets and column %r9449+5 -- confirm).
$L__BB2_741:
// Bit 7 (128): base %r1750, column %r1807.
setp.ge.u32 %p894, %r1749, %r4058;
@%p894 bra $L__BB2_743;
add.s32 %r5844, %r1750, %r1807;
cvt.u64.u32 %rd544, %r5844;
add.s64 %rd545, %rd544, %rd4;
shl.b64 %rd546, %rd545, 2;
add.s64 %rd547, %rd3, %rd546;
ld.global.u32 %r5845, [%rd547];
abs.s32 %r5846, %r5845;
setp.gt.u32 %p895, %r5846, 4;
and.b32 %r5847, %r5846, 1;
setp.eq.b32 %p896, %r5847, 1;
and.pred %p897, %p895, %p896;
selp.b32 %r5848, 128, 0, %p897;
or.b32 %r9477, %r5848, %r9477;
$L__BB2_743:
// Column %r9449 + 6: bits 8..11 (256..2048). %p898 is reused further down.
add.s32 %r1816, %r9449, 6;
setp.ge.u32 %p898, %r1816, %r4057;
@%p898 bra $L__BB2_752;
setp.ge.u32 %p899, %r1743, %r4058;
@%p899 bra $L__BB2_746;
add.s32 %r5849, %r1744, %r1816;
cvt.u64.u32 %rd548, %r5849;
add.s64 %rd549, %rd548, %rd4;
shl.b64 %rd550, %rd549, 2;
add.s64 %rd551, %rd3, %rd550;
ld.global.u32 %r5850, [%rd551];
abs.s32 %r5851, %r5850;
setp.gt.u32 %p900, %r5851, 4;
and.b32 %r5852, %r5851, 1;
setp.eq.b32 %p901, %r5852, 1;
and.pred %p902, %p900, %p901;
selp.b32 %r5853, 256, 0, %p902;
or.b32 %r9477, %r5853, %r9477;
$L__BB2_746:
setp.ge.u32 %p903, %r1745, %r4058;
@%p903 bra $L__BB2_748;
add.s32 %r5854, %r1746, %r1816;
cvt.u64.u32 %rd552, %r5854;
add.s64 %rd553, %rd552, %rd4;
shl.b64 %rd554, %rd553, 2;
add.s64 %rd555, %rd3, %rd554;
ld.global.u32 %r5855, [%rd555];
abs.s32 %r5856, %r5855;
setp.gt.u32 %p904, %r5856, 4;
and.b32 %r5857, %r5856, 1;
setp.eq.b32 %p905, %r5857, 1;
and.pred %p906, %p904, %p905;
selp.b32 %r5858, 512, 0, %p906;
or.b32 %r9477, %r5858, %r9477;
$L__BB2_748:
setp.ge.u32 %p907, %r1747, %r4058;
@%p907 bra $L__BB2_750;
add.s32 %r5859, %r1748, %r1816;
cvt.u64.u32 %rd556, %r5859;
add.s64 %rd557, %rd556, %rd4;
shl.b64 %rd558, %rd557, 2;
add.s64 %rd559, %rd3, %rd558;
ld.global.u32 %r5860, [%rd559];
abs.s32 %r5861, %r5860;
setp.gt.u32 %p908, %r5861, 4;
and.b32 %r5862, %r5861, 1;
setp.eq.b32 %p909, %r5862, 1;
and.pred %p910, %p908, %p909;
selp.b32 %r5863, 1024, 0, %p910;
or.b32 %r9477, %r5863, %r9477;
$L__BB2_750:
setp.ge.u32 %p911, %r1749, %r4058;
@%p911 bra $L__BB2_752;
add.s32 %r5864, %r1750, %r1816;
cvt.u64.u32 %rd560, %r5864;
add.s64 %rd561, %rd560, %rd4;
shl.b64 %rd562, %rd561, 2;
add.s64 %rd563, %rd3, %rd562;
ld.global.u32 %r5865, [%rd563];
abs.s32 %r5866, %r5865;
setp.gt.u32 %p912, %r5866, 4;
and.b32 %r5867, %r5866, 1;
setp.eq.b32 %p913, %r5867, 1;
and.pred %p914, %p912, %p913;
selp.b32 %r5868, 2048, 0, %p914;
or.b32 %r9477, %r5868, %r9477;
$L__BB2_752:
// Column %r9449 + 7: bits 12..15 (4096..32768). %p915 is reused further down.
add.s32 %r1825, %r9449, 7;
setp.ge.u32 %p915, %r1825, %r4057;
@%p915 bra $L__BB2_761;
setp.ge.u32 %p916, %r1743, %r4058;
@%p916 bra $L__BB2_755;
add.s32 %r5869, %r1744, %r1825;
cvt.u64.u32 %rd564, %r5869;
add.s64 %rd565, %rd564, %rd4;
shl.b64 %rd566, %rd565, 2;
add.s64 %rd567, %rd3, %rd566;
ld.global.u32 %r5870, [%rd567];
abs.s32 %r5871, %r5870;
setp.gt.u32 %p917, %r5871, 4;
and.b32 %r5872, %r5871, 1;
setp.eq.b32 %p918, %r5872, 1;
and.pred %p919, %p917, %p918;
selp.b32 %r5873, 4096, 0, %p919;
or.b32 %r9477, %r5873, %r9477;
$L__BB2_755:
setp.ge.u32 %p920, %r1745, %r4058;
@%p920 bra $L__BB2_757;
add.s32 %r5874, %r1746, %r1825;
cvt.u64.u32 %rd568, %r5874;
add.s64 %rd569, %rd568, %rd4;
shl.b64 %rd570, %rd569, 2;
add.s64 %rd571, %rd3, %rd570;
ld.global.u32 %r5875, [%rd571];
abs.s32 %r5876, %r5875;
setp.gt.u32 %p921, %r5876, 4;
and.b32 %r5877, %r5876, 1;
setp.eq.b32 %p922, %r5877, 1;
and.pred %p923, %p921, %p922;
selp.b32 %r5878, 8192, 0, %p923;
or.b32 %r9477, %r5878, %r9477;
$L__BB2_757:
setp.ge.u32 %p924, %r1747, %r4058;
@%p924 bra $L__BB2_759;
add.s32 %r5879, %r1748, %r1825;
cvt.u64.u32 %rd572, %r5879;
add.s64 %rd573, %rd572, %rd4;
shl.b64 %rd574, %rd573, 2;
add.s64 %rd575, %rd3, %rd574;
ld.global.u32 %r5880, [%rd575];
abs.s32 %r5881, %r5880;
setp.gt.u32 %p925, %r5881, 4;
and.b32 %r5882, %r5881, 1;
setp.eq.b32 %p926, %r5882, 1;
and.pred %p927, %p925, %p926;
selp.b32 %r5883, 16384, 0, %p927;
or.b32 %r9477, %r5883, %r9477;
$L__BB2_759:
setp.ge.u32 %p928, %r1749, %r4058;
@%p928 bra $L__BB2_761;
add.s32 %r5884, %r1750, %r1825;
cvt.u64.u32 %rd576, %r5884;
add.s64 %rd577, %rd576, %rd4;
shl.b64 %rd578, %rd577, 2;
add.s64 %rd579, %rd3, %rd578;
ld.global.u32 %r5885, [%rd579];
abs.s32 %r5886, %r5885;
setp.gt.u32 %p929, %r5886, 4;
and.b32 %r5887, %r5886, 1;
setp.eq.b32 %p930, %r5887, 1;
and.pred %p931, %p929, %p930;
selp.b32 %r5888, 32768, 0, %p931;
or.b32 %r9477, %r5888, %r9477;
mov.b32 %r1834, {%rs238, %rs239};
add.s32 %r5890, %r1754, %r9449;
cvt.u64.u32 %rd580, %r5890;
add.s64 %rd581, %rd580, %rd4;
shl.b64 %rd582, %rd581, 2;
add.s64 %rd26, %rd3, %rd582;
add.s32 %r5891, %r1757, %r9449;
cvt.u64.u32 %rd583, %r5891;
add.s64 %rd584, %rd583, %rd4;
shl.b64 %rd585, %rd584, 2;
add.s64 %rd27, %rd3, %rd585;
add.s32 %r5892, %r1756, %r9449;
cvt.u64.u32 %rd586, %r5892;
add.s64 %rd587, %rd586, %rd4;
shl.b64 %rd588, %rd587, 2;
add.s64 %rd28, %rd3, %rd588;
add.s32 %r5893, %r1755, %r9449;
cvt.u64.u32 %rd589, %r5893;
add.s64 %rd590, %rd589, %rd4;
shl.b64 %rd591, %rd590, 2;
add.s64 %rd29, %rd3, %rd591;
mov.u32 %r9493, 0;
@%p796 bra $L__BB2_770;
setp.le.u32 %p933, %r4058, %r9445;
mov.u32 %r9493, 0;
@%p933 bra $L__BB2_764;
ld.global.u32 %r5895, [%rd26];
abs.s32 %r5896, %r5895;
setp.gt.u32 %p934, %r5896, 4;
and.b32 %r5897, %r5896, 1;
setp.eq.b32 %p935, %r5897, 1;
and.pred %p936, %p934, %p935;
selp.u32 %r9493, 1, 0, %p936;
$L__BB2_764:
setp.ge.u32 %p937, %r1751, %r4058;
@%p937 bra $L__BB2_766;
ld.global.u32 %r5898, [%rd27];
abs.s32 %r5899, %r5898;
setp.gt.u32 %p938, %r5899, 4;
and.b32 %r5900, %r5899, 1;
setp.eq.b32 %p939, %r5900, 1;
and.pred %p940, %p938, %p939;
selp.b32 %r5901, 2, 0, %p940;
or.b32 %r9493, %r5901, %r9493;
$L__BB2_766:
setp.ge.u32 %p941, %r1752, %r4058;
@%p941 bra $L__BB2_768;
ld.global.u32 %r5902, [%rd28];
abs.s32 %r5903, %r5902;
setp.gt.u32 %p942, %r5903, 4;
and.b32 %r5904, %r5903, 1;
setp.eq.b32 %p943, %r5904, 1;
and.pred %p944, %p942, %p943;
selp.b32 %r5905, 4, 0, %p944;
or.b32 %r9493, %r5905, %r9493;
$L__BB2_768:
setp.ge.u32 %p945, %r1753, %r4058;
@%p945 bra $L__BB2_770;
ld.global.u32 %r5906, [%rd29];
abs.s32 %r5907, %r5906;
setp.gt.u32 %p946, %r5907, 4;
and.b32 %r5908, %r5907, 1;
setp.eq.b32 %p947, %r5908, 1;
and.pred %p948, %p946, %p947;
selp.b32 %r5909, 8, 0, %p948;
or.b32 %r9493, %r5909, %r9493;
$L__BB2_770:
add.s32 %r5910, %r1754, %r1772;
cvt.u64.u32 %rd592, %r5910;
add.s64 %rd593, %rd592, %rd4;
shl.b64 %rd594, %rd593, 2;
add.s64 %rd30, %rd3, %rd594;
add.s32 %r5911, %r1757, %r1772;
cvt.u64.u32 %rd595, %r5911;
add.s64 %rd596, %rd595, %rd4;
shl.b64 %rd597, %rd596, 2;
add.s64 %rd31, %rd3, %rd597;
add.s32 %r5912, %r1756, %r1772;
cvt.u64.u32 %rd598, %r5912;
add.s64 %rd599, %rd598, %rd4;
shl.b64 %rd600, %rd599, 2;
add.s64 %rd32, %rd3, %rd600;
add.s32 %r5913, %r1755, %r1772;
cvt.u64.u32 %rd601, %r5913;
add.s64 %rd602, %rd601, %rd4;
shl.b64 %rd603, %rd602, 2;
add.s64 %rd33, %rd3, %rd603;
shl.b32 %r5914, %r9477, 16;
or.b32 %r1843, %r5914, %r9461;
@%p813 bra $L__BB2_779;
setp.le.u32 %p950, %r4058, %r9445;
@%p950 bra $L__BB2_773;
ld.global.u32 %r5915, [%rd30];
abs.s32 %r5916, %r5915;
setp.gt.u32 %p951, %r5916, 4;
and.b32 %r5917, %r5916, 1;
setp.eq.b32 %p952, %r5917, 1;
and.pred %p953, %p951, %p952;
selp.b32 %r5918, 16, 0, %p953;
or.b32 %r9493, %r5918, %r9493;
$L__BB2_773:
setp.ge.u32 %p954, %r1751, %r4058;
@%p954 bra $L__BB2_775;
ld.global.u32 %r5919, [%rd31];
abs.s32 %r5920, %r5919;
setp.gt.u32 %p955, %r5920, 4;
and.b32 %r5921, %r5920, 1;
setp.eq.b32 %p956, %r5921, 1;
and.pred %p957, %p955, %p956;
selp.b32 %r5922, 32, 0, %p957;
or.b32 %r9493, %r5922, %r9493;
$L__BB2_775:
setp.ge.u32 %p958, %r1752, %r4058;
@%p958 bra $L__BB2_777;
ld.global.u32 %r5923, [%rd32];
abs.s32 %r5924, %r5923;
setp.gt.u32 %p959, %r5924, 4;
and.b32 %r5925, %r5924, 1;
setp.eq.b32 %p960, %r5925, 1;
and.pred %p961, %p959, %p960;
selp.b32 %r5926, 64, 0, %p961;
or.b32 %r9493, %r5926, %r9493;
$L__BB2_777:
setp.ge.u32 %p962, %r1753, %r4058;
@%p962 bra $L__BB2_779;
ld.global.u32 %r5927, [%rd33];
abs.s32 %r5928, %r5927;
setp.gt.u32 %p963, %r5928, 4;
and.b32 %r5929, %r5928, 1;
setp.eq.b32 %p964, %r5929, 1;
and.pred %p965, %p963, %p964;
selp.b32 %r5930, 128, 0, %p965;
or.b32 %r9493, %r5930, %r9493;
$L__BB2_779:
add.s32 %r5931, %r1754, %r1781;
cvt.u64.u32 %rd604, %r5931;
add.s64 %rd605, %rd604, %rd4;
shl.b64 %rd606, %rd605, 2;
add.s64 %rd34, %rd3, %rd606;
add.s32 %r5932, %r1757, %r1781;
cvt.u64.u32 %rd607, %r5932;
add.s64 %rd608, %rd607, %rd4;
shl.b64 %rd609, %rd608, 2;
add.s64 %rd35, %rd3, %rd609;
add.s32 %r5933, %r1756, %r1781;
cvt.u64.u32 %rd610, %r5933;
add.s64 %rd611, %rd610, %rd4;
shl.b64 %rd612, %rd611, 2;
add.s64 %rd36, %rd3, %rd612;
add.s32 %r5934, %r1755, %r1781;
cvt.u64.u32 %rd613, %r5934;
add.s64 %rd614, %rd613, %rd4;
shl.b64 %rd615, %rd614, 2;
add.s64 %rd37, %rd3, %rd615;
@%p830 bra $L__BB2_788;
setp.le.u32 %p967, %r4058, %r9445;
@%p967 bra $L__BB2_782;
ld.global.u32 %r5935, [%rd34];
abs.s32 %r5936, %r5935;
setp.gt.u32 %p968, %r5936, 4;
and.b32 %r5937, %r5936, 1;
setp.eq.b32 %p969, %r5937, 1;
and.pred %p970, %p968, %p969;
selp.b32 %r5938, 256, 0, %p970;
or.b32 %r9493, %r5938, %r9493;
$L__BB2_782:
setp.ge.u32 %p971, %r1751, %r4058;
@%p971 bra $L__BB2_784;
ld.global.u32 %r5939, [%rd35];
abs.s32 %r5940, %r5939;
setp.gt.u32 %p972, %r5940, 4;
and.b32 %r5941, %r5940, 1;
setp.eq.b32 %p973, %r5941, 1;
and.pred %p974, %p972, %p973;
selp.b32 %r5942, 512, 0, %p974;
or.b32 %r9493, %r5942, %r9493;
$L__BB2_784:
setp.ge.u32 %p975, %r1752, %r4058;
@%p975 bra $L__BB2_786;
ld.global.u32 %r5943, [%rd36];
abs.s32 %r5944, %r5943;
setp.gt.u32 %p976, %r5944, 4;
and.b32 %r5945, %r5944, 1;
setp.eq.b32 %p977, %r5945, 1;
and.pred %p978, %p976, %p977;
selp.b32 %r5946, 1024, 0, %p978;
or.b32 %r9493, %r5946, %r9493;
$L__BB2_786:
setp.ge.u32 %p979, %r1753, %r4058;
@%p979 bra $L__BB2_788;
ld.global.u32 %r5947, [%rd37];
abs.s32 %r5948, %r5947;
setp.gt.u32 %p980, %r5948, 4;
and.b32 %r5949, %r5948, 1;
setp.eq.b32 %p981, %r5949, 1;
and.pred %p982, %p980, %p981;
selp.b32 %r5950, 2048, 0, %p982;
or.b32 %r9493, %r5950, %r9493;
$L__BB2_788:
add.s32 %r5951, %r1754, %r1790;
cvt.u64.u32 %rd616, %r5951;
add.s64 %rd617, %rd616, %rd4;
shl.b64 %rd618, %rd617, 2;
add.s64 %rd38, %rd3, %rd618;
add.s32 %r5952, %r1757, %r1790;
cvt.u64.u32 %rd619, %r5952;
add.s64 %rd620, %rd619, %rd4;
shl.b64 %rd621, %rd620, 2;
add.s64 %rd39, %rd3, %rd621;
add.s32 %r5953, %r1756, %r1790;
cvt.u64.u32 %rd622, %r5953;
add.s64 %rd623, %rd622, %rd4;
shl.b64 %rd624, %rd623, 2;
add.s64 %rd40, %rd3, %rd624;
add.s32 %r5954, %r1755, %r1790;
cvt.u64.u32 %rd625, %r5954;
add.s64 %rd626, %rd625, %rd4;
shl.b64 %rd627, %rd626, 2;
add.s64 %rd41, %rd3, %rd627;
@%p847 bra $L__BB2_797;
setp.le.u32 %p984, %r4058, %r9445;
@%p984 bra $L__BB2_791;
ld.global.u32 %r5955, [%rd38];
abs.s32 %r5956, %r5955;
setp.gt.u32 %p985, %r5956, 4;
and.b32 %r5957, %r5956, 1;
setp.eq.b32 %p986, %r5957, 1;
and.pred %p987, %p985, %p986;
selp.b32 %r5958, 4096, 0, %p987;
or.b32 %r9493, %r5958, %r9493;
$L__BB2_791:
setp.ge.u32 %p988, %r1751, %r4058;
@%p988 bra $L__BB2_793;
ld.global.u32 %r5959, [%rd39];
abs.s32 %r5960, %r5959;
setp.gt.u32 %p989, %r5960, 4;
and.b32 %r5961, %r5960, 1;
setp.eq.b32 %p990, %r5961, 1;
and.pred %p991, %p989, %p990;
selp.b32 %r5962, 8192, 0, %p991;
or.b32 %r9493, %r5962, %r9493;
$L__BB2_793:
setp.ge.u32 %p992, %r1752, %r4058;
@%p992 bra $L__BB2_795;
ld.global.u32 %r5963, [%rd40];
abs.s32 %r5964, %r5963;
setp.gt.u32 %p993, %r5964, 4;
and.b32 %r5965, %r5964, 1;
setp.eq.b32 %p994, %r5965, 1;
and.pred %p995, %p993, %p994;
selp.b32 %r5966, 16384, 0, %p995;
or.b32 %r9493, %r5966, %r9493;
$L__BB2_795:
setp.ge.u32 %p996, %r1753, %r4058;
@%p996 bra $L__BB2_797;
ld.global.u32 %r5967, [%rd41];
abs.s32 %r5968, %r5967;
setp.gt.u32 %p997, %r5968, 4;
and.b32 %r5969, %r5968, 1;
setp.eq.b32 %p998, %r5969, 1;
and.pred %p999, %p997, %p998;
selp.b32 %r5970, 32768, 0, %p999;
or.b32 %r9493, %r5970, %r9493;
$L__BB2_797:
mov.u32 %r9509, 0;
@%p864 bra $L__BB2_806;
setp.le.u32 %p1001, %r4058, %r9445;
mov.u32 %r9509, 0;
@%p1001 bra $L__BB2_800;
add.s32 %r5975, %r5890, 4;
cvt.u64.u32 %rd628, %r5975;
add.s64 %rd629, %rd628, %rd4;
shl.b64 %rd630, %rd629, 2;
add.s64 %rd631, %rd3, %rd630;
ld.global.u32 %r5976, [%rd631];
abs.s32 %r5977, %r5976;
setp.gt.u32 %p1002, %r5977, 4;
and.b32 %r5978, %r5977, 1;
setp.eq.b32 %p1003, %r5978, 1;
and.pred %p1004, %p1002, %p1003;
selp.u32 %r9509, 1, 0, %p1004;
$L__BB2_800:
setp.ge.u32 %p1005, %r1751, %r4058;
@%p1005 bra $L__BB2_802;
add.s32 %r5980, %r5891, 4;
cvt.u64.u32 %rd632, %r5980;
add.s64 %rd633, %rd632, %rd4;
shl.b64 %rd634, %rd633, 2;
add.s64 %rd635, %rd3, %rd634;
ld.global.u32 %r5981, [%rd635];
abs.s32 %r5982, %r5981;
setp.gt.u32 %p1006, %r5982, 4;
and.b32 %r5983, %r5982, 1;
setp.eq.b32 %p1007, %r5983, 1;
and.pred %p1008, %p1006, %p1007;
selp.b32 %r5984, 2, 0, %p1008;
or.b32 %r9509, %r5984, %r9509;
$L__BB2_802:
setp.ge.u32 %p1009, %r1752, %r4058;
@%p1009 bra $L__BB2_804;
add.s32 %r5986, %r5892, 4;
cvt.u64.u32 %rd636, %r5986;
add.s64 %rd637, %rd636, %rd4;
shl.b64 %rd638, %rd637, 2;
add.s64 %rd639, %rd3, %rd638;
ld.global.u32 %r5987, [%rd639];
abs.s32 %r5988, %r5987;
setp.gt.u32 %p1010, %r5988, 4;
and.b32 %r5989, %r5988, 1;
setp.eq.b32 %p1011, %r5989, 1;
and.pred %p1012, %p1010, %p1011;
selp.b32 %r5990, 4, 0, %p1012;
or.b32 %r9509, %r5990, %r9509;
$L__BB2_804:
setp.ge.u32 %p1013, %r1753, %r4058;
@%p1013 bra $L__BB2_806;
add.s32 %r5992, %r5893, 4;
cvt.u64.u32 %rd640, %r5992;
add.s64 %rd641, %rd640, %rd4;
shl.b64 %rd642, %rd641, 2;
add.s64 %rd643, %rd3, %rd642;
ld.global.u32 %r5993, [%rd643];
abs.s32 %r5994, %r5993;
setp.gt.u32 %p1014, %r5994, 4;
and.b32 %r5995, %r5994, 1;
setp.eq.b32 %p1015, %r5995, 1;
and.pred %p1016, %p1014, %p1015;
selp.b32 %r5996, 8, 0, %p1016;
or.b32 %r9509, %r5996, %r9509;
$L__BB2_806:
@%p881 bra $L__BB2_815;
setp.le.u32 %p1018, %r4058, %r9445;
@%p1018 bra $L__BB2_809;
add.s32 %r5997, %r1754, %r1807;
cvt.u64.u32 %rd644, %r5997;
add.s64 %rd645, %rd644, %rd4;
shl.b64 %rd646, %rd645, 2;
add.s64 %rd647, %rd3, %rd646;
ld.global.u32 %r5998, [%rd647];
abs.s32 %r5999, %r5998;
setp.gt.u32 %p1019, %r5999, 4;
and.b32 %r6000, %r5999, 1;
setp.eq.b32 %p1020, %r6000, 1;
and.pred %p1021, %p1019, %p1020;
selp.b32 %r6001, 16, 0, %p1021;
or.b32 %r9509, %r6001, %r9509;
$L__BB2_809:
setp.ge.u32 %p1022, %r1751, %r4058;
@%p1022 bra $L__BB2_811;
add.s32 %r6002, %r1757, %r1807;
cvt.u64.u32 %rd648, %r6002;
add.s64 %rd649, %rd648, %rd4;
shl.b64 %rd650, %rd649, 2;
add.s64 %rd651, %rd3, %rd650;
ld.global.u32 %r6003, [%rd651];
abs.s32 %r6004, %r6003;
setp.gt.u32 %p1023, %r6004, 4;
and.b32 %r6005, %r6004, 1;
setp.eq.b32 %p1024, %r6005, 1;
and.pred %p1025, %p1023, %p1024;
selp.b32 %r6006, 32, 0, %p1025;
or.b32 %r9509, %r6006, %r9509;
$L__BB2_811:
setp.ge.u32 %p1026, %r1752, %r4058;
@%p1026 bra $L__BB2_813;
add.s32 %r6007, %r1756, %r1807;
cvt.u64.u32 %rd652, %r6007;
add.s64 %rd653, %rd652, %rd4;
shl.b64 %rd654, %rd653, 2;
add.s64 %rd655, %rd3, %rd654;
ld.global.u32 %r6008, [%rd655];
abs.s32 %r6009, %r6008;
setp.gt.u32 %p1027, %r6009, 4;
and.b32 %r6010, %r6009, 1;
setp.eq.b32 %p1028, %r6010, 1;
and.pred %p1029, %p1027, %p1028;
selp.b32 %r6011, 64, 0, %p1029;
or.b32 %r9509, %r6011, %r9509;
$L__BB2_813:
setp.ge.u32 %p1030, %r1753, %r4058;
@%p1030 bra $L__BB2_815;
add.s32 %r6012, %r1755, %r1807;
cvt.u64.u32 %rd656, %r6012;
add.s64 %rd657, %rd656, %rd4;
shl.b64 %rd658, %rd657, 2;
add.s64 %rd659, %rd3, %rd658;
ld.global.u32 %r6013, [%rd659];
abs.s32 %r6014, %r6013;
setp.gt.u32 %p1031, %r6014, 4;
and.b32 %r6015, %r6014, 1;
setp.eq.b32 %p1032, %r6015, 1;
and.pred %p1033, %p1031, %p1032;
selp.b32 %r6016, 128, 0, %p1033;
or.b32 %r9509, %r6016, %r9509;
$L__BB2_815:
@%p898 bra $L__BB2_824;
setp.le.u32 %p1035, %r4058, %r9445;
@%p1035 bra $L__BB2_818;
add.s32 %r6017, %r1754, %r1816;
cvt.u64.u32 %rd660, %r6017;
add.s64 %rd661, %rd660, %rd4;
shl.b64 %rd662, %rd661, 2;
add.s64 %rd663, %rd3, %rd662;
ld.global.u32 %r6018, [%rd663];
abs.s32 %r6019, %r6018;
setp.gt.u32 %p1036, %r6019, 4;
and.b32 %r6020, %r6019, 1;
setp.eq.b32 %p1037, %r6020, 1;
and.pred %p1038, %p1036, %p1037;
selp.b32 %r6021, 256, 0, %p1038;
or.b32 %r9509, %r6021, %r9509;
$L__BB2_818:
setp.ge.u32 %p1039, %r1751, %r4058;
@%p1039 bra $L__BB2_820;
add.s32 %r6022, %r1757, %r1816;
cvt.u64.u32 %rd664, %r6022;
add.s64 %rd665, %rd664, %rd4;
shl.b64 %rd666, %rd665, 2;
add.s64 %rd667, %rd3, %rd666;
ld.global.u32 %r6023, [%rd667];
abs.s32 %r6024, %r6023;
setp.gt.u32 %p1040, %r6024, 4;
and.b32 %r6025, %r6024, 1;
setp.eq.b32 %p1041, %r6025, 1;
and.pred %p1042, %p1040, %p1041;
selp.b32 %r6026, 512, 0, %p1042;
or.b32 %r9509, %r6026, %r9509;
$L__BB2_820:
setp.ge.u32 %p1043, %r1752, %r4058;
@%p1043 bra $L__BB2_822;
add.s32 %r6027, %r1756, %r1816;
cvt.u64.u32 %rd668, %r6027;
add.s64 %rd669, %rd668, %rd4;
shl.b64 %rd670, %rd669, 2;
add.s64 %rd671, %rd3, %rd670;
ld.global.u32 %r6028, [%rd671];
abs.s32 %r6029, %r6028;
setp.gt.u32 %p1044, %r6029, 4;
and.b32 %r6030, %r6029, 1;
setp.eq.b32 %p1045, %r6030, 1;
and.pred %p1046, %p1044, %p1045;
selp.b32 %r6031, 1024, 0, %p1046;
or.b32 %r9509, %r6031, %r9509;
$L__BB2_822:
setp.ge.u32 %p1047, %r1753, %r4058;
@%p1047 bra $L__BB2_824;
add.s32 %r6032, %r1755, %r1816;
cvt.u64.u32 %rd672, %r6032;
add.s64 %rd673, %rd672, %rd4;
shl.b64 %rd674, %rd673, 2;
add.s64 %rd675, %rd3, %rd674;
ld.global.u32 %r6033, [%rd675];
abs.s32 %r6034, %r6033;
setp.gt.u32 %p1048, %r6034, 4;
and.b32 %r6035, %r6034, 1;
setp.eq.b32 %p1049, %r6035, 1;
and.pred %p1050, %p1048, %p1049;
selp.b32 %r6036, 2048, 0, %p1050;
or.b32 %r9509, %r6036, %r9509;
$L__BB2_824:
@%p915 bra $L__BB2_833;
setp.le.u32 %p1052, %r4058, %r9445;
@%p1052 bra $L__BB2_827;
add.s32 %r6037, %r1754, %r1825;
cvt.u64.u32 %rd676, %r6037;
add.s64 %rd677, %rd676, %rd4;
shl.b64 %rd678, %rd677, 2;
add.s64 %rd679, %rd3, %rd678;
ld.global.u32 %r6038, [%rd679];
abs.s32 %r6039, %r6038;
setp.gt.u32 %p1053, %r6039, 4;
and.b32 %r6040, %r6039, 1;
setp.eq.b32 %p1054, %r6040, 1;
and.pred %p1055, %p1053, %p1054;
selp.b32 %r6041, 4096, 0, %p1055;
or.b32 %r9509, %r6041, %r9509;
$L__BB2_827:
setp.ge.u32 %p1056, %r1751, %r4058;
@%p1056 bra $L__BB2_829;
add.s32 %r6042, %r1757, %r1825;
cvt.u64.u32 %rd680, %r6042;
add.s64 %rd681, %rd680, %rd4;
shl.b64 %rd682, %rd681, 2;
add.s64 %rd683, %rd3, %rd682;
ld.global.u32 %r6043, [%rd683];
abs.s32 %r6044, %r6043;
setp.gt.u32 %p1057, %r6044, 4;
and.b32 %r6045, %r6044, 1;
setp.eq.b32 %p1058, %r6045, 1;
and.pred %p1059, %p1057, %p1058;
selp.b32 %r6046, 8192, 0, %p1059;
or.b32 %r9509, %r6046, %r9509;
$L__BB2_829:
setp.ge.u32 %p1060, %r1752, %r4058;
@%p1060 bra $L__BB2_831;
add.s32 %r6047, %r1756, %r1825;
cvt.u64.u32 %rd684, %r6047;
add.s64 %rd685, %rd684, %rd4;
shl.b64 %rd686, %rd685, 2;
add.s64 %rd687, %rd3, %rd686;
ld.global.u32 %r6048, [%rd687];
abs.s32 %r6049, %r6048;
setp.gt.u32 %p1061, %r6049, 4;
and.b32 %r6050, %r6049, 1;
setp.eq.b32 %p1062, %r6050, 1;
and.pred %p1063, %p1061, %p1062;
selp.b32 %r6051, 16384, 0, %p1063;
or.b32 %r9509, %r6051, %r9509;
$L__BB2_831:
setp.ge.u32 %p1064, %r1753, %r4058;
@%p1064 bra $L__BB2_833;
add.s32 %r6052, %r1755, %r1825;
cvt.u64.u32 %rd688, %r6052;
add.s64 %rd689, %rd688, %rd4;
shl.b64 %rd690, %rd689, 2;
add.s64 %rd691, %rd3, %rd690;
ld.global.u32 %r6053, [%rd691];
abs.s32 %r6054, %r6053;
setp.gt.u32 %p1065, %r6054, 4;
and.b32 %r6055, %r6054, 1;
setp.eq.b32 %p1066, %r6055, 1;
and.pred %p1067, %p1065, %p1066;
selp.b32 %r6056, 32768, 0, %p1067;
or.b32 %r9509, %r6056, %r9509;
$L__BB2_833:
sub.s32 %r6059, %r5804, %r4057;
shl.b32 %r6060, %r9509, 16;
or.b32 %r1900, %r6060, %r9493;
and.b32 %r6061, %r1834, -2004318072;
shr.u32 %r6062, %r6061, 3;
shl.b32 %r6063, %r1843, 3;
and.b32 %r6064, %r6063, -2004318072;
or.b32 %r1901, %r6064, %r6062;
not.b32 %r6065, %r1900;
setp.gt.s32 %p1068, %r6059, 0;
mov.u32 %r9525, 0;
shl.b32 %r6066, %r6059, 2;
selp.b32 %r6067, %r6066, 0, %p1068;
shr.u32 %r1902, %r1758, %r6067;
and.b32 %r1903, %r1902, %r6065;
@%p796 bra $L__BB2_842;
setp.le.u32 %p1070, %r4058, %r9445;
mov.u32 %r9525, 0;
@%p1070 bra $L__BB2_836;
ld.global.u32 %r6069, [%rd26];
abs.s32 %r6070, %r6069;
setp.eq.s32 %p1071, %r6070, 3;
selp.u32 %r9525, 1, 0, %p1071;
$L__BB2_836:
setp.ge.u32 %p1072, %r1751, %r4058;
@%p1072 bra $L__BB2_838;
ld.global.u32 %r6071, [%rd27];
abs.s32 %r6072, %r6071;
setp.eq.s32 %p1073, %r6072, 3;
selp.b32 %r6073, 2, 0, %p1073;
or.b32 %r9525, %r6073, %r9525;
$L__BB2_838:
setp.ge.u32 %p1074, %r1752, %r4058;
@%p1074 bra $L__BB2_840;
ld.global.u32 %r6074, [%rd28];
abs.s32 %r6075, %r6074;
setp.eq.s32 %p1075, %r6075, 3;
selp.b32 %r6076, 4, 0, %p1075;
or.b32 %r9525, %r6076, %r9525;
$L__BB2_840:
setp.ge.u32 %p1076, %r1753, %r4058;
@%p1076 bra $L__BB2_842;
ld.global.u32 %r6077, [%rd29];
abs.s32 %r6078, %r6077;
setp.eq.s32 %p1077, %r6078, 3;
selp.b32 %r6079, 8, 0, %p1077;
or.b32 %r9525, %r6079, %r9525;
$L__BB2_842:
@%p813 bra $L__BB2_851;
setp.le.u32 %p1079, %r4058, %r9445;
@%p1079 bra $L__BB2_845;
ld.global.u32 %r6080, [%rd30];
abs.s32 %r6081, %r6080;
setp.eq.s32 %p1080, %r6081, 3;
selp.b32 %r6082, 16, 0, %p1080;
or.b32 %r9525, %r6082, %r9525;
$L__BB2_845:
setp.ge.u32 %p1081, %r1751, %r4058;
@%p1081 bra $L__BB2_847;
ld.global.u32 %r6083, [%rd31];
abs.s32 %r6084, %r6083;
setp.eq.s32 %p1082, %r6084, 3;
selp.b32 %r6085, 32, 0, %p1082;
or.b32 %r9525, %r6085, %r9525;
$L__BB2_847:
setp.ge.u32 %p1083, %r1752, %r4058;
@%p1083 bra $L__BB2_849;
ld.global.u32 %r6086, [%rd32];
abs.s32 %r6087, %r6086;
setp.eq.s32 %p1084, %r6087, 3;
selp.b32 %r6088, 64, 0, %p1084;
or.b32 %r9525, %r6088, %r9525;
$L__BB2_849:
setp.ge.u32 %p1085, %r1753, %r4058;
@%p1085 bra $L__BB2_851;
ld.global.u32 %r6089, [%rd33];
abs.s32 %r6090, %r6089;
setp.eq.s32 %p1086, %r6090, 3;
selp.b32 %r6091, 128, 0, %p1086;
or.b32 %r9525, %r6091, %r9525;
$L__BB2_851:
@%p830 bra $L__BB2_860;
setp.le.u32 %p1088, %r4058, %r9445;
@%p1088 bra $L__BB2_854;
ld.global.u32 %r6092, [%rd34];
abs.s32 %r6093, %r6092;
setp.eq.s32 %p1089, %r6093, 3;
selp.b32 %r6094, 256, 0, %p1089;
or.b32 %r9525, %r6094, %r9525;
$L__BB2_854:
setp.ge.u32 %p1090, %r1751, %r4058;
@%p1090 bra $L__BB2_856;
ld.global.u32 %r6095, [%rd35];
abs.s32 %r6096, %r6095;
setp.eq.s32 %p1091, %r6096, 3;
selp.b32 %r6097, 512, 0, %p1091;
or.b32 %r9525, %r6097, %r9525;
$L__BB2_856:
setp.ge.u32 %p1092, %r1752, %r4058;
@%p1092 bra $L__BB2_858;
ld.global.u32 %r6098, [%rd36];
abs.s32 %r6099, %r6098;
setp.eq.s32 %p1093, %r6099, 3;
selp.b32 %r6100, 1024, 0, %p1093;
or.b32 %r9525, %r6100, %r9525;
$L__BB2_858:
setp.ge.u32 %p1094, %r1753, %r4058;
@%p1094 bra $L__BB2_860;
ld.global.u32 %r6101, [%rd37];
abs.s32 %r6102, %r6101;
setp.eq.s32 %p1095, %r6102, 3;
selp.b32 %r6103, 2048, 0, %p1095;
or.b32 %r9525, %r6103, %r9525;
$L__BB2_860:
@%p847 bra $L__BB2_869;
setp.le.u32 %p1097, %r4058, %r9445;
@%p1097 bra $L__BB2_863;
ld.global.u32 %r6104, [%rd38];
abs.s32 %r6105, %r6104;
setp.eq.s32 %p1098, %r6105, 3;
selp.b32 %r6106, 4096, 0, %p1098;
or.b32 %r9525, %r6106, %r9525;
$L__BB2_863:
setp.ge.u32 %p1099, %r1751, %r4058;
@%p1099 bra $L__BB2_865;
ld.global.u32 %r6107, [%rd39];
abs.s32 %r6108, %r6107;
setp.eq.s32 %p1100, %r6108, 3;
selp.b32 %r6109, 8192, 0, %p1100;
or.b32 %r9525, %r6109, %r9525;
$L__BB2_865:
setp.ge.u32 %p1101, %r1752, %r4058;
@%p1101 bra $L__BB2_867;
ld.global.u32 %r6110, [%rd40];
abs.s32 %r6111, %r6110;
setp.eq.s32 %p1102, %r6111, 3;
selp.b32 %r6112, 16384, 0, %p1102;
or.b32 %r9525, %r6112, %r9525;
$L__BB2_867:
setp.ge.u32 %p1103, %r1753, %r4058;
@%p1103 bra $L__BB2_869;
ld.global.u32 %r6113, [%rd41];
abs.s32 %r6114, %r6113;
setp.eq.s32 %p1104, %r6114, 3;
selp.b32 %r6115, 32768, 0, %p1104;
or.b32 %r9525, %r6115, %r9525;
$L__BB2_869:
and.b32 %r6117, %r1900, -286331154;
shr.u32 %r6118, %r6117, 1;
shl.b32 %r6119, %r1900, 1;
and.b32 %r6120, %r6119, -286331154;
or.b32 %r6121, %r1900, %r1901;
or.b32 %r6122, %r6121, %r6120;
or.b32 %r6123, %r6122, %r6118;
and.b32 %r1936, %r9525, %r1902;
shr.u32 %r6124, %r6123, 4;
shl.b32 %r6125, %r6123, 4;
shr.u32 %r6126, %r9450, 12;
or.b32 %r6127, %r6123, %r6126;
or.b32 %r6128, %r6127, %r6125;
or.b32 %r6129, %r6128, %r6124;
and.b32 %r9535, %r1903, %r6129;
setp.eq.s32 %p1105, %r9535, 0;
mov.u32 %r6116, 0;
mov.u32 %r9556, %r6116;
@%p1105 bra $L__BB2_924;
mov.u32 %r9534, 0;
mov.u32 %r9536, %r9534;
mov.u32 %r9537, %r9555;
// Work-set loop: repeatedly removes the lowest set bit k from %r9535, emits one
// bit (whether bit k is set in %r1936) into the bit accumulator, and, if it was
// set, adds further candidate bits to the work set.
//
// Bit accumulator state (shared with the rest of the kernel):
//   %rs1224 = bits collected for the current byte, %r9551 = bits used so far,
//   %r9553  = 1 if the previously completed byte was 0xFF (then only 7 bits
//             are taken for the current byte, otherwise 8),
//   %r9555  = number of completed bytes; -1 is treated as "cannot advance".
// No store of the completed byte is visible in this loop; only the counters
// are updated.
$L__BB2_871:
// %r1944 = index of the lowest set bit (bit-reverse, then leading-bit shift
// amount); %r1945 = 1 << %r1944; rotating 0xFFFFFFFE left by %r1944 gives
// ~%r1945, used to clear that bit from the work set.
brev.b32 %r6132, %r9535;
bfind.shiftamt.u32 %r1944, %r6132;
mov.pred %p2370, -1;
mov.u32 %r6133, 1;
shl.b32 %r1945, %r6133, %r1944;
mov.u32 %r6134, -2;
shf.l.wrap.b32 %r6135, %r6134, %r6134, %r1944;
and.b32 %r9535, %r9535, %r6135;
or.b32 %r9534, %r1945, %r9534;
and.b32 %r1948, %r1945, %r1936;
setp.ne.s32 %p1107, %r1948, 0;
selp.u32 %r6136, 1, 0, %p1107;
// Append the bit at position %r9551; byte capacity is 8, or 7 after a 0xFF byte.
setp.eq.s32 %p1108, %r9553, 0;
selp.b32 %r6137, 8, 7, %p1108;
shl.b32 %r6138, %r6136, %r9551;
cvt.u16.u32 %rs772, %r6138;
or.b16 %rs1224, %rs1224, %rs772;
add.s32 %r9551, %r9551, 1;
setp.lt.u32 %p1109, %r9551, %r6137;
mov.pred %p2368, %p2370;
@%p1109 bra $L__BB2_874;
// Byte is full. If the counter is already -1, flag failure (%p2368 = false);
// otherwise record whether the byte was 0xFF, bump the counter and reset.
setp.eq.s32 %p1111, %r9537, -1;
mov.u32 %r9555, -1;
mov.pred %p2368, 0;
@%p1111 bra $L__BB2_874;
and.b16 %rs774, %rs1224, 255;
setp.eq.s16 %p1113, %rs774, 255;
selp.u32 %r9553, 1, 0, %p1113;
add.s32 %r9555, %r9537, 1;
mov.u32 %r9551, 0;
mov.u16 %rs1224, 0;
mov.pred %p2368, %p2370;
$L__BB2_874:
// On failure leave for $L__BB2_928 with %r9560 = 0 and %p2370 = true.
mov.u32 %r9560, 0;
not.pred %p1115, %p2368;
@%p1115 bra $L__BB2_928;
// Emitted bit was 0: nothing to propagate.
setp.eq.s32 %p1116, %r1948, 0;
@%p1116 bra $L__BB2_916;
// Emitted bit was 1: remember it in %r9536 and select the mask %r9543 for
// index k = %r1944 via a compare tree. The values are
//   base[k & 3] << (4 * (k >> 2)),  base = {0x33, 0x76, 0xEC, 0xC8},
// for k = 0..15, and 0 for any other k.
or.b32 %r9536, %r1945, %r9536;
mov.u32 %r9543, 51;
setp.gt.s32 %p1117, %r1944, 7;
@%p1117 bra $L__BB2_892;
setp.gt.s32 %p1129, %r1944, 3;
@%p1129 bra $L__BB2_885;
setp.gt.s32 %p1135, %r1944, 1;
@%p1135 bra $L__BB2_882;
setp.eq.s32 %p1138, %r1944, 0;
@%p1138 bra $L__BB2_915;
setp.eq.s32 %p1139, %r1944, 1;
@%p1139 bra $L__BB2_881;
bra.uni $L__BB2_914;
$L__BB2_881:
mov.u32 %r9543, 118;
bra.uni $L__BB2_915;
$L__BB2_892:
setp.gt.s32 %p1118, %r1944, 11;
@%p1118 bra $L__BB2_900;
setp.gt.s32 %p1124, %r1944, 9;
@%p1124 bra $L__BB2_897;
setp.eq.s32 %p1127, %r1944, 8;
@%p1127 bra $L__BB2_910;
setp.eq.s32 %p1128, %r1944, 9;
@%p1128 bra $L__BB2_896;
bra.uni $L__BB2_914;
$L__BB2_896:
mov.u32 %r9543, 30208;
bra.uni $L__BB2_915;
$L__BB2_885:
setp.gt.s32 %p1130, %r1944, 5;
@%p1130 bra $L__BB2_889;
setp.eq.s32 %p1133, %r1944, 4;
@%p1133 bra $L__BB2_912;
setp.eq.s32 %p1134, %r1944, 5;
@%p1134 bra $L__BB2_888;
bra.uni $L__BB2_914;
$L__BB2_888:
mov.u32 %r9543, 1888;
bra.uni $L__BB2_915;
$L__BB2_900:
setp.gt.s32 %p1119, %r1944, 13;
@%p1119 bra $L__BB2_904;
setp.eq.s32 %p1122, %r1944, 12;
@%p1122 bra $L__BB2_908;
setp.eq.s32 %p1123, %r1944, 13;
@%p1123 bra $L__BB2_903;
bra.uni $L__BB2_914;
$L__BB2_903:
mov.u32 %r9543, 483328;
bra.uni $L__BB2_915;
$L__BB2_882:
setp.eq.s32 %p1136, %r1944, 2;
@%p1136 bra $L__BB2_913;
setp.eq.s32 %p1137, %r1944, 3;
@%p1137 bra $L__BB2_884;
bra.uni $L__BB2_914;
$L__BB2_884:
mov.u32 %r9543, 200;
bra.uni $L__BB2_915;
$L__BB2_897:
setp.eq.s32 %p1125, %r1944, 10;
@%p1125 bra $L__BB2_909;
setp.eq.s32 %p1126, %r1944, 11;
@%p1126 bra $L__BB2_899;
bra.uni $L__BB2_914;
$L__BB2_899:
mov.u32 %r9543, 51200;
bra.uni $L__BB2_915;
$L__BB2_889:
setp.eq.s32 %p1131, %r1944, 6;
@%p1131 bra $L__BB2_911;
setp.eq.s32 %p1132, %r1944, 7;
@%p1132 bra $L__BB2_891;
bra.uni $L__BB2_914;
$L__BB2_891:
mov.u32 %r9543, 3200;
bra.uni $L__BB2_915;
$L__BB2_904:
setp.eq.s32 %p1120, %r1944, 14;
@%p1120 bra $L__BB2_907;
setp.ne.s32 %p1121, %r1944, 15;
@%p1121 bra $L__BB2_914;
mov.u32 %r9543, 819200;
bra.uni $L__BB2_915;
$L__BB2_910:
mov.u32 %r9543, 13056;
bra.uni $L__BB2_915;
$L__BB2_912:
mov.u32 %r9543, 816;
bra.uni $L__BB2_915;
$L__BB2_908:
mov.u32 %r9543, 208896;
bra.uni $L__BB2_915;
$L__BB2_913:
mov.u32 %r9543, 236;
bra.uni $L__BB2_915;
$L__BB2_909:
mov.u32 %r9543, 60416;
bra.uni $L__BB2_915;
$L__BB2_911:
mov.u32 %r9543, 3776;
bra.uni $L__BB2_915;
$L__BB2_907:
mov.u32 %r9543, 966656;
bra.uni $L__BB2_915;
$L__BB2_914:
mov.u32 %r9543, 0;
$L__BB2_915:
// Add the not-yet-visited candidates inside the selected mask to the work set.
not.b32 %r6159, %r9534;
and.b32 %r6160, %r1903, %r6159;
and.b32 %r6161, %r6160, %r9543;
or.b32 %r9535, %r6161, %r9535;
$L__BB2_916:
// Continue while the work set is non-empty, carrying the byte counter over.
setp.ne.s32 %p1140, %r9535, 0;
mov.u32 %r9537, %r9555;
@%p1140 bra $L__BB2_871;
// Emits one sign bit for every bit set in %r9536.
// If %r9536 is empty, control goes straight to $L__BB2_924 with %r9556 = 0.
// Otherwise each set bit k (lowest first) addresses the element
//   ((k & 3) + %r9445) * %r4055 + (k >> 2) + %r9449
// (plus %rd4, scaled by 4, relative to %rd3). Bit 31 of the loaded word is
// appended to the same bit accumulator (%rs1224 / %r9551 / %r9553 / byte
// counter) that the preceding loop uses.
// On completion %r9556 = %r9536 and control goes to $L__BB2_924; on a
// byte-counter failure control goes to $L__BB2_928 with %r9560 = 0 and
// %p2370 = true.
setp.eq.s32 %p1141, %r9536, 0;
mov.u32 %r9556, 0;
@%p1141 bra $L__BB2_924;
mov.u32 %r9552, %r9555;
mov.u32 %r9549, %r9536;
$L__BB2_919:
mov.u32 %r9555, %r9552;
setp.eq.s32 %p1142, %r9549, 0;
mov.u32 %r9556, %r9536;
@%p1142 bra $L__BB2_924;
// k = index of the lowest set bit of %r9549; clear it from %r9549.
brev.b32 %r6163, %r9549;
bfind.shiftamt.u32 %r6164, %r6163;
mov.pred %p2370, -1;
mov.u32 %r6165, -2;
shf.l.wrap.b32 %r6166, %r6165, %r6165, %r6164;
and.b32 %r9549, %r9549, %r6166;
shr.u32 %r6167, %r6164, 2;
and.b32 %r6168, %r6164, 3;
add.s32 %r6169, %r6168, %r9445;
add.s32 %r6170, %r6167, %r9449;
mad.lo.s32 %r6171, %r6169, %r4055, %r6170;
cvt.u64.u32 %rd692, %r6171;
add.s64 %rd693, %rd692, %rd4;
shl.b64 %rd694, %rd693, 2;
add.s64 %rd695, %rd3, %rd694;
ld.global.u32 %r6172, [%rd695];
// Sign bit of the loaded word.
shr.u32 %r6173, %r6172, 31;
// Append it; byte capacity is 8 bits, or 7 when %r9553 is set.
setp.eq.s32 %p1144, %r9553, 0;
selp.b32 %r6174, 8, 7, %p1144;
shl.b32 %r6175, %r6173, %r9551;
cvt.u16.u32 %rs775, %r6175;
or.b16 %rs1224, %rs1224, %rs775;
add.s32 %r9551, %r9551, 1;
setp.lt.u32 %p1145, %r9551, %r6174;
mov.pred %p2369, %p2370;
mov.u32 %r9552, %r9555;
@%p1145 bra $L__BB2_923;
// Byte full: fail if the counter is -1, else record the 0xFF state, bump the
// counter and reset the accumulator.
setp.eq.s32 %p1147, %r9555, -1;
mov.u32 %r9552, -1;
mov.pred %p2369, 0;
@%p1147 bra $L__BB2_923;
and.b16 %rs777, %rs1224, 255;
setp.eq.s16 %p1149, %rs777, 255;
selp.u32 %r9553, 1, 0, %p1149;
add.s32 %r9552, %r9555, 1;
mov.u32 %r9551, 0;
mov.u16 %rs1224, 0;
mov.pred %p2369, %p2370;
$L__BB2_923:
mov.u32 %r9560, 0;
@%p2369 bra $L__BB2_919;
bra.uni $L__BB2_928;
// Per-step commit and loop back-edges.
// If any bit of %r1936 is not covered by %r9556, leave for $L__BB2_928 with
// %r9560 = 0 and %p2370 = %p790 (set before this chunk).
// Otherwise:
//   * m = %r9556 | %r1900 is stored as two 16-bit halves at [%rd25] and
//     [%rd25+2] in local memory,
//   * %r9450 is rebuilt from bits 12..15 only:
//       ((m | %r1901) & 0xF000) | ((m << 1) & 0xE000) | ((m & 0xE000) >> 1),
//   * %r9449 advances to %r5804 and the inner loop repeats at $L__BB2_689
//     while %r5804 < %r4057.
$L__BB2_924:
not.b32 %r6180, %r9556;
and.b32 %r6181, %r1936, %r6180;
setp.ne.s32 %p1152, %r6181, 0;
mov.u32 %r9560, %r6116;
mov.pred %p2370, %p790;
@%p1152 bra $L__BB2_928;
setp.lt.u32 %p1153, %r5804, %r4057;
or.b32 %r6182, %r9556, %r1900;
st.local.u16 [%rd25], %r6182;
shr.u32 %r6183, %r6182, 16;
st.local.u16 [%rd25+2], %r6183;
shl.b32 %r6184, %r6182, 1;
and.b32 %r6185, %r6184, 57344;
and.b32 %r6186, %r6182, 57344;
shr.u32 %r6187, %r6186, 1;
or.b32 %r6188, %r6182, %r1901;
and.b32 %r6189, %r6188, 61440;
or.b32 %r6190, %r6189, %r6185;
or.b32 %r9450, %r6190, %r6187;
mov.u32 %r9449, %r5804;
@%p1153 bra $L__BB2_689;
$L__BB2_926:
// Outer loop: advance %r9445 by 4 and repeat at $L__BB2_687 while below %r4058.
add.s32 %r9445, %r9445, 4;
setp.gt.u32 %p1154, %r4058, %r9445;
@%p1154 bra $L__BB2_687;
// Final flush of the bit accumulator:
//   no pending bits (%r9551 == 0)   -> %r9560 = %r9555
//   pending bits and %r9555 != -1   -> %r9560 = %r9555 + 1
//   pending bits and %r9555 == -1   -> %r9560 = 0 and %p2370 = true (failure)
setp.eq.s32 %p1155, %r9551, 0;
add.s32 %r6191, %r9555, 1;
setp.eq.s32 %p1156, %r9555, -1;
selp.b32 %r6192, -1, %r6191, %p1156;
selp.b32 %r6193, %r9555, %r6192, %p1155;
setp.ne.s32 %p1157, %r9555, -1;
or.pred %p1158, %p1155, %p1157;
selp.b32 %r9560, %r6193, 0, %p1158;
not.pred %p2370, %p1158;
$L__BB2_928:
// %p2370 set -> status-store path at $L__BB2_930; clear -> $L__BB2_929.
@%p2370 bra $L__BB2_930;
bra.uni $L__BB2_929;
// Failure exit taken when %p2370 is set: writes eight 32-bit words
// {2, 6, 0, 0, 0, 0, 0, 0} to global memory at %rd6 and jumps to $L__BB2_1905.
// (Presumably a status record with a status code and a reason code -- confirm
// against the host-side struct.)
$L__BB2_930:
mov.u32 %r6201, 2;
st.global.u32 [%rd6], %r6201;
mov.u32 %r6202, 6;
st.global.u32 [%rd6+4], %r6202;
mov.u32 %r6203, 0;
st.global.u32 [%rd6+8], %r6203;
st.global.u32 [%rd6+12], %r6203;
st.global.u32 [%rd6+16], %r6203;
st.global.u32 [%rd6+20], %r6203;
st.global.u32 [%rd6+24], %r6203;
st.global.u32 [%rd6+28], %r6203;
bra.uni $L__BB2_1905;
// Alternate entry (branched to from outside this chunk): all three size
// registers are zeroed before joining $L__BB2_932.
$L__BB2_931:
mov.u32 %r9561, 0;
mov.u32 %r9562, %r9561;
mov.u32 %r9563, %r9561;
bra.uni $L__BB2_932;
// Computes the three size registers and checks them against a limit.
//   %r9561 = max((%r4058 * %r4057 + 7) >> 3, %r9560)
//   %r9562 = (%r8433 + 6) / 7   (unsigned; done with the multiply-high constant
//            613566757 = 0x24924925 followed by the ((n - t) >> 1) + t) >> 2
//            correction), i.e. ceil(%r8433 / 7) barring 32-bit wrap of the add
//   %r9563 = %r9561 + %r9562
$L__BB2_929:
mad.lo.s32 %r6194, %r4058, %r4057, 7;
shr.u32 %r6195, %r6194, 3;
max.u32 %r9561, %r6195, %r9560;
add.s32 %r6196, %r8433, 6;
mul.wide.u32 %rd696, %r6196, 613566757;
shr.u64 %rd697, %rd696, 32;
cvt.u32.u64 %r6197, %rd697;
sub.s32 %r6198, %r6196, %r6197;
shr.u32 %r6199, %r6198, 1;
add.s32 %r6200, %r6199, %r6197;
shr.u32 %r9562, %r6200, 2;
add.s32 %r9563, %r9561, %r9562;
$L__BB2_932:
// Reject when %r1738 < 2 or %r9563 + %r1738 > %r4061 (unsigned compares);
// %r1738 and %r4061 are defined before this chunk.
add.s32 %r1989, %r9563, %r1738;
setp.gt.u32 %p1159, %r1989, %r4061;
setp.lt.u32 %p1160, %r1738, 2;
or.pred %p1161, %p1160, %p1159;
@%p1161 bra $L__BB2_1247;
bra.uni $L__BB2_933;
$L__BB2_1247:
// Rejection exit: writes eight 32-bit words {1, 4, 0, 0, 0, 0, 0, 0} to global
// memory at %rd6 and jumps to $L__BB2_1905.
mov.u32 %r6782, 1;
st.global.u32 [%rd6], %r6782;
mov.u32 %r6783, 4;
st.global.u32 [%rd6+4], %r6783;
mov.u32 %r6784, 0;
st.global.u32 [%rd6+8], %r6784;
st.global.u32 [%rd6+12], %r6784;
st.global.u32 [%rd6+16], %r6784;
st.global.u32 [%rd6+20], %r6784;
st.global.u32 [%rd6+24], %r6784;
st.global.u32 [%rd6+28], %r6784;
bra.uni $L__BB2_1905;
// Byte copy inside the global buffer addressed as %rd1 + %rd5 + offset:
// copies %r8524 bytes from offset 17477 + i to offset %r9160 + i, for
// i = 0 .. %r8524 - 1. Offsets are formed in 32 bits and zero-extended.
// The main loop handles four bytes per iteration; the remainder
// (%r8524 & 3) is handled one byte at a time. Nothing is copied when
// %r8524 == 0.
$L__BB2_933:
setp.eq.s32 %p1162, %r8524, 0;
@%p1162 bra $L__BB2_939;
add.s32 %r6208, %r8524, -1;
and.b32 %r9568, %r8524, 3;
// Fewer than four bytes in total: skip the unrolled loop.
setp.lt.u32 %p1163, %r6208, 3;
mov.u32 %r9566, 0;
@%p1163 bra $L__BB2_937;
// %r9565 = byte count rounded down to a multiple of 4; %r9566 = running index.
sub.s32 %r9565, %r8524, %r9568;
mov.u32 %r9566, 0;
$L__BB2_936:
add.s32 %r6210, %r9566, 17477;
cvt.u64.u32 %rd698, %r6210;
add.s64 %rd699, %rd698, %rd5;
add.s64 %rd700, %rd1, %rd699;
ld.global.u8 %rs778, [%rd700];
add.s32 %r6211, %r9566, %r9160;
cvt.u64.u32 %rd701, %r6211;
add.s64 %rd702, %rd701, %rd5;
add.s64 %rd703, %rd1, %rd702;
st.global.u8 [%rd703], %rs778;
ld.global.u8 %rs779, [%rd700+1];
add.s32 %r6212, %r6211, 1;
cvt.u64.u32 %rd704, %r6212;
add.s64 %rd705, %rd704, %rd5;
add.s64 %rd706, %rd1, %rd705;
st.global.u8 [%rd706], %rs779;
ld.global.u8 %rs780, [%rd700+2];
add.s32 %r6213, %r6211, 2;
cvt.u64.u32 %rd707, %r6213;
add.s64 %rd708, %rd707, %rd5;
add.s64 %rd709, %rd1, %rd708;
st.global.u8 [%rd709], %rs780;
// The fourth source address is recomputed from the 32-bit index
// (%r9566 + 17480) rather than taken as [%rd700+3].
add.s32 %r6214, %r9566, 17480;
cvt.u64.u32 %rd710, %r6214;
add.s64 %rd711, %rd710, %rd5;
add.s64 %rd712, %rd1, %rd711;
ld.global.u8 %rs781, [%rd712];
add.s32 %r6215, %r6211, 3;
cvt.u64.u32 %rd713, %r6215;
add.s64 %rd714, %rd713, %rd5;
add.s64 %rd715, %rd1, %rd714;
st.global.u8 [%rd715], %rs781;
add.s32 %r9566, %r9566, 4;
add.s32 %r9565, %r9565, -4;
setp.ne.s32 %p1164, %r9565, 0;
@%p1164 bra $L__BB2_936;
$L__BB2_937:
// Remainder: %r9568 bytes, one per iteration.
setp.eq.s32 %p1165, %r9568, 0;
@%p1165 bra $L__BB2_939;
$L__BB2_938:
.pragma "nounroll";
add.s32 %r6216, %r9566, 17477;
cvt.u64.u32 %rd716, %r6216;
add.s64 %rd717, %rd716, %rd5;
add.s64 %rd718, %rd1, %rd717;
ld.global.u8 %rs782, [%rd718];
add.s32 %r6217, %r9566, %r9160;
cvt.u64.u32 %rd719, %r6217;
add.s64 %rd720, %rd719, %rd5;
add.s64 %rd721, %rd1, %rd720;
st.global.u8 [%rd721], %rs782;
add.s32 %r9566, %r9566, 1;
add.s32 %r9568, %r9568, -1;
setp.ne.s32 %p1166, %r9568, 0;
@%p1166 bra $L__BB2_938;
$L__BB2_939:
// Second byte copy in global memory, skipped entirely when %r8972 == 0.
// For i in [0, %r8972): byte at offset (%r2001 + i) is copied to offset (%r1737 + i),
// with %r2001 = 20549 - %r8972, so the source range ends just before offset 20549.
// Offsets are zero-extended and added to %rd5 and the base %rd1.
//   %r9573 = %r8972 & 3      -> number of tail bytes
//   %r9570 = %r8972 - %r9573 -> bytes handled by the unrolled loop
//   %r9571 = running index i
setp.eq.s32 %p1167, %r8972, 0;
@%p1167 bra $L__BB2_945;
mov.u32 %r6219, 20549;
sub.s32 %r2001, %r6219, %r8972;
and.b32 %r9573, %r8972, 3;
add.s32 %r6220, %r8972, -1;
// Fewer than 4 bytes in total: only the tail loop runs.
setp.lt.u32 %p1168, %r6220, 3;
mov.u32 %r9571, 0;
@%p1168 bra $L__BB2_943;
sub.s32 %r9570, %r8972, %r9573;
mov.u32 %r9571, 0;
$L__BB2_942:
// Unrolled body: copy bytes i, i+1, i+2, i+3; each address is rebuilt from the index.
add.s32 %r6222, %r2001, %r9571;
cvt.u64.u32 %rd722, %r6222;
add.s64 %rd723, %rd722, %rd5;
add.s64 %rd724, %rd1, %rd723;
ld.global.u8 %rs783, [%rd724];
add.s32 %r6223, %r9571, %r1737;
cvt.u64.u32 %rd725, %r6223;
add.s64 %rd726, %rd725, %rd5;
add.s64 %rd727, %rd1, %rd726;
st.global.u8 [%rd727], %rs783;
add.s32 %r6224, %r9571, 1;
add.s32 %r6225, %r2001, %r6224;
cvt.u64.u32 %rd728, %r6225;
add.s64 %rd729, %rd728, %rd5;
add.s64 %rd730, %rd1, %rd729;
ld.global.u8 %rs784, [%rd730];
add.s32 %r6226, %r6224, %r1737;
cvt.u64.u32 %rd731, %r6226;
add.s64 %rd732, %rd731, %rd5;
add.s64 %rd733, %rd1, %rd732;
st.global.u8 [%rd733], %rs784;
add.s32 %r6227, %r9571, 2;
add.s32 %r6228, %r2001, %r6227;
cvt.u64.u32 %rd734, %r6228;
add.s64 %rd735, %rd734, %rd5;
add.s64 %rd736, %rd1, %rd735;
ld.global.u8 %rs785, [%rd736];
add.s32 %r6229, %r6227, %r1737;
cvt.u64.u32 %rd737, %r6229;
add.s64 %rd738, %rd737, %rd5;
add.s64 %rd739, %rd1, %rd738;
st.global.u8 [%rd739], %rs785;
add.s32 %r6230, %r9571, 3;
add.s32 %r6231, %r2001, %r6230;
cvt.u64.u32 %rd740, %r6231;
add.s64 %rd741, %rd740, %rd5;
add.s64 %rd742, %rd1, %rd741;
ld.global.u8 %rs786, [%rd742];
add.s32 %r6232, %r6230, %r1737;
cvt.u64.u32 %rd743, %r6232;
add.s64 %rd744, %rd743, %rd5;
add.s64 %rd745, %rd1, %rd744;
st.global.u8 [%rd745], %rs786;
add.s32 %r9571, %r9571, 4;
add.s32 %r9570, %r9570, -4;
setp.ne.s32 %p1169, %r9570, 0;
@%p1169 bra $L__BB2_942;
$L__BB2_943:
// Tail: copy the remaining (%r8972 & 3) bytes one at a time.
setp.eq.s32 %p1170, %r9573, 0;
@%p1170 bra $L__BB2_945;
$L__BB2_944:
.pragma "nounroll";
add.s32 %r6233, %r2001, %r9571;
cvt.u64.u32 %rd746, %r6233;
add.s64 %rd747, %rd746, %rd5;
add.s64 %rd748, %rd1, %rd747;
ld.global.u8 %rs787, [%rd748];
add.s32 %r6234, %r9571, %r1737;
cvt.u64.u32 %rd749, %r6234;
add.s64 %rd750, %rd749, %rd5;
add.s64 %rd751, %rd1, %rd750;
st.global.u8 [%rd751], %rs787;
add.s32 %r9571, %r9571, 1;
add.s32 %r9573, %r9573, -1;
setp.ne.s32 %p1171, %r9573, 0;
@%p1171 bra $L__BB2_944;
$L__BB2_945:
// Write the combined length (%r8972 + %r8524) into the two bytes that precede
// offset %r1738 (offsets are relative to %rd1 + %rd5):
//   byte at (%r1738 - 1) = low 8 bits of (sum >> 4)
//   byte at (%r1738 - 2) = (existing byte & 0xF0) | (sum & 0x0F)
// NOTE(review): this matches a 12-bit length split across the last two bytes of a
// segment (presumably the HTJ2K cleanup-pass suffix length) - confirm against the
// kernel source.
add.s32 %r6235, %r8972, %r8524;
shr.u32 %r6236, %r6235, 4;
add.s32 %r6237, %r1738, -1;
cvt.u64.u32 %rd752, %r6237;
add.s64 %rd753, %rd752, %rd5;
add.s64 %rd754, %rd1, %rd753;
// st.global.u8 stores only the low byte of the 32-bit register.
st.global.u8 [%rd754], %r6236;
add.s32 %r6238, %r1738, -2;
cvt.u64.u32 %rd755, %r6238;
add.s64 %rd756, %rd755, %rd5;
add.s64 %rd757, %rd1, %rd756;
// Read-modify-write: keep the high nibble (mask 240), replace the low nibble.
ld.global.u8 %rs788, [%rd757];
and.b16 %rs789, %rs788, 240;
cvt.u16.u32 %rs790, %r6235;
and.b16 %rs791, %rs790, 15;
or.b16 %rs792, %rs789, %rs791;
st.global.u8 [%rd757], %rs792;
// Zero fill in global memory, skipped when %r9563 == 0.
// For i in [0, %r9563): a zero byte is stored at offset (%r1738 + i), with offsets
// zero-extended and added to %rd5 and the base %rd1.
//   %r9578 = %r9563 & 3      -> number of tail bytes
//   %r9575 = %r9563 - %r9578 -> bytes handled by the unrolled loop
//   %r9576 = running index i
setp.eq.s32 %p1172, %r9563, 0;
@%p1172 bra $L__BB2_951;
add.s32 %r6240, %r9563, -1;
and.b32 %r9578, %r9563, 3;
// Fewer than 4 bytes in total: only the tail loop runs.
setp.lt.u32 %p1173, %r6240, 3;
mov.u32 %r9576, 0;
@%p1173 bra $L__BB2_949;
sub.s32 %r9575, %r9563, %r9578;
mov.u32 %r9576, 0;
$L__BB2_948:
// Unrolled body: clear bytes i, i+1, i+2, i+3.
add.s32 %r6242, %r9576, %r1738;
cvt.u64.u32 %rd758, %r6242;
add.s64 %rd759, %rd758, %rd5;
add.s64 %rd760, %rd1, %rd759;
mov.u16 %rs793, 0;
st.global.u8 [%rd760], %rs793;
add.s32 %r6243, %r6242, 1;
cvt.u64.u32 %rd761, %r6243;
add.s64 %rd762, %rd761, %rd5;
add.s64 %rd763, %rd1, %rd762;
st.global.u8 [%rd763], %rs793;
add.s32 %r6244, %r6242, 2;
cvt.u64.u32 %rd764, %r6244;
add.s64 %rd765, %rd764, %rd5;
add.s64 %rd766, %rd1, %rd765;
st.global.u8 [%rd766], %rs793;
add.s32 %r6245, %r6242, 3;
cvt.u64.u32 %rd767, %r6245;
add.s64 %rd768, %rd767, %rd5;
add.s64 %rd769, %rd1, %rd768;
st.global.u8 [%rd769], %rs793;
add.s32 %r9576, %r9576, 4;
add.s32 %r9575, %r9575, -4;
setp.ne.s32 %p1174, %r9575, 0;
@%p1174 bra $L__BB2_948;
$L__BB2_949:
// Tail: clear the remaining (%r9563 & 3) bytes one at a time.
setp.eq.s32 %p1175, %r9578, 0;
@%p1175 bra $L__BB2_951;
$L__BB2_950:
.pragma "nounroll";
add.s32 %r6246, %r9576, %r1738;
cvt.u64.u32 %rd770, %r6246;
add.s64 %rd771, %rd770, %rd5;
add.s64 %rd772, %rd1, %rd771;
mov.u16 %rs794, 0;
st.global.u8 [%rd772], %rs794;
add.s32 %r9576, %r9576, 1;
add.s32 %r9578, %r9578, -1;
setp.ne.s32 %p1176, %r9578, 0;
@%p1176 bra $L__BB2_950;
$L__BB2_951:
// The code below runs only when %r4062 == 3; any other value jumps to $L__BB2_1243.
// NOTE(review): the meaning of the value 3 is not visible in this chunk.
setp.ne.s32 %p1177, %r4062, 3;
@%p1177 bra $L__BB2_1243;
// %rd43 = kernel parameter 0 + %rd5 + zero-extended %r1738, i.e. a pointer to the
// byte at offset %r1738 (the position where the zero fill above started).
ld.param.u64 %rd1410, [ j2k_htj2k_encode_codeblocks_multi_input_param_0];
cvt.u64.u32 %rd773, %r1738;
add.s64 %rd42, %rd773, %rd5;
add.s64 %rd43, %rd1410, %rd42;
// Size check: ((%r4057 + 3) >> 2) + 8 must not exceed 513. If it does, branch to
// $L__BB2_1202 with %p2373 = true. 513 equals the number of u16 slots cleared in
// the local array at %rd23 by the stores that follow.
add.s32 %r6247, %r4057, 3;
shr.u32 %r6248, %r6247, 2;
add.s32 %r6249, %r6248, 8;
setp.gt.u32 %p1179, %r6249, 513;
mov.pred %p1178, -1;
mov.pred %p2373, %p1178;
@%p1179 bra $L__BB2_1202;
// Fully unrolled clear of the local-memory u16 array at %rd23: one st.local.u16 of
// zero per halfword, at byte offsets 0 through 1024 in steps of 2 (513 halfwords).
mov.u16 %rs1232, 0;
st.local.u16 [%rd23], %rs1232;
st.local.u16 [%rd23+2], %rs1232;
st.local.u16 [%rd23+4], %rs1232;
st.local.u16 [%rd23+6], %rs1232;
st.local.u16 [%rd23+8], %rs1232;
st.local.u16 [%rd23+10], %rs1232;
st.local.u16 [%rd23+12], %rs1232;
st.local.u16 [%rd23+14], %rs1232;
st.local.u16 [%rd23+16], %rs1232;
st.local.u16 [%rd23+18], %rs1232;
st.local.u16 [%rd23+20], %rs1232;
st.local.u16 [%rd23+22], %rs1232;
st.local.u16 [%rd23+24], %rs1232;
st.local.u16 [%rd23+26], %rs1232;
st.local.u16 [%rd23+28], %rs1232;
st.local.u16 [%rd23+30], %rs1232;
st.local.u16 [%rd23+32], %rs1232;
st.local.u16 [%rd23+34], %rs1232;
st.local.u16 [%rd23+36], %rs1232;
st.local.u16 [%rd23+38], %rs1232;
st.local.u16 [%rd23+40], %rs1232;
st.local.u16 [%rd23+42], %rs1232;
st.local.u16 [%rd23+44], %rs1232;
st.local.u16 [%rd23+46], %rs1232;
st.local.u16 [%rd23+48], %rs1232;
st.local.u16 [%rd23+50], %rs1232;
st.local.u16 [%rd23+52], %rs1232;
st.local.u16 [%rd23+54], %rs1232;
st.local.u16 [%rd23+56], %rs1232;
st.local.u16 [%rd23+58], %rs1232;
st.local.u16 [%rd23+60], %rs1232;
st.local.u16 [%rd23+62], %rs1232;
st.local.u16 [%rd23+64], %rs1232;
st.local.u16 [%rd23+66], %rs1232;
st.local.u16 [%rd23+68], %rs1232;
st.local.u16 [%rd23+70], %rs1232;
st.local.u16 [%rd23+72], %rs1232;
st.local.u16 [%rd23+74], %rs1232;
st.local.u16 [%rd23+76], %rs1232;
st.local.u16 [%rd23+78], %rs1232;
st.local.u16 [%rd23+80], %rs1232;
st.local.u16 [%rd23+82], %rs1232;
st.local.u16 [%rd23+84], %rs1232;
st.local.u16 [%rd23+86], %rs1232;
st.local.u16 [%rd23+88], %rs1232;
st.local.u16 [%rd23+90], %rs1232;
st.local.u16 [%rd23+92], %rs1232;
st.local.u16 [%rd23+94], %rs1232;
st.local.u16 [%rd23+96], %rs1232;
st.local.u16 [%rd23+98], %rs1232;
st.local.u16 [%rd23+100], %rs1232;
st.local.u16 [%rd23+102], %rs1232;
st.local.u16 [%rd23+104], %rs1232;
st.local.u16 [%rd23+106], %rs1232;
st.local.u16 [%rd23+108], %rs1232;
st.local.u16 [%rd23+110], %rs1232;
st.local.u16 [%rd23+112], %rs1232;
st.local.u16 [%rd23+114], %rs1232;
st.local.u16 [%rd23+116], %rs1232;
st.local.u16 [%rd23+118], %rs1232;
st.local.u16 [%rd23+120], %rs1232;
st.local.u16 [%rd23+122], %rs1232;
st.local.u16 [%rd23+124], %rs1232;
st.local.u16 [%rd23+126], %rs1232;
st.local.u16 [%rd23+128], %rs1232;
st.local.u16 [%rd23+130], %rs1232;
st.local.u16 [%rd23+132], %rs1232;
st.local.u16 [%rd23+134], %rs1232;
st.local.u16 [%rd23+136], %rs1232;
st.local.u16 [%rd23+138], %rs1232;
st.local.u16 [%rd23+140], %rs1232;
st.local.u16 [%rd23+142], %rs1232;
st.local.u16 [%rd23+144], %rs1232;
st.local.u16 [%rd23+146], %rs1232;
st.local.u16 [%rd23+148], %rs1232;
st.local.u16 [%rd23+150], %rs1232;
st.local.u16 [%rd23+152], %rs1232;
st.local.u16 [%rd23+154], %rs1232;
st.local.u16 [%rd23+156], %rs1232;
st.local.u16 [%rd23+158], %rs1232;
st.local.u16 [%rd23+160], %rs1232;
st.local.u16 [%rd23+162], %rs1232;
st.local.u16 [%rd23+164], %rs1232;
st.local.u16 [%rd23+166], %rs1232;
st.local.u16 [%rd23+168], %rs1232;
st.local.u16 [%rd23+170], %rs1232;
st.local.u16 [%rd23+172], %rs1232;
st.local.u16 [%rd23+174], %rs1232;
st.local.u16 [%rd23+176], %rs1232;
st.local.u16 [%rd23+178], %rs1232;
st.local.u16 [%rd23+180], %rs1232;
st.local.u16 [%rd23+182], %rs1232;
st.local.u16 [%rd23+184], %rs1232;
st.local.u16 [%rd23+186], %rs1232;
st.local.u16 [%rd23+188], %rs1232;
st.local.u16 [%rd23+190], %rs1232;
st.local.u16 [%rd23+192], %rs1232;
st.local.u16 [%rd23+194], %rs1232;
st.local.u16 [%rd23+196], %rs1232;
st.local.u16 [%rd23+198], %rs1232;
st.local.u16 [%rd23+200], %rs1232;
st.local.u16 [%rd23+202], %rs1232;
st.local.u16 [%rd23+204], %rs1232;
st.local.u16 [%rd23+206], %rs1232;
st.local.u16 [%rd23+208], %rs1232;
st.local.u16 [%rd23+210], %rs1232;
st.local.u16 [%rd23+212], %rs1232;
st.local.u16 [%rd23+214], %rs1232;
st.local.u16 [%rd23+216], %rs1232;
st.local.u16 [%rd23+218], %rs1232;
st.local.u16 [%rd23+220], %rs1232;
st.local.u16 [%rd23+222], %rs1232;
st.local.u16 [%rd23+224], %rs1232;
st.local.u16 [%rd23+226], %rs1232;
st.local.u16 [%rd23+228], %rs1232;
st.local.u16 [%rd23+230], %rs1232;
st.local.u16 [%rd23+232], %rs1232;
st.local.u16 [%rd23+234], %rs1232;
st.local.u16 [%rd23+236], %rs1232;
st.local.u16 [%rd23+238], %rs1232;
st.local.u16 [%rd23+240], %rs1232;
st.local.u16 [%rd23+242], %rs1232;
st.local.u16 [%rd23+244], %rs1232;
st.local.u16 [%rd23+246], %rs1232;
st.local.u16 [%rd23+248], %rs1232;
st.local.u16 [%rd23+250], %rs1232;
st.local.u16 [%rd23+252], %rs1232;
st.local.u16 [%rd23+254], %rs1232;
st.local.u16 [%rd23+256], %rs1232;
st.local.u16 [%rd23+258], %rs1232;
st.local.u16 [%rd23+260], %rs1232;
st.local.u16 [%rd23+262], %rs1232;
st.local.u16 [%rd23+264], %rs1232;
st.local.u16 [%rd23+266], %rs1232;
st.local.u16 [%rd23+268], %rs1232;
st.local.u16 [%rd23+270], %rs1232;
st.local.u16 [%rd23+272], %rs1232;
st.local.u16 [%rd23+274], %rs1232;
st.local.u16 [%rd23+276], %rs1232;
st.local.u16 [%rd23+278], %rs1232;
st.local.u16 [%rd23+280], %rs1232;
st.local.u16 [%rd23+282], %rs1232;
st.local.u16 [%rd23+284], %rs1232;
st.local.u16 [%rd23+286], %rs1232;
st.local.u16 [%rd23+288], %rs1232;
st.local.u16 [%rd23+290], %rs1232;
st.local.u16 [%rd23+292], %rs1232;
st.local.u16 [%rd23+294], %rs1232;
st.local.u16 [%rd23+296], %rs1232;
st.local.u16 [%rd23+298], %rs1232;
st.local.u16 [%rd23+300], %rs1232;
st.local.u16 [%rd23+302], %rs1232;
st.local.u16 [%rd23+304], %rs1232;
st.local.u16 [%rd23+306], %rs1232;
st.local.u16 [%rd23+308], %rs1232;
st.local.u16 [%rd23+310], %rs1232;
st.local.u16 [%rd23+312], %rs1232;
st.local.u16 [%rd23+314], %rs1232;
st.local.u16 [%rd23+316], %rs1232;
st.local.u16 [%rd23+318], %rs1232;
st.local.u16 [%rd23+320], %rs1232;
st.local.u16 [%rd23+322], %rs1232;
st.local.u16 [%rd23+324], %rs1232;
st.local.u16 [%rd23+326], %rs1232;
st.local.u16 [%rd23+328], %rs1232;
st.local.u16 [%rd23+330], %rs1232;
st.local.u16 [%rd23+332], %rs1232;
st.local.u16 [%rd23+334], %rs1232;
st.local.u16 [%rd23+336], %rs1232;
st.local.u16 [%rd23+338], %rs1232;
st.local.u16 [%rd23+340], %rs1232;
st.local.u16 [%rd23+342], %rs1232;
st.local.u16 [%rd23+344], %rs1232;
st.local.u16 [%rd23+346], %rs1232;
st.local.u16 [%rd23+348], %rs1232;
st.local.u16 [%rd23+350], %rs1232;
st.local.u16 [%rd23+352], %rs1232;
st.local.u16 [%rd23+354], %rs1232;
st.local.u16 [%rd23+356], %rs1232;
st.local.u16 [%rd23+358], %rs1232;
st.local.u16 [%rd23+360], %rs1232;
st.local.u16 [%rd23+362], %rs1232;
st.local.u16 [%rd23+364], %rs1232;
st.local.u16 [%rd23+366], %rs1232;
st.local.u16 [%rd23+368], %rs1232;
st.local.u16 [%rd23+370], %rs1232;
st.local.u16 [%rd23+372], %rs1232;
st.local.u16 [%rd23+374], %rs1232;
st.local.u16 [%rd23+376], %rs1232;
st.local.u16 [%rd23+378], %rs1232;
st.local.u16 [%rd23+380], %rs1232;
st.local.u16 [%rd23+382], %rs1232;
st.local.u16 [%rd23+384], %rs1232;
st.local.u16 [%rd23+386], %rs1232;
st.local.u16 [%rd23+388], %rs1232;
st.local.u16 [%rd23+390], %rs1232;
st.local.u16 [%rd23+392], %rs1232;
st.local.u16 [%rd23+394], %rs1232;
st.local.u16 [%rd23+396], %rs1232;
st.local.u16 [%rd23+398], %rs1232;
st.local.u16 [%rd23+400], %rs1232;
st.local.u16 [%rd23+402], %rs1232;
st.local.u16 [%rd23+404], %rs1232;
st.local.u16 [%rd23+406], %rs1232;
st.local.u16 [%rd23+408], %rs1232;
st.local.u16 [%rd23+410], %rs1232;
st.local.u16 [%rd23+412], %rs1232;
st.local.u16 [%rd23+414], %rs1232;
st.local.u16 [%rd23+416], %rs1232;
st.local.u16 [%rd23+418], %rs1232;
st.local.u16 [%rd23+420], %rs1232;
st.local.u16 [%rd23+422], %rs1232;
st.local.u16 [%rd23+424], %rs1232;
st.local.u16 [%rd23+426], %rs1232;
st.local.u16 [%rd23+428], %rs1232;
st.local.u16 [%rd23+430], %rs1232;
st.local.u16 [%rd23+432], %rs1232;
st.local.u16 [%rd23+434], %rs1232;
st.local.u16 [%rd23+436], %rs1232;
st.local.u16 [%rd23+438], %rs1232;
st.local.u16 [%rd23+440], %rs1232;
st.local.u16 [%rd23+442], %rs1232;
st.local.u16 [%rd23+444], %rs1232;
st.local.u16 [%rd23+446], %rs1232;
st.local.u16 [%rd23+448], %rs1232;
st.local.u16 [%rd23+450], %rs1232;
st.local.u16 [%rd23+452], %rs1232;
st.local.u16 [%rd23+454], %rs1232;
st.local.u16 [%rd23+456], %rs1232;
st.local.u16 [%rd23+458], %rs1232;
st.local.u16 [%rd23+460], %rs1232;
st.local.u16 [%rd23+462], %rs1232;
st.local.u16 [%rd23+464], %rs1232;
st.local.u16 [%rd23+466], %rs1232;
st.local.u16 [%rd23+468], %rs1232;
st.local.u16 [%rd23+470], %rs1232;
st.local.u16 [%rd23+472], %rs1232;
st.local.u16 [%rd23+474], %rs1232;
st.local.u16 [%rd23+476], %rs1232;
st.local.u16 [%rd23+478], %rs1232;
st.local.u16 [%rd23+480], %rs1232;
st.local.u16 [%rd23+482], %rs1232;
st.local.u16 [%rd23+484], %rs1232;
st.local.u16 [%rd23+486], %rs1232;
st.local.u16 [%rd23+488], %rs1232;
st.local.u16 [%rd23+490], %rs1232;
st.local.u16 [%rd23+492], %rs1232;
st.local.u16 [%rd23+494], %rs1232;
st.local.u16 [%rd23+496], %rs1232;
st.local.u16 [%rd23+498], %rs1232;
st.local.u16 [%rd23+500], %rs1232;
st.local.u16 [%rd23+502], %rs1232;
st.local.u16 [%rd23+504], %rs1232;
st.local.u16 [%rd23+506], %rs1232;
st.local.u16 [%rd23+508], %rs1232;
st.local.u16 [%rd23+510], %rs1232;
st.local.u16 [%rd23+512], %rs1232;
st.local.u16 [%rd23+514], %rs1232;
st.local.u16 [%rd23+516], %rs1232;
st.local.u16 [%rd23+518], %rs1232;
st.local.u16 [%rd23+520], %rs1232;
st.local.u16 [%rd23+522], %rs1232;
st.local.u16 [%rd23+524], %rs1232;
st.local.u16 [%rd23+526], %rs1232;
st.local.u16 [%rd23+528], %rs1232;
st.local.u16 [%rd23+530], %rs1232;
st.local.u16 [%rd23+532], %rs1232;
st.local.u16 [%rd23+534], %rs1232;
st.local.u16 [%rd23+536], %rs1232;
st.local.u16 [%rd23+538], %rs1232;
st.local.u16 [%rd23+540], %rs1232;
st.local.u16 [%rd23+542], %rs1232;
st.local.u16 [%rd23+544], %rs1232;
st.local.u16 [%rd23+546], %rs1232;
st.local.u16 [%rd23+548], %rs1232;
st.local.u16 [%rd23+550], %rs1232;
st.local.u16 [%rd23+552], %rs1232;
st.local.u16 [%rd23+554], %rs1232;
st.local.u16 [%rd23+556], %rs1232;
st.local.u16 [%rd23+558], %rs1232;
st.local.u16 [%rd23+560], %rs1232;
st.local.u16 [%rd23+562], %rs1232;
st.local.u16 [%rd23+564], %rs1232;
st.local.u16 [%rd23+566], %rs1232;
st.local.u16 [%rd23+568], %rs1232;
st.local.u16 [%rd23+570], %rs1232;
st.local.u16 [%rd23+572], %rs1232;
st.local.u16 [%rd23+574], %rs1232;
st.local.u16 [%rd23+576], %rs1232;
st.local.u16 [%rd23+578], %rs1232;
st.local.u16 [%rd23+580], %rs1232;
st.local.u16 [%rd23+582], %rs1232;
st.local.u16 [%rd23+584], %rs1232;
st.local.u16 [%rd23+586], %rs1232;
st.local.u16 [%rd23+588], %rs1232;
st.local.u16 [%rd23+590], %rs1232;
st.local.u16 [%rd23+592], %rs1232;
st.local.u16 [%rd23+594], %rs1232;
st.local.u16 [%rd23+596], %rs1232;
st.local.u16 [%rd23+598], %rs1232;
st.local.u16 [%rd23+600], %rs1232;
st.local.u16 [%rd23+602], %rs1232;
st.local.u16 [%rd23+604], %rs1232;
st.local.u16 [%rd23+606], %rs1232;
st.local.u16 [%rd23+608], %rs1232;
st.local.u16 [%rd23+610], %rs1232;
st.local.u16 [%rd23+612], %rs1232;
st.local.u16 [%rd23+614], %rs1232;
st.local.u16 [%rd23+616], %rs1232;
st.local.u16 [%rd23+618], %rs1232;
st.local.u16 [%rd23+620], %rs1232;
st.local.u16 [%rd23+622], %rs1232;
st.local.u16 [%rd23+624], %rs1232;
st.local.u16 [%rd23+626], %rs1232;
st.local.u16 [%rd23+628], %rs1232;
st.local.u16 [%rd23+630], %rs1232;
st.local.u16 [%rd23+632], %rs1232;
st.local.u16 [%rd23+634], %rs1232;
st.local.u16 [%rd23+636], %rs1232;
st.local.u16 [%rd23+638], %rs1232;
st.local.u16 [%rd23+640], %rs1232;
st.local.u16 [%rd23+642], %rs1232;
st.local.u16 [%rd23+644], %rs1232;
st.local.u16 [%rd23+646], %rs1232;
st.local.u16 [%rd23+648], %rs1232;
st.local.u16 [%rd23+650], %rs1232;
st.local.u16 [%rd23+652], %rs1232;
st.local.u16 [%rd23+654], %rs1232;
st.local.u16 [%rd23+656], %rs1232;
st.local.u16 [%rd23+658], %rs1232;
st.local.u16 [%rd23+660], %rs1232;
st.local.u16 [%rd23+662], %rs1232;
st.local.u16 [%rd23+664], %rs1232;
st.local.u16 [%rd23+666], %rs1232;
st.local.u16 [%rd23+668], %rs1232;
st.local.u16 [%rd23+670], %rs1232;
st.local.u16 [%rd23+672], %rs1232;
st.local.u16 [%rd23+674], %rs1232;
st.local.u16 [%rd23+676], %rs1232;
st.local.u16 [%rd23+678], %rs1232;
st.local.u16 [%rd23+680], %rs1232;
st.local.u16 [%rd23+682], %rs1232;
st.local.u16 [%rd23+684], %rs1232;
st.local.u16 [%rd23+686], %rs1232;
st.local.u16 [%rd23+688], %rs1232;
st.local.u16 [%rd23+690], %rs1232;
st.local.u16 [%rd23+692], %rs1232;
st.local.u16 [%rd23+694], %rs1232;
st.local.u16 [%rd23+696], %rs1232;
st.local.u16 [%rd23+698], %rs1232;
st.local.u16 [%rd23+700], %rs1232;
st.local.u16 [%rd23+702], %rs1232;
st.local.u16 [%rd23+704], %rs1232;
st.local.u16 [%rd23+706], %rs1232;
st.local.u16 [%rd23+708], %rs1232;
st.local.u16 [%rd23+710], %rs1232;
st.local.u16 [%rd23+712], %rs1232;
st.local.u16 [%rd23+714], %rs1232;
st.local.u16 [%rd23+716], %rs1232;
st.local.u16 [%rd23+718], %rs1232;
st.local.u16 [%rd23+720], %rs1232;
st.local.u16 [%rd23+722], %rs1232;
st.local.u16 [%rd23+724], %rs1232;
st.local.u16 [%rd23+726], %rs1232;
st.local.u16 [%rd23+728], %rs1232;
st.local.u16 [%rd23+730], %rs1232;
st.local.u16 [%rd23+732], %rs1232;
st.local.u16 [%rd23+734], %rs1232;
st.local.u16 [%rd23+736], %rs1232;
st.local.u16 [%rd23+738], %rs1232;
st.local.u16 [%rd23+740], %rs1232;
st.local.u16 [%rd23+742], %rs1232;
st.local.u16 [%rd23+744], %rs1232;
st.local.u16 [%rd23+746], %rs1232;
st.local.u16 [%rd23+748], %rs1232;
st.local.u16 [%rd23+750], %rs1232;
st.local.u16 [%rd23+752], %rs1232;
st.local.u16 [%rd23+754], %rs1232;
st.local.u16 [%rd23+756], %rs1232;
st.local.u16 [%rd23+758], %rs1232;
st.local.u16 [%rd23+760], %rs1232;
st.local.u16 [%rd23+762], %rs1232;
st.local.u16 [%rd23+764], %rs1232;
st.local.u16 [%rd23+766], %rs1232;
st.local.u16 [%rd23+768], %rs1232;
st.local.u16 [%rd23+770], %rs1232;
st.local.u16 [%rd23+772], %rs1232;
st.local.u16 [%rd23+774], %rs1232;
st.local.u16 [%rd23+776], %rs1232;
st.local.u16 [%rd23+778], %rs1232;
st.local.u16 [%rd23+780], %rs1232;
st.local.u16 [%rd23+782], %rs1232;
st.local.u16 [%rd23+784], %rs1232;
st.local.u16 [%rd23+786], %rs1232;
st.local.u16 [%rd23+788], %rs1232;
st.local.u16 [%rd23+790], %rs1232;
st.local.u16 [%rd23+792], %rs1232;
st.local.u16 [%rd23+794], %rs1232;
st.local.u16 [%rd23+796], %rs1232;
st.local.u16 [%rd23+798], %rs1232;
st.local.u16 [%rd23+800], %rs1232;
st.local.u16 [%rd23+802], %rs1232;
st.local.u16 [%rd23+804], %rs1232;
st.local.u16 [%rd23+806], %rs1232;
st.local.u16 [%rd23+808], %rs1232;
st.local.u16 [%rd23+810], %rs1232;
st.local.u16 [%rd23+812], %rs1232;
st.local.u16 [%rd23+814], %rs1232;
st.local.u16 [%rd23+816], %rs1232;
st.local.u16 [%rd23+818], %rs1232;
st.local.u16 [%rd23+820], %rs1232;
st.local.u16 [%rd23+822], %rs1232;
st.local.u16 [%rd23+824], %rs1232;
st.local.u16 [%rd23+826], %rs1232;
st.local.u16 [%rd23+828], %rs1232;
st.local.u16 [%rd23+830], %rs1232;
st.local.u16 [%rd23+832], %rs1232;
st.local.u16 [%rd23+834], %rs1232;
st.local.u16 [%rd23+836], %rs1232;
st.local.u16 [%rd23+838], %rs1232;
st.local.u16 [%rd23+840], %rs1232;
st.local.u16 [%rd23+842], %rs1232;
st.local.u16 [%rd23+844], %rs1232;
st.local.u16 [%rd23+846], %rs1232;
st.local.u16 [%rd23+848], %rs1232;
st.local.u16 [%rd23+850], %rs1232;
st.local.u16 [%rd23+852], %rs1232;
st.local.u16 [%rd23+854], %rs1232;
st.local.u16 [%rd23+856], %rs1232;
st.local.u16 [%rd23+858], %rs1232;
st.local.u16 [%rd23+860], %rs1232;
st.local.u16 [%rd23+862], %rs1232;
st.local.u16 [%rd23+864], %rs1232;
st.local.u16 [%rd23+866], %rs1232;
st.local.u16 [%rd23+868], %rs1232;
st.local.u16 [%rd23+870], %rs1232;
st.local.u16 [%rd23+872], %rs1232;
st.local.u16 [%rd23+874], %rs1232;
st.local.u16 [%rd23+876], %rs1232;
st.local.u16 [%rd23+878], %rs1232;
st.local.u16 [%rd23+880], %rs1232;
st.local.u16 [%rd23+882], %rs1232;
st.local.u16 [%rd23+884], %rs1232;
st.local.u16 [%rd23+886], %rs1232;
st.local.u16 [%rd23+888], %rs1232;
st.local.u16 [%rd23+890], %rs1232;
st.local.u16 [%rd23+892], %rs1232;
st.local.u16 [%rd23+894], %rs1232;
st.local.u16 [%rd23+896], %rs1232;
st.local.u16 [%rd23+898], %rs1232;
st.local.u16 [%rd23+900], %rs1232;
st.local.u16 [%rd23+902], %rs1232;
st.local.u16 [%rd23+904], %rs1232;
st.local.u16 [%rd23+906], %rs1232;
st.local.u16 [%rd23+908], %rs1232;
st.local.u16 [%rd23+910], %rs1232;
st.local.u16 [%rd23+912], %rs1232;
st.local.u16 [%rd23+914], %rs1232;
st.local.u16 [%rd23+916], %rs1232;
st.local.u16 [%rd23+918], %rs1232;
st.local.u16 [%rd23+920], %rs1232;
st.local.u16 [%rd23+922], %rs1232;
st.local.u16 [%rd23+924], %rs1232;
st.local.u16 [%rd23+926], %rs1232;
st.local.u16 [%rd23+928], %rs1232;
st.local.u16 [%rd23+930], %rs1232;
st.local.u16 [%rd23+932], %rs1232;
st.local.u16 [%rd23+934], %rs1232;
st.local.u16 [%rd23+936], %rs1232;
st.local.u16 [%rd23+938], %rs1232;
st.local.u16 [%rd23+940], %rs1232;
st.local.u16 [%rd23+942], %rs1232;
st.local.u16 [%rd23+944], %rs1232;
st.local.u16 [%rd23+946], %rs1232;
st.local.u16 [%rd23+948], %rs1232;
st.local.u16 [%rd23+950], %rs1232;
st.local.u16 [%rd23+952], %rs1232;
st.local.u16 [%rd23+954], %rs1232;
st.local.u16 [%rd23+956], %rs1232;
st.local.u16 [%rd23+958], %rs1232;
st.local.u16 [%rd23+960], %rs1232;
st.local.u16 [%rd23+962], %rs1232;
st.local.u16 [%rd23+964], %rs1232;
st.local.u16 [%rd23+966], %rs1232;
st.local.u16 [%rd23+968], %rs1232;
st.local.u16 [%rd23+970], %rs1232;
st.local.u16 [%rd23+972], %rs1232;
st.local.u16 [%rd23+974], %rs1232;
st.local.u16 [%rd23+976], %rs1232;
st.local.u16 [%rd23+978], %rs1232;
st.local.u16 [%rd23+980], %rs1232;
st.local.u16 [%rd23+982], %rs1232;
st.local.u16 [%rd23+984], %rs1232;
st.local.u16 [%rd23+986], %rs1232;
st.local.u16 [%rd23+988], %rs1232;
st.local.u16 [%rd23+990], %rs1232;
st.local.u16 [%rd23+992], %rs1232;
st.local.u16 [%rd23+994], %rs1232;
st.local.u16 [%rd23+996], %rs1232;
st.local.u16 [%rd23+998], %rs1232;
st.local.u16 [%rd23+1000], %rs1232;
st.local.u16 [%rd23+1002], %rs1232;
st.local.u16 [%rd23+1004], %rs1232;
st.local.u16 [%rd23+1006], %rs1232;
st.local.u16 [%rd23+1008], %rs1232;
st.local.u16 [%rd23+1010], %rs1232;
st.local.u16 [%rd23+1012], %rs1232;
st.local.u16 [%rd23+1014], %rs1232;
st.local.u16 [%rd23+1016], %rs1232;
st.local.u16 [%rd23+1018], %rs1232;
st.local.u16 [%rd23+1020], %rs1232;
st.local.u16 [%rd23+1022], %rs1232;
st.local.u16 [%rd23+1024], %rs1232;
// Loop state initialised to zero: %r9579 is the outer loop variable used below as a
// row index (it is multiplied by %r4055 and compared against %r4058);
// %r9689, %r9685 and %r9687 are further loop-carried values.
// NOTE(review): the roles of %r9689/%r9685/%r9687 are not visible in this chunk.
mov.u32 %r9579, 0;
mov.u32 %r9689, %r9579;
mov.u32 %r9685, %r9579;
mov.u32 %r9687, %r9579;
$L__BB2_954:
// %p10 is computed outside this chunk; when set, this iteration's body is skipped.
@%p10 bra $L__BB2_1197;
// Per-iteration setup. With row = %r9579 and stride = %r4055:
//   %r6256 = %r4058 - row            (presumably rows remaining - confirm)
//   %r2028, %r2030, %r2032, %r2034 = row + 4, row + 5, row + 6, row + 7
//   %r2029, %r2031, %r2033, %r2035 = (row + 4..7) * stride  (element offsets of those rows)
//   %r2036, %r2037, %r2038         = row + 1, row + 2, row + 3
//   %r2039, %r2042, %r2041, %r2040 = (row + 0..3) * stride
sub.s32 %r6256, %r4058, %r9579;
add.s32 %r2028, %r9579, 4;
mul.lo.s32 %r2029, %r2028, %r4055;
add.s32 %r2030, %r9579, 5;
add.s32 %r2031, %r2029, %r4055;
add.s32 %r2032, %r9579, 6;
shl.b32 %r6257, %r4055, 1;
add.s32 %r2033, %r2029, %r6257;
add.s32 %r2034, %r9579, 7;
mul.lo.s32 %r6258, %r4055, 3;
add.s32 %r2035, %r2029, %r6258;
add.s32 %r2036, %r9579, 1;
add.s32 %r2037, %r9579, 2;
add.s32 %r2038, %r9579, 3;
mul.lo.s32 %r2039, %r9579, %r4055;
add.s32 %r2040, %r2039, %r6258;
sub.s32 %r2041, %r2040, %r4055;
sub.s32 %r2042, %r2041, %r4055;
// %r2043 = 16-bit mask selected by %r6256 (unsigned compares):
//   < 2 -> 4369 (0x1111), < 3 -> 13107 (0x3333), < 4 -> 30583 (0x7777),
//   otherwise 65535 (0xFFFF); i.e. the low 1, 2, 3 or 4 bits of every nibble.
setp.lt.u32 %p1181, %r6256, 2;
selp.b32 %r6259, 4369, 13107, %p1181;
setp.lt.u32 %p1182, %r6256, 3;
selp.b32 %r6260, %r6259, 30583, %p1182;
setp.lt.u32 %p1183, %r6256, 4;
selp.b32 %r2043, %r6260, 65535, %p1183;
// Inner loop state: %r9583 (used as the column index) and %r9584 start at 0.
mov.u32 %r6255, 0;
mov.u32 %r9583, %r6255;
mov.u32 %r9584, %r6255;
$L__BB2_956:
// Inner loop head. Load two consecutive u16 entries of the local array at %rd23,
// at index (%r9583 >> 2) and the one after it.
shr.u32 %r6262, %r9583, 2;
mul.wide.u32 %rd774, %r6262, 2;
add.s64 %rd44, %rd23, %rd774;
ld.local.u16 %rs250, [%rd44];
ld.local.u16 %rs251, [%rd44+2];
// %r9595 accumulates one flag bit per sample. It starts at 0 and stays 0 for this
// column when %r9583 >= %r4057 (column out of range).
setp.ge.u32 %p1184, %r9583, %r4057;
mov.u32 %r9595, %r6255;
@%p1184 bra $L__BB2_965;
// Row (%r9579 + 4) is skipped when it is at or beyond the row bound %r4058.
setp.ge.u32 %p1185, %r2028, %r4058;
mov.u32 %r9595, 0;
@%p1185 bra $L__BB2_959;
// Load the 32-bit sample at element index (%r2029 + %r9583) + %rd4 from base %rd3
// (4 bytes per element) and set bit 0 of %r9595 when |sample| > 4 and |sample| is odd.
add.s32 %r6264, %r2029, %r9583;
cvt.u64.u32 %rd775, %r6264;
add.s64 %rd776, %rd775, %rd4;
shl.b64 %rd777, %rd776, 2;
add.s64 %rd778, %rd3, %rd777;
ld.global.u32 %r6265, [%rd778];
abs.s32 %r6266, %r6265;
setp.gt.u32 %p1186, %r6266, 4;
and.b32 %r6267, %r6266, 1;
setp.eq.b32 %p1187, %r6267, 1;
and.pred %p1188, %p1186, %p1187;
selp.u32 %r9595, 1, 0, %p1188;
$L__BB2_959:
setp.ge.u32 %p1189, %r2030, %r4058;
@%p1189 bra $L__BB2_961;
add.s32 %r6268, %r2031, %r9583;
cvt.u64.u32 %rd779, %r6268;
add.s64 %rd780, %rd779, %rd4;
shl.b64 %rd781, %rd780, 2;
add.s64 %rd782, %rd3, %rd781;
ld.global.u32 %r6269, [%rd782];
abs.s32 %r6270, %r6269;
setp.gt.u32 %p1190, %r6270, 4;
and.b32 %r6271, %r6270, 1;
setp.eq.b32 %p1191, %r6271, 1;
and.pred %p1192, %p1190, %p1191;
selp.b32 %r6272, 2, 0, %p1192;
or.b32 %r9595, %r6272, %r9595;
$L__BB2_961:
setp.ge.u32 %p1193, %r2032, %r4058;
@%p1193 bra $L__BB2_963;
add.s32 %r6273, %r2033, %r9583;
cvt.u64.u32 %rd783, %r6273;
add.s64 %rd784, %rd783, %rd4;
shl.b64 %rd785, %rd784, 2;
add.s64 %rd786, %rd3, %rd785;
ld.global.u32 %r6274, [%rd786];
abs.s32 %r6275, %r6274;
setp.gt.u32 %p1194, %r6275, 4;
and.b32 %r6276, %r6275, 1;
setp.eq.b32 %p1195, %r6276, 1;
and.pred %p1196, %p1194, %p1195;
selp.b32 %r6277, 4, 0, %p1196;
or.b32 %r9595, %r6277, %r9595;
$L__BB2_963:
setp.ge.u32 %p1197, %r2034, %r4058;
@%p1197 bra $L__BB2_965;
add.s32 %r6278, %r2035, %r9583;
cvt.u64.u32 %rd787, %r6278;
add.s64 %rd788, %rd787, %rd4;
shl.b64 %rd789, %rd788, 2;
add.s64 %rd790, %rd3, %rd789;
ld.global.u32 %r6279, [%rd790];
abs.s32 %r6280, %r6279;
setp.gt.u32 %p1198, %r6280, 4;
and.b32 %r6281, %r6280, 1;
setp.eq.b32 %p1199, %r6281, 1;
and.pred %p1200, %p1198, %p1199;
selp.b32 %r6282, 8, 0, %p1200;
or.b32 %r9595, %r6282, %r9595;
$L__BB2_965:
add.s32 %r2057, %r9583, 1;
setp.ge.u32 %p1201, %r2057, %r4057;
@%p1201 bra $L__BB2_974;
setp.ge.u32 %p1202, %r2028, %r4058;
@%p1202 bra $L__BB2_968;
add.s32 %r6283, %r2029, %r2057;
cvt.u64.u32 %rd791, %r6283;
add.s64 %rd792, %rd791, %rd4;
shl.b64 %rd793, %rd792, 2;
add.s64 %rd794, %rd3, %rd793;
ld.global.u32 %r6284, [%rd794];
abs.s32 %r6285, %r6284;
setp.gt.u32 %p1203, %r6285, 4;
and.b32 %r6286, %r6285, 1;
setp.eq.b32 %p1204, %r6286, 1;
and.pred %p1205, %p1203, %p1204;
selp.b32 %r6287, 16, 0, %p1205;
or.b32 %r9595, %r6287, %r9595;
$L__BB2_968:
setp.ge.u32 %p1206, %r2030, %r4058;
@%p1206 bra $L__BB2_970;
add.s32 %r6288, %r2031, %r2057;
cvt.u64.u32 %rd795, %r6288;
add.s64 %rd796, %rd795, %rd4;
shl.b64 %rd797, %rd796, 2;
add.s64 %rd798, %rd3, %rd797;
ld.global.u32 %r6289, [%rd798];
abs.s32 %r6290, %r6289;
setp.gt.u32 %p1207, %r6290, 4;
and.b32 %r6291, %r6290, 1;
setp.eq.b32 %p1208, %r6291, 1;
and.pred %p1209, %p1207, %p1208;
selp.b32 %r6292, 32, 0, %p1209;
or.b32 %r9595, %r6292, %r9595;
$L__BB2_970:
setp.ge.u32 %p1210, %r2032, %r4058;
@%p1210 bra $L__BB2_972;
add.s32 %r6293, %r2033, %r2057;
cvt.u64.u32 %rd799, %r6293;
add.s64 %rd800, %rd799, %rd4;
shl.b64 %rd801, %rd800, 2;
add.s64 %rd802, %rd3, %rd801;
ld.global.u32 %r6294, [%rd802];
abs.s32 %r6295, %r6294;
setp.gt.u32 %p1211, %r6295, 4;
and.b32 %r6296, %r6295, 1;
setp.eq.b32 %p1212, %r6296, 1;
and.pred %p1213, %p1211, %p1212;
selp.b32 %r6297, 64, 0, %p1213;
or.b32 %r9595, %r6297, %r9595;
$L__BB2_972:
setp.ge.u32 %p1214, %r2034, %r4058;
@%p1214 bra $L__BB2_974;
add.s32 %r6298, %r2035, %r2057;
cvt.u64.u32 %rd803, %r6298;
add.s64 %rd804, %rd803, %rd4;
shl.b64 %rd805, %rd804, 2;
add.s64 %rd806, %rd3, %rd805;
ld.global.u32 %r6299, [%rd806];
abs.s32 %r6300, %r6299;
setp.gt.u32 %p1215, %r6300, 4;
and.b32 %r6301, %r6300, 1;
setp.eq.b32 %p1216, %r6301, 1;
and.pred %p1217, %p1215, %p1216;
selp.b32 %r6302, 128, 0, %p1217;
or.b32 %r9595, %r6302, %r9595;
$L__BB2_974:
add.s32 %r2066, %r9583, 2;
setp.ge.u32 %p1218, %r2066, %r4057;
@%p1218 bra $L__BB2_983;
setp.ge.u32 %p1219, %r2028, %r4058;
@%p1219 bra $L__BB2_977;
add.s32 %r6303, %r2029, %r2066;
cvt.u64.u32 %rd807, %r6303;
add.s64 %rd808, %rd807, %rd4;
shl.b64 %rd809, %rd808, 2;
add.s64 %rd810, %rd3, %rd809;
ld.global.u32 %r6304, [%rd810];
abs.s32 %r6305, %r6304;
setp.gt.u32 %p1220, %r6305, 4;
and.b32 %r6306, %r6305, 1;
setp.eq.b32 %p1221, %r6306, 1;
and.pred %p1222, %p1220, %p1221;
selp.b32 %r6307, 256, 0, %p1222;
or.b32 %r9595, %r6307, %r9595;
$L__BB2_977:
setp.ge.u32 %p1223, %r2030, %r4058;
@%p1223 bra $L__BB2_979;
add.s32 %r6308, %r2031, %r2066;
cvt.u64.u32 %rd811, %r6308;
add.s64 %rd812, %rd811, %rd4;
shl.b64 %rd813, %rd812, 2;
add.s64 %rd814, %rd3, %rd813;
ld.global.u32 %r6309, [%rd814];
abs.s32 %r6310, %r6309;
setp.gt.u32 %p1224, %r6310, 4;
and.b32 %r6311, %r6310, 1;
setp.eq.b32 %p1225, %r6311, 1;
and.pred %p1226, %p1224, %p1225;
selp.b32 %r6312, 512, 0, %p1226;
or.b32 %r9595, %r6312, %r9595;
$L__BB2_979:
setp.ge.u32 %p1227, %r2032, %r4058;
@%p1227 bra $L__BB2_981;
add.s32 %r6313, %r2033, %r2066;
cvt.u64.u32 %rd815, %r6313;
add.s64 %rd816, %rd815, %rd4;
shl.b64 %rd817, %rd816, 2;
add.s64 %rd818, %rd3, %rd817;
ld.global.u32 %r6314, [%rd818];
abs.s32 %r6315, %r6314;
setp.gt.u32 %p1228, %r6315, 4;
and.b32 %r6316, %r6315, 1;
setp.eq.b32 %p1229, %r6316, 1;
and.pred %p1230, %p1228, %p1229;
selp.b32 %r6317, 1024, 0, %p1230;
or.b32 %r9595, %r6317, %r9595;
$L__BB2_981:
setp.ge.u32 %p1231, %r2034, %r4058;
@%p1231 bra $L__BB2_983;
add.s32 %r6318, %r2035, %r2066;
cvt.u64.u32 %rd819, %r6318;
add.s64 %rd820, %rd819, %rd4;
shl.b64 %rd821, %rd820, 2;
add.s64 %rd822, %rd3, %rd821;
ld.global.u32 %r6319, [%rd822];
abs.s32 %r6320, %r6319;
setp.gt.u32 %p1232, %r6320, 4;
and.b32 %r6321, %r6320, 1;
setp.eq.b32 %p1233, %r6321, 1;
and.pred %p1234, %p1232, %p1233;
selp.b32 %r6322, 2048, 0, %p1234;
or.b32 %r9595, %r6322, %r9595;
$L__BB2_983:
add.s32 %r2075, %r9583, 3;
setp.ge.u32 %p1235, %r2075, %r4057;
@%p1235 bra $L__BB2_992;
setp.ge.u32 %p1236, %r2028, %r4058;
@%p1236 bra $L__BB2_986;
add.s32 %r6323, %r2029, %r2075;
cvt.u64.u32 %rd823, %r6323;
add.s64 %rd824, %rd823, %rd4;
shl.b64 %rd825, %rd824, 2;
add.s64 %rd826, %rd3, %rd825;
ld.global.u32 %r6324, [%rd826];
abs.s32 %r6325, %r6324;
setp.gt.u32 %p1237, %r6325, 4;
and.b32 %r6326, %r6325, 1;
setp.eq.b32 %p1238, %r6326, 1;
and.pred %p1239, %p1237, %p1238;
selp.b32 %r6327, 4096, 0, %p1239;
or.b32 %r9595, %r6327, %r9595;
$L__BB2_986:
setp.ge.u32 %p1240, %r2030, %r4058;
@%p1240 bra $L__BB2_988;
add.s32 %r6328, %r2031, %r2075;
cvt.u64.u32 %rd827, %r6328;
add.s64 %rd828, %rd827, %rd4;
shl.b64 %rd829, %rd828, 2;
add.s64 %rd830, %rd3, %rd829;
ld.global.u32 %r6329, [%rd830];
abs.s32 %r6330, %r6329;
setp.gt.u32 %p1241, %r6330, 4;
and.b32 %r6331, %r6330, 1;
setp.eq.b32 %p1242, %r6331, 1;
and.pred %p1243, %p1241, %p1242;
selp.b32 %r6332, 8192, 0, %p1243;
or.b32 %r9595, %r6332, %r9595;
$L__BB2_988:
setp.ge.u32 %p1244, %r2032, %r4058;
@%p1244 bra $L__BB2_990;
add.s32 %r6333, %r2033, %r2075;
cvt.u64.u32 %rd831, %r6333;
add.s64 %rd832, %rd831, %rd4;
shl.b64 %rd833, %rd832, 2;
add.s64 %rd834, %rd3, %rd833;
ld.global.u32 %r6334, [%rd834];
abs.s32 %r6335, %r6334;
setp.gt.u32 %p1245, %r6335, 4;
and.b32 %r6336, %r6335, 1;
setp.eq.b32 %p1246, %r6336, 1;
and.pred %p1247, %p1245, %p1246;
selp.b32 %r6337, 16384, 0, %p1247;
or.b32 %r9595, %r6337, %r9595;
$L__BB2_990:
setp.ge.u32 %p1248, %r2034, %r4058;
@%p1248 bra $L__BB2_992;
add.s32 %r6338, %r2035, %r2075;
cvt.u64.u32 %rd835, %r6338;
add.s64 %rd836, %rd835, %rd4;
shl.b64 %rd837, %rd836, 2;
add.s64 %rd838, %rd3, %rd837;
ld.global.u32 %r6339, [%rd838];
abs.s32 %r6340, %r6339;
setp.gt.u32 %p1249, %r6340, 4;
and.b32 %r6341, %r6340, 1;
setp.eq.b32 %p1250, %r6341, 1;
and.pred %p1251, %p1249, %p1250;
selp.b32 %r6342, 32768, 0, %p1251;
or.b32 %r9595, %r6342, %r9595;
$L__BB2_992:
add.s32 %r6344, %r9583, 4;
setp.ge.u32 %p1252, %r6344, %r4057;
mov.u32 %r9611, 0;
@%p1252 bra $L__BB2_1001;
setp.ge.u32 %p1253, %r2028, %r4058;
mov.u32 %r9611, 0;
@%p1253 bra $L__BB2_995;
add.s32 %r6346, %r2029, %r9583;
add.s32 %r6347, %r6346, 4;
cvt.u64.u32 %rd839, %r6347;
add.s64 %rd840, %rd839, %rd4;
shl.b64 %rd841, %rd840, 2;
add.s64 %rd842, %rd3, %rd841;
ld.global.u32 %r6348, [%rd842];
abs.s32 %r6349, %r6348;
setp.gt.u32 %p1254, %r6349, 4;
and.b32 %r6350, %r6349, 1;
setp.eq.b32 %p1255, %r6350, 1;
and.pred %p1256, %p1254, %p1255;
selp.u32 %r9611, 1, 0, %p1256;
$L__BB2_995:
setp.ge.u32 %p1257, %r2030, %r4058;
@%p1257 bra $L__BB2_997;
add.s32 %r6351, %r2031, %r9583;
add.s32 %r6352, %r6351, 4;
cvt.u64.u32 %rd843, %r6352;
add.s64 %rd844, %rd843, %rd4;
shl.b64 %rd845, %rd844, 2;
add.s64 %rd846, %rd3, %rd845;
ld.global.u32 %r6353, [%rd846];
abs.s32 %r6354, %r6353;
setp.gt.u32 %p1258, %r6354, 4;
and.b32 %r6355, %r6354, 1;
setp.eq.b32 %p1259, %r6355, 1;
and.pred %p1260, %p1258, %p1259;
selp.b32 %r6356, 2, 0, %p1260;
or.b32 %r9611, %r6356, %r9611;
$L__BB2_997:
setp.ge.u32 %p1261, %r2032, %r4058;
@%p1261 bra $L__BB2_999;
add.s32 %r6357, %r2033, %r9583;
add.s32 %r6358, %r6357, 4;
cvt.u64.u32 %rd847, %r6358;
add.s64 %rd848, %rd847, %rd4;
shl.b64 %rd849, %rd848, 2;
add.s64 %rd850, %rd3, %rd849;
ld.global.u32 %r6359, [%rd850];
abs.s32 %r6360, %r6359;
setp.gt.u32 %p1262, %r6360, 4;
and.b32 %r6361, %r6360, 1;
setp.eq.b32 %p1263, %r6361, 1;
and.pred %p1264, %p1262, %p1263;
selp.b32 %r6362, 4, 0, %p1264;
or.b32 %r9611, %r6362, %r9611;
$L__BB2_999:
setp.ge.u32 %p1265, %r2034, %r4058;
@%p1265 bra $L__BB2_1001;
add.s32 %r6363, %r2035, %r9583;
add.s32 %r6364, %r6363, 4;
cvt.u64.u32 %rd851, %r6364;
add.s64 %rd852, %rd851, %rd4;
shl.b64 %rd853, %rd852, 2;
add.s64 %rd854, %rd3, %rd853;
ld.global.u32 %r6365, [%rd854];
abs.s32 %r6366, %r6365;
setp.gt.u32 %p1266, %r6366, 4;
and.b32 %r6367, %r6366, 1;
setp.eq.b32 %p1267, %r6367, 1;
and.pred %p1268, %p1266, %p1267;
selp.b32 %r6368, 8, 0, %p1268;
or.b32 %r9611, %r6368, %r9611;
$L__BB2_1001:
add.s32 %r2092, %r9583, 5;
setp.ge.u32 %p1269, %r2092, %r4057;
@%p1269 bra $L__BB2_1010;
setp.ge.u32 %p1270, %r2028, %r4058;
@%p1270 bra $L__BB2_1004;
add.s32 %r6369, %r2029, %r2092;
cvt.u64.u32 %rd855, %r6369;
add.s64 %rd856, %rd855, %rd4;
shl.b64 %rd857, %rd856, 2;
add.s64 %rd858, %rd3, %rd857;
ld.global.u32 %r6370, [%rd858];
abs.s32 %r6371, %r6370;
setp.gt.u32 %p1271, %r6371, 4;
and.b32 %r6372, %r6371, 1;
setp.eq.b32 %p1272, %r6372, 1;
and.pred %p1273, %p1271, %p1272;
selp.b32 %r6373, 16, 0, %p1273;
or.b32 %r9611, %r6373, %r9611;
$L__BB2_1004:
setp.ge.u32 %p1274, %r2030, %r4058;
@%p1274 bra $L__BB2_1006;
add.s32 %r6374, %r2031, %r2092;
cvt.u64.u32 %rd859, %r6374;
add.s64 %rd860, %rd859, %rd4;
shl.b64 %rd861, %rd860, 2;
add.s64 %rd862, %rd3, %rd861;
ld.global.u32 %r6375, [%rd862];
abs.s32 %r6376, %r6375;
setp.gt.u32 %p1275, %r6376, 4;
and.b32 %r6377, %r6376, 1;
setp.eq.b32 %p1276, %r6377, 1;
and.pred %p1277, %p1275, %p1276;
selp.b32 %r6378, 32, 0, %p1277;
or.b32 %r9611, %r6378, %r9611;
$L__BB2_1006:
setp.ge.u32 %p1278, %r2032, %r4058;
@%p1278 bra $L__BB2_1008;
add.s32 %r6379, %r2033, %r2092;
cvt.u64.u32 %rd863, %r6379;
add.s64 %rd864, %rd863, %rd4;
shl.b64 %rd865, %rd864, 2;
add.s64 %rd866, %rd3, %rd865;
ld.global.u32 %r6380, [%rd866];
abs.s32 %r6381, %r6380;
setp.gt.u32 %p1279, %r6381, 4;
and.b32 %r6382, %r6381, 1;
setp.eq.b32 %p1280, %r6382, 1;
and.pred %p1281, %p1279, %p1280;
selp.b32 %r6383, 64, 0, %p1281;
or.b32 %r9611, %r6383, %r9611;
$L__BB2_1008:
setp.ge.u32 %p1282, %r2034, %r4058;
@%p1282 bra $L__BB2_1010;
add.s32 %r6384, %r2035, %r2092;
cvt.u64.u32 %rd867, %r6384;
add.s64 %rd868, %rd867, %rd4;
shl.b64 %rd869, %rd868, 2;
add.s64 %rd870, %rd3, %rd869;
ld.global.u32 %r6385, [%rd870];
abs.s32 %r6386, %r6385;
setp.gt.u32 %p1283, %r6386, 4;
and.b32 %r6387, %r6386, 1;
setp.eq.b32 %p1284, %r6387, 1;
and.pred %p1285, %p1283, %p1284;
selp.b32 %r6388, 128, 0, %p1285;
or.b32 %r9611, %r6388, %r9611;
$L__BB2_1010:
add.s32 %r2101, %r9583, 6;
setp.ge.u32 %p1286, %r2101, %r4057;
@%p1286 bra $L__BB2_1019;
setp.ge.u32 %p1287, %r2028, %r4058;
@%p1287 bra $L__BB2_1013;
add.s32 %r6389, %r2029, %r2101;
cvt.u64.u32 %rd871, %r6389;
add.s64 %rd872, %rd871, %rd4;
shl.b64 %rd873, %rd872, 2;
add.s64 %rd874, %rd3, %rd873;
ld.global.u32 %r6390, [%rd874];
abs.s32 %r6391, %r6390;
setp.gt.u32 %p1288, %r6391, 4;
and.b32 %r6392, %r6391, 1;
setp.eq.b32 %p1289, %r6392, 1;
and.pred %p1290, %p1288, %p1289;
selp.b32 %r6393, 256, 0, %p1290;
or.b32 %r9611, %r6393, %r9611;
$L__BB2_1013:
setp.ge.u32 %p1291, %r2030, %r4058;
@%p1291 bra $L__BB2_1015;
add.s32 %r6394, %r2031, %r2101;
cvt.u64.u32 %rd875, %r6394;
add.s64 %rd876, %rd875, %rd4;
shl.b64 %rd877, %rd876, 2;
add.s64 %rd878, %rd3, %rd877;
ld.global.u32 %r6395, [%rd878];
abs.s32 %r6396, %r6395;
setp.gt.u32 %p1292, %r6396, 4;
and.b32 %r6397, %r6396, 1;
setp.eq.b32 %p1293, %r6397, 1;
and.pred %p1294, %p1292, %p1293;
selp.b32 %r6398, 512, 0, %p1294;
or.b32 %r9611, %r6398, %r9611;
$L__BB2_1015:
setp.ge.u32 %p1295, %r2032, %r4058;
@%p1295 bra $L__BB2_1017;
add.s32 %r6399, %r2033, %r2101;
cvt.u64.u32 %rd879, %r6399;
add.s64 %rd880, %rd879, %rd4;
shl.b64 %rd881, %rd880, 2;
add.s64 %rd882, %rd3, %rd881;
ld.global.u32 %r6400, [%rd882];
abs.s32 %r6401, %r6400;
setp.gt.u32 %p1296, %r6401, 4;
and.b32 %r6402, %r6401, 1;
setp.eq.b32 %p1297, %r6402, 1;
and.pred %p1298, %p1296, %p1297;
selp.b32 %r6403, 1024, 0, %p1298;
or.b32 %r9611, %r6403, %r9611;
$L__BB2_1017:
setp.ge.u32 %p1299, %r2034, %r4058;
@%p1299 bra $L__BB2_1019;
add.s32 %r6404, %r2035, %r2101;
cvt.u64.u32 %rd883, %r6404;
add.s64 %rd884, %rd883, %rd4;
shl.b64 %rd885, %rd884, 2;
add.s64 %rd886, %rd3, %rd885;
ld.global.u32 %r6405, [%rd886];
abs.s32 %r6406, %r6405;
setp.gt.u32 %p1300, %r6406, 4;
and.b32 %r6407, %r6406, 1;
setp.eq.b32 %p1301, %r6407, 1;
and.pred %p1302, %p1300, %p1301;
selp.b32 %r6408, 2048, 0, %p1302;
or.b32 %r9611, %r6408, %r9611;
$L__BB2_1019:
add.s32 %r2110, %r9583, 7;
setp.ge.u32 %p1303, %r2110, %r4057;
@%p1303 bra $L__BB2_1028;
setp.ge.u32 %p1304, %r2028, %r4058;
@%p1304 bra $L__BB2_1022;
add.s32 %r6409, %r2029, %r2110;
cvt.u64.u32 %rd887, %r6409;
add.s64 %rd888, %rd887, %rd4;
shl.b64 %rd889, %rd888, 2;
add.s64 %rd890, %rd3, %rd889;
ld.global.u32 %r6410, [%rd890];
abs.s32 %r6411, %r6410;
setp.gt.u32 %p1305, %r6411, 4;
and.b32 %r6412, %r6411, 1;
setp.eq.b32 %p1306, %r6412, 1;
and.pred %p1307, %p1305, %p1306;
selp.b32 %r6413, 4096, 0, %p1307;
or.b32 %r9611, %r6413, %r9611;
$L__BB2_1022:
setp.ge.u32 %p1308, %r2030, %r4058;
@%p1308 bra $L__BB2_1024;
add.s32 %r6414, %r2031, %r2110;
cvt.u64.u32 %rd891, %r6414;
add.s64 %rd892, %rd891, %rd4;
shl.b64 %rd893, %rd892, 2;
add.s64 %rd894, %rd3, %rd893;
ld.global.u32 %r6415, [%rd894];
abs.s32 %r6416, %r6415;
setp.gt.u32 %p1309, %r6416, 4;
and.b32 %r6417, %r6416, 1;
setp.eq.b32 %p1310, %r6417, 1;
and.pred %p1311, %p1309, %p1310;
selp.b32 %r6418, 8192, 0, %p1311;
or.b32 %r9611, %r6418, %r9611;
$L__BB2_1024:
setp.ge.u32 %p1312, %r2032, %r4058;
@%p1312 bra $L__BB2_1026;
add.s32 %r6419, %r2033, %r2110;
cvt.u64.u32 %rd895, %r6419;
add.s64 %rd896, %rd895, %rd4;
shl.b64 %rd897, %rd896, 2;
add.s64 %rd898, %rd3, %rd897;
ld.global.u32 %r6420, [%rd898];
abs.s32 %r6421, %r6420;
setp.gt.u32 %p1313, %r6421, 4;
and.b32 %r6422, %r6421, 1;
setp.eq.b32 %p1314, %r6422, 1;
and.pred %p1315, %p1313, %p1314;
selp.b32 %r6423, 16384, 0, %p1315;
or.b32 %r9611, %r6423, %r9611;
$L__BB2_1026:
setp.ge.u32 %p1316, %r2034, %r4058;
@%p1316 bra $L__BB2_1028;
add.s32 %r6424, %r2035, %r2110;
cvt.u64.u32 %rd899, %r6424;
add.s64 %rd900, %rd899, %rd4;
shl.b64 %rd901, %rd900, 2;
add.s64 %rd902, %rd3, %rd901;
ld.global.u32 %r6425, [%rd902];
abs.s32 %r6426, %r6425;
setp.gt.u32 %p1317, %r6426, 4;
and.b32 %r6427, %r6426, 1;
setp.eq.b32 %p1318, %r6427, 1;
and.pred %p1319, %p1317, %p1318;
selp.b32 %r6428, 32768, 0, %p1319;
or.b32 %r9611, %r6428, %r9611;
$L__BB2_1028:
mov.b32 %r2119, {%rs250, %rs251};
add.s32 %r6430, %r2039, %r9583;
cvt.u64.u32 %rd903, %r6430;
add.s64 %rd904, %rd903, %rd4;
shl.b64 %rd905, %rd904, 2;
add.s64 %rd45, %rd3, %rd905;
add.s32 %r6431, %r2042, %r9583;
cvt.u64.u32 %rd906, %r6431;
add.s64 %rd907, %rd906, %rd4;
shl.b64 %rd908, %rd907, 2;
add.s64 %rd46, %rd3, %rd908;
add.s32 %r6432, %r2041, %r9583;
cvt.u64.u32 %rd909, %r6432;
add.s64 %rd910, %rd909, %rd4;
shl.b64 %rd911, %rd910, 2;
add.s64 %rd47, %rd3, %rd911;
add.s32 %r6433, %r2040, %r9583;
cvt.u64.u32 %rd912, %r6433;
add.s64 %rd913, %rd912, %rd4;
shl.b64 %rd914, %rd913, 2;
add.s64 %rd48, %rd3, %rd914;
mov.u32 %r9627, 0;
@%p1184 bra $L__BB2_1037;
setp.le.u32 %p1321, %r4058, %r9579;
mov.u32 %r9627, 0;
@%p1321 bra $L__BB2_1031;
ld.global.u32 %r6435, [%rd45];
abs.s32 %r6436, %r6435;
setp.gt.u32 %p1322, %r6436, 4;
and.b32 %r6437, %r6436, 1;
setp.eq.b32 %p1323, %r6437, 1;
and.pred %p1324, %p1322, %p1323;
selp.u32 %r9627, 1, 0, %p1324;
$L__BB2_1031:
setp.ge.u32 %p1325, %r2036, %r4058;
@%p1325 bra $L__BB2_1033;
ld.global.u32 %r6438, [%rd46];
abs.s32 %r6439, %r6438;
setp.gt.u32 %p1326, %r6439, 4;
and.b32 %r6440, %r6439, 1;
setp.eq.b32 %p1327, %r6440, 1;
and.pred %p1328, %p1326, %p1327;
selp.b32 %r6441, 2, 0, %p1328;
or.b32 %r9627, %r6441, %r9627;
$L__BB2_1033:
setp.ge.u32 %p1329, %r2037, %r4058;
@%p1329 bra $L__BB2_1035;
ld.global.u32 %r6442, [%rd47];
abs.s32 %r6443, %r6442;
setp.gt.u32 %p1330, %r6443, 4;
and.b32 %r6444, %r6443, 1;
setp.eq.b32 %p1331, %r6444, 1;
and.pred %p1332, %p1330, %p1331;
selp.b32 %r6445, 4, 0, %p1332;
or.b32 %r9627, %r6445, %r9627;
$L__BB2_1035:
setp.ge.u32 %p1333, %r2038, %r4058;
@%p1333 bra $L__BB2_1037;
ld.global.u32 %r6446, [%rd48];
abs.s32 %r6447, %r6446;
setp.gt.u32 %p1334, %r6447, 4;
and.b32 %r6448, %r6447, 1;
setp.eq.b32 %p1335, %r6448, 1;
and.pred %p1336, %p1334, %p1335;
selp.b32 %r6449, 8, 0, %p1336;
or.b32 %r9627, %r6449, %r9627;
$L__BB2_1037:
add.s32 %r6450, %r2039, %r2057;
cvt.u64.u32 %rd915, %r6450;
add.s64 %rd916, %rd915, %rd4;
shl.b64 %rd917, %rd916, 2;
add.s64 %rd49, %rd3, %rd917;
add.s32 %r6451, %r2042, %r2057;
cvt.u64.u32 %rd918, %r6451;
add.s64 %rd919, %rd918, %rd4;
shl.b64 %rd920, %rd919, 2;
add.s64 %rd50, %rd3, %rd920;
add.s32 %r6452, %r2041, %r2057;
cvt.u64.u32 %rd921, %r6452;
add.s64 %rd922, %rd921, %rd4;
shl.b64 %rd923, %rd922, 2;
add.s64 %rd51, %rd3, %rd923;
add.s32 %r6453, %r2040, %r2057;
cvt.u64.u32 %rd924, %r6453;
add.s64 %rd925, %rd924, %rd4;
shl.b64 %rd926, %rd925, 2;
add.s64 %rd52, %rd3, %rd926;
shl.b32 %r6454, %r9611, 16;
or.b32 %r2128, %r6454, %r9595;
@%p1201 bra $L__BB2_1046;
setp.le.u32 %p1338, %r4058, %r9579;
@%p1338 bra $L__BB2_1040;
ld.global.u32 %r6455, [%rd49];
abs.s32 %r6456, %r6455;
setp.gt.u32 %p1339, %r6456, 4;
and.b32 %r6457, %r6456, 1;
setp.eq.b32 %p1340, %r6457, 1;
and.pred %p1341, %p1339, %p1340;
selp.b32 %r6458, 16, 0, %p1341;
or.b32 %r9627, %r6458, %r9627;
$L__BB2_1040:
setp.ge.u32 %p1342, %r2036, %r4058;
@%p1342 bra $L__BB2_1042;
ld.global.u32 %r6459, [%rd50];
abs.s32 %r6460, %r6459;
setp.gt.u32 %p1343, %r6460, 4;
and.b32 %r6461, %r6460, 1;
setp.eq.b32 %p1344, %r6461, 1;
and.pred %p1345, %p1343, %p1344;
selp.b32 %r6462, 32, 0, %p1345;
or.b32 %r9627, %r6462, %r9627;
$L__BB2_1042:
setp.ge.u32 %p1346, %r2037, %r4058;
@%p1346 bra $L__BB2_1044;
ld.global.u32 %r6463, [%rd51];
abs.s32 %r6464, %r6463;
setp.gt.u32 %p1347, %r6464, 4;
and.b32 %r6465, %r6464, 1;
setp.eq.b32 %p1348, %r6465, 1;
and.pred %p1349, %p1347, %p1348;
selp.b32 %r6466, 64, 0, %p1349;
or.b32 %r9627, %r6466, %r9627;
$L__BB2_1044:
setp.ge.u32 %p1350, %r2038, %r4058;
@%p1350 bra $L__BB2_1046;
ld.global.u32 %r6467, [%rd52];
abs.s32 %r6468, %r6467;
setp.gt.u32 %p1351, %r6468, 4;
and.b32 %r6469, %r6468, 1;
setp.eq.b32 %p1352, %r6469, 1;
and.pred %p1353, %p1351, %p1352;
selp.b32 %r6470, 128, 0, %p1353;
or.b32 %r9627, %r6470, %r9627;
$L__BB2_1046:
add.s32 %r6471, %r2039, %r2066;
cvt.u64.u32 %rd927, %r6471;
add.s64 %rd928, %rd927, %rd4;
shl.b64 %rd929, %rd928, 2;
add.s64 %rd53, %rd3, %rd929;
add.s32 %r6472, %r2042, %r2066;
cvt.u64.u32 %rd930, %r6472;
add.s64 %rd931, %rd930, %rd4;
shl.b64 %rd932, %rd931, 2;
add.s64 %rd54, %rd3, %rd932;
add.s32 %r6473, %r2041, %r2066;
cvt.u64.u32 %rd933, %r6473;
add.s64 %rd934, %rd933, %rd4;
shl.b64 %rd935, %rd934, 2;
add.s64 %rd55, %rd3, %rd935;
add.s32 %r6474, %r2040, %r2066;
cvt.u64.u32 %rd936, %r6474;
add.s64 %rd937, %rd936, %rd4;
shl.b64 %rd938, %rd937, 2;
add.s64 %rd56, %rd3, %rd938;
@%p1218 bra $L__BB2_1055;
setp.le.u32 %p1355, %r4058, %r9579;
@%p1355 bra $L__BB2_1049;
ld.global.u32 %r6475, [%rd53];
abs.s32 %r6476, %r6475;
setp.gt.u32 %p1356, %r6476, 4;
and.b32 %r6477, %r6476, 1;
setp.eq.b32 %p1357, %r6477, 1;
and.pred %p1358, %p1356, %p1357;
selp.b32 %r6478, 256, 0, %p1358;
or.b32 %r9627, %r6478, %r9627;
$L__BB2_1049:
setp.ge.u32 %p1359, %r2036, %r4058;
@%p1359 bra $L__BB2_1051;
ld.global.u32 %r6479, [%rd54];
abs.s32 %r6480, %r6479;
setp.gt.u32 %p1360, %r6480, 4;
and.b32 %r6481, %r6480, 1;
setp.eq.b32 %p1361, %r6481, 1;
and.pred %p1362, %p1360, %p1361;
selp.b32 %r6482, 512, 0, %p1362;
or.b32 %r9627, %r6482, %r9627;
$L__BB2_1051:
setp.ge.u32 %p1363, %r2037, %r4058;
@%p1363 bra $L__BB2_1053;
ld.global.u32 %r6483, [%rd55];
abs.s32 %r6484, %r6483;
setp.gt.u32 %p1364, %r6484, 4;
and.b32 %r6485, %r6484, 1;
setp.eq.b32 %p1365, %r6485, 1;
and.pred %p1366, %p1364, %p1365;
selp.b32 %r6486, 1024, 0, %p1366;
or.b32 %r9627, %r6486, %r9627;
$L__BB2_1053:
setp.ge.u32 %p1367, %r2038, %r4058;
@%p1367 bra $L__BB2_1055;
ld.global.u32 %r6487, [%rd56];
abs.s32 %r6488, %r6487;
setp.gt.u32 %p1368, %r6488, 4;
and.b32 %r6489, %r6488, 1;
setp.eq.b32 %p1369, %r6489, 1;
and.pred %p1370, %p1368, %p1369;
selp.b32 %r6490, 2048, 0, %p1370;
or.b32 %r9627, %r6490, %r9627;
$L__BB2_1055:
add.s32 %r6491, %r2039, %r2075;
cvt.u64.u32 %rd939, %r6491;
add.s64 %rd940, %rd939, %rd4;
shl.b64 %rd941, %rd940, 2;
add.s64 %rd57, %rd3, %rd941;
add.s32 %r6492, %r2042, %r2075;
cvt.u64.u32 %rd942, %r6492;
add.s64 %rd943, %rd942, %rd4;
shl.b64 %rd944, %rd943, 2;
add.s64 %rd58, %rd3, %rd944;
add.s32 %r6493, %r2041, %r2075;
cvt.u64.u32 %rd945, %r6493;
add.s64 %rd946, %rd945, %rd4;
shl.b64 %rd947, %rd946, 2;
add.s64 %rd59, %rd3, %rd947;
add.s32 %r6494, %r2040, %r2075;
cvt.u64.u32 %rd948, %r6494;
add.s64 %rd949, %rd948, %rd4;
shl.b64 %rd950, %rd949, 2;
add.s64 %rd60, %rd3, %rd950;
@%p1235 bra $L__BB2_1064;
setp.le.u32 %p1372, %r4058, %r9579;
@%p1372 bra $L__BB2_1058;
ld.global.u32 %r6495, [%rd57];
abs.s32 %r6496, %r6495;
setp.gt.u32 %p1373, %r6496, 4;
and.b32 %r6497, %r6496, 1;
setp.eq.b32 %p1374, %r6497, 1;
and.pred %p1375, %p1373, %p1374;
selp.b32 %r6498, 4096, 0, %p1375;
or.b32 %r9627, %r6498, %r9627;
$L__BB2_1058:
setp.ge.u32 %p1376, %r2036, %r4058;
@%p1376 bra $L__BB2_1060;
ld.global.u32 %r6499, [%rd58];
abs.s32 %r6500, %r6499;
setp.gt.u32 %p1377, %r6500, 4;
and.b32 %r6501, %r6500, 1;
setp.eq.b32 %p1378, %r6501, 1;
and.pred %p1379, %p1377, %p1378;
selp.b32 %r6502, 8192, 0, %p1379;
or.b32 %r9627, %r6502, %r9627;
$L__BB2_1060:
setp.ge.u32 %p1380, %r2037, %r4058;
@%p1380 bra $L__BB2_1062;
ld.global.u32 %r6503, [%rd59];
abs.s32 %r6504, %r6503;
setp.gt.u32 %p1381, %r6504, 4;
and.b32 %r6505, %r6504, 1;
setp.eq.b32 %p1382, %r6505, 1;
and.pred %p1383, %p1381, %p1382;
selp.b32 %r6506, 16384, 0, %p1383;
or.b32 %r9627, %r6506, %r9627;
$L__BB2_1062:
setp.ge.u32 %p1384, %r2038, %r4058;
@%p1384 bra $L__BB2_1064;
ld.global.u32 %r6507, [%rd60];
abs.s32 %r6508, %r6507;
setp.gt.u32 %p1385, %r6508, 4;
and.b32 %r6509, %r6508, 1;
setp.eq.b32 %p1386, %r6509, 1;
and.pred %p1387, %p1385, %p1386;
selp.b32 %r6510, 32768, 0, %p1387;
or.b32 %r9627, %r6510, %r9627;
$L__BB2_1064:
mov.u32 %r9643, 0;
@%p1252 bra $L__BB2_1073;
setp.le.u32 %p1389, %r4058, %r9579;
mov.u32 %r9643, 0;
@%p1389 bra $L__BB2_1067;
add.s32 %r6515, %r6430, 4;
cvt.u64.u32 %rd951, %r6515;
add.s64 %rd952, %rd951, %rd4;
shl.b64 %rd953, %rd952, 2;
add.s64 %rd954, %rd3, %rd953;
ld.global.u32 %r6516, [%rd954];
abs.s32 %r6517, %r6516;
setp.gt.u32 %p1390, %r6517, 4;
and.b32 %r6518, %r6517, 1;
setp.eq.b32 %p1391, %r6518, 1;
and.pred %p1392, %p1390, %p1391;
selp.u32 %r9643, 1, 0, %p1392;
$L__BB2_1067:
setp.ge.u32 %p1393, %r2036, %r4058;
@%p1393 bra $L__BB2_1069;
add.s32 %r6520, %r6431, 4;
cvt.u64.u32 %rd955, %r6520;
add.s64 %rd956, %rd955, %rd4;
shl.b64 %rd957, %rd956, 2;
add.s64 %rd958, %rd3, %rd957;
ld.global.u32 %r6521, [%rd958];
abs.s32 %r6522, %r6521;
setp.gt.u32 %p1394, %r6522, 4;
and.b32 %r6523, %r6522, 1;
setp.eq.b32 %p1395, %r6523, 1;
and.pred %p1396, %p1394, %p1395;
selp.b32 %r6524, 2, 0, %p1396;
or.b32 %r9643, %r6524, %r9643;
$L__BB2_1069:
setp.ge.u32 %p1397, %r2037, %r4058;
@%p1397 bra $L__BB2_1071;
add.s32 %r6526, %r6432, 4;
cvt.u64.u32 %rd959, %r6526;
add.s64 %rd960, %rd959, %rd4;
shl.b64 %rd961, %rd960, 2;
add.s64 %rd962, %rd3, %rd961;
ld.global.u32 %r6527, [%rd962];
abs.s32 %r6528, %r6527;
setp.gt.u32 %p1398, %r6528, 4;
and.b32 %r6529, %r6528, 1;
setp.eq.b32 %p1399, %r6529, 1;
and.pred %p1400, %p1398, %p1399;
selp.b32 %r6530, 4, 0, %p1400;
or.b32 %r9643, %r6530, %r9643;
$L__BB2_1071:
setp.ge.u32 %p1401, %r2038, %r4058;
@%p1401 bra $L__BB2_1073;
add.s32 %r6532, %r6433, 4;
cvt.u64.u32 %rd963, %r6532;
add.s64 %rd964, %rd963, %rd4;
shl.b64 %rd965, %rd964, 2;
add.s64 %rd966, %rd3, %rd965;
ld.global.u32 %r6533, [%rd966];
abs.s32 %r6534, %r6533;
setp.gt.u32 %p1402, %r6534, 4;
and.b32 %r6535, %r6534, 1;
setp.eq.b32 %p1403, %r6535, 1;
and.pred %p1404, %p1402, %p1403;
selp.b32 %r6536, 8, 0, %p1404;
or.b32 %r9643, %r6536, %r9643;
$L__BB2_1073:
@%p1269 bra $L__BB2_1082;
setp.le.u32 %p1406, %r4058, %r9579;
@%p1406 bra $L__BB2_1076;
add.s32 %r6537, %r2039, %r2092;
cvt.u64.u32 %rd967, %r6537;
add.s64 %rd968, %rd967, %rd4;
shl.b64 %rd969, %rd968, 2;
add.s64 %rd970, %rd3, %rd969;
ld.global.u32 %r6538, [%rd970];
abs.s32 %r6539, %r6538;
setp.gt.u32 %p1407, %r6539, 4;
and.b32 %r6540, %r6539, 1;
setp.eq.b32 %p1408, %r6540, 1;
and.pred %p1409, %p1407, %p1408;
selp.b32 %r6541, 16, 0, %p1409;
or.b32 %r9643, %r6541, %r9643;
$L__BB2_1076:
setp.ge.u32 %p1410, %r2036, %r4058;
@%p1410 bra $L__BB2_1078;
add.s32 %r6542, %r2042, %r2092;
cvt.u64.u32 %rd971, %r6542;
add.s64 %rd972, %rd971, %rd4;
shl.b64 %rd973, %rd972, 2;
add.s64 %rd974, %rd3, %rd973;
ld.global.u32 %r6543, [%rd974];
abs.s32 %r6544, %r6543;
setp.gt.u32 %p1411, %r6544, 4;
and.b32 %r6545, %r6544, 1;
setp.eq.b32 %p1412, %r6545, 1;
and.pred %p1413, %p1411, %p1412;
selp.b32 %r6546, 32, 0, %p1413;
or.b32 %r9643, %r6546, %r9643;
$L__BB2_1078:
setp.ge.u32 %p1414, %r2037, %r4058;
@%p1414 bra $L__BB2_1080;
add.s32 %r6547, %r2041, %r2092;
cvt.u64.u32 %rd975, %r6547;
add.s64 %rd976, %rd975, %rd4;
shl.b64 %rd977, %rd976, 2;
add.s64 %rd978, %rd3, %rd977;
ld.global.u32 %r6548, [%rd978];
abs.s32 %r6549, %r6548;
setp.gt.u32 %p1415, %r6549, 4;
and.b32 %r6550, %r6549, 1;
setp.eq.b32 %p1416, %r6550, 1;
and.pred %p1417, %p1415, %p1416;
selp.b32 %r6551, 64, 0, %p1417;
or.b32 %r9643, %r6551, %r9643;
$L__BB2_1080:
setp.ge.u32 %p1418, %r2038, %r4058;
@%p1418 bra $L__BB2_1082;
add.s32 %r6552, %r2040, %r2092;
cvt.u64.u32 %rd979, %r6552;
add.s64 %rd980, %rd979, %rd4;
shl.b64 %rd981, %rd980, 2;
add.s64 %rd982, %rd3, %rd981;
ld.global.u32 %r6553, [%rd982];
abs.s32 %r6554, %r6553;
setp.gt.u32 %p1419, %r6554, 4;
and.b32 %r6555, %r6554, 1;
setp.eq.b32 %p1420, %r6555, 1;
and.pred %p1421, %p1419, %p1420;
selp.b32 %r6556, 128, 0, %p1421;
or.b32 %r9643, %r6556, %r9643;
$L__BB2_1082:
@%p1286 bra $L__BB2_1091;
setp.le.u32 %p1423, %r4058, %r9579;
@%p1423 bra $L__BB2_1085;
add.s32 %r6557, %r2039, %r2101;
cvt.u64.u32 %rd983, %r6557;
add.s64 %rd984, %rd983, %rd4;
shl.b64 %rd985, %rd984, 2;
add.s64 %rd986, %rd3, %rd985;
ld.global.u32 %r6558, [%rd986];
abs.s32 %r6559, %r6558;
setp.gt.u32 %p1424, %r6559, 4;
and.b32 %r6560, %r6559, 1;
setp.eq.b32 %p1425, %r6560, 1;
and.pred %p1426, %p1424, %p1425;
selp.b32 %r6561, 256, 0, %p1426;
or.b32 %r9643, %r6561, %r9643;
$L__BB2_1085:
setp.ge.u32 %p1427, %r2036, %r4058;
@%p1427 bra $L__BB2_1087;
add.s32 %r6562, %r2042, %r2101;
cvt.u64.u32 %rd987, %r6562;
add.s64 %rd988, %rd987, %rd4;
shl.b64 %rd989, %rd988, 2;
add.s64 %rd990, %rd3, %rd989;
ld.global.u32 %r6563, [%rd990];
abs.s32 %r6564, %r6563;
setp.gt.u32 %p1428, %r6564, 4;
and.b32 %r6565, %r6564, 1;
setp.eq.b32 %p1429, %r6565, 1;
and.pred %p1430, %p1428, %p1429;
selp.b32 %r6566, 512, 0, %p1430;
or.b32 %r9643, %r6566, %r9643;
$L__BB2_1087:
setp.ge.u32 %p1431, %r2037, %r4058;
@%p1431 bra $L__BB2_1089;
add.s32 %r6567, %r2041, %r2101;
cvt.u64.u32 %rd991, %r6567;
add.s64 %rd992, %rd991, %rd4;
shl.b64 %rd993, %rd992, 2;
add.s64 %rd994, %rd3, %rd993;
ld.global.u32 %r6568, [%rd994];
abs.s32 %r6569, %r6568;
setp.gt.u32 %p1432, %r6569, 4;
and.b32 %r6570, %r6569, 1;
setp.eq.b32 %p1433, %r6570, 1;
and.pred %p1434, %p1432, %p1433;
selp.b32 %r6571, 1024, 0, %p1434;
or.b32 %r9643, %r6571, %r9643;
$L__BB2_1089:
setp.ge.u32 %p1435, %r2038, %r4058;
@%p1435 bra $L__BB2_1091;
add.s32 %r6572, %r2040, %r2101;
cvt.u64.u32 %rd995, %r6572;
add.s64 %rd996, %rd995, %rd4;
shl.b64 %rd997, %rd996, 2;
add.s64 %rd998, %rd3, %rd997;
ld.global.u32 %r6573, [%rd998];
abs.s32 %r6574, %r6573;
setp.gt.u32 %p1436, %r6574, 4;
and.b32 %r6575, %r6574, 1;
setp.eq.b32 %p1437, %r6575, 1;
and.pred %p1438, %p1436, %p1437;
selp.b32 %r6576, 2048, 0, %p1438;
or.b32 %r9643, %r6576, %r9643;
$L__BB2_1091:
@%p1303 bra $L__BB2_1100;
setp.le.u32 %p1440, %r4058, %r9579;
@%p1440 bra $L__BB2_1094;
add.s32 %r6577, %r2039, %r2110;
cvt.u64.u32 %rd999, %r6577;
add.s64 %rd1000, %rd999, %rd4;
shl.b64 %rd1001, %rd1000, 2;
add.s64 %rd1002, %rd3, %rd1001;
ld.global.u32 %r6578, [%rd1002];
abs.s32 %r6579, %r6578;
setp.gt.u32 %p1441, %r6579, 4;
and.b32 %r6580, %r6579, 1;
setp.eq.b32 %p1442, %r6580, 1;
and.pred %p1443, %p1441, %p1442;
selp.b32 %r6581, 4096, 0, %p1443;
or.b32 %r9643, %r6581, %r9643;
$L__BB2_1094:
setp.ge.u32 %p1444, %r2036, %r4058;
@%p1444 bra $L__BB2_1096;
add.s32 %r6582, %r2042, %r2110;
cvt.u64.u32 %rd1003, %r6582;
add.s64 %rd1004, %rd1003, %rd4;
shl.b64 %rd1005, %rd1004, 2;
add.s64 %rd1006, %rd3, %rd1005;
ld.global.u32 %r6583, [%rd1006];
abs.s32 %r6584, %r6583;
setp.gt.u32 %p1445, %r6584, 4;
and.b32 %r6585, %r6584, 1;
setp.eq.b32 %p1446, %r6585, 1;
and.pred %p1447, %p1445, %p1446;
selp.b32 %r6586, 8192, 0, %p1447;
or.b32 %r9643, %r6586, %r9643;
$L__BB2_1096:
setp.ge.u32 %p1448, %r2037, %r4058;
@%p1448 bra $L__BB2_1098;
add.s32 %r6587, %r2041, %r2110;
cvt.u64.u32 %rd1007, %r6587;
add.s64 %rd1008, %rd1007, %rd4;
shl.b64 %rd1009, %rd1008, 2;
add.s64 %rd1010, %rd3, %rd1009;
ld.global.u32 %r6588, [%rd1010];
abs.s32 %r6589, %r6588;
setp.gt.u32 %p1449, %r6589, 4;
and.b32 %r6590, %r6589, 1;
setp.eq.b32 %p1450, %r6590, 1;
and.pred %p1451, %p1449, %p1450;
selp.b32 %r6591, 16384, 0, %p1451;
or.b32 %r9643, %r6591, %r9643;
$L__BB2_1098:
setp.ge.u32 %p1452, %r2038, %r4058;
@%p1452 bra $L__BB2_1100;
add.s32 %r6592, %r2040, %r2110;
cvt.u64.u32 %rd1011, %r6592;
add.s64 %rd1012, %rd1011, %rd4;
shl.b64 %rd1013, %rd1012, 2;
add.s64 %rd1014, %rd3, %rd1013;
ld.global.u32 %r6593, [%rd1014];
abs.s32 %r6594, %r6593;
setp.gt.u32 %p1453, %r6594, 4;
and.b32 %r6595, %r6594, 1;
setp.eq.b32 %p1454, %r6595, 1;
and.pred %p1455, %p1453, %p1454;
selp.b32 %r6596, 32768, 0, %p1455;
or.b32 %r9643, %r6596, %r9643;
$L__BB2_1100:
sub.s32 %r6599, %r6344, %r4057;
shl.b32 %r6600, %r9643, 16;
or.b32 %r2185, %r6600, %r9627;
and.b32 %r6601, %r2119, -2004318072;
shr.u32 %r6602, %r6601, 3;
shl.b32 %r6603, %r2128, 3;
and.b32 %r6604, %r6603, -2004318072;
or.b32 %r2186, %r6604, %r6602;
not.b32 %r6605, %r2185;
setp.gt.s32 %p1456, %r6599, 0;
mov.u32 %r9659, 0;
shl.b32 %r6606, %r6599, 2;
selp.b32 %r6607, %r6606, 0, %p1456;
shr.u32 %r2187, %r2043, %r6607;
and.b32 %r2188, %r2187, %r6605;
@%p1184 bra $L__BB2_1109;
setp.le.u32 %p1458, %r4058, %r9579;
mov.u32 %r9659, 0;
@%p1458 bra $L__BB2_1103;
ld.global.u32 %r6609, [%rd45];
abs.s32 %r6610, %r6609;
setp.eq.s32 %p1459, %r6610, 3;
selp.u32 %r9659, 1, 0, %p1459;
$L__BB2_1103:
setp.ge.u32 %p1460, %r2036, %r4058;
@%p1460 bra $L__BB2_1105;
ld.global.u32 %r6611, [%rd46];
abs.s32 %r6612, %r6611;
setp.eq.s32 %p1461, %r6612, 3;
selp.b32 %r6613, 2, 0, %p1461;
or.b32 %r9659, %r6613, %r9659;
$L__BB2_1105:
setp.ge.u32 %p1462, %r2037, %r4058;
@%p1462 bra $L__BB2_1107;
ld.global.u32 %r6614, [%rd47];
abs.s32 %r6615, %r6614;
setp.eq.s32 %p1463, %r6615, 3;
selp.b32 %r6616, 4, 0, %p1463;
or.b32 %r9659, %r6616, %r9659;
$L__BB2_1107:
setp.ge.u32 %p1464, %r2038, %r4058;
@%p1464 bra $L__BB2_1109;
ld.global.u32 %r6617, [%rd48];
abs.s32 %r6618, %r6617;
setp.eq.s32 %p1465, %r6618, 3;
selp.b32 %r6619, 8, 0, %p1465;
or.b32 %r9659, %r6619, %r9659;
$L__BB2_1109:
@%p1201 bra $L__BB2_1118;
setp.le.u32 %p1467, %r4058, %r9579;
@%p1467 bra $L__BB2_1112;
ld.global.u32 %r6620, [%rd49];
abs.s32 %r6621, %r6620;
setp.eq.s32 %p1468, %r6621, 3;
selp.b32 %r6622, 16, 0, %p1468;
or.b32 %r9659, %r6622, %r9659;
$L__BB2_1112:
setp.ge.u32 %p1469, %r2036, %r4058;
@%p1469 bra $L__BB2_1114;
ld.global.u32 %r6623, [%rd50];
abs.s32 %r6624, %r6623;
setp.eq.s32 %p1470, %r6624, 3;
selp.b32 %r6625, 32, 0, %p1470;
or.b32 %r9659, %r6625, %r9659;
$L__BB2_1114:
setp.ge.u32 %p1471, %r2037, %r4058;
@%p1471 bra $L__BB2_1116;
ld.global.u32 %r6626, [%rd51];
abs.s32 %r6627, %r6626;
setp.eq.s32 %p1472, %r6627, 3;
selp.b32 %r6628, 64, 0, %p1472;
or.b32 %r9659, %r6628, %r9659;
$L__BB2_1116:
setp.ge.u32 %p1473, %r2038, %r4058;
@%p1473 bra $L__BB2_1118;
ld.global.u32 %r6629, [%rd52];
abs.s32 %r6630, %r6629;
setp.eq.s32 %p1474, %r6630, 3;
selp.b32 %r6631, 128, 0, %p1474;
or.b32 %r9659, %r6631, %r9659;
$L__BB2_1118:
@%p1218 bra $L__BB2_1127;
setp.le.u32 %p1476, %r4058, %r9579;
@%p1476 bra $L__BB2_1121;
ld.global.u32 %r6632, [%rd53];
abs.s32 %r6633, %r6632;
setp.eq.s32 %p1477, %r6633, 3;
selp.b32 %r6634, 256, 0, %p1477;
or.b32 %r9659, %r6634, %r9659;
$L__BB2_1121:
setp.ge.u32 %p1478, %r2036, %r4058;
@%p1478 bra $L__BB2_1123;
ld.global.u32 %r6635, [%rd54];
abs.s32 %r6636, %r6635;
setp.eq.s32 %p1479, %r6636, 3;
selp.b32 %r6637, 512, 0, %p1479;
or.b32 %r9659, %r6637, %r9659;
$L__BB2_1123:
setp.ge.u32 %p1480, %r2037, %r4058;
@%p1480 bra $L__BB2_1125;
ld.global.u32 %r6638, [%rd55];
abs.s32 %r6639, %r6638;
setp.eq.s32 %p1481, %r6639, 3;
selp.b32 %r6640, 1024, 0, %p1481;
or.b32 %r9659, %r6640, %r9659;
$L__BB2_1125:
setp.ge.u32 %p1482, %r2038, %r4058;
@%p1482 bra $L__BB2_1127;
ld.global.u32 %r6641, [%rd56];
abs.s32 %r6642, %r6641;
setp.eq.s32 %p1483, %r6642, 3;
selp.b32 %r6643, 2048, 0, %p1483;
or.b32 %r9659, %r6643, %r9659;
$L__BB2_1127:
@%p1235 bra $L__BB2_1136;
setp.le.u32 %p1485, %r4058, %r9579;
@%p1485 bra $L__BB2_1130;
ld.global.u32 %r6644, [%rd57];
abs.s32 %r6645, %r6644;
setp.eq.s32 %p1486, %r6645, 3;
selp.b32 %r6646, 4096, 0, %p1486;
or.b32 %r9659, %r6646, %r9659;
$L__BB2_1130:
setp.ge.u32 %p1487, %r2036, %r4058;
@%p1487 bra $L__BB2_1132;
ld.global.u32 %r6647, [%rd58];
abs.s32 %r6648, %r6647;
setp.eq.s32 %p1488, %r6648, 3;
selp.b32 %r6649, 8192, 0, %p1488;
or.b32 %r9659, %r6649, %r9659;
$L__BB2_1132:
setp.ge.u32 %p1489, %r2037, %r4058;
@%p1489 bra $L__BB2_1134;
ld.global.u32 %r6650, [%rd59];
abs.s32 %r6651, %r6650;
setp.eq.s32 %p1490, %r6651, 3;
selp.b32 %r6652, 16384, 0, %p1490;
or.b32 %r9659, %r6652, %r9659;
$L__BB2_1134:
setp.ge.u32 %p1491, %r2038, %r4058;
@%p1491 bra $L__BB2_1136;
ld.global.u32 %r6653, [%rd60];
abs.s32 %r6654, %r6653;
setp.eq.s32 %p1492, %r6654, 3;
selp.b32 %r6655, 32768, 0, %p1492;
or.b32 %r9659, %r6655, %r9659;
$L__BB2_1136:
and.b32 %r6657, %r2185, -286331154;
shr.u32 %r6658, %r6657, 1;
shl.b32 %r6659, %r2185, 1;
and.b32 %r6660, %r6659, -286331154;
or.b32 %r6661, %r2185, %r2186;
or.b32 %r6662, %r6661, %r6660;
or.b32 %r6663, %r6662, %r6658;
and.b32 %r2221, %r9659, %r2187;
shr.u32 %r6664, %r6663, 4;
shl.b32 %r6665, %r6663, 4;
shr.u32 %r6666, %r9584, 12;
or.b32 %r6667, %r6663, %r6666;
or.b32 %r6668, %r6667, %r6665;
or.b32 %r6669, %r6668, %r6664;
and.b32 %r9669, %r2188, %r6669;
setp.eq.s32 %p1493, %r9669, 0;
mov.u32 %r9690, 0;
@%p1493 bra $L__BB2_1195;
mov.u32 %r9668, 0;
mov.u32 %r9670, %r9668;
$L__BB2_1138:
brev.b32 %r6672, %r9669;
bfind.shiftamt.u32 %r2229, %r6672;
mov.pred %p2373, -1;
mov.u32 %r6673, 1;
shl.b32 %r2230, %r6673, %r2229;
mov.u32 %r6674, -2;
shf.l.wrap.b32 %r6675, %r6674, %r6674, %r2229;
and.b32 %r9669, %r9669, %r6675;
or.b32 %r9668, %r2230, %r9668;
and.b32 %r2233, %r2230, %r2221;
setp.ne.s32 %p1495, %r2233, 0;
selp.u32 %r6676, 1, 0, %p1495;
setp.eq.s32 %p1496, %r9687, 0;
selp.b32 %r6677, 8, 7, %p1496;
shl.b32 %r6678, %r6676, %r9685;
cvt.u16.u32 %rs796, %r6678;
or.b16 %rs1232, %rs1232, %rs796;
add.s32 %r9685, %r9685, 1;
setp.lt.u32 %p1497, %r9685, %r6677;
mov.pred %p2371, %p2373;
@%p1497 bra $L__BB2_1143;
setp.ge.u32 %p1499, %r9689, %r9561;
mov.pred %p2371, 0;
@%p1499 bra $L__BB2_1143;
setp.eq.s64 %p1500, %rd43, 0;
@%p1500 bra $L__BB2_1142;
cvt.u64.u32 %rd1015, %r9689;
add.s64 %rd1016, %rd42, %rd1015;
add.s64 %rd1017, %rd1, %rd1016;
st.global.u8 [%rd1017], %rs1232;
$L__BB2_1142:
and.b16 %rs798, %rs1232, 255;
setp.eq.s16 %p1502, %rs798, 255;
selp.u32 %r9687, 1, 0, %p1502;
add.s32 %r9689, %r9689, 1;
mov.u32 %r9685, 0;
mov.u16 %rs1232, 0;
mov.pred %p2371, %p2373;
$L__BB2_1143:
not.pred %p1504, %p2371;
@%p1504 bra $L__BB2_1202;
setp.eq.s32 %p1505, %r2233, 0;
@%p1505 bra $L__BB2_1185;
or.b32 %r9670, %r2230, %r9670;
mov.u32 %r9677, 51;
setp.gt.s32 %p1506, %r2229, 7;
@%p1506 bra $L__BB2_1161;
setp.gt.s32 %p1518, %r2229, 3;
@%p1518 bra $L__BB2_1154;
setp.gt.s32 %p1524, %r2229, 1;
@%p1524 bra $L__BB2_1151;
setp.eq.s32 %p1527, %r2229, 0;
@%p1527 bra $L__BB2_1184;
setp.eq.s32 %p1528, %r2229, 1;
@%p1528 bra $L__BB2_1150;
bra.uni $L__BB2_1183;
$L__BB2_1150:
mov.u32 %r9677, 118;
bra.uni $L__BB2_1184;
$L__BB2_1161:
setp.gt.s32 %p1507, %r2229, 11;
@%p1507 bra $L__BB2_1169;
setp.gt.s32 %p1513, %r2229, 9;
@%p1513 bra $L__BB2_1166;
setp.eq.s32 %p1516, %r2229, 8;
@%p1516 bra $L__BB2_1179;
setp.eq.s32 %p1517, %r2229, 9;
@%p1517 bra $L__BB2_1165;
bra.uni $L__BB2_1183;
$L__BB2_1165:
mov.u32 %r9677, 30208;
bra.uni $L__BB2_1184;
$L__BB2_1154:
setp.gt.s32 %p1519, %r2229, 5;
@%p1519 bra $L__BB2_1158;
setp.eq.s32 %p1522, %r2229, 4;
@%p1522 bra $L__BB2_1181;
setp.eq.s32 %p1523, %r2229, 5;
@%p1523 bra $L__BB2_1157;
bra.uni $L__BB2_1183;
$L__BB2_1157:
mov.u32 %r9677, 1888;
bra.uni $L__BB2_1184;
$L__BB2_1169:
setp.gt.s32 %p1508, %r2229, 13;
@%p1508 bra $L__BB2_1173;
setp.eq.s32 %p1511, %r2229, 12;
@%p1511 bra $L__BB2_1177;
setp.eq.s32 %p1512, %r2229, 13;
@%p1512 bra $L__BB2_1172;
bra.uni $L__BB2_1183;
$L__BB2_1172:
mov.u32 %r9677, 483328;
bra.uni $L__BB2_1184;
$L__BB2_1151:
setp.eq.s32 %p1525, %r2229, 2;
@%p1525 bra $L__BB2_1182;
setp.eq.s32 %p1526, %r2229, 3;
@%p1526 bra $L__BB2_1153;
bra.uni $L__BB2_1183;
$L__BB2_1153:
mov.u32 %r9677, 200;
bra.uni $L__BB2_1184;
$L__BB2_1166:
setp.eq.s32 %p1514, %r2229, 10;
@%p1514 bra $L__BB2_1178;
setp.eq.s32 %p1515, %r2229, 11;
@%p1515 bra $L__BB2_1168;
bra.uni $L__BB2_1183;
$L__BB2_1168:
mov.u32 %r9677, 51200;
bra.uni $L__BB2_1184;
$L__BB2_1158:
setp.eq.s32 %p1520, %r2229, 6;
@%p1520 bra $L__BB2_1180;
setp.eq.s32 %p1521, %r2229, 7;
@%p1521 bra $L__BB2_1160;
bra.uni $L__BB2_1183;
$L__BB2_1160:
mov.u32 %r9677, 3200;
bra.uni $L__BB2_1184;
$L__BB2_1173:
setp.eq.s32 %p1509, %r2229, 14;
@%p1509 bra $L__BB2_1176;
setp.ne.s32 %p1510, %r2229, 15;
@%p1510 bra $L__BB2_1183;
mov.u32 %r9677, 819200;
bra.uni $L__BB2_1184;
$L__BB2_1179:
mov.u32 %r9677, 13056;
bra.uni $L__BB2_1184;
$L__BB2_1181:
mov.u32 %r9677, 816;
bra.uni $L__BB2_1184;
$L__BB2_1177:
mov.u32 %r9677, 208896;
bra.uni $L__BB2_1184;
$L__BB2_1182:
mov.u32 %r9677, 236;
bra.uni $L__BB2_1184;
$L__BB2_1178:
mov.u32 %r9677, 60416;
bra.uni $L__BB2_1184;
$L__BB2_1180:
mov.u32 %r9677, 3776;
bra.uni $L__BB2_1184;
$L__BB2_1176:
mov.u32 %r9677, 966656;
bra.uni $L__BB2_1184;
$L__BB2_1183:
mov.u32 %r9677, 0;
$L__BB2_1184:
not.b32 %r6697, %r9668;
and.b32 %r6698, %r2188, %r6697;
and.b32 %r6699, %r6698, %r9677;
or.b32 %r9669, %r6699, %r9669;
$L__BB2_1185:
setp.ne.s32 %p1529, %r9669, 0;
@%p1529 bra $L__BB2_1138;
setp.eq.s32 %p1530, %r9670, 0;
mov.u32 %r9690, 0;
@%p1530 bra $L__BB2_1195;
mov.u32 %r9683, %r9670;
$L__BB2_1188:
setp.eq.s32 %p1531, %r9683, 0;
mov.u32 %r9690, %r9670;
@%p1531 bra $L__BB2_1195;
brev.b32 %r6701, %r9683;
bfind.shiftamt.u32 %r6702, %r6701;
mov.pred %p2373, -1;
mov.u32 %r6703, -2;
shf.l.wrap.b32 %r6704, %r6703, %r6703, %r6702;
and.b32 %r9683, %r9683, %r6704;
shr.u32 %r6705, %r6702, 2;
and.b32 %r6706, %r6702, 3;
add.s32 %r6707, %r6706, %r9579;
add.s32 %r6708, %r6705, %r9583;
mad.lo.s32 %r6709, %r6707, %r4055, %r6708;
cvt.u64.u32 %rd1018, %r6709;
add.s64 %rd1019, %rd1018, %rd4;
shl.b64 %rd1020, %rd1019, 2;
add.s64 %rd1021, %rd3, %rd1020;
ld.global.u32 %r6710, [%rd1021];
shr.u32 %r6711, %r6710, 31;
setp.eq.s32 %p1533, %r9687, 0;
selp.b32 %r6712, 8, 7, %p1533;
shl.b32 %r6713, %r6711, %r9685;
cvt.u16.u32 %rs799, %r6713;
or.b16 %rs1232, %rs1232, %rs799;
add.s32 %r9685, %r9685, 1;
setp.lt.u32 %p1534, %r9685, %r6712;
mov.pred %p2372, %p2373;
@%p1534 bra $L__BB2_1194;
setp.ge.u32 %p1536, %r9689, %r9561;
mov.pred %p2372, 0;
@%p1536 bra $L__BB2_1194;
setp.eq.s64 %p1537, %rd43, 0;
@%p1537 bra $L__BB2_1193;
cvt.u64.u32 %rd1022, %r9689;
add.s64 %rd1023, %rd42, %rd1022;
add.s64 %rd1024, %rd1, %rd1023;
st.global.u8 [%rd1024], %rs1232;
$L__BB2_1193:
and.b16 %rs801, %rs1232, 255;
setp.eq.s16 %p1539, %rs801, 255;
selp.u32 %r9687, 1, 0, %p1539;
add.s32 %r9689, %r9689, 1;
mov.u32 %r9685, 0;
mov.u16 %rs1232, 0;
mov.pred %p2372, %p2373;
$L__BB2_1194:
@%p2372 bra $L__BB2_1188;
bra.uni $L__BB2_1202;
$L__BB2_1195:
not.b32 %r6715, %r9690;
and.b32 %r6716, %r2221, %r6715;
setp.ne.s32 %p1542, %r6716, 0;
mov.pred %p2373, %p1178;
@%p1542 bra $L__BB2_1202;
setp.lt.u32 %p1543, %r6344, %r4057;
or.b32 %r6717, %r9690, %r2185;
st.local.u16 [%rd44], %r6717;
shr.u32 %r6718, %r6717, 16;
st.local.u16 [%rd44+2], %r6718;
shl.b32 %r6719, %r6717, 1;
and.b32 %r6720, %r6719, 57344;
and.b32 %r6721, %r6717, 57344;
shr.u32 %r6722, %r6721, 1;
or.b32 %r6723, %r6717, %r2186;
and.b32 %r6724, %r6723, 61440;
or.b32 %r6725, %r6724, %r6720;
or.b32 %r9584, %r6725, %r6722;
mov.u32 %r9583, %r6344;
@%p1543 bra $L__BB2_956;
$L__BB2_1197:
add.s32 %r9579, %r9579, 4;
setp.gt.u32 %p1544, %r4058, %r9579;
@%p1544 bra $L__BB2_954;
setp.eq.s32 %p1546, %r9685, 0;
mov.pred %p1545, 0;
mov.pred %p2373, %p1545;
@%p1546 bra $L__BB2_1202;
setp.ge.u32 %p1548, %r9689, %r9561;
mov.pred %p2373, %p1178;
@%p1548 bra $L__BB2_1202;
setp.eq.s64 %p1550, %rd43, 0;
mov.pred %p2373, %p1545;
@%p1550 bra $L__BB2_1202;
cvt.u64.u32 %rd1025, %r9689;
add.s64 %rd1026, %rd42, %rd1025;
add.s64 %rd1027, %rd1, %rd1026;
st.global.u8 [%rd1027], %rs1232;
mov.pred %p2373, %p1545;
$L__BB2_1202:
@%p2373 bra $L__BB2_1246;
bra.uni $L__BB2_1203;
$L__BB2_1246:
mov.u32 %r6779, 2;
st.global.u32 [%rd6], %r6779;
mov.u32 %r6780, 6;
st.global.u32 [%rd6+4], %r6780;
mov.u32 %r6781, 0;
st.global.u32 [%rd6+8], %r6781;
st.global.u32 [%rd6+12], %r6781;
st.global.u32 [%rd6+16], %r6781;
st.global.u32 [%rd6+20], %r6781;
st.global.u32 [%rd6+24], %r6781;
st.global.u32 [%rd6+28], %r6781;
bra.uni $L__BB2_1905;
$L__BB2_1203:
cvt.u64.u32 %rd1028, %r9561;
add.s64 %rd61, %rd42, %rd1028;
setp.eq.s32 %p1552, %r9562, 0;
@%p1552 bra $L__BB2_1244;
add.s32 %r6727, %r9562, -1;
and.b32 %r9698, %r9562, 3;
setp.lt.u32 %p1553, %r6727, 3;
mov.u32 %r9696, 0;
@%p1553 bra $L__BB2_1207;
sub.s32 %r9695, %r9562, %r9698;
mov.u32 %r9696, 0;
$L__BB2_1206:
cvt.u64.u32 %rd1029, %r9696;
add.s64 %rd1030, %rd61, %rd1029;
add.s64 %rd1031, %rd1, %rd1030;
mov.u16 %rs802, 0;
st.global.u8 [%rd1031], %rs802;
st.global.u8 [%rd1031+1], %rs802;
st.global.u8 [%rd1031+2], %rs802;
st.global.u8 [%rd1031+3], %rs802;
add.s32 %r9696, %r9696, 4;
add.s32 %r9695, %r9695, -4;
setp.ne.s32 %p1554, %r9695, 0;
@%p1554 bra $L__BB2_1206;
$L__BB2_1207:
setp.eq.s32 %p1555, %r9698, 0;
@%p1555 bra $L__BB2_1209;
$L__BB2_1208:
.pragma "nounroll";
cvt.u64.u32 %rd1032, %r9696;
add.s64 %rd1033, %rd61, %rd1032;
add.s64 %rd1034, %rd1, %rd1033;
mov.u16 %rs803, 0;
st.global.u8 [%rd1034], %rs803;
add.s32 %r9696, %r9696, 1;
add.s32 %r9698, %r9698, -1;
setp.ne.s32 %p1556, %r9698, 0;
@%p1556 bra $L__BB2_1208;
$L__BB2_1209:
@%p10 bra $L__BB2_1237;
mov.u32 %r6733, 0;
mov.u32 %r9725, 1;
mov.u16 %rs1239, 0;
mov.u32 %r9699, %r6733;
mov.u32 %r9724, %r6733;
mov.u32 %r9723, %r6733;
mov.u32 %r9722, %r6733;
$L__BB2_1211:
mul.lo.s32 %r2282, %r9699, %r4055;
add.s32 %r2283, %r9699, 3;
mul.lo.s32 %r2284, %r4055, %r2283;
add.s32 %r2285, %r9699, 2;
mul.lo.s32 %r2286, %r4055, %r2285;
add.s32 %r2287, %r9699, 1;
mul.lo.s32 %r2288, %r4055, %r2287;
mov.u32 %r9704, %r6733;
$L__BB2_1212:
add.s32 %r9712, %r2284, %r9704;
add.s32 %r9711, %r2286, %r9704;
add.s32 %r9710, %r2288, %r9704;
add.s32 %r9709, %r2282, %r9704;
mov.u32 %r9713, 0;
$L__BB2_1213:
add.s32 %r6736, %r9704, %r9713;
setp.ge.u32 %p1558, %r6736, %r4057;
@%p1558 bra $L__BB2_1234;
setp.ge.u32 %p1559, %r9699, %r4058;
@%p1559 bra $L__BB2_1219;
cvt.u64.u32 %rd1035, %r9709;
add.s64 %rd1036, %rd1035, %rd4;
shl.b64 %rd1037, %rd1036, 2;
add.s64 %rd1038, %rd3, %rd1037;
ld.global.u32 %r6737, [%rd1038];
abs.s32 %r2307, %r6737;
setp.lt.u32 %p1560, %r2307, 5;
and.b32 %r6738, %r2307, 1;
setp.eq.b32 %p1561, %r6738, 1;
not.pred %p1562, %p1561;
or.pred %p1563, %p1560, %p1562;
@%p1563 bra $L__BB2_1219;
shr.u32 %r6739, %r2307, 1;
and.b32 %r6740, %r6739, 1;
shl.b32 %r6741, %r6740, %r9722;
cvt.u16.u32 %rs805, %r6741;
or.b16 %rs1239, %rs1239, %rs805;
add.s32 %r9724, %r9724, 1;
add.s32 %r9722, %r9722, 1;
setp.ne.s32 %p1564, %r9722, 7;
setp.eq.s32 %p1565, %r9725, 0;
or.pred %p1566, %p1564, %p1565;
and.b16 %rs806, %rs1239, 127;
setp.ne.s16 %p1567, %rs806, 127;
or.pred %p1568, %p1566, %p1567;
setp.ne.s32 %p1569, %r9722, 8;
and.pred %p1570, %p1569, %p1568;
@%p1570 bra $L__BB2_1219;
setp.ge.u32 %p1571, %r9723, %r9562;
@%p1571 bra $L__BB2_1245;
not.b32 %r6743, %r9723;
add.s32 %r6744, %r9562, %r6743;
cvt.u64.u32 %rd1039, %r6744;
add.s64 %rd1040, %rd61, %rd1039;
add.s64 %rd1041, %rd1, %rd1040;
and.b16 %rs808, %rs1239, 255;
st.global.u8 [%rd1041], %rs1239;
add.s32 %r9723, %r9723, 1;
setp.gt.u16 %p1572, %rs808, 143;
selp.u32 %r9725, 1, 0, %p1572;
mov.u16 %rs1239, 0;
mov.u32 %r9722, 0;
$L__BB2_1219:
setp.ge.u32 %p1573, %r2287, %r4058;
@%p1573 bra $L__BB2_1224;
cvt.u64.u32 %rd1042, %r9710;
add.s64 %rd1043, %rd1042, %rd4;
shl.b64 %rd1044, %rd1043, 2;
add.s64 %rd1045, %rd3, %rd1044;
ld.global.u32 %r6745, [%rd1045];
abs.s32 %r2316, %r6745;
setp.lt.u32 %p1574, %r2316, 5;
and.b32 %r6746, %r2316, 1;
setp.eq.b32 %p1575, %r6746, 1;
not.pred %p1576, %p1575;
or.pred %p1577, %p1574, %p1576;
@%p1577 bra $L__BB2_1224;
shr.u32 %r6747, %r2316, 1;
and.b32 %r6748, %r6747, 1;
shl.b32 %r6749, %r6748, %r9722;
cvt.u16.u32 %rs809, %r6749;
or.b16 %rs1239, %rs1239, %rs809;
add.s32 %r9724, %r9724, 1;
add.s32 %r9722, %r9722, 1;
setp.ne.s32 %p1578, %r9722, 7;
setp.eq.s32 %p1579, %r9725, 0;
or.pred %p1580, %p1578, %p1579;
and.b16 %rs810, %rs1239, 127;
setp.ne.s16 %p1581, %rs810, 127;
or.pred %p1582, %p1580, %p1581;
setp.ne.s32 %p1583, %r9722, 8;
and.pred %p1584, %p1583, %p1582;
@%p1584 bra $L__BB2_1224;
setp.ge.u32 %p1585, %r9723, %r9562;
@%p1585 bra $L__BB2_1245;
not.b32 %r6751, %r9723;
add.s32 %r6752, %r9562, %r6751;
cvt.u64.u32 %rd1046, %r6752;
add.s64 %rd1047, %rd61, %rd1046;
add.s64 %rd1048, %rd1, %rd1047;
and.b16 %rs812, %rs1239, 255;
st.global.u8 [%rd1048], %rs1239;
add.s32 %r9723, %r9723, 1;
setp.gt.u16 %p1586, %rs812, 143;
selp.u32 %r9725, 1, 0, %p1586;
mov.u16 %rs1239, 0;
mov.u32 %r9722, 0;
$L__BB2_1224:
setp.ge.u32 %p1587, %r2285, %r4058;
@%p1587 bra $L__BB2_1229;
cvt.u64.u32 %rd1049, %r9711;
add.s64 %rd1050, %rd1049, %rd4;
shl.b64 %rd1051, %rd1050, 2;
add.s64 %rd1052, %rd3, %rd1051;
ld.global.u32 %r6753, [%rd1052];
abs.s32 %r2325, %r6753;
setp.lt.u32 %p1588, %r2325, 5;
and.b32 %r6754, %r2325, 1;
setp.eq.b32 %p1589, %r6754, 1;
not.pred %p1590, %p1589;
or.pred %p1591, %p1588, %p1590;
@%p1591 bra $L__BB2_1229;
shr.u32 %r6755, %r2325, 1;
and.b32 %r6756, %r6755, 1;
shl.b32 %r6757, %r6756, %r9722;
cvt.u16.u32 %rs813, %r6757;
or.b16 %rs1239, %rs1239, %rs813;
add.s32 %r9724, %r9724, 1;
add.s32 %r9722, %r9722, 1;
setp.ne.s32 %p1592, %r9722, 7;
setp.eq.s32 %p1593, %r9725, 0;
or.pred %p1594, %p1592, %p1593;
and.b16 %rs814, %rs1239, 127;
setp.ne.s16 %p1595, %rs814, 127;
or.pred %p1596, %p1594, %p1595;
setp.ne.s32 %p1597, %r9722, 8;
and.pred %p1598, %p1597, %p1596;
@%p1598 bra $L__BB2_1229;
setp.ge.u32 %p1599, %r9723, %r9562;
@%p1599 bra $L__BB2_1245;
not.b32 %r6759, %r9723;
add.s32 %r6760, %r9562, %r6759;
cvt.u64.u32 %rd1053, %r6760;
add.s64 %rd1054, %rd61, %rd1053;
add.s64 %rd1055, %rd1, %rd1054;
and.b16 %rs816, %rs1239, 255;
st.global.u8 [%rd1055], %rs1239;
add.s32 %r9723, %r9723, 1;
setp.gt.u16 %p1600, %rs816, 143;
selp.u32 %r9725, 1, 0, %p1600;
mov.u16 %rs1239, 0;
mov.u32 %r9722, 0;
$L__BB2_1229:
setp.ge.u32 %p1601, %r2283, %r4058;
@%p1601 bra $L__BB2_1234;
cvt.u64.u32 %rd1056, %r9712;
add.s64 %rd1057, %rd1056, %rd4;
shl.b64 %rd1058, %rd1057, 2;
add.s64 %rd1059, %rd3, %rd1058;
ld.global.u32 %r6761, [%rd1059];
abs.s32 %r2334, %r6761;
setp.lt.u32 %p1602, %r2334, 5;
and.b32 %r6762, %r2334, 1;
setp.eq.b32 %p1603, %r6762, 1;
not.pred %p1604, %p1603;
or.pred %p1605, %p1602, %p1604;
@%p1605 bra $L__BB2_1234;
shr.u32 %r6763, %r2334, 1;
and.b32 %r6764, %r6763, 1;
shl.b32 %r6765, %r6764, %r9722;
cvt.u16.u32 %rs817, %r6765;
or.b16 %rs1239, %rs1239, %rs817;
add.s32 %r9724, %r9724, 1;
add.s32 %r9722, %r9722, 1;
setp.ne.s32 %p1606, %r9722, 7;
setp.eq.s32 %p1607, %r9725, 0;
or.pred %p1608, %p1606, %p1607;
and.b16 %rs818, %rs1239, 127;
setp.ne.s16 %p1609, %rs818, 127;
or.pred %p1610, %p1608, %p1609;
setp.ne.s32 %p1611, %r9722, 8;
and.pred %p1612, %p1611, %p1610;
@%p1612 bra $L__BB2_1234;
setp.ge.u32 %p1613, %r9723, %r9562;
@%p1613 bra $L__BB2_1245;
not.b32 %r6767, %r9723;
add.s32 %r6768, %r9562, %r6767;
cvt.u64.u32 %rd1060, %r6768;
add.s64 %rd1061, %rd61, %rd1060;
add.s64 %rd1062, %rd1, %rd1061;
and.b16 %rs820, %rs1239, 255;
st.global.u8 [%rd1062], %rs1239;
add.s32 %r9723, %r9723, 1;
setp.gt.u16 %p1614, %rs820, 143;
selp.u32 %r9725, 1, 0, %p1614;
mov.u16 %rs1239, 0;
mov.u32 %r9722, 0;
$L__BB2_1234:
add.s32 %r9712, %r9712, 1;
add.s32 %r9711, %r9711, 1;
add.s32 %r9710, %r9710, 1;
add.s32 %r9709, %r9709, 1;
add.s32 %r9713, %r9713, 1;
setp.lt.u32 %p1615, %r9713, 8;
@%p1615 bra $L__BB2_1213;
add.s32 %r9704, %r9704, 8;
setp.lt.u32 %p1616, %r9704, %r4057;
@%p1616 bra $L__BB2_1212;
add.s32 %r9699, %r9699, 4;
setp.lt.u32 %p1617, %r9699, %r4058;
@%p1617 bra $L__BB2_1211;
bra.uni $L__BB2_1239;
$L__BB2_1244:
setp.eq.s32 %p1624, %r8433, 0;
@%p1624 bra $L__BB2_1243;
bra.uni $L__BB2_1245;
$L__BB2_1237:
mov.u32 %r9722, 0;
mov.u32 %r9734, %r9722;
$L__BB2_1238:
add.s32 %r9734, %r9734, 4;
setp.lt.u32 %p1618, %r9734, %r4058;
mov.u16 %rs1239, 0;
mov.u32 %r9723, %r9722;
mov.u32 %r9724, %r9722;
@%p1618 bra $L__BB2_1238;
$L__BB2_1239:
setp.eq.s32 %p1619, %r9722, 0;
@%p1619 bra $L__BB2_1242;
setp.ge.u32 %p1620, %r9723, %r9562;
@%p1620 bra $L__BB2_1245;
not.b32 %r6773, %r9723;
add.s32 %r6774, %r9562, %r6773;
cvt.u64.u32 %rd1063, %r6774;
add.s64 %rd1064, %rd61, %rd1063;
add.s64 %rd1065, %rd1, %rd1064;
st.global.u8 [%rd1065], %rs1239;
add.s32 %r9723, %r9723, 1;
$L__BB2_1242:
setp.le.u32 %p1621, %r9723, %r9562;
setp.eq.s32 %p1622, %r9724, %r8433;
and.pred %p1623, %p1622, %p1621;
@%p1623 bra $L__BB2_1243;
bra.uni $L__BB2_1245;
$L__BB2_1243:
mov.u32 %r6778, 0;
st.global.u32 [%rd6], %r6778;
st.global.u32 [%rd6+4], %r6778;
st.global.u32 [%rd6+8], %r1989;
st.global.u32 [%rd6+12], %r4062;
st.global.u32 [%rd6+16], %r45;
st.global.u32 [%rd6+20], %r1738;
st.global.u32 [%rd6+24], %r9563;
st.global.u32 [%rd6+28], %r6778;
bra.uni $L__BB2_1905;
$L__BB2_1245:
mov.u32 %r6775, 2;
st.global.u32 [%rd6], %r6775;
mov.u32 %r6776, 7;
st.global.u32 [%rd6+4], %r6776;
mov.u32 %r6777, 0;
st.global.u32 [%rd6+8], %r6777;
st.global.u32 [%rd6+12], %r6777;
st.global.u32 [%rd6+16], %r6777;
st.global.u32 [%rd6+20], %r6777;
st.global.u32 [%rd6+24], %r6777;
st.global.u32 [%rd6+28], %r6777;
bra.uni $L__BB2_1905;
}
// .globl j2k_htj2k_encode_codeblocks_multi_input_cleanup
.visible .entry j2k_htj2k_encode_codeblocks_multi_input_cleanup(
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_cleanup_param_0,
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_cleanup_param_1,
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_cleanup_param_2,
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_cleanup_param_3,
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_cleanup_param_4,
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_cleanup_param_5,
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_cleanup_param_6
)
.maxntid 128, 1, 1
{
.reg .pred %p<1453>;
.reg .b16 %rs<1196>;
.reg .b32 %r<8155>;
.reg .b64 %rd<690>;
// demoted variable
.shared .align 4 .b8 _ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE9block_max[512];
// demoted variable
.shared .align 1 .b8 _ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val[513];
// demoted variable
.shared .align 1 .b8 _ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val[513];
ld.param.u64 %rd50, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_param_0];
ld.param.u64 %rd48, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_param_4];
ld.param.u64 %rd51, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_param_6];
cvta.to.global.u64 %rd1, %rd50;
mov.u32 %r3196, %ctaid.x;
cvt.u64.u32 %rd2, %r3196;
setp.ge.u64 %p1, %rd2, %rd51;
@%p1 bra $L__BB3_1261;
ld.param.u64 %rd686, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_param_1];
cvta.to.global.u64 %rd52, %rd686;
mul.lo.s64 %rd53, %rd2, 40;
add.s64 %rd54, %rd52, %rd53;
ld.global.u64 %rd55, [%rd54];
cvta.to.global.u64 %rd3, %rd55;
ld.global.v2.u32 {%r3197, %r3198}, [%rd54+8];
ld.global.v2.u32 {%r3200, %r3201}, [%rd54+16];
ld.global.v2.u32 {%r3202, %r3203}, [%rd54+24];
ld.global.v2.u32 {%r3204, %r3205}, [%rd54+32];
cvt.u64.u32 %rd4, %r3197;
setp.eq.s32 %p2, %r3200, 0;
setp.eq.s32 %p3, %r3201, 0;
or.pred %p4, %p2, %p3;
@%p4 bra $L__BB3_14;
bra.uni $L__BB3_2;
$L__BB3_14:
mov.u32 %r25, 0;
bra.uni $L__BB3_15;
$L__BB3_2:
mul.lo.s32 %r8, %r3201, %r3200;
setp.eq.s32 %p5, %r3198, %r3200;
@%p5 bra $L__BB3_6;
bra.uni $L__BB3_3;
$L__BB3_6:
mov.u32 %r6289, %tid.x;
setp.ge.u32 %p8, %r6289, %r8;
mov.u32 %r6291, 0;
@%p8 bra $L__BB3_9;
mov.u32 %r6291, 0;
$L__BB3_8:
cvt.u64.u32 %rd60, %r6289;
add.s64 %rd61, %rd60, %rd4;
shl.b64 %rd62, %rd61, 2;
add.s64 %rd63, %rd3, %rd62;
ld.global.u32 %r3217, [%rd63];
abs.s32 %r3218, %r3217;
max.u32 %r6291, %r6291, %r3218;
mov.u32 %r3219, %ntid.x;
add.s32 %r6289, %r6289, %r3219;
setp.lt.u32 %p9, %r6289, %r8;
@%p9 bra $L__BB3_8;
bra.uni $L__BB3_9;
$L__BB3_3:
mov.u32 %r6287, %tid.x;
setp.ge.u32 %p6, %r6287, %r8;
mov.u32 %r6291, 0;
@%p6 bra $L__BB3_9;
sub.s32 %r9, %r3198, %r3200;
mov.u32 %r6291, 0;
$L__BB3_5:
div.u32 %r3209, %r6287, %r3200;
mad.lo.s32 %r3210, %r9, %r3209, %r6287;
cvt.u64.u32 %rd56, %r3210;
add.s64 %rd57, %rd56, %rd4;
shl.b64 %rd58, %rd57, 2;
add.s64 %rd59, %rd3, %rd58;
ld.global.u32 %r3211, [%rd59];
abs.s32 %r3212, %r3211;
max.u32 %r6291, %r6291, %r3212;
mov.u32 %r3213, %ntid.x;
add.s32 %r6287, %r6287, %r3213;
setp.lt.u32 %p7, %r6287, %r8;
@%p7 bra $L__BB3_5;
$L__BB3_9:
mov.u32 %r3220, %tid.x;
shl.b32 %r3221, %r3220, 2;
mov.u32 %r3222, _ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE9block_max;
add.s32 %r3223, %r3222, %r3221;
st.shared.u32 [%r3223], %r6291;
bar.sync 0;
mov.u32 %r3224, %ntid.x;
shr.u32 %r6292, %r3224, 1;
setp.eq.s32 %p10, %r6292, 0;
@%p10 bra $L__BB3_13;
$L__BB3_10:
setp.ge.u32 %p11, %r3220, %r6292;
@%p11 bra $L__BB3_12;
add.s32 %r3230, %r6292, %r3220;
shl.b32 %r3231, %r6292, 2;
add.s32 %r3232, %r3223, %r3231;
ld.shared.u32 %r3233, [%r3232];
ld.shared.u32 %r3234, [%r3223];
setp.gt.u32 %p12, %r3234, %r3233;
selp.b32 %r3235, %r3220, %r3230, %p12;
shl.b32 %r3236, %r3235, 2;
add.s32 %r3237, %r3222, %r3236;
ld.shared.u32 %r3238, [%r3237];
st.shared.u32 [%r3223], %r3238;
$L__BB3_12:
bar.sync 0;
shr.u32 %r6292, %r6292, 1;
setp.ne.s32 %p13, %r6292, 0;
@%p13 bra $L__BB3_10;
$L__BB3_13:
ld.shared.u32 %r25, [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE9block_max];
$L__BB3_15:
mov.u32 %r3240, %tid.x;
setp.ne.s32 %p14, %r3240, 0;
@%p14 bra $L__BB3_1261;
bra.uni $L__BB3_16;
$L__BB3_1261:
ret;
$L__BB3_16:
mov.u32 %r6286, %ctaid.x;
ld.param.u64 %rd681, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_param_5];
setp.eq.s32 %p15, %r3200, 64;
setp.eq.s32 %p16, %r3201, 64;
and.pred %p17, %p15, %p16;
setp.eq.s32 %p18, %r3198, 64;
and.pred %p19, %p18, %p17;
mov.u32 %r3241, 1;
cvt.u64.u32 %rd5, %r3203;
cvta.to.global.u64 %rd64, %rd681;
mul.wide.u32 %rd65, %r6286, 32;
add.s64 %rd6, %rd64, %rd65;
st.global.u32 [%rd6], %r3241;
mov.u32 %r3243, 0;
st.global.u32 [%rd6+4], %r3243;
st.global.u32 [%rd6+8], %r3243;
st.global.u32 [%rd6+12], %r3243;
st.global.u32 [%rd6+16], %r3243;
st.global.u32 [%rd6+20], %r3243;
st.global.u32 [%rd6+24], %r3243;
st.global.u32 [%rd6+28], %r3243;
add.s64 %rd66, %rd1, %rd5;
add.s64 %rd7, %rd66, 20548;
@%p19 bra $L__BB3_669;
bra.uni $L__BB3_17;
$L__BB3_669:
add.s32 %r1718, %r3202, -1;
setp.gt.u32 %p768, %r1718, 29;
setp.lt.u32 %p769, %r3204, 20549;
or.pred %p770, %p768, %p769;
@%p770 bra $L__BB3_1260;
bra.uni $L__BB3_670;
$L__BB3_1260:
mov.u32 %r6281, 2;
st.global.u32 [%rd6], %r6281;
mov.u32 %r6282, 1;
st.global.u32 [%rd6+4], %r6282;
mov.u32 %r6283, 0;
st.global.u32 [%rd6+8], %r6283;
st.global.u32 [%rd6+12], %r6283;
st.global.u32 [%rd6+16], %r6283;
st.global.u32 [%rd6+20], %r6283;
st.global.u32 [%rd6+24], %r6283;
st.global.u32 [%rd6+28], %r6283;
bra.uni $L__BB3_1261;
$L__BB3_17:
setp.eq.s32 %p1452, %r3201, 0;
add.s32 %r3244, %r3200, -1;
setp.ge.u32 %p21, %r3244, %r3198;
or.pred %p22, %p21, %p1452;
setp.gt.u32 %p23, %r3200, 1024;
or.pred %p24, %p23, %p22;
@%p24 bra $L__BB3_668;
cvt.u16.u32 %rs449, %r3200;
mov.u16 %rs450, 4096;
div.u16 %rs451, %rs450, %rs449;
cvt.u32.u16 %r3245, %rs451;
setp.gt.u32 %p25, %r3201, %r3245;
add.s32 %r26, %r3202, -1;
setp.gt.u32 %p26, %r26, 29;
or.pred %p27, %p26, %p25;
setp.lt.u32 %p28, %r3204, 20549;
or.pred %p29, %p28, %p27;
@%p29 bra $L__BB3_668;
bra.uni $L__BB3_19;
$L__BB3_668:
mov.u32 %r4834, 2;
st.global.u32 [%rd6], %r4834;
mov.u32 %r4835, 1;
st.global.u32 [%rd6+4], %r4835;
mov.u32 %r4836, 0;
st.global.u32 [%rd6+8], %r4836;
st.global.u32 [%rd6+12], %r4836;
st.global.u32 [%rd6+16], %r4836;
st.global.u32 [%rd6+20], %r4836;
st.global.u32 [%rd6+24], %r4836;
st.global.u32 [%rd6+28], %r4836;
bra.uni $L__BB3_1261;
$L__BB3_670:
setp.eq.s32 %p771, %r3205, 1;
@%p771 bra $L__BB3_672;
bra.uni $L__BB3_671;
$L__BB3_672:
setp.eq.s32 %p772, %r25, 0;
@%p772 bra $L__BB3_1259;
clz.b32 %r4840, %r25;
mov.u32 %r4841, 32;
sub.s32 %r4842, %r4841, %r4840;
setp.gt.u32 %p773, %r4842, %r3202;
@%p773 bra $L__BB3_1258;
bra.uni $L__BB3_674;
$L__BB3_1258:
mov.u32 %r6277, 1;
st.global.u32 [%rd6], %r6277;
mov.u32 %r6278, 2;
st.global.u32 [%rd6+4], %r6278;
mov.u32 %r6279, 0;
st.global.u32 [%rd6+8], %r6279;
st.global.u32 [%rd6+12], %r6279;
st.global.u32 [%rd6+16], %r6279;
st.global.u32 [%rd6+20], %r6279;
st.global.u32 [%rd6+24], %r6279;
st.global.u32 [%rd6+28], %r6279;
bra.uni $L__BB3_1261;
$L__BB3_19:
setp.eq.s32 %p30, %r3205, 1;
@%p30 bra $L__BB3_21;
bra.uni $L__BB3_20;
$L__BB3_21:
setp.eq.s32 %p31, %r25, 0;
@%p31 bra $L__BB3_667;
clz.b32 %r3249, %r25;
mov.u32 %r3250, 32;
sub.s32 %r3251, %r3250, %r3249;
setp.gt.u32 %p32, %r3251, %r3202;
@%p32 bra $L__BB3_666;
bra.uni $L__BB3_23;
$L__BB3_666:
mov.u32 %r4830, 1;
st.global.u32 [%rd6], %r4830;
mov.u32 %r4831, 2;
st.global.u32 [%rd6+4], %r4831;
mov.u32 %r4832, 0;
st.global.u32 [%rd6+8], %r4832;
st.global.u32 [%rd6+12], %r4832;
st.global.u32 [%rd6+16], %r4832;
st.global.u32 [%rd6+20], %r4832;
st.global.u32 [%rd6+24], %r4832;
st.global.u32 [%rd6+28], %r4832;
bra.uni $L__BB3_1261;
$L__BB3_671:
mov.u32 %r4837, 2;
st.global.u32 [%rd6], %r4837;
mov.u32 %r4838, 5;
st.global.u32 [%rd6+4], %r4838;
mov.u32 %r4839, 0;
st.global.u32 [%rd6+8], %r4839;
st.global.u32 [%rd6+12], %r4839;
st.global.u32 [%rd6+16], %r4839;
st.global.u32 [%rd6+20], %r4839;
st.global.u32 [%rd6+24], %r4839;
st.global.u32 [%rd6+28], %r4839;
bra.uni $L__BB3_1261;
$L__BB3_20:
mov.u32 %r3246, 2;
st.global.u32 [%rd6], %r3246;
mov.u32 %r3247, 5;
st.global.u32 [%rd6+4], %r3247;
mov.u32 %r3248, 0;
st.global.u32 [%rd6+8], %r3248;
st.global.u32 [%rd6+12], %r3248;
st.global.u32 [%rd6+16], %r3248;
st.global.u32 [%rd6+20], %r3248;
st.global.u32 [%rd6+24], %r3248;
st.global.u32 [%rd6+28], %r3248;
bra.uni $L__BB3_1261;
$L__BB3_1259:
mov.u32 %r6280, 0;
st.global.u32 [%rd6], %r6280;
st.global.u32 [%rd6+4], %r6280;
st.global.u32 [%rd6+8], %r6280;
st.global.u32 [%rd6+12], %r6280;
st.global.u32 [%rd6+16], %r3202;
st.global.u32 [%rd6+20], %r6280;
st.global.u32 [%rd6+24], %r6280;
st.global.u32 [%rd6+28], %r6280;
bra.uni $L__BB3_1261;
$L__BB3_674:
ld.param.u64 %rd683, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_param_2];
mov.u16 %rs715, 255;
st.global.u8 [%rd7], %rs715;
mov.u32 %r4859, 0;
mov.u16 %rs1089, 0;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val], %rs1089;
mov.u32 %r7779, 1;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+1], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+1], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+2], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+2], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+3], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+3], %rs1089;
mov.u32 %r7778, 4;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+4], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+4], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+5], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+5], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+6], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+6], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+7], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+7], %rs1089;
mov.u32 %r7924, 8;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+8], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+8], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+9], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+9], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+10], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+10], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+11], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+11], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+12], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+12], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+13], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+13], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+14], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+14], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+15], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+15], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+16], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+16], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+17], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+17], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+18], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+18], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+19], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+19], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+20], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+20], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+21], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+21], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+22], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+22], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+23], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+23], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+24], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+24], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+25], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+25], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+26], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+26], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+27], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+27], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+28], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+28], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+29], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+29], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+30], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+30], %rs1089;
mov.u32 %r4860, 31;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+31], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+31], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+32], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+32], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+33], %rs1089;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+33], %rs1089;
sub.s32 %r1719, %r4860, %r3202;
shl.b64 %rd410, %rd4, 2;
add.s64 %rd687, %rd3, %rd410;
cvta.to.global.u64 %rd24, %rd683;
mov.u16 %rs1147, 15;
mov.u32 %r7305, %r4859;
mov.u32 %r7306, %r4859;
mov.u32 %r7925, %r4859;
mov.u32 %r7923, %r4859;
mov.u32 %r7922, %r4859;
mov.u32 %r8093, %r4859;
mov.u32 %r7777, %r7779;
mov.u32 %r7776, %r4859;
mov.u32 %r7382, %r4859;
mov.u32 %r7388, %r7924;
mov.u32 %r7543, %r4859;
mov.u32 %r7542, %r4859;
mov.u32 %r7541, %r7779;
mov.u32 %r7540, %r4859;
$L__BB3_675:
ld.global.u32 %r1737, [%rd687];
setp.eq.s32 %p774, %r1737, 0;
mov.u32 %r7322, %r4859;
@%p774 bra $L__BB3_677;
and.b32 %r4862, %r1737, -2147483648;
abs.s32 %r4863, %r1737;
shl.b32 %r4864, %r4863, %r1719;
or.b32 %r7322, %r4864, %r4862;
$L__BB3_677:
shl.b32 %r4868, %r7322, 1;
shr.u32 %r4869, %r4868, %r1719;
and.b32 %r1740, %r4869, -2;
setp.eq.s32 %p775, %r1740, 0;
mov.u32 %r7326, 0;
mov.u32 %r7323, %r7326;
mov.u32 %r7324, %r7326;
mov.u32 %r7330, %r7326;
@%p775 bra $L__BB3_679;
add.s32 %r4871, %r1740, -1;
clz.b32 %r4872, %r4871;
mov.u32 %r4873, 32;
sub.s32 %r7323, %r4873, %r4872;
shr.u32 %r4874, %r7322, 31;
add.s32 %r4875, %r4874, %r1740;
add.s32 %r7324, %r4875, -2;
mov.u32 %r7330, 1;
$L__BB3_679:
ld.global.u32 %r1746, [%rd687+256];
setp.eq.s32 %p776, %r1746, 0;
@%p776 bra $L__BB3_681;
and.b32 %r4877, %r1746, -2147483648;
abs.s32 %r4878, %r1746;
shl.b32 %r4879, %r4878, %r1719;
or.b32 %r7326, %r4879, %r4877;
$L__BB3_681:
shl.b32 %r4882, %r7326, 1;
shr.u32 %r4883, %r4882, %r1719;
and.b32 %r1749, %r4883, -2;
setp.eq.s32 %p777, %r1749, 0;
mov.u32 %r7331, 0;
mov.u32 %r7327, %r7331;
mov.u32 %r7328, %r7331;
mov.u32 %r7334, %r7323;
@%p777 bra $L__BB3_683;
or.b32 %r7330, %r7330, 2;
add.s32 %r4884, %r1749, -1;
clz.b32 %r4885, %r4884;
mov.u32 %r4886, 32;
sub.s32 %r7327, %r4886, %r4885;
max.s32 %r7334, %r7323, %r7327;
shr.u32 %r4887, %r7326, 31;
add.s32 %r4888, %r4887, %r1749;
add.s32 %r7328, %r4888, -2;
$L__BB3_683:
ld.global.u32 %r1758, [%rd687+4];
setp.eq.s32 %p778, %r1758, 0;
@%p778 bra $L__BB3_685;
and.b32 %r4890, %r1758, -2147483648;
abs.s32 %r4891, %r1758;
shl.b32 %r4892, %r4891, %r1719;
or.b32 %r7331, %r4892, %r4890;
$L__BB3_685:
shl.b32 %r4895, %r7331, 1;
shr.u32 %r4896, %r4895, %r1719;
and.b32 %r1761, %r4896, -2;
setp.eq.s32 %p779, %r1761, 0;
mov.u32 %r7336, 0;
mov.u32 %r7332, %r7336;
mov.u32 %r7333, %r7336;
@%p779 bra $L__BB3_687;
or.b32 %r7330, %r7330, 4;
add.s32 %r4897, %r1761, -1;
clz.b32 %r4898, %r4897;
mov.u32 %r4899, 32;
sub.s32 %r7332, %r4899, %r4898;
max.s32 %r7334, %r7334, %r7332;
shr.u32 %r4900, %r7331, 31;
add.s32 %r4901, %r4900, %r1761;
add.s32 %r7333, %r4901, -2;
$L__BB3_687:
ld.global.u32 %r1770, [%rd687+260];
setp.eq.s32 %p780, %r1770, 0;
@%p780 bra $L__BB3_689;
and.b32 %r4903, %r1770, -2147483648;
abs.s32 %r4904, %r1770;
shl.b32 %r4905, %r4904, %r1719;
or.b32 %r7336, %r4905, %r4903;
$L__BB3_689:
shl.b32 %r4908, %r7336, 1;
shr.u32 %r4909, %r4908, %r1719;
and.b32 %r1773, %r4909, -2;
setp.eq.s32 %p781, %r1773, 0;
mov.u32 %r7341, 0;
mov.u32 %r7337, %r7341;
mov.u32 %r7338, %r7341;
@%p781 bra $L__BB3_691;
or.b32 %r7330, %r7330, 8;
add.s32 %r4910, %r1773, -1;
clz.b32 %r4911, %r4910;
mov.u32 %r4912, 32;
sub.s32 %r7337, %r4912, %r4911;
max.s32 %r7334, %r7334, %r7337;
shr.u32 %r4913, %r7336, 31;
add.s32 %r4914, %r4913, %r1773;
add.s32 %r7338, %r4914, -2;
$L__BB3_691:
add.s32 %r4916, %r7334, -1;
setp.lt.s32 %p782, %r7334, 2;
setp.gt.s32 %p783, %r7334, 1;
selp.b32 %r1782, %r4916, 0, %p783;
@%p782 bra $L__BB3_693;
setp.eq.s32 %p784, %r7323, %r7334;
selp.u32 %r4917, 1, 0, %p784;
setp.eq.s32 %p785, %r7327, %r7334;
selp.u32 %r4918, -1, 0, %p785;
bfi.b32 %r4919, %r4918, %r4917, 1, 1;
setp.eq.s32 %p786, %r7332, %r7334;
selp.u16 %rs716, 1, 0, %p786;
mul.wide.u16 %r4920, %rs716, 4;
or.b32 %r4921, %r4919, %r4920;
setp.eq.s32 %p787, %r7337, %r7334;
selp.u16 %rs717, 1, 0, %p787;
mul.wide.u16 %r4922, %rs717, 8;
or.b32 %r7341, %r4921, %r4922;
$L__BB3_693:
shr.u32 %r4923, %r7305, 1;
mov.u32 %r4924, _ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val;
add.s32 %r1785, %r4924, %r4923;
ld.shared.u8 %rs718, [%r1785];
cvt.u32.u16 %r4925, %rs718;
and.b32 %r4926, %r4925, 255;
and.b32 %r4927, %r7327, 255;
setp.lt.u32 %p788, %r4927, %r4926;
cvt.u16.u32 %rs719, %r7327;
selp.b16 %rs720, %rs718, %rs719, %p788;
st.shared.u8 [%r1785], %rs720;
cvt.u16.u32 %rs238, %r7337;
st.shared.u8 [%r1785+1], %rs238;
and.b32 %r1786, %r7330, 2;
cvt.u16.u32 %rs721, %r1786;
shr.u16 %rs722, %rs721, 1;
mov.u32 %r4928, _ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val;
add.s32 %r1787, %r4928, %r4923;
ld.shared.u8 %rs723, [%r1787];
or.b16 %rs724, %rs723, %rs722;
st.shared.u8 [%r1787], %rs724;
and.b32 %r1788, %r7330, 8;
shr.u32 %r1789, %r1788, 3;
st.shared.u8 [%r1787+1], %r1789;
shl.b32 %r4929, %r7330, 4;
shl.b32 %r4930, %r7306, 8;
or.b32 %r4931, %r4929, %r4930;
or.b32 %r4932, %r4931, %r7341;
mul.wide.u32 %rd411, %r4932, 2;
add.s64 %rd412, %rd24, %rd411;
ld.global.u16 %rs239, [%rd412];
shr.u16 %rs725, %rs239, 4;
and.b16 %rs240, %rs725, 7;
setp.eq.s16 %p789, %rs240, 0;
mov.u32 %r7353, %r7776;
@%p789 bra $L__BB3_700;
cvt.u32.u16 %r7342, %rs240;
shr.u16 %rs726, %rs239, 8;
cvt.u32.u16 %r7343, %rs726;
$L__BB3_695:
mov.u32 %r1792, %r7342;
setp.gt.u32 %p790, %r7779, 2879;
mov.u32 %r7353, 1;
@%p790 bra $L__BB3_700;
mov.u32 %r4934, 8;
sub.s32 %r4935, %r4934, %r7777;
sub.s32 %r4936, %r4935, %r7778;
min.u32 %r4937, %r4936, %r1792;
setp.eq.s32 %p791, %r4937, 32;
mov.u32 %r4938, -1;
shl.b32 %r4939, %r4938, %r4937;
not.b32 %r4940, %r4939;
selp.b32 %r4941, -1, %r4940, %p791;
and.b32 %r4942, %r4941, %r7343;
shl.b32 %r4943, %r4942, %r7778;
cvt.u16.u32 %rs727, %r4943;
or.b16 %rs1147, %rs1147, %rs727;
add.s32 %r7778, %r4937, %r7778;
sub.s32 %r7342, %r1792, %r4937;
shr.u32 %r7343, %r7343, %r4937;
setp.gt.u32 %p792, %r4936, %r1792;
@%p792 bra $L__BB3_699;
setp.ne.s32 %p793, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs728, %rs1147, 255;
setp.ne.s16 %p794, %rs728, 127;
and.pred %p795, %p793, %p794;
@%p795 bra $L__BB3_699;
mov.u32 %r4946, 20548;
sub.s32 %r4947, %r4946, %r7779;
cvt.u64.u32 %rd413, %r4947;
add.s64 %rd414, %rd413, %rd5;
add.s64 %rd415, %rd1, %rd414;
st.global.u8 [%rd415], %rs1147;
add.s32 %r7779, %r7779, 1;
setp.gt.u16 %p796, %rs728, 143;
selp.u32 %r7777, 1, 0, %p796;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_699:
setp.ne.s32 %p797, %r7342, 0;
mov.u32 %r7353, %r7776;
@%p797 bra $L__BB3_695;
$L__BB3_700:
setp.ne.s32 %p798, %r7306, 0;
@%p798 bra $L__BB3_748;
setp.eq.s32 %p799, %r7330, 0;
add.s32 %r4948, %r7382, 17477;
cvt.u64.u32 %rd416, %r4948;
add.s64 %rd417, %rd416, %rd5;
add.s64 %rd26, %rd1, %rd417;
@%p799 bra $L__BB3_740;
shl.b16 %rs1089, %rs1089, 1;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p800, %r7388, 0;
mov.u32 %r7387, %r7540;
@%p800 bra $L__BB3_705;
setp.gt.u32 %p801, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7387, 1;
@%p801 bra $L__BB3_705;
st.global.u8 [%rd26], %rs1089;
add.s32 %r7382, %r7382, 1;
mov.u32 %r7388, 8;
mov.u16 %rs1089, 0;
mov.u32 %r7387, %r7540;
$L__BB3_705:
setp.lt.u32 %p802, %r7542, 3;
mov.u32 %r7357, 0;
@%p802 bra $L__BB3_708;
setp.lt.u32 %p803, %r7542, 6;
mov.u32 %r7357, 1;
@%p803 bra $L__BB3_708;
setp.lt.u32 %p804, %r7542, 9;
setp.eq.s32 %p805, %r7542, 11;
selp.b32 %r4954, 4, 5, %p805;
setp.lt.u32 %p806, %r7542, 11;
selp.b32 %r4955, 3, %r4954, %p806;
selp.b32 %r7357, 2, %r4955, %p804;
$L__BB3_708:
setp.eq.s32 %p807, %r7357, 0;
@%p807 bra $L__BB3_736;
add.s32 %r1816, %r7357, -1;
and.b32 %r1817, %r7357, 3;
setp.eq.s32 %p808, %r1817, 0;
mov.u32 %r7367, %r7357;
mov.u32 %r7370, %r7387;
@%p808 bra $L__BB3_721;
mov.u32 %r4957, 1;
shl.b32 %r4958, %r4957, %r1816;
and.b32 %r4959, %r4958, %r7543;
setp.ne.s32 %p809, %r4959, 0;
selp.u32 %r4960, 1, 0, %p809;
cvt.u32.u16 %r4961, %rs1089;
bfi.b32 %r4962, %r4961, %r4960, 1, 8;
cvt.u16.u32 %rs1089, %r4962;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p810, %r7388, 0;
mov.u32 %r7370, %r7387;
@%p810 bra $L__BB3_713;
setp.gt.u32 %p811, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7370, %r4957;
@%p811 bra $L__BB3_713;
add.s32 %r4966, %r7382, 17477;
cvt.u64.u32 %rd418, %r4966;
add.s64 %rd419, %rd418, %rd5;
add.s64 %rd420, %rd1, %rd419;
st.global.u8 [%rd420], %rs1089;
add.s32 %r7382, %r7382, 1;
mov.u32 %r7388, 8;
mov.u16 %rs1089, 0;
mov.u32 %r7370, %r7387;
$L__BB3_713:
setp.eq.s32 %p812, %r1817, 1;
mov.u32 %r7387, %r7370;
mov.u32 %r7367, %r1816;
@%p812 bra $L__BB3_721;
add.s32 %r7367, %r7357, -2;
mov.u32 %r4967, 1;
shl.b32 %r4968, %r4967, %r7367;
and.b32 %r4969, %r4968, %r7543;
setp.ne.s32 %p813, %r4969, 0;
selp.u32 %r4970, 1, 0, %p813;
cvt.u32.u16 %r4971, %rs1089;
bfi.b32 %r4972, %r4971, %r4970, 1, 8;
cvt.u16.u32 %rs1089, %r4972;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p814, %r7388, 0;
mov.u32 %r7361, %r7370;
@%p814 bra $L__BB3_717;
setp.gt.u32 %p815, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7361, %r4967;
@%p815 bra $L__BB3_717;
add.s32 %r4975, %r7382, 17477;
cvt.u64.u32 %rd421, %r4975;
add.s64 %rd422, %rd421, %rd5;
add.s64 %rd423, %rd1, %rd422;
and.b16 %rs735, %rs1089, 255;
st.global.u8 [%rd423], %rs1089;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p816, %rs735, 255;
selp.b32 %r7388, 7, 8, %p816;
mov.u16 %rs1089, 0;
mov.u32 %r7361, %r7370;
$L__BB3_717:
setp.eq.s32 %p817, %r1817, 2;
mov.u32 %r7387, %r7361;
mov.u32 %r7370, %r7361;
@%p817 bra $L__BB3_721;
add.s32 %r7367, %r7357, -3;
mov.u32 %r4976, 1;
shl.b32 %r4977, %r4976, %r7367;
and.b32 %r4978, %r4977, %r7543;
setp.ne.s32 %p818, %r4978, 0;
selp.u32 %r4979, 1, 0, %p818;
cvt.u32.u16 %r4980, %rs1089;
bfi.b32 %r4981, %r4980, %r4979, 1, 8;
cvt.u16.u32 %rs1089, %r4981;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p819, %r7388, 0;
mov.u32 %r7387, %r7361;
mov.u32 %r7370, %r7361;
@%p819 bra $L__BB3_721;
setp.gt.u32 %p820, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7387, %r4976;
mov.u32 %r7370, %r4976;
@%p820 bra $L__BB3_721;
add.s32 %r4986, %r7382, 17477;
cvt.u64.u32 %rd424, %r4986;
add.s64 %rd425, %rd424, %rd5;
add.s64 %rd426, %rd1, %rd425;
and.b16 %rs738, %rs1089, 255;
st.global.u8 [%rd426], %rs1089;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p821, %rs738, 255;
selp.b32 %r7388, 7, 8, %p821;
mov.u16 %rs1089, 0;
mov.u32 %r7387, %r7361;
mov.u32 %r7370, %r7361;
$L__BB3_721:
setp.lt.u32 %p822, %r1816, 3;
@%p822 bra $L__BB3_736;
mov.u32 %r7387, %r7370;
$L__BB3_723:
add.s32 %r4987, %r7367, -1;
mov.u32 %r4988, 1;
shl.b32 %r4989, %r4988, %r4987;
and.b32 %r4990, %r4989, %r7543;
setp.ne.s32 %p823, %r4990, 0;
selp.u32 %r4991, 1, 0, %p823;
cvt.u32.u16 %r4992, %rs1089;
bfi.b32 %r7376, %r4992, %r4991, 1, 8;
add.s32 %r7377, %r7388, -1;
setp.ne.s32 %p824, %r7377, 0;
mov.u32 %r7375, %r7387;
@%p824 bra $L__BB3_726;
setp.gt.u32 %p825, %r7382, 191;
mov.u32 %r7377, 0;
mov.u32 %r7375, %r4988;
@%p825 bra $L__BB3_726;
cvt.u16.u32 %rs739, %r7376;
and.b16 %rs740, %rs739, 255;
add.s32 %r4996, %r7382, 17477;
cvt.u64.u32 %rd427, %r4996;
add.s64 %rd428, %rd427, %rd5;
add.s64 %rd429, %rd1, %rd428;
st.global.u8 [%rd429], %rs739;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p826, %rs740, 255;
selp.b32 %r7377, 7, 8, %p826;
mov.u32 %r7376, 0;
mov.u32 %r7375, %r7387;
$L__BB3_726:
add.s32 %r4997, %r7367, -2;
shl.b32 %r4999, %r4988, %r4997;
and.b32 %r5000, %r4999, %r7543;
setp.ne.s32 %p827, %r5000, 0;
and.b32 %r5001, %r7376, 127;
selp.u32 %r5002, 1, 0, %p827;
bfi.b32 %r7380, %r5001, %r5002, 1, 7;
add.s32 %r7381, %r7377, -1;
setp.ne.s32 %p828, %r7381, 0;
mov.u32 %r7379, %r7375;
@%p828 bra $L__BB3_729;
setp.gt.u32 %p829, %r7382, 191;
mov.u32 %r7381, 0;
mov.u32 %r7379, 1;
@%p829 bra $L__BB3_729;
cvt.u16.u32 %rs741, %r7380;
and.b16 %rs742, %rs741, 255;
add.s32 %r5006, %r7382, 17477;
cvt.u64.u32 %rd430, %r5006;
add.s64 %rd431, %rd430, %rd5;
add.s64 %rd432, %rd1, %rd431;
st.global.u8 [%rd432], %rs741;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p830, %rs742, 255;
selp.b32 %r7381, 7, 8, %p830;
mov.u32 %r7380, 0;
mov.u32 %r7379, %r7375;
$L__BB3_729:
add.s32 %r5007, %r7367, -3;
mov.u32 %r5008, 1;
shl.b32 %r5009, %r5008, %r5007;
and.b32 %r5010, %r5009, %r7543;
setp.ne.s32 %p831, %r5010, 0;
and.b32 %r5011, %r7380, 127;
selp.u32 %r5012, 1, 0, %p831;
bfi.b32 %r7384, %r5011, %r5012, 1, 7;
add.s32 %r7385, %r7381, -1;
setp.ne.s32 %p832, %r7385, 0;
mov.u32 %r7383, %r7379;
@%p832 bra $L__BB3_732;
setp.gt.u32 %p833, %r7382, 191;
mov.u32 %r7385, 0;
mov.u32 %r7383, %r5008;
@%p833 bra $L__BB3_732;
cvt.u16.u32 %rs743, %r7384;
and.b16 %rs744, %rs743, 255;
add.s32 %r5016, %r7382, 17477;
cvt.u64.u32 %rd433, %r5016;
add.s64 %rd434, %rd433, %rd5;
add.s64 %rd435, %rd1, %rd434;
st.global.u8 [%rd435], %rs743;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p834, %rs744, 255;
selp.b32 %r7385, 7, 8, %p834;
mov.u32 %r7384, 0;
mov.u32 %r7383, %r7379;
$L__BB3_732:
add.s32 %r7367, %r7367, -4;
shl.b32 %r5018, %r5008, %r7367;
and.b32 %r5019, %r5018, %r7543;
setp.ne.s32 %p835, %r5019, 0;
and.b32 %r5020, %r7384, 127;
selp.u32 %r5021, 1, 0, %p835;
bfi.b32 %r5022, %r5020, %r5021, 1, 15;
cvt.u16.u32 %rs1089, %r5022;
add.s32 %r7388, %r7385, -1;
setp.ne.s32 %p836, %r7388, 0;
mov.u32 %r7387, %r7383;
@%p836 bra $L__BB3_735;
setp.gt.u32 %p837, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7387, 1;
@%p837 bra $L__BB3_735;
add.s32 %r5025, %r7382, 17477;
cvt.u64.u32 %rd436, %r5025;
add.s64 %rd437, %rd436, %rd5;
add.s64 %rd438, %rd1, %rd437;
and.b16 %rs746, %rs1089, 255;
st.global.u8 [%rd438], %rs1089;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p838, %rs746, 255;
selp.b32 %r7388, 7, 8, %p838;
mov.u16 %rs1089, 0;
mov.u32 %r7387, %r7383;
$L__BB3_735:
setp.ne.s32 %p839, %r7367, 0;
@%p839 bra $L__BB3_723;
$L__BB3_736:
add.s32 %r5027, %r7542, -1;
setp.eq.s32 %p840, %r7542, 0;
mov.u32 %r7543, 0;
selp.b32 %r7542, 0, %r5027, %p840;
setp.lt.u32 %p841, %r7542, 3;
mov.u32 %r7393, %r7543;
@%p841 bra $L__BB3_739;
setp.lt.u32 %p842, %r7542, 6;
mov.u32 %r7393, 1;
@%p842 bra $L__BB3_739;
setp.lt.u32 %p843, %r7542, 9;
setp.eq.s32 %p844, %r7542, 11;
selp.b32 %r5029, 4, 5, %p844;
setp.lt.u32 %p845, %r7542, 11;
selp.b32 %r5030, 3, %r5029, %p845;
selp.b32 %r7393, 2, %r5030, %p843;
$L__BB3_739:
mov.u32 %r5032, 1;
shl.b32 %r7541, %r5032, %r7393;
mov.u32 %r7540, %r7387;
bra.uni $L__BB3_748;
$L__BB3_740:
add.s32 %r7543, %r7543, 1;
setp.lt.u32 %p846, %r7543, %r7541;
@%p846 bra $L__BB3_748;
shl.b16 %rs747, %rs1089, 1;
or.b16 %rs1089, %rs747, 1;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p847, %r7388, 0;
mov.u32 %r7394, %r7540;
@%p847 bra $L__BB3_744;
setp.gt.u32 %p848, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7394, 1;
@%p848 bra $L__BB3_744;
and.b16 %rs749, %rs1089, 255;
st.global.u8 [%rd26], %rs1089;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p849, %rs749, 255;
selp.b32 %r7388, 7, 8, %p849;
mov.u16 %rs1089, 0;
mov.u32 %r7394, %r7540;
$L__BB3_744:
add.s32 %r5036, %r7542, 1;
min.u32 %r7542, %r5036, 12;
setp.lt.u32 %p850, %r7542, 3;
mov.u32 %r7543, 0;
mov.u32 %r7397, %r7543;
@%p850 bra $L__BB3_747;
setp.lt.u32 %p851, %r7542, 6;
mov.u32 %r7397, 1;
@%p851 bra $L__BB3_747;
setp.lt.u32 %p852, %r7542, 9;
setp.eq.s32 %p853, %r7542, 11;
selp.b32 %r5038, 4, 5, %p853;
setp.lt.u32 %p854, %r7542, 11;
selp.b32 %r5039, 3, %r5038, %p854;
selp.b32 %r7397, 2, %r5039, %p852;
$L__BB3_747:
mov.u32 %r5041, 1;
shl.b32 %r7541, %r5041, %r7397;
mov.u32 %r7540, %r7394;
$L__BB3_748:
max.s32 %r1900, %r7334, 1;
and.b16 %rs750, %rs239, 15;
cvt.u32.u16 %r1901, %rs750;
and.b32 %r1902, %r7330, 1;
setp.eq.s32 %p855, %r1902, 0;
mov.u32 %r7414, %r8093;
@%p855 bra $L__BB3_755;
and.b32 %r5042, %r1901, 1;
sub.s32 %r7404, %r1900, %r5042;
setp.eq.s32 %p856, %r7404, 0;
mov.u32 %r7414, %r8093;
@%p856 bra $L__BB3_755;
mov.u32 %r5043, -1;
shl.b32 %r5044, %r5043, %r7404;
not.b32 %r5045, %r5044;
and.b32 %r7405, %r7324, %r5045;
$L__BB3_751:
setp.gt.u32 %p857, %r7925, 17476;
mov.u32 %r7414, 1;
@%p857 bra $L__BB3_755;
sub.s32 %r5047, %r7924, %r7923;
min.u32 %r5048, %r5047, %r7404;
setp.eq.s32 %p858, %r5048, 32;
mov.u32 %r5049, -1;
shl.b32 %r5050, %r5049, %r5048;
not.b32 %r5051, %r5050;
selp.b32 %r5052, -1, %r5051, %p858;
and.b32 %r5053, %r5052, %r7405;
shl.b32 %r5054, %r5053, %r7923;
or.b32 %r7922, %r5054, %r7922;
add.s32 %r7923, %r5048, %r7923;
shr.u32 %r7405, %r7405, %r5048;
sub.s32 %r7404, %r7404, %r5048;
setp.lt.u32 %p859, %r7923, %r7924;
@%p859 bra $L__BB3_754;
cvt.u64.u32 %rd439, %r7925;
add.s64 %rd440, %rd439, %rd5;
add.s64 %rd441, %rd1, %rd440;
st.global.u8 [%rd441], %r7922;
add.s32 %r7925, %r7925, 1;
setp.eq.s32 %p860, %r7922, 255;
selp.b32 %r7924, 7, 8, %p860;
mov.u32 %r7922, 0;
mov.u32 %r7923, %r7922;
$L__BB3_754:
setp.ne.s32 %p861, %r7404, 0;
mov.u32 %r7414, %r8093;
@%p861 bra $L__BB3_751;
$L__BB3_755:
setp.eq.s32 %p862, %r1786, 0;
mov.u32 %r7429, %r7414;
@%p862 bra $L__BB3_762;
shr.u32 %r5057, %r1901, 1;
and.b32 %r5058, %r5057, 1;
sub.s32 %r7419, %r1900, %r5058;
setp.eq.s32 %p863, %r7419, 0;
mov.u32 %r7429, %r7414;
@%p863 bra $L__BB3_762;
mov.u32 %r5059, -1;
shl.b32 %r5060, %r5059, %r7419;
not.b32 %r5061, %r5060;
and.b32 %r7420, %r7328, %r5061;
$L__BB3_758:
setp.gt.u32 %p864, %r7925, 17476;
mov.u32 %r7429, 1;
@%p864 bra $L__BB3_762;
sub.s32 %r5063, %r7924, %r7923;
min.u32 %r5064, %r5063, %r7419;
setp.eq.s32 %p865, %r5064, 32;
mov.u32 %r5065, -1;
shl.b32 %r5066, %r5065, %r5064;
not.b32 %r5067, %r5066;
selp.b32 %r5068, -1, %r5067, %p865;
and.b32 %r5069, %r5068, %r7420;
shl.b32 %r5070, %r5069, %r7923;
or.b32 %r7922, %r5070, %r7922;
add.s32 %r7923, %r5064, %r7923;
shr.u32 %r7420, %r7420, %r5064;
sub.s32 %r7419, %r7419, %r5064;
setp.lt.u32 %p866, %r7923, %r7924;
@%p866 bra $L__BB3_761;
cvt.u64.u32 %rd442, %r7925;
add.s64 %rd443, %rd442, %rd5;
add.s64 %rd444, %rd1, %rd443;
st.global.u8 [%rd444], %r7922;
add.s32 %r7925, %r7925, 1;
setp.eq.s32 %p867, %r7922, 255;
selp.b32 %r7924, 7, 8, %p867;
mov.u32 %r7922, 0;
mov.u32 %r7923, %r7922;
$L__BB3_761:
setp.ne.s32 %p868, %r7419, 0;
mov.u32 %r7429, %r7414;
@%p868 bra $L__BB3_758;
$L__BB3_762:
and.b32 %r5073, %r7330, 4;
setp.eq.s32 %p869, %r5073, 0;
mov.u32 %r7444, %r7429;
@%p869 bra $L__BB3_769;
shr.u32 %r5074, %r1901, 2;
and.b32 %r5075, %r5074, 1;
sub.s32 %r7434, %r1900, %r5075;
setp.eq.s32 %p870, %r7434, 0;
mov.u32 %r7444, %r7429;
@%p870 bra $L__BB3_769;
mov.u32 %r5076, -1;
shl.b32 %r5077, %r5076, %r7434;
not.b32 %r5078, %r5077;
and.b32 %r7435, %r7333, %r5078;
$L__BB3_765:
setp.gt.u32 %p871, %r7925, 17476;
mov.u32 %r7444, 1;
@%p871 bra $L__BB3_769;
sub.s32 %r5080, %r7924, %r7923;
min.u32 %r5081, %r5080, %r7434;
setp.eq.s32 %p872, %r5081, 32;
mov.u32 %r5082, -1;
shl.b32 %r5083, %r5082, %r5081;
not.b32 %r5084, %r5083;
selp.b32 %r5085, -1, %r5084, %p872;
and.b32 %r5086, %r5085, %r7435;
shl.b32 %r5087, %r5086, %r7923;
or.b32 %r7922, %r5087, %r7922;
add.s32 %r7923, %r5081, %r7923;
shr.u32 %r7435, %r7435, %r5081;
sub.s32 %r7434, %r7434, %r5081;
setp.lt.u32 %p873, %r7923, %r7924;
@%p873 bra $L__BB3_768;
cvt.u64.u32 %rd445, %r7925;
add.s64 %rd446, %rd445, %rd5;
add.s64 %rd447, %rd1, %rd446;
st.global.u8 [%rd447], %r7922;
add.s32 %r7925, %r7925, 1;
setp.eq.s32 %p874, %r7922, 255;
selp.b32 %r7924, 7, 8, %p874;
mov.u32 %r7922, 0;
mov.u32 %r7923, %r7922;
$L__BB3_768:
setp.ne.s32 %p875, %r7434, 0;
mov.u32 %r7444, %r7429;
@%p875 bra $L__BB3_765;
$L__BB3_769:
setp.eq.s32 %p876, %r1788, 0;
mov.u32 %r7459, %r7444;
@%p876 bra $L__BB3_776;
shr.u32 %r5090, %r1901, 3;
sub.s32 %r7449, %r1900, %r5090;
setp.eq.s32 %p877, %r7449, 0;
mov.u32 %r7459, %r7444;
@%p877 bra $L__BB3_776;
mov.u32 %r5091, -1;
shl.b32 %r5092, %r5091, %r7449;
not.b32 %r5093, %r5092;
and.b32 %r7450, %r7338, %r5093;
$L__BB3_772:
setp.gt.u32 %p878, %r7925, 17476;
mov.u32 %r7459, 1;
@%p878 bra $L__BB3_776;
sub.s32 %r5095, %r7924, %r7923;
min.u32 %r5096, %r5095, %r7449;
setp.eq.s32 %p879, %r5096, 32;
mov.u32 %r5097, -1;
shl.b32 %r5098, %r5097, %r5096;
not.b32 %r5099, %r5098;
selp.b32 %r5100, -1, %r5099, %p879;
and.b32 %r5101, %r5100, %r7450;
shl.b32 %r5102, %r5101, %r7923;
or.b32 %r7922, %r5102, %r7922;
add.s32 %r7923, %r5096, %r7923;
shr.u32 %r7450, %r7450, %r5096;
sub.s32 %r7449, %r7449, %r5096;
setp.lt.u32 %p880, %r7923, %r7924;
@%p880 bra $L__BB3_775;
cvt.u64.u32 %rd448, %r7925;
add.s64 %rd449, %rd448, %rd5;
add.s64 %rd450, %rd1, %rd449;
st.global.u8 [%rd450], %r7922;
add.s32 %r7925, %r7925, 1;
setp.eq.s32 %p881, %r7922, 255;
selp.b32 %r7924, 7, 8, %p881;
mov.u32 %r7922, 0;
mov.u32 %r7923, %r7922;
$L__BB3_775:
setp.ne.s32 %p882, %r7449, 0;
mov.u32 %r7459, %r7444;
@%p882 bra $L__BB3_772;
$L__BB3_776:
ld.global.u32 %r1995, [%rd687+8];
setp.eq.s32 %p883, %r1995, 0;
mov.u32 %r7465, 0;
mov.u32 %r7464, %r7465;
@%p883 bra $L__BB3_778;
and.b32 %r5106, %r1995, -2147483648;
abs.s32 %r5107, %r1995;
shl.b32 %r5108, %r5107, %r1719;
or.b32 %r7464, %r5108, %r5106;
$L__BB3_778:
shl.b32 %r5112, %r7464, 1;
shr.u32 %r5113, %r5112, %r1719;
and.b32 %r1998, %r5113, -2;
setp.eq.s32 %p884, %r1998, 0;
mov.u32 %r7466, %r7465;
mov.u32 %r7472, %r7465;
@%p884 bra $L__BB3_780;
add.s32 %r5115, %r1998, -1;
clz.b32 %r5116, %r5115;
mov.u32 %r5117, 32;
sub.s32 %r7465, %r5117, %r5116;
shr.u32 %r5118, %r7464, 31;
add.s32 %r5119, %r5118, %r1998;
add.s32 %r7466, %r5119, -2;
mov.u32 %r7472, 1;
$L__BB3_780:
ld.global.u32 %r2004, [%rd687+264];
setp.eq.s32 %p885, %r2004, 0;
mov.u32 %r7469, 0;
mov.u32 %r7468, %r7469;
@%p885 bra $L__BB3_782;
and.b32 %r5121, %r2004, -2147483648;
abs.s32 %r5122, %r2004;
shl.b32 %r5123, %r5122, %r1719;
or.b32 %r7468, %r5123, %r5121;
$L__BB3_782:
shl.b32 %r5126, %r7468, 1;
shr.u32 %r5127, %r5126, %r1719;
and.b32 %r2007, %r5127, -2;
setp.eq.s32 %p886, %r2007, 0;
mov.u32 %r7470, %r7469;
mov.u32 %r7476, %r7465;
@%p886 bra $L__BB3_784;
or.b32 %r7472, %r7472, 2;
add.s32 %r5128, %r2007, -1;
clz.b32 %r5129, %r5128;
mov.u32 %r5130, 32;
sub.s32 %r7469, %r5130, %r5129;
max.s32 %r7476, %r7465, %r7469;
shr.u32 %r5131, %r7468, 31;
add.s32 %r5132, %r5131, %r2007;
add.s32 %r7470, %r5132, -2;
$L__BB3_784:
ld.global.u32 %r2016, [%rd687+12];
setp.eq.s32 %p887, %r2016, 0;
mov.u32 %r7474, 0;
mov.u32 %r7473, %r7474;
@%p887 bra $L__BB3_786;
and.b32 %r5134, %r2016, -2147483648;
abs.s32 %r5135, %r2016;
shl.b32 %r5136, %r5135, %r1719;
or.b32 %r7473, %r5136, %r5134;
$L__BB3_786:
shl.b32 %r5139, %r7473, 1;
shr.u32 %r5140, %r5139, %r1719;
and.b32 %r2019, %r5140, -2;
setp.eq.s32 %p888, %r2019, 0;
mov.u32 %r7475, %r7474;
@%p888 bra $L__BB3_788;
or.b32 %r7472, %r7472, 4;
add.s32 %r5141, %r2019, -1;
clz.b32 %r5142, %r5141;
mov.u32 %r5143, 32;
sub.s32 %r7474, %r5143, %r5142;
max.s32 %r7476, %r7476, %r7474;
shr.u32 %r5144, %r7473, 31;
add.s32 %r5145, %r5144, %r2019;
add.s32 %r7475, %r5145, -2;
$L__BB3_788:
ld.global.u32 %r2028, [%rd687+268];
setp.eq.s32 %p889, %r2028, 0;
mov.u32 %r7479, 0;
mov.u32 %r7478, %r7479;
@%p889 bra $L__BB3_790;
and.b32 %r5147, %r2028, -2147483648;
abs.s32 %r5148, %r2028;
shl.b32 %r5149, %r5148, %r1719;
or.b32 %r7478, %r5149, %r5147;
$L__BB3_790:
shl.b32 %r5152, %r7478, 1;
shr.u32 %r5153, %r5152, %r1719;
and.b32 %r2031, %r5153, -2;
setp.eq.s32 %p890, %r2031, 0;
mov.u32 %r7480, %r7479;
@%p890 bra $L__BB3_792;
or.b32 %r7472, %r7472, 8;
add.s32 %r5154, %r2031, -1;
clz.b32 %r5155, %r5154;
mov.u32 %r5156, 32;
sub.s32 %r7479, %r5156, %r5155;
max.s32 %r7476, %r7476, %r7479;
shr.u32 %r5157, %r7478, 31;
add.s32 %r5158, %r5157, %r2031;
add.s32 %r7480, %r5158, -2;
$L__BB3_792:
shr.u32 %r5160, %r7330, 1;
or.b32 %r2040, %r5160, %r1902;
add.s32 %r5161, %r7476, -1;
setp.lt.s32 %p891, %r7476, 2;
setp.gt.s32 %p892, %r7476, 1;
selp.b32 %r2041, %r5161, 0, %p892;
mov.u32 %r7483, 0;
@%p891 bra $L__BB3_794;
setp.eq.s32 %p893, %r7465, %r7476;
selp.u32 %r5162, 1, 0, %p893;
setp.eq.s32 %p894, %r7469, %r7476;
selp.u32 %r5163, -1, 0, %p894;
bfi.b32 %r5164, %r5163, %r5162, 1, 1;
setp.eq.s32 %p895, %r7474, %r7476;
selp.u16 %rs751, 1, 0, %p895;
mul.wide.u16 %r5165, %rs751, 4;
or.b32 %r5166, %r5164, %r5165;
setp.eq.s32 %p896, %r7479, %r7476;
selp.u16 %rs752, 1, 0, %p896;
mul.wide.u16 %r5167, %rs752, 8;
or.b32 %r7483, %r5166, %r5167;
$L__BB3_794:
and.b32 %r5168, %r7469, 255;
and.b32 %r5169, %r7337, 255;
setp.lt.u32 %p897, %r5168, %r5169;
cvt.u16.u32 %rs753, %r7469;
selp.b16 %rs754, %rs238, %rs753, %p897;
st.shared.u8 [%r1785+1], %rs754;
st.shared.u8 [%r1785+2], %r7479;
and.b32 %r2044, %r7472, 2;
shr.u32 %r5170, %r2044, 1;
or.b32 %r5171, %r1789, %r5170;
st.shared.u8 [%r1787+1], %r5171;
and.b32 %r2045, %r7472, 8;
shr.u32 %r5172, %r2045, 3;
st.shared.u8 [%r1787+2], %r5172;
shl.b32 %r5173, %r7472, 4;
shl.b32 %r5174, %r2040, 8;
or.b32 %r5175, %r5173, %r5174;
or.b32 %r5176, %r5175, %r7483;
mul.wide.u32 %rd451, %r5176, 2;
add.s64 %rd452, %rd24, %rd451;
ld.global.u16 %rs261, [%rd452];
shr.u16 %rs755, %rs261, 4;
and.b16 %rs262, %rs755, 7;
setp.eq.s16 %p898, %rs262, 0;
mov.u32 %r7495, %r7353;
@%p898 bra $L__BB3_801;
cvt.u32.u16 %r7484, %rs262;
shr.u16 %rs756, %rs261, 8;
cvt.u32.u16 %r7485, %rs756;
$L__BB3_796:
mov.u32 %r2048, %r7484;
setp.gt.u32 %p899, %r7779, 2879;
mov.u32 %r7495, 1;
@%p899 bra $L__BB3_801;
mov.u32 %r5178, 8;
sub.s32 %r5179, %r5178, %r7777;
sub.s32 %r5180, %r5179, %r7778;
min.u32 %r5181, %r5180, %r2048;
setp.eq.s32 %p900, %r5181, 32;
mov.u32 %r5182, -1;
shl.b32 %r5183, %r5182, %r5181;
not.b32 %r5184, %r5183;
selp.b32 %r5185, -1, %r5184, %p900;
and.b32 %r5186, %r5185, %r7485;
shl.b32 %r5187, %r5186, %r7778;
cvt.u16.u32 %rs757, %r5187;
or.b16 %rs1147, %rs1147, %rs757;
add.s32 %r7778, %r5181, %r7778;
sub.s32 %r7484, %r2048, %r5181;
shr.u32 %r7485, %r7485, %r5181;
setp.gt.u32 %p901, %r5180, %r2048;
@%p901 bra $L__BB3_800;
setp.ne.s32 %p902, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs758, %rs1147, 255;
setp.ne.s16 %p903, %rs758, 127;
and.pred %p904, %p902, %p903;
@%p904 bra $L__BB3_800;
mov.u32 %r5190, 20548;
sub.s32 %r5191, %r5190, %r7779;
cvt.u64.u32 %rd453, %r5191;
add.s64 %rd454, %rd453, %rd5;
add.s64 %rd455, %rd1, %rd454;
st.global.u8 [%rd455], %rs1147;
add.s32 %r7779, %r7779, 1;
setp.gt.u16 %p905, %rs758, 143;
selp.u32 %r7777, 1, 0, %p905;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_800:
setp.ne.s32 %p906, %r7484, 0;
mov.u32 %r7495, %r7353;
@%p906 bra $L__BB3_796;
$L__BB3_801:
setp.ne.s32 %p907, %r2040, 0;
@%p907 bra $L__BB3_849;
setp.eq.s32 %p908, %r7472, 0;
add.s32 %r5192, %r7382, 17477;
cvt.u64.u32 %rd456, %r5192;
add.s64 %rd457, %rd456, %rd5;
add.s64 %rd27, %rd1, %rd457;
@%p908 bra $L__BB3_841;
shl.b16 %rs1089, %rs1089, 1;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p909, %r7388, 0;
mov.u32 %r7529, %r7540;
@%p909 bra $L__BB3_806;
setp.gt.u32 %p910, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7529, 1;
@%p910 bra $L__BB3_806;
st.global.u8 [%rd27], %rs1089;
add.s32 %r7382, %r7382, 1;
mov.u32 %r7388, 8;
mov.u16 %rs1089, 0;
mov.u32 %r7529, %r7540;
$L__BB3_806:
setp.lt.u32 %p911, %r7542, 3;
mov.u32 %r7499, 0;
@%p911 bra $L__BB3_809;
setp.lt.u32 %p912, %r7542, 6;
mov.u32 %r7499, 1;
@%p912 bra $L__BB3_809;
setp.lt.u32 %p913, %r7542, 9;
setp.eq.s32 %p914, %r7542, 11;
selp.b32 %r5198, 4, 5, %p914;
setp.lt.u32 %p915, %r7542, 11;
selp.b32 %r5199, 3, %r5198, %p915;
selp.b32 %r7499, 2, %r5199, %p913;
$L__BB3_809:
setp.eq.s32 %p916, %r7499, 0;
@%p916 bra $L__BB3_837;
add.s32 %r2072, %r7499, -1;
and.b32 %r2073, %r7499, 3;
setp.eq.s32 %p917, %r2073, 0;
mov.u32 %r7509, %r7499;
mov.u32 %r7512, %r7529;
@%p917 bra $L__BB3_822;
mov.u32 %r5201, 1;
shl.b32 %r5202, %r5201, %r2072;
and.b32 %r5203, %r5202, %r7543;
setp.ne.s32 %p918, %r5203, 0;
selp.u32 %r5204, 1, 0, %p918;
cvt.u32.u16 %r5205, %rs1089;
bfi.b32 %r5206, %r5205, %r5204, 1, 8;
cvt.u16.u32 %rs1089, %r5206;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p919, %r7388, 0;
mov.u32 %r7512, %r7529;
@%p919 bra $L__BB3_814;
setp.gt.u32 %p920, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7512, %r5201;
@%p920 bra $L__BB3_814;
add.s32 %r5210, %r7382, 17477;
cvt.u64.u32 %rd458, %r5210;
add.s64 %rd459, %rd458, %rd5;
add.s64 %rd460, %rd1, %rd459;
st.global.u8 [%rd460], %rs1089;
add.s32 %r7382, %r7382, 1;
mov.u32 %r7388, 8;
mov.u16 %rs1089, 0;
mov.u32 %r7512, %r7529;
$L__BB3_814:
setp.eq.s32 %p921, %r2073, 1;
mov.u32 %r7529, %r7512;
mov.u32 %r7509, %r2072;
@%p921 bra $L__BB3_822;
add.s32 %r7509, %r7499, -2;
mov.u32 %r5211, 1;
shl.b32 %r5212, %r5211, %r7509;
and.b32 %r5213, %r5212, %r7543;
setp.ne.s32 %p922, %r5213, 0;
selp.u32 %r5214, 1, 0, %p922;
cvt.u32.u16 %r5215, %rs1089;
bfi.b32 %r5216, %r5215, %r5214, 1, 8;
cvt.u16.u32 %rs1089, %r5216;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p923, %r7388, 0;
mov.u32 %r7503, %r7512;
@%p923 bra $L__BB3_818;
setp.gt.u32 %p924, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7503, %r5211;
@%p924 bra $L__BB3_818;
add.s32 %r5219, %r7382, 17477;
cvt.u64.u32 %rd461, %r5219;
add.s64 %rd462, %rd461, %rd5;
add.s64 %rd463, %rd1, %rd462;
and.b16 %rs765, %rs1089, 255;
st.global.u8 [%rd463], %rs1089;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p925, %rs765, 255;
selp.b32 %r7388, 7, 8, %p925;
mov.u16 %rs1089, 0;
mov.u32 %r7503, %r7512;
$L__BB3_818:
setp.eq.s32 %p926, %r2073, 2;
mov.u32 %r7529, %r7503;
mov.u32 %r7512, %r7503;
@%p926 bra $L__BB3_822;
add.s32 %r7509, %r7499, -3;
mov.u32 %r5220, 1;
shl.b32 %r5221, %r5220, %r7509;
and.b32 %r5222, %r5221, %r7543;
setp.ne.s32 %p927, %r5222, 0;
selp.u32 %r5223, 1, 0, %p927;
cvt.u32.u16 %r5224, %rs1089;
bfi.b32 %r5225, %r5224, %r5223, 1, 8;
cvt.u16.u32 %rs1089, %r5225;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p928, %r7388, 0;
mov.u32 %r7529, %r7503;
mov.u32 %r7512, %r7503;
@%p928 bra $L__BB3_822;
setp.gt.u32 %p929, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7529, %r5220;
mov.u32 %r7512, %r5220;
@%p929 bra $L__BB3_822;
add.s32 %r5230, %r7382, 17477;
cvt.u64.u32 %rd464, %r5230;
add.s64 %rd465, %rd464, %rd5;
add.s64 %rd466, %rd1, %rd465;
and.b16 %rs768, %rs1089, 255;
st.global.u8 [%rd466], %rs1089;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p930, %rs768, 255;
selp.b32 %r7388, 7, 8, %p930;
mov.u16 %rs1089, 0;
mov.u32 %r7529, %r7503;
mov.u32 %r7512, %r7503;
$L__BB3_822:
setp.lt.u32 %p931, %r2072, 3;
@%p931 bra $L__BB3_837;
mov.u32 %r7529, %r7512;
$L__BB3_824:
add.s32 %r5231, %r7509, -1;
mov.u32 %r5232, 1;
shl.b32 %r5233, %r5232, %r5231;
and.b32 %r5234, %r5233, %r7543;
setp.ne.s32 %p932, %r5234, 0;
selp.u32 %r5235, 1, 0, %p932;
cvt.u32.u16 %r5236, %rs1089;
bfi.b32 %r7518, %r5236, %r5235, 1, 8;
add.s32 %r7519, %r7388, -1;
setp.ne.s32 %p933, %r7519, 0;
mov.u32 %r7517, %r7529;
@%p933 bra $L__BB3_827;
setp.gt.u32 %p934, %r7382, 191;
mov.u32 %r7519, 0;
mov.u32 %r7517, %r5232;
@%p934 bra $L__BB3_827;
cvt.u16.u32 %rs769, %r7518;
and.b16 %rs770, %rs769, 255;
add.s32 %r5240, %r7382, 17477;
cvt.u64.u32 %rd467, %r5240;
add.s64 %rd468, %rd467, %rd5;
add.s64 %rd469, %rd1, %rd468;
st.global.u8 [%rd469], %rs769;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p935, %rs770, 255;
selp.b32 %r7519, 7, 8, %p935;
mov.u32 %r7518, 0;
mov.u32 %r7517, %r7529;
$L__BB3_827:
add.s32 %r5241, %r7509, -2;
shl.b32 %r5243, %r5232, %r5241;
and.b32 %r5244, %r5243, %r7543;
setp.ne.s32 %p936, %r5244, 0;
and.b32 %r5245, %r7518, 127;
selp.u32 %r5246, 1, 0, %p936;
bfi.b32 %r7522, %r5245, %r5246, 1, 7;
add.s32 %r7523, %r7519, -1;
setp.ne.s32 %p937, %r7523, 0;
mov.u32 %r7521, %r7517;
@%p937 bra $L__BB3_830;
setp.gt.u32 %p938, %r7382, 191;
mov.u32 %r7523, 0;
mov.u32 %r7521, 1;
@%p938 bra $L__BB3_830;
cvt.u16.u32 %rs771, %r7522;
and.b16 %rs772, %rs771, 255;
add.s32 %r5250, %r7382, 17477;
cvt.u64.u32 %rd470, %r5250;
add.s64 %rd471, %rd470, %rd5;
add.s64 %rd472, %rd1, %rd471;
st.global.u8 [%rd472], %rs771;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p939, %rs772, 255;
selp.b32 %r7523, 7, 8, %p939;
mov.u32 %r7522, 0;
mov.u32 %r7521, %r7517;
$L__BB3_830:
add.s32 %r5251, %r7509, -3;
mov.u32 %r5252, 1;
shl.b32 %r5253, %r5252, %r5251;
and.b32 %r5254, %r5253, %r7543;
setp.ne.s32 %p940, %r5254, 0;
and.b32 %r5255, %r7522, 127;
selp.u32 %r5256, 1, 0, %p940;
bfi.b32 %r7526, %r5255, %r5256, 1, 7;
add.s32 %r7527, %r7523, -1;
setp.ne.s32 %p941, %r7527, 0;
mov.u32 %r7525, %r7521;
@%p941 bra $L__BB3_833;
setp.gt.u32 %p942, %r7382, 191;
mov.u32 %r7527, 0;
mov.u32 %r7525, %r5252;
@%p942 bra $L__BB3_833;
cvt.u16.u32 %rs773, %r7526;
and.b16 %rs774, %rs773, 255;
add.s32 %r5260, %r7382, 17477;
cvt.u64.u32 %rd473, %r5260;
add.s64 %rd474, %rd473, %rd5;
add.s64 %rd475, %rd1, %rd474;
st.global.u8 [%rd475], %rs773;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p943, %rs774, 255;
selp.b32 %r7527, 7, 8, %p943;
mov.u32 %r7526, 0;
mov.u32 %r7525, %r7521;
$L__BB3_833:
add.s32 %r7509, %r7509, -4;
shl.b32 %r5262, %r5252, %r7509;
and.b32 %r5263, %r5262, %r7543;
setp.ne.s32 %p944, %r5263, 0;
and.b32 %r5264, %r7526, 127;
selp.u32 %r5265, 1, 0, %p944;
bfi.b32 %r5266, %r5264, %r5265, 1, 15;
cvt.u16.u32 %rs1089, %r5266;
add.s32 %r7388, %r7527, -1;
setp.ne.s32 %p945, %r7388, 0;
mov.u32 %r7529, %r7525;
@%p945 bra $L__BB3_836;
setp.gt.u32 %p946, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7529, 1;
@%p946 bra $L__BB3_836;
add.s32 %r5269, %r7382, 17477;
cvt.u64.u32 %rd476, %r5269;
add.s64 %rd477, %rd476, %rd5;
add.s64 %rd478, %rd1, %rd477;
and.b16 %rs776, %rs1089, 255;
st.global.u8 [%rd478], %rs1089;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p947, %rs776, 255;
selp.b32 %r7388, 7, 8, %p947;
mov.u16 %rs1089, 0;
mov.u32 %r7529, %r7525;
$L__BB3_836:
setp.ne.s32 %p948, %r7509, 0;
@%p948 bra $L__BB3_824;
$L__BB3_837:
add.s32 %r5271, %r7542, -1;
setp.eq.s32 %p949, %r7542, 0;
mov.u32 %r7543, 0;
selp.b32 %r7542, 0, %r5271, %p949;
setp.lt.u32 %p950, %r7542, 3;
mov.u32 %r7535, %r7543;
@%p950 bra $L__BB3_840;
setp.lt.u32 %p951, %r7542, 6;
mov.u32 %r7535, 1;
@%p951 bra $L__BB3_840;
setp.lt.u32 %p952, %r7542, 9;
setp.eq.s32 %p953, %r7542, 11;
selp.b32 %r5273, 4, 5, %p953;
setp.lt.u32 %p954, %r7542, 11;
selp.b32 %r5274, 3, %r5273, %p954;
selp.b32 %r7535, 2, %r5274, %p952;
$L__BB3_840:
mov.u32 %r5276, 1;
shl.b32 %r7541, %r5276, %r7535;
mov.u32 %r7540, %r7529;
bra.uni $L__BB3_849;
$L__BB3_841:
add.s32 %r7543, %r7543, 1;
setp.lt.u32 %p955, %r7543, %r7541;
@%p955 bra $L__BB3_849;
shl.b16 %rs777, %rs1089, 1;
or.b16 %rs1089, %rs777, 1;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p956, %r7388, 0;
mov.u32 %r7536, %r7540;
@%p956 bra $L__BB3_845;
setp.gt.u32 %p957, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7536, 1;
@%p957 bra $L__BB3_845;
and.b16 %rs779, %rs1089, 255;
st.global.u8 [%rd27], %rs1089;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p958, %rs779, 255;
selp.b32 %r7388, 7, 8, %p958;
mov.u16 %rs1089, 0;
mov.u32 %r7536, %r7540;
$L__BB3_845:
add.s32 %r5280, %r7542, 1;
min.u32 %r7542, %r5280, 12;
setp.lt.u32 %p959, %r7542, 3;
mov.u32 %r7543, 0;
mov.u32 %r7539, %r7543;
@%p959 bra $L__BB3_848;
setp.lt.u32 %p960, %r7542, 6;
mov.u32 %r7539, 1;
@%p960 bra $L__BB3_848;
setp.lt.u32 %p961, %r7542, 9;
setp.eq.s32 %p962, %r7542, 11;
selp.b32 %r5282, 4, 5, %p962;
setp.lt.u32 %p963, %r7542, 11;
selp.b32 %r5283, 3, %r5282, %p963;
selp.b32 %r7539, 2, %r5283, %p961;
$L__BB3_848:
mov.u32 %r5285, 1;
shl.b32 %r7541, %r5285, %r7539;
mov.u32 %r7540, %r7536;
$L__BB3_849:
max.s32 %r2156, %r7476, 1;
and.b16 %rs780, %rs261, 15;
cvt.u32.u16 %r2157, %rs780;
and.b32 %r2158, %r7472, 1;
setp.eq.s32 %p964, %r2158, 0;
mov.u32 %r7556, %r7459;
@%p964 bra $L__BB3_856;
and.b32 %r5286, %r2157, 1;
sub.s32 %r7546, %r2156, %r5286;
setp.eq.s32 %p965, %r7546, 0;
mov.u32 %r7556, %r7459;
@%p965 bra $L__BB3_856;
mov.u32 %r5287, -1;
shl.b32 %r5288, %r5287, %r7546;
not.b32 %r5289, %r5288;
and.b32 %r7547, %r7466, %r5289;
$L__BB3_852:
setp.gt.u32 %p966, %r7925, 17476;
mov.u32 %r7556, 1;
@%p966 bra $L__BB3_856;
sub.s32 %r5291, %r7924, %r7923;
min.u32 %r5292, %r5291, %r7546;
setp.eq.s32 %p967, %r5292, 32;
mov.u32 %r5293, -1;
shl.b32 %r5294, %r5293, %r5292;
not.b32 %r5295, %r5294;
selp.b32 %r5296, -1, %r5295, %p967;
and.b32 %r5297, %r5296, %r7547;
shl.b32 %r5298, %r5297, %r7923;
or.b32 %r7922, %r5298, %r7922;
add.s32 %r7923, %r5292, %r7923;
shr.u32 %r7547, %r7547, %r5292;
sub.s32 %r7546, %r7546, %r5292;
setp.lt.u32 %p968, %r7923, %r7924;
@%p968 bra $L__BB3_855;
cvt.u64.u32 %rd479, %r7925;
add.s64 %rd480, %rd479, %rd5;
add.s64 %rd481, %rd1, %rd480;
st.global.u8 [%rd481], %r7922;
add.s32 %r7925, %r7925, 1;
setp.eq.s32 %p969, %r7922, 255;
selp.b32 %r7924, 7, 8, %p969;
mov.u32 %r7922, 0;
mov.u32 %r7923, %r7922;
$L__BB3_855:
setp.ne.s32 %p970, %r7546, 0;
mov.u32 %r7556, %r7459;
@%p970 bra $L__BB3_852;
$L__BB3_856:
setp.eq.s32 %p971, %r2044, 0;
mov.u32 %r7571, %r7556;
@%p971 bra $L__BB3_863;
shr.u32 %r5301, %r2157, 1;
and.b32 %r5302, %r5301, 1;
sub.s32 %r7561, %r2156, %r5302;
setp.eq.s32 %p972, %r7561, 0;
mov.u32 %r7571, %r7556;
@%p972 bra $L__BB3_863;
mov.u32 %r5303, -1;
shl.b32 %r5304, %r5303, %r7561;
not.b32 %r5305, %r5304;
and.b32 %r7562, %r7470, %r5305;
$L__BB3_859:
setp.gt.u32 %p973, %r7925, 17476;
mov.u32 %r7571, 1;
@%p973 bra $L__BB3_863;
sub.s32 %r5307, %r7924, %r7923;
min.u32 %r5308, %r5307, %r7561;
setp.eq.s32 %p974, %r5308, 32;
mov.u32 %r5309, -1;
shl.b32 %r5310, %r5309, %r5308;
not.b32 %r5311, %r5310;
selp.b32 %r5312, -1, %r5311, %p974;
and.b32 %r5313, %r5312, %r7562;
shl.b32 %r5314, %r5313, %r7923;
or.b32 %r7922, %r5314, %r7922;
add.s32 %r7923, %r5308, %r7923;
shr.u32 %r7562, %r7562, %r5308;
sub.s32 %r7561, %r7561, %r5308;
setp.lt.u32 %p975, %r7923, %r7924;
@%p975 bra $L__BB3_862;
cvt.u64.u32 %rd482, %r7925;
add.s64 %rd483, %rd482, %rd5;
add.s64 %rd484, %rd1, %rd483;
st.global.u8 [%rd484], %r7922;
add.s32 %r7925, %r7925, 1;
setp.eq.s32 %p976, %r7922, 255;
selp.b32 %r7924, 7, 8, %p976;
mov.u32 %r7922, 0;
mov.u32 %r7923, %r7922;
$L__BB3_862:
setp.ne.s32 %p977, %r7561, 0;
mov.u32 %r7571, %r7556;
@%p977 bra $L__BB3_859;
// Optional field guarded by bit 2 of %r7472. When present, emits the low
// (%r2156 - bit 2 of %r2157) bits of %r7475 to the byte stream at
// %rd1 + %rd5 + %r7925. Status out is %r7586: 1 if the index limit was hit,
// otherwise the incoming status %r7571.
$L__BB3_863:
and.b32 %r5317, %r7472, 4;
setp.eq.s32 %p978, %r5317, 0;
mov.u32 %r7586, %r7571;
@%p978 bra $L__BB3_870;
// Bit count for this field; nothing to do when it is zero.
shr.u32 %r5318, %r2157, 2;
and.b32 %r5319, %r5318, 1;
sub.s32 %r7576, %r2156, %r5319;
setp.eq.s32 %p979, %r7576, 0;
mov.u32 %r7586, %r7571;
@%p979 bra $L__BB3_870;
// Keep only the low %r7576 bits of the value.
mov.u32 %r5320, -1;
shl.b32 %r5321, %r5320, %r7576;
not.b32 %r5322, %r5321;
and.b32 %r7577, %r7475, %r5322;
// Packing loop. %r7922 = byte being assembled, %r7923 = bits in it,
// %r7924 = its bit capacity, %r7925 = next output byte index.
$L__BB3_866:
// Give up with status 1 once the output index exceeds 17476.
setp.gt.u32 %p980, %r7925, 17476;
mov.u32 %r7586, 1;
@%p980 bra $L__BB3_870;
// n = min(free bits in the current byte, bits still to emit).
sub.s32 %r5324, %r7924, %r7923;
min.u32 %r5325, %r5324, %r7576;
// mask = low n bits (all ones when n == 32).
setp.eq.s32 %p981, %r5325, 32;
mov.u32 %r5326, -1;
shl.b32 %r5327, %r5326, %r5325;
not.b32 %r5328, %r5327;
selp.b32 %r5329, -1, %r5328, %p981;
// Merge the next n value bits above the bits already in the byte.
and.b32 %r5330, %r5329, %r7577;
shl.b32 %r5331, %r5330, %r7923;
or.b32 %r7922, %r5331, %r7922;
add.s32 %r7923, %r5325, %r7923;
shr.u32 %r7577, %r7577, %r5325;
sub.s32 %r7576, %r7576, %r5325;
// Byte not full yet: skip the store.
setp.lt.u32 %p982, %r7923, %r7924;
@%p982 bra $L__BB3_869;
// Byte full: store its low 8 bits and advance the output index.
cvt.u64.u32 %rd485, %r7925;
add.s64 %rd486, %rd485, %rd5;
add.s64 %rd487, %rd1, %rd486;
st.global.u8 [%rd487], %r7922;
add.s32 %r7925, %r7925, 1;
// The byte after a stored 255 only holds 7 bits; otherwise 8.
setp.eq.s32 %p983, %r7922, 255;
selp.b32 %r7924, 7, 8, %p983;
mov.u32 %r7922, 0;
mov.u32 %r7923, %r7922;
$L__BB3_869:
// Repeat while bits remain; on normal completion the incoming status is kept.
setp.ne.s32 %p984, %r7576, 0;
mov.u32 %r7586, %r7571;
@%p984 bra $L__BB3_866;
// Optional field guarded by %r2045 != 0. When present, emits the low
// (%r2156 - (%r2157 >> 3)) bits of %r7480 to the byte stream at
// %rd1 + %rd5 + %r7925. Status out is %r8093: 1 if the index limit was hit,
// otherwise the incoming status %r7586.
$L__BB3_870:
setp.eq.s32 %p985, %r2045, 0;
mov.u32 %r8093, %r7586;
@%p985 bra $L__BB3_877;
// Bit count for this field; nothing to do when it is zero.
shr.u32 %r5334, %r2157, 3;
sub.s32 %r7591, %r2156, %r5334;
setp.eq.s32 %p986, %r7591, 0;
mov.u32 %r8093, %r7586;
@%p986 bra $L__BB3_877;
// Keep only the low %r7591 bits of the value.
mov.u32 %r5335, -1;
shl.b32 %r5336, %r5335, %r7591;
not.b32 %r5337, %r5336;
and.b32 %r7592, %r7480, %r5337;
// Packing loop. %r7922 = byte being assembled, %r7923 = bits in it,
// %r7924 = its bit capacity, %r7925 = next output byte index.
$L__BB3_873:
// Give up with status 1 once the output index exceeds 17476.
setp.gt.u32 %p987, %r7925, 17476;
mov.u32 %r8093, 1;
@%p987 bra $L__BB3_877;
// n = min(free bits in the current byte, bits still to emit).
sub.s32 %r5339, %r7924, %r7923;
min.u32 %r5340, %r5339, %r7591;
// mask = low n bits (all ones when n == 32).
setp.eq.s32 %p988, %r5340, 32;
mov.u32 %r5341, -1;
shl.b32 %r5342, %r5341, %r5340;
not.b32 %r5343, %r5342;
selp.b32 %r5344, -1, %r5343, %p988;
// Merge the next n value bits above the bits already in the byte.
and.b32 %r5345, %r5344, %r7592;
shl.b32 %r5346, %r5345, %r7923;
or.b32 %r7922, %r5346, %r7922;
add.s32 %r7923, %r5340, %r7923;
shr.u32 %r7592, %r7592, %r5340;
sub.s32 %r7591, %r7591, %r5340;
// Byte not full yet: skip the store.
setp.lt.u32 %p989, %r7923, %r7924;
@%p989 bra $L__BB3_876;
// Byte full: store its low 8 bits and advance the output index.
cvt.u64.u32 %rd488, %r7925;
add.s64 %rd489, %rd488, %rd5;
add.s64 %rd490, %rd1, %rd489;
st.global.u8 [%rd490], %r7922;
add.s32 %r7925, %r7925, 1;
// The byte after a stored 255 only holds 7 bits; otherwise 8.
setp.eq.s32 %p990, %r7922, 255;
selp.b32 %r7924, 7, 8, %p990;
mov.u32 %r7922, 0;
mov.u32 %r7923, %r7922;
$L__BB3_876:
// Repeat while bits remain; on normal completion the incoming status is kept.
setp.ne.s32 %p991, %r7591, 0;
mov.u32 %r8093, %r7586;
@%p991 bra $L__BB3_873;
// Dispatch for the run-length coder that follows.
//   - If %r2041 < 1 or %r1782 < 1, the coder is skipped ($L__BB3_925).
//   - If min(%r1782, %r2041) < 3, take the "run continues" path ($L__BB3_917).
//   - Otherwise take the "run terminates" path ($L__BB3_879).
// NOTE(review): the state machine below matches the shape of the HTJ2K MEL
// coder - confirm against the CUDA source.
$L__BB3_877:
setp.lt.s32 %p992, %r2041, 1;
setp.lt.s32 %p993, %r1782, 1;
or.pred %p994, %p993, %p992;
@%p994 bra $L__BB3_925;
min.s32 %r5349, %r1782, %r2041;
setp.lt.s32 %p995, %r5349, 3;
// %rd28 = address of the next byte of this stream: %rd1 + %rd5 + 17477 + %r7382.
add.s32 %r5350, %r7382, 17477;
cvt.u64.u32 %rd491, %r5350;
add.s64 %rd492, %rd491, %rd5;
add.s64 %rd28, %rd1, %rd492;
@%p995 bra $L__BB3_917;
bra.uni $L__BB3_879;
// "Run continues" path of the run-length coder.
//   %r7543 = current run length      %r7541 = run threshold (1 << exponent)
//   %r7542 = coder state, 0..12      %rs1089 = byte being assembled (MSB first)
//   %r7388 = bits still free in that byte
//   %r7382 = bytes written to this stream
//   %r7540 = status (becomes 1 when the 192-byte limit is hit)
$L__BB3_917:
// Extend the run; nothing is emitted until it reaches the threshold.
add.s32 %r7543, %r7543, 1;
setp.lt.u32 %p1042, %r7543, %r7541;
@%p1042 bra $L__BB3_925;
// Threshold reached: append a 1 bit to the byte.
shl.b16 %rs797, %rs1089, 1;
or.b16 %rs1089, %rs797, 1;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p1043, %r7388, 0;
mov.u32 %r7646, %r7540;
@%p1043 bra $L__BB3_921;
// Byte complete. Refuse to write (status 1) once %r7382 exceeds 191.
setp.gt.u32 %p1044, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7646, 1;
@%p1044 bra $L__BB3_921;
// Store the byte at %rd28 (computed by the caller block from the same %r7382).
and.b16 %rs799, %rs1089, 255;
st.global.u8 [%rd28], %rs1089;
add.s32 %r7382, %r7382, 1;
// The byte after a stored 255 only gets 7 bits; otherwise 8.
setp.eq.s16 %p1045, %rs799, 255;
selp.b32 %r7388, 7, 8, %p1045;
mov.u16 %rs1089, 0;
mov.u32 %r7646, %r7540;
$L__BB3_921:
// Advance the state (saturating at 12) and restart the run.
add.s32 %r5438, %r7542, 1;
min.u32 %r7542, %r5438, 12;
setp.lt.u32 %p1046, %r7542, 3;
mov.u32 %r7543, 0;
mov.u32 %r7649, %r7543;
@%p1046 bra $L__BB3_924;
// Exponent for the new state: 0 for 0-2, 1 for 3-5, 2 for 6-8,
// 3 for 9-10, 4 for 11, 5 otherwise.
setp.lt.u32 %p1047, %r7542, 6;
mov.u32 %r7649, 1;
@%p1047 bra $L__BB3_924;
setp.lt.u32 %p1048, %r7542, 9;
setp.eq.s32 %p1049, %r7542, 11;
selp.b32 %r5440, 4, 5, %p1049;
setp.lt.u32 %p1050, %r7542, 11;
selp.b32 %r5441, 3, %r5440, %p1050;
selp.b32 %r7649, 2, %r5441, %p1048;
$L__BB3_924:
// New threshold = 1 << exponent; publish the status.
mov.u32 %r5443, 1;
shl.b32 %r7541, %r5443, %r7649;
mov.u32 %r7540, %r7646;
bra.uni $L__BB3_925;
// "Run terminates" path of the run-length coder, part 1: append a 0 bit and
// look up how many bits of the run length must follow.
//   %rs1089 = byte being assembled (MSB first), %r7388 = bits still free in it,
//   %r7382 = bytes written to this stream, %r7542 = coder state,
//   %r7639 = status (1 when the 192-byte limit is hit, else %r7540).
$L__BB3_879:
shl.b16 %rs1089, %rs1089, 1;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p996, %r7388, 0;
mov.u32 %r7639, %r7540;
@%p996 bra $L__BB3_882;
// Byte complete. Refuse to write (status 1) once %r7382 exceeds 191.
setp.gt.u32 %p997, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7639, 1;
@%p997 bra $L__BB3_882;
// Store at %rd28. The byte's low bit is the 0 just shifted in, so it cannot be
// 255 and the next byte always gets the full 8 bits.
st.global.u8 [%rd28], %rs1089;
add.s32 %r7382, %r7382, 1;
mov.u32 %r7388, 8;
mov.u16 %rs1089, 0;
mov.u32 %r7639, %r7540;
$L__BB3_882:
// %r7609 = exponent for state %r7542: 0 for 0-2, 1 for 3-5, 2 for 6-8,
// 3 for 9-10, 4 for 11, 5 otherwise.
setp.lt.u32 %p998, %r7542, 3;
mov.u32 %r7609, 0;
@%p998 bra $L__BB3_885;
setp.lt.u32 %p999, %r7542, 6;
mov.u32 %r7609, 1;
@%p999 bra $L__BB3_885;
setp.lt.u32 %p1000, %r7542, 9;
setp.eq.s32 %p1001, %r7542, 11;
selp.b32 %r5356, 4, 5, %p1001;
setp.lt.u32 %p1002, %r7542, 11;
selp.b32 %r5357, 3, %r5356, %p1002;
selp.b32 %r7609, 2, %r5357, %p1000;
setp.eq.s32 %p1003, %r7609, 0;
@%p1003 bra $L__BB3_913;
add.s32 %r2258, %r7609, -1;
and.b32 %r2259, %r7609, 3;
setp.eq.s32 %p1004, %r2259, 0;
mov.u32 %r7619, %r7609;
mov.u32 %r7622, %r7639;
@%p1004 bra $L__BB3_898;
mov.u32 %r5359, 1;
shl.b32 %r5360, %r5359, %r2258;
and.b32 %r5361, %r5360, %r7543;
setp.ne.s32 %p1005, %r5361, 0;
selp.u32 %r5362, 1, 0, %p1005;
cvt.u32.u16 %r5363, %rs1089;
bfi.b32 %r5364, %r5363, %r5362, 1, 8;
cvt.u16.u32 %rs1089, %r5364;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p1006, %r7388, 0;
mov.u32 %r7622, %r7639;
@%p1006 bra $L__BB3_890;
setp.gt.u32 %p1007, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7622, %r5359;
@%p1007 bra $L__BB3_890;
add.s32 %r5368, %r7382, 17477;
cvt.u64.u32 %rd493, %r5368;
add.s64 %rd494, %rd493, %rd5;
add.s64 %rd495, %rd1, %rd494;
st.global.u8 [%rd495], %rs1089;
add.s32 %r7382, %r7382, 1;
mov.u32 %r7388, 8;
mov.u16 %rs1089, 0;
mov.u32 %r7622, %r7639;
$L__BB3_890:
setp.eq.s32 %p1008, %r2259, 1;
mov.u32 %r7639, %r7622;
mov.u32 %r7619, %r2258;
@%p1008 bra $L__BB3_898;
add.s32 %r7619, %r7609, -2;
mov.u32 %r5369, 1;
shl.b32 %r5370, %r5369, %r7619;
and.b32 %r5371, %r5370, %r7543;
setp.ne.s32 %p1009, %r5371, 0;
selp.u32 %r5372, 1, 0, %p1009;
cvt.u32.u16 %r5373, %rs1089;
bfi.b32 %r5374, %r5373, %r5372, 1, 8;
cvt.u16.u32 %rs1089, %r5374;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p1010, %r7388, 0;
mov.u32 %r7613, %r7622;
@%p1010 bra $L__BB3_894;
setp.gt.u32 %p1011, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7613, %r5369;
@%p1011 bra $L__BB3_894;
add.s32 %r5377, %r7382, 17477;
cvt.u64.u32 %rd496, %r5377;
add.s64 %rd497, %rd496, %rd5;
add.s64 %rd498, %rd1, %rd497;
and.b16 %rs785, %rs1089, 255;
st.global.u8 [%rd498], %rs1089;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1012, %rs785, 255;
selp.b32 %r7388, 7, 8, %p1012;
mov.u16 %rs1089, 0;
mov.u32 %r7613, %r7622;
$L__BB3_894:
setp.eq.s32 %p1013, %r2259, 2;
mov.u32 %r7639, %r7613;
mov.u32 %r7622, %r7613;
@%p1013 bra $L__BB3_898;
add.s32 %r7619, %r7609, -3;
mov.u32 %r5378, 1;
shl.b32 %r5379, %r5378, %r7619;
and.b32 %r5380, %r5379, %r7543;
setp.ne.s32 %p1014, %r5380, 0;
selp.u32 %r5381, 1, 0, %p1014;
cvt.u32.u16 %r5382, %rs1089;
bfi.b32 %r5383, %r5382, %r5381, 1, 8;
cvt.u16.u32 %rs1089, %r5383;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p1015, %r7388, 0;
mov.u32 %r7639, %r7613;
mov.u32 %r7622, %r7613;
@%p1015 bra $L__BB3_898;
setp.gt.u32 %p1016, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7639, %r5378;
mov.u32 %r7622, %r5378;
@%p1016 bra $L__BB3_898;
add.s32 %r5388, %r7382, 17477;
cvt.u64.u32 %rd499, %r5388;
add.s64 %rd500, %rd499, %rd5;
add.s64 %rd501, %rd1, %rd500;
and.b16 %rs788, %rs1089, 255;
st.global.u8 [%rd501], %rs1089;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1017, %rs788, 255;
selp.b32 %r7388, 7, 8, %p1017;
mov.u16 %rs1089, 0;
mov.u32 %r7639, %r7613;
mov.u32 %r7622, %r7613;
// "Run terminates" path, part 3: emit the remaining run-length bits, four per
// iteration, from bit (%r7619 - 1) down to bit (%r7619 - 4) of %r7543.
// Skipped entirely when the total bit count was below 4 (%r2258 < 3).
//   %r7628/%r7632/%r7636/%rs1089 = byte being assembled after each bit
//   %r7629/%r7633/%r7637/%r7388 = bits still free after each bit
//   %r7627/%r7631/%r7635/%r7639 = status after each bit (1 on 192-byte limit)
$L__BB3_898:
setp.lt.u32 %p1018, %r2258, 3;
@%p1018 bra $L__BB3_913;
mov.u32 %r7639, %r7622;
$L__BB3_900:
// Bit (%r7619 - 1): byte = (byte << 1) | bit.
add.s32 %r5389, %r7619, -1;
mov.u32 %r5390, 1;
shl.b32 %r5391, %r5390, %r5389;
and.b32 %r5392, %r5391, %r7543;
setp.ne.s32 %p1019, %r5392, 0;
selp.u32 %r5393, 1, 0, %p1019;
cvt.u32.u16 %r5394, %rs1089;
bfi.b32 %r7628, %r5394, %r5393, 1, 8;
add.s32 %r7629, %r7388, -1;
setp.ne.s32 %p1020, %r7629, 0;
mov.u32 %r7627, %r7639;
@%p1020 bra $L__BB3_903;
// Byte complete. Refuse to write (status 1) once %r7382 exceeds 191.
setp.gt.u32 %p1021, %r7382, 191;
mov.u32 %r7629, 0;
mov.u32 %r7627, %r5390;
@%p1021 bra $L__BB3_903;
// Store at %rd1 + %rd5 + 17477 + %r7382; 7 free bits after a 255, else 8.
cvt.u16.u32 %rs789, %r7628;
and.b16 %rs790, %rs789, 255;
add.s32 %r5398, %r7382, 17477;
cvt.u64.u32 %rd502, %r5398;
add.s64 %rd503, %rd502, %rd5;
add.s64 %rd504, %rd1, %rd503;
st.global.u8 [%rd504], %rs789;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1022, %rs790, 255;
selp.b32 %r7629, 7, 8, %p1022;
mov.u32 %r7628, 0;
mov.u32 %r7627, %r7639;
$L__BB3_903:
// Bit (%r7619 - 2).
add.s32 %r5399, %r7619, -2;
shl.b32 %r5401, %r5390, %r5399;
and.b32 %r5402, %r5401, %r7543;
setp.ne.s32 %p1023, %r5402, 0;
and.b32 %r5403, %r7628, 127;
selp.u32 %r5404, 1, 0, %p1023;
bfi.b32 %r7632, %r5403, %r5404, 1, 7;
add.s32 %r7633, %r7629, -1;
setp.ne.s32 %p1024, %r7633, 0;
mov.u32 %r7631, %r7627;
@%p1024 bra $L__BB3_906;
setp.gt.u32 %p1025, %r7382, 191;
mov.u32 %r7633, 0;
mov.u32 %r7631, 1;
@%p1025 bra $L__BB3_906;
cvt.u16.u32 %rs791, %r7632;
and.b16 %rs792, %rs791, 255;
add.s32 %r5408, %r7382, 17477;
cvt.u64.u32 %rd505, %r5408;
add.s64 %rd506, %rd505, %rd5;
add.s64 %rd507, %rd1, %rd506;
st.global.u8 [%rd507], %rs791;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1026, %rs792, 255;
selp.b32 %r7633, 7, 8, %p1026;
mov.u32 %r7632, 0;
mov.u32 %r7631, %r7627;
$L__BB3_906:
// Bit (%r7619 - 3).
add.s32 %r5409, %r7619, -3;
mov.u32 %r5410, 1;
shl.b32 %r5411, %r5410, %r5409;
and.b32 %r5412, %r5411, %r7543;
setp.ne.s32 %p1027, %r5412, 0;
and.b32 %r5413, %r7632, 127;
selp.u32 %r5414, 1, 0, %p1027;
bfi.b32 %r7636, %r5413, %r5414, 1, 7;
add.s32 %r7637, %r7633, -1;
setp.ne.s32 %p1028, %r7637, 0;
mov.u32 %r7635, %r7631;
@%p1028 bra $L__BB3_909;
setp.gt.u32 %p1029, %r7382, 191;
mov.u32 %r7637, 0;
mov.u32 %r7635, %r5410;
@%p1029 bra $L__BB3_909;
cvt.u16.u32 %rs793, %r7636;
and.b16 %rs794, %rs793, 255;
add.s32 %r5418, %r7382, 17477;
cvt.u64.u32 %rd508, %r5418;
add.s64 %rd509, %rd508, %rd5;
add.s64 %rd510, %rd1, %rd509;
st.global.u8 [%rd510], %rs793;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1030, %rs794, 255;
selp.b32 %r7637, 7, 8, %p1030;
mov.u32 %r7636, 0;
mov.u32 %r7635, %r7631;
$L__BB3_909:
// Bit (%r7619 - 4); %r7619 is decremented by 4 here and doubles as the
// loop counter.
add.s32 %r7619, %r7619, -4;
shl.b32 %r5420, %r5410, %r7619;
and.b32 %r5421, %r5420, %r7543;
setp.ne.s32 %p1031, %r5421, 0;
and.b32 %r5422, %r7636, 127;
selp.u32 %r5423, 1, 0, %p1031;
bfi.b32 %r5424, %r5422, %r5423, 1, 15;
cvt.u16.u32 %rs1089, %r5424;
add.s32 %r7388, %r7637, -1;
setp.ne.s32 %p1032, %r7388, 0;
mov.u32 %r7639, %r7635;
@%p1032 bra $L__BB3_912;
setp.gt.u32 %p1033, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7639, 1;
@%p1033 bra $L__BB3_912;
add.s32 %r5427, %r7382, 17477;
cvt.u64.u32 %rd511, %r5427;
add.s64 %rd512, %rd511, %rd5;
add.s64 %rd513, %rd1, %rd512;
and.b16 %rs796, %rs1089, 255;
st.global.u8 [%rd513], %rs1089;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1034, %rs796, 255;
selp.b32 %r7388, 7, 8, %p1034;
mov.u16 %rs1089, 0;
mov.u32 %r7639, %r7635;
$L__BB3_912:
// Loop until every run-length bit has been emitted.
setp.ne.s32 %p1035, %r7619, 0;
@%p1035 bra $L__BB3_900;
// End of the "run terminates" path: step the coder state down by one
// (not below 0), clear the run length and recompute the run threshold.
$L__BB3_913:
add.s32 %r5429, %r7542, -1;
setp.eq.s32 %p1036, %r7542, 0;
mov.u32 %r7543, 0;
selp.b32 %r7542, 0, %r5429, %p1036;
// Exponent for the new state: 0 for 0-2, 1 for 3-5, 2 for 6-8,
// 3 for 9-10, 4 for 11, 5 otherwise.
setp.lt.u32 %p1037, %r7542, 3;
mov.u32 %r7645, %r7543;
@%p1037 bra $L__BB3_916;
setp.lt.u32 %p1038, %r7542, 6;
mov.u32 %r7645, 1;
@%p1038 bra $L__BB3_916;
setp.lt.u32 %p1039, %r7542, 9;
setp.eq.s32 %p1040, %r7542, 11;
selp.b32 %r5431, 4, 5, %p1040;
setp.lt.u32 %p1041, %r7542, 11;
selp.b32 %r5432, 3, %r5431, %p1041;
selp.b32 %r7645, 2, %r5432, %p1039;
$L__BB3_916:
// New threshold = 1 << exponent; publish the status.
mov.u32 %r5434, 1;
shl.b32 %r7541, %r5434, %r7645;
mov.u32 %r7540, %r7639;
// Join point of all run-length-coder paths. Selects how the table entries for
// %r1782 and %r2041 are emitted: both > 2 -> $L__BB3_974, otherwise
// $L__BB3_926 (which reuses %p1052).
$L__BB3_925:
setp.gt.s32 %p1051, %r2041, 2;
setp.gt.s32 %p1052, %r1782, 2;
and.pred %p1053, %p1052, %p1051;
@%p1053 bra $L__BB3_974;
bra.uni $L__BB3_926;
// Case %r1782 > 2 and %r2041 > 2. Reads two (value, bit-count) byte pairs for
// each index from the table at %rd48, using a 6-byte stride and a base of
// (index - 2) * 6:
//   %r1782: pair 0 = ([rd31-1], %rs335), pair 1 = (%rs336, %rs337)
//   %r2041: pair 0 = (%rs338, %rs339),   pair 1 = (%rs340, %rs341)
// Emission order in this and the following blocks: %r1782 pair 0,
// %r2041 pair 0, %r1782 pair 1, %r2041 pair 1.
// NOTE(review): presumably HTJ2K u-VLC prefix/suffix codes - confirm.
$L__BB3_974:
mul.lo.s32 %r5564, %r1782, 6;
add.s32 %r5565, %r5564, -11;
cvt.u64.u32 %rd547, %r5565;
cvta.to.global.u64 %rd548, %rd48;
add.s64 %rd31, %rd548, %rd547;
ld.global.u8 %rs335, [%rd31];
add.s32 %r5566, %r5564, -10;
cvt.u64.u32 %rd549, %r5566;
add.s64 %rd550, %rd548, %rd549;
ld.global.u8 %rs336, [%rd550];
ld.global.u8 %rs337, [%rd550+1];
mul.lo.s32 %r5567, %r2041, 6;
add.s32 %r5568, %r5567, -12;
cvt.u64.u32 %rd551, %r5568;
add.s64 %rd552, %rd548, %rd551;
ld.global.u8 %rs338, [%rd552];
ld.global.u8 %rs339, [%rd552+1];
add.s32 %r5569, %r5567, -10;
cvt.u64.u32 %rd553, %r5569;
add.s64 %rd554, %rd548, %rd553;
ld.global.u8 %rs340, [%rd554];
ld.global.u8 %rs341, [%rd554+1];
// %r1782 pair 0: skipped when its bit count is zero. Status out is %r7743.
setp.eq.s16 %p1121, %rs335, 0;
mov.u32 %r7743, %r7495;
@%p1121 bra $L__BB3_981;
ld.global.u8 %r7733, [%rd31+-1];
cvt.u32.u16 %r7732, %rs335;
// Bit writer for the stream that grows downward from %rd1 + %rd5 + 20548.
//   %rs1147 = byte being assembled (filled LSB first), %r7778 = bits in it,
//   %r7779 = bytes written so far,
//   %r7777 = 1 while the current byte is limited to 7 bits.
$L__BB3_976:
mov.u32 %r2469, %r7732;
// Stop with status 1 once %r7779 exceeds 2879.
setp.gt.u32 %p1122, %r7779, 2879;
mov.u32 %r7743, 1;
@%p1122 bra $L__BB3_981;
// room = 8 - %r7777 - %r7778; n = min(room, bits left).
mov.u32 %r5571, 8;
sub.s32 %r5572, %r5571, %r7777;
sub.s32 %r5573, %r5572, %r7778;
min.u32 %r5574, %r5573, %r2469;
// mask = low n bits (all ones when n == 32).
setp.eq.s32 %p1123, %r5574, 32;
mov.u32 %r5575, -1;
shl.b32 %r5576, %r5575, %r5574;
not.b32 %r5577, %r5576;
selp.b32 %r5578, -1, %r5577, %p1123;
and.b32 %r5579, %r5578, %r7733;
shl.b32 %r5580, %r5579, %r7778;
cvt.u16.u32 %rs832, %r5580;
or.b16 %rs1147, %rs1147, %rs832;
add.s32 %r7778, %r5574, %r7778;
sub.s32 %r7732, %r2469, %r5574;
shr.u32 %r7733, %r7733, %r5574;
// Room left over: the byte is not complete yet.
setp.gt.u32 %p1124, %r5573, %r2469;
@%p1124 bra $L__BB3_980;
// The byte reached its current limit. If it was limited to 7 bits and is not
// 127, lift the limit (%r7777 = 0) and keep filling instead of storing.
setp.ne.s32 %p1125, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs833, %rs1147, 255;
setp.ne.s16 %p1126, %rs833, 127;
and.pred %p1127, %p1125, %p1126;
@%p1127 bra $L__BB3_980;
// Store at offset 20548 - %r7779, then limit the next byte to 7 bits when
// the stored byte is greater than 143.
mov.u32 %r5583, 20548;
sub.s32 %r5584, %r5583, %r7779;
cvt.u64.u32 %rd555, %r5584;
add.s64 %rd556, %rd555, %rd5;
add.s64 %rd557, %rd1, %rd556;
st.global.u8 [%rd557], %rs1147;
add.s32 %r7779, %r7779, 1;
setp.gt.u16 %p1128, %rs833, 143;
selp.u32 %r7777, 1, 0, %p1128;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_980:
// Repeat while bits remain; on normal completion the status is %r7495.
setp.ne.s32 %p1129, %r7732, 0;
mov.u32 %r7743, %r7495;
@%p1129 bra $L__BB3_976;
// Emit value %rs338 in %rs339 bits (LSB first) to the stream that grows
// downward from %rd1 + %rd5 + 20548; skipped when %rs339 is zero.
// Status out is %r7755: 1 if the 2880-byte limit was hit, else %r7743.
//   %rs1147 = byte being assembled, %r7778 = bits in it,
//   %r7779 = bytes written so far,
//   %r7777 = 1 while the current byte is limited to 7 bits.
$L__BB3_981:
setp.eq.s16 %p1130, %rs339, 0;
mov.u32 %r7755, %r7743;
@%p1130 bra $L__BB3_988;
cvt.u32.u16 %r5585, %rs338;
and.b32 %r7745, %r5585, 255;
cvt.u32.u16 %r5586, %rs339;
and.b32 %r7744, %r5586, 255;
$L__BB3_983:
mov.u32 %r2488, %r7744;
// Stop with status 1 once %r7779 exceeds 2879.
setp.gt.u32 %p1131, %r7779, 2879;
mov.u32 %r7755, 1;
@%p1131 bra $L__BB3_988;
// room = 8 - %r7777 - %r7778; n = min(room, bits left).
mov.u32 %r5588, 8;
sub.s32 %r5589, %r5588, %r7777;
sub.s32 %r5590, %r5589, %r7778;
min.u32 %r5591, %r5590, %r2488;
// mask = low n bits (all ones when n == 32).
setp.eq.s32 %p1132, %r5591, 32;
mov.u32 %r5592, -1;
shl.b32 %r5593, %r5592, %r5591;
not.b32 %r5594, %r5593;
selp.b32 %r5595, -1, %r5594, %p1132;
and.b32 %r5596, %r5595, %r7745;
shl.b32 %r5597, %r5596, %r7778;
cvt.u16.u32 %rs837, %r5597;
or.b16 %rs1147, %rs1147, %rs837;
add.s32 %r7778, %r5591, %r7778;
sub.s32 %r7744, %r2488, %r5591;
shr.u32 %r7745, %r7745, %r5591;
// Room left over: the byte is not complete yet.
setp.gt.u32 %p1133, %r5590, %r2488;
@%p1133 bra $L__BB3_987;
// The byte reached its current limit. If it was limited to 7 bits and is not
// 127, lift the limit (%r7777 = 0) and keep filling instead of storing.
setp.ne.s32 %p1134, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs838, %rs1147, 255;
setp.ne.s16 %p1135, %rs838, 127;
and.pred %p1136, %p1134, %p1135;
@%p1136 bra $L__BB3_987;
// Store at offset 20548 - %r7779, then limit the next byte to 7 bits when
// the stored byte is greater than 143.
mov.u32 %r5600, 20548;
sub.s32 %r5601, %r5600, %r7779;
cvt.u64.u32 %rd558, %r5601;
add.s64 %rd559, %rd558, %rd5;
add.s64 %rd560, %rd1, %rd559;
st.global.u8 [%rd560], %rs1147;
add.s32 %r7779, %r7779, 1;
setp.gt.u16 %p1137, %rs838, 143;
selp.u32 %r7777, 1, 0, %p1137;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_987:
// Repeat while bits remain; on normal completion the incoming status is kept.
setp.ne.s32 %p1138, %r7744, 0;
mov.u32 %r7755, %r7743;
@%p1138 bra $L__BB3_983;
// Emit value %rs336 in %rs337 bits (LSB first) to the stream that grows
// downward from %rd1 + %rd5 + 20548; skipped when %rs337 is zero.
// Status out is %r7767: 1 if the 2880-byte limit was hit, else %r7755.
//   %rs1147 = byte being assembled, %r7778 = bits in it,
//   %r7779 = bytes written so far,
//   %r7777 = 1 while the current byte is limited to 7 bits.
$L__BB3_988:
setp.eq.s16 %p1139, %rs337, 0;
mov.u32 %r7767, %r7755;
@%p1139 bra $L__BB3_995;
cvt.u32.u16 %r5602, %rs337;
and.b32 %r7756, %r5602, 255;
cvt.u32.u16 %r5603, %rs336;
and.b32 %r7757, %r5603, 255;
$L__BB3_990:
mov.u32 %r2507, %r7756;
// Stop with status 1 once %r7779 exceeds 2879.
setp.gt.u32 %p1140, %r7779, 2879;
mov.u32 %r7767, 1;
@%p1140 bra $L__BB3_995;
// room = 8 - %r7777 - %r7778; n = min(room, bits left).
mov.u32 %r5605, 8;
sub.s32 %r5606, %r5605, %r7777;
sub.s32 %r5607, %r5606, %r7778;
min.u32 %r5608, %r5607, %r2507;
// mask = low n bits (all ones when n == 32).
setp.eq.s32 %p1141, %r5608, 32;
mov.u32 %r5609, -1;
shl.b32 %r5610, %r5609, %r5608;
not.b32 %r5611, %r5610;
selp.b32 %r5612, -1, %r5611, %p1141;
and.b32 %r5613, %r5612, %r7757;
shl.b32 %r5614, %r5613, %r7778;
cvt.u16.u32 %rs842, %r5614;
or.b16 %rs1147, %rs1147, %rs842;
add.s32 %r7778, %r5608, %r7778;
sub.s32 %r7756, %r2507, %r5608;
shr.u32 %r7757, %r7757, %r5608;
// Room left over: the byte is not complete yet.
setp.gt.u32 %p1142, %r5607, %r2507;
@%p1142 bra $L__BB3_994;
// The byte reached its current limit. If it was limited to 7 bits and is not
// 127, lift the limit (%r7777 = 0) and keep filling instead of storing.
setp.ne.s32 %p1143, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs843, %rs1147, 255;
setp.ne.s16 %p1144, %rs843, 127;
and.pred %p1145, %p1143, %p1144;
@%p1145 bra $L__BB3_994;
// Store at offset 20548 - %r7779, then limit the next byte to 7 bits when
// the stored byte is greater than 143.
mov.u32 %r5617, 20548;
sub.s32 %r5618, %r5617, %r7779;
cvt.u64.u32 %rd561, %r5618;
add.s64 %rd562, %rd561, %rd5;
add.s64 %rd563, %rd1, %rd562;
st.global.u8 [%rd563], %rs1147;
add.s32 %r7779, %r7779, 1;
setp.gt.u16 %p1146, %rs843, 143;
selp.u32 %r7777, 1, 0, %p1146;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_994:
// Repeat while bits remain; on normal completion the incoming status is kept.
setp.ne.s32 %p1147, %r7756, 0;
mov.u32 %r7767, %r7755;
@%p1147 bra $L__BB3_990;
// Emit value %rs340 in %rs341 bits (LSB first) to the stream that grows
// downward from %rd1 + %rd5 + 20548; skipped when %rs341 is zero.
// Status out is %r7776: 1 if the 2880-byte limit was hit, else %r7767.
// Control continues at $L__BB3_1002 in every case.
//   %rs1147 = byte being assembled, %r7778 = bits in it,
//   %r7779 = bytes written so far,
//   %r7777 = 1 while the current byte is limited to 7 bits.
$L__BB3_995:
setp.eq.s16 %p1148, %rs341, 0;
mov.u32 %r7776, %r7767;
@%p1148 bra $L__BB3_1002;
cvt.u32.u16 %r5619, %rs340;
and.b32 %r7769, %r5619, 255;
cvt.u32.u16 %r5620, %rs341;
and.b32 %r7768, %r5620, 255;
$L__BB3_997:
mov.u32 %r2526, %r7768;
// Stop with status 1 once %r7779 exceeds 2879.
setp.gt.u32 %p1149, %r7779, 2879;
mov.u32 %r7776, 1;
@%p1149 bra $L__BB3_1002;
// room = 8 - %r7777 - %r7778; n = min(room, bits left).
mov.u32 %r5622, 8;
sub.s32 %r5623, %r5622, %r7777;
sub.s32 %r5624, %r5623, %r7778;
min.u32 %r5625, %r5624, %r2526;
// mask = low n bits (all ones when n == 32).
setp.eq.s32 %p1150, %r5625, 32;
mov.u32 %r5626, -1;
shl.b32 %r5627, %r5626, %r5625;
not.b32 %r5628, %r5627;
selp.b32 %r5629, -1, %r5628, %p1150;
and.b32 %r5630, %r5629, %r7769;
shl.b32 %r5631, %r5630, %r7778;
cvt.u16.u32 %rs847, %r5631;
or.b16 %rs1147, %rs1147, %rs847;
add.s32 %r7778, %r5625, %r7778;
sub.s32 %r7768, %r2526, %r5625;
shr.u32 %r7769, %r7769, %r5625;
// Room left over: the byte is not complete yet.
setp.gt.u32 %p1151, %r5624, %r2526;
@%p1151 bra $L__BB3_1001;
// The byte reached its current limit. If it was limited to 7 bits and is not
// 127, lift the limit (%r7777 = 0) and keep filling instead of storing.
setp.ne.s32 %p1152, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs848, %rs1147, 255;
setp.ne.s16 %p1153, %rs848, 127;
and.pred %p1154, %p1152, %p1153;
@%p1154 bra $L__BB3_1001;
// Store at offset 20548 - %r7779, then limit the next byte to 7 bits when
// the stored byte is greater than 143.
mov.u32 %r5634, 20548;
sub.s32 %r5635, %r5634, %r7779;
cvt.u64.u32 %rd564, %r5635;
add.s64 %rd565, %rd564, %rd5;
add.s64 %rd566, %rd1, %rd565;
st.global.u8 [%rd566], %rs1147;
add.s32 %r7779, %r7779, 1;
setp.gt.u16 %p1155, %rs848, 143;
selp.u32 %r7777, 1, 0, %p1155;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_1001:
// Repeat while bits remain; on normal completion the incoming status is kept.
setp.ne.s32 %p1156, %r7768, 0;
mov.u32 %r7776, %r7767;
@%p1156 bra $L__BB3_997;
bra.uni $L__BB3_1002;
// Reached when %r1782 and %r2041 are not both > 2.
// If %r1782 > 2 (%p1052, set at $L__BB3_925) and %r2041 > 0, use the mixed
// case at $L__BB3_955; otherwise the clamped-index case at $L__BB3_927.
$L__BB3_926:
setp.gt.s32 %p1054, %r2041, 0;
and.pred %p1056, %p1052, %p1054;
mul.lo.s32 %r2342, %r1782, 6;
@%p1056 bra $L__BB3_955;
bra.uni $L__BB3_927;
// Mixed case: load bytes 0..3 of the 6-byte table entry at %rd48 + %r1782 * 6:
// byte 0 = value, %rs321 = its bit count, %rs322 = second value,
// %rs323 = its bit count.
$L__BB3_955:
cvt.u64.u32 %rd534, %r2342;
cvta.to.global.u64 %rd535, %rd48;
add.s64 %rd30, %rd535, %rd534;
ld.global.u8 %rs321, [%rd30+1];
add.s32 %r5515, %r2342, 2;
cvt.u64.u32 %rd536, %r5515;
add.s64 %rd537, %rd535, %rd536;
ld.global.u8 %rs322, [%rd537];
ld.global.u8 %rs323, [%rd537+1];
// First pair: skipped when its bit count is zero. Status out is %r7711.
setp.eq.s16 %p1095, %rs321, 0;
mov.u32 %r7711, %r7495;
@%p1095 bra $L__BB3_962;
ld.global.u8 %r7701, [%rd30];
cvt.u32.u16 %r7700, %rs321;
// Bit writer for the stream that grows downward from %rd1 + %rd5 + 20548.
//   %rs1147 = byte being assembled (filled LSB first), %r7778 = bits in it,
//   %r7779 = bytes written so far,
//   %r7777 = 1 while the current byte is limited to 7 bits.
$L__BB3_957:
mov.u32 %r2417, %r7700;
// Stop with status 1 once %r7779 exceeds 2879.
setp.gt.u32 %p1096, %r7779, 2879;
mov.u32 %r7711, 1;
@%p1096 bra $L__BB3_962;
// room = 8 - %r7777 - %r7778; n = min(room, bits left).
mov.u32 %r5517, 8;
sub.s32 %r5518, %r5517, %r7777;
sub.s32 %r5519, %r5518, %r7778;
min.u32 %r5520, %r5519, %r2417;
// mask = low n bits (all ones when n == 32).
setp.eq.s32 %p1097, %r5520, 32;
mov.u32 %r5521, -1;
shl.b32 %r5522, %r5521, %r5520;
not.b32 %r5523, %r5522;
selp.b32 %r5524, -1, %r5523, %p1097;
and.b32 %r5525, %r5524, %r7701;
shl.b32 %r5526, %r5525, %r7778;
cvt.u16.u32 %rs819, %r5526;
or.b16 %rs1147, %rs1147, %rs819;
add.s32 %r7778, %r5520, %r7778;
sub.s32 %r7700, %r2417, %r5520;
shr.u32 %r7701, %r7701, %r5520;
// Room left over: the byte is not complete yet.
setp.gt.u32 %p1098, %r5519, %r2417;
@%p1098 bra $L__BB3_961;
// The byte reached its current limit. If it was limited to 7 bits and is not
// 127, lift the limit (%r7777 = 0) and keep filling instead of storing.
setp.ne.s32 %p1099, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs820, %rs1147, 255;
setp.ne.s16 %p1100, %rs820, 127;
and.pred %p1101, %p1099, %p1100;
@%p1101 bra $L__BB3_961;
// Store at offset 20548 - %r7779, then limit the next byte to 7 bits when
// the stored byte is greater than 143.
mov.u32 %r5529, 20548;
sub.s32 %r5530, %r5529, %r7779;
cvt.u64.u32 %rd538, %r5530;
add.s64 %rd539, %rd538, %rd5;
add.s64 %rd540, %rd1, %rd539;
st.global.u8 [%rd540], %rs1147;
add.s32 %r7779, %r7779, 1;
setp.gt.u16 %p1102, %rs820, 143;
selp.u32 %r7777, 1, 0, %p1102;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_961:
// Repeat while bits remain; on normal completion the status is %r7495.
setp.ne.s32 %p1103, %r7700, 0;
mov.u32 %r7711, %r7495;
@%p1103 bra $L__BB3_957;
// Mixed case, second field: emit a single bit, bit 0 of (%r2041 - 1), to the
// stream that grows downward from %rd1 + %rd5 + 20548.
// Also prepares %r7725 (value, from %rs322) and %r7724 (bit count, from
// %rs323) for the block at $L__BB3_968.
// Status out is %r7723: 1 if the 2880-byte limit was hit, else %r7711.
//   %rs1147 = byte being assembled, %r7778 = bits in it,
//   %r7779 = bytes written so far,
//   %r7777 = 1 while the current byte is limited to 7 bits.
$L__BB3_962:
add.s32 %r7713, %r2041, -1;
cvt.u32.u16 %r5532, %rs323;
and.b32 %r7724, %r5532, 255;
cvt.u32.u16 %r5533, %rs322;
and.b32 %r7725, %r5533, 255;
// %r7712 = bits left to emit, starting at 1.
mov.u32 %r5531, 1;
mov.u32 %r7712, %r5531;
$L__BB3_963:
mov.u32 %r2437, %r7712;
// Stop with status 1 (%r5531) once %r7779 exceeds 2879.
setp.gt.u32 %p1104, %r7779, 2879;
mov.u32 %r7723, %r5531;
@%p1104 bra $L__BB3_968;
// room = 8 - %r7777 - %r7778; n = min(room, bits left).
mov.u32 %r5535, 8;
sub.s32 %r5536, %r5535, %r7777;
sub.s32 %r5537, %r5536, %r7778;
min.u32 %r5538, %r5537, %r2437;
// mask = low n bits (all ones when n == 32).
setp.eq.s32 %p1105, %r5538, 32;
mov.u32 %r5539, -1;
shl.b32 %r5540, %r5539, %r5538;
not.b32 %r5541, %r5540;
selp.b32 %r5542, -1, %r5541, %p1105;
and.b32 %r5543, %r5542, %r7713;
shl.b32 %r5544, %r5543, %r7778;
cvt.u16.u32 %rs823, %r5544;
or.b16 %rs1147, %rs1147, %rs823;
add.s32 %r7778, %r5538, %r7778;
sub.s32 %r7712, %r2437, %r5538;
shr.u32 %r7713, %r7713, %r5538;
// Room left over: the byte is not complete yet.
setp.gt.u32 %p1106, %r5537, %r2437;
@%p1106 bra $L__BB3_967;
// The byte reached its current limit. If it was limited to 7 bits and is not
// 127, lift the limit (%r7777 = 0) and keep filling instead of storing.
setp.ne.s32 %p1107, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs824, %rs1147, 255;
setp.ne.s16 %p1108, %rs824, 127;
and.pred %p1109, %p1107, %p1108;
@%p1109 bra $L__BB3_967;
// Store at offset 20548 - %r7779, then limit the next byte to 7 bits when
// the stored byte is greater than 143.
mov.u32 %r5547, 20548;
sub.s32 %r5548, %r5547, %r7779;
cvt.u64.u32 %rd541, %r5548;
add.s64 %rd542, %rd541, %rd5;
add.s64 %rd543, %rd1, %rd542;
st.global.u8 [%rd543], %rs1147;
add.s32 %r7779, %r7779, 1;
setp.gt.u16 %p1110, %rs824, 143;
selp.u32 %r7777, 1, 0, %p1110;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_967:
// Repeat while bits remain; on normal completion the incoming status is kept.
setp.ne.s32 %p1111, %r7712, 0;
mov.u32 %r7723, %r7711;
@%p1111 bra $L__BB3_963;
// Mixed case, last field: emit value %r7725 in %r7724 bits (LSB first; both
// were derived from %rs322 / %rs323 before this block) to the stream that
// grows downward from %rd1 + %rd5 + 20548. Skipped when %rs323 is zero.
// Status out is %r7776: 1 if the 2880-byte limit was hit, else %r7723.
// Control continues at $L__BB3_1002 in every case.
//   %rs1147 = byte being assembled, %r7778 = bits in it,
//   %r7779 = bytes written so far,
//   %r7777 = 1 while the current byte is limited to 7 bits.
$L__BB3_968:
setp.eq.s16 %p1112, %rs323, 0;
mov.u32 %r7776, %r7723;
@%p1112 bra $L__BB3_1002;
$L__BB3_969:
mov.u32 %r2454, %r7724;
// Stop with status 1 once %r7779 exceeds 2879.
setp.gt.u32 %p1113, %r7779, 2879;
mov.u32 %r7776, 1;
@%p1113 bra $L__BB3_1002;
// room = 8 - %r7777 - %r7778; n = min(room, bits left).
mov.u32 %r5550, 8;
sub.s32 %r5551, %r5550, %r7777;
sub.s32 %r5552, %r5551, %r7778;
min.u32 %r5553, %r5552, %r2454;
// mask = low n bits (all ones when n == 32).
setp.eq.s32 %p1114, %r5553, 32;
mov.u32 %r5554, -1;
shl.b32 %r5555, %r5554, %r5553;
not.b32 %r5556, %r5555;
selp.b32 %r5557, -1, %r5556, %p1114;
and.b32 %r5558, %r5557, %r7725;
shl.b32 %r5559, %r5558, %r7778;
cvt.u16.u32 %rs828, %r5559;
or.b16 %rs1147, %rs1147, %rs828;
add.s32 %r7778, %r5553, %r7778;
sub.s32 %r7724, %r2454, %r5553;
shr.u32 %r7725, %r7725, %r5553;
// Room left over: the byte is not complete yet.
setp.gt.u32 %p1115, %r5552, %r2454;
@%p1115 bra $L__BB3_973;
// The byte reached its current limit. If it was limited to 7 bits and is not
// 127, lift the limit (%r7777 = 0) and keep filling instead of storing.
setp.ne.s32 %p1116, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs829, %rs1147, 255;
setp.ne.s16 %p1117, %rs829, 127;
and.pred %p1118, %p1116, %p1117;
@%p1118 bra $L__BB3_973;
// Store at offset 20548 - %r7779, then limit the next byte to 7 bits when
// the stored byte is greater than 143.
mov.u32 %r5562, 20548;
sub.s32 %r5563, %r5562, %r7779;
cvt.u64.u32 %rd544, %r5563;
add.s64 %rd545, %rd544, %rd5;
add.s64 %rd546, %rd1, %rd545;
st.global.u8 [%rd546], %rs1147;
add.s32 %r7779, %r7779, 1;
setp.gt.u16 %p1119, %rs829, 143;
selp.u32 %r7777, 1, 0, %p1119;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_973:
// Done when no bits remain (incoming status kept); otherwise loop.
setp.eq.s32 %p1120, %r7724, 0;
mov.u32 %r7776, %r7723;
@%p1120 bra $L__BB3_1002;
bra.uni $L__BB3_969;
// Clamped-index case. Reads two (value, bit-count) byte pairs for each index
// from the table at %rd48, 6 bytes per entry, with the byte offset forced to 0
// when the index is not positive:
//   %r1782: pair 0 = ([rd29], %rs299),   pair 1 = (%rs300, %rs301)
//   %r2041: pair 0 = (%rs302, %rs303),   pair 1 = (%rs304, %rs305)
// (%r2342 = %r1782 * 6 and %p1054 = %r2041 > 0 come from $L__BB3_926.)
// Emission order in this and the following blocks: %r1782 pair 0,
// %r2041 pair 0, %r1782 pair 1, %r2041 pair 1.
$L__BB3_927:
setp.gt.s32 %p1058, %r1782, 0;
selp.b32 %r5444, %r2342, 0, %p1058;
cvt.u64.u32 %rd514, %r5444;
cvta.to.global.u64 %rd515, %rd48;
add.s64 %rd29, %rd515, %rd514;
ld.global.u8 %rs299, [%rd29+1];
add.s32 %r5445, %r5444, 2;
cvt.u64.u32 %rd516, %r5445;
add.s64 %rd517, %rd515, %rd516;
ld.global.u8 %rs300, [%rd517];
ld.global.u8 %rs301, [%rd517+1];
mul.lo.s32 %r5446, %r2041, 6;
selp.b32 %r5447, %r5446, 0, %p1054;
cvt.u64.u32 %rd518, %r5447;
add.s64 %rd519, %rd515, %rd518;
ld.global.u8 %rs302, [%rd519];
ld.global.u8 %rs303, [%rd519+1];
add.s32 %r5448, %r5447, 2;
cvt.u64.u32 %rd520, %r5448;
add.s64 %rd521, %rd515, %rd520;
ld.global.u8 %rs304, [%rd521];
ld.global.u8 %rs305, [%rd521+1];
// %r1782 pair 0: skipped when its bit count is zero. Status out is %r7667.
setp.eq.s16 %p1059, %rs299, 0;
mov.u32 %r7667, %r7495;
@%p1059 bra $L__BB3_934;
ld.global.u8 %r7657, [%rd29];
cvt.u32.u16 %r7656, %rs299;
// Bit writer for the stream that grows downward from %rd1 + %rd5 + 20548.
//   %rs1147 = byte being assembled (filled LSB first), %r7778 = bits in it,
//   %r7779 = bytes written so far,
//   %r7777 = 1 while the current byte is limited to 7 bits.
$L__BB3_929:
mov.u32 %r2345, %r7656;
// Stop with status 1 once %r7779 exceeds 2879.
setp.gt.u32 %p1060, %r7779, 2879;
mov.u32 %r7667, 1;
@%p1060 bra $L__BB3_934;
// room = 8 - %r7777 - %r7778; n = min(room, bits left).
mov.u32 %r5450, 8;
sub.s32 %r5451, %r5450, %r7777;
sub.s32 %r5452, %r5451, %r7778;
min.u32 %r5453, %r5452, %r2345;
// mask = low n bits (all ones when n == 32).
setp.eq.s32 %p1061, %r5453, 32;
mov.u32 %r5454, -1;
shl.b32 %r5455, %r5454, %r5453;
not.b32 %r5456, %r5455;
selp.b32 %r5457, -1, %r5456, %p1061;
and.b32 %r5458, %r5457, %r7657;
shl.b32 %r5459, %r5458, %r7778;
cvt.u16.u32 %rs800, %r5459;
or.b16 %rs1147, %rs1147, %rs800;
add.s32 %r7778, %r5453, %r7778;
sub.s32 %r7656, %r2345, %r5453;
shr.u32 %r7657, %r7657, %r5453;
// Room left over: the byte is not complete yet.
setp.gt.u32 %p1062, %r5452, %r2345;
@%p1062 bra $L__BB3_933;
// The byte reached its current limit. If it was limited to 7 bits and is not
// 127, lift the limit (%r7777 = 0) and keep filling instead of storing.
setp.ne.s32 %p1063, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs801, %rs1147, 255;
setp.ne.s16 %p1064, %rs801, 127;
and.pred %p1065, %p1063, %p1064;
@%p1065 bra $L__BB3_933;
// Store at offset 20548 - %r7779, then limit the next byte to 7 bits when
// the stored byte is greater than 143.
mov.u32 %r5462, 20548;
sub.s32 %r5463, %r5462, %r7779;
cvt.u64.u32 %rd522, %r5463;
add.s64 %rd523, %rd522, %rd5;
add.s64 %rd524, %rd1, %rd523;
st.global.u8 [%rd524], %rs1147;
add.s32 %r7779, %r7779, 1;
setp.gt.u16 %p1066, %rs801, 143;
selp.u32 %r7777, 1, 0, %p1066;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_933:
// Repeat while bits remain; on normal completion the status is %r7495.
setp.ne.s32 %p1067, %r7656, 0;
mov.u32 %r7667, %r7495;
@%p1067 bra $L__BB3_929;
// Emit value %rs302 in %rs303 bits (LSB first) to the stream that grows
// downward from %rd1 + %rd5 + 20548; skipped when %rs303 is zero.
// Status out is %r7679: 1 if the 2880-byte limit was hit, else %r7667.
//   %rs1147 = byte being assembled, %r7778 = bits in it,
//   %r7779 = bytes written so far,
//   %r7777 = 1 while the current byte is limited to 7 bits.
$L__BB3_934:
setp.eq.s16 %p1068, %rs303, 0;
mov.u32 %r7679, %r7667;
@%p1068 bra $L__BB3_941;
cvt.u32.u16 %r5464, %rs302;
and.b32 %r7669, %r5464, 255;
cvt.u32.u16 %r5465, %rs303;
and.b32 %r7668, %r5465, 255;
$L__BB3_936:
mov.u32 %r2364, %r7668;
// Stop with status 1 once %r7779 exceeds 2879.
setp.gt.u32 %p1069, %r7779, 2879;
mov.u32 %r7679, 1;
@%p1069 bra $L__BB3_941;
// room = 8 - %r7777 - %r7778; n = min(room, bits left).
mov.u32 %r5467, 8;
sub.s32 %r5468, %r5467, %r7777;
sub.s32 %r5469, %r5468, %r7778;
min.u32 %r5470, %r5469, %r2364;
// mask = low n bits (all ones when n == 32).
setp.eq.s32 %p1070, %r5470, 32;
mov.u32 %r5471, -1;
shl.b32 %r5472, %r5471, %r5470;
not.b32 %r5473, %r5472;
selp.b32 %r5474, -1, %r5473, %p1070;
and.b32 %r5475, %r5474, %r7669;
shl.b32 %r5476, %r5475, %r7778;
cvt.u16.u32 %rs805, %r5476;
or.b16 %rs1147, %rs1147, %rs805;
add.s32 %r7778, %r5470, %r7778;
sub.s32 %r7668, %r2364, %r5470;
shr.u32 %r7669, %r7669, %r5470;
// Room left over: the byte is not complete yet.
setp.gt.u32 %p1071, %r5469, %r2364;
@%p1071 bra $L__BB3_940;
// The byte reached its current limit. If it was limited to 7 bits and is not
// 127, lift the limit (%r7777 = 0) and keep filling instead of storing.
setp.ne.s32 %p1072, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs806, %rs1147, 255;
setp.ne.s16 %p1073, %rs806, 127;
and.pred %p1074, %p1072, %p1073;
@%p1074 bra $L__BB3_940;
// Store at offset 20548 - %r7779, then limit the next byte to 7 bits when
// the stored byte is greater than 143.
mov.u32 %r5479, 20548;
sub.s32 %r5480, %r5479, %r7779;
cvt.u64.u32 %rd525, %r5480;
add.s64 %rd526, %rd525, %rd5;
add.s64 %rd527, %rd1, %rd526;
st.global.u8 [%rd527], %rs1147;
add.s32 %r7779, %r7779, 1;
setp.gt.u16 %p1075, %rs806, 143;
selp.u32 %r7777, 1, 0, %p1075;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_940:
// Repeat while bits remain; on normal completion the incoming status is kept.
setp.ne.s32 %p1076, %r7668, 0;
mov.u32 %r7679, %r7667;
@%p1076 bra $L__BB3_936;
// Emit value %rs300 in %rs301 bits (LSB first) to the stream that grows
// downward from %rd1 + %rd5 + 20548; skipped when %rs301 is zero.
// Status out is %r7691: 1 if the 2880-byte limit was hit, else %r7679.
//   %rs1147 = byte being assembled, %r7778 = bits in it,
//   %r7779 = bytes written so far,
//   %r7777 = 1 while the current byte is limited to 7 bits.
$L__BB3_941:
setp.eq.s16 %p1077, %rs301, 0;
mov.u32 %r7691, %r7679;
@%p1077 bra $L__BB3_948;
cvt.u32.u16 %r5481, %rs301;
and.b32 %r7680, %r5481, 255;
cvt.u32.u16 %r5482, %rs300;
and.b32 %r7681, %r5482, 255;
$L__BB3_943:
mov.u32 %r2383, %r7680;
// Stop with status 1 once %r7779 exceeds 2879.
setp.gt.u32 %p1078, %r7779, 2879;
mov.u32 %r7691, 1;
@%p1078 bra $L__BB3_948;
// room = 8 - %r7777 - %r7778; n = min(room, bits left).
mov.u32 %r5484, 8;
sub.s32 %r5485, %r5484, %r7777;
sub.s32 %r5486, %r5485, %r7778;
min.u32 %r5487, %r5486, %r2383;
// mask = low n bits (all ones when n == 32).
setp.eq.s32 %p1079, %r5487, 32;
mov.u32 %r5488, -1;
shl.b32 %r5489, %r5488, %r5487;
not.b32 %r5490, %r5489;
selp.b32 %r5491, -1, %r5490, %p1079;
and.b32 %r5492, %r5491, %r7681;
shl.b32 %r5493, %r5492, %r7778;
cvt.u16.u32 %rs810, %r5493;
or.b16 %rs1147, %rs1147, %rs810;
add.s32 %r7778, %r5487, %r7778;
sub.s32 %r7680, %r2383, %r5487;
shr.u32 %r7681, %r7681, %r5487;
// Room left over: the byte is not complete yet.
setp.gt.u32 %p1080, %r5486, %r2383;
@%p1080 bra $L__BB3_947;
// The byte reached its current limit. If it was limited to 7 bits and is not
// 127, lift the limit (%r7777 = 0) and keep filling instead of storing.
setp.ne.s32 %p1081, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs811, %rs1147, 255;
setp.ne.s16 %p1082, %rs811, 127;
and.pred %p1083, %p1081, %p1082;
@%p1083 bra $L__BB3_947;
// Store at offset 20548 - %r7779, then limit the next byte to 7 bits when
// the stored byte is greater than 143.
mov.u32 %r5496, 20548;
sub.s32 %r5497, %r5496, %r7779;
cvt.u64.u32 %rd528, %r5497;
add.s64 %rd529, %rd528, %rd5;
add.s64 %rd530, %rd1, %rd529;
st.global.u8 [%rd530], %rs1147;
add.s32 %r7779, %r7779, 1;
setp.gt.u16 %p1084, %rs811, 143;
selp.u32 %r7777, 1, 0, %p1084;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_947:
// Repeat while bits remain; on normal completion the incoming status is kept.
setp.ne.s32 %p1085, %r7680, 0;
mov.u32 %r7691, %r7679;
@%p1085 bra $L__BB3_943;
// Emit value %rs304 in %rs305 bits (LSB first) to the stream that grows
// downward from %rd1 + %rd5 + 20548; skipped when %rs305 is zero.
// Status out is %r7776: 1 if the 2880-byte limit was hit, else %r7691.
// Control continues at $L__BB3_1002 in every case.
//   %rs1147 = byte being assembled, %r7778 = bits in it,
//   %r7779 = bytes written so far,
//   %r7777 = 1 while the current byte is limited to 7 bits.
$L__BB3_948:
setp.eq.s16 %p1086, %rs305, 0;
mov.u32 %r7776, %r7691;
@%p1086 bra $L__BB3_1002;
cvt.u32.u16 %r5498, %rs304;
and.b32 %r7693, %r5498, 255;
cvt.u32.u16 %r5499, %rs305;
and.b32 %r7692, %r5499, 255;
$L__BB3_950:
mov.u32 %r2402, %r7692;
// Stop with status 1 once %r7779 exceeds 2879.
setp.gt.u32 %p1087, %r7779, 2879;
mov.u32 %r7776, 1;
@%p1087 bra $L__BB3_1002;
// room = 8 - %r7777 - %r7778; n = min(room, bits left).
mov.u32 %r5501, 8;
sub.s32 %r5502, %r5501, %r7777;
sub.s32 %r5503, %r5502, %r7778;
min.u32 %r5504, %r5503, %r2402;
// mask = low n bits (all ones when n == 32).
setp.eq.s32 %p1088, %r5504, 32;
mov.u32 %r5505, -1;
shl.b32 %r5506, %r5505, %r5504;
not.b32 %r5507, %r5506;
selp.b32 %r5508, -1, %r5507, %p1088;
and.b32 %r5509, %r5508, %r7693;
shl.b32 %r5510, %r5509, %r7778;
cvt.u16.u32 %rs815, %r5510;
or.b16 %rs1147, %rs1147, %rs815;
add.s32 %r7778, %r5504, %r7778;
sub.s32 %r7692, %r2402, %r5504;
shr.u32 %r7693, %r7693, %r5504;
// Room left over: the byte is not complete yet.
setp.gt.u32 %p1089, %r5503, %r2402;
@%p1089 bra $L__BB3_954;
// The byte reached its current limit. If it was limited to 7 bits and is not
// 127, lift the limit (%r7777 = 0) and keep filling instead of storing.
setp.ne.s32 %p1090, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs816, %rs1147, 255;
setp.ne.s16 %p1091, %rs816, 127;
and.pred %p1092, %p1090, %p1091;
@%p1092 bra $L__BB3_954;
// Store at offset 20548 - %r7779, then limit the next byte to 7 bits when
// the stored byte is greater than 143.
mov.u32 %r5513, 20548;
sub.s32 %r5514, %r5513, %r7779;
cvt.u64.u32 %rd531, %r5514;
add.s64 %rd532, %rd531, %rd5;
add.s64 %rd533, %rd1, %rd532;
st.global.u8 [%rd533], %rs1147;
add.s32 %r7779, %r7779, 1;
setp.gt.u16 %p1093, %rs816, 143;
selp.u32 %r7777, 1, 0, %p1093;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_954:
// Done when no bits remain (incoming status kept); otherwise loop.
setp.eq.s32 %p1094, %r7692, 0;
mov.u32 %r7776, %r7691;
@%p1094 bra $L__BB3_1002;
bra.uni $L__BB3_950;
// Tail of the loop headed at $L__BB3_675 (outside this excerpt): advance the
// pointer %rd687 by 16 bytes and the counter %r7305 by 4, carry
// (%r7472 >> 1) | %r2158 into %r7306, and repeat while %r7305 < 64.
$L__BB3_1002:
add.s64 %rd687, %rd687, 16;
shr.u32 %r5636, %r7472, 1;
or.b32 %r7306, %r5636, %r2158;
add.s32 %r7305, %r7305, 4;
setp.lt.u32 %p1157, %r7305, 64;
@%p1157 bra $L__BB3_675;
// Set-up for the next phase: zero byte 33 of the shared cleanup_e_val array,
// form the global pointers %rd34 (from %rd48) and %rd35 (kernel parameter 3),
// and initialise %r7780 = 2 and the outer index %rd688 = 0.
// NOTE(review): the symbol names below contain a space, which is not a valid
// character in a PTX identifier - looks like an extraction artifact; confirm
// against the original compiler output before assembling.
ld.param.u64 %rd685, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_param_3];
mov.u16 %rs851, 0;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+33], %rs851;
add.s64 %rd33, %rd4, 128;
cvta.to.global.u64 %rd34, %rd48;
cvta.to.global.u64 %rd35, %rd685;
mov.u32 %r7780, 2;
mov.u64 %rd688, 0;
$L__BB3_1004:
// %rd689 = %rd3 + 4 * (%rd4 + 128 + 128 * %rd688): address of the first
// 32-bit sample read by the loop at $L__BB3_1005.
shl.b64 %rd568, %rd688, 7;
add.s64 %rd569, %rd33, %rd568;
shl.b64 %rd570, %rd569, 2;
add.s64 %rd689, %rd3, %rd570;
// %rs1154 = max(cleanup_e_val[0], cleanup_e_val[1]); cleanup_e_val[0] is
// then cleared.
ld.shared.u8 %rs1152, [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+1];
mov.u32 %r5639, 0;
ld.shared.u8 %rs854, [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val];
max.u16 %rs1154, %rs854, %rs1152;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val], %rs851;
// %r7798 = cleanup_cx_val[0] + 4 * cleanup_cx_val[1]; cleanup_cx_val[0] is
// then cleared.
ld.shared.u8 %r5640, [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val];
ld.shared.u8 %rs1150, [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+1];
mul.wide.u16 %r5641, %rs1150, 4;
add.s32 %r7798, %r5641, %r5640;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val], %rs851;
// Zero the remaining per-row state before entering the inner loop.
mov.u16 %rs1151, %rs851;
mov.u16 %rs1153, %rs851;
mov.u32 %r7796, %r5639;
mov.u32 %r7797, %r5639;
bra.uni $L__BB3_1005;
// Out-of-line byte flush, reached from and returning to code outside this
// excerpt ($L__BB3_1074). If %r7382 exceeds 191 nothing is written and the
// status %r7886 becomes 1 with %r7388 = 0. Otherwise the byte %rs1089 is
// stored at %rd39, %r7382 is advanced, %r7388 is reset to 7 if the stored byte
// was 255 (else 8), the byte is cleared and the status is %r7540.
$L__BB3_1072:
setp.gt.u32 %p1233, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7886, 1;
@%p1233 bra $L__BB3_1074;
and.b16 %rs882, %rs1089, 255;
st.global.u8 [%rd39], %rs1089;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1234, %rs882, 255;
selp.b32 %r7388, 7, 8, %p1234;
mov.u16 %rs1089, 0;
mov.u32 %r7886, %r7540;
bra.uni $L__BB3_1074;
// Same flush for a second call site: stores at %rd40, reports status in
// %r8028 and returns to $L__BB3_1175.
$L__BB3_1173:
setp.gt.u32 %p1348, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r8028, 1;
@%p1348 bra $L__BB3_1175;
and.b16 %rs917, %rs1089, 255;
st.global.u8 [%rd40], %rs1089;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1349, %rs917, 255;
selp.b32 %r7388, 7, 8, %p1349;
mov.u16 %rs1089, 0;
mov.u32 %r8028, %r7540;
bra.uni $L__BB3_1175;
// ---------------------------------------------------------------------------
// $L__BB3_1005: load and classify four 32-bit samples from global memory at
// [%rd689], [%rd689+256], [%rd689+4] and [%rd689+260].
// NOTE(review): the 256-byte offset looks like a row stride of 64 ints, i.e.
// a 2x2 group of samples -- confirm against the CUDA source.
// For each sample k (k = 0..3, in the load order above) this block produces:
//   * a bit (1 << k) in %r7822 when the sample is classified non-zero,
//   * a bit length in %r7815 / %r7819 / %r7824 / %r7829 (0 when zero),
//   * a value in %r7816 / %r7820 / %r7825 / %r7830 (0 when zero),
// and %r7826 accumulates the signed maximum of the bit lengths.
// %r2563 snapshots the index register %r7797 on entry.
// ---------------------------------------------------------------------------
$L__BB3_1005:
mov.u32 %r2563, %r7797;
// ---- sample 0: [%rd689] ----
ld.global.u32 %r2580, [%rd689];
setp.eq.s32 %p1158, %r2580, 0;
// Value used for a zero sample; %r5639 is assigned outside this view
// (presumably 0 -- TODO confirm).
mov.u32 %r7814, %r5639;
@%p1158 bra $L__BB3_1007;
// Non-zero sample: keep the sign bit, shift |x| left by %r1719.
and.b32 %r5643, %r2580, -2147483648;
abs.s32 %r5644, %r2580;
shl.b32 %r5645, %r5644, %r1719;
or.b32 %r7814, %r5645, %r5643;
$L__BB3_1007:
// Drop the sign (shl 1), undo the %r1719 shift and clear bit 0.
shl.b32 %r5649, %r7814, 1;
shr.u32 %r5650, %r5649, %r1719;
and.b32 %r2583, %r5650, -2;
setp.eq.s32 %p1159, %r2583, 0;
// Defaults for the "zero" case; %r7818 also seeds sample 1's packed value.
mov.u32 %r7818, 0;
mov.u32 %r7815, %r7818;
mov.u32 %r7816, %r7818;
mov.u32 %r7822, %r7818;
@%p1159 bra $L__BB3_1009;
// %r7815 = 32 - clz(%r2583 - 1): bit length of (%r2583 - 1).
add.s32 %r5652, %r2583, -1;
clz.b32 %r5653, %r5652;
mov.u32 %r5654, 32;
sub.s32 %r7815, %r5654, %r5653;
// %r7816 = %r2583 - 2 + sign bit of the packed value.
shr.u32 %r5655, %r7814, 31;
add.s32 %r5656, %r5655, %r2583;
add.s32 %r7816, %r5656, -2;
// Mark sample 0 in the mask.
mov.u32 %r7822, 1;
$L__BB3_1009:
// ---- sample 1: [%rd689+256] ----
ld.global.u32 %r2589, [%rd689+256];
setp.eq.s32 %p1160, %r2589, 0;
@%p1160 bra $L__BB3_1011;
and.b32 %r5658, %r2589, -2147483648;
abs.s32 %r5659, %r2589;
shl.b32 %r5660, %r5659, %r1719;
or.b32 %r7818, %r5660, %r5658;
$L__BB3_1011:
shl.b32 %r5663, %r7818, 1;
shr.u32 %r5664, %r5663, %r1719;
and.b32 %r2592, %r5664, -2;
setp.eq.s32 %p1161, %r2592, 0;
// Defaults; %r7823 also seeds sample 2's packed value. The running maximum
// starts as sample 0's bit length.
mov.u32 %r7823, 0;
mov.u32 %r7819, %r7823;
mov.u32 %r7820, %r7823;
mov.u32 %r7826, %r7815;
@%p1161 bra $L__BB3_1013;
or.b32 %r7822, %r7822, 2;
add.s32 %r5665, %r2592, -1;
clz.b32 %r5666, %r5665;
mov.u32 %r5667, 32;
sub.s32 %r7819, %r5667, %r5666;
max.s32 %r7826, %r7815, %r7819;
shr.u32 %r5668, %r7818, 31;
add.s32 %r5669, %r5668, %r2592;
add.s32 %r7820, %r5669, -2;
$L__BB3_1013:
// ---- sample 2: [%rd689+4] ----
ld.global.u32 %r2601, [%rd689+4];
setp.eq.s32 %p1162, %r2601, 0;
@%p1162 bra $L__BB3_1015;
and.b32 %r5671, %r2601, -2147483648;
abs.s32 %r5672, %r2601;
shl.b32 %r5673, %r5672, %r1719;
or.b32 %r7823, %r5673, %r5671;
$L__BB3_1015:
shl.b32 %r5676, %r7823, 1;
shr.u32 %r5677, %r5676, %r1719;
and.b32 %r2604, %r5677, -2;
setp.eq.s32 %p1163, %r2604, 0;
// Defaults; %r7828 also seeds sample 3's packed value.
mov.u32 %r7828, 0;
mov.u32 %r7824, %r7828;
mov.u32 %r7825, %r7828;
@%p1163 bra $L__BB3_1017;
or.b32 %r7822, %r7822, 4;
add.s32 %r5678, %r2604, -1;
clz.b32 %r5679, %r5678;
mov.u32 %r5680, 32;
sub.s32 %r7824, %r5680, %r5679;
max.s32 %r7826, %r7826, %r7824;
shr.u32 %r5681, %r7823, 31;
add.s32 %r5682, %r5681, %r2604;
add.s32 %r7825, %r5682, -2;
$L__BB3_1017:
// ---- sample 3: [%rd689+260] ----
ld.global.u32 %r2613, [%rd689+260];
setp.eq.s32 %p1164, %r2613, 0;
@%p1164 bra $L__BB3_1019;
and.b32 %r5684, %r2613, -2147483648;
abs.s32 %r5685, %r2613;
shl.b32 %r5686, %r5685, %r1719;
or.b32 %r7828, %r5686, %r5684;
$L__BB3_1019:
shl.b32 %r5689, %r7828, 1;
shr.u32 %r5690, %r5689, %r1719;
and.b32 %r2616, %r5690, -2;
setp.eq.s32 %p1165, %r2616, 0;
// Defaults; %r7833 = 0 is also the default "equals maximum" mask consumed
// after $L__BB3_1021.
mov.u32 %r7833, 0;
mov.u32 %r7829, %r7833;
mov.u32 %r7830, %r7833;
@%p1165 bra $L__BB3_1021;
or.b32 %r7822, %r7822, 8;
add.s32 %r5691, %r2616, -1;
clz.b32 %r5692, %r5691;
mov.u32 %r5693, 32;
sub.s32 %r7829, %r5693, %r5692;
max.s32 %r7826, %r7826, %r7829;
shr.u32 %r5694, %r7828, 31;
add.s32 %r5695, %r5694, %r2616;
add.s32 %r7830, %r5695, -2;
// ---------------------------------------------------------------------------
// $L__BB3_1021: derive a base value from %rs1154, clamp it against the
// group's maximum bit length, and build a 4-bit "equals the maximum" mask.
//   %r5702 = ((%rs1154 & 255) > 2 and the mask %r7822 has more than one bit
//            set) ? (%rs1154 & 255) - 1 : 1
//   %r2625 = max(%r5702, %r7826)
//   %r2626 = %r2625 - %r5702
//   %r7833 = mask of samples whose bit length equals %r7826 (only computed
//            when %r2626 >= 1; otherwise it keeps its earlier value of 0).
// NOTE(review): %rs1154 is produced outside this view; it looks like a
// neighbour-derived predictor -- confirm.
// ---------------------------------------------------------------------------
$L__BB3_1021:
// x & (x - 1) != 0  <=>  more than one bit set in %r7822.
add.s32 %r5697, %r7822, -1;
and.b32 %r5698, %r5697, %r7822;
setp.ne.s32 %p1166, %r5698, 0;
and.b16 %rs855, %rs1154, 255;
setp.gt.u16 %p1167, %rs855, 2;
and.pred %p1168, %p1167, %p1166;
cvt.u32.u16 %r5699, %rs1154;
and.b32 %r5700, %r5699, 255;
add.s32 %r5701, %r5700, -1;
selp.b32 %r5702, %r5701, 1, %p1168;
max.s32 %r2625, %r5702, %r7826;
sub.s32 %r2626, %r2625, %r5702;
setp.lt.s32 %p1169, %r2626, 1;
@%p1169 bra $L__BB3_1023;
// bit 0: sample 0 bit length == maximum
setp.eq.s32 %p1170, %r7815, %r7826;
selp.u32 %r5703, 1, 0, %p1170;
// bit 1: sample 1 (bfi inserts bit 0 of %r5704 at position 1 of %r5703)
setp.eq.s32 %p1171, %r7819, %r7826;
selp.u32 %r5704, -1, 0, %p1171;
bfi.b32 %r5705, %r5704, %r5703, 1, 1;
// bit 2: sample 2
setp.eq.s32 %p1172, %r7824, %r7826;
selp.u16 %rs856, 1, 0, %p1172;
mul.wide.u16 %r5706, %rs856, 4;
or.b32 %r5707, %r5705, %r5706;
// bit 3: sample 3
setp.eq.s32 %p1173, %r7829, %r7826;
selp.u16 %rs857, 1, 0, %p1173;
mul.wide.u16 %r5708, %rs857, 8;
or.b32 %r7833, %r5707, %r5708;
// ---------------------------------------------------------------------------
// $L__BB3_1023: look up a 16-bit table entry and append its codeword to a
// byte stream that is written at descending addresses.
//   index   = (%r7798 << 8) | (%r7822 << 4) | %r7833
//   entry   = u16 at %rd35 + 2 * index            -> %rs370
//   length  = (entry >> 4) & 7                    -> %rs371
//   bits    = entry >> 8                          -> %r7835
//   (entry & 15 is consumed later, at $L__BB3_1078.)
// Emitter state: %rs1147 = byte being assembled, %r7778 = bits already in it,
// %r7777 = 1 when the next byte may hold only 7 bits, %r7779 = bytes written.
// %r7845 leaves this block as 1 if the byte budget was exceeded, otherwise as
// the incoming %r7776.
// NOTE(review): this has the shape of a VLC encoder with 0x8F/0x7F stuffing
// rules -- confirm against the CUDA source.
// ---------------------------------------------------------------------------
$L__BB3_1023:
shl.b32 %r5709, %r7822, 4;
shl.b32 %r5710, %r7798, 8;
or.b32 %r5711, %r5709, %r5710;
or.b32 %r5712, %r5711, %r7833;
mul.wide.u32 %rd571, %r5712, 2;
add.s64 %rd572, %rd35, %rd571;
ld.global.u16 %rs370, [%rd572];
shr.u16 %rs858, %rs370, 4;
and.b16 %rs371, %rs858, 7;
setp.eq.s16 %p1174, %rs371, 0;
mov.u32 %r7845, %r7776;
// Zero-length codeword: nothing to emit.
@%p1174 bra $L__BB3_1030;
cvt.u32.u16 %r7834, %rs371;
shr.u16 %rs859, %rs370, 8;
cvt.u32.u16 %r7835, %rs859;
// Loop: %r7834 = bits still to emit, %r7835 = remaining codeword bits.
$L__BB3_1025:
mov.u32 %r2631, %r7834;
// Stop with the flag set to 1 once more than 2879 bytes have been written.
setp.gt.u32 %p1175, %r7779, 2879;
mov.u32 %r7845, 1;
@%p1175 bra $L__BB3_1030;
// Room in the current byte = 8 - %r7777 - %r7778; take min(room, remaining).
mov.u32 %r5714, 8;
sub.s32 %r5715, %r5714, %r7777;
sub.s32 %r5716, %r5715, %r7778;
min.u32 %r5717, %r5716, %r2631;
// Low-%r5717-bit mask (the ==32 case is handled explicitly).
setp.eq.s32 %p1176, %r5717, 32;
mov.u32 %r5718, -1;
shl.b32 %r5719, %r5718, %r5717;
not.b32 %r5720, %r5719;
selp.b32 %r5721, -1, %r5720, %p1176;
and.b32 %r5722, %r5721, %r7835;
// Insert the taken bits above the bits already in the byte.
shl.b32 %r5723, %r5722, %r7778;
cvt.u16.u32 %rs860, %r5723;
or.b16 %rs1147, %rs1147, %rs860;
add.s32 %r7778, %r5717, %r7778;
sub.s32 %r7834, %r2631, %r5717;
shr.u32 %r7835, %r7835, %r5717;
// Byte not yet full: go check whether bits remain.
setp.gt.u32 %p1177, %r5716, %r2631;
@%p1177 bra $L__BB3_1029;
// Byte is full. If the 7-bit restriction was active and the byte is not 127,
// only clear the restriction and keep filling (one more bit now fits).
setp.ne.s32 %p1178, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs861, %rs1147, 255;
setp.ne.s16 %p1179, %rs861, 127;
and.pred %p1180, %p1178, %p1179;
@%p1180 bra $L__BB3_1029;
// Store the byte at %rd1 + %rd5 + (20548 - %r7779): addresses decrease as
// %r7779 grows.
mov.u32 %r5726, 20548;
sub.s32 %r5727, %r5726, %r7779;
cvt.u64.u32 %rd573, %r5727;
add.s64 %rd574, %rd573, %rd5;
add.s64 %rd575, %rd1, %rd574;
st.global.u8 [%rd575], %rs1147;
add.s32 %r7779, %r7779, 1;
// A stored byte above 143 (0x8F) restricts the next byte to 7 bits.
setp.gt.u16 %p1181, %rs861, 143;
selp.u32 %r7777, 1, 0, %p1181;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_1029:
setp.ne.s32 %p1182, %r7834, 0;
mov.u32 %r7845, %r7776;
@%p1182 bra $L__BB3_1025;
// ---------------------------------------------------------------------------
// $L__BB3_1030: adaptive run coder, "group is non-empty" path.
// Runs only when the context %r7798 is 0; otherwise control skips to
// $L__BB3_1078. An empty group (%r7822 == 0) is handled at $L__BB3_1070.
// State: %rs1089 = byte being assembled (bits shifted in from the right),
//        %r7388  = bits still free in that byte,
//        %r7382  = bytes written so far (limit: 192),
//        %r7542  = adaptation state, %r7543 = current run count,
//        %r7541  = run threshold (1 << exponent), %r7540 = flag.
// Steps: append a 0 bit; append the low `exponent` bits of %r7543, most
// significant first; decrement %r7542 (floored at 0); reset the run; recompute
// the threshold.
// The exponent is selected from %r7542 as: <3 -> 0, <6 -> 1, <9 -> 2,
// <11 -> 3, ==11 -> 4, otherwise 5.
// Whenever a byte would be stored while %r7382 > 191, the store is skipped and
// the flag that ends up in %r7540 is set to 1.
// NOTE(review): this matches the shape of an HTJ2K MEL coder -- confirm.
// ---------------------------------------------------------------------------
$L__BB3_1030:
setp.ne.s32 %p1183, %r7798, 0;
@%p1183 bra $L__BB3_1078;
setp.eq.s32 %p1184, %r7822, 0;
// %rd39 = %rd1 + %rd5 + (%r7382 + 17477): address of the next output byte.
add.s32 %r5728, %r7382, 17477;
cvt.u64.u32 %rd576, %r5728;
add.s64 %rd577, %rd576, %rd5;
add.s64 %rd39, %rd1, %rd577;
@%p1184 bra $L__BB3_1070;
// Append a 0 bit.
shl.b16 %rs1089, %rs1089, 1;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p1185, %r7388, 0;
mov.u32 %r7879, %r7540;
@%p1185 bra $L__BB3_1035;
// Byte complete: flag 1 if the budget is exhausted, otherwise store it.
setp.gt.u32 %p1186, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7879, 1;
@%p1186 bra $L__BB3_1035;
st.global.u8 [%rd39], %rs1089;
add.s32 %r7382, %r7382, 1;
// Unlike the later flushes there is no ==255 test here; the next byte always
// gets 8 free bits.
mov.u32 %r7388, 8;
mov.u16 %rs1089, 0;
mov.u32 %r7879, %r7540;
$L__BB3_1035:
// %r7849 = exponent selected from %r7542 (see block header).
setp.lt.u32 %p1187, %r7542, 3;
mov.u32 %r7849, 0;
@%p1187 bra $L__BB3_1038;
setp.lt.u32 %p1188, %r7542, 6;
mov.u32 %r7849, 1;
@%p1188 bra $L__BB3_1038;
setp.lt.u32 %p1189, %r7542, 9;
setp.eq.s32 %p1190, %r7542, 11;
selp.b32 %r5734, 4, 5, %p1190;
setp.lt.u32 %p1191, %r7542, 11;
selp.b32 %r5735, 3, %r5734, %p1191;
selp.b32 %r7849, 2, %r5735, %p1189;
$L__BB3_1038:
setp.eq.s32 %p1192, %r7849, 0;
@%p1192 bra $L__BB3_1066;
// Emit %r7849 bits of %r7543, MSB first. The loop is unrolled by 4; the
// (%r7849 & 3) leading bits are emitted by the peeled copies below.
// %r7859 tracks the index of the next bit position (exclusive).
add.s32 %r2655, %r7849, -1;
and.b32 %r2656, %r7849, 3;
setp.eq.s32 %p1193, %r2656, 0;
mov.u32 %r7859, %r7849;
mov.u32 %r7862, %r7879;
@%p1193 bra $L__BB3_1051;
// Peeled bit 1: bit (%r7849 - 1) of %r7543.
mov.u32 %r5737, 1;
shl.b32 %r5738, %r5737, %r2655;
and.b32 %r5739, %r5738, %r7543;
setp.ne.s32 %p1194, %r5739, 0;
selp.u32 %r5740, 1, 0, %p1194;
// %rs1089 = (%rs1089 << 1) | bit.
cvt.u32.u16 %r5741, %rs1089;
bfi.b32 %r5742, %r5741, %r5740, 1, 8;
cvt.u16.u32 %rs1089, %r5742;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p1195, %r7388, 0;
mov.u32 %r7862, %r7879;
@%p1195 bra $L__BB3_1043;
setp.gt.u32 %p1196, %r7382, 191;
mov.u32 %r7388, 0;
// %r5737 holds 1: flag the exhausted budget.
mov.u32 %r7862, %r5737;
@%p1196 bra $L__BB3_1043;
add.s32 %r5746, %r7382, 17477;
cvt.u64.u32 %rd578, %r5746;
add.s64 %rd579, %rd578, %rd5;
add.s64 %rd580, %rd1, %rd579;
st.global.u8 [%rd580], %rs1089;
add.s32 %r7382, %r7382, 1;
mov.u32 %r7388, 8;
mov.u16 %rs1089, 0;
mov.u32 %r7862, %r7879;
$L__BB3_1043:
setp.eq.s32 %p1197, %r2656, 1;
mov.u32 %r7879, %r7862;
mov.u32 %r7859, %r2655;
@%p1197 bra $L__BB3_1051;
// Peeled bit 2: bit (%r7849 - 2) of %r7543.
add.s32 %r7859, %r7849, -2;
mov.u32 %r5747, 1;
shl.b32 %r5748, %r5747, %r7859;
and.b32 %r5749, %r5748, %r7543;
setp.ne.s32 %p1198, %r5749, 0;
selp.u32 %r5750, 1, 0, %p1198;
cvt.u32.u16 %r5751, %rs1089;
bfi.b32 %r5752, %r5751, %r5750, 1, 8;
cvt.u16.u32 %rs1089, %r5752;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p1199, %r7388, 0;
mov.u32 %r7853, %r7862;
@%p1199 bra $L__BB3_1047;
setp.gt.u32 %p1200, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7853, %r5747;
@%p1200 bra $L__BB3_1047;
add.s32 %r5755, %r7382, 17477;
cvt.u64.u32 %rd581, %r5755;
add.s64 %rd582, %rd581, %rd5;
add.s64 %rd583, %rd1, %rd582;
and.b16 %rs868, %rs1089, 255;
st.global.u8 [%rd583], %rs1089;
add.s32 %r7382, %r7382, 1;
// After storing 0xFF the next byte only gets 7 free bits.
setp.eq.s16 %p1201, %rs868, 255;
selp.b32 %r7388, 7, 8, %p1201;
mov.u16 %rs1089, 0;
mov.u32 %r7853, %r7862;
$L__BB3_1047:
setp.eq.s32 %p1202, %r2656, 2;
mov.u32 %r7879, %r7853;
mov.u32 %r7862, %r7853;
@%p1202 bra $L__BB3_1051;
// Peeled bit 3: bit (%r7849 - 3) of %r7543.
add.s32 %r7859, %r7849, -3;
mov.u32 %r5756, 1;
shl.b32 %r5757, %r5756, %r7859;
and.b32 %r5758, %r5757, %r7543;
setp.ne.s32 %p1203, %r5758, 0;
selp.u32 %r5759, 1, 0, %p1203;
cvt.u32.u16 %r5760, %rs1089;
bfi.b32 %r5761, %r5760, %r5759, 1, 8;
cvt.u16.u32 %rs1089, %r5761;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p1204, %r7388, 0;
mov.u32 %r7879, %r7853;
mov.u32 %r7862, %r7853;
@%p1204 bra $L__BB3_1051;
setp.gt.u32 %p1205, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7879, %r5756;
mov.u32 %r7862, %r5756;
@%p1205 bra $L__BB3_1051;
add.s32 %r5766, %r7382, 17477;
cvt.u64.u32 %rd584, %r5766;
add.s64 %rd585, %rd584, %rd5;
add.s64 %rd586, %rd1, %rd585;
and.b16 %rs871, %rs1089, 255;
st.global.u8 [%rd586], %rs1089;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1206, %rs871, 255;
selp.b32 %r7388, 7, 8, %p1206;
mov.u16 %rs1089, 0;
mov.u32 %r7879, %r7853;
mov.u32 %r7862, %r7853;
$L__BB3_1051:
// With fewer than 4 bits in total, the peeled copies emitted everything.
setp.lt.u32 %p1207, %r2655, 3;
@%p1207 bra $L__BB3_1066;
mov.u32 %r7879, %r7862;
// Unrolled body: four bits per iteration, at positions %r7859-1 .. %r7859-4.
// Intermediate byte/free-bit/flag values live in %r7868/%r7869/%r7867,
// %r7872/%r7873/%r7871 and %r7876/%r7877/%r7875.
$L__BB3_1053:
add.s32 %r5767, %r7859, -1;
mov.u32 %r5768, 1;
shl.b32 %r5769, %r5768, %r5767;
and.b32 %r5770, %r5769, %r7543;
setp.ne.s32 %p1208, %r5770, 0;
selp.u32 %r5771, 1, 0, %p1208;
cvt.u32.u16 %r5772, %rs1089;
bfi.b32 %r7868, %r5772, %r5771, 1, 8;
add.s32 %r7869, %r7388, -1;
setp.ne.s32 %p1209, %r7869, 0;
mov.u32 %r7867, %r7879;
@%p1209 bra $L__BB3_1056;
setp.gt.u32 %p1210, %r7382, 191;
mov.u32 %r7869, 0;
mov.u32 %r7867, %r5768;
@%p1210 bra $L__BB3_1056;
cvt.u16.u32 %rs872, %r7868;
and.b16 %rs873, %rs872, 255;
add.s32 %r5776, %r7382, 17477;
cvt.u64.u32 %rd587, %r5776;
add.s64 %rd588, %rd587, %rd5;
add.s64 %rd589, %rd1, %rd588;
st.global.u8 [%rd589], %rs872;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1211, %rs873, 255;
selp.b32 %r7869, 7, 8, %p1211;
mov.u32 %r7868, 0;
mov.u32 %r7867, %r7879;
$L__BB3_1056:
add.s32 %r5777, %r7859, -2;
shl.b32 %r5779, %r5768, %r5777;
and.b32 %r5780, %r5779, %r7543;
setp.ne.s32 %p1212, %r5780, 0;
and.b32 %r5781, %r7868, 127;
selp.u32 %r5782, 1, 0, %p1212;
bfi.b32 %r7872, %r5781, %r5782, 1, 7;
add.s32 %r7873, %r7869, -1;
setp.ne.s32 %p1213, %r7873, 0;
mov.u32 %r7871, %r7867;
@%p1213 bra $L__BB3_1059;
setp.gt.u32 %p1214, %r7382, 191;
mov.u32 %r7873, 0;
mov.u32 %r7871, 1;
@%p1214 bra $L__BB3_1059;
cvt.u16.u32 %rs874, %r7872;
and.b16 %rs875, %rs874, 255;
add.s32 %r5786, %r7382, 17477;
cvt.u64.u32 %rd590, %r5786;
add.s64 %rd591, %rd590, %rd5;
add.s64 %rd592, %rd1, %rd591;
st.global.u8 [%rd592], %rs874;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1215, %rs875, 255;
selp.b32 %r7873, 7, 8, %p1215;
mov.u32 %r7872, 0;
mov.u32 %r7871, %r7867;
$L__BB3_1059:
add.s32 %r5787, %r7859, -3;
mov.u32 %r5788, 1;
shl.b32 %r5789, %r5788, %r5787;
and.b32 %r5790, %r5789, %r7543;
setp.ne.s32 %p1216, %r5790, 0;
and.b32 %r5791, %r7872, 127;
selp.u32 %r5792, 1, 0, %p1216;
bfi.b32 %r7876, %r5791, %r5792, 1, 7;
add.s32 %r7877, %r7873, -1;
setp.ne.s32 %p1217, %r7877, 0;
mov.u32 %r7875, %r7871;
@%p1217 bra $L__BB3_1062;
setp.gt.u32 %p1218, %r7382, 191;
mov.u32 %r7877, 0;
mov.u32 %r7875, %r5788;
@%p1218 bra $L__BB3_1062;
cvt.u16.u32 %rs876, %r7876;
and.b16 %rs877, %rs876, 255;
add.s32 %r5796, %r7382, 17477;
cvt.u64.u32 %rd593, %r5796;
add.s64 %rd594, %rd593, %rd5;
add.s64 %rd595, %rd1, %rd594;
st.global.u8 [%rd595], %rs876;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1219, %rs877, 255;
selp.b32 %r7877, 7, 8, %p1219;
mov.u32 %r7876, 0;
mov.u32 %r7875, %r7871;
$L__BB3_1062:
// Fourth bit of the iteration; results return to %rs1089 / %r7388 / %r7879.
add.s32 %r7859, %r7859, -4;
shl.b32 %r5798, %r5788, %r7859;
and.b32 %r5799, %r5798, %r7543;
setp.ne.s32 %p1220, %r5799, 0;
and.b32 %r5800, %r7876, 127;
selp.u32 %r5801, 1, 0, %p1220;
bfi.b32 %r5802, %r5800, %r5801, 1, 15;
cvt.u16.u32 %rs1089, %r5802;
add.s32 %r7388, %r7877, -1;
setp.ne.s32 %p1221, %r7388, 0;
mov.u32 %r7879, %r7875;
@%p1221 bra $L__BB3_1065;
setp.gt.u32 %p1222, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7879, 1;
@%p1222 bra $L__BB3_1065;
add.s32 %r5805, %r7382, 17477;
cvt.u64.u32 %rd596, %r5805;
add.s64 %rd597, %rd596, %rd5;
add.s64 %rd598, %rd1, %rd597;
and.b16 %rs879, %rs1089, 255;
st.global.u8 [%rd598], %rs1089;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1223, %rs879, 255;
selp.b32 %r7388, 7, 8, %p1223;
mov.u16 %rs1089, 0;
mov.u32 %r7879, %r7875;
$L__BB3_1065:
setp.ne.s32 %p1224, %r7859, 0;
@%p1224 bra $L__BB3_1053;
$L__BB3_1066:
// Adapt downwards: %r7542 = max(%r7542 - 1, 0); reset the run count.
add.s32 %r5807, %r7542, -1;
setp.eq.s32 %p1225, %r7542, 0;
mov.u32 %r7543, 0;
selp.b32 %r7542, 0, %r5807, %p1225;
// Recompute the exponent for the new state.
setp.lt.u32 %p1226, %r7542, 3;
mov.u32 %r7885, %r7543;
@%p1226 bra $L__BB3_1069;
setp.lt.u32 %p1227, %r7542, 6;
mov.u32 %r7885, 1;
@%p1227 bra $L__BB3_1069;
setp.lt.u32 %p1228, %r7542, 9;
setp.eq.s32 %p1229, %r7542, 11;
selp.b32 %r5809, 4, 5, %p1229;
setp.lt.u32 %p1230, %r7542, 11;
selp.b32 %r5810, 3, %r5809, %p1230;
selp.b32 %r7885, 2, %r5810, %p1228;
$L__BB3_1069:
// New threshold = 1 << exponent; publish the flag.
mov.u32 %r5812, 1;
shl.b32 %r7541, %r5812, %r7885;
mov.u32 %r7540, %r7879;
bra.uni $L__BB3_1078;
// ---------------------------------------------------------------------------
// $L__BB3_1070: adaptive run coder, "group is empty" path.
// Increments the run count %r7543. While it is below the threshold %r7541
// nothing is emitted. Once it reaches the threshold a 1 bit is appended to
// %rs1089; if that completes the byte, control goes to $L__BB3_1072 (outside
// this view). At $L__BB3_1074 the state %r7542 is raised by one (capped at
// 12), the run is reset, and the threshold is recomputed from the same
// state-to-exponent mapping used elsewhere in this kernel.
// ---------------------------------------------------------------------------
$L__BB3_1070:
add.s32 %r7543, %r7543, 1;
setp.lt.u32 %p1231, %r7543, %r7541;
@%p1231 bra $L__BB3_1078;
// Run reached the threshold: append a 1 bit.
shl.b16 %rs880, %rs1089, 1;
or.b16 %rs1089, %rs880, 1;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p1232, %r7388, 0;
mov.u32 %r7886, %r7540;
@%p1232 bra $L__BB3_1074;
// Byte complete: the flush code lives at $L__BB3_1072, out of line.
bra.uni $L__BB3_1072;
$L__BB3_1074:
// Adapt upwards: %r7542 = min(%r7542 + 1, 12); reset the run count.
add.s32 %r5816, %r7542, 1;
min.u32 %r7542, %r5816, 12;
setp.lt.u32 %p1235, %r7542, 3;
mov.u32 %r7543, 0;
mov.u32 %r7889, %r7543;
@%p1235 bra $L__BB3_1077;
setp.lt.u32 %p1236, %r7542, 6;
mov.u32 %r7889, 1;
@%p1236 bra $L__BB3_1077;
setp.lt.u32 %p1237, %r7542, 9;
setp.eq.s32 %p1238, %r7542, 11;
selp.b32 %r5818, 4, 5, %p1238;
setp.lt.u32 %p1239, %r7542, 11;
selp.b32 %r5819, 3, %r5818, %p1239;
selp.b32 %r7889, 2, %r5819, %p1237;
$L__BB3_1077:
// New threshold = 1 << exponent; publish the flag.
mov.u32 %r5821, 1;
shl.b32 %r7541, %r5821, %r7889;
mov.u32 %r7540, %r7886;
// ---------------------------------------------------------------------------
// $L__BB3_1078: emit the value bits of sample 0 into a byte stream written at
// ascending addresses (%rd1 + %rd5 + %r7925).
// %r2739 = low 4 bits of the table entry %rs370; bit k applies to sample k.
// Skipped when bit 0 of the mask %r7822 is clear.
//   bit count = %r2625 - (%r2739 & 1)
//   payload   = %r7816 masked to that many low bits
// Emitter state (shared by all four per-sample emitters): %r7922 = byte being
// assembled, %r7923 = bits already in it, %r7924 = capacity of the current
// byte (8, or 7 after a stored 255), %r7925 = bytes written.
// %r7906 leaves as 1 if %r7925 exceeded 17476 during emission, otherwise as
// the incoming %r8093.
// ---------------------------------------------------------------------------
$L__BB3_1078:
and.b16 %rs883, %rs370, 15;
cvt.u32.u16 %r2739, %rs883;
// The xor with a constant-false predicate is a no-op; %p1243 = !(mask bit 0).
and.b32 %r5822, %r7822, 1;
setp.eq.b32 %p1240, %r5822, 1;
mov.pred %p1241, 0;
xor.pred %p1242, %p1240, %p1241;
not.pred %p1243, %p1242;
mov.u32 %r7906, %r8093;
@%p1243 bra $L__BB3_1085;
and.b32 %r5823, %r2739, 1;
sub.s32 %r7896, %r2625, %r5823;
setp.eq.s32 %p1244, %r7896, 0;
mov.u32 %r7906, %r8093;
@%p1244 bra $L__BB3_1085;
// Keep only the low %r7896 bits of the sample value.
mov.u32 %r5824, -1;
shl.b32 %r5825, %r5824, %r7896;
not.b32 %r5826, %r5825;
and.b32 %r7897, %r7816, %r5826;
// Loop: %r7896 = bits still to emit, %r7897 = remaining payload.
$L__BB3_1081:
setp.gt.u32 %p1245, %r7925, 17476;
mov.u32 %r7906, 1;
@%p1245 bra $L__BB3_1085;
// Take min(free bits in byte, remaining bits).
sub.s32 %r5828, %r7924, %r7923;
min.u32 %r5829, %r5828, %r7896;
setp.eq.s32 %p1246, %r5829, 32;
mov.u32 %r5830, -1;
shl.b32 %r5831, %r5830, %r5829;
not.b32 %r5832, %r5831;
selp.b32 %r5833, -1, %r5832, %p1246;
and.b32 %r5834, %r5833, %r7897;
shl.b32 %r5835, %r5834, %r7923;
or.b32 %r7922, %r5835, %r7922;
add.s32 %r7923, %r5829, %r7923;
shr.u32 %r7897, %r7897, %r5829;
sub.s32 %r7896, %r7896, %r5829;
setp.lt.u32 %p1247, %r7923, %r7924;
@%p1247 bra $L__BB3_1084;
// Byte full: store it and start a new one (7-bit capacity after 255).
cvt.u64.u32 %rd599, %r7925;
add.s64 %rd600, %rd599, %rd5;
add.s64 %rd601, %rd1, %rd600;
st.global.u8 [%rd601], %r7922;
add.s32 %r7925, %r7925, 1;
setp.eq.s32 %p1248, %r7922, 255;
selp.b32 %r7924, 7, 8, %p1248;
mov.u32 %r7922, 0;
mov.u32 %r7923, %r7922;
$L__BB3_1084:
setp.ne.s32 %p1249, %r7896, 0;
mov.u32 %r7906, %r8093;
@%p1249 bra $L__BB3_1081;
// ---------------------------------------------------------------------------
// $L__BB3_1085: emit the value bits of sample 1 (mask bit 2 of %r7822) into
// the ascending byte stream at %rd1 + %rd5 + %r7925.
//   bit count = %r2625 - ((%r2739 >> 1) & 1),  payload = low bits of %r7820
// %r7921 leaves as 1 if %r7925 exceeded 17476, otherwise as the incoming
// %r7906. %r2763 (the isolated mask bit) is reused after $L__BB3_1106.
// ---------------------------------------------------------------------------
$L__BB3_1085:
and.b32 %r2763, %r7822, 2;
setp.eq.s32 %p1250, %r2763, 0;
mov.u32 %r7921, %r7906;
@%p1250 bra $L__BB3_1092;
shr.u32 %r5838, %r2739, 1;
and.b32 %r5839, %r5838, 1;
sub.s32 %r7911, %r2625, %r5839;
setp.eq.s32 %p1251, %r7911, 0;
mov.u32 %r7921, %r7906;
@%p1251 bra $L__BB3_1092;
// Keep only the low %r7911 bits of the sample value.
mov.u32 %r5840, -1;
shl.b32 %r5841, %r5840, %r7911;
not.b32 %r5842, %r5841;
and.b32 %r7912, %r7820, %r5842;
// Loop: %r7911 = bits still to emit, %r7912 = remaining payload.
$L__BB3_1088:
setp.gt.u32 %p1252, %r7925, 17476;
mov.u32 %r7921, 1;
@%p1252 bra $L__BB3_1092;
sub.s32 %r5844, %r7924, %r7923;
min.u32 %r5845, %r5844, %r7911;
setp.eq.s32 %p1253, %r5845, 32;
mov.u32 %r5846, -1;
shl.b32 %r5847, %r5846, %r5845;
not.b32 %r5848, %r5847;
selp.b32 %r5849, -1, %r5848, %p1253;
and.b32 %r5850, %r5849, %r7912;
shl.b32 %r5851, %r5850, %r7923;
or.b32 %r7922, %r5851, %r7922;
add.s32 %r7923, %r5845, %r7923;
shr.u32 %r7912, %r7912, %r5845;
sub.s32 %r7911, %r7911, %r5845;
setp.lt.u32 %p1254, %r7923, %r7924;
@%p1254 bra $L__BB3_1091;
// Byte full: store it and start a new one (7-bit capacity after 255).
cvt.u64.u32 %rd602, %r7925;
add.s64 %rd603, %rd602, %rd5;
add.s64 %rd604, %rd1, %rd603;
st.global.u8 [%rd604], %r7922;
add.s32 %r7925, %r7925, 1;
setp.eq.s32 %p1255, %r7922, 255;
selp.b32 %r7924, 7, 8, %p1255;
mov.u32 %r7922, 0;
mov.u32 %r7923, %r7922;
$L__BB3_1091:
setp.ne.s32 %p1256, %r7911, 0;
mov.u32 %r7921, %r7906;
@%p1256 bra $L__BB3_1088;
// ---------------------------------------------------------------------------
// $L__BB3_1092: emit the value bits of sample 2 (mask bit 4 of %r7822) into
// the ascending byte stream at %rd1 + %rd5 + %r7925.
//   bit count = %r2625 - ((%r2739 >> 2) & 1),  payload = low bits of %r7825
// %r7936 leaves as 1 if %r7925 exceeded 17476, otherwise as the incoming
// %r7921. %r2787 (the isolated mask bit) is reused at $L__BB3_1122.
// ---------------------------------------------------------------------------
$L__BB3_1092:
and.b32 %r2787, %r7822, 4;
setp.eq.s32 %p1257, %r2787, 0;
mov.u32 %r7936, %r7921;
@%p1257 bra $L__BB3_1099;
shr.u32 %r5854, %r2739, 2;
and.b32 %r5855, %r5854, 1;
sub.s32 %r7926, %r2625, %r5855;
setp.eq.s32 %p1258, %r7926, 0;
mov.u32 %r7936, %r7921;
@%p1258 bra $L__BB3_1099;
// Keep only the low %r7926 bits of the sample value.
mov.u32 %r5856, -1;
shl.b32 %r5857, %r5856, %r7926;
not.b32 %r5858, %r5857;
and.b32 %r7927, %r7825, %r5858;
// Loop: %r7926 = bits still to emit, %r7927 = remaining payload.
$L__BB3_1095:
setp.gt.u32 %p1259, %r7925, 17476;
mov.u32 %r7936, 1;
@%p1259 bra $L__BB3_1099;
sub.s32 %r5860, %r7924, %r7923;
min.u32 %r5861, %r5860, %r7926;
setp.eq.s32 %p1260, %r5861, 32;
mov.u32 %r5862, -1;
shl.b32 %r5863, %r5862, %r5861;
not.b32 %r5864, %r5863;
selp.b32 %r5865, -1, %r5864, %p1260;
and.b32 %r5866, %r5865, %r7927;
shl.b32 %r5867, %r5866, %r7923;
or.b32 %r7922, %r5867, %r7922;
add.s32 %r7923, %r5861, %r7923;
shr.u32 %r7927, %r7927, %r5861;
sub.s32 %r7926, %r7926, %r5861;
setp.lt.u32 %p1261, %r7923, %r7924;
@%p1261 bra $L__BB3_1098;
// Byte full: store it and start a new one (7-bit capacity after 255).
cvt.u64.u32 %rd605, %r7925;
add.s64 %rd606, %rd605, %rd5;
add.s64 %rd607, %rd1, %rd606;
st.global.u8 [%rd607], %r7922;
add.s32 %r7925, %r7925, 1;
setp.eq.s32 %p1262, %r7922, 255;
selp.b32 %r7924, 7, 8, %p1262;
mov.u32 %r7922, 0;
mov.u32 %r7923, %r7922;
$L__BB3_1098:
setp.ne.s32 %p1263, %r7926, 0;
mov.u32 %r7936, %r7921;
@%p1263 bra $L__BB3_1095;
// ---------------------------------------------------------------------------
// $L__BB3_1099: emit the value bits of sample 3 (mask bit 8 of %r7822) into
// the ascending byte stream at %rd1 + %rd5 + %r7925.
//   bit count = %r2625 - (%r2739 >> 3),  payload = low bits of %r7830
// (%r2739 is a 4-bit value, so no "& 1" is needed after the shift by 3.)
// %r7951 leaves as 1 if %r7925 exceeded 17476, otherwise as the incoming
// %r7936. %r2811 (the isolated mask bit) is reused after $L__BB3_1106.
// ---------------------------------------------------------------------------
$L__BB3_1099:
and.b32 %r2811, %r7822, 8;
setp.eq.s32 %p1264, %r2811, 0;
mov.u32 %r7951, %r7936;
@%p1264 bra $L__BB3_1106;
shr.u32 %r5870, %r2739, 3;
sub.s32 %r7941, %r2625, %r5870;
setp.eq.s32 %p1265, %r7941, 0;
mov.u32 %r7951, %r7936;
@%p1265 bra $L__BB3_1106;
// Keep only the low %r7941 bits of the sample value.
mov.u32 %r5871, -1;
shl.b32 %r5872, %r5871, %r7941;
not.b32 %r5873, %r5872;
and.b32 %r7942, %r7830, %r5873;
// Loop: %r7941 = bits still to emit, %r7942 = remaining payload.
$L__BB3_1102:
setp.gt.u32 %p1266, %r7925, 17476;
mov.u32 %r7951, 1;
@%p1266 bra $L__BB3_1106;
sub.s32 %r5875, %r7924, %r7923;
min.u32 %r5876, %r5875, %r7941;
setp.eq.s32 %p1267, %r5876, 32;
mov.u32 %r5877, -1;
shl.b32 %r5878, %r5877, %r5876;
not.b32 %r5879, %r5878;
selp.b32 %r5880, -1, %r5879, %p1267;
and.b32 %r5881, %r5880, %r7942;
shl.b32 %r5882, %r5881, %r7923;
or.b32 %r7922, %r5882, %r7922;
add.s32 %r7923, %r5876, %r7923;
shr.u32 %r7942, %r7942, %r5876;
sub.s32 %r7941, %r7941, %r5876;
setp.lt.u32 %p1268, %r7923, %r7924;
@%p1268 bra $L__BB3_1105;
// Byte full: store it and start a new one (7-bit capacity after 255).
cvt.u64.u32 %rd608, %r7925;
add.s64 %rd609, %rd608, %rd5;
add.s64 %rd610, %rd1, %rd609;
st.global.u8 [%rd610], %r7922;
add.s32 %r7925, %r7925, 1;
setp.eq.s32 %p1269, %r7922, 255;
selp.b32 %r7924, 7, 8, %p1269;
mov.u32 %r7922, 0;
mov.u32 %r7923, %r7922;
$L__BB3_1105:
setp.ne.s32 %p1270, %r7941, 0;
mov.u32 %r7951, %r7936;
@%p1270 bra $L__BB3_1102;
// ---------------------------------------------------------------------------
// $L__BB3_1106: update two per-position byte arrays in shared memory, based
// at %r4924 and %r4928 and indexed by %r2563, and read back entries needed
// for the next group.
// NOTE(review): these look like line buffers carrying bit-length and
// significance state to the next row of groups -- confirm against the CUDA
// source. Statement order matters: the load through %r5892 happens BEFORE the
// store to [%r2835+1], so it observes the previous content of that slot.
// ---------------------------------------------------------------------------
$L__BB3_1106:
// %rs885 = (low byte of %r7819 < low byte of %rs1153) ? %rs1153 : %r7819.
and.b32 %r5886, %r7819, 255;
cvt.u32.u16 %r5887, %rs1153;
and.b32 %r5888, %r5887, 255;
setp.lt.u32 %p1271, %r5886, %r5888;
cvt.u16.u32 %rs884, %r7819;
selp.b16 %rs885, %rs1153, %rs884, %p1271;
add.s32 %r2835, %r4924, %r2563;
// Zero later used as the default for the following group's registers.
mov.u32 %r7957, 0;
st.shared.u8 [%r2835], %rs885;
ld.shared.u8 %rs392, [%r2835+2];
// Pick index %r2563+1 when %rs1152 > entry[+2], else %r2563+2, and load the
// byte at the picked index into %rs393. %r7797 becomes %r2563 + 2.
setp.gt.u16 %p1272, %rs1152, %rs392;
add.s32 %r7797, %r2563, 2;
add.s32 %r5890, %r2563, 1;
selp.b32 %r5891, %r5890, %r7797, %p1272;
add.s32 %r5892, %r4924, %r5891;
ld.shared.u8 %rs393, [%r5892];
// Record sample 3's bit length at index + 1.
cvt.u16.u32 %rs394, %r7829;
st.shared.u8 [%r2835+1], %r7829;
// Second array: entry[idx] = %rs1151 | (mask bit of sample 1, as 0/1);
// entry[idx+1] = mask bit of sample 3 as 0/1; entry[idx+2] is read into
// %r2838 for the next context.
cvt.u16.u32 %rs887, %r2763;
shr.u16 %rs888, %rs887, 1;
or.b16 %rs889, %rs1151, %rs888;
add.s32 %r2837, %r4928, %r2563;
st.shared.u8 [%r2837], %rs889;
ld.shared.u8 %r2838, [%r2837+2];
shr.u32 %r2839, %r2811, 3;
st.shared.u8 [%r2837+1], %r2839;
// ---------------------------------------------------------------------------
// Load and classify the next four samples, at [%rd689+8], [%rd689+264],
// [%rd689+12] and [%rd689+268] (the same layout as the previous group, 8 bytes
// further on). Per sample k this produces:
//   * bit (1 << k) in %r7964 when the sample is classified non-zero,
//   * a bit length in %r7957 / %r7961 / %r7966 / %r7971 (0 when zero),
//   * a value in %r7958 / %r7962 / %r7967 / %r7972 (0 when zero),
// and %r7968 accumulates the signed maximum of the bit lengths.
// %r7957 holds 0 on entry (assigned a few instructions before this point).
// ---------------------------------------------------------------------------
// ---- sample 0: [%rd689+8] ----
ld.global.u32 %r2840, [%rd689+8];
setp.eq.s32 %p1273, %r2840, 0;
mov.u32 %r7956, %r7957;
@%p1273 bra $L__BB3_1108;
// Non-zero sample: keep the sign bit, shift |x| left by %r1719.
and.b32 %r5894, %r2840, -2147483648;
abs.s32 %r5895, %r2840;
shl.b32 %r5896, %r5895, %r1719;
or.b32 %r7956, %r5896, %r5894;
$L__BB3_1108:
// Drop the sign (shl 1), undo the %r1719 shift and clear bit 0.
shl.b32 %r5900, %r7956, 1;
shr.u32 %r5901, %r5900, %r1719;
and.b32 %r2843, %r5901, -2;
setp.eq.s32 %p1274, %r2843, 0;
mov.u32 %r7958, %r7957;
mov.u32 %r7964, %r7957;
@%p1274 bra $L__BB3_1110;
// %r7957 = 32 - clz(%r2843 - 1); %r7958 = %r2843 - 2 + sign bit.
add.s32 %r5903, %r2843, -1;
clz.b32 %r5904, %r5903;
mov.u32 %r5905, 32;
sub.s32 %r7957, %r5905, %r5904;
shr.u32 %r5906, %r7956, 31;
add.s32 %r5907, %r5906, %r2843;
add.s32 %r7958, %r5907, -2;
mov.u32 %r7964, 1;
$L__BB3_1110:
// ---- sample 1: [%rd689+264] ----
ld.global.u32 %r2849, [%rd689+264];
setp.eq.s32 %p1275, %r2849, 0;
mov.u32 %r7961, 0;
mov.u32 %r7960, %r7961;
@%p1275 bra $L__BB3_1112;
and.b32 %r5909, %r2849, -2147483648;
abs.s32 %r5910, %r2849;
shl.b32 %r5911, %r5910, %r1719;
or.b32 %r7960, %r5911, %r5909;
$L__BB3_1112:
shl.b32 %r5914, %r7960, 1;
shr.u32 %r5915, %r5914, %r1719;
and.b32 %r2852, %r5915, -2;
setp.eq.s32 %p1276, %r2852, 0;
// The running maximum starts as sample 0's bit length.
mov.u32 %r7962, %r7961;
mov.u32 %r7968, %r7957;
@%p1276 bra $L__BB3_1114;
or.b32 %r7964, %r7964, 2;
add.s32 %r5916, %r2852, -1;
clz.b32 %r5917, %r5916;
mov.u32 %r5918, 32;
sub.s32 %r7961, %r5918, %r5917;
max.s32 %r7968, %r7957, %r7961;
shr.u32 %r5919, %r7960, 31;
add.s32 %r5920, %r5919, %r2852;
add.s32 %r7962, %r5920, -2;
$L__BB3_1114:
// ---- sample 2: [%rd689+12] ----
ld.global.u32 %r2861, [%rd689+12];
setp.eq.s32 %p1277, %r2861, 0;
mov.u32 %r7966, 0;
mov.u32 %r7965, %r7966;
@%p1277 bra $L__BB3_1116;
and.b32 %r5922, %r2861, -2147483648;
abs.s32 %r5923, %r2861;
shl.b32 %r5924, %r5923, %r1719;
or.b32 %r7965, %r5924, %r5922;
$L__BB3_1116:
shl.b32 %r5927, %r7965, 1;
shr.u32 %r5928, %r5927, %r1719;
and.b32 %r2864, %r5928, -2;
setp.eq.s32 %p1278, %r2864, 0;
mov.u32 %r7967, %r7966;
@%p1278 bra $L__BB3_1118;
or.b32 %r7964, %r7964, 4;
add.s32 %r5929, %r2864, -1;
clz.b32 %r5930, %r5929;
mov.u32 %r5931, 32;
sub.s32 %r7966, %r5931, %r5930;
max.s32 %r7968, %r7968, %r7966;
shr.u32 %r5932, %r7965, 31;
add.s32 %r5933, %r5932, %r2864;
add.s32 %r7967, %r5933, -2;
$L__BB3_1118:
// ---- sample 3: [%rd689+268] ----
ld.global.u32 %r2873, [%rd689+268];
setp.eq.s32 %p1279, %r2873, 0;
mov.u32 %r7971, 0;
mov.u32 %r7970, %r7971;
@%p1279 bra $L__BB3_1120;
and.b32 %r5935, %r2873, -2147483648;
abs.s32 %r5936, %r2873;
shl.b32 %r5937, %r5936, %r1719;
or.b32 %r7970, %r5937, %r5935;
$L__BB3_1120:
shl.b32 %r5940, %r7970, 1;
shr.u32 %r5941, %r5940, %r1719;
and.b32 %r2876, %r5941, -2;
setp.eq.s32 %p1280, %r2876, 0;
mov.u32 %r7972, %r7971;
@%p1280 bra $L__BB3_1122;
or.b32 %r7964, %r7964, 8;
add.s32 %r5942, %r2876, -1;
clz.b32 %r5943, %r5942;
mov.u32 %r5944, 32;
sub.s32 %r7971, %r5944, %r5943;
max.s32 %r7968, %r7968, %r7971;
shr.u32 %r5945, %r7970, 31;
add.s32 %r5946, %r5945, %r2876;
add.s32 %r7972, %r5946, -2;
// ---------------------------------------------------------------------------
// $L__BB3_1122: build the table context for this group and derive its base
// value and "equals the maximum" mask.
//   %r2885 = (%r2811 >> 2) | (%r2787 >> 1) | ((%r2838 << 2) + (%rs1150 & 255))
//            i.e. previous group's sample-3 bit -> bit 1, sample-2 bit ->
//            bit 1 as well (4 >> 1), plus shared-memory state and %rs1150.
//   %r5960 = (%rs393 > 2 and mask %r7964 has more than one bit set)
//            ? (%rs393 & 255) - 1 : 1
//   %r2886 = max(%r5960, %r7968);  %r2887 = %r2886 - %r5960
//   %r7975 = mask of samples whose bit length equals %r7968 (0 unless
//            %r2887 >= 1).
// NOTE(review): both shifted mask bits land on bit 1 of the context; that is
// what the generated code does -- confirm it matches the source's intent.
// ---------------------------------------------------------------------------
$L__BB3_1122:
shr.u32 %r5948, %r2811, 2;
shr.u32 %r5949, %r2787, 1;
or.b32 %r5950, %r5948, %r5949;
shl.b32 %r5951, %r2838, 2;
cvt.u32.u16 %r5952, %rs1150;
and.b32 %r5953, %r5952, 255;
add.s32 %r5954, %r5951, %r5953;
or.b32 %r2885, %r5950, %r5954;
// x & (x - 1) != 0  <=>  more than one bit set in %r7964.
add.s32 %r5955, %r7964, -1;
and.b32 %r5956, %r5955, %r7964;
setp.ne.s32 %p1281, %r5956, 0;
mov.u32 %r7975, 0;
setp.gt.u16 %p1282, %rs393, 2;
and.pred %p1283, %p1282, %p1281;
cvt.u32.u16 %r5957, %rs393;
and.b32 %r5958, %r5957, 255;
add.s32 %r5959, %r5958, -1;
selp.b32 %r5960, %r5959, 1, %p1283;
max.s32 %r2886, %r5960, %r7968;
sub.s32 %r2887, %r2886, %r5960;
setp.lt.s32 %p1284, %r2887, 1;
@%p1284 bra $L__BB3_1124;
// bit 0: sample 0 bit length == maximum
setp.eq.s32 %p1285, %r7957, %r7968;
selp.u32 %r5961, 1, 0, %p1285;
// bit 1: sample 1 (bfi inserts bit 0 of %r5962 at position 1 of %r5961)
setp.eq.s32 %p1286, %r7961, %r7968;
selp.u32 %r5962, -1, 0, %p1286;
bfi.b32 %r5963, %r5962, %r5961, 1, 1;
// bit 2: sample 2
setp.eq.s32 %p1287, %r7966, %r7968;
selp.u16 %rs891, 1, 0, %p1287;
mul.wide.u16 %r5964, %rs891, 4;
or.b32 %r5965, %r5963, %r5964;
// bit 3: sample 3
setp.eq.s32 %p1288, %r7971, %r7968;
selp.u16 %rs892, 1, 0, %p1288;
mul.wide.u16 %r5966, %rs892, 8;
or.b32 %r7975, %r5965, %r5966;
// ---------------------------------------------------------------------------
// $L__BB3_1124: look up a 16-bit table entry for the second group and append
// its codeword to the byte stream written at descending addresses.
//   index  = (%r2885 << 8) | (%r7964 << 4) | %r7975
//   entry  = u16 at %rd35 + 2 * index             -> %rs395
//   length = (entry >> 4) & 7,  bits = entry >> 8
// Emitter state: %rs1147 = byte being assembled, %r7778 = bits already in it,
// %r7777 = 1 when the next byte may hold only 7 bits, %r7779 = bytes written.
// %r7987 leaves as 1 if %r7779 exceeded 2879, otherwise as the incoming
// %r7845.
// ---------------------------------------------------------------------------
$L__BB3_1124:
shl.b32 %r5967, %r7964, 4;
shl.b32 %r5968, %r2885, 8;
or.b32 %r5969, %r5967, %r5968;
or.b32 %r5970, %r5969, %r7975;
mul.wide.u32 %rd611, %r5970, 2;
add.s64 %rd612, %rd35, %rd611;
ld.global.u16 %rs395, [%rd612];
shr.u16 %rs893, %rs395, 4;
and.b16 %rs396, %rs893, 7;
setp.eq.s16 %p1289, %rs396, 0;
mov.u32 %r7987, %r7845;
// Zero-length codeword: nothing to emit.
@%p1289 bra $L__BB3_1131;
cvt.u32.u16 %r7976, %rs396;
shr.u16 %rs894, %rs395, 8;
cvt.u32.u16 %r7977, %rs894;
// Loop: %r7976 = bits still to emit, %r7977 = remaining codeword bits.
$L__BB3_1126:
mov.u32 %r2892, %r7976;
setp.gt.u32 %p1290, %r7779, 2879;
mov.u32 %r7987, 1;
@%p1290 bra $L__BB3_1131;
// Room in the current byte = 8 - %r7777 - %r7778; take min(room, remaining).
mov.u32 %r5972, 8;
sub.s32 %r5973, %r5972, %r7777;
sub.s32 %r5974, %r5973, %r7778;
min.u32 %r5975, %r5974, %r2892;
setp.eq.s32 %p1291, %r5975, 32;
mov.u32 %r5976, -1;
shl.b32 %r5977, %r5976, %r5975;
not.b32 %r5978, %r5977;
selp.b32 %r5979, -1, %r5978, %p1291;
and.b32 %r5980, %r5979, %r7977;
shl.b32 %r5981, %r5980, %r7778;
cvt.u16.u32 %rs895, %r5981;
or.b16 %rs1147, %rs1147, %rs895;
add.s32 %r7778, %r5975, %r7778;
sub.s32 %r7976, %r2892, %r5975;
shr.u32 %r7977, %r7977, %r5975;
// Byte not yet full: go check whether bits remain.
setp.gt.u32 %p1292, %r5974, %r2892;
@%p1292 bra $L__BB3_1130;
// Byte is full. If the 7-bit restriction was active and the byte is not 127,
// only clear the restriction and keep filling.
setp.ne.s32 %p1293, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs896, %rs1147, 255;
setp.ne.s16 %p1294, %rs896, 127;
and.pred %p1295, %p1293, %p1294;
@%p1295 bra $L__BB3_1130;
// Store at %rd1 + %rd5 + (20548 - %r7779).
mov.u32 %r5984, 20548;
sub.s32 %r5985, %r5984, %r7779;
cvt.u64.u32 %rd613, %r5985;
add.s64 %rd614, %rd613, %rd5;
add.s64 %rd615, %rd1, %rd614;
st.global.u8 [%rd615], %rs1147;
add.s32 %r7779, %r7779, 1;
// A stored byte above 143 (0x8F) restricts the next byte to 7 bits.
setp.gt.u16 %p1296, %rs896, 143;
selp.u32 %r7777, 1, 0, %p1296;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_1130:
setp.ne.s32 %p1297, %r7976, 0;
mov.u32 %r7987, %r7845;
@%p1297 bra $L__BB3_1126;
// ---------------------------------------------------------------------------
// $L__BB3_1131: adaptive run coder for the second group, "non-empty" path.
// Runs only when the context %r2885 is 0; otherwise control skips to
// $L__BB3_1179. An empty group (%r7964 == 0) is handled at $L__BB3_1171.
// State: %rs1089 = byte being assembled (bits shifted in from the right),
//        %r7388  = bits still free in that byte,
//        %r7382  = bytes written so far (limit: 192),
//        %r7542  = adaptation state, %r7543 = current run count,
//        %r7541  = run threshold (1 << exponent), %r7540 = flag.
// Steps: append a 0 bit; append the low `exponent` bits of %r7543, most
// significant first; decrement %r7542 (floored at 0); reset the run; recompute
// the threshold. Exponent from %r7542: <3 -> 0, <6 -> 1, <9 -> 2, <11 -> 3,
// ==11 -> 4, otherwise 5.
// Whenever a byte would be stored while %r7382 > 191, the store is skipped and
// the flag that ends up in %r7540 is set to 1.
// ---------------------------------------------------------------------------
$L__BB3_1131:
setp.ne.s32 %p1298, %r2885, 0;
@%p1298 bra $L__BB3_1179;
setp.eq.s32 %p1299, %r7964, 0;
// %rd40 = %rd1 + %rd5 + (%r7382 + 17477): address of the next output byte.
add.s32 %r5986, %r7382, 17477;
cvt.u64.u32 %rd616, %r5986;
add.s64 %rd617, %rd616, %rd5;
add.s64 %rd40, %rd1, %rd617;
@%p1299 bra $L__BB3_1171;
// Append a 0 bit.
shl.b16 %rs1089, %rs1089, 1;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p1300, %r7388, 0;
mov.u32 %r8021, %r7540;
@%p1300 bra $L__BB3_1136;
// Byte complete: flag 1 if the budget is exhausted, otherwise store it.
setp.gt.u32 %p1301, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r8021, 1;
@%p1301 bra $L__BB3_1136;
st.global.u8 [%rd40], %rs1089;
add.s32 %r7382, %r7382, 1;
mov.u32 %r7388, 8;
mov.u16 %rs1089, 0;
mov.u32 %r8021, %r7540;
$L__BB3_1136:
// %r7991 = exponent selected from %r7542 (see block header).
setp.lt.u32 %p1302, %r7542, 3;
mov.u32 %r7991, 0;
@%p1302 bra $L__BB3_1139;
setp.lt.u32 %p1303, %r7542, 6;
mov.u32 %r7991, 1;
@%p1303 bra $L__BB3_1139;
setp.lt.u32 %p1304, %r7542, 9;
setp.eq.s32 %p1305, %r7542, 11;
selp.b32 %r5992, 4, 5, %p1305;
setp.lt.u32 %p1306, %r7542, 11;
selp.b32 %r5993, 3, %r5992, %p1306;
selp.b32 %r7991, 2, %r5993, %p1304;
$L__BB3_1139:
setp.eq.s32 %p1307, %r7991, 0;
@%p1307 bra $L__BB3_1167;
// Emit %r7991 bits of %r7543, MSB first. The loop is unrolled by 4; the
// (%r7991 & 3) leading bits are emitted by the peeled copies below.
add.s32 %r2916, %r7991, -1;
and.b32 %r2917, %r7991, 3;
setp.eq.s32 %p1308, %r2917, 0;
mov.u32 %r8001, %r7991;
mov.u32 %r8004, %r8021;
@%p1308 bra $L__BB3_1152;
// Peeled bit 1: bit (%r7991 - 1) of %r7543.
mov.u32 %r5995, 1;
shl.b32 %r5996, %r5995, %r2916;
and.b32 %r5997, %r5996, %r7543;
setp.ne.s32 %p1309, %r5997, 0;
selp.u32 %r5998, 1, 0, %p1309;
// %rs1089 = (%rs1089 << 1) | bit.
cvt.u32.u16 %r5999, %rs1089;
bfi.b32 %r6000, %r5999, %r5998, 1, 8;
cvt.u16.u32 %rs1089, %r6000;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p1310, %r7388, 0;
mov.u32 %r8004, %r8021;
@%p1310 bra $L__BB3_1144;
setp.gt.u32 %p1311, %r7382, 191;
mov.u32 %r7388, 0;
// %r5995 holds 1: flag the exhausted budget.
mov.u32 %r8004, %r5995;
@%p1311 bra $L__BB3_1144;
add.s32 %r6004, %r7382, 17477;
cvt.u64.u32 %rd618, %r6004;
add.s64 %rd619, %rd618, %rd5;
add.s64 %rd620, %rd1, %rd619;
st.global.u8 [%rd620], %rs1089;
add.s32 %r7382, %r7382, 1;
mov.u32 %r7388, 8;
mov.u16 %rs1089, 0;
mov.u32 %r8004, %r8021;
$L__BB3_1144:
setp.eq.s32 %p1312, %r2917, 1;
mov.u32 %r8021, %r8004;
mov.u32 %r8001, %r2916;
@%p1312 bra $L__BB3_1152;
// Peeled bit 2: bit (%r7991 - 2) of %r7543.
add.s32 %r8001, %r7991, -2;
mov.u32 %r6005, 1;
shl.b32 %r6006, %r6005, %r8001;
and.b32 %r6007, %r6006, %r7543;
setp.ne.s32 %p1313, %r6007, 0;
selp.u32 %r6008, 1, 0, %p1313;
cvt.u32.u16 %r6009, %rs1089;
bfi.b32 %r6010, %r6009, %r6008, 1, 8;
cvt.u16.u32 %rs1089, %r6010;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p1314, %r7388, 0;
mov.u32 %r7995, %r8004;
@%p1314 bra $L__BB3_1148;
setp.gt.u32 %p1315, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r7995, %r6005;
@%p1315 bra $L__BB3_1148;
add.s32 %r6013, %r7382, 17477;
cvt.u64.u32 %rd621, %r6013;
add.s64 %rd622, %rd621, %rd5;
add.s64 %rd623, %rd1, %rd622;
and.b16 %rs903, %rs1089, 255;
st.global.u8 [%rd623], %rs1089;
add.s32 %r7382, %r7382, 1;
// After storing 0xFF the next byte only gets 7 free bits.
setp.eq.s16 %p1316, %rs903, 255;
selp.b32 %r7388, 7, 8, %p1316;
mov.u16 %rs1089, 0;
mov.u32 %r7995, %r8004;
$L__BB3_1148:
setp.eq.s32 %p1317, %r2917, 2;
mov.u32 %r8021, %r7995;
mov.u32 %r8004, %r7995;
@%p1317 bra $L__BB3_1152;
// Peeled bit 3: bit (%r7991 - 3) of %r7543.
add.s32 %r8001, %r7991, -3;
mov.u32 %r6014, 1;
shl.b32 %r6015, %r6014, %r8001;
and.b32 %r6016, %r6015, %r7543;
setp.ne.s32 %p1318, %r6016, 0;
selp.u32 %r6017, 1, 0, %p1318;
cvt.u32.u16 %r6018, %rs1089;
bfi.b32 %r6019, %r6018, %r6017, 1, 8;
cvt.u16.u32 %rs1089, %r6019;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p1319, %r7388, 0;
mov.u32 %r8021, %r7995;
mov.u32 %r8004, %r7995;
@%p1319 bra $L__BB3_1152;
setp.gt.u32 %p1320, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r8021, %r6014;
mov.u32 %r8004, %r6014;
@%p1320 bra $L__BB3_1152;
add.s32 %r6024, %r7382, 17477;
cvt.u64.u32 %rd624, %r6024;
add.s64 %rd625, %rd624, %rd5;
add.s64 %rd626, %rd1, %rd625;
and.b16 %rs906, %rs1089, 255;
st.global.u8 [%rd626], %rs1089;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1321, %rs906, 255;
selp.b32 %r7388, 7, 8, %p1321;
mov.u16 %rs1089, 0;
mov.u32 %r8021, %r7995;
mov.u32 %r8004, %r7995;
$L__BB3_1152:
// With fewer than 4 bits in total, the peeled copies emitted everything.
setp.lt.u32 %p1322, %r2916, 3;
@%p1322 bra $L__BB3_1167;
mov.u32 %r8021, %r8004;
// Unrolled body: four bits per iteration, at positions %r8001-1 .. %r8001-4.
// Intermediate byte/free-bit/flag values live in %r8010/%r8011/%r8009,
// %r8014/%r8015/%r8013 and %r8018/%r8019/%r8017.
$L__BB3_1154:
add.s32 %r6025, %r8001, -1;
mov.u32 %r6026, 1;
shl.b32 %r6027, %r6026, %r6025;
and.b32 %r6028, %r6027, %r7543;
setp.ne.s32 %p1323, %r6028, 0;
selp.u32 %r6029, 1, 0, %p1323;
cvt.u32.u16 %r6030, %rs1089;
bfi.b32 %r8010, %r6030, %r6029, 1, 8;
add.s32 %r8011, %r7388, -1;
setp.ne.s32 %p1324, %r8011, 0;
mov.u32 %r8009, %r8021;
@%p1324 bra $L__BB3_1157;
setp.gt.u32 %p1325, %r7382, 191;
mov.u32 %r8011, 0;
mov.u32 %r8009, %r6026;
@%p1325 bra $L__BB3_1157;
cvt.u16.u32 %rs907, %r8010;
and.b16 %rs908, %rs907, 255;
add.s32 %r6034, %r7382, 17477;
cvt.u64.u32 %rd627, %r6034;
add.s64 %rd628, %rd627, %rd5;
add.s64 %rd629, %rd1, %rd628;
st.global.u8 [%rd629], %rs907;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1326, %rs908, 255;
selp.b32 %r8011, 7, 8, %p1326;
mov.u32 %r8010, 0;
mov.u32 %r8009, %r8021;
$L__BB3_1157:
add.s32 %r6035, %r8001, -2;
shl.b32 %r6037, %r6026, %r6035;
and.b32 %r6038, %r6037, %r7543;
setp.ne.s32 %p1327, %r6038, 0;
and.b32 %r6039, %r8010, 127;
selp.u32 %r6040, 1, 0, %p1327;
bfi.b32 %r8014, %r6039, %r6040, 1, 7;
add.s32 %r8015, %r8011, -1;
setp.ne.s32 %p1328, %r8015, 0;
mov.u32 %r8013, %r8009;
@%p1328 bra $L__BB3_1160;
setp.gt.u32 %p1329, %r7382, 191;
mov.u32 %r8015, 0;
mov.u32 %r8013, 1;
@%p1329 bra $L__BB3_1160;
cvt.u16.u32 %rs909, %r8014;
and.b16 %rs910, %rs909, 255;
add.s32 %r6044, %r7382, 17477;
cvt.u64.u32 %rd630, %r6044;
add.s64 %rd631, %rd630, %rd5;
add.s64 %rd632, %rd1, %rd631;
st.global.u8 [%rd632], %rs909;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1330, %rs910, 255;
selp.b32 %r8015, 7, 8, %p1330;
mov.u32 %r8014, 0;
mov.u32 %r8013, %r8009;
$L__BB3_1160:
add.s32 %r6045, %r8001, -3;
mov.u32 %r6046, 1;
shl.b32 %r6047, %r6046, %r6045;
and.b32 %r6048, %r6047, %r7543;
setp.ne.s32 %p1331, %r6048, 0;
and.b32 %r6049, %r8014, 127;
selp.u32 %r6050, 1, 0, %p1331;
bfi.b32 %r8018, %r6049, %r6050, 1, 7;
add.s32 %r8019, %r8015, -1;
setp.ne.s32 %p1332, %r8019, 0;
mov.u32 %r8017, %r8013;
@%p1332 bra $L__BB3_1163;
setp.gt.u32 %p1333, %r7382, 191;
mov.u32 %r8019, 0;
mov.u32 %r8017, %r6046;
@%p1333 bra $L__BB3_1163;
cvt.u16.u32 %rs911, %r8018;
and.b16 %rs912, %rs911, 255;
add.s32 %r6054, %r7382, 17477;
cvt.u64.u32 %rd633, %r6054;
add.s64 %rd634, %rd633, %rd5;
add.s64 %rd635, %rd1, %rd634;
st.global.u8 [%rd635], %rs911;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1334, %rs912, 255;
selp.b32 %r8019, 7, 8, %p1334;
mov.u32 %r8018, 0;
mov.u32 %r8017, %r8013;
$L__BB3_1163:
// Fourth bit of the iteration; results return to %rs1089 / %r7388 / %r8021.
add.s32 %r8001, %r8001, -4;
shl.b32 %r6056, %r6046, %r8001;
and.b32 %r6057, %r6056, %r7543;
setp.ne.s32 %p1335, %r6057, 0;
and.b32 %r6058, %r8018, 127;
selp.u32 %r6059, 1, 0, %p1335;
bfi.b32 %r6060, %r6058, %r6059, 1, 15;
cvt.u16.u32 %rs1089, %r6060;
add.s32 %r7388, %r8019, -1;
setp.ne.s32 %p1336, %r7388, 0;
mov.u32 %r8021, %r8017;
@%p1336 bra $L__BB3_1166;
setp.gt.u32 %p1337, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r8021, 1;
@%p1337 bra $L__BB3_1166;
add.s32 %r6063, %r7382, 17477;
cvt.u64.u32 %rd636, %r6063;
add.s64 %rd637, %rd636, %rd5;
add.s64 %rd638, %rd1, %rd637;
and.b16 %rs914, %rs1089, 255;
st.global.u8 [%rd638], %rs1089;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1338, %rs914, 255;
selp.b32 %r7388, 7, 8, %p1338;
mov.u16 %rs1089, 0;
mov.u32 %r8021, %r8017;
$L__BB3_1166:
setp.ne.s32 %p1339, %r8001, 0;
@%p1339 bra $L__BB3_1154;
$L__BB3_1167:
// Adapt downwards: %r7542 = max(%r7542 - 1, 0); reset the run count.
add.s32 %r6065, %r7542, -1;
setp.eq.s32 %p1340, %r7542, 0;
mov.u32 %r7543, 0;
selp.b32 %r7542, 0, %r6065, %p1340;
// Recompute the exponent for the new state.
setp.lt.u32 %p1341, %r7542, 3;
mov.u32 %r8027, %r7543;
@%p1341 bra $L__BB3_1170;
setp.lt.u32 %p1342, %r7542, 6;
mov.u32 %r8027, 1;
@%p1342 bra $L__BB3_1170;
setp.lt.u32 %p1343, %r7542, 9;
setp.eq.s32 %p1344, %r7542, 11;
selp.b32 %r6067, 4, 5, %p1344;
setp.lt.u32 %p1345, %r7542, 11;
selp.b32 %r6068, 3, %r6067, %p1345;
selp.b32 %r8027, 2, %r6068, %p1343;
$L__BB3_1170:
// New threshold = 1 << exponent; publish the flag.
mov.u32 %r6070, 1;
shl.b32 %r7541, %r6070, %r8027;
mov.u32 %r7540, %r8021;
bra.uni $L__BB3_1179;
// ---------------------------------------------------------------------------
// $L__BB3_1171: adaptive run coder for the second group, "empty" path.
// Increments the run count %r7543. While it is below the threshold %r7541
// nothing is emitted. Once it reaches the threshold a 1 bit is appended to
// %rs1089; if that completes the byte, control goes to $L__BB3_1173 (an
// out-of-line flush block that rejoins at $L__BB3_1175). At $L__BB3_1175 the
// state %r7542 is raised by one (capped at 12), the run is reset, and the
// threshold is recomputed from the state-to-exponent mapping.
// ---------------------------------------------------------------------------
$L__BB3_1171:
add.s32 %r7543, %r7543, 1;
setp.lt.u32 %p1346, %r7543, %r7541;
@%p1346 bra $L__BB3_1179;
// Run reached the threshold: append a 1 bit.
shl.b16 %rs915, %rs1089, 1;
or.b16 %rs1089, %rs915, 1;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p1347, %r7388, 0;
mov.u32 %r8028, %r7540;
@%p1347 bra $L__BB3_1175;
// Byte complete: flush out of line.
bra.uni $L__BB3_1173;
$L__BB3_1175:
// Adapt upwards: %r7542 = min(%r7542 + 1, 12); reset the run count.
add.s32 %r6074, %r7542, 1;
min.u32 %r7542, %r6074, 12;
setp.lt.u32 %p1350, %r7542, 3;
mov.u32 %r7543, 0;
mov.u32 %r8031, %r7543;
@%p1350 bra $L__BB3_1178;
setp.lt.u32 %p1351, %r7542, 6;
mov.u32 %r8031, 1;
@%p1351 bra $L__BB3_1178;
setp.lt.u32 %p1352, %r7542, 9;
setp.eq.s32 %p1353, %r7542, 11;
selp.b32 %r6076, 4, 5, %p1353;
setp.lt.u32 %p1354, %r7542, 11;
selp.b32 %r6077, 3, %r6076, %p1354;
selp.b32 %r8031, 2, %r6077, %p1352;
$L__BB3_1178:
// New threshold = 1 << exponent; publish the flag.
mov.u32 %r6079, 1;
shl.b32 %r7541, %r6079, %r8031;
mov.u32 %r7540, %r8028;
// $L__BB3_1179: conditionally append a variable-length bit field to the
// forward byte stream at %rd1 + %rd5 + %r7925.
// Selector: bit 0 of %r7964. Width: %r2886 minus bit 0 of %r3000 (the low
// nibble of %rs395). Payload: the low `width` bits of %r7958.
// Writer state: %r7922 = partial byte, %r7923 = bits already in it,
// %r7924 = bit capacity of the current byte, %r7925 = bytes written.
// %r8048 is the outgoing status: the incoming %r7951, or 1 if the byte
// index exceeded 17476 before all bits were written.
$L__BB3_1179:
and.b16 %rs918, %rs395, 15;
cvt.u32.u16 %r3000, %rs918;
and.b32 %r6080, %r7964, 1;
setp.eq.b32 %p1355, %r6080, 1;
mov.pred %p1356, 0;
xor.pred %p1357, %p1355, %p1356;
not.pred %p1358, %p1357;
mov.u32 %r8048, %r7951;
// Selector bit clear: nothing to emit.
@%p1358 bra $L__BB3_1186;
and.b32 %r6081, %r3000, 1;
sub.s32 %r8038, %r2886, %r6081;
setp.eq.s32 %p1359, %r8038, 0;
mov.u32 %r8048, %r7951;
// Zero-width field: nothing to emit.
@%p1359 bra $L__BB3_1186;
// Keep only the low %r8038 bits of the payload.
mov.u32 %r6082, -1;
shl.b32 %r6083, %r6082, %r8038;
not.b32 %r6084, %r6083;
and.b32 %r8039, %r7958, %r6084;
$L__BB3_1182:
// Capacity check: bail out with status 1 once the byte index passes 17476.
setp.gt.u32 %p1360, %r7925, 17476;
mov.u32 %r8048, 1;
@%p1360 bra $L__BB3_1186;
// chunk = min(free bits in the current byte, bits still to write).
sub.s32 %r6086, %r7924, %r7923;
min.u32 %r6087, %r6086, %r8038;
setp.eq.s32 %p1361, %r6087, 32;
mov.u32 %r6088, -1;
shl.b32 %r6089, %r6088, %r6087;
not.b32 %r6090, %r6089;
selp.b32 %r6091, -1, %r6090, %p1361;
// Place the low `chunk` payload bits above the bits already accumulated.
and.b32 %r6092, %r6091, %r8039;
shl.b32 %r6093, %r6092, %r7923;
or.b32 %r7922, %r6093, %r7922;
add.s32 %r7923, %r6087, %r7923;
shr.u32 %r8039, %r8039, %r6087;
sub.s32 %r8038, %r8038, %r6087;
setp.lt.u32 %p1362, %r7923, %r7924;
// Byte not yet full: skip the store.
@%p1362 bra $L__BB3_1185;
cvt.u64.u32 %rd639, %r7925;
add.s64 %rd640, %rd639, %rd5;
add.s64 %rd641, %rd1, %rd640;
st.global.u8 [%rd641], %r7922;
add.s32 %r7925, %r7925, 1;
// After an 0xFF byte the next byte may only carry 7 bits, otherwise 8.
setp.eq.s32 %p1363, %r7922, 255;
selp.b32 %r7924, 7, 8, %p1363;
mov.u32 %r7922, 0;
mov.u32 %r7923, %r7922;
$L__BB3_1185:
// Loop until every bit of the field has been consumed.
setp.ne.s32 %p1364, %r8038, 0;
mov.u32 %r8048, %r7951;
@%p1364 bra $L__BB3_1182;
// $L__BB3_1186: same forward bit-field emit as the region at $L__BB3_1179,
// for selector bit 1 of %r7964.
// Width: %r2886 minus bit 1 of %r3000. Payload: low `width` bits of %r7962.
// Writer state: %r7922 partial byte, %r7923 bits used, %r7924 byte capacity,
// %r7925 byte index into %rd1 + %rd5.
// %r8063 is the outgoing status: the incoming %r8048, or 1 on overflow
// (byte index > 17476).
$L__BB3_1186:
and.b32 %r3024, %r7964, 2;
setp.eq.s32 %p1365, %r3024, 0;
mov.u32 %r8063, %r8048;
// Selector bit clear: nothing to emit.
@%p1365 bra $L__BB3_1193;
shr.u32 %r6096, %r3000, 1;
and.b32 %r6097, %r6096, 1;
sub.s32 %r8053, %r2886, %r6097;
setp.eq.s32 %p1366, %r8053, 0;
mov.u32 %r8063, %r8048;
// Zero-width field: nothing to emit.
@%p1366 bra $L__BB3_1193;
// Keep only the low %r8053 bits of the payload.
mov.u32 %r6098, -1;
shl.b32 %r6099, %r6098, %r8053;
not.b32 %r6100, %r6099;
and.b32 %r8054, %r7962, %r6100;
$L__BB3_1189:
// Capacity check before touching the output buffer.
setp.gt.u32 %p1367, %r7925, 17476;
mov.u32 %r8063, 1;
@%p1367 bra $L__BB3_1193;
// chunk = min(free bits in the current byte, bits still to write).
sub.s32 %r6102, %r7924, %r7923;
min.u32 %r6103, %r6102, %r8053;
setp.eq.s32 %p1368, %r6103, 32;
mov.u32 %r6104, -1;
shl.b32 %r6105, %r6104, %r6103;
not.b32 %r6106, %r6105;
selp.b32 %r6107, -1, %r6106, %p1368;
and.b32 %r6108, %r6107, %r8054;
shl.b32 %r6109, %r6108, %r7923;
or.b32 %r7922, %r6109, %r7922;
add.s32 %r7923, %r6103, %r7923;
shr.u32 %r8054, %r8054, %r6103;
sub.s32 %r8053, %r8053, %r6103;
setp.lt.u32 %p1369, %r7923, %r7924;
// Byte not yet full: skip the store.
@%p1369 bra $L__BB3_1192;
cvt.u64.u32 %rd642, %r7925;
add.s64 %rd643, %rd642, %rd5;
add.s64 %rd644, %rd1, %rd643;
st.global.u8 [%rd644], %r7922;
add.s32 %r7925, %r7925, 1;
// After an 0xFF byte the next byte may only carry 7 bits, otherwise 8.
setp.eq.s32 %p1370, %r7922, 255;
selp.b32 %r7924, 7, 8, %p1370;
mov.u32 %r7922, 0;
mov.u32 %r7923, %r7922;
$L__BB3_1192:
// Loop until every bit of the field has been consumed.
setp.ne.s32 %p1371, %r8053, 0;
mov.u32 %r8063, %r8048;
@%p1371 bra $L__BB3_1189;
// $L__BB3_1193: forward bit-field emit for selector bit 2 of %r7964.
// Width: %r2886 minus bit 2 of %r3000. Payload: low `width` bits of %r7967.
// Writer state: %r7922 partial byte, %r7923 bits used, %r7924 byte capacity,
// %r7925 byte index into %rd1 + %rd5.
// %r8078 is the outgoing status: the incoming %r8063, or 1 on overflow
// (byte index > 17476).
$L__BB3_1193:
and.b32 %r3048, %r7964, 4;
setp.eq.s32 %p1372, %r3048, 0;
mov.u32 %r8078, %r8063;
// Selector bit clear: nothing to emit.
@%p1372 bra $L__BB3_1200;
shr.u32 %r6112, %r3000, 2;
and.b32 %r6113, %r6112, 1;
sub.s32 %r8068, %r2886, %r6113;
setp.eq.s32 %p1373, %r8068, 0;
mov.u32 %r8078, %r8063;
// Zero-width field: nothing to emit.
@%p1373 bra $L__BB3_1200;
// Keep only the low %r8068 bits of the payload.
mov.u32 %r6114, -1;
shl.b32 %r6115, %r6114, %r8068;
not.b32 %r6116, %r6115;
and.b32 %r8069, %r7967, %r6116;
$L__BB3_1196:
// Capacity check before touching the output buffer.
setp.gt.u32 %p1374, %r7925, 17476;
mov.u32 %r8078, 1;
@%p1374 bra $L__BB3_1200;
// chunk = min(free bits in the current byte, bits still to write).
sub.s32 %r6118, %r7924, %r7923;
min.u32 %r6119, %r6118, %r8068;
setp.eq.s32 %p1375, %r6119, 32;
mov.u32 %r6120, -1;
shl.b32 %r6121, %r6120, %r6119;
not.b32 %r6122, %r6121;
selp.b32 %r6123, -1, %r6122, %p1375;
and.b32 %r6124, %r6123, %r8069;
shl.b32 %r6125, %r6124, %r7923;
or.b32 %r7922, %r6125, %r7922;
add.s32 %r7923, %r6119, %r7923;
shr.u32 %r8069, %r8069, %r6119;
sub.s32 %r8068, %r8068, %r6119;
setp.lt.u32 %p1376, %r7923, %r7924;
// Byte not yet full: skip the store.
@%p1376 bra $L__BB3_1199;
cvt.u64.u32 %rd645, %r7925;
add.s64 %rd646, %rd645, %rd5;
add.s64 %rd647, %rd1, %rd646;
st.global.u8 [%rd647], %r7922;
add.s32 %r7925, %r7925, 1;
// After an 0xFF byte the next byte may only carry 7 bits, otherwise 8.
setp.eq.s32 %p1377, %r7922, 255;
selp.b32 %r7924, 7, 8, %p1377;
mov.u32 %r7922, 0;
mov.u32 %r7923, %r7922;
$L__BB3_1199:
// Loop until every bit of the field has been consumed.
setp.ne.s32 %p1378, %r8068, 0;
mov.u32 %r8078, %r8063;
@%p1378 bra $L__BB3_1196;
// $L__BB3_1200: forward bit-field emit for selector bit 3 of %r7964.
// Width: %r2886 minus (%r3000 >> 3); %r3000 is a 4-bit value (masked with 15
// at $L__BB3_1179), so the shift alone isolates bit 3.
// Payload: low `width` bits of %r7972.
// Writer state: %r7922 partial byte, %r7923 bits used, %r7924 byte capacity,
// %r7925 byte index into %rd1 + %rd5.
// %r8093 is the outgoing status: the incoming %r8078, or 1 on overflow
// (byte index > 17476).
$L__BB3_1200:
and.b32 %r3072, %r7964, 8;
setp.eq.s32 %p1379, %r3072, 0;
mov.u32 %r8093, %r8078;
// Selector bit clear: nothing to emit.
@%p1379 bra $L__BB3_1207;
shr.u32 %r6128, %r3000, 3;
sub.s32 %r8083, %r2886, %r6128;
setp.eq.s32 %p1380, %r8083, 0;
mov.u32 %r8093, %r8078;
// Zero-width field: nothing to emit.
@%p1380 bra $L__BB3_1207;
// Keep only the low %r8083 bits of the payload.
mov.u32 %r6129, -1;
shl.b32 %r6130, %r6129, %r8083;
not.b32 %r6131, %r6130;
and.b32 %r8084, %r7972, %r6131;
$L__BB3_1203:
// Capacity check before touching the output buffer.
setp.gt.u32 %p1381, %r7925, 17476;
mov.u32 %r8093, 1;
@%p1381 bra $L__BB3_1207;
// chunk = min(free bits in the current byte, bits still to write).
sub.s32 %r6133, %r7924, %r7923;
min.u32 %r6134, %r6133, %r8083;
setp.eq.s32 %p1382, %r6134, 32;
mov.u32 %r6135, -1;
shl.b32 %r6136, %r6135, %r6134;
not.b32 %r6137, %r6136;
selp.b32 %r6138, -1, %r6137, %p1382;
and.b32 %r6139, %r6138, %r8084;
shl.b32 %r6140, %r6139, %r7923;
or.b32 %r7922, %r6140, %r7922;
add.s32 %r7923, %r6134, %r7923;
shr.u32 %r8084, %r8084, %r6134;
sub.s32 %r8083, %r8083, %r6134;
setp.lt.u32 %p1383, %r7923, %r7924;
// Byte not yet full: skip the store.
@%p1383 bra $L__BB3_1206;
cvt.u64.u32 %rd648, %r7925;
add.s64 %rd649, %rd648, %rd5;
add.s64 %rd650, %rd1, %rd649;
st.global.u8 [%rd650], %r7922;
add.s32 %r7925, %r7925, 1;
// After an 0xFF byte the next byte may only carry 7 bits, otherwise 8.
setp.eq.s32 %p1384, %r7922, 255;
selp.b32 %r7924, 7, 8, %p1384;
mov.u32 %r7922, 0;
mov.u32 %r7923, %r7922;
$L__BB3_1206:
// Loop until every bit of the field has been consumed.
setp.ne.s32 %p1385, %r8083, 0;
mov.u32 %r8093, %r8078;
@%p1385 bra $L__BB3_1203;
$L__BB3_1207:
and.b32 %r6143, %r7961, 255;
and.b32 %r6144, %r7829, 255;
setp.lt.u32 %p1386, %r6143, %r6144;
cvt.u16.u32 %rs919, %r7961;
selp.b16 %rs920, %rs394, %rs919, %p1386;
st.shared.u8 [%r2835+1], %rs920;
ld.shared.u8 %rs1152, [%r2835+3];
setp.gt.u16 %p1387, %rs392, %rs1152;
add.s32 %r6145, %r2563, 3;
selp.b32 %r6146, %r7797, %r6145, %p1387;
add.s32 %r6148, %r4924, %r6146;
ld.shared.u8 %rs1154, [%r6148];
cvt.u16.u32 %rs1153, %r7971;
shr.u32 %r6149, %r3024, 1;
or.b32 %r6150, %r2839, %r6149;
st.shared.u8 [%r2835+2], %r7971;
st.shared.u8 [%r2837+1], %r6150;
ld.shared.u8 %rs1150, [%r2837+3];
mul.wide.u16 %r6151, %rs1150, 4;
add.s32 %r6152, %r6151, %r2838;
shr.u32 %r6153, %r3072, 3;
cvt.u16.u32 %rs1151, %r6153;
st.shared.u8 [%r2837+2], %r6153;
shr.u32 %r6154, %r3072, 2;
shr.u32 %r6155, %r3048, 1;
or.b32 %r6156, %r6154, %r6155;
or.b32 %r7798, %r6156, %r6152;
mul.lo.s32 %r6157, %r2626, 6;
setp.gt.s32 %p1388, %r2626, 0;
selp.b32 %r6158, %r6157, 0, %p1388;
cvt.u64.u32 %rd651, %r6158;
add.s64 %rd41, %rd34, %rd651;
ld.global.u8 %rs422, [%rd41+1];
add.s32 %r6159, %r6158, 2;
cvt.u64.u32 %rd652, %r6159;
add.s64 %rd653, %rd34, %rd652;
ld.global.u8 %rs423, [%rd653];
ld.global.u8 %rs424, [%rd653+1];
mul.lo.s32 %r6160, %r2887, 6;
setp.gt.s32 %p1389, %r2887, 0;
selp.b32 %r6161, %r6160, 0, %p1389;
cvt.u64.u32 %rd654, %r6161;
add.s64 %rd655, %rd34, %rd654;
ld.global.u8 %rs425, [%rd655];
ld.global.u8 %rs426, [%rd655+1];
add.s32 %r6162, %r6161, 2;
cvt.u64.u32 %rd656, %r6162;
add.s64 %rd657, %rd34, %rd656;
ld.global.u8 %rs427, [%rd657];
ld.global.u8 %rs428, [%rd657+1];
setp.eq.s16 %p1390, %rs422, 0;
mov.u32 %r8109, %r7987;
@%p1390 bra $L__BB3_1214;
ld.global.u8 %r8099, [%rd41];
cvt.u32.u16 %r8098, %rs422;
$L__BB3_1209:
mov.u32 %r3099, %r8098;
setp.gt.u32 %p1391, %r7779, 2879;
mov.u32 %r8109, 1;
@%p1391 bra $L__BB3_1214;
mov.u32 %r6164, 8;
sub.s32 %r6165, %r6164, %r7777;
sub.s32 %r6166, %r6165, %r7778;
min.u32 %r6167, %r6166, %r3099;
setp.eq.s32 %p1392, %r6167, 32;
mov.u32 %r6168, -1;
shl.b32 %r6169, %r6168, %r6167;
not.b32 %r6170, %r6169;
selp.b32 %r6171, -1, %r6170, %p1392;
and.b32 %r6172, %r6171, %r8099;
shl.b32 %r6173, %r6172, %r7778;
cvt.u16.u32 %rs921, %r6173;
or.b16 %rs1147, %rs1147, %rs921;
add.s32 %r7778, %r6167, %r7778;
sub.s32 %r8098, %r3099, %r6167;
shr.u32 %r8099, %r8099, %r6167;
setp.gt.u32 %p1393, %r6166, %r3099;
@%p1393 bra $L__BB3_1213;
setp.ne.s32 %p1394, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs922, %rs1147, 255;
setp.ne.s16 %p1395, %rs922, 127;
and.pred %p1396, %p1394, %p1395;
@%p1396 bra $L__BB3_1213;
mov.u32 %r6176, 20548;
sub.s32 %r6177, %r6176, %r7779;
cvt.u64.u32 %rd658, %r6177;
add.s64 %rd659, %rd658, %rd5;
add.s64 %rd660, %rd1, %rd659;
st.global.u8 [%rd660], %rs1147;
add.s32 %r7779, %r7779, 1;
setp.gt.u16 %p1397, %rs922, 143;
selp.u32 %r7777, 1, 0, %p1397;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_1213:
setp.ne.s32 %p1398, %r8098, 0;
mov.u32 %r8109, %r7987;
@%p1398 bra $L__BB3_1209;
// $L__BB3_1214: append %rs426 bits of the value %rs425 to the byte stream
// that grows downward from offset 20548 (address %rd1 + %rd5 + 20548 - %r7779).
// Writer state: %rs1147 = partial byte, %r7778 = bits already in it,
// %r7777 = 0/1 bit withheld from the current byte, %r7779 = bytes written.
// %r8121 is the outgoing status: the incoming %r8109, or 1 if %r7779
// exceeded 2879 before all bits were written.
// NOTE(review): the 0x7F / >0x8F handling looks like the HTJ2K VLC
// bit-stuffing rule - confirm against the CUDA source.
$L__BB3_1214:
setp.eq.s16 %p1399, %rs426, 0;
mov.u32 %r8121, %r8109;
// Zero-length field: nothing to emit.
@%p1399 bra $L__BB3_1221;
cvt.u32.u16 %r6178, %rs425;
and.b32 %r8111, %r6178, 255;
cvt.u32.u16 %r6179, %rs426;
and.b32 %r8110, %r6179, 255;
$L__BB3_1216:
mov.u32 %r3118, %r8110;
// Capacity check: status 1 once more than 2879 bytes have been produced.
setp.gt.u32 %p1400, %r7779, 2879;
mov.u32 %r8121, 1;
@%p1400 bra $L__BB3_1221;
// Free bits in this byte = 8 - withheld bit - bits already used.
mov.u32 %r6181, 8;
sub.s32 %r6182, %r6181, %r7777;
sub.s32 %r6183, %r6182, %r7778;
min.u32 %r6184, %r6183, %r3118;
setp.eq.s32 %p1401, %r6184, 32;
mov.u32 %r6185, -1;
shl.b32 %r6186, %r6185, %r6184;
not.b32 %r6187, %r6186;
selp.b32 %r6188, -1, %r6187, %p1401;
// Merge the low `chunk` bits above the bits already accumulated.
and.b32 %r6189, %r6188, %r8111;
shl.b32 %r6190, %r6189, %r7778;
cvt.u16.u32 %rs926, %r6190;
or.b16 %rs1147, %rs1147, %rs926;
add.s32 %r7778, %r6184, %r7778;
sub.s32 %r8110, %r3118, %r6184;
shr.u32 %r8111, %r8111, %r6184;
// More free bits than were requested: the byte is not full yet.
setp.gt.u32 %p1402, %r6183, %r3118;
@%p1402 bra $L__BB3_1220;
// Byte reached its (possibly reduced) capacity. If a bit was withheld and
// the byte is not 0x7F, release the withheld bit and keep filling this
// byte instead of flushing it.
setp.ne.s32 %p1403, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs927, %rs1147, 255;
setp.ne.s16 %p1404, %rs927, 127;
and.pred %p1405, %p1403, %p1404;
@%p1405 bra $L__BB3_1220;
// Flush: store at the descending address and start an empty byte.
mov.u32 %r6193, 20548;
sub.s32 %r6194, %r6193, %r7779;
cvt.u64.u32 %rd661, %r6194;
add.s64 %rd662, %rd661, %rd5;
add.s64 %rd663, %rd1, %rd662;
st.global.u8 [%rd663], %rs1147;
add.s32 %r7779, %r7779, 1;
// A flushed byte above 143 (0x8F) withholds one bit from the next byte.
setp.gt.u16 %p1406, %rs927, 143;
selp.u32 %r7777, 1, 0, %p1406;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_1220:
// Loop until every bit of the field has been consumed.
setp.ne.s32 %p1407, %r8110, 0;
mov.u32 %r8121, %r8109;
@%p1407 bra $L__BB3_1216;
// $L__BB3_1221: append %rs424 bits of the value %rs423 to the downward-growing
// byte stream (address %rd1 + %rd5 + 20548 - %r7779).
// Writer state: %rs1147 = partial byte, %r7778 = bits already in it,
// %r7777 = 0/1 bit withheld from the current byte, %r7779 = bytes written.
// %r8133 is the outgoing status: the incoming %r8121, or 1 if %r7779
// exceeded 2879 before all bits were written.
$L__BB3_1221:
setp.eq.s16 %p1408, %rs424, 0;
mov.u32 %r8133, %r8121;
// Zero-length field: nothing to emit.
@%p1408 bra $L__BB3_1228;
cvt.u32.u16 %r6195, %rs424;
and.b32 %r8122, %r6195, 255;
cvt.u32.u16 %r6196, %rs423;
and.b32 %r8123, %r6196, 255;
$L__BB3_1223:
mov.u32 %r3137, %r8122;
// Capacity check: status 1 once more than 2879 bytes have been produced.
setp.gt.u32 %p1409, %r7779, 2879;
mov.u32 %r8133, 1;
@%p1409 bra $L__BB3_1228;
// Free bits in this byte = 8 - withheld bit - bits already used.
mov.u32 %r6198, 8;
sub.s32 %r6199, %r6198, %r7777;
sub.s32 %r6200, %r6199, %r7778;
min.u32 %r6201, %r6200, %r3137;
setp.eq.s32 %p1410, %r6201, 32;
mov.u32 %r6202, -1;
shl.b32 %r6203, %r6202, %r6201;
not.b32 %r6204, %r6203;
selp.b32 %r6205, -1, %r6204, %p1410;
// Merge the low `chunk` bits above the bits already accumulated.
and.b32 %r6206, %r6205, %r8123;
shl.b32 %r6207, %r6206, %r7778;
cvt.u16.u32 %rs931, %r6207;
or.b16 %rs1147, %rs1147, %rs931;
add.s32 %r7778, %r6201, %r7778;
sub.s32 %r8122, %r3137, %r6201;
shr.u32 %r8123, %r8123, %r6201;
// More free bits than were requested: the byte is not full yet.
setp.gt.u32 %p1411, %r6200, %r3137;
@%p1411 bra $L__BB3_1227;
// Byte reached capacity. A withheld bit is released (no flush) unless the
// byte equals 0x7F.
setp.ne.s32 %p1412, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs932, %rs1147, 255;
setp.ne.s16 %p1413, %rs932, 127;
and.pred %p1414, %p1412, %p1413;
@%p1414 bra $L__BB3_1227;
// Flush: store at the descending address and start an empty byte.
mov.u32 %r6210, 20548;
sub.s32 %r6211, %r6210, %r7779;
cvt.u64.u32 %rd664, %r6211;
add.s64 %rd665, %rd664, %rd5;
add.s64 %rd666, %rd1, %rd665;
st.global.u8 [%rd666], %rs1147;
add.s32 %r7779, %r7779, 1;
// A flushed byte above 143 (0x8F) withholds one bit from the next byte.
setp.gt.u16 %p1415, %rs932, 143;
selp.u32 %r7777, 1, 0, %p1415;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_1227:
// Loop until every bit of the field has been consumed.
setp.ne.s32 %p1416, %r8122, 0;
mov.u32 %r8133, %r8121;
@%p1416 bra $L__BB3_1223;
// $L__BB3_1228: append %rs428 bits of the value %rs427 to the downward-growing
// byte stream (address %rd1 + %rd5 + 20548 - %r7779).
// Writer state: %rs1147 = partial byte, %r7778 = bits already in it,
// %r7777 = 0/1 bit withheld from the current byte, %r7779 = bytes written.
// %r7776 is the outgoing status: the incoming %r8133, or 1 if %r7779
// exceeded 2879 before all bits were written.
$L__BB3_1228:
setp.eq.s16 %p1417, %rs428, 0;
mov.u32 %r7776, %r8133;
// Zero-length field: nothing to emit.
@%p1417 bra $L__BB3_1235;
cvt.u32.u16 %r6212, %rs427;
and.b32 %r8135, %r6212, 255;
cvt.u32.u16 %r6213, %rs428;
and.b32 %r8134, %r6213, 255;
$L__BB3_1230:
mov.u32 %r3156, %r8134;
// Capacity check: status 1 once more than 2879 bytes have been produced.
setp.gt.u32 %p1418, %r7779, 2879;
mov.u32 %r7776, 1;
@%p1418 bra $L__BB3_1235;
// Free bits in this byte = 8 - withheld bit - bits already used.
mov.u32 %r6215, 8;
sub.s32 %r6216, %r6215, %r7777;
sub.s32 %r6217, %r6216, %r7778;
min.u32 %r6218, %r6217, %r3156;
setp.eq.s32 %p1419, %r6218, 32;
mov.u32 %r6219, -1;
shl.b32 %r6220, %r6219, %r6218;
not.b32 %r6221, %r6220;
selp.b32 %r6222, -1, %r6221, %p1419;
// Merge the low `chunk` bits above the bits already accumulated.
and.b32 %r6223, %r6222, %r8135;
shl.b32 %r6224, %r6223, %r7778;
cvt.u16.u32 %rs936, %r6224;
or.b16 %rs1147, %rs1147, %rs936;
add.s32 %r7778, %r6218, %r7778;
sub.s32 %r8134, %r3156, %r6218;
shr.u32 %r8135, %r8135, %r6218;
// More free bits than were requested: the byte is not full yet.
setp.gt.u32 %p1420, %r6217, %r3156;
@%p1420 bra $L__BB3_1234;
// Byte reached capacity. A withheld bit is released (no flush) unless the
// byte equals 0x7F.
setp.ne.s32 %p1421, %r7777, 0;
mov.u32 %r7777, 0;
and.b16 %rs937, %rs1147, 255;
setp.ne.s16 %p1422, %rs937, 127;
and.pred %p1423, %p1421, %p1422;
@%p1423 bra $L__BB3_1234;
// Flush: store at the descending address and start an empty byte.
mov.u32 %r6227, 20548;
sub.s32 %r6228, %r6227, %r7779;
cvt.u64.u32 %rd667, %r6228;
add.s64 %rd668, %rd667, %rd5;
add.s64 %rd669, %rd1, %rd668;
st.global.u8 [%rd669], %rs1147;
add.s32 %r7779, %r7779, 1;
// A flushed byte above 143 (0x8F) withholds one bit from the next byte.
setp.gt.u16 %p1424, %rs937, 143;
selp.u32 %r7777, 1, 0, %p1424;
mov.u32 %r7778, 0;
mov.u16 %rs1147, 0;
$L__BB3_1234:
// Loop until every bit of the field has been consumed.
setp.ne.s32 %p1425, %r8134, 0;
mov.u32 %r7776, %r8133;
@%p1425 bra $L__BB3_1230;
$L__BB3_1235:
add.s64 %rd689, %rd689, 16;
add.s32 %r7796, %r7796, 4;
setp.lt.u32 %p1426, %r7796, 64;
@%p1426 bra $L__BB3_1005;
add.s32 %r7780, %r7780, 2;
setp.lt.u32 %p1427, %r7780, 64;
add.s64 %rd688, %rd688, 1;
@%p1427 bra $L__BB3_1004;
setp.eq.s32 %p1428, %r7543, 0;
mov.u32 %r8146, %r7540;
@%p1428 bra $L__BB3_1241;
shl.b16 %rs940, %rs1089, 1;
or.b16 %rs1089, %rs940, 1;
add.s32 %r7388, %r7388, -1;
setp.ne.s32 %p1429, %r7388, 0;
mov.u32 %r8146, %r7540;
@%p1429 bra $L__BB3_1241;
setp.gt.u32 %p1430, %r7382, 191;
mov.u32 %r7388, 0;
mov.u32 %r8146, 1;
@%p1430 bra $L__BB3_1241;
add.s32 %r6231, %r7382, 17477;
cvt.u64.u32 %rd670, %r6231;
add.s64 %rd671, %rd670, %rd5;
add.s64 %rd672, %rd1, %rd671;
and.b16 %rs942, %rs1089, 255;
st.global.u8 [%rd672], %rs1089;
add.s32 %r7382, %r7382, 1;
setp.eq.s16 %p1431, %rs942, 255;
selp.b32 %r7388, 7, 8, %p1431;
mov.u16 %rs1089, 0;
mov.u32 %r8146, %r7540;
$L__BB3_1241:
cvt.u32.u16 %r6232, %rs1089;
and.b32 %r6233, %r6232, 255;
shl.b32 %r6234, %r6233, %r7388;
cvt.u16.u32 %rs447, %r6234;
mov.u32 %r6235, -1;
shl.b32 %r6236, %r6235, %r7778;
not.b32 %r6237, %r6236;
mov.u32 %r6238, 255;
and.b32 %r6239, %r6237, 255;
setp.eq.s32 %p1432, %r7778, 0;
selp.b32 %r3181, 0, %r6239, %p1432;
shl.b32 %r3182, %r6238, %r7388;
and.b32 %r6240, %r3182, 255;
or.b32 %r6241, %r6240, %r3181;
setp.eq.s32 %p1433, %r6241, 0;
mov.u32 %r8149, %r8146;
mov.u32 %r8151, %r7776;
@%p1433 bra $L__BB3_1247;
or.b16 %rs448, %rs1147, %rs447;
and.b16 %rs943, %rs448, 255;
xor.b16 %rs944, %rs448, %rs447;
cvt.u32.u16 %r6242, %rs944;
and.b32 %r6243, %r3182, %r6242;
and.b32 %r6244, %r6243, 255;
xor.b16 %rs945, %rs448, %rs1147;
cvt.u32.u16 %r6245, %rs945;
and.b32 %r6246, %r3181, %r6245;
or.b32 %r6247, %r6244, %r6246;
setp.eq.s32 %p1434, %r6247, 0;
setp.ne.s16 %p1435, %rs943, 255;
and.pred %p1436, %p1435, %p1434;
setp.gt.u32 %p1437, %r7779, 1;
and.pred %p1438, %p1437, %p1436;
add.s32 %r6248, %r7382, 17477;
cvt.u64.u32 %rd673, %r6248;
add.s64 %rd674, %rd673, %rd5;
add.s64 %rd44, %rd1, %rd674;
@%p1438 bra $L__BB3_1245;
bra.uni $L__BB3_1243;
$L__BB3_1245:
setp.gt.u32 %p1442, %r7382, 191;
mov.u32 %r8149, 1;
mov.u32 %r8151, %r7776;
@%p1442 bra $L__BB3_1247;
st.global.u8 [%rd44], %rs448;
add.s32 %r7382, %r7382, 1;
mov.u32 %r8149, %r8146;
mov.u32 %r8151, %r7776;
bra.uni $L__BB3_1247;
// $L__BB3_667: write an eight-word (32-byte) record at %rd6 in global memory.
// Every word is 0 except the word at byte offset 16, which receives %r3202.
// Control then transfers to $L__BB3_1261.
// NOTE(review): presumably the "empty result" descriptor for this code block;
// the field meanings are not visible here - confirm against the CUDA source.
$L__BB3_667:
mov.u32 %r4833, 0;
st.global.u32 [%rd6], %r4833;
st.global.u32 [%rd6+4], %r4833;
st.global.u32 [%rd6+8], %r4833;
st.global.u32 [%rd6+12], %r4833;
st.global.u32 [%rd6+16], %r3202;
st.global.u32 [%rd6+20], %r4833;
st.global.u32 [%rd6+24], %r4833;
st.global.u32 [%rd6+28], %r4833;
bra.uni $L__BB3_1261;
// $L__BB3_23: per-block setup.
//  - %r27 = 31 - %r3202 (used later as a shift amount).
//  - stores the byte 0xFF at [%rd7].
//  - zero-fills the first %r29 bytes of both shared arrays cleanup_e_val and
//    cleanup_cx_val, where %r29 = min(((%r3200 + 1) >> 1) + 2, 513).
// The fill is a 4x-unrolled loop ($L__BB3_25) followed by a remainder loop
// ($L__BB3_28) for the last %r29 & 3 bytes.
$L__BB3_23:
mov.u32 %r6296, 0;
mov.u32 %r3253, 31;
sub.s32 %r27, %r3253, %r3202;
mov.u16 %rs452, 255;
st.global.u8 [%rd7], %rs452;
add.s32 %r3254, %r3200, 1;
shr.u32 %r28, %r3254, 1;
add.s32 %r3255, %r28, 2;
min.u32 %r29, %r3255, 513;
// The next four instructions evaluate to %r3260 = %r29 - 1 using unsigned
// max on negated values.
mov.u32 %r3256, -3;
sub.s32 %r3257, %r3256, %r28;
max.u32 %r3258, %r3257, -514;
mov.u32 %r3259, -2;
sub.s32 %r3260, %r3259, %r3258;
// %r6298 = remainder count for the tail loop.
and.b32 %r6298, %r29, 3;
// Fewer than 4 bytes in total: skip the unrolled loop.
setp.lt.u32 %p33, %r3260, 3;
@%p33 bra $L__BB3_26;
// %r6295 = number of bytes handled by the unrolled loop (multiple of 4).
sub.s32 %r6295, %r29, %r6298;
mov.u32 %r6296, 0;
$L__BB3_25:
// Unrolled body: clear four consecutive bytes in each shared array.
mov.u32 %r3262, _ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val;
add.s32 %r3263, %r3262, %r6296;
mov.u16 %rs453, 0;
st.shared.u8 [%r3263], %rs453;
mov.u32 %r3264, _ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val;
add.s32 %r3265, %r3264, %r6296;
st.shared.u8 [%r3265], %rs453;
st.shared.u8 [%r3263+1], %rs453;
st.shared.u8 [%r3265+1], %rs453;
st.shared.u8 [%r3263+2], %rs453;
st.shared.u8 [%r3265+2], %rs453;
st.shared.u8 [%r3263+3], %rs453;
st.shared.u8 [%r3265+3], %rs453;
add.s32 %r6296, %r6296, 4;
add.s32 %r6295, %r6295, -4;
setp.ne.s32 %p34, %r6295, 0;
@%p34 bra $L__BB3_25;
$L__BB3_26:
// No remainder: the fill is complete.
setp.eq.s32 %p35, %r6298, 0;
@%p35 bra $L__BB3_29;
mov.u32 %r3266, _ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val;
mov.u32 %r3268, _ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val;
$L__BB3_28:
// Tail loop: one byte per array per iteration, continuing at offset %r6296.
.pragma "nounroll";
add.s32 %r3267, %r3266, %r6296;
mov.u16 %rs454, 0;
st.shared.u8 [%r3267], %rs454;
add.s32 %r3269, %r3268, %r6296;
st.shared.u8 [%r3269], %rs454;
add.s32 %r6296, %r6296, 1;
add.s32 %r6298, %r6298, -1;
setp.ne.s32 %p36, %r6298, 0;
@%p36 bra $L__BB3_28;
mov.u32 %r7016, 0;
mov.u32 %r6829, 1;
mov.u16 %rs956, 0;
mov.u32 %r7017, 8;
mov.u16 %rs1025, 15;
mov.u32 %r6830, 4;
mov.u32 %r7018, %r7016;
mov.u32 %r7019, %r7016;
mov.u32 %r7050, %r7016;
mov.u32 %r6831, %r6829;
mov.u32 %r6832, %r7016;
mov.u32 %r6381, %r7016;
mov.u32 %r6390, %r7017;
mov.u32 %r6595, %r7016;
mov.u32 %r6596, %r7016;
mov.u32 %r6597, %r6829;
mov.u32 %r6598, %r7016;
@%p2 bra $L__BB3_397;
ld.param.u64 %rd682, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_param_2];
cvta.to.global.u64 %rd8, %rd682;
cvta.to.global.u64 %rd9, %rd48;
mov.u32 %r3302, 0;
mov.u32 %r6390, 8;
mov.u32 %r6597, 1;
mov.u32 %r6830, 4;
mov.u16 %rs1025, 15;
mov.u16 %rs956, 0;
mov.u32 %r6299, %r3302;
mov.u32 %r6598, %r3302;
mov.u32 %r6596, %r3302;
mov.u32 %r6595, %r3302;
mov.u32 %r6381, %r3302;
mov.u32 %r6832, %r3302;
mov.u32 %r6831, %r6597;
mov.u32 %r6829, %r6597;
mov.u32 %r7050, %r3302;
mov.u32 %r7019, %r3302;
mov.u32 %r7018, %r3302;
mov.u32 %r7017, %r6390;
mov.u32 %r7016, %r3302;
mov.u32 %r6315, %r3302;
mov.u32 %r6316, %r3302;
bra.uni $L__BB3_31;
// $L__BB3_273: out-of-line byte flush that rejoins at $L__BB3_275.
// If the byte counter %r6381 exceeds 191, nothing is stored: %r6694 = 1 and
// %r6390 = 0 are passed on. Otherwise the byte %rs956 is stored at [%rd14],
// the counter is bumped, the accumulator is cleared, %r6390 is reset to 8 and
// %r6694 inherits %r6598.
// NOTE(review): %rd14 is computed outside this view; presumably it addresses
// the slot for byte index %r6381 - confirm.
$L__BB3_273:
setp.gt.u32 %p304, %r6381, 191;
mov.u32 %r6694, 1;
mov.u32 %r6390, 0;
@%p304 bra $L__BB3_275;
st.global.u8 [%rd14], %rs956;
add.s32 %r6381, %r6381, 1;
mov.u16 %rs956, 0;
mov.u32 %r6390, 8;
mov.u32 %r6694, %r6598;
bra.uni $L__BB3_275;
// $L__BB3_197: out-of-line byte flush that rejoins at $L__BB3_199.
// If the byte counter %r6381 exceeds 191, nothing is stored: %r6584 = 1 and
// %r6390 = 0 are passed on. Otherwise the byte %rs956 is stored at [%rd13],
// the counter is bumped, the accumulator is cleared, %r6390 is reset to 8 and
// %r6584 inherits %r6598.
// NOTE(review): %rd13 is computed outside this view; presumably it addresses
// the slot for byte index %r6381 - confirm.
$L__BB3_197:
setp.gt.u32 %p217, %r6381, 191;
mov.u32 %r6584, 1;
mov.u32 %r6390, 0;
@%p217 bra $L__BB3_199;
st.global.u8 [%rd13], %rs956;
add.s32 %r6381, %r6381, 1;
mov.u16 %rs956, 0;
mov.u32 %r6390, 8;
mov.u32 %r6584, %r6598;
bra.uni $L__BB3_199;
// $L__BB3_312: out-of-line byte flush that rejoins at $L__BB3_314.
// If the byte counter %r6381 exceeds 191, nothing is stored: %r6701 = 1 and
// %r6390 = 0 are passed on. Otherwise the byte %rs956 is stored at [%rd14],
// the counter is bumped and the accumulator cleared; the next byte gets a
// 7-bit budget if the stored byte was 0xFF, else 8 bits. %r6701 inherits
// %r6598 on the success path.
// NOTE(review): %rd14 is computed outside this view - confirm it tracks
// %r6381.
$L__BB3_312:
setp.gt.u32 %p351, %r6381, 191;
mov.u32 %r6701, 1;
mov.u32 %r6390, 0;
@%p351 bra $L__BB3_314;
and.b16 %rs561, %rs956, 255;
st.global.u8 [%rd14], %rs956;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p352, %rs561, 255;
selp.b32 %r6390, 7, 8, %p352;
mov.u16 %rs956, 0;
mov.u32 %r6701, %r6598;
bra.uni $L__BB3_314;
// $L__BB3_236: out-of-line byte flush that rejoins at $L__BB3_238.
// If the byte counter %r6381 exceeds 191, nothing is stored: %r6591 = 1 and
// %r6390 = 0 are passed on. Otherwise the byte %rs956 is stored at [%rd13],
// the counter is bumped and the accumulator cleared; the next byte gets a
// 7-bit budget if the stored byte was 0xFF, else 8 bits. %r6591 inherits
// %r6598 on the success path.
// NOTE(review): %rd13 is computed outside this view - confirm it tracks
// %r6381.
$L__BB3_236:
setp.gt.u32 %p264, %r6381, 191;
mov.u32 %r6591, 1;
mov.u32 %r6390, 0;
@%p264 bra $L__BB3_238;
and.b16 %rs541, %rs956, 255;
st.global.u8 [%rd13], %rs956;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p265, %rs541, 255;
selp.b32 %r6390, 7, 8, %p265;
mov.u16 %rs956, 0;
mov.u32 %r6591, %r6598;
bra.uni $L__BB3_238;
// $L__BB3_31: top of the main loop - fetch and classify the first sample of
// the current group.
// Loads the signed 32-bit word at %rd3 + 4 * (%r6316 + %rd4), rewrites it as
// sign bit | (|x| << %r27), then derives:
//   %r62   = ((v << 1) >> %r27) & ~1   (even value; zero means "not coded")
//   %r6318 = 32 - clz(%r62 - 1)        (bit length of %r62 - 1)
//   %r6319 = %r62 + sign - 2
//   %r6325 = 1 if %r62 != 0, else 0    (bit 0 of the group's pattern mask)
// %r6321 is also cleared here for the second sample handled at $L__BB3_35.
$L__BB3_31:
cvt.u64.u32 %rd67, %r6316;
add.s64 %rd68, %rd67, %rd4;
shl.b64 %rd69, %rd68, 2;
add.s64 %rd70, %rd3, %rd69;
ld.global.u32 %r59, [%rd70];
setp.eq.s32 %p38, %r59, 0;
// A zero input maps to %r3302 (0 when set at $L__BB3_29).
mov.u32 %r6317, %r3302;
@%p38 bra $L__BB3_33;
// Two's complement -> sign-magnitude, magnitude shifted up by %r27.
and.b32 %r3304, %r59, -2147483648;
abs.s32 %r3305, %r59;
shl.b32 %r3306, %r3305, %r27;
or.b32 %r6317, %r3306, %r3304;
$L__BB3_33:
// Drop the sign bit, undo the alignment shift, clear bit 0.
shl.b32 %r3310, %r6317, 1;
shr.u32 %r3311, %r3310, %r27;
and.b32 %r62, %r3311, -2;
setp.eq.s32 %p39, %r62, 0;
mov.u32 %r6321, 0;
mov.u32 %r6318, %r6321;
mov.u32 %r6319, %r6321;
mov.u32 %r6325, %r6321;
@%p39 bra $L__BB3_35;
add.s32 %r3313, %r62, -1;
clz.b32 %r3314, %r3313;
mov.u32 %r3315, 32;
sub.s32 %r6318, %r3315, %r3314;
// %r3316 = sign bit (0/1) of the sign-magnitude value.
shr.u32 %r3316, %r6317, 31;
add.s32 %r3317, %r3316, %r62;
add.s32 %r6319, %r3317, -2;
mov.u32 %r6325, 1;
// $L__BB3_35: fetch and classify the second sample of the group, located
// %r3198 words after the first (index %r6316 + %r3198). It is read only when
// %r3201 >= 2; otherwise it is treated as zero (%r6321 stays 0).
// Outputs: %r6322 = bit length (as for %r6318), %r6323 = value + sign - 2,
// bit 1 of %r6325 set when non-zero, %r6341 = max(%r6318, %r6322).
// Also resets %r6336 to 0 for the later samples.
// NOTE(review): %r3198 is presumably the row stride and %r3201 the number of
// rows - confirm.
$L__BB3_35:
setp.lt.u32 %p40, %r3201, 2;
@%p40 bra $L__BB3_38;
add.s32 %r3320, %r6316, %r3198;
cvt.u64.u32 %rd71, %r3320;
add.s64 %rd72, %rd71, %rd4;
shl.b64 %rd73, %rd72, 2;
add.s64 %rd74, %rd3, %rd73;
ld.global.u32 %r68, [%rd74];
setp.eq.s32 %p41, %r68, 0;
@%p41 bra $L__BB3_38;
// Two's complement -> sign-magnitude, magnitude shifted up by %r27.
and.b32 %r3321, %r68, -2147483648;
abs.s32 %r3322, %r68;
shl.b32 %r3323, %r3322, %r27;
or.b32 %r6321, %r3323, %r3321;
$L__BB3_38:
shl.b32 %r3326, %r6321, 1;
shr.u32 %r3327, %r3326, %r27;
and.b32 %r71, %r3327, -2;
setp.eq.s32 %p42, %r71, 0;
mov.u32 %r6336, 0;
mov.u32 %r6322, %r6336;
mov.u32 %r6323, %r6336;
// Running maximum starts from the first sample's bit length.
mov.u32 %r6341, %r6318;
@%p42 bra $L__BB3_40;
or.b32 %r6325, %r6325, 2;
add.s32 %r3328, %r71, -1;
clz.b32 %r3329, %r3328;
mov.u32 %r3330, 32;
sub.s32 %r6322, %r3330, %r3329;
max.s32 %r6341, %r6318, %r6322;
shr.u32 %r3331, %r6321, 31;
add.s32 %r3332, %r3331, %r71;
add.s32 %r6323, %r3332, -2;
// $L__BB3_40: fetch and classify the third and fourth samples of the group
// (indices %r6316 + 1 and %r6316 + 1 + %r3198).
// Both are skipped, leaving their outputs 0, when %r6299 + 1 >= %r3200; in
// that case the next-group index %r6340 is %r6316 + 1 instead of %r6316 + 2.
// Third sample:  %r6337 = bit length, %r6339 = value + sign - 2, sets bit 2
//                of %r6325.
// Fourth sample: %r6336 = bit length, %r6338 = value + sign - 2, sets bit 3
//                of %r6325; read only when %p40 is false (%r3201 >= 2).
// %r6341 keeps the running maximum of the bit lengths.
$L__BB3_40:
add.s32 %r6340, %r6316, 1;
add.s32 %r3337, %r6299, 1;
setp.ge.u32 %p43, %r3337, %r3200;
// %r6336 is 0 here (cleared at $L__BB3_38), so these default to 0.
mov.u32 %r6337, %r6336;
mov.u32 %r6338, %r6336;
mov.u32 %r6339, %r6336;
@%p43 bra $L__BB3_51;
cvt.u64.u32 %rd75, %r6340;
add.s64 %rd76, %rd75, %rd4;
shl.b64 %rd77, %rd76, 2;
add.s64 %rd78, %rd3, %rd77;
ld.global.u32 %r81, [%rd78];
setp.eq.s32 %p44, %r81, 0;
mov.u32 %r6337, 0;
mov.u32 %r6326, %r6337;
@%p44 bra $L__BB3_43;
// Two's complement -> sign-magnitude, magnitude shifted up by %r27.
and.b32 %r3339, %r81, -2147483648;
abs.s32 %r3340, %r81;
shl.b32 %r3341, %r3340, %r27;
or.b32 %r6326, %r3341, %r3339;
$L__BB3_43:
shl.b32 %r3344, %r6326, 1;
shr.u32 %r3345, %r3344, %r27;
and.b32 %r84, %r3345, -2;
setp.eq.s32 %p45, %r84, 0;
mov.u32 %r6339, %r6337;
@%p45 bra $L__BB3_45;
or.b32 %r6325, %r6325, 4;
add.s32 %r3346, %r84, -1;
clz.b32 %r3347, %r3346;
mov.u32 %r3348, 32;
sub.s32 %r6337, %r3348, %r3347;
max.s32 %r6341, %r6341, %r6337;
shr.u32 %r3349, %r6326, 31;
add.s32 %r3350, %r3349, %r84;
add.s32 %r6339, %r3350, -2;
$L__BB3_45:
// Fourth sample: one stride (%r3198) past the third.
mov.u32 %r6336, 0;
mov.u32 %r6331, %r6336;
@%p40 bra $L__BB3_48;
add.s32 %r3353, %r6340, %r3198;
cvt.u64.u32 %rd79, %r3353;
add.s64 %rd80, %rd79, %rd4;
shl.b64 %rd81, %rd80, 2;
add.s64 %rd82, %rd3, %rd81;
ld.global.u32 %r93, [%rd82];
setp.eq.s32 %p47, %r93, 0;
@%p47 bra $L__BB3_48;
and.b32 %r3354, %r93, -2147483648;
abs.s32 %r3355, %r93;
shl.b32 %r3356, %r3355, %r27;
or.b32 %r6331, %r3356, %r3354;
$L__BB3_48:
shl.b32 %r3359, %r6331, 1;
shr.u32 %r3360, %r3359, %r27;
and.b32 %r96, %r3360, -2;
setp.eq.s32 %p48, %r96, 0;
mov.u32 %r6338, %r6336;
@%p48 bra $L__BB3_50;
or.b32 %r6325, %r6325, 8;
add.s32 %r3361, %r96, -1;
clz.b32 %r3362, %r3361;
mov.u32 %r3363, 32;
sub.s32 %r6336, %r3363, %r3362;
max.s32 %r6341, %r6341, %r6336;
shr.u32 %r3364, %r6331, 31;
add.s32 %r3365, %r3364, %r96;
add.s32 %r6338, %r3365, -2;
$L__BB3_50:
// Both columns consumed: the next group starts two samples further on.
add.s32 %r6340, %r6316, 2;
// $L__BB3_51: commit the next sample index and summarise the group.
//   %r6316 = %r6340                      (index for the next iteration)
//   %r113  = %r6341 - 1 if %r6341 > 1, else 0
//   %r6343 = 0 if %r6341 < 2; otherwise a 4-bit mask marking which of the
//            four bit lengths equal the maximum %r6341:
//            bit 0 <- %r6318, bit 1 <- %r6322, bit 2 <- %r6337, bit 3 <- %r6336.
$L__BB3_51:
mov.u32 %r6316, %r6340;
add.s32 %r3367, %r6341, -1;
setp.lt.s32 %p49, %r6341, 2;
setp.gt.s32 %p50, %r6341, 1;
selp.b32 %r113, %r3367, 0, %p50;
mov.u32 %r6343, 0;
@%p49 bra $L__BB3_53;
setp.eq.s32 %p51, %r6318, %r6341;
selp.u32 %r3368, 1, 0, %p51;
setp.eq.s32 %p52, %r6322, %r6341;
selp.u32 %r3369, -1, 0, %p52;
// bfi inserts the low bit of %r3369 at bit position 1 of %r3368.
bfi.b32 %r3370, %r3369, %r3368, 1, 1;
setp.eq.s32 %p53, %r6337, %r6341;
selp.u16 %rs459, 1, 0, %p53;
// Multiply-by-4 / by-8 place the remaining flags at bits 2 and 3.
mul.wide.u16 %r3371, %rs459, 4;
or.b32 %r3372, %r3370, %r3371;
setp.eq.s32 %p54, %r6336, %r6341;
selp.u16 %rs460, 1, 0, %p54;
mul.wide.u16 %r3373, %rs460, 8;
or.b32 %r6343, %r3372, %r3373;
// $L__BB3_53: update the shared per-column context and emit the table
// codeword for this group.
// Shared updates, with idx = %r6299 >> 1:
//   cleanup_e_val[idx]    = max(old, low byte of %r6322)
//   cleanup_e_val[idx+1]  = low byte of %r6336
//   cleanup_cx_val[idx]  |= bit 1 of %r6325
//   cleanup_cx_val[idx+1] = bit 3 of %r6325
// Table lookup: 16-bit entry at %rd8 + 2 * ((%r6315 << 8) | (%r6325 << 4) |
// %r6343). Bits 4..6 of the entry give the codeword length and bits 8 and up
// give the codeword.
// The codeword is appended to the downward-growing stream at
// %rd1 + %rd5 + 20548 - %r6829 (state: %rs1025 partial byte, %r6830 bits
// used, %r6831 withheld bit, %r6829 bytes written). %r6355 is the outgoing
// status: the incoming %r6832, or 1 if %r6829 exceeded 2879.
// NOTE(review): %rd8 is kernel parameter 2 (see the cvta at $L__BB3_29);
// presumably the HTJ2K VLC encode table - confirm.
$L__BB3_53:
shr.u32 %r3374, %r6299, 1;
mov.u32 %r3375, _ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val;
add.s32 %r116, %r3375, %r3374;
ld.shared.u8 %rs461, [%r116];
cvt.u32.u16 %r3376, %rs461;
and.b32 %r3377, %r3376, 255;
and.b32 %r3378, %r6322, 255;
// Keep whichever byte is larger: the stored one or the new bit length.
setp.lt.u32 %p55, %r3378, %r3377;
cvt.u16.u32 %rs462, %r6322;
selp.b16 %rs463, %rs461, %rs462, %p55;
st.shared.u8 [%r116], %rs463;
cvt.u16.u32 %rs3, %r6336;
st.shared.u8 [%r116+1], %rs3;
and.b32 %r117, %r6325, 2;
cvt.u16.u32 %rs464, %r117;
shr.u16 %rs465, %rs464, 1;
mov.u32 %r3379, _ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val;
add.s32 %r118, %r3379, %r3374;
ld.shared.u8 %rs466, [%r118];
or.b16 %rs467, %rs466, %rs465;
st.shared.u8 [%r118], %rs467;
and.b32 %r119, %r6325, 8;
shr.u32 %r120, %r119, 3;
st.shared.u8 [%r118+1], %r120;
// Build the table index and fetch the 16-bit entry.
shl.b32 %r3380, %r6325, 4;
shl.b32 %r3381, %r6315, 8;
or.b32 %r3382, %r3380, %r3381;
or.b32 %r3383, %r3382, %r6343;
mul.wide.u32 %rd83, %r3383, 2;
add.s64 %rd84, %rd8, %rd83;
ld.global.u16 %rs4, [%rd84];
shr.u16 %rs468, %rs4, 4;
and.b16 %rs5, %rs468, 7;
setp.eq.s16 %p56, %rs5, 0;
mov.u32 %r6355, %r6832;
// Zero-length codeword: nothing to emit.
@%p56 bra $L__BB3_60;
cvt.u32.u16 %r6344, %rs5;
shr.u16 %rs469, %rs4, 8;
cvt.u32.u16 %r6345, %rs469;
$L__BB3_55:
mov.u32 %r123, %r6344;
// Capacity check: status 1 once more than 2879 bytes have been produced.
setp.gt.u32 %p57, %r6829, 2879;
mov.u32 %r6355, 1;
@%p57 bra $L__BB3_60;
// Free bits in this byte = 8 - withheld bit - bits already used.
mov.u32 %r3385, 8;
sub.s32 %r3386, %r3385, %r6831;
sub.s32 %r3387, %r3386, %r6830;
min.u32 %r3388, %r3387, %r123;
setp.eq.s32 %p58, %r3388, 32;
mov.u32 %r3389, -1;
shl.b32 %r3390, %r3389, %r3388;
not.b32 %r3391, %r3390;
selp.b32 %r3392, -1, %r3391, %p58;
and.b32 %r3393, %r3392, %r6345;
shl.b32 %r3394, %r3393, %r6830;
cvt.u16.u32 %rs470, %r3394;
or.b16 %rs1025, %rs1025, %rs470;
add.s32 %r6830, %r3388, %r6830;
sub.s32 %r6344, %r123, %r3388;
shr.u32 %r6345, %r6345, %r3388;
// More free bits than were requested: the byte is not full yet.
setp.gt.u32 %p59, %r3387, %r123;
@%p59 bra $L__BB3_59;
// Byte reached capacity. A withheld bit is released (no flush) unless the
// byte equals 0x7F.
setp.ne.s32 %p60, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs471, %rs1025, 255;
setp.ne.s16 %p61, %rs471, 127;
and.pred %p62, %p60, %p61;
@%p62 bra $L__BB3_59;
// Flush: store at the descending address and start an empty byte.
mov.u32 %r3397, 20548;
sub.s32 %r3398, %r3397, %r6829;
cvt.u64.u32 %rd85, %r3398;
add.s64 %rd86, %rd85, %rd5;
add.s64 %rd87, %rd1, %rd86;
st.global.u8 [%rd87], %rs1025;
add.s32 %r6829, %r6829, 1;
// A flushed byte above 143 (0x8F) withholds one bit from the next byte.
setp.gt.u16 %p63, %rs471, 143;
selp.u32 %r6831, 1, 0, %p63;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_59:
// Loop until the whole codeword has been written.
setp.ne.s32 %p64, %r6344, 0;
mov.u32 %r6355, %r6832;
@%p64 bra $L__BB3_55;
$L__BB3_60:
setp.ne.s32 %p65, %r6315, 0;
@%p65 bra $L__BB3_108;
setp.eq.s32 %p66, %r6325, 0;
add.s32 %r3399, %r6381, 17477;
cvt.u64.u32 %rd88, %r3399;
add.s64 %rd89, %rd88, %rd5;
add.s64 %rd10, %rd1, %rd89;
@%p66 bra $L__BB3_100;
shl.b16 %rs956, %rs956, 1;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p67, %r6390, 0;
mov.u32 %r6391, %r6598;
@%p67 bra $L__BB3_65;
bra.uni $L__BB3_63;
$L__BB3_65:
setp.lt.u32 %p69, %r6596, 3;
mov.u32 %r6359, 0;
@%p69 bra $L__BB3_68;
setp.lt.u32 %p70, %r6596, 6;
mov.u32 %r6359, 1;
@%p70 bra $L__BB3_68;
setp.lt.u32 %p71, %r6596, 9;
setp.eq.s32 %p72, %r6596, 11;
selp.b32 %r3405, 4, 5, %p72;
setp.lt.u32 %p73, %r6596, 11;
selp.b32 %r3406, 3, %r3405, %p73;
selp.b32 %r6359, 2, %r3406, %p71;
$L__BB3_68:
setp.eq.s32 %p74, %r6359, 0;
@%p74 bra $L__BB3_96;
add.s32 %r147, %r6359, -1;
and.b32 %r148, %r6359, 3;
setp.eq.s32 %p75, %r148, 0;
mov.u32 %r6369, %r6359;
mov.u32 %r6370, %r6391;
@%p75 bra $L__BB3_81;
mov.u32 %r3408, 1;
shl.b32 %r3409, %r3408, %r147;
and.b32 %r3410, %r3409, %r6595;
setp.ne.s32 %p76, %r3410, 0;
selp.u32 %r3411, 1, 0, %p76;
cvt.u32.u16 %r3412, %rs956;
bfi.b32 %r3413, %r3412, %r3411, 1, 8;
cvt.u16.u32 %rs956, %r3413;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p77, %r6390, 0;
mov.u32 %r6370, %r6391;
@%p77 bra $L__BB3_73;
setp.gt.u32 %p78, %r6381, 191;
mov.u32 %r6390, 0;
mov.u32 %r6370, %r3408;
@%p78 bra $L__BB3_73;
add.s32 %r3417, %r6381, 17477;
cvt.u64.u32 %rd90, %r3417;
add.s64 %rd91, %rd90, %rd5;
add.s64 %rd92, %rd1, %rd91;
st.global.u8 [%rd92], %rs956;
add.s32 %r6381, %r6381, 1;
mov.u16 %rs956, 0;
mov.u32 %r6390, 8;
mov.u32 %r6370, %r6391;
$L__BB3_73:
setp.eq.s32 %p79, %r148, 1;
mov.u32 %r6391, %r6370;
mov.u32 %r6369, %r147;
@%p79 bra $L__BB3_81;
add.s32 %r6369, %r6359, -2;
mov.u32 %r3418, 1;
shl.b32 %r3419, %r3418, %r6369;
and.b32 %r3420, %r3419, %r6595;
setp.ne.s32 %p80, %r3420, 0;
selp.u32 %r3421, 1, 0, %p80;
cvt.u32.u16 %r3422, %rs956;
bfi.b32 %r3423, %r3422, %r3421, 1, 8;
cvt.u16.u32 %rs956, %r3423;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p81, %r6390, 0;
mov.u32 %r6365, %r6370;
@%p81 bra $L__BB3_77;
setp.gt.u32 %p82, %r6381, 191;
mov.u32 %r6390, 0;
mov.u32 %r6365, %r3418;
@%p82 bra $L__BB3_77;
add.s32 %r3426, %r6381, 17477;
cvt.u64.u32 %rd93, %r3426;
add.s64 %rd94, %rd93, %rd5;
add.s64 %rd95, %rd1, %rd94;
and.b16 %rs478, %rs956, 255;
st.global.u8 [%rd95], %rs956;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p83, %rs478, 255;
selp.b32 %r6390, 7, 8, %p83;
mov.u16 %rs956, 0;
mov.u32 %r6365, %r6370;
$L__BB3_77:
setp.eq.s32 %p84, %r148, 2;
mov.u32 %r6391, %r6365;
mov.u32 %r6370, %r6365;
@%p84 bra $L__BB3_81;
add.s32 %r6369, %r6359, -3;
mov.u32 %r3427, 1;
shl.b32 %r3428, %r3427, %r6369;
and.b32 %r3429, %r3428, %r6595;
setp.ne.s32 %p85, %r3429, 0;
selp.u32 %r3430, 1, 0, %p85;
cvt.u32.u16 %r3431, %rs956;
bfi.b32 %r3432, %r3431, %r3430, 1, 8;
cvt.u16.u32 %rs956, %r3432;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p86, %r6390, 0;
mov.u32 %r6391, %r6365;
mov.u32 %r6370, %r6365;
@%p86 bra $L__BB3_81;
setp.gt.u32 %p87, %r6381, 191;
mov.u32 %r6390, 0;
mov.u32 %r6391, %r3427;
mov.u32 %r6370, %r3427;
@%p87 bra $L__BB3_81;
add.s32 %r3437, %r6381, 17477;
cvt.u64.u32 %rd96, %r3437;
add.s64 %rd97, %rd96, %rd5;
add.s64 %rd98, %rd1, %rd97;
and.b16 %rs481, %rs956, 255;
st.global.u8 [%rd98], %rs956;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p88, %rs481, 255;
selp.b32 %r6390, 7, 8, %p88;
mov.u16 %rs956, 0;
mov.u32 %r6391, %r6365;
mov.u32 %r6370, %r6365;
$L__BB3_81:
setp.lt.u32 %p89, %r147, 3;
@%p89 bra $L__BB3_96;
mov.u32 %r6391, %r6370;
$L__BB3_83:
add.s32 %r3438, %r6369, -1;
mov.u32 %r3439, 1;
shl.b32 %r3440, %r3439, %r3438;
and.b32 %r3441, %r3440, %r6595;
setp.ne.s32 %p90, %r3441, 0;
selp.u32 %r3442, 1, 0, %p90;
cvt.u32.u16 %r3443, %rs956;
bfi.b32 %r6379, %r3443, %r3442, 1, 8;
add.s32 %r6378, %r6390, -1;
setp.ne.s32 %p91, %r6378, 0;
mov.u32 %r6380, %r6391;
@%p91 bra $L__BB3_86;
setp.gt.u32 %p92, %r6381, 191;
mov.u32 %r6378, 0;
mov.u32 %r6380, %r3439;
@%p92 bra $L__BB3_86;
cvt.u16.u32 %rs482, %r6379;
and.b16 %rs483, %rs482, 255;
add.s32 %r3447, %r6381, 17477;
cvt.u64.u32 %rd99, %r3447;
add.s64 %rd100, %rd99, %rd5;
add.s64 %rd101, %rd1, %rd100;
st.global.u8 [%rd101], %rs482;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p93, %rs483, 255;
selp.b32 %r6378, 7, 8, %p93;
mov.u32 %r6379, 0;
mov.u32 %r6380, %r6391;
// Steps 2-4 of a bit-emission loop that is unrolled four ways; the loop head
// ($L__BB3_83) and step 1 lie above this block. Each step tests one bit of
// %r6595, shifts it into the low end of an 8-bit accumulator and decrements a
// bits-remaining counter. When the counter reaches 0 the accumulator byte is
// stored, unless %r6381 already exceeds 191, in which case a flag is set to 1.
$L__BB3_86:
// Step 2: test bit (%r6369 - 2) of %r6595.
// NOTE(review): %r3439 is defined above this chunk; steps 3 and 4 use a
// register holding 1 in the same role, so it is presumably 1 - confirm.
add.s32 %r3448, %r6369, -2;
shl.b32 %r3450, %r3439, %r3448;
and.b32 %r3451, %r3450, %r6595;
setp.ne.s32 %p94, %r3451, 0;
and.b32 %r3452, %r6379, 127;
selp.u32 %r3453, 1, 0, %p94;
// acc = ((acc & 127) << 1) | bit
bfi.b32 %r6383, %r3452, %r3453, 1, 7;
add.s32 %r6382, %r6378, -1;
setp.ne.s32 %p95, %r6382, 0;
mov.u32 %r6384, %r6380;
@%p95 bra $L__BB3_89;
// Counter hit 0: flag = 1 and skip the store when %r6381 > 191.
setp.gt.u32 %p96, %r6381, 191;
mov.u32 %r6384, 1;
mov.u32 %r6382, 0;
@%p96 bra $L__BB3_89;
cvt.u16.u32 %rs484, %r6383;
and.b16 %rs485, %rs484, 255;
// Store the low byte at %rd1 + %rd5 + (%r6381 + 17477), then advance %r6381.
add.s32 %r3457, %r6381, 17477;
cvt.u64.u32 %rd102, %r3457;
add.s64 %rd103, %rd102, %rd5;
add.s64 %rd104, %rd1, %rd103;
st.global.u8 [%rd104], %rs484;
add.s32 %r6381, %r6381, 1;
// The next byte takes 7 bits if the byte just stored was 255, otherwise 8.
setp.eq.s16 %p97, %rs485, 255;
selp.b32 %r6382, 7, 8, %p97;
mov.u32 %r6383, 0;
mov.u32 %r6384, %r6380;
$L__BB3_89:
// Step 3: test bit (%r6369 - 3) of %r6595; same append/flush sequence.
add.s32 %r3458, %r6369, -3;
mov.u32 %r3459, 1;
shl.b32 %r3460, %r3459, %r3458;
and.b32 %r3461, %r3460, %r6595;
setp.ne.s32 %p98, %r3461, 0;
and.b32 %r3462, %r6383, 127;
selp.u32 %r3463, 1, 0, %p98;
bfi.b32 %r6387, %r3462, %r3463, 1, 7;
add.s32 %r6386, %r6382, -1;
setp.ne.s32 %p99, %r6386, 0;
mov.u32 %r6388, %r6384;
@%p99 bra $L__BB3_92;
setp.gt.u32 %p100, %r6381, 191;
mov.u32 %r6386, 0;
// %r3459 holds 1, so this sets the flag to 1.
mov.u32 %r6388, %r3459;
@%p100 bra $L__BB3_92;
cvt.u16.u32 %rs486, %r6387;
and.b16 %rs487, %rs486, 255;
add.s32 %r3467, %r6381, 17477;
cvt.u64.u32 %rd105, %r3467;
add.s64 %rd106, %rd105, %rd5;
add.s64 %rd107, %rd1, %rd106;
st.global.u8 [%rd107], %rs486;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p101, %rs487, 255;
selp.b32 %r6386, 7, 8, %p101;
mov.u32 %r6387, 0;
mov.u32 %r6388, %r6384;
$L__BB3_92:
// Step 4: the bit index %r6369 drops by 4 for the whole unrolled iteration and
// the new %r6369 is the bit tested here. The accumulator is written back to
// %rs956 and the counter to %r6390.
add.s32 %r6369, %r6369, -4;
shl.b32 %r3469, %r3459, %r6369;
and.b32 %r3470, %r3469, %r6595;
setp.ne.s32 %p102, %r3470, 0;
and.b32 %r3471, %r6387, 127;
selp.u32 %r3472, 1, 0, %p102;
bfi.b32 %r3473, %r3471, %r3472, 1, 15;
cvt.u16.u32 %rs956, %r3473;
add.s32 %r6390, %r6386, -1;
setp.ne.s32 %p103, %r6390, 0;
mov.u32 %r6391, %r6388;
@%p103 bra $L__BB3_95;
setp.gt.u32 %p104, %r6381, 191;
mov.u32 %r6391, 1;
mov.u32 %r6390, 0;
@%p104 bra $L__BB3_95;
add.s32 %r3476, %r6381, 17477;
cvt.u64.u32 %rd108, %r3476;
add.s64 %rd109, %rd108, %rd5;
add.s64 %rd110, %rd1, %rd109;
and.b16 %rs489, %rs956, 255;
st.global.u8 [%rd110], %rs956;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p105, %rs489, 255;
selp.b32 %r6390, 7, 8, %p105;
mov.u16 %rs956, 0;
mov.u32 %r6391, %r6388;
$L__BB3_95:
// Repeat until the bit index reaches 0.
setp.ne.s32 %p106, %r6369, 0;
@%p106 bra $L__BB3_83;
// Decrement the state index %r6596 (saturating at 0), reset the run counter
// %r6595 to 0 and recompute the run threshold %r6597 = 1 << exponent(%r6596).
// The exponent ladder is: <3 -> 0, <6 -> 1, <9 -> 2, <11 -> 3, ==11 -> 4, else 5.
// NOTE(review): this equals the 13-entry table {0,0,0,1,1,1,2,2,2,3,3,4,5},
// which looks like the HTJ2K MEL exponent table - confirm against the source.
$L__BB3_96:
add.s32 %r3478, %r6596, -1;
setp.eq.s32 %p107, %r6596, 0;
mov.u32 %r6595, 0;
// state = (state == 0) ? 0 : state - 1
selp.b32 %r6596, 0, %r3478, %p107;
setp.lt.u32 %p108, %r6596, 3;
mov.u32 %r6395, %r6595;
@%p108 bra $L__BB3_99;
setp.lt.u32 %p109, %r6596, 6;
mov.u32 %r6395, 1;
@%p109 bra $L__BB3_99;
setp.lt.u32 %p110, %r6596, 9;
setp.eq.s32 %p111, %r6596, 11;
selp.b32 %r3480, 4, 5, %p111;
setp.lt.u32 %p112, %r6596, 11;
selp.b32 %r3481, 3, %r3480, %p112;
selp.b32 %r6395, 2, %r3481, %p110;
$L__BB3_99:
mov.u32 %r3483, 1;
shl.b32 %r6597, %r3483, %r6395;
// Carry the flag produced by the preceding bit writer forward in %r6598.
mov.u32 %r6598, %r6391;
bra.uni $L__BB3_108;
// Increment the run counter %r6595. While it stays below the threshold %r6597
// nothing is emitted. Once it reaches the threshold, a 1 bit is appended to
// the accumulator %rs956, the byte is flushed if it is full, the state index
// %r6596 moves up by one (capped at 12) and the counter/threshold are reset.
$L__BB3_100:
add.s32 %r6595, %r6595, 1;
setp.lt.u32 %p113, %r6595, %r6597;
@%p113 bra $L__BB3_108;
// acc = (acc << 1) | 1
shl.b16 %rs490, %rs956, 1;
or.b16 %rs956, %rs490, 1;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p114, %r6390, 0;
mov.u32 %r6398, %r6598;
@%p114 bra $L__BB3_104;
// Byte full: flag = 1 and skip the store when %r6381 > 191.
setp.gt.u32 %p115, %r6381, 191;
mov.u32 %r6398, 1;
mov.u32 %r6390, 0;
@%p115 bra $L__BB3_104;
and.b16 %rs492, %rs956, 255;
// NOTE(review): %rd10 is computed above this chunk; sibling flush sites use
// %rd1 + %rd5 + (%r6381 + 17477), so it is presumably that address - confirm.
st.global.u8 [%rd10], %rs956;
add.s32 %r6381, %r6381, 1;
// The next byte takes 7 bits if the byte just stored was 255, otherwise 8.
setp.eq.s16 %p116, %rs492, 255;
selp.b32 %r6390, 7, 8, %p116;
mov.u16 %rs956, 0;
mov.u32 %r6398, %r6598;
$L__BB3_104:
// state = min(state + 1, 12); counter = 0
add.s32 %r3487, %r6596, 1;
min.u32 %r6596, %r3487, 12;
setp.lt.u32 %p117, %r6596, 3;
mov.u32 %r6595, 0;
mov.u32 %r6399, %r6595;
@%p117 bra $L__BB3_107;
setp.lt.u32 %p118, %r6596, 6;
mov.u32 %r6399, 1;
@%p118 bra $L__BB3_107;
// Exponent ladder: <9 -> 2, <11 -> 3, ==11 -> 4, otherwise 5.
setp.lt.u32 %p119, %r6596, 9;
setp.eq.s32 %p120, %r6596, 11;
selp.b32 %r3489, 4, 5, %p120;
setp.lt.u32 %p121, %r6596, 11;
selp.b32 %r3490, 3, %r3489, %p121;
selp.b32 %r6399, 2, %r3490, %p119;
$L__BB3_107:
// threshold = 1 << exponent
mov.u32 %r3492, 1;
shl.b32 %r6597, %r3492, %r6399;
mov.u32 %r6598, %r6398;
// First of four consecutive bit-packing loops that share one forward byte
// stream (%r7016 = byte index, %r7017 = bit capacity of the current byte,
// %r7018 = bits already placed in it, %r7019 = byte under construction).
// This one runs only when bit 0 of %r6325 is set and packs the low
// (%r231 - bit 0 of %r232) bits of %r6319, least-significant bits first.
$L__BB3_108:
// %r231 = max(%r6341, 1); %r232 = low 4 bits of %rs4.
max.s32 %r231, %r6341, 1;
and.b16 %rs493, %rs4, 15;
cvt.u32.u16 %r232, %rs493;
and.b32 %r233, %r6325, 1;
setp.eq.s32 %p122, %r233, 0;
mov.u32 %r6420, %r7050;
@%p122 bra $L__BB3_115;
and.b32 %r3493, %r232, 1;
sub.s32 %r6406, %r231, %r3493;
setp.eq.s32 %p123, %r6406, 0;
mov.u32 %r6420, %r7050;
@%p123 bra $L__BB3_115;
// value = %r6319 & ~(-1 << count)
mov.u32 %r3494, -1;
shl.b32 %r3495, %r3494, %r6406;
not.b32 %r3496, %r3495;
and.b32 %r6407, %r6319, %r3496;
$L__BB3_111:
// Once the byte index exceeds 17476, set the flag to 1 and stop packing.
setp.gt.u32 %p124, %r7016, 17476;
mov.u32 %r6420, 1;
@%p124 bra $L__BB3_115;
// take = min(capacity - filled, bits left); all-ones mask when take == 32.
sub.s32 %r3498, %r7017, %r7018;
min.u32 %r3499, %r3498, %r6406;
setp.eq.s32 %p125, %r3499, 32;
mov.u32 %r3500, -1;
shl.b32 %r3501, %r3500, %r3499;
not.b32 %r3502, %r3501;
selp.b32 %r3503, -1, %r3502, %p125;
and.b32 %r3504, %r3503, %r6407;
// Place the taken bits above the bits already in the byte.
shl.b32 %r3505, %r3504, %r7018;
or.b32 %r7019, %r3505, %r7019;
add.s32 %r7018, %r3499, %r7018;
shr.u32 %r6407, %r6407, %r3499;
sub.s32 %r6406, %r6406, %r3499;
// Byte not yet full: no store this iteration.
setp.lt.u32 %p126, %r7018, %r7017;
@%p126 bra $L__BB3_114;
// Store the byte at %rd1 + %rd5 + %r7016 and advance the index.
cvt.u64.u32 %rd111, %r7016;
add.s64 %rd112, %rd111, %rd5;
add.s64 %rd113, %rd1, %rd112;
st.global.u8 [%rd113], %r7019;
add.s32 %r7016, %r7016, 1;
// The byte after a 255 holds only 7 bits; otherwise 8.
setp.eq.s32 %p127, %r7019, 255;
selp.b32 %r7017, 7, 8, %p127;
mov.u32 %r7018, 0;
mov.u32 %r7019, %r7018;
$L__BB3_114:
// Loop until every bit is packed; the flag keeps its incoming value %r7050.
setp.ne.s32 %p128, %r6406, 0;
mov.u32 %r6420, %r7050;
@%p128 bra $L__BB3_111;
// Second bit-packing loop on the same forward stream. Skipped when %r117 is 0;
// otherwise packs the low (%r231 - bit 1 of %r232) bits of %r6323.
// NOTE(review): %r117 is computed above this chunk; by analogy with the later
// quad it is presumably (%r6325 & 2) - confirm.
$L__BB3_115:
setp.eq.s32 %p129, %r117, 0;
mov.u32 %r6435, %r6420;
@%p129 bra $L__BB3_122;
shr.u32 %r3508, %r232, 1;
and.b32 %r3509, %r3508, 1;
sub.s32 %r6421, %r231, %r3509;
setp.eq.s32 %p130, %r6421, 0;
mov.u32 %r6435, %r6420;
@%p130 bra $L__BB3_122;
// value = %r6323 & ~(-1 << count)
mov.u32 %r3510, -1;
shl.b32 %r3511, %r3510, %r6421;
not.b32 %r3512, %r3511;
and.b32 %r6422, %r6323, %r3512;
$L__BB3_118:
// Once the byte index exceeds 17476, set the flag to 1 and stop packing.
setp.gt.u32 %p131, %r7016, 17476;
mov.u32 %r6435, 1;
@%p131 bra $L__BB3_122;
// take = min(capacity - filled, bits left)
sub.s32 %r3514, %r7017, %r7018;
min.u32 %r3515, %r3514, %r6421;
setp.eq.s32 %p132, %r3515, 32;
mov.u32 %r3516, -1;
shl.b32 %r3517, %r3516, %r3515;
not.b32 %r3518, %r3517;
selp.b32 %r3519, -1, %r3518, %p132;
and.b32 %r3520, %r3519, %r6422;
shl.b32 %r3521, %r3520, %r7018;
or.b32 %r7019, %r3521, %r7019;
add.s32 %r7018, %r3515, %r7018;
shr.u32 %r6422, %r6422, %r3515;
sub.s32 %r6421, %r6421, %r3515;
// Byte not yet full: no store this iteration.
setp.lt.u32 %p133, %r7018, %r7017;
@%p133 bra $L__BB3_121;
// Store the byte at %rd1 + %rd5 + %r7016 and advance the index.
cvt.u64.u32 %rd114, %r7016;
add.s64 %rd115, %rd114, %rd5;
add.s64 %rd116, %rd1, %rd115;
st.global.u8 [%rd116], %r7019;
add.s32 %r7016, %r7016, 1;
// The byte after a 255 holds only 7 bits; otherwise 8.
setp.eq.s32 %p134, %r7019, 255;
selp.b32 %r7017, 7, 8, %p134;
mov.u32 %r7018, 0;
mov.u32 %r7019, %r7018;
$L__BB3_121:
setp.ne.s32 %p135, %r6421, 0;
mov.u32 %r6435, %r6420;
@%p135 bra $L__BB3_118;
// Third bit-packing loop on the same forward stream. Runs only when bit 2 of
// %r6325 is set and packs the low (%r231 - bit 2 of %r232) bits of %r6339.
$L__BB3_122:
and.b32 %r3524, %r6325, 4;
setp.eq.s32 %p136, %r3524, 0;
mov.u32 %r6450, %r6435;
@%p136 bra $L__BB3_129;
shr.u32 %r3525, %r232, 2;
and.b32 %r3526, %r3525, 1;
sub.s32 %r6436, %r231, %r3526;
setp.eq.s32 %p137, %r6436, 0;
mov.u32 %r6450, %r6435;
@%p137 bra $L__BB3_129;
// value = %r6339 & ~(-1 << count)
mov.u32 %r3527, -1;
shl.b32 %r3528, %r3527, %r6436;
not.b32 %r3529, %r3528;
and.b32 %r6437, %r6339, %r3529;
$L__BB3_125:
// Once the byte index exceeds 17476, set the flag to 1 and stop packing.
setp.gt.u32 %p138, %r7016, 17476;
mov.u32 %r6450, 1;
@%p138 bra $L__BB3_129;
// take = min(capacity - filled, bits left)
sub.s32 %r3531, %r7017, %r7018;
min.u32 %r3532, %r3531, %r6436;
setp.eq.s32 %p139, %r3532, 32;
mov.u32 %r3533, -1;
shl.b32 %r3534, %r3533, %r3532;
not.b32 %r3535, %r3534;
selp.b32 %r3536, -1, %r3535, %p139;
and.b32 %r3537, %r3536, %r6437;
shl.b32 %r3538, %r3537, %r7018;
or.b32 %r7019, %r3538, %r7019;
add.s32 %r7018, %r3532, %r7018;
shr.u32 %r6437, %r6437, %r3532;
sub.s32 %r6436, %r6436, %r3532;
// Byte not yet full: no store this iteration.
setp.lt.u32 %p140, %r7018, %r7017;
@%p140 bra $L__BB3_128;
// Store the byte at %rd1 + %rd5 + %r7016 and advance the index.
cvt.u64.u32 %rd117, %r7016;
add.s64 %rd118, %rd117, %rd5;
add.s64 %rd119, %rd1, %rd118;
st.global.u8 [%rd119], %r7019;
add.s32 %r7016, %r7016, 1;
// The byte after a 255 holds only 7 bits; otherwise 8.
setp.eq.s32 %p141, %r7019, 255;
selp.b32 %r7017, 7, 8, %p141;
mov.u32 %r7018, 0;
mov.u32 %r7019, %r7018;
$L__BB3_128:
setp.ne.s32 %p142, %r6436, 0;
mov.u32 %r6450, %r6435;
@%p142 bra $L__BB3_125;
// Fourth bit-packing loop on the same forward stream. Skipped when %r119 is 0;
// otherwise packs the low (%r231 - (%r232 >> 3)) bits of %r6338. The resulting
// flag is written back to %r7050.
// NOTE(review): %r119 is computed above this chunk; by analogy with the later
// quad it is presumably (%r6325 & 8) - confirm.
$L__BB3_129:
setp.eq.s32 %p143, %r119, 0;
mov.u32 %r7050, %r6450;
@%p143 bra $L__BB3_136;
// %r232 was masked to 4 bits, so the shift by 3 leaves only its top bit.
shr.u32 %r3541, %r232, 3;
sub.s32 %r6451, %r231, %r3541;
setp.eq.s32 %p144, %r6451, 0;
mov.u32 %r7050, %r6450;
@%p144 bra $L__BB3_136;
// value = %r6338 & ~(-1 << count)
mov.u32 %r3542, -1;
shl.b32 %r3543, %r3542, %r6451;
not.b32 %r3544, %r3543;
and.b32 %r6452, %r6338, %r3544;
$L__BB3_132:
// Once the byte index exceeds 17476, set the flag to 1 and stop packing.
setp.gt.u32 %p145, %r7016, 17476;
mov.u32 %r7050, 1;
@%p145 bra $L__BB3_136;
// take = min(capacity - filled, bits left)
sub.s32 %r3546, %r7017, %r7018;
min.u32 %r3547, %r3546, %r6451;
setp.eq.s32 %p146, %r3547, 32;
mov.u32 %r3548, -1;
shl.b32 %r3549, %r3548, %r3547;
not.b32 %r3550, %r3549;
selp.b32 %r3551, -1, %r3550, %p146;
and.b32 %r3552, %r3551, %r6452;
shl.b32 %r3553, %r3552, %r7018;
or.b32 %r7019, %r3553, %r7019;
add.s32 %r7018, %r3547, %r7018;
shr.u32 %r6452, %r6452, %r3547;
sub.s32 %r6451, %r6451, %r3547;
// Byte not yet full: no store this iteration.
setp.lt.u32 %p147, %r7018, %r7017;
@%p147 bra $L__BB3_135;
// Store the byte at %rd1 + %rd5 + %r7016 and advance the index.
cvt.u64.u32 %rd120, %r7016;
add.s64 %rd121, %rd120, %rd5;
add.s64 %rd122, %rd1, %rd121;
st.global.u8 [%rd122], %r7019;
add.s32 %r7016, %r7016, 1;
// The byte after a 255 holds only 7 bits; otherwise 8.
setp.eq.s32 %p148, %r7019, 255;
selp.b32 %r7017, 7, 8, %p148;
mov.u32 %r7018, 0;
mov.u32 %r7019, %r7018;
$L__BB3_135:
setp.ne.s32 %p149, %r6451, 0;
mov.u32 %r7050, %r6450;
@%p149 bra $L__BB3_132;
// Decide whether another pair of columns follows: branch to $L__BB3_165 when
// (%r6299 + 2) < %r3200 (unsigned), otherwise to $L__BB3_137. Also precomputes
// two byte-table addresses used by $L__BB3_137:
//   %rd11 = %rd9 + 6 * %r113,  %rd12 = %rd9 + 6 * %r113 + 2.
$L__BB3_136:
add.s32 %r3556, %r6299, 2;
setp.lt.u32 %p150, %r3556, %r3200;
mul.lo.s32 %r326, %r113, 6;
cvt.u64.u32 %rd123, %r326;
add.s64 %rd11, %rd9, %rd123;
add.s32 %r3557, %r326, 2;
cvt.u64.u32 %rd124, %r3557;
add.s64 %rd12, %rd9, %rd124;
@%p150 bra $L__BB3_165;
bra.uni $L__BB3_137;
// Load and classify the first two samples of the next group.
// For each sample x (a 32-bit word at %rd3 + 4 * (index + %rd4)):
//   v   = (x == 0) ? 0 : (|x| << %r27) | (x & 0x80000000)
//   m   = ((v << 1) >> %r27) & ~1
//   if m != 0: exponent = 32 - clz(m - 1), value = (v >> 31) + m - 2,
//              and the sample's bit is set in the mask %r6518.
// %r6534 accumulates the maximum exponent seen so far.
$L__BB3_165:
// Sample 0 at index %r6316.
cvt.u64.u32 %rd138, %r6316;
add.s64 %rd139, %rd138, %rd4;
shl.b64 %rd140, %rd139, 2;
add.s64 %rd141, %rd3, %rd140;
ld.global.u32 %r399, [%rd141];
setp.eq.s32 %p187, %r399, 0;
mov.u32 %r6511, 0;
mov.u32 %r6510, %r6511;
@%p187 bra $L__BB3_167;
// Keep the sign in bit 31 and place |x| << %r27 below it.
and.b32 %r3628, %r399, -2147483648;
abs.s32 %r3629, %r399;
shl.b32 %r3630, %r3629, %r27;
or.b32 %r6510, %r3630, %r3628;
$L__BB3_167:
shl.b32 %r3634, %r6510, 1;
shr.u32 %r3635, %r3634, %r27;
and.b32 %r402, %r3635, -2;
setp.eq.s32 %p188, %r402, 0;
// Defaults for m == 0: exponent 0, value 0, mask 0.
mov.u32 %r6512, %r6511;
mov.u32 %r6518, %r6511;
@%p188 bra $L__BB3_169;
add.s32 %r3637, %r402, -1;
clz.b32 %r3638, %r3637;
mov.u32 %r3639, 32;
sub.s32 %r6511, %r3639, %r3638;
shr.u32 %r3640, %r6510, 31;
add.s32 %r3641, %r3640, %r402;
add.s32 %r6512, %r3641, -2;
// Mask bit 0 marks sample 0 as non-zero.
mov.u32 %r6518, 1;
$L__BB3_169:
// Sample 1 at index %r6316 + %r3198; treated as zero when %p40 is set.
// NOTE(review): %p40 is computed above this chunk - presumably "no second
// row in this stripe"; confirm.
mov.u32 %r6515, 0;
mov.u32 %r6514, %r6515;
@%p40 bra $L__BB3_172;
add.s32 %r3644, %r6316, %r3198;
cvt.u64.u32 %rd142, %r3644;
add.s64 %rd143, %rd142, %rd4;
shl.b64 %rd144, %rd143, 2;
add.s64 %rd145, %rd3, %rd144;
ld.global.u32 %r408, [%rd145];
setp.eq.s32 %p190, %r408, 0;
@%p190 bra $L__BB3_172;
and.b32 %r3645, %r408, -2147483648;
abs.s32 %r3646, %r408;
shl.b32 %r3647, %r3646, %r27;
or.b32 %r6514, %r3647, %r3645;
$L__BB3_172:
shl.b32 %r3650, %r6514, 1;
shr.u32 %r3651, %r3650, %r27;
and.b32 %r411, %r3651, -2;
setp.eq.s32 %p191, %r411, 0;
mov.u32 %r6516, %r6515;
// Running maximum starts as sample 0's exponent.
mov.u32 %r6534, %r6511;
@%p191 bra $L__BB3_174;
// Mask bit 1 marks sample 1 as non-zero.
or.b32 %r6518, %r6518, 2;
add.s32 %r3652, %r411, -1;
clz.b32 %r3653, %r3652;
mov.u32 %r3654, 32;
sub.s32 %r6515, %r3654, %r3653;
max.s32 %r6534, %r6511, %r6515;
shr.u32 %r3655, %r6514, 31;
add.s32 %r3656, %r3655, %r411;
add.s32 %r6516, %r3656, -2;
// Load and classify the second pair of samples (indices %r6316 + 1 and
// %r6316 + 1 + %r3198) using the same transform as the first pair. The whole
// pair is skipped, leaving exponents and values at 0, when
// (%r6299 + 3) >= %r3200 (unsigned). On the non-skipped path the next start
// index %r6533 ends up as %r6316 + 2; on the skipped path it is %r6316 + 1.
$L__BB3_174:
add.s32 %r6533, %r6316, 1;
add.s32 %r3661, %r6299, 3;
setp.ge.u32 %p192, %r3661, %r3200;
mov.u32 %r6536, 0;
mov.u32 %r6529, %r6536;
mov.u32 %r6530, %r6536;
mov.u32 %r6531, %r6536;
mov.u32 %r6532, %r6536;
@%p192 bra $L__BB3_185;
// Sample 2 at index %r6533.
cvt.u64.u32 %rd146, %r6533;
add.s64 %rd147, %rd146, %rd4;
shl.b64 %rd148, %rd147, 2;
add.s64 %rd149, %rd3, %rd148;
ld.global.u32 %r421, [%rd149];
setp.eq.s32 %p193, %r421, 0;
mov.u32 %r6530, 0;
mov.u32 %r6519, %r6530;
@%p193 bra $L__BB3_177;
// Keep the sign in bit 31 and place |x| << %r27 below it.
and.b32 %r3663, %r421, -2147483648;
abs.s32 %r3664, %r421;
shl.b32 %r3665, %r3664, %r27;
or.b32 %r6519, %r3665, %r3663;
$L__BB3_177:
shl.b32 %r3668, %r6519, 1;
shr.u32 %r3669, %r3668, %r27;
and.b32 %r424, %r3669, -2;
setp.eq.s32 %p194, %r424, 0;
mov.u32 %r6532, %r6530;
@%p194 bra $L__BB3_179;
// Mask bit 2 marks sample 2 as non-zero.
or.b32 %r6518, %r6518, 4;
add.s32 %r3670, %r424, -1;
clz.b32 %r3671, %r3670;
mov.u32 %r3672, 32;
sub.s32 %r6530, %r3672, %r3671;
max.s32 %r6534, %r6534, %r6530;
shr.u32 %r3673, %r6519, 31;
add.s32 %r3674, %r3673, %r424;
add.s32 %r6532, %r3674, -2;
$L__BB3_179:
// Sample 3 at index %r6533 + %r3198; treated as zero when %p40 is set.
mov.u32 %r6529, 0;
mov.u32 %r6524, %r6529;
@%p40 bra $L__BB3_182;
add.s32 %r3677, %r6533, %r3198;
cvt.u64.u32 %rd150, %r3677;
add.s64 %rd151, %rd150, %rd4;
shl.b64 %rd152, %rd151, 2;
add.s64 %rd153, %rd3, %rd152;
ld.global.u32 %r433, [%rd153];
setp.eq.s32 %p196, %r433, 0;
@%p196 bra $L__BB3_182;
and.b32 %r3678, %r433, -2147483648;
abs.s32 %r3679, %r433;
shl.b32 %r3680, %r3679, %r27;
or.b32 %r6524, %r3680, %r3678;
$L__BB3_182:
shl.b32 %r3683, %r6524, 1;
shr.u32 %r3684, %r3683, %r27;
and.b32 %r436, %r3684, -2;
setp.eq.s32 %p197, %r436, 0;
mov.u32 %r6531, %r6529;
@%p197 bra $L__BB3_184;
// Mask bit 3 marks sample 3 as non-zero.
or.b32 %r6518, %r6518, 8;
add.s32 %r3685, %r436, -1;
clz.b32 %r3686, %r3685;
mov.u32 %r3687, 32;
sub.s32 %r6529, %r3687, %r3686;
max.s32 %r6534, %r6534, %r6529;
shr.u32 %r3688, %r6524, 31;
add.s32 %r3689, %r3688, %r436;
add.s32 %r6531, %r3689, -2;
$L__BB3_184:
add.s32 %r6533, %r6316, 2;
// Commit the next start index and derive three per-group values:
//   %r453  = (%r6325 >> 1) | (%r6325 & 1)
//   %r454  = (%r6534 > 1) ? %r6534 - 1 : 0
//   %r6536 = 4-bit mask of which sample exponents equal the maximum %r6534;
//            left at 0 when %r6534 < 2.
$L__BB3_185:
mov.u32 %r6316, %r6533;
shr.u32 %r3691, %r6325, 1;
or.b32 %r453, %r3691, %r233;
add.s32 %r3692, %r6534, -1;
setp.lt.s32 %p198, %r6534, 2;
setp.gt.s32 %p199, %r6534, 1;
selp.b32 %r454, %r3692, 0, %p199;
@%p198 bra $L__BB3_187;
// Bit 0: sample 0 exponent equals the maximum.
setp.eq.s32 %p200, %r6511, %r6534;
selp.u32 %r3693, 1, 0, %p200;
// Bit 1: sample 1 exponent equals the maximum (inserted at bit position 1).
setp.eq.s32 %p201, %r6515, %r6534;
selp.u32 %r3694, -1, 0, %p201;
bfi.b32 %r3695, %r3694, %r3693, 1, 1;
// Bit 2: sample 2.
setp.eq.s32 %p202, %r6530, %r6534;
selp.u16 %rs513, 1, 0, %p202;
mul.wide.u16 %r3696, %rs513, 4;
or.b32 %r3697, %r3695, %r3696;
// Bit 3: sample 3.
setp.eq.s32 %p203, %r6529, %r6534;
selp.u16 %rs514, 1, 0, %p203;
mul.wide.u16 %r3698, %rs514, 8;
or.b32 %r6536, %r3697, %r3698;
// Record per-group state in shared memory, look up a 16-bit table entry, and
// write its codeword to a byte stream that grows downward from offset 20548.
// Table index = (%r6518 << 4) | (%r453 << 8) | %r6536; the entry is read from
// %rd8 + 2 * index. Bits 4-6 of the entry give the codeword length and bits
// 8-15 the codeword; the low 4 bits are consumed later via %rs48.
$L__BB3_187:
// Byte at [%r116+1]: %rs3 when the low byte of %r6515 is below the low byte
// of %r6336, otherwise %r6515. Byte at [%r116+2]: %r6529.
and.b32 %r3699, %r6515, 255;
and.b32 %r3700, %r6336, 255;
setp.lt.u32 %p204, %r3699, %r3700;
cvt.u16.u32 %rs515, %r6515;
selp.b16 %rs516, %rs3, %rs515, %p204;
st.shared.u8 [%r116+1], %rs516;
st.shared.u8 [%r116+2], %r6529;
// Byte at [%r118+1]: %r120 | (mask bit 1). Byte at [%r118+2]: mask bit 3.
and.b32 %r457, %r6518, 2;
shr.u32 %r3701, %r457, 1;
or.b32 %r3702, %r120, %r3701;
st.shared.u8 [%r118+1], %r3702;
and.b32 %r458, %r6518, 8;
shr.u32 %r3703, %r458, 3;
st.shared.u8 [%r118+2], %r3703;
shl.b32 %r3704, %r6518, 4;
shl.b32 %r3705, %r453, 8;
or.b32 %r3706, %r3704, %r3705;
or.b32 %r3707, %r3706, %r6536;
mul.wide.u32 %rd155, %r3707, 2;
add.s64 %rd156, %rd8, %rd155;
ld.global.u16 %rs48, [%rd156];
shr.u16 %rs517, %rs48, 4;
and.b16 %rs49, %rs517, 7;
// Zero-length codeword: nothing to write.
setp.eq.s16 %p205, %rs49, 0;
mov.u32 %r6548, %r6355;
@%p205 bra $L__BB3_194;
cvt.u32.u16 %r6537, %rs49;
shr.u16 %rs518, %rs48, 8;
cvt.u32.u16 %r6538, %rs518;
$L__BB3_189:
// Writer state: %r6829 = bytes written, %r6830 = bits in the current byte,
// %r6831 = 0/1 flag that removes one bit of capacity, %rs1025 = current byte.
mov.u32 %r461, %r6537;
// Once %r6829 exceeds 2879, set the flag to 1 and stop.
setp.gt.u32 %p206, %r6829, 2879;
mov.u32 %r6548, 1;
@%p206 bra $L__BB3_194;
// free = 8 - %r6831 - %r6830; take = min(free, bits left)
mov.u32 %r3709, 8;
sub.s32 %r3710, %r3709, %r6831;
sub.s32 %r3711, %r3710, %r6830;
min.u32 %r3712, %r3711, %r461;
setp.eq.s32 %p207, %r3712, 32;
mov.u32 %r3713, -1;
shl.b32 %r3714, %r3713, %r3712;
not.b32 %r3715, %r3714;
selp.b32 %r3716, -1, %r3715, %p207;
and.b32 %r3717, %r3716, %r6538;
shl.b32 %r3718, %r3717, %r6830;
cvt.u16.u32 %rs519, %r3718;
or.b16 %rs1025, %rs1025, %rs519;
add.s32 %r6830, %r3712, %r6830;
sub.s32 %r6537, %r461, %r3712;
shr.u32 %r6538, %r6538, %r3712;
// More free bits than were available to write: the byte is not full yet.
setp.gt.u32 %p208, %r3711, %r461;
@%p208 bra $L__BB3_193;
// If the capacity flag was set and the byte is not 127, clear the flag and
// keep filling the same byte instead of storing it.
setp.ne.s32 %p209, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs520, %rs1025, 255;
setp.ne.s16 %p210, %rs520, 127;
and.pred %p211, %p209, %p210;
@%p211 bra $L__BB3_193;
// Store at %rd1 + %rd5 + (20548 - %r6829): the stream grows toward lower
// addresses. The flag is set again when the stored byte exceeds 143.
mov.u32 %r3721, 20548;
sub.s32 %r3722, %r3721, %r6829;
cvt.u64.u32 %rd157, %r3722;
add.s64 %rd158, %rd157, %rd5;
add.s64 %rd159, %rd1, %rd158;
st.global.u8 [%rd159], %rs1025;
add.s32 %r6829, %r6829, 1;
setp.gt.u16 %p212, %rs520, 143;
selp.u32 %r6831, 1, 0, %p212;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_193:
setp.ne.s32 %p213, %r6537, 0;
mov.u32 %r6548, %r6355;
@%p213 bra $L__BB3_189;
// Dispatch after the codeword write. When %r453 is non-zero go straight to
// $L__BB3_242. Otherwise: if the sample mask %r6518 is 0 take the run-counting
// path ($L__BB3_234); if it is non-zero append a 0 bit to the accumulator
// %rs956 and continue at $L__BB3_199, or at $L__BB3_197 (outside this chunk)
// when the bits-remaining counter %r6390 reaches 0.
$L__BB3_194:
setp.ne.s32 %p214, %r453, 0;
@%p214 bra $L__BB3_242;
setp.eq.s32 %p215, %r6518, 0;
// %rd13 = %rd1 + %rd5 + (%r6381 + 17477); not used inside this block.
add.s32 %r3723, %r6381, 17477;
cvt.u64.u32 %rd160, %r3723;
add.s64 %rd161, %rd160, %rd5;
add.s64 %rd13, %rd1, %rd161;
@%p215 bra $L__BB3_234;
shl.b16 %rs956, %rs956, 1;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p216, %r6390, 0;
mov.u32 %r6584, %r6598;
@%p216 bra $L__BB3_199;
bra.uni $L__BB3_197;
// Emit the low `exponent` bits of the run counter %r6595, most-significant
// first, where exponent is looked up from the state index %r6596. This block
// computes the exponent and runs the (exponent mod 4) peeled steps; the
// four-way unrolled remainder is the loop at $L__BB3_217. Nothing is emitted
// when the exponent is 0.
$L__BB3_199:
// Exponent ladder: <3 -> 0, <6 -> 1, <9 -> 2, <11 -> 3, ==11 -> 4, else 5.
setp.lt.u32 %p218, %r6596, 3;
mov.u32 %r6552, 0;
@%p218 bra $L__BB3_202;
setp.lt.u32 %p219, %r6596, 6;
mov.u32 %r6552, 1;
@%p219 bra $L__BB3_202;
setp.lt.u32 %p220, %r6596, 9;
setp.eq.s32 %p221, %r6596, 11;
selp.b32 %r3729, 4, 5, %p221;
setp.lt.u32 %p222, %r6596, 11;
selp.b32 %r3730, 3, %r3729, %p222;
selp.b32 %r6552, 2, %r3730, %p220;
$L__BB3_202:
setp.eq.s32 %p223, %r6552, 0;
@%p223 bra $L__BB3_230;
// %r486 = exponent mod 4 = number of peeled steps below.
add.s32 %r485, %r6552, -1;
and.b32 %r486, %r6552, 3;
setp.eq.s32 %p224, %r486, 0;
mov.u32 %r6562, %r6552;
mov.u32 %r6563, %r6584;
@%p224 bra $L__BB3_215;
// Peeled step 1: append bit (exponent - 1) of %r6595.
mov.u32 %r3732, 1;
shl.b32 %r3733, %r3732, %r485;
and.b32 %r3734, %r3733, %r6595;
setp.ne.s32 %p225, %r3734, 0;
selp.u32 %r3735, 1, 0, %p225;
cvt.u32.u16 %r3736, %rs956;
bfi.b32 %r3737, %r3736, %r3735, 1, 8;
cvt.u16.u32 %rs956, %r3737;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p226, %r6390, 0;
mov.u32 %r6563, %r6584;
@%p226 bra $L__BB3_207;
// Byte full: flag = 1 (via %r3732) and skip the store when %r6381 > 191.
setp.gt.u32 %p227, %r6381, 191;
mov.u32 %r6390, 0;
mov.u32 %r6563, %r3732;
@%p227 bra $L__BB3_207;
add.s32 %r3741, %r6381, 17477;
cvt.u64.u32 %rd162, %r3741;
add.s64 %rd163, %rd162, %rd5;
add.s64 %rd164, %rd1, %rd163;
st.global.u8 [%rd164], %rs956;
add.s32 %r6381, %r6381, 1;
mov.u16 %rs956, 0;
// Unlike the other flush sites, the counter is reset to 8 with no test for a
// stored 255 byte on this path.
mov.u32 %r6390, 8;
mov.u32 %r6563, %r6584;
$L__BB3_207:
setp.eq.s32 %p228, %r486, 1;
mov.u32 %r6584, %r6563;
mov.u32 %r6562, %r485;
@%p228 bra $L__BB3_215;
// Peeled step 2: append bit (exponent - 2) of %r6595.
add.s32 %r6562, %r6552, -2;
mov.u32 %r3742, 1;
shl.b32 %r3743, %r3742, %r6562;
and.b32 %r3744, %r3743, %r6595;
setp.ne.s32 %p229, %r3744, 0;
selp.u32 %r3745, 1, 0, %p229;
cvt.u32.u16 %r3746, %rs956;
bfi.b32 %r3747, %r3746, %r3745, 1, 8;
cvt.u16.u32 %rs956, %r3747;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p230, %r6390, 0;
mov.u32 %r6558, %r6563;
@%p230 bra $L__BB3_211;
setp.gt.u32 %p231, %r6381, 191;
mov.u32 %r6390, 0;
mov.u32 %r6558, %r3742;
@%p231 bra $L__BB3_211;
add.s32 %r3750, %r6381, 17477;
cvt.u64.u32 %rd165, %r3750;
add.s64 %rd166, %rd165, %rd5;
add.s64 %rd167, %rd1, %rd166;
and.b16 %rs527, %rs956, 255;
st.global.u8 [%rd167], %rs956;
add.s32 %r6381, %r6381, 1;
// The next byte takes 7 bits if the byte just stored was 255, otherwise 8.
setp.eq.s16 %p232, %rs527, 255;
selp.b32 %r6390, 7, 8, %p232;
mov.u16 %rs956, 0;
mov.u32 %r6558, %r6563;
$L__BB3_211:
setp.eq.s32 %p233, %r486, 2;
mov.u32 %r6584, %r6558;
mov.u32 %r6563, %r6558;
@%p233 bra $L__BB3_215;
// Peeled step 3: append bit (exponent - 3) of %r6595.
add.s32 %r6562, %r6552, -3;
mov.u32 %r3751, 1;
shl.b32 %r3752, %r3751, %r6562;
and.b32 %r3753, %r3752, %r6595;
setp.ne.s32 %p234, %r3753, 0;
selp.u32 %r3754, 1, 0, %p234;
cvt.u32.u16 %r3755, %rs956;
bfi.b32 %r3756, %r3755, %r3754, 1, 8;
cvt.u16.u32 %rs956, %r3756;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p235, %r6390, 0;
mov.u32 %r6584, %r6558;
mov.u32 %r6563, %r6558;
@%p235 bra $L__BB3_215;
setp.gt.u32 %p236, %r6381, 191;
mov.u32 %r6390, 0;
mov.u32 %r6584, %r3751;
mov.u32 %r6563, %r3751;
@%p236 bra $L__BB3_215;
add.s32 %r3761, %r6381, 17477;
cvt.u64.u32 %rd168, %r3761;
add.s64 %rd169, %rd168, %rd5;
add.s64 %rd170, %rd1, %rd169;
and.b16 %rs530, %rs956, 255;
st.global.u8 [%rd170], %rs956;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p237, %rs530, 255;
selp.b32 %r6390, 7, 8, %p237;
mov.u16 %rs956, 0;
mov.u32 %r6584, %r6558;
mov.u32 %r6563, %r6558;
$L__BB3_215:
// Fewer than 4 bits in total: the unrolled loop is not needed.
setp.lt.u32 %p238, %r485, 3;
@%p238 bra $L__BB3_230;
mov.u32 %r6584, %r6563;
// Four-way unrolled bit-emission loop. Each iteration appends bits
// (%r6562 - 1) .. (%r6562 - 4) of %r6595, in that order, to an 8-bit
// accumulator, then lowers %r6562 by 4 and repeats until it reaches 0.
// After every appended bit the bits-remaining counter drops by one; at 0 the
// byte is stored at %rd1 + %rd5 + (%r6381 + 17477), or, when %r6381 already
// exceeds 191, the store is skipped and the flag is set to 1.
$L__BB3_217:
// Step 1: bit (%r6562 - 1); accumulator comes in through %rs956.
add.s32 %r3762, %r6562, -1;
mov.u32 %r3763, 1;
shl.b32 %r3764, %r3763, %r3762;
and.b32 %r3765, %r3764, %r6595;
setp.ne.s32 %p239, %r3765, 0;
selp.u32 %r3766, 1, 0, %p239;
cvt.u32.u16 %r3767, %rs956;
bfi.b32 %r6572, %r3767, %r3766, 1, 8;
add.s32 %r6571, %r6390, -1;
setp.ne.s32 %p240, %r6571, 0;
mov.u32 %r6573, %r6584;
@%p240 bra $L__BB3_220;
setp.gt.u32 %p241, %r6381, 191;
mov.u32 %r6571, 0;
mov.u32 %r6573, %r3763;
@%p241 bra $L__BB3_220;
cvt.u16.u32 %rs531, %r6572;
and.b16 %rs532, %rs531, 255;
add.s32 %r3771, %r6381, 17477;
cvt.u64.u32 %rd171, %r3771;
add.s64 %rd172, %rd171, %rd5;
add.s64 %rd173, %rd1, %rd172;
st.global.u8 [%rd173], %rs531;
add.s32 %r6381, %r6381, 1;
// The next byte takes 7 bits if the byte just stored was 255, otherwise 8.
setp.eq.s16 %p242, %rs532, 255;
selp.b32 %r6571, 7, 8, %p242;
mov.u32 %r6572, 0;
mov.u32 %r6573, %r6584;
$L__BB3_220:
// Step 2: bit (%r6562 - 2); acc = ((acc & 127) << 1) | bit.
add.s32 %r3772, %r6562, -2;
shl.b32 %r3774, %r3763, %r3772;
and.b32 %r3775, %r3774, %r6595;
setp.ne.s32 %p243, %r3775, 0;
and.b32 %r3776, %r6572, 127;
selp.u32 %r3777, 1, 0, %p243;
bfi.b32 %r6576, %r3776, %r3777, 1, 7;
add.s32 %r6575, %r6571, -1;
setp.ne.s32 %p244, %r6575, 0;
mov.u32 %r6577, %r6573;
@%p244 bra $L__BB3_223;
setp.gt.u32 %p245, %r6381, 191;
mov.u32 %r6577, 1;
mov.u32 %r6575, 0;
@%p245 bra $L__BB3_223;
cvt.u16.u32 %rs533, %r6576;
and.b16 %rs534, %rs533, 255;
add.s32 %r3781, %r6381, 17477;
cvt.u64.u32 %rd174, %r3781;
add.s64 %rd175, %rd174, %rd5;
add.s64 %rd176, %rd1, %rd175;
st.global.u8 [%rd176], %rs533;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p246, %rs534, 255;
selp.b32 %r6575, 7, 8, %p246;
mov.u32 %r6576, 0;
mov.u32 %r6577, %r6573;
$L__BB3_223:
// Step 3: bit (%r6562 - 3).
add.s32 %r3782, %r6562, -3;
mov.u32 %r3783, 1;
shl.b32 %r3784, %r3783, %r3782;
and.b32 %r3785, %r3784, %r6595;
setp.ne.s32 %p247, %r3785, 0;
and.b32 %r3786, %r6576, 127;
selp.u32 %r3787, 1, 0, %p247;
bfi.b32 %r6580, %r3786, %r3787, 1, 7;
add.s32 %r6579, %r6575, -1;
setp.ne.s32 %p248, %r6579, 0;
mov.u32 %r6581, %r6577;
@%p248 bra $L__BB3_226;
setp.gt.u32 %p249, %r6381, 191;
mov.u32 %r6579, 0;
mov.u32 %r6581, %r3783;
@%p249 bra $L__BB3_226;
cvt.u16.u32 %rs535, %r6580;
and.b16 %rs536, %rs535, 255;
add.s32 %r3791, %r6381, 17477;
cvt.u64.u32 %rd177, %r3791;
add.s64 %rd178, %rd177, %rd5;
add.s64 %rd179, %rd1, %rd178;
st.global.u8 [%rd179], %rs535;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p250, %rs536, 255;
selp.b32 %r6579, 7, 8, %p250;
mov.u32 %r6580, 0;
mov.u32 %r6581, %r6577;
$L__BB3_226:
// Step 4: %r6562 drops by 4 and its new value is the bit tested here. The
// accumulator and counter are written back to %rs956 / %r6390.
add.s32 %r6562, %r6562, -4;
shl.b32 %r3793, %r3783, %r6562;
and.b32 %r3794, %r3793, %r6595;
setp.ne.s32 %p251, %r3794, 0;
and.b32 %r3795, %r6580, 127;
selp.u32 %r3796, 1, 0, %p251;
bfi.b32 %r3797, %r3795, %r3796, 1, 15;
cvt.u16.u32 %rs956, %r3797;
add.s32 %r6390, %r6579, -1;
setp.ne.s32 %p252, %r6390, 0;
mov.u32 %r6584, %r6581;
@%p252 bra $L__BB3_229;
setp.gt.u32 %p253, %r6381, 191;
mov.u32 %r6584, 1;
mov.u32 %r6390, 0;
@%p253 bra $L__BB3_229;
add.s32 %r3800, %r6381, 17477;
cvt.u64.u32 %rd180, %r3800;
add.s64 %rd181, %rd180, %rd5;
add.s64 %rd182, %rd1, %rd181;
and.b16 %rs538, %rs956, 255;
st.global.u8 [%rd182], %rs956;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p254, %rs538, 255;
selp.b32 %r6390, 7, 8, %p254;
mov.u16 %rs956, 0;
mov.u32 %r6584, %r6581;
$L__BB3_229:
setp.ne.s32 %p255, %r6562, 0;
@%p255 bra $L__BB3_217;
// After the run bits are emitted: decrement the state index %r6596
// (saturating at 0), reset the run counter %r6595 to 0 and recompute the
// threshold %r6597 = 1 << exponent(%r6596) using the same ladder as elsewhere
// (<3 -> 0, <6 -> 1, <9 -> 2, <11 -> 3, ==11 -> 4, else 5).
$L__BB3_230:
add.s32 %r3802, %r6596, -1;
setp.eq.s32 %p256, %r6596, 0;
mov.u32 %r6595, 0;
// state = (state == 0) ? 0 : state - 1
selp.b32 %r6596, 0, %r3802, %p256;
setp.lt.u32 %p257, %r6596, 3;
mov.u32 %r6588, %r6595;
@%p257 bra $L__BB3_233;
setp.lt.u32 %p258, %r6596, 6;
mov.u32 %r6588, 1;
@%p258 bra $L__BB3_233;
setp.lt.u32 %p259, %r6596, 9;
setp.eq.s32 %p260, %r6596, 11;
selp.b32 %r3804, 4, 5, %p260;
setp.lt.u32 %p261, %r6596, 11;
selp.b32 %r3805, 3, %r3804, %p261;
selp.b32 %r6588, 2, %r3805, %p259;
$L__BB3_233:
mov.u32 %r3807, 1;
shl.b32 %r6597, %r3807, %r6588;
// Carry the writer's flag forward in %r6598.
mov.u32 %r6598, %r6584;
bra.uni $L__BB3_242;
// Path taken when no further column pair follows. Loads seven bytes from a
// byte table: [%rd11], [%rd11+1], [%rd12], [%rd12+1] and [%rd9+0..3]. The
// loops that follow treat them as (bits, length) pairs. This block writes the
// first pair, bits = [%rd11], length = [%rd11+1], to the byte stream that
// grows downward from offset 20548.
$L__BB3_137:
ld.global.u8 %rs26, [%rd11+1];
ld.global.u8 %rs27, [%rd12];
ld.global.u8 %rs28, [%rd12+1];
ld.global.u8 %rs29, [%rd9];
ld.global.u8 %rs30, [%rd9+1];
ld.global.u8 %rs31, [%rd9+2];
ld.global.u8 %rs32, [%rd9+3];
// Zero length: nothing to write for this pair.
setp.eq.s16 %p151, %rs26, 0;
mov.u32 %r6477, %r6355;
@%p151 bra $L__BB3_144;
ld.global.u8 %r6467, [%rd11];
cvt.u32.u16 %r6466, %rs26;
$L__BB3_139:
mov.u32 %r329, %r6466;
// Once %r6829 exceeds 2879, set the flag to 1 and stop.
setp.gt.u32 %p152, %r6829, 2879;
mov.u32 %r6477, 1;
@%p152 bra $L__BB3_144;
// free = 8 - %r6831 - %r6830; take = min(free, bits left)
mov.u32 %r3559, 8;
sub.s32 %r3560, %r3559, %r6831;
sub.s32 %r3561, %r3560, %r6830;
min.u32 %r3562, %r3561, %r329;
setp.eq.s32 %p153, %r3562, 32;
mov.u32 %r3563, -1;
shl.b32 %r3564, %r3563, %r3562;
not.b32 %r3565, %r3564;
selp.b32 %r3566, -1, %r3565, %p153;
and.b32 %r3567, %r3566, %r6467;
shl.b32 %r3568, %r3567, %r6830;
cvt.u16.u32 %rs494, %r3568;
or.b16 %rs1025, %rs1025, %rs494;
add.s32 %r6830, %r3562, %r6830;
sub.s32 %r6466, %r329, %r3562;
shr.u32 %r6467, %r6467, %r3562;
// Byte not yet full: no store this iteration.
setp.gt.u32 %p154, %r3561, %r329;
@%p154 bra $L__BB3_143;
// If the capacity flag %r6831 was set and the byte is not 127, clear the flag
// and keep filling the same byte instead of storing it.
setp.ne.s32 %p155, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs495, %rs1025, 255;
setp.ne.s16 %p156, %rs495, 127;
and.pred %p157, %p155, %p156;
@%p157 bra $L__BB3_143;
// Store at %rd1 + %rd5 + (20548 - %r6829); the flag is set again when the
// stored byte exceeds 143.
mov.u32 %r3571, 20548;
sub.s32 %r3572, %r3571, %r6829;
cvt.u64.u32 %rd126, %r3572;
add.s64 %rd127, %rd126, %rd5;
add.s64 %rd128, %rd1, %rd127;
st.global.u8 [%rd128], %rs1025;
add.s32 %r6829, %r6829, 1;
setp.gt.u16 %p158, %rs495, 143;
selp.u32 %r6831, 1, 0, %p158;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_143:
setp.ne.s32 %p159, %r6466, 0;
mov.u32 %r6477, %r6355;
@%p159 bra $L__BB3_139;
// Second (bits, length) pair: bits = %rs29 (loaded from [%rd9]), length =
// %rs30 (from [%rd9+1]). Same downward-growing writer as the previous loop;
// skipped when the length is 0.
$L__BB3_144:
setp.eq.s16 %p160, %rs30, 0;
mov.u32 %r6489, %r6477;
@%p160 bra $L__BB3_151;
cvt.u32.u16 %r3573, %rs29;
and.b32 %r6479, %r3573, 255;
cvt.u32.u16 %r3574, %rs30;
and.b32 %r6478, %r3574, 255;
$L__BB3_146:
mov.u32 %r348, %r6478;
// Once %r6829 exceeds 2879, set the flag to 1 and stop.
setp.gt.u32 %p161, %r6829, 2879;
mov.u32 %r6489, 1;
@%p161 bra $L__BB3_151;
// free = 8 - %r6831 - %r6830; take = min(free, bits left)
mov.u32 %r3576, 8;
sub.s32 %r3577, %r3576, %r6831;
sub.s32 %r3578, %r3577, %r6830;
min.u32 %r3579, %r3578, %r348;
setp.eq.s32 %p162, %r3579, 32;
mov.u32 %r3580, -1;
shl.b32 %r3581, %r3580, %r3579;
not.b32 %r3582, %r3581;
selp.b32 %r3583, -1, %r3582, %p162;
and.b32 %r3584, %r3583, %r6479;
shl.b32 %r3585, %r3584, %r6830;
cvt.u16.u32 %rs499, %r3585;
or.b16 %rs1025, %rs1025, %rs499;
add.s32 %r6830, %r3579, %r6830;
sub.s32 %r6478, %r348, %r3579;
shr.u32 %r6479, %r6479, %r3579;
// Byte not yet full: no store this iteration.
setp.gt.u32 %p163, %r3578, %r348;
@%p163 bra $L__BB3_150;
// Flag set and byte != 127: clear the flag and keep filling the same byte.
setp.ne.s32 %p164, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs500, %rs1025, 255;
setp.ne.s16 %p165, %rs500, 127;
and.pred %p166, %p164, %p165;
@%p166 bra $L__BB3_150;
// Store at %rd1 + %rd5 + (20548 - %r6829); flag = (byte > 143).
mov.u32 %r3588, 20548;
sub.s32 %r3589, %r3588, %r6829;
cvt.u64.u32 %rd129, %r3589;
add.s64 %rd130, %rd129, %rd5;
add.s64 %rd131, %rd1, %rd130;
st.global.u8 [%rd131], %rs1025;
add.s32 %r6829, %r6829, 1;
setp.gt.u16 %p167, %rs500, 143;
selp.u32 %r6831, 1, 0, %p167;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_150:
setp.ne.s32 %p168, %r6478, 0;
mov.u32 %r6489, %r6477;
@%p168 bra $L__BB3_146;
// Third (bits, length) pair: bits = %rs27 (loaded from [%rd12]), length =
// %rs28 (from [%rd12+1]). Same downward-growing writer; skipped when the
// length is 0.
$L__BB3_151:
setp.eq.s16 %p169, %rs28, 0;
mov.u32 %r6501, %r6489;
@%p169 bra $L__BB3_158;
cvt.u32.u16 %r3590, %rs28;
and.b32 %r6490, %r3590, 255;
cvt.u32.u16 %r3591, %rs27;
and.b32 %r6491, %r3591, 255;
$L__BB3_153:
mov.u32 %r367, %r6490;
// Once %r6829 exceeds 2879, set the flag to 1 and stop.
setp.gt.u32 %p170, %r6829, 2879;
mov.u32 %r6501, 1;
@%p170 bra $L__BB3_158;
// free = 8 - %r6831 - %r6830; take = min(free, bits left)
mov.u32 %r3593, 8;
sub.s32 %r3594, %r3593, %r6831;
sub.s32 %r3595, %r3594, %r6830;
min.u32 %r3596, %r3595, %r367;
setp.eq.s32 %p171, %r3596, 32;
mov.u32 %r3597, -1;
shl.b32 %r3598, %r3597, %r3596;
not.b32 %r3599, %r3598;
selp.b32 %r3600, -1, %r3599, %p171;
and.b32 %r3601, %r3600, %r6491;
shl.b32 %r3602, %r3601, %r6830;
cvt.u16.u32 %rs504, %r3602;
or.b16 %rs1025, %rs1025, %rs504;
add.s32 %r6830, %r3596, %r6830;
sub.s32 %r6490, %r367, %r3596;
shr.u32 %r6491, %r6491, %r3596;
// Byte not yet full: no store this iteration.
setp.gt.u32 %p172, %r3595, %r367;
@%p172 bra $L__BB3_157;
// Flag set and byte != 127: clear the flag and keep filling the same byte.
setp.ne.s32 %p173, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs505, %rs1025, 255;
setp.ne.s16 %p174, %rs505, 127;
and.pred %p175, %p173, %p174;
@%p175 bra $L__BB3_157;
// Store at %rd1 + %rd5 + (20548 - %r6829); flag = (byte > 143).
mov.u32 %r3605, 20548;
sub.s32 %r3606, %r3605, %r6829;
cvt.u64.u32 %rd132, %r3606;
add.s64 %rd133, %rd132, %rd5;
add.s64 %rd134, %rd1, %rd133;
st.global.u8 [%rd134], %rs1025;
add.s32 %r6829, %r6829, 1;
setp.gt.u16 %p176, %rs505, 143;
selp.u32 %r6831, 1, 0, %p176;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_157:
setp.ne.s32 %p177, %r6490, 0;
mov.u32 %r6501, %r6489;
@%p177 bra $L__BB3_153;
// Fourth (bits, length) pair: bits = %rs31 (loaded from [%rd9+2]), length =
// %rs32 (from [%rd9+3]). Same downward-growing writer. Every exit from this
// block goes to $L__BB3_396 (outside this chunk) with %r6315 = 0 and the
// flag in %r6832.
$L__BB3_158:
setp.eq.s16 %p178, %rs32, 0;
mov.u32 %r6315, 0;
mov.u32 %r6832, %r6501;
@%p178 bra $L__BB3_396;
cvt.u32.u16 %r3608, %rs31;
and.b32 %r6503, %r3608, 255;
cvt.u32.u16 %r3609, %rs32;
and.b32 %r6502, %r3609, 255;
$L__BB3_160:
mov.u32 %r386, %r6502;
// Once %r6829 exceeds 2879, set the flag to 1 and stop.
setp.gt.u32 %p179, %r6829, 2879;
mov.u32 %r6832, 1;
@%p179 bra $L__BB3_396;
// free = 8 - %r6831 - %r6830; take = min(free, bits left)
mov.u32 %r3612, 8;
sub.s32 %r3613, %r3612, %r6831;
sub.s32 %r3614, %r3613, %r6830;
min.u32 %r3615, %r3614, %r386;
setp.eq.s32 %p180, %r3615, 32;
mov.u32 %r3616, -1;
shl.b32 %r3617, %r3616, %r3615;
not.b32 %r3618, %r3617;
selp.b32 %r3619, -1, %r3618, %p180;
and.b32 %r3620, %r3619, %r6503;
shl.b32 %r3621, %r3620, %r6830;
cvt.u16.u32 %rs509, %r3621;
or.b16 %rs1025, %rs1025, %rs509;
add.s32 %r6830, %r3615, %r6830;
sub.s32 %r6502, %r386, %r3615;
shr.u32 %r6503, %r6503, %r3615;
// Byte not yet full: no store this iteration.
setp.gt.u32 %p181, %r3614, %r386;
@%p181 bra $L__BB3_164;
// Flag set and byte != 127: clear the flag and keep filling the same byte.
setp.ne.s32 %p182, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs510, %rs1025, 255;
setp.ne.s16 %p183, %rs510, 127;
and.pred %p184, %p182, %p183;
@%p184 bra $L__BB3_164;
// Store at %rd1 + %rd5 + (20548 - %r6829); flag = (byte > 143).
mov.u32 %r3624, 20548;
sub.s32 %r3625, %r3624, %r6829;
cvt.u64.u32 %rd135, %r3625;
add.s64 %rd136, %rd135, %rd5;
add.s64 %rd137, %rd1, %rd136;
st.global.u8 [%rd137], %rs1025;
add.s32 %r6829, %r6829, 1;
setp.gt.u16 %p185, %rs510, 143;
selp.u32 %r6831, 1, 0, %p185;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_164:
// Leave once all bits are written; otherwise continue the loop.
setp.eq.s32 %p186, %r6502, 0;
mov.u32 %r6832, %r6501;
@%p186 bra $L__BB3_396;
bra.uni $L__BB3_160;
// Out-of-line flush for a bit-append site located above this chunk. When
// %r6381 exceeds 191 the store is skipped and the flag %r6391 is left at 1.
// Otherwise the accumulator byte is stored through %rd10, %r6381 advances,
// the accumulator is cleared and the bits-remaining counter is reset to 8
// (no test for a stored 255 byte on this path). Rejoins at $L__BB3_65.
$L__BB3_63:
setp.gt.u32 %p68, %r6381, 191;
mov.u32 %r6391, 1;
mov.u32 %r6390, 0;
@%p68 bra $L__BB3_65;
st.global.u8 [%rd10], %rs956;
add.s32 %r6381, %r6381, 1;
mov.u16 %rs956, 0;
mov.u32 %r6390, 8;
mov.u32 %r6391, %r6598;
bra.uni $L__BB3_65;
// Run-counting path for a group whose sample mask is 0. Increments the run
// counter %r6595; while it stays below the threshold %r6597 nothing is
// emitted. At the threshold a 1 bit is appended to the accumulator %rs956;
// if that fills the byte, control goes to $L__BB3_236 (outside this chunk),
// otherwise the state index moves up at $L__BB3_238.
$L__BB3_234:
add.s32 %r6595, %r6595, 1;
setp.lt.u32 %p262, %r6595, %r6597;
@%p262 bra $L__BB3_242;
// acc = (acc << 1) | 1
shl.b16 %rs539, %rs956, 1;
or.b16 %rs956, %rs539, 1;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p263, %r6390, 0;
mov.u32 %r6591, %r6598;
@%p263 bra $L__BB3_238;
bra.uni $L__BB3_236;
$L__BB3_238:
// state = min(state + 1, 12); counter = 0; threshold = 1 << exponent(state)
// with the ladder <3 -> 0, <6 -> 1, <9 -> 2, <11 -> 3, ==11 -> 4, else 5.
add.s32 %r3811, %r6596, 1;
min.u32 %r6596, %r3811, 12;
setp.lt.u32 %p266, %r6596, 3;
mov.u32 %r6595, 0;
mov.u32 %r6592, %r6595;
@%p266 bra $L__BB3_241;
setp.lt.u32 %p267, %r6596, 6;
mov.u32 %r6592, 1;
@%p267 bra $L__BB3_241;
setp.lt.u32 %p268, %r6596, 9;
setp.eq.s32 %p269, %r6596, 11;
selp.b32 %r3813, 4, 5, %p269;
setp.lt.u32 %p270, %r6596, 11;
selp.b32 %r3814, 3, %r3813, %p270;
selp.b32 %r6592, 2, %r3814, %p268;
$L__BB3_241:
mov.u32 %r3816, 1;
shl.b32 %r6597, %r3816, %r6592;
mov.u32 %r6598, %r6591;
// First of four bit-packing loops for the current group, all writing to the
// forward byte stream (%r7016 = byte index, %r7017 = bit capacity of the
// current byte, %r7018 = bits already placed, %r7019 = byte being built).
// Runs only when bit 0 of the sample mask %r6518 is set and packs the low
// (%r569 - bit 0 of %r570) bits of %r6512, least-significant bits first.
$L__BB3_242:
// %r569 = max(%r6534, 1); %r570 = low 4 bits of the table entry %rs48.
max.s32 %r569, %r6534, 1;
and.b16 %rs542, %rs48, 15;
cvt.u32.u16 %r570, %rs542;
and.b32 %r571, %r6518, 1;
setp.eq.s32 %p271, %r571, 0;
mov.u32 %r6613, %r7050;
@%p271 bra $L__BB3_249;
and.b32 %r3817, %r570, 1;
sub.s32 %r6599, %r569, %r3817;
setp.eq.s32 %p272, %r6599, 0;
mov.u32 %r6613, %r7050;
@%p272 bra $L__BB3_249;
// value = %r6512 & ~(-1 << count)
mov.u32 %r3818, -1;
shl.b32 %r3819, %r3818, %r6599;
not.b32 %r3820, %r3819;
and.b32 %r6600, %r6512, %r3820;
$L__BB3_245:
// Once the byte index exceeds 17476, set the flag to 1 and stop packing.
setp.gt.u32 %p273, %r7016, 17476;
mov.u32 %r6613, 1;
@%p273 bra $L__BB3_249;
// take = min(capacity - filled, bits left)
sub.s32 %r3822, %r7017, %r7018;
min.u32 %r3823, %r3822, %r6599;
setp.eq.s32 %p274, %r3823, 32;
mov.u32 %r3824, -1;
shl.b32 %r3825, %r3824, %r3823;
not.b32 %r3826, %r3825;
selp.b32 %r3827, -1, %r3826, %p274;
and.b32 %r3828, %r3827, %r6600;
shl.b32 %r3829, %r3828, %r7018;
or.b32 %r7019, %r3829, %r7019;
add.s32 %r7018, %r3823, %r7018;
shr.u32 %r6600, %r6600, %r3823;
sub.s32 %r6599, %r6599, %r3823;
// Byte not yet full: no store this iteration.
setp.lt.u32 %p275, %r7018, %r7017;
@%p275 bra $L__BB3_248;
// Store the byte at %rd1 + %rd5 + %r7016 and advance the index.
cvt.u64.u32 %rd183, %r7016;
add.s64 %rd184, %rd183, %rd5;
add.s64 %rd185, %rd1, %rd184;
st.global.u8 [%rd185], %r7019;
add.s32 %r7016, %r7016, 1;
// The byte after a 255 holds only 7 bits; otherwise 8.
setp.eq.s32 %p276, %r7019, 255;
selp.b32 %r7017, 7, 8, %p276;
mov.u32 %r7018, 0;
mov.u32 %r7019, %r7018;
$L__BB3_248:
setp.ne.s32 %p277, %r6599, 0;
mov.u32 %r6613, %r7050;
@%p277 bra $L__BB3_245;
// Second bit-packing loop for the current group. Runs only when %r457
// (= %r6518 & 2) is non-zero and packs the low (%r569 - bit 1 of %r570) bits
// of %r6516 into the forward byte stream.
$L__BB3_249:
setp.eq.s32 %p278, %r457, 0;
mov.u32 %r6628, %r6613;
@%p278 bra $L__BB3_256;
shr.u32 %r3832, %r570, 1;
and.b32 %r3833, %r3832, 1;
sub.s32 %r6614, %r569, %r3833;
setp.eq.s32 %p279, %r6614, 0;
mov.u32 %r6628, %r6613;
@%p279 bra $L__BB3_256;
// value = %r6516 & ~(-1 << count)
mov.u32 %r3834, -1;
shl.b32 %r3835, %r3834, %r6614;
not.b32 %r3836, %r3835;
and.b32 %r6615, %r6516, %r3836;
$L__BB3_252:
// Once the byte index exceeds 17476, set the flag to 1 and stop packing.
setp.gt.u32 %p280, %r7016, 17476;
mov.u32 %r6628, 1;
@%p280 bra $L__BB3_256;
// take = min(capacity - filled, bits left)
sub.s32 %r3838, %r7017, %r7018;
min.u32 %r3839, %r3838, %r6614;
setp.eq.s32 %p281, %r3839, 32;
mov.u32 %r3840, -1;
shl.b32 %r3841, %r3840, %r3839;
not.b32 %r3842, %r3841;
selp.b32 %r3843, -1, %r3842, %p281;
and.b32 %r3844, %r3843, %r6615;
shl.b32 %r3845, %r3844, %r7018;
or.b32 %r7019, %r3845, %r7019;
add.s32 %r7018, %r3839, %r7018;
shr.u32 %r6615, %r6615, %r3839;
sub.s32 %r6614, %r6614, %r3839;
// Byte not yet full: no store this iteration.
setp.lt.u32 %p282, %r7018, %r7017;
@%p282 bra $L__BB3_255;
// Store the byte at %rd1 + %rd5 + %r7016 and advance the index.
cvt.u64.u32 %rd186, %r7016;
add.s64 %rd187, %rd186, %rd5;
add.s64 %rd188, %rd1, %rd187;
st.global.u8 [%rd188], %r7019;
add.s32 %r7016, %r7016, 1;
// The byte after a 255 holds only 7 bits; otherwise 8.
setp.eq.s32 %p283, %r7019, 255;
selp.b32 %r7017, 7, 8, %p283;
mov.u32 %r7018, 0;
mov.u32 %r7019, %r7018;
$L__BB3_255:
setp.ne.s32 %p284, %r6614, 0;
mov.u32 %r6628, %r6613;
@%p284 bra $L__BB3_252;
// Third bit-packing loop for the current group. Runs only when bit 2 of the
// sample mask %r6518 is set and packs the low (%r569 - bit 2 of %r570) bits
// of %r6532 into the forward byte stream.
$L__BB3_256:
and.b32 %r3848, %r6518, 4;
setp.eq.s32 %p285, %r3848, 0;
mov.u32 %r6643, %r6628;
@%p285 bra $L__BB3_263;
shr.u32 %r3849, %r570, 2;
and.b32 %r3850, %r3849, 1;
sub.s32 %r6629, %r569, %r3850;
setp.eq.s32 %p286, %r6629, 0;
mov.u32 %r6643, %r6628;
@%p286 bra $L__BB3_263;
// value = %r6532 & ~(-1 << count)
mov.u32 %r3851, -1;
shl.b32 %r3852, %r3851, %r6629;
not.b32 %r3853, %r3852;
and.b32 %r6630, %r6532, %r3853;
$L__BB3_259:
// Once the byte index exceeds 17476, set the flag to 1 and stop packing.
setp.gt.u32 %p287, %r7016, 17476;
mov.u32 %r6643, 1;
@%p287 bra $L__BB3_263;
// take = min(capacity - filled, bits left)
sub.s32 %r3855, %r7017, %r7018;
min.u32 %r3856, %r3855, %r6629;
setp.eq.s32 %p288, %r3856, 32;
mov.u32 %r3857, -1;
shl.b32 %r3858, %r3857, %r3856;
not.b32 %r3859, %r3858;
selp.b32 %r3860, -1, %r3859, %p288;
and.b32 %r3861, %r3860, %r6630;
shl.b32 %r3862, %r3861, %r7018;
or.b32 %r7019, %r3862, %r7019;
add.s32 %r7018, %r3856, %r7018;
shr.u32 %r6630, %r6630, %r3856;
sub.s32 %r6629, %r6629, %r3856;
// Byte not yet full: no store this iteration.
setp.lt.u32 %p289, %r7018, %r7017;
@%p289 bra $L__BB3_262;
// Store the byte at %rd1 + %rd5 + %r7016 and advance the index.
cvt.u64.u32 %rd189, %r7016;
add.s64 %rd190, %rd189, %rd5;
add.s64 %rd191, %rd1, %rd190;
st.global.u8 [%rd191], %r7019;
add.s32 %r7016, %r7016, 1;
// The byte after a 255 holds only 7 bits; otherwise 8.
setp.eq.s32 %p290, %r7019, 255;
selp.b32 %r7017, 7, 8, %p290;
mov.u32 %r7018, 0;
mov.u32 %r7019, %r7018;
$L__BB3_262:
setp.ne.s32 %p291, %r6629, 0;
mov.u32 %r6643, %r6628;
@%p291 bra $L__BB3_259;
// Fourth bit-packing loop for the current group. Runs only when %r458
// (= %r6518 & 8) is non-zero and packs the low (%r569 - (%r570 >> 3)) bits of
// %r6531 into the forward byte stream. The resulting flag is written back
// to %r7050.
$L__BB3_263:
setp.eq.s32 %p292, %r458, 0;
mov.u32 %r7050, %r6643;
@%p292 bra $L__BB3_270;
// %r570 was masked to 4 bits, so the shift by 3 leaves only its top bit.
shr.u32 %r3865, %r570, 3;
sub.s32 %r6644, %r569, %r3865;
setp.eq.s32 %p293, %r6644, 0;
mov.u32 %r7050, %r6643;
@%p293 bra $L__BB3_270;
// value = %r6531 & ~(-1 << count)
mov.u32 %r3866, -1;
shl.b32 %r3867, %r3866, %r6644;
not.b32 %r3868, %r3867;
and.b32 %r6645, %r6531, %r3868;
$L__BB3_266:
// Once the byte index exceeds 17476, set the flag to 1 and stop packing.
setp.gt.u32 %p294, %r7016, 17476;
mov.u32 %r7050, 1;
@%p294 bra $L__BB3_270;
// take = min(capacity - filled, bits left)
sub.s32 %r3870, %r7017, %r7018;
min.u32 %r3871, %r3870, %r6644;
setp.eq.s32 %p295, %r3871, 32;
mov.u32 %r3872, -1;
shl.b32 %r3873, %r3872, %r3871;
not.b32 %r3874, %r3873;
selp.b32 %r3875, -1, %r3874, %p295;
and.b32 %r3876, %r3875, %r6645;
shl.b32 %r3877, %r3876, %r7018;
or.b32 %r7019, %r3877, %r7019;
add.s32 %r7018, %r3871, %r7018;
shr.u32 %r6645, %r6645, %r3871;
sub.s32 %r6644, %r6644, %r3871;
// Byte not yet full: no store this iteration.
setp.lt.u32 %p296, %r7018, %r7017;
@%p296 bra $L__BB3_269;
// Store the byte at %rd1 + %rd5 + %r7016 and advance the index.
cvt.u64.u32 %rd192, %r7016;
add.s64 %rd193, %rd192, %rd5;
add.s64 %rd194, %rd1, %rd193;
st.global.u8 [%rd194], %r7019;
add.s32 %r7016, %r7016, 1;
// The byte after a 255 holds only 7 bits; otherwise 8.
setp.eq.s32 %p297, %r7019, 255;
selp.b32 %r7017, 7, 8, %p297;
mov.u32 %r7018, 0;
mov.u32 %r7019, %r7018;
$L__BB3_269:
setp.ne.s32 %p298, %r6644, 0;
mov.u32 %r7050, %r6643;
@%p298 bra $L__BB3_266;
// Three-way dispatch on %r113 and %r454:
//   either value < 1        -> $L__BB3_318 (nothing to do here)
//   min(%r113, %r454) < 3   -> $L__BB3_310
//   otherwise               -> $L__BB3_272
$L__BB3_270:
setp.lt.s32 %p299, %r454, 1;
setp.lt.s32 %p300, %r113, 1;
or.pred %p301, %p300, %p299;
@%p301 bra $L__BB3_318;
min.s32 %r3880, %r113, %r454;
setp.lt.s32 %p302, %r3880, 3;
// %rd14 = %rd1 + %rd5 + (%r6381 + 17477); it is not read inside the visible
// lines, so its consumer is presumably one of the targets outside this view.
add.s32 %r3881, %r6381, 17477;
cvt.u64.u32 %rd195, %r3881;
add.s64 %rd196, %rd195, %rd5;
add.s64 %rd14, %rd1, %rd196;
@%p302 bra $L__BB3_310;
bra.uni $L__BB3_272;
// Run-counter path: increments %r6595 and, only when it reaches the threshold
// %r6597, shifts a 1 bit into the bit accumulator %rs956 and raises the state
// index %r6596 (clamped to 12).
// NOTE(review): the index -> exponent mapping below (0,0,0,1,1,1,2,2,2,3,3,4,5)
// matches the MEL exponent table of HTJ2K, so this is presumably the MEL
// run-length coder - confirm against the kernel source.
$L__BB3_310:
add.s32 %r6595, %r6595, 1;
setp.lt.u32 %p349, %r6595, %r6597;
@%p349 bra $L__BB3_318;
// Threshold reached: append a 1 bit and use up one bit of the budget %r6390.
shl.b16 %rs559, %rs956, 1;
or.b16 %rs956, %rs559, 1;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p350, %r6390, 0;
mov.u32 %r6701, %r6598;
@%p350 bra $L__BB3_314;
// Budget exhausted: continue at $L__BB3_312, which lies outside this view.
bra.uni $L__BB3_312;
$L__BB3_314:
// Advance the state index (max 12) and reset the run counter.
add.s32 %r3969, %r6596, 1;
min.u32 %r6596, %r3969, 12;
setp.lt.u32 %p353, %r6596, 3;
mov.u32 %r6595, 0;
mov.u32 %r6702, %r6595;
@%p353 bra $L__BB3_317;
setp.lt.u32 %p354, %r6596, 6;
mov.u32 %r6702, 1;
@%p354 bra $L__BB3_317;
// Index 6..8 -> 2, 9..10 -> 3, 11 -> 4, anything larger -> 5.
setp.lt.u32 %p355, %r6596, 9;
setp.eq.s32 %p356, %r6596, 11;
selp.b32 %r3971, 4, 5, %p356;
setp.lt.u32 %p357, %r6596, 11;
selp.b32 %r3972, 3, %r3971, %p357;
selp.b32 %r6702, 2, %r3972, %p355;
$L__BB3_317:
// New threshold = 1 << exponent; %r6598 receives the status carried in %r6701.
mov.u32 %r3974, 1;
shl.b32 %r6597, %r3974, %r6702;
mov.u32 %r6598, %r6701;
bra.uni $L__BB3_318;
// Appends a 0 bit to the bit accumulator %rs956, then prepares to emit the
// run counter %r6595 using an exponent-sized bit field.
// %r6390 is the number of bits still free in the pending byte.
$L__BB3_272:
shl.b16 %rs956, %rs956, 1;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p303, %r6390, 0;
mov.u32 %r6694, %r6598;
@%p303 bra $L__BB3_275;
// Budget exhausted: continue at $L__BB3_273, which lies outside this view.
bra.uni $L__BB3_273;
$L__BB3_275:
// Exponent %r6662 from state index %r6596:
// 0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, anything larger -> 5.
setp.lt.u32 %p305, %r6596, 3;
mov.u32 %r6662, 0;
@%p305 bra $L__BB3_278;
setp.lt.u32 %p306, %r6596, 6;
mov.u32 %r6662, 1;
@%p306 bra $L__BB3_278;
setp.lt.u32 %p307, %r6596, 9;
setp.eq.s32 %p308, %r6596, 11;
selp.b32 %r3887, 4, 5, %p308;
setp.lt.u32 %p309, %r6596, 11;
selp.b32 %r3888, 3, %r3887, %p309;
selp.b32 %r6662, 2, %r3888, %p307;
$L__BB3_278:
// Exponent 0 means no counter bits are emitted.
setp.eq.s32 %p310, %r6662, 0;
@%p310 bra $L__BB3_306;
// The emit is unrolled by 4: %r672 = exponent & 3 is the remainder handled
// first, %r671 = exponent - 1 is the index of the first (highest) bit.
add.s32 %r671, %r6662, -1;
and.b32 %r672, %r6662, 3;
setp.eq.s32 %p311, %r672, 0;
mov.u32 %r6672, %r6662;
mov.u32 %r6673, %r6694;
@%p311 bra $L__BB3_291;
// Remainder steps of the 4x-unrolled emit: writes (exponent & 3) bits of the
// run counter %r6595 into %rs956, highest bit first, one bit per step.
// Each step: %rs956 = (%rs956 << 1) | bit (done with bfi), then one bit of the
// budget %r6390 is used up; when it reaches 0 the byte is stored at
// %rd1 + %rd5 + (%r6381 + 17477) unless %r6381 > 191, in which case nothing is
// stored and the status becomes 1.
// Step 1: bit index exponent - 1 (%r671).
mov.u32 %r3890, 1;
shl.b32 %r3891, %r3890, %r671;
and.b32 %r3892, %r3891, %r6595;
setp.ne.s32 %p312, %r3892, 0;
selp.u32 %r3893, 1, 0, %p312;
cvt.u32.u16 %r3894, %rs956;
bfi.b32 %r3895, %r3894, %r3893, 1, 8;
cvt.u16.u32 %rs956, %r3895;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p313, %r6390, 0;
mov.u32 %r6673, %r6694;
@%p313 bra $L__BB3_283;
setp.gt.u32 %p314, %r6381, 191;
mov.u32 %r6390, 0;
mov.u32 %r6673, %r3890;
@%p314 bra $L__BB3_283;
add.s32 %r3899, %r6381, 17477;
cvt.u64.u32 %rd197, %r3899;
add.s64 %rd198, %rd197, %rd5;
add.s64 %rd199, %rd1, %rd198;
st.global.u8 [%rd199], %rs956;
add.s32 %r6381, %r6381, 1;
mov.u16 %rs956, 0;
// This first step resets the budget to 8 without testing the stored byte;
// the later steps pick 7 when the stored byte was 255.
mov.u32 %r6390, 8;
mov.u32 %r6673, %r6694;
$L__BB3_283:
// Remainder was 1: go to the unrolled loop with exponent - 1 bits left.
setp.eq.s32 %p315, %r672, 1;
mov.u32 %r6694, %r6673;
mov.u32 %r6672, %r671;
@%p315 bra $L__BB3_291;
// Step 2: bit index exponent - 2.
add.s32 %r6672, %r6662, -2;
mov.u32 %r3900, 1;
shl.b32 %r3901, %r3900, %r6672;
and.b32 %r3902, %r3901, %r6595;
setp.ne.s32 %p316, %r3902, 0;
selp.u32 %r3903, 1, 0, %p316;
cvt.u32.u16 %r3904, %rs956;
bfi.b32 %r3905, %r3904, %r3903, 1, 8;
cvt.u16.u32 %rs956, %r3905;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p317, %r6390, 0;
mov.u32 %r6668, %r6673;
@%p317 bra $L__BB3_287;
setp.gt.u32 %p318, %r6381, 191;
mov.u32 %r6390, 0;
mov.u32 %r6668, %r3900;
@%p318 bra $L__BB3_287;
add.s32 %r3908, %r6381, 17477;
cvt.u64.u32 %rd200, %r3908;
add.s64 %rd201, %rd200, %rd5;
add.s64 %rd202, %rd1, %rd201;
and.b16 %rs547, %rs956, 255;
st.global.u8 [%rd202], %rs956;
add.s32 %r6381, %r6381, 1;
// Next byte gets 7 bits if the byte just stored was 255, else 8.
setp.eq.s16 %p319, %rs547, 255;
selp.b32 %r6390, 7, 8, %p319;
mov.u16 %rs956, 0;
mov.u32 %r6668, %r6673;
$L__BB3_287:
// Remainder was 2: go to the unrolled loop.
setp.eq.s32 %p320, %r672, 2;
mov.u32 %r6694, %r6668;
mov.u32 %r6673, %r6668;
@%p320 bra $L__BB3_291;
// Step 3: bit index exponent - 3.
add.s32 %r6672, %r6662, -3;
mov.u32 %r3909, 1;
shl.b32 %r3910, %r3909, %r6672;
and.b32 %r3911, %r3910, %r6595;
setp.ne.s32 %p321, %r3911, 0;
selp.u32 %r3912, 1, 0, %p321;
cvt.u32.u16 %r3913, %rs956;
bfi.b32 %r3914, %r3913, %r3912, 1, 8;
cvt.u16.u32 %rs956, %r3914;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p322, %r6390, 0;
mov.u32 %r6694, %r6668;
mov.u32 %r6673, %r6668;
@%p322 bra $L__BB3_291;
setp.gt.u32 %p323, %r6381, 191;
mov.u32 %r6390, 0;
mov.u32 %r6694, %r3909;
mov.u32 %r6673, %r3909;
@%p323 bra $L__BB3_291;
add.s32 %r3919, %r6381, 17477;
cvt.u64.u32 %rd203, %r3919;
add.s64 %rd204, %rd203, %rd5;
add.s64 %rd205, %rd1, %rd204;
and.b16 %rs550, %rs956, 255;
st.global.u8 [%rd205], %rs956;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p324, %rs550, 255;
selp.b32 %r6390, 7, 8, %p324;
mov.u16 %rs956, 0;
mov.u32 %r6694, %r6668;
mov.u32 %r6673, %r6668;
$L__BB3_291:
// exponent - 1 < 3 means the remainder steps already covered every bit.
setp.lt.u32 %p325, %r671, 3;
@%p325 bra $L__BB3_306;
mov.u32 %r6694, %r6673;
// 4x-unrolled loop emitting bits of the run counter %r6595, highest first.
// Each iteration handles bit indices %r6672-1 .. %r6672-4 and then lowers
// %r6672 by 4. Between the four steps the accumulator / bit budget / status
// travel in temporaries (%r6682/%r6681/%r6683, %r6686/%r6685/%r6687,
// %r6690/%r6689/%r6691) and are written back to %rs956 / %r6390 / %r6694 in
// the last step. A byte is stored at %rd1 + %rd5 + (%r6381 + 17477) whenever
// the budget reaches 0, unless %r6381 > 191 (then status = 1, no store).
$L__BB3_293:
// Step A: bit index %r6672 - 1.
add.s32 %r3920, %r6672, -1;
mov.u32 %r3921, 1;
shl.b32 %r3922, %r3921, %r3920;
and.b32 %r3923, %r3922, %r6595;
setp.ne.s32 %p326, %r3923, 0;
selp.u32 %r3924, 1, 0, %p326;
cvt.u32.u16 %r3925, %rs956;
bfi.b32 %r6682, %r3925, %r3924, 1, 8;
add.s32 %r6681, %r6390, -1;
setp.ne.s32 %p327, %r6681, 0;
mov.u32 %r6683, %r6694;
@%p327 bra $L__BB3_296;
setp.gt.u32 %p328, %r6381, 191;
mov.u32 %r6681, 0;
mov.u32 %r6683, %r3921;
@%p328 bra $L__BB3_296;
cvt.u16.u32 %rs551, %r6682;
and.b16 %rs552, %rs551, 255;
add.s32 %r3929, %r6381, 17477;
cvt.u64.u32 %rd206, %r3929;
add.s64 %rd207, %rd206, %rd5;
add.s64 %rd208, %rd1, %rd207;
st.global.u8 [%rd208], %rs551;
add.s32 %r6381, %r6381, 1;
// Next byte gets 7 bits if the byte just stored was 255, else 8.
setp.eq.s16 %p329, %rs552, 255;
selp.b32 %r6681, 7, 8, %p329;
mov.u32 %r6682, 0;
mov.u32 %r6683, %r6694;
$L__BB3_296:
// Step B: bit index %r6672 - 2.
add.s32 %r3930, %r6672, -2;
shl.b32 %r3932, %r3921, %r3930;
and.b32 %r3933, %r3932, %r6595;
setp.ne.s32 %p330, %r3933, 0;
and.b32 %r3934, %r6682, 127;
selp.u32 %r3935, 1, 0, %p330;
bfi.b32 %r6686, %r3934, %r3935, 1, 7;
add.s32 %r6685, %r6681, -1;
setp.ne.s32 %p331, %r6685, 0;
mov.u32 %r6687, %r6683;
@%p331 bra $L__BB3_299;
setp.gt.u32 %p332, %r6381, 191;
mov.u32 %r6687, 1;
mov.u32 %r6685, 0;
@%p332 bra $L__BB3_299;
cvt.u16.u32 %rs553, %r6686;
and.b16 %rs554, %rs553, 255;
add.s32 %r3939, %r6381, 17477;
cvt.u64.u32 %rd209, %r3939;
add.s64 %rd210, %rd209, %rd5;
add.s64 %rd211, %rd1, %rd210;
st.global.u8 [%rd211], %rs553;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p333, %rs554, 255;
selp.b32 %r6685, 7, 8, %p333;
mov.u32 %r6686, 0;
mov.u32 %r6687, %r6683;
$L__BB3_299:
// Step C: bit index %r6672 - 3.
add.s32 %r3940, %r6672, -3;
mov.u32 %r3941, 1;
shl.b32 %r3942, %r3941, %r3940;
and.b32 %r3943, %r3942, %r6595;
setp.ne.s32 %p334, %r3943, 0;
and.b32 %r3944, %r6686, 127;
selp.u32 %r3945, 1, 0, %p334;
bfi.b32 %r6690, %r3944, %r3945, 1, 7;
add.s32 %r6689, %r6685, -1;
setp.ne.s32 %p335, %r6689, 0;
mov.u32 %r6691, %r6687;
@%p335 bra $L__BB3_302;
setp.gt.u32 %p336, %r6381, 191;
mov.u32 %r6689, 0;
mov.u32 %r6691, %r3941;
@%p336 bra $L__BB3_302;
cvt.u16.u32 %rs555, %r6690;
and.b16 %rs556, %rs555, 255;
add.s32 %r3949, %r6381, 17477;
cvt.u64.u32 %rd212, %r3949;
add.s64 %rd213, %rd212, %rd5;
add.s64 %rd214, %rd1, %rd213;
st.global.u8 [%rd214], %rs555;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p337, %rs556, 255;
selp.b32 %r6689, 7, 8, %p337;
mov.u32 %r6690, 0;
mov.u32 %r6691, %r6687;
$L__BB3_302:
// Step D: bit index %r6672 - 4; this also lowers the loop counter and writes
// the state back to %rs956 / %r6390 / %r6694.
add.s32 %r6672, %r6672, -4;
shl.b32 %r3951, %r3941, %r6672;
and.b32 %r3952, %r3951, %r6595;
setp.ne.s32 %p338, %r3952, 0;
and.b32 %r3953, %r6690, 127;
selp.u32 %r3954, 1, 0, %p338;
bfi.b32 %r3955, %r3953, %r3954, 1, 15;
cvt.u16.u32 %rs956, %r3955;
add.s32 %r6390, %r6689, -1;
setp.ne.s32 %p339, %r6390, 0;
mov.u32 %r6694, %r6691;
@%p339 bra $L__BB3_305;
setp.gt.u32 %p340, %r6381, 191;
mov.u32 %r6694, 1;
mov.u32 %r6390, 0;
@%p340 bra $L__BB3_305;
add.s32 %r3958, %r6381, 17477;
cvt.u64.u32 %rd215, %r3958;
add.s64 %rd216, %rd215, %rd5;
add.s64 %rd217, %rd1, %rd216;
and.b16 %rs558, %rs956, 255;
st.global.u8 [%rd217], %rs956;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p341, %rs558, 255;
selp.b32 %r6390, 7, 8, %p341;
mov.u16 %rs956, 0;
mov.u32 %r6694, %r6691;
$L__BB3_305:
// Loop until all remaining bit indices have been emitted.
setp.ne.s32 %p342, %r6672, 0;
@%p342 bra $L__BB3_293;
// After the counter bits: lower the state index %r6596 by one (never below 0),
// reset the run counter %r6595 to 0 and recompute the threshold
// %r6597 = 1 << exponent(%r6596). %r6598 receives the status from %r6694.
$L__BB3_306:
add.s32 %r3960, %r6596, -1;
setp.eq.s32 %p343, %r6596, 0;
mov.u32 %r6595, 0;
selp.b32 %r6596, 0, %r3960, %p343;
// Exponent: index 0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, else 5.
setp.lt.u32 %p344, %r6596, 3;
mov.u32 %r6698, %r6595;
@%p344 bra $L__BB3_309;
setp.lt.u32 %p345, %r6596, 6;
mov.u32 %r6698, 1;
@%p345 bra $L__BB3_309;
setp.lt.u32 %p346, %r6596, 9;
setp.eq.s32 %p347, %r6596, 11;
selp.b32 %r3962, 4, 5, %p347;
setp.lt.u32 %p348, %r6596, 11;
selp.b32 %r3963, 3, %r3962, %p348;
selp.b32 %r6698, 2, %r3963, %p346;
$L__BB3_309:
mov.u32 %r3965, 1;
shl.b32 %r6597, %r3965, %r6698;
mov.u32 %r6598, %r6694;
// Dispatch: when both %r113 and %r454 exceed 2 go to $L__BB3_367, otherwise
// to $L__BB3_319. %p359 (%r113 > 2) is reused by $L__BB3_319.
$L__BB3_318:
setp.gt.s32 %p358, %r454, 2;
setp.gt.s32 %p359, %r113, 2;
and.pred %p360, %p359, %p358;
@%p360 bra $L__BB3_367;
bra.uni $L__BB3_319;
// Loads eight bytes from the table at %rd9 (offsets %r326-11 .. %r326-9 and
// 6*%r454-12 .. 6*%r454-9), then emits the first (value, length) pair into the
// backward-growing byte stream. The remaining three pairs are emitted by the
// blocks that follow.
// NOTE(review): the flush rule below (flag on byte > 143, special case 127)
// looks like the HTJ2K VLC byte-stuffing rule - confirm against the source.
$L__BB3_367:
add.s32 %r4095, %r326, -11;
cvt.u64.u32 %rd247, %r4095;
add.s64 %rd16, %rd9, %rd247;
ld.global.u8 %rs122, [%rd16];
add.s32 %r4096, %r326, -10;
cvt.u64.u32 %rd249, %r4096;
add.s64 %rd250, %rd9, %rd249;
ld.global.u8 %rs123, [%rd250];
ld.global.u8 %rs124, [%rd250+1];
mul.lo.s32 %r4097, %r454, 6;
add.s32 %r4098, %r4097, -12;
cvt.u64.u32 %rd251, %r4098;
add.s64 %rd252, %rd9, %rd251;
ld.global.u8 %rs125, [%rd252];
ld.global.u8 %rs126, [%rd252+1];
add.s32 %r4099, %r4097, -10;
cvt.u64.u32 %rd253, %r4099;
add.s64 %rd254, %rd9, %rd253;
ld.global.u8 %rs127, [%rd254];
ld.global.u8 %rs128, [%rd254+1];
// First pair: length %rs122, value = byte just before it ([%rd16-1]).
// A zero length emits nothing and passes the status %r6548 through.
setp.eq.s16 %p428, %rs122, 0;
mov.u32 %r6796, %r6548;
@%p428 bra $L__BB3_374;
ld.global.u8 %r6786, [%rd16+-1];
cvt.u32.u16 %r6785, %rs122;
// Packing loop. State: %r6829 = bytes written, %r6830 = bits used in the
// pending byte %rs1025, %r6831 = 0/1 flag that removes one bit of capacity.
$L__BB3_369:
mov.u32 %r881, %r6785;
// Stop with status 1 once the byte counter exceeds 2879.
setp.gt.u32 %p429, %r6829, 2879;
mov.u32 %r6796, 1;
@%p429 bra $L__BB3_374;
// Free bits = 8 - %r6831 - %r6830; n = min(free, bits left).
mov.u32 %r4101, 8;
sub.s32 %r4102, %r4101, %r6831;
sub.s32 %r4103, %r4102, %r6830;
min.u32 %r4104, %r4103, %r881;
// Mask of n ones; the selp covers n == 32.
setp.eq.s32 %p430, %r4104, 32;
mov.u32 %r4105, -1;
shl.b32 %r4106, %r4105, %r4104;
not.b32 %r4107, %r4106;
selp.b32 %r4108, -1, %r4107, %p430;
and.b32 %r4109, %r4108, %r6786;
shl.b32 %r4110, %r4109, %r6830;
cvt.u16.u32 %rs594, %r4110;
or.b16 %rs1025, %rs1025, %rs594;
add.s32 %r6830, %r4104, %r6830;
sub.s32 %r6785, %r881, %r4104;
shr.u32 %r6786, %r6786, %r4104;
// Free bits exceeded the request: byte not full, no flush.
setp.gt.u32 %p431, %r4103, %r881;
@%p431 bra $L__BB3_373;
// Byte full. If the flag was set and the byte is not 127, nothing is stored:
// the flag is cleared (freeing one more bit) and packing continues.
setp.ne.s32 %p432, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs595, %rs1025, 255;
setp.ne.s16 %p433, %rs595, 127;
and.pred %p434, %p432, %p433;
@%p434 bra $L__BB3_373;
// Store at %rd1 + %rd5 + (20548 - %r6829): bytes land at descending addresses.
mov.u32 %r4113, 20548;
sub.s32 %r4114, %r4113, %r6829;
cvt.u64.u32 %rd255, %r4114;
add.s64 %rd256, %rd255, %rd5;
add.s64 %rd257, %rd1, %rd256;
st.global.u8 [%rd257], %rs1025;
add.s32 %r6829, %r6829, 1;
// Flag = 1 when the stored byte exceeds 143.
setp.gt.u16 %p435, %rs595, 143;
selp.u32 %r6831, 1, 0, %p435;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_373:
setp.ne.s32 %p436, %r6785, 0;
mov.u32 %r6796, %r6548;
@%p436 bra $L__BB3_369;
// Emits the pair (value %rs125, length %rs126) into the backward-growing byte
// stream; a zero length emits nothing. Status leaves in %r6808: the incoming
// %r6796, or 1 if the byte counter %r6829 exceeded 2879.
$L__BB3_374:
setp.eq.s16 %p437, %rs126, 0;
mov.u32 %r6808, %r6796;
@%p437 bra $L__BB3_381;
cvt.u32.u16 %r4115, %rs125;
and.b32 %r6798, %r4115, 255;
cvt.u32.u16 %r4116, %rs126;
and.b32 %r6797, %r4116, 255;
// Packing loop. State: %r6829 = bytes written, %r6830 = bits used in the
// pending byte %rs1025, %r6831 = 0/1 flag that removes one bit of capacity.
$L__BB3_376:
mov.u32 %r900, %r6797;
setp.gt.u32 %p438, %r6829, 2879;
mov.u32 %r6808, 1;
@%p438 bra $L__BB3_381;
// Free bits = 8 - %r6831 - %r6830; n = min(free, bits left).
mov.u32 %r4118, 8;
sub.s32 %r4119, %r4118, %r6831;
sub.s32 %r4120, %r4119, %r6830;
min.u32 %r4121, %r4120, %r900;
// Mask of n ones; the selp covers n == 32.
setp.eq.s32 %p439, %r4121, 32;
mov.u32 %r4122, -1;
shl.b32 %r4123, %r4122, %r4121;
not.b32 %r4124, %r4123;
selp.b32 %r4125, -1, %r4124, %p439;
and.b32 %r4126, %r4125, %r6798;
shl.b32 %r4127, %r4126, %r6830;
cvt.u16.u32 %rs599, %r4127;
or.b16 %rs1025, %rs1025, %rs599;
add.s32 %r6830, %r4121, %r6830;
sub.s32 %r6797, %r900, %r4121;
shr.u32 %r6798, %r6798, %r4121;
// Byte not full: no flush.
setp.gt.u32 %p440, %r4120, %r900;
@%p440 bra $L__BB3_380;
// Byte full. If the flag was set and the byte is not 127, nothing is stored:
// the flag is cleared and packing continues.
setp.ne.s32 %p441, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs600, %rs1025, 255;
setp.ne.s16 %p442, %rs600, 127;
and.pred %p443, %p441, %p442;
@%p443 bra $L__BB3_380;
// Store at %rd1 + %rd5 + (20548 - %r6829): bytes land at descending addresses.
mov.u32 %r4130, 20548;
sub.s32 %r4131, %r4130, %r6829;
cvt.u64.u32 %rd258, %r4131;
add.s64 %rd259, %rd258, %rd5;
add.s64 %rd260, %rd1, %rd259;
st.global.u8 [%rd260], %rs1025;
add.s32 %r6829, %r6829, 1;
// Flag = 1 when the stored byte exceeds 143.
setp.gt.u16 %p444, %rs600, 143;
selp.u32 %r6831, 1, 0, %p444;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_380:
setp.ne.s32 %p445, %r6797, 0;
mov.u32 %r6808, %r6796;
@%p445 bra $L__BB3_376;
// Emits the pair (value %rs123, length %rs124) into the backward-growing byte
// stream; a zero length emits nothing. Status leaves in %r6820: the incoming
// %r6808, or 1 if the byte counter %r6829 exceeded 2879.
$L__BB3_381:
setp.eq.s16 %p446, %rs124, 0;
mov.u32 %r6820, %r6808;
@%p446 bra $L__BB3_388;
cvt.u32.u16 %r4132, %rs124;
and.b32 %r6809, %r4132, 255;
cvt.u32.u16 %r4133, %rs123;
and.b32 %r6810, %r4133, 255;
// Packing loop. State: %r6829 = bytes written, %r6830 = bits used in the
// pending byte %rs1025, %r6831 = 0/1 flag that removes one bit of capacity.
$L__BB3_383:
mov.u32 %r919, %r6809;
setp.gt.u32 %p447, %r6829, 2879;
mov.u32 %r6820, 1;
@%p447 bra $L__BB3_388;
// Free bits = 8 - %r6831 - %r6830; n = min(free, bits left).
mov.u32 %r4135, 8;
sub.s32 %r4136, %r4135, %r6831;
sub.s32 %r4137, %r4136, %r6830;
min.u32 %r4138, %r4137, %r919;
// Mask of n ones; the selp covers n == 32.
setp.eq.s32 %p448, %r4138, 32;
mov.u32 %r4139, -1;
shl.b32 %r4140, %r4139, %r4138;
not.b32 %r4141, %r4140;
selp.b32 %r4142, -1, %r4141, %p448;
and.b32 %r4143, %r4142, %r6810;
shl.b32 %r4144, %r4143, %r6830;
cvt.u16.u32 %rs604, %r4144;
or.b16 %rs1025, %rs1025, %rs604;
add.s32 %r6830, %r4138, %r6830;
sub.s32 %r6809, %r919, %r4138;
shr.u32 %r6810, %r6810, %r4138;
// Byte not full: no flush.
setp.gt.u32 %p449, %r4137, %r919;
@%p449 bra $L__BB3_387;
// Byte full. If the flag was set and the byte is not 127, nothing is stored:
// the flag is cleared and packing continues.
setp.ne.s32 %p450, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs605, %rs1025, 255;
setp.ne.s16 %p451, %rs605, 127;
and.pred %p452, %p450, %p451;
@%p452 bra $L__BB3_387;
// Store at %rd1 + %rd5 + (20548 - %r6829): bytes land at descending addresses.
mov.u32 %r4147, 20548;
sub.s32 %r4148, %r4147, %r6829;
cvt.u64.u32 %rd261, %r4148;
add.s64 %rd262, %rd261, %rd5;
add.s64 %rd263, %rd1, %rd262;
st.global.u8 [%rd263], %rs1025;
add.s32 %r6829, %r6829, 1;
// Flag = 1 when the stored byte exceeds 143.
setp.gt.u16 %p453, %rs605, 143;
selp.u32 %r6831, 1, 0, %p453;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_387:
setp.ne.s32 %p454, %r6809, 0;
mov.u32 %r6820, %r6808;
@%p454 bra $L__BB3_383;
// Emits the pair (value %rs127, length %rs128) into the backward-growing byte
// stream; a zero length emits nothing. Status leaves in %r6832: the incoming
// %r6820, or 1 if the byte counter %r6829 exceeded 2879. Ends at $L__BB3_395.
$L__BB3_388:
setp.eq.s16 %p455, %rs128, 0;
mov.u32 %r6832, %r6820;
@%p455 bra $L__BB3_395;
cvt.u32.u16 %r4149, %rs127;
and.b32 %r6822, %r4149, 255;
cvt.u32.u16 %r4150, %rs128;
and.b32 %r6821, %r4150, 255;
// Packing loop. State: %r6829 = bytes written, %r6830 = bits used in the
// pending byte %rs1025, %r6831 = 0/1 flag that removes one bit of capacity.
$L__BB3_390:
mov.u32 %r938, %r6821;
setp.gt.u32 %p456, %r6829, 2879;
mov.u32 %r6832, 1;
@%p456 bra $L__BB3_395;
// Free bits = 8 - %r6831 - %r6830; n = min(free, bits left).
mov.u32 %r4152, 8;
sub.s32 %r4153, %r4152, %r6831;
sub.s32 %r4154, %r4153, %r6830;
min.u32 %r4155, %r4154, %r938;
// Mask of n ones; the selp covers n == 32.
setp.eq.s32 %p457, %r4155, 32;
mov.u32 %r4156, -1;
shl.b32 %r4157, %r4156, %r4155;
not.b32 %r4158, %r4157;
selp.b32 %r4159, -1, %r4158, %p457;
and.b32 %r4160, %r4159, %r6822;
shl.b32 %r4161, %r4160, %r6830;
cvt.u16.u32 %rs609, %r4161;
or.b16 %rs1025, %rs1025, %rs609;
add.s32 %r6830, %r4155, %r6830;
sub.s32 %r6821, %r938, %r4155;
shr.u32 %r6822, %r6822, %r4155;
// Byte not full: no flush.
setp.gt.u32 %p458, %r4154, %r938;
@%p458 bra $L__BB3_394;
// Byte full. If the flag was set and the byte is not 127, nothing is stored:
// the flag is cleared and packing continues.
setp.ne.s32 %p459, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs610, %rs1025, 255;
setp.ne.s16 %p460, %rs610, 127;
and.pred %p461, %p459, %p460;
@%p461 bra $L__BB3_394;
// Store at %rd1 + %rd5 + (20548 - %r6829): bytes land at descending addresses.
mov.u32 %r4164, 20548;
sub.s32 %r4165, %r4164, %r6829;
cvt.u64.u32 %rd264, %r4165;
add.s64 %rd265, %rd264, %rd5;
add.s64 %rd266, %rd1, %rd265;
st.global.u8 [%rd266], %rs1025;
add.s32 %r6829, %r6829, 1;
// Flag = 1 when the stored byte exceeds 143.
setp.gt.u16 %p462, %rs610, 143;
selp.u32 %r6831, 1, 0, %p462;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_394:
setp.ne.s32 %p463, %r6821, 0;
mov.u32 %r6832, %r6820;
@%p463 bra $L__BB3_390;
bra.uni $L__BB3_395;
// Second-level dispatch: %p359 (%r113 > 2, set at $L__BB3_318) combined with
// %r454 > 0 selects $L__BB3_348; everything else goes to $L__BB3_320.
// %p361 (%r454 > 0) is reused by $L__BB3_320.
$L__BB3_319:
setp.gt.s32 %p361, %r454, 0;
and.pred %p363, %p359, %p361;
@%p363 bra $L__BB3_348;
bra.uni $L__BB3_320;
// Loads a length byte from [%rd11+1] and two more bytes from [%rd12] and
// [%rd12+1], then emits the pair (value [%rd11], length [%rd11+1]) into the
// backward-growing byte stream. %rs109 / %rs110 are consumed by the blocks
// that follow. Status leaves in %r6764: the incoming %r6548, or 1 if the byte
// counter %r6829 exceeded 2879.
$L__BB3_348:
ld.global.u8 %rs108, [%rd11+1];
ld.global.u8 %rs109, [%rd12];
ld.global.u8 %rs110, [%rd12+1];
setp.eq.s16 %p402, %rs108, 0;
mov.u32 %r6764, %r6548;
@%p402 bra $L__BB3_355;
ld.global.u8 %r6754, [%rd11];
cvt.u32.u16 %r6753, %rs108;
// Packing loop. State: %r6829 = bytes written, %r6830 = bits used in the
// pending byte %rs1025, %r6831 = 0/1 flag that removes one bit of capacity.
$L__BB3_350:
mov.u32 %r829, %r6753;
setp.gt.u32 %p403, %r6829, 2879;
mov.u32 %r6764, 1;
@%p403 bra $L__BB3_355;
// Free bits = 8 - %r6831 - %r6830; n = min(free, bits left).
mov.u32 %r4047, 8;
sub.s32 %r4048, %r4047, %r6831;
sub.s32 %r4049, %r4048, %r6830;
min.u32 %r4050, %r4049, %r829;
// Mask of n ones; the selp covers n == 32.
setp.eq.s32 %p404, %r4050, 32;
mov.u32 %r4051, -1;
shl.b32 %r4052, %r4051, %r4050;
not.b32 %r4053, %r4052;
selp.b32 %r4054, -1, %r4053, %p404;
and.b32 %r4055, %r4054, %r6754;
shl.b32 %r4056, %r4055, %r6830;
cvt.u16.u32 %rs581, %r4056;
or.b16 %rs1025, %rs1025, %rs581;
add.s32 %r6830, %r4050, %r6830;
sub.s32 %r6753, %r829, %r4050;
shr.u32 %r6754, %r6754, %r4050;
// Byte not full: no flush.
setp.gt.u32 %p405, %r4049, %r829;
@%p405 bra $L__BB3_354;
// Byte full. If the flag was set and the byte is not 127, nothing is stored:
// the flag is cleared and packing continues.
setp.ne.s32 %p406, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs582, %rs1025, 255;
setp.ne.s16 %p407, %rs582, 127;
and.pred %p408, %p406, %p407;
@%p408 bra $L__BB3_354;
// Store at %rd1 + %rd5 + (20548 - %r6829): bytes land at descending addresses.
mov.u32 %r4059, 20548;
sub.s32 %r4060, %r4059, %r6829;
cvt.u64.u32 %rd238, %r4060;
add.s64 %rd239, %rd238, %rd5;
add.s64 %rd240, %rd1, %rd239;
st.global.u8 [%rd240], %rs1025;
add.s32 %r6829, %r6829, 1;
// Flag = 1 when the stored byte exceeds 143.
setp.gt.u16 %p409, %rs582, 143;
selp.u32 %r6831, 1, 0, %p409;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_354:
setp.ne.s32 %p410, %r6753, 0;
mov.u32 %r6764, %r6548;
@%p410 bra $L__BB3_350;
// Emits a single bit - the least significant bit of (%r454 - 1) - into the
// backward-growing byte stream (bit count %r6765 starts at 1).
// Also widens %rs110 / %rs109 into %r6777 / %r6778 for the loop at $L__BB3_362.
// Status leaves in %r6776: the incoming %r6764, or 1 (%r4061) if the byte
// counter %r6829 exceeded 2879.
$L__BB3_355:
add.s32 %r6766, %r454, -1;
cvt.u32.u16 %r4062, %rs110;
and.b32 %r6777, %r4062, 255;
cvt.u32.u16 %r4063, %rs109;
and.b32 %r6778, %r4063, 255;
mov.u32 %r4061, 1;
mov.u32 %r6765, %r4061;
// Packing loop. State: %r6829 = bytes written, %r6830 = bits used in the
// pending byte %rs1025, %r6831 = 0/1 flag that removes one bit of capacity.
$L__BB3_356:
mov.u32 %r849, %r6765;
setp.gt.u32 %p411, %r6829, 2879;
mov.u32 %r6776, %r4061;
@%p411 bra $L__BB3_361;
// Free bits = 8 - %r6831 - %r6830; n = min(free, bits left).
mov.u32 %r4065, 8;
sub.s32 %r4066, %r4065, %r6831;
sub.s32 %r4067, %r4066, %r6830;
min.u32 %r4068, %r4067, %r849;
// Mask of n ones; the selp covers n == 32.
setp.eq.s32 %p412, %r4068, 32;
mov.u32 %r4069, -1;
shl.b32 %r4070, %r4069, %r4068;
not.b32 %r4071, %r4070;
selp.b32 %r4072, -1, %r4071, %p412;
and.b32 %r4073, %r4072, %r6766;
shl.b32 %r4074, %r4073, %r6830;
cvt.u16.u32 %rs585, %r4074;
or.b16 %rs1025, %rs1025, %rs585;
add.s32 %r6830, %r4068, %r6830;
sub.s32 %r6765, %r849, %r4068;
shr.u32 %r6766, %r6766, %r4068;
// Byte not full: no flush.
setp.gt.u32 %p413, %r4067, %r849;
@%p413 bra $L__BB3_360;
// Byte full. If the flag was set and the byte is not 127, nothing is stored:
// the flag is cleared and packing continues.
setp.ne.s32 %p414, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs586, %rs1025, 255;
setp.ne.s16 %p415, %rs586, 127;
and.pred %p416, %p414, %p415;
@%p416 bra $L__BB3_360;
// Store at %rd1 + %rd5 + (20548 - %r6829): bytes land at descending addresses.
mov.u32 %r4077, 20548;
sub.s32 %r4078, %r4077, %r6829;
cvt.u64.u32 %rd241, %r4078;
add.s64 %rd242, %rd241, %rd5;
add.s64 %rd243, %rd1, %rd242;
st.global.u8 [%rd243], %rs1025;
add.s32 %r6829, %r6829, 1;
// Flag = 1 when the stored byte exceeds 143.
setp.gt.u16 %p417, %rs586, 143;
selp.u32 %r6831, 1, 0, %p417;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_360:
setp.ne.s32 %p418, %r6765, 0;
mov.u32 %r6776, %r6764;
@%p418 bra $L__BB3_356;
// Emits the pair (value %r6778, length %r6777) into the backward-growing byte
// stream; both registers are initialised before $L__BB3_356 from %rs109 /
// %rs110. A zero length (%rs110 == 0) emits nothing.
// Status leaves in %r6832: the incoming %r6776, or 1 if the byte counter
// %r6829 exceeded 2879. Ends at $L__BB3_395.
$L__BB3_361:
setp.eq.s16 %p419, %rs110, 0;
mov.u32 %r6832, %r6776;
@%p419 bra $L__BB3_395;
// Packing loop. State: %r6829 = bytes written, %r6830 = bits used in the
// pending byte %rs1025, %r6831 = 0/1 flag that removes one bit of capacity.
$L__BB3_362:
mov.u32 %r866, %r6777;
setp.gt.u32 %p420, %r6829, 2879;
mov.u32 %r6832, 1;
@%p420 bra $L__BB3_395;
// Free bits = 8 - %r6831 - %r6830; n = min(free, bits left).
mov.u32 %r4080, 8;
sub.s32 %r4081, %r4080, %r6831;
sub.s32 %r4082, %r4081, %r6830;
min.u32 %r4083, %r4082, %r866;
// Mask of n ones; the selp covers n == 32.
setp.eq.s32 %p421, %r4083, 32;
mov.u32 %r4084, -1;
shl.b32 %r4085, %r4084, %r4083;
not.b32 %r4086, %r4085;
selp.b32 %r4087, -1, %r4086, %p421;
and.b32 %r4088, %r4087, %r6778;
shl.b32 %r4089, %r4088, %r6830;
cvt.u16.u32 %rs590, %r4089;
or.b16 %rs1025, %rs1025, %rs590;
add.s32 %r6830, %r4083, %r6830;
sub.s32 %r6777, %r866, %r4083;
shr.u32 %r6778, %r6778, %r4083;
// Byte not full: no flush.
setp.gt.u32 %p422, %r4082, %r866;
@%p422 bra $L__BB3_366;
// Byte full. If the flag was set and the byte is not 127, nothing is stored:
// the flag is cleared and packing continues.
setp.ne.s32 %p423, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs591, %rs1025, 255;
setp.ne.s16 %p424, %rs591, 127;
and.pred %p425, %p423, %p424;
@%p425 bra $L__BB3_366;
// Store at %rd1 + %rd5 + (20548 - %r6829): bytes land at descending addresses.
mov.u32 %r4092, 20548;
sub.s32 %r4093, %r4092, %r6829;
cvt.u64.u32 %rd244, %r4093;
add.s64 %rd245, %rd244, %rd5;
add.s64 %rd246, %rd1, %rd245;
st.global.u8 [%rd246], %rs1025;
add.s32 %r6829, %r6829, 1;
// Flag = 1 when the stored byte exceeds 143.
setp.gt.u16 %p426, %rs591, 143;
selp.u32 %r6831, 1, 0, %p426;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_366:
setp.eq.s32 %p427, %r6777, 0;
mov.u32 %r6832, %r6776;
@%p427 bra $L__BB3_395;
bra.uni $L__BB3_362;
// Fallback path: loads eight bytes from the table at %rd9 using two base
// offsets - (%r113 > 0 ? %r326 : 0) and (%r454 > 0 ? 6*%r454 : 0), four bytes
// at each - then emits the first (value, length) pair into the backward-growing
// byte stream. %p361 (%r454 > 0) is set at $L__BB3_319.
// Status leaves in %r6720: the incoming %r6548, or 1 if the byte counter
// %r6829 exceeded 2879.
$L__BB3_320:
setp.gt.s32 %p365, %r113, 0;
selp.b32 %r3975, %r326, 0, %p365;
cvt.u64.u32 %rd218, %r3975;
add.s64 %rd15, %rd9, %rd218;
ld.global.u8 %rs86, [%rd15+1];
add.s32 %r3976, %r3975, 2;
cvt.u64.u32 %rd220, %r3976;
add.s64 %rd221, %rd9, %rd220;
ld.global.u8 %rs87, [%rd221];
ld.global.u8 %rs88, [%rd221+1];
mul.lo.s32 %r3977, %r454, 6;
selp.b32 %r3978, %r3977, 0, %p361;
cvt.u64.u32 %rd222, %r3978;
add.s64 %rd223, %rd9, %rd222;
ld.global.u8 %rs89, [%rd223];
ld.global.u8 %rs90, [%rd223+1];
add.s32 %r3979, %r3978, 2;
cvt.u64.u32 %rd224, %r3979;
add.s64 %rd225, %rd9, %rd224;
ld.global.u8 %rs91, [%rd225];
ld.global.u8 %rs92, [%rd225+1];
// First pair: length %rs86, value [%rd15]. A zero length emits nothing.
setp.eq.s16 %p366, %rs86, 0;
mov.u32 %r6720, %r6548;
@%p366 bra $L__BB3_327;
ld.global.u8 %r6710, [%rd15];
cvt.u32.u16 %r6709, %rs86;
// Packing loop. State: %r6829 = bytes written, %r6830 = bits used in the
// pending byte %rs1025, %r6831 = 0/1 flag that removes one bit of capacity.
$L__BB3_322:
mov.u32 %r757, %r6709;
setp.gt.u32 %p367, %r6829, 2879;
mov.u32 %r6720, 1;
@%p367 bra $L__BB3_327;
// Free bits = 8 - %r6831 - %r6830; n = min(free, bits left).
mov.u32 %r3981, 8;
sub.s32 %r3982, %r3981, %r6831;
sub.s32 %r3983, %r3982, %r6830;
min.u32 %r3984, %r3983, %r757;
// Mask of n ones; the selp covers n == 32.
setp.eq.s32 %p368, %r3984, 32;
mov.u32 %r3985, -1;
shl.b32 %r3986, %r3985, %r3984;
not.b32 %r3987, %r3986;
selp.b32 %r3988, -1, %r3987, %p368;
and.b32 %r3989, %r3988, %r6710;
shl.b32 %r3990, %r3989, %r6830;
cvt.u16.u32 %rs562, %r3990;
or.b16 %rs1025, %rs1025, %rs562;
add.s32 %r6830, %r3984, %r6830;
sub.s32 %r6709, %r757, %r3984;
shr.u32 %r6710, %r6710, %r3984;
// Byte not full: no flush.
setp.gt.u32 %p369, %r3983, %r757;
@%p369 bra $L__BB3_326;
// Byte full. If the flag was set and the byte is not 127, nothing is stored:
// the flag is cleared and packing continues.
setp.ne.s32 %p370, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs563, %rs1025, 255;
setp.ne.s16 %p371, %rs563, 127;
and.pred %p372, %p370, %p371;
@%p372 bra $L__BB3_326;
// Store at %rd1 + %rd5 + (20548 - %r6829): bytes land at descending addresses.
mov.u32 %r3993, 20548;
sub.s32 %r3994, %r3993, %r6829;
cvt.u64.u32 %rd226, %r3994;
add.s64 %rd227, %rd226, %rd5;
add.s64 %rd228, %rd1, %rd227;
st.global.u8 [%rd228], %rs1025;
add.s32 %r6829, %r6829, 1;
// Flag = 1 when the stored byte exceeds 143.
setp.gt.u16 %p373, %rs563, 143;
selp.u32 %r6831, 1, 0, %p373;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_326:
setp.ne.s32 %p374, %r6709, 0;
mov.u32 %r6720, %r6548;
@%p374 bra $L__BB3_322;
// Emits the pair (value %rs89, length %rs90) into the backward-growing byte
// stream; a zero length emits nothing. Status leaves in %r6732: the incoming
// %r6720, or 1 if the byte counter %r6829 exceeded 2879.
$L__BB3_327:
setp.eq.s16 %p375, %rs90, 0;
mov.u32 %r6732, %r6720;
@%p375 bra $L__BB3_334;
cvt.u32.u16 %r3995, %rs89;
and.b32 %r6722, %r3995, 255;
cvt.u32.u16 %r3996, %rs90;
and.b32 %r6721, %r3996, 255;
// Packing loop. State: %r6829 = bytes written, %r6830 = bits used in the
// pending byte %rs1025, %r6831 = 0/1 flag that removes one bit of capacity.
$L__BB3_329:
mov.u32 %r776, %r6721;
setp.gt.u32 %p376, %r6829, 2879;
mov.u32 %r6732, 1;
@%p376 bra $L__BB3_334;
// Free bits = 8 - %r6831 - %r6830; n = min(free, bits left).
mov.u32 %r3998, 8;
sub.s32 %r3999, %r3998, %r6831;
sub.s32 %r4000, %r3999, %r6830;
min.u32 %r4001, %r4000, %r776;
// Mask of n ones; the selp covers n == 32.
setp.eq.s32 %p377, %r4001, 32;
mov.u32 %r4002, -1;
shl.b32 %r4003, %r4002, %r4001;
not.b32 %r4004, %r4003;
selp.b32 %r4005, -1, %r4004, %p377;
and.b32 %r4006, %r4005, %r6722;
shl.b32 %r4007, %r4006, %r6830;
cvt.u16.u32 %rs567, %r4007;
or.b16 %rs1025, %rs1025, %rs567;
add.s32 %r6830, %r4001, %r6830;
sub.s32 %r6721, %r776, %r4001;
shr.u32 %r6722, %r6722, %r4001;
// Byte not full: no flush.
setp.gt.u32 %p378, %r4000, %r776;
@%p378 bra $L__BB3_333;
// Byte full. If the flag was set and the byte is not 127, nothing is stored:
// the flag is cleared and packing continues.
setp.ne.s32 %p379, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs568, %rs1025, 255;
setp.ne.s16 %p380, %rs568, 127;
and.pred %p381, %p379, %p380;
@%p381 bra $L__BB3_333;
// Store at %rd1 + %rd5 + (20548 - %r6829): bytes land at descending addresses.
mov.u32 %r4010, 20548;
sub.s32 %r4011, %r4010, %r6829;
cvt.u64.u32 %rd229, %r4011;
add.s64 %rd230, %rd229, %rd5;
add.s64 %rd231, %rd1, %rd230;
st.global.u8 [%rd231], %rs1025;
add.s32 %r6829, %r6829, 1;
// Flag = 1 when the stored byte exceeds 143.
setp.gt.u16 %p382, %rs568, 143;
selp.u32 %r6831, 1, 0, %p382;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_333:
setp.ne.s32 %p383, %r6721, 0;
mov.u32 %r6732, %r6720;
@%p383 bra $L__BB3_329;
// Emits the pair (value %rs87, length %rs88) into the backward-growing byte
// stream; a zero length emits nothing. Status leaves in %r6744: the incoming
// %r6732, or 1 if the byte counter %r6829 exceeded 2879.
$L__BB3_334:
setp.eq.s16 %p384, %rs88, 0;
mov.u32 %r6744, %r6732;
@%p384 bra $L__BB3_341;
cvt.u32.u16 %r4012, %rs88;
and.b32 %r6733, %r4012, 255;
cvt.u32.u16 %r4013, %rs87;
and.b32 %r6734, %r4013, 255;
// Packing loop. State: %r6829 = bytes written, %r6830 = bits used in the
// pending byte %rs1025, %r6831 = 0/1 flag that removes one bit of capacity.
$L__BB3_336:
mov.u32 %r795, %r6733;
setp.gt.u32 %p385, %r6829, 2879;
mov.u32 %r6744, 1;
@%p385 bra $L__BB3_341;
// Free bits = 8 - %r6831 - %r6830; n = min(free, bits left).
mov.u32 %r4015, 8;
sub.s32 %r4016, %r4015, %r6831;
sub.s32 %r4017, %r4016, %r6830;
min.u32 %r4018, %r4017, %r795;
// Mask of n ones; the selp covers n == 32.
setp.eq.s32 %p386, %r4018, 32;
mov.u32 %r4019, -1;
shl.b32 %r4020, %r4019, %r4018;
not.b32 %r4021, %r4020;
selp.b32 %r4022, -1, %r4021, %p386;
and.b32 %r4023, %r4022, %r6734;
shl.b32 %r4024, %r4023, %r6830;
cvt.u16.u32 %rs572, %r4024;
or.b16 %rs1025, %rs1025, %rs572;
add.s32 %r6830, %r4018, %r6830;
sub.s32 %r6733, %r795, %r4018;
shr.u32 %r6734, %r6734, %r4018;
// Byte not full: no flush.
setp.gt.u32 %p387, %r4017, %r795;
@%p387 bra $L__BB3_340;
// Byte full. If the flag was set and the byte is not 127, nothing is stored:
// the flag is cleared and packing continues.
setp.ne.s32 %p388, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs573, %rs1025, 255;
setp.ne.s16 %p389, %rs573, 127;
and.pred %p390, %p388, %p389;
@%p390 bra $L__BB3_340;
// Store at %rd1 + %rd5 + (20548 - %r6829): bytes land at descending addresses.
mov.u32 %r4027, 20548;
sub.s32 %r4028, %r4027, %r6829;
cvt.u64.u32 %rd232, %r4028;
add.s64 %rd233, %rd232, %rd5;
add.s64 %rd234, %rd1, %rd233;
st.global.u8 [%rd234], %rs1025;
add.s32 %r6829, %r6829, 1;
// Flag = 1 when the stored byte exceeds 143.
setp.gt.u16 %p391, %rs573, 143;
selp.u32 %r6831, 1, 0, %p391;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_340:
setp.ne.s32 %p392, %r6733, 0;
mov.u32 %r6744, %r6732;
@%p392 bra $L__BB3_336;
// Emits the pair (value %rs91, length %rs92) into the backward-growing byte
// stream; a zero length emits nothing. Status leaves in %r6832: the incoming
// %r6744, or 1 if the byte counter %r6829 exceeded 2879. Ends at $L__BB3_395.
$L__BB3_341:
setp.eq.s16 %p393, %rs92, 0;
mov.u32 %r6832, %r6744;
@%p393 bra $L__BB3_395;
cvt.u32.u16 %r4029, %rs91;
and.b32 %r6746, %r4029, 255;
cvt.u32.u16 %r4030, %rs92;
and.b32 %r6745, %r4030, 255;
// Packing loop. State: %r6829 = bytes written, %r6830 = bits used in the
// pending byte %rs1025, %r6831 = 0/1 flag that removes one bit of capacity.
$L__BB3_343:
mov.u32 %r814, %r6745;
setp.gt.u32 %p394, %r6829, 2879;
mov.u32 %r6832, 1;
@%p394 bra $L__BB3_395;
// Free bits = 8 - %r6831 - %r6830; n = min(free, bits left).
mov.u32 %r4032, 8;
sub.s32 %r4033, %r4032, %r6831;
sub.s32 %r4034, %r4033, %r6830;
min.u32 %r4035, %r4034, %r814;
// Mask of n ones; the selp covers n == 32.
setp.eq.s32 %p395, %r4035, 32;
mov.u32 %r4036, -1;
shl.b32 %r4037, %r4036, %r4035;
not.b32 %r4038, %r4037;
selp.b32 %r4039, -1, %r4038, %p395;
and.b32 %r4040, %r4039, %r6746;
shl.b32 %r4041, %r4040, %r6830;
cvt.u16.u32 %rs577, %r4041;
or.b16 %rs1025, %rs1025, %rs577;
add.s32 %r6830, %r4035, %r6830;
sub.s32 %r6745, %r814, %r4035;
shr.u32 %r6746, %r6746, %r4035;
// Byte not full: no flush.
setp.gt.u32 %p396, %r4034, %r814;
@%p396 bra $L__BB3_347;
// Byte full. If the flag was set and the byte is not 127, nothing is stored:
// the flag is cleared and packing continues.
setp.ne.s32 %p397, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs578, %rs1025, 255;
setp.ne.s16 %p398, %rs578, 127;
and.pred %p399, %p397, %p398;
@%p399 bra $L__BB3_347;
// Store at %rd1 + %rd5 + (20548 - %r6829): bytes land at descending addresses.
mov.u32 %r4044, 20548;
sub.s32 %r4045, %r4044, %r6829;
cvt.u64.u32 %rd235, %r4045;
add.s64 %rd236, %rd235, %rd5;
add.s64 %rd237, %rd1, %rd236;
st.global.u8 [%rd237], %rs1025;
add.s32 %r6829, %r6829, 1;
// Flag = 1 when the stored byte exceeds 143.
setp.gt.u16 %p400, %rs578, 143;
selp.u32 %r6831, 1, 0, %p400;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_347:
setp.eq.s32 %p401, %r6745, 0;
mov.u32 %r6832, %r6744;
@%p401 bra $L__BB3_395;
bra.uni $L__BB3_343;
// Join point of the three emit paths: sets %r6315 = (%r6518 >> 1) | %r571,
// then advances the loop counter %r6299 by 4 and jumps back to $L__BB3_31
// (outside this view) while %r6299 < %r3200 (unsigned compare).
$L__BB3_395:
shr.u32 %r4166, %r6518, 1;
or.b32 %r6315, %r4166, %r571;
$L__BB3_396:
add.s32 %r6299, %r6299, 4;
setp.lt.u32 %p464, %r6299, %r3200;
@%p464 bra $L__BB3_31;
// After the first loop: zeroes the shared byte cleanup_e_val[%r28 + 1] when
// that index is at most 512, then either leaves through $L__BB3_645 (when
// %r3201 < 3) or sets up the next phase starting at row counter %r6865 = 2.
$L__BB3_397:
add.s32 %r989, %r28, 1;
setp.gt.u32 %p465, %r989, 512;
@%p465 bra $L__BB3_399;
mov.u32 %r4167, _ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val;
add.s32 %r4168, %r4167, %r989;
mov.u16 %rs613, 0;
st.shared.u8 [%r4168], %rs613;
$L__BB3_399:
setp.lt.u32 %p466, %r3201, 3;
@%p466 bra $L__BB3_645;
// %rd18 = global-space pointer from kernel parameter 3; it is the base of the
// 16-bit table indexed at $L__BB3_425. %rd17 = global-space form of %rd48.
ld.param.u64 %rd684, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_param_3];
mov.u32 %r6865, 2;
cvta.to.global.u64 %rd17, %rd48;
cvta.to.global.u64 %rd18, %rd684;
// Head of the outer loop: reads and clears element 0 of the shared arrays
// cleanup_e_val and cleanup_cx_val, then (unless %p2 sends control to
// $L__BB3_644) derives the starting values for the inner loop from elements
// 0 and 1.
$L__BB3_401:
ld.shared.u8 %rs151, [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val];
mov.u16 %rs614, 0;
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val], %rs614;
ld.shared.u8 %rs152, [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val];
st.shared.u8 [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val], %rs614;
@%p2 bra $L__BB3_644;
mov.u32 %r4172, 0;
ld.shared.u8 %rs615, [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val+1];
ld.shared.u8 %rs616, [_ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val+1];
// %r6900 = max(e_val[0], e_val[1]) - 1.
max.u16 %rs618, %rs151, %rs615;
cvt.u32.u16 %r4173, %rs618;
add.s32 %r6900, %r4173, -1;
// %r1007 = next row; %r6898 = row * %r3198 is the element index of this row.
add.s32 %r1007, %r6865, 1;
mul.lo.s32 %r6898, %r6865, %r3198;
// %r6897 = cx_val[1] * 4 + cx_val[0]; it feeds the table index at $L__BB3_425.
mul.wide.u16 %r4174, %rs616, 4;
cvt.u32.u16 %r4175, %rs152;
and.b32 %r4176, %r4175, 255;
add.s32 %r6897, %r4174, %r4176;
mov.u32 %r6881, %r4172;
mov.u32 %r6899, %r4172;
mov.u32 %r6901, %r4172;
// Sample 0 of the current group: loads the 32-bit word at element index
// %r6898 + %rd4 of the array at %rd3 and converts it to sign-magnitude form:
// %r6902 = (|v| << %r27) | (v & 0x80000000), or 0 when v == 0.
// Outputs: %r6903 (bit length), %r6904, and bit 0 of the pattern %r6910.
$L__BB3_403:
cvt.u64.u32 %rd267, %r6898;
add.s64 %rd268, %rd267, %rd4;
shl.b64 %rd269, %rd268, 2;
add.s64 %rd270, %rd3, %rd269;
ld.global.u32 %r1031, [%rd270];
setp.eq.s32 %p468, %r1031, 0;
mov.u32 %r6902, %r4172;
@%p468 bra $L__BB3_405;
and.b32 %r4178, %r1031, -2147483648;
abs.s32 %r4179, %r1031;
shl.b32 %r4180, %r4179, %r27;
or.b32 %r6902, %r4180, %r4178;
$L__BB3_405:
// %r1034 = ((%r6902 << 1) >> %r27) with bit 0 cleared; zero means this
// sample contributes nothing.
shl.b32 %r4184, %r6902, 1;
shr.u32 %r4185, %r4184, %r27;
and.b32 %r1034, %r4185, -2;
setp.eq.s32 %p469, %r1034, 0;
mov.u32 %r6906, 0;
mov.u32 %r6903, %r6906;
mov.u32 %r6904, %r6906;
mov.u32 %r6910, %r6906;
@%p469 bra $L__BB3_407;
// %r6903 = 32 - clz(%r1034 - 1); %r6904 = %r1034 - 2 + sign bit of %r6902.
add.s32 %r4187, %r1034, -1;
clz.b32 %r4188, %r4187;
mov.u32 %r4189, 32;
sub.s32 %r6903, %r4189, %r4188;
shr.u32 %r4190, %r6902, 31;
add.s32 %r4191, %r4190, %r1034;
add.s32 %r6904, %r4191, -2;
mov.u32 %r6910, 1;
// Sample 1 of the current group: the element one row stride (%r3198) after
// sample 0, read only when the next row %r1007 is below %r3201; otherwise it
// is treated as 0 (%r6906 is 0 on entry).
// Outputs: %r6907 (bit length), %r6908, bit 1 of the pattern %r6910, and the
// running maximum bit length %r6926. %p470 is reused at $L__BB3_417.
$L__BB3_407:
setp.ge.u32 %p470, %r1007, %r3201;
@%p470 bra $L__BB3_410;
add.s32 %r4194, %r6898, %r3198;
cvt.u64.u32 %rd271, %r4194;
add.s64 %rd272, %rd271, %rd4;
shl.b64 %rd273, %rd272, 2;
add.s64 %rd274, %rd3, %rd273;
ld.global.u32 %r1040, [%rd274];
setp.eq.s32 %p471, %r1040, 0;
@%p471 bra $L__BB3_410;
// Sign-magnitude form: (|v| << %r27) | sign bit.
and.b32 %r4195, %r1040, -2147483648;
abs.s32 %r4196, %r1040;
shl.b32 %r4197, %r4196, %r27;
or.b32 %r6906, %r4197, %r4195;
$L__BB3_410:
shl.b32 %r4200, %r6906, 1;
shr.u32 %r4201, %r4200, %r27;
and.b32 %r1043, %r4201, -2;
setp.eq.s32 %p472, %r1043, 0;
mov.u32 %r6921, 0;
mov.u32 %r6907, %r6921;
mov.u32 %r6908, %r6921;
mov.u32 %r6926, %r6903;
@%p472 bra $L__BB3_412;
or.b32 %r6910, %r6910, 2;
// %r6907 = 32 - clz(%r1043 - 1); %r6926 = max(%r6903, %r6907).
add.s32 %r4202, %r1043, -1;
clz.b32 %r4203, %r4202;
mov.u32 %r4204, 32;
sub.s32 %r6907, %r4204, %r4203;
max.s32 %r6926, %r6903, %r6907;
shr.u32 %r4205, %r6906, 31;
add.s32 %r4206, %r4205, %r1043;
add.s32 %r6908, %r4206, -2;
// Samples 2 and 3 of the current group: the elements at %r6898 + 1 and
// %r6898 + 1 + %r3198. Both are skipped (all outputs 0, since %r6921 is 0 on
// entry) when %r6881 + 1 >= %r3200; sample 3 is additionally skipped when
// %p470 (%r1007 >= %r3201, set at $L__BB3_407) holds.
// Outputs: %r6922 / %r6924 and pattern bit 2 for sample 2, %r6921 / %r6923 and
// pattern bit 3 for sample 3; %r6926 keeps the running maximum bit length.
// %r7203 ends as %r6898 + 2 when the column exists, else %r6898 + 1.
$L__BB3_412:
add.s32 %r7203, %r6898, 1;
add.s32 %r4211, %r6881, 1;
setp.ge.u32 %p473, %r4211, %r3200;
mov.u32 %r6922, %r6921;
mov.u32 %r6923, %r6921;
mov.u32 %r6924, %r6921;
@%p473 bra $L__BB3_423;
cvt.u64.u32 %rd275, %r7203;
add.s64 %rd276, %rd275, %rd4;
shl.b64 %rd277, %rd276, 2;
add.s64 %rd278, %rd3, %rd277;
ld.global.u32 %r1053, [%rd278];
setp.eq.s32 %p474, %r1053, 0;
mov.u32 %r6922, 0;
mov.u32 %r6911, %r6922;
@%p474 bra $L__BB3_415;
// Sign-magnitude form: (|v| << %r27) | sign bit.
and.b32 %r4213, %r1053, -2147483648;
abs.s32 %r4214, %r1053;
shl.b32 %r4215, %r4214, %r27;
or.b32 %r6911, %r4215, %r4213;
$L__BB3_415:
shl.b32 %r4218, %r6911, 1;
shr.u32 %r4219, %r4218, %r27;
and.b32 %r1056, %r4219, -2;
setp.eq.s32 %p475, %r1056, 0;
mov.u32 %r6924, %r6922;
@%p475 bra $L__BB3_417;
or.b32 %r6910, %r6910, 4;
// %r6922 = 32 - clz(%r1056 - 1), folded into the running maximum.
add.s32 %r4220, %r1056, -1;
clz.b32 %r4221, %r4220;
mov.u32 %r4222, 32;
sub.s32 %r6922, %r4222, %r4221;
max.s32 %r6926, %r6926, %r6922;
shr.u32 %r4223, %r6911, 31;
add.s32 %r4224, %r4223, %r1056;
add.s32 %r6924, %r4224, -2;
$L__BB3_417:
mov.u32 %r6921, 0;
mov.u32 %r6916, %r6921;
@%p470 bra $L__BB3_420;
add.s32 %r4227, %r7203, %r3198;
cvt.u64.u32 %rd279, %r4227;
add.s64 %rd280, %rd279, %rd4;
shl.b64 %rd281, %rd280, 2;
add.s64 %rd282, %rd3, %rd281;
ld.global.u32 %r1065, [%rd282];
setp.eq.s32 %p477, %r1065, 0;
@%p477 bra $L__BB3_420;
and.b32 %r4228, %r1065, -2147483648;
abs.s32 %r4229, %r1065;
shl.b32 %r4230, %r4229, %r27;
or.b32 %r6916, %r4230, %r4228;
$L__BB3_420:
shl.b32 %r4233, %r6916, 1;
shr.u32 %r4234, %r4233, %r27;
and.b32 %r1068, %r4234, -2;
setp.eq.s32 %p478, %r1068, 0;
mov.u32 %r6923, %r6921;
@%p478 bra $L__BB3_422;
or.b32 %r6910, %r6910, 8;
// %r6921 = 32 - clz(%r1068 - 1), folded into the running maximum.
add.s32 %r4235, %r1068, -1;
clz.b32 %r4236, %r4235;
mov.u32 %r4237, 32;
sub.s32 %r6921, %r4237, %r4236;
max.s32 %r6926, %r6926, %r6921;
shr.u32 %r4238, %r6916, 31;
add.s32 %r4239, %r4238, %r1068;
add.s32 %r6923, %r4239, -2;
$L__BB3_422:
add.s32 %r7203, %r6898, 2;
// Derives two values from the four per-sample bit lengths:
//   %r1085 = max(k, %r6926), where k = %r6900 if %r6900 > 1 and the pattern
//            %r6910 has more than one bit set, else k = 1;
//   %r6928 = 4-bit mask of the samples whose bit length equals %r6926, built
//            only when %r1085 - k >= 1; otherwise 0.
$L__BB3_423:
// (x - 1) & x is non-zero exactly when x has two or more bits set.
add.s32 %r4241, %r6910, -1;
and.b32 %r4242, %r4241, %r6910;
setp.ne.s32 %p479, %r4242, 0;
mov.u32 %r6928, 0;
setp.gt.s32 %p480, %r6900, 1;
and.pred %p481, %p480, %p479;
selp.b32 %r4243, %r6900, 1, %p481;
max.s32 %r1085, %r4243, %r6926;
sub.s32 %r1086, %r1085, %r4243;
setp.lt.s32 %p482, %r1086, 1;
@%p482 bra $L__BB3_425;
// Bit 0: sample 0 (%r6903), bit 1: sample 1 (%r6907), merged with bfi.
setp.eq.s32 %p483, %r6903, %r6926;
selp.u32 %r4244, 1, 0, %p483;
setp.eq.s32 %p484, %r6907, %r6926;
selp.u32 %r4245, -1, 0, %p484;
bfi.b32 %r4246, %r4245, %r4244, 1, 1;
// Bit 2: sample 2 (%r6922).
setp.eq.s32 %p485, %r6922, %r6926;
selp.u16 %rs619, 1, 0, %p485;
mul.wide.u16 %r4247, %rs619, 4;
or.b32 %r4248, %r4246, %r4247;
// Bit 3: sample 3 (%r6921).
setp.eq.s32 %p486, %r6921, %r6926;
selp.u16 %rs620, 1, 0, %p486;
mul.wide.u16 %r4249, %rs620, 8;
or.b32 %r6928, %r4248, %r4249;
// Look up a 16-bit table entry and append its variable-length code to a
// byte stream that is written backwards in global memory.
// NOTE(review): the stuffing rule below (thresholds 143 and 127) looks like
// the HTJ2K VLC byte-stream rule - confirm against the kernel source.
$L__BB3_425:
// Table index = (%r6897 << 8) | (%r6910 << 4) | %r6928; entries are 2 bytes
// wide and the table base is %rd18.
shl.b32 %r4250, %r6910, 4;
shl.b32 %r4251, %r6897, 8;
or.b32 %r4252, %r4250, %r4251;
or.b32 %r4253, %r4252, %r6928;
mul.wide.u32 %rd283, %r4253, 2;
add.s64 %rd284, %rd18, %rd283;
ld.global.u16 %rs155, [%rd284];
// Entry bits 4..6 give the code length in bits (%rs156).
shr.u16 %rs621, %rs155, 4;
and.b16 %rs156, %rs621, 7;
setp.eq.s16 %p487, %rs156, 0;
// Zero-length code: nothing to emit, status %r6940 = %r6832.
mov.u32 %r6940, %r6832;
@%p487 bra $L__BB3_432;
// %r6929 = bits still to emit, %r6930 = code bits (entry bits 8..15).
cvt.u32.u16 %r6929, %rs156;
shr.u16 %rs622, %rs155, 8;
cvt.u32.u16 %r6930, %rs622;
// Emit loop: one pass per (partial) output byte.
$L__BB3_427:
mov.u32 %r1091, %r6929;
// Capacity guard: once %r6829 exceeds 2879, set %r6940 = 1 and stop.
setp.gt.u32 %p488, %r6829, 2879;
mov.u32 %r6940, 1;
@%p488 bra $L__BB3_432;
// Room in the current byte = 8 - %r6831 (reserved bit count, 0 or 1)
// - %r6830 (bits already used).
mov.u32 %r4255, 8;
sub.s32 %r4256, %r4255, %r6831;
sub.s32 %r4257, %r4256, %r6830;
// n = min(room, bits remaining); mask = (1 << n) - 1, with n == 32 handled
// explicitly by the selp.
min.u32 %r4258, %r4257, %r1091;
setp.eq.s32 %p489, %r4258, 32;
mov.u32 %r4259, -1;
shl.b32 %r4260, %r4259, %r4258;
not.b32 %r4261, %r4260;
selp.b32 %r4262, -1, %r4261, %p489;
// OR the low n code bits into the accumulator %rs1025 at bit offset %r6830.
and.b32 %r4263, %r4262, %r6930;
shl.b32 %r4264, %r4263, %r6830;
cvt.u16.u32 %rs623, %r4264;
or.b16 %rs1025, %rs1025, %rs623;
// Advance the bit offset and consume n bits from the code.
add.s32 %r6830, %r4258, %r6830;
sub.s32 %r6929, %r1091, %r4258;
shr.u32 %r6930, %r6930, %r4258;
// Room was larger than the remaining bits: byte not full yet.
setp.gt.u32 %p490, %r4257, %r1091;
@%p490 bra $L__BB3_431;
// Byte is full for the current room. %p491 samples the old %r6831 before it
// is cleared; if a bit had been reserved and the low byte is not 127, the
// store is skipped, and with %r6831 now 0 the byte gains one more bit of room.
setp.ne.s32 %p491, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs624, %rs1025, 255;
setp.ne.s16 %p492, %rs624, 127;
and.pred %p493, %p491, %p492;
@%p493 bra $L__BB3_431;
// Store the byte at %rd1 + %rd5 + (20548 - %r6829): the stream grows
// towards lower addresses as %r6829 increases.
mov.u32 %r4267, 20548;
sub.s32 %r4268, %r4267, %r6829;
cvt.u64.u32 %rd285, %r4268;
add.s64 %rd286, %rd285, %rd5;
add.s64 %rd287, %rd1, %rd286;
st.global.u8 [%rd287], %rs1025;
add.s32 %r6829, %r6829, 1;
// A stored byte above 143 reserves one bit in the next byte.
setp.gt.u16 %p494, %rs624, 143;
selp.u32 %r6831, 1, 0, %p494;
// Start a fresh byte.
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_431:
// Loop while code bits remain; on normal completion %r6940 = %r6832.
setp.ne.s32 %p495, %r6929, 0;
mov.u32 %r6940, %r6832;
@%p495 bra $L__BB3_427;
$L__BB3_432:
setp.ne.s32 %p496, %r6897, 0;
@%p496 bra $L__BB3_480;
setp.eq.s32 %p497, %r6910, 0;
add.s32 %r4269, %r6381, 17477;
cvt.u64.u32 %rd288, %r4269;
add.s64 %rd289, %rd288, %rd5;
add.s64 %rd19, %rd1, %rd289;
@%p497 bra $L__BB3_472;
shl.b16 %rs956, %rs956, 1;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p498, %r6390, 0;
mov.u32 %r6976, %r6598;
@%p498 bra $L__BB3_437;
bra.uni $L__BB3_435;
$L__BB3_437:
setp.lt.u32 %p500, %r6596, 3;
mov.u32 %r6944, 0;
@%p500 bra $L__BB3_440;
setp.lt.u32 %p501, %r6596, 6;
mov.u32 %r6944, 1;
@%p501 bra $L__BB3_440;
setp.lt.u32 %p502, %r6596, 9;
setp.eq.s32 %p503, %r6596, 11;
selp.b32 %r4275, 4, 5, %p503;
setp.lt.u32 %p504, %r6596, 11;
selp.b32 %r4276, 3, %r4275, %p504;
selp.b32 %r6944, 2, %r4276, %p502;
$L__BB3_440:
setp.eq.s32 %p505, %r6944, 0;
@%p505 bra $L__BB3_468;
add.s32 %r1115, %r6944, -1;
and.b32 %r1116, %r6944, 3;
setp.eq.s32 %p506, %r1116, 0;
mov.u32 %r6954, %r6944;
mov.u32 %r6955, %r6976;
@%p506 bra $L__BB3_453;
mov.u32 %r4278, 1;
shl.b32 %r4279, %r4278, %r1115;
and.b32 %r4280, %r4279, %r6595;
setp.ne.s32 %p507, %r4280, 0;
selp.u32 %r4281, 1, 0, %p507;
cvt.u32.u16 %r4282, %rs956;
bfi.b32 %r4283, %r4282, %r4281, 1, 8;
cvt.u16.u32 %rs956, %r4283;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p508, %r6390, 0;
mov.u32 %r6955, %r6976;
@%p508 bra $L__BB3_445;
setp.gt.u32 %p509, %r6381, 191;
mov.u32 %r6390, 0;
mov.u32 %r6955, %r4278;
@%p509 bra $L__BB3_445;
add.s32 %r4287, %r6381, 17477;
cvt.u64.u32 %rd290, %r4287;
add.s64 %rd291, %rd290, %rd5;
add.s64 %rd292, %rd1, %rd291;
st.global.u8 [%rd292], %rs956;
add.s32 %r6381, %r6381, 1;
mov.u16 %rs956, 0;
mov.u32 %r6390, 8;
mov.u32 %r6955, %r6976;
$L__BB3_445:
setp.eq.s32 %p510, %r1116, 1;
mov.u32 %r6976, %r6955;
mov.u32 %r6954, %r1115;
@%p510 bra $L__BB3_453;
add.s32 %r6954, %r6944, -2;
mov.u32 %r4288, 1;
shl.b32 %r4289, %r4288, %r6954;
and.b32 %r4290, %r4289, %r6595;
setp.ne.s32 %p511, %r4290, 0;
selp.u32 %r4291, 1, 0, %p511;
cvt.u32.u16 %r4292, %rs956;
bfi.b32 %r4293, %r4292, %r4291, 1, 8;
cvt.u16.u32 %rs956, %r4293;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p512, %r6390, 0;
mov.u32 %r6950, %r6955;
@%p512 bra $L__BB3_449;
setp.gt.u32 %p513, %r6381, 191;
mov.u32 %r6390, 0;
mov.u32 %r6950, %r4288;
@%p513 bra $L__BB3_449;
add.s32 %r4296, %r6381, 17477;
cvt.u64.u32 %rd293, %r4296;
add.s64 %rd294, %rd293, %rd5;
add.s64 %rd295, %rd1, %rd294;
and.b16 %rs631, %rs956, 255;
st.global.u8 [%rd295], %rs956;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p514, %rs631, 255;
selp.b32 %r6390, 7, 8, %p514;
mov.u16 %rs956, 0;
mov.u32 %r6950, %r6955;
$L__BB3_449:
setp.eq.s32 %p515, %r1116, 2;
mov.u32 %r6976, %r6950;
mov.u32 %r6955, %r6950;
@%p515 bra $L__BB3_453;
add.s32 %r6954, %r6944, -3;
mov.u32 %r4297, 1;
shl.b32 %r4298, %r4297, %r6954;
and.b32 %r4299, %r4298, %r6595;
setp.ne.s32 %p516, %r4299, 0;
selp.u32 %r4300, 1, 0, %p516;
cvt.u32.u16 %r4301, %rs956;
bfi.b32 %r4302, %r4301, %r4300, 1, 8;
cvt.u16.u32 %rs956, %r4302;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p517, %r6390, 0;
mov.u32 %r6976, %r6950;
mov.u32 %r6955, %r6950;
@%p517 bra $L__BB3_453;
setp.gt.u32 %p518, %r6381, 191;
mov.u32 %r6390, 0;
mov.u32 %r6976, %r4297;
mov.u32 %r6955, %r4297;
@%p518 bra $L__BB3_453;
add.s32 %r4307, %r6381, 17477;
cvt.u64.u32 %rd296, %r4307;
add.s64 %rd297, %rd296, %rd5;
add.s64 %rd298, %rd1, %rd297;
and.b16 %rs634, %rs956, 255;
st.global.u8 [%rd298], %rs956;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p519, %rs634, 255;
selp.b32 %r6390, 7, 8, %p519;
mov.u16 %rs956, 0;
mov.u32 %r6976, %r6950;
mov.u32 %r6955, %r6950;
$L__BB3_453:
setp.lt.u32 %p520, %r1115, 3;
@%p520 bra $L__BB3_468;
mov.u32 %r6976, %r6955;
$L__BB3_455:
add.s32 %r4308, %r6954, -1;
mov.u32 %r4309, 1;
shl.b32 %r4310, %r4309, %r4308;
and.b32 %r4311, %r4310, %r6595;
setp.ne.s32 %p521, %r4311, 0;
selp.u32 %r4312, 1, 0, %p521;
cvt.u32.u16 %r4313, %rs956;
bfi.b32 %r6964, %r4313, %r4312, 1, 8;
add.s32 %r6963, %r6390, -1;
setp.ne.s32 %p522, %r6963, 0;
mov.u32 %r6965, %r6976;
@%p522 bra $L__BB3_458;
setp.gt.u32 %p523, %r6381, 191;
mov.u32 %r6963, 0;
mov.u32 %r6965, %r4309;
@%p523 bra $L__BB3_458;
cvt.u16.u32 %rs635, %r6964;
and.b16 %rs636, %rs635, 255;
add.s32 %r4317, %r6381, 17477;
cvt.u64.u32 %rd299, %r4317;
add.s64 %rd300, %rd299, %rd5;
add.s64 %rd301, %rd1, %rd300;
st.global.u8 [%rd301], %rs635;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p524, %rs636, 255;
selp.b32 %r6963, 7, 8, %p524;
mov.u32 %r6964, 0;
mov.u32 %r6965, %r6976;
$L__BB3_458:
add.s32 %r4318, %r6954, -2;
shl.b32 %r4320, %r4309, %r4318;
and.b32 %r4321, %r4320, %r6595;
setp.ne.s32 %p525, %r4321, 0;
and.b32 %r4322, %r6964, 127;
selp.u32 %r4323, 1, 0, %p525;
bfi.b32 %r6968, %r4322, %r4323, 1, 7;
add.s32 %r6967, %r6963, -1;
setp.ne.s32 %p526, %r6967, 0;
mov.u32 %r6969, %r6965;
@%p526 bra $L__BB3_461;
setp.gt.u32 %p527, %r6381, 191;
mov.u32 %r6969, 1;
mov.u32 %r6967, 0;
@%p527 bra $L__BB3_461;
cvt.u16.u32 %rs637, %r6968;
and.b16 %rs638, %rs637, 255;
add.s32 %r4327, %r6381, 17477;
cvt.u64.u32 %rd302, %r4327;
add.s64 %rd303, %rd302, %rd5;
add.s64 %rd304, %rd1, %rd303;
st.global.u8 [%rd304], %rs637;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p528, %rs638, 255;
selp.b32 %r6967, 7, 8, %p528;
mov.u32 %r6968, 0;
mov.u32 %r6969, %r6965;
$L__BB3_461:
add.s32 %r4328, %r6954, -3;
mov.u32 %r4329, 1;
shl.b32 %r4330, %r4329, %r4328;
and.b32 %r4331, %r4330, %r6595;
setp.ne.s32 %p529, %r4331, 0;
and.b32 %r4332, %r6968, 127;
selp.u32 %r4333, 1, 0, %p529;
bfi.b32 %r6972, %r4332, %r4333, 1, 7;
add.s32 %r6971, %r6967, -1;
setp.ne.s32 %p530, %r6971, 0;
mov.u32 %r6973, %r6969;
@%p530 bra $L__BB3_464;
setp.gt.u32 %p531, %r6381, 191;
mov.u32 %r6971, 0;
mov.u32 %r6973, %r4329;
@%p531 bra $L__BB3_464;
cvt.u16.u32 %rs639, %r6972;
and.b16 %rs640, %rs639, 255;
add.s32 %r4337, %r6381, 17477;
cvt.u64.u32 %rd305, %r4337;
add.s64 %rd306, %rd305, %rd5;
add.s64 %rd307, %rd1, %rd306;
st.global.u8 [%rd307], %rs639;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p532, %rs640, 255;
selp.b32 %r6971, 7, 8, %p532;
mov.u32 %r6972, 0;
mov.u32 %r6973, %r6969;
$L__BB3_464:
add.s32 %r6954, %r6954, -4;
shl.b32 %r4339, %r4329, %r6954;
and.b32 %r4340, %r4339, %r6595;
setp.ne.s32 %p533, %r4340, 0;
and.b32 %r4341, %r6972, 127;
selp.u32 %r4342, 1, 0, %p533;
bfi.b32 %r4343, %r4341, %r4342, 1, 15;
cvt.u16.u32 %rs956, %r4343;
add.s32 %r6390, %r6971, -1;
setp.ne.s32 %p534, %r6390, 0;
mov.u32 %r6976, %r6973;
@%p534 bra $L__BB3_467;
setp.gt.u32 %p535, %r6381, 191;
mov.u32 %r6976, 1;
mov.u32 %r6390, 0;
@%p535 bra $L__BB3_467;
add.s32 %r4346, %r6381, 17477;
cvt.u64.u32 %rd308, %r4346;
add.s64 %rd309, %rd308, %rd5;
add.s64 %rd310, %rd1, %rd309;
and.b16 %rs642, %rs956, 255;
st.global.u8 [%rd310], %rs956;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p536, %rs642, 255;
selp.b32 %r6390, 7, 8, %p536;
mov.u16 %rs956, 0;
mov.u32 %r6976, %r6973;
$L__BB3_467:
setp.ne.s32 %p537, %r6954, 0;
@%p537 bra $L__BB3_455;
$L__BB3_468:
add.s32 %r4348, %r6596, -1;
setp.eq.s32 %p538, %r6596, 0;
mov.u32 %r6595, 0;
selp.b32 %r6596, 0, %r4348, %p538;
setp.lt.u32 %p539, %r6596, 3;
mov.u32 %r6980, %r6595;
@%p539 bra $L__BB3_471;
setp.lt.u32 %p540, %r6596, 6;
mov.u32 %r6980, 1;
@%p540 bra $L__BB3_471;
setp.lt.u32 %p541, %r6596, 9;
setp.eq.s32 %p542, %r6596, 11;
selp.b32 %r4350, 4, 5, %p542;
setp.lt.u32 %p543, %r6596, 11;
selp.b32 %r4351, 3, %r4350, %p543;
selp.b32 %r6980, 2, %r4351, %p541;
$L__BB3_471:
mov.u32 %r4353, 1;
shl.b32 %r6597, %r4353, %r6980;
mov.u32 %r6598, %r6976;
bra.uni $L__BB3_480;
// Path taken when the flag mask %r6910 is zero: count the event in %r6595 and,
// once the count reaches the threshold %r6597, emit a single 1 bit and step
// the state %r6596 up.
// NOTE(review): the state/threshold scheme resembles the HTJ2K MEL coder -
// confirm against the kernel source.
$L__BB3_472:
add.s32 %r6595, %r6595, 1;
setp.lt.u32 %p544, %r6595, %r6597;
// Threshold not reached yet: nothing is emitted.
@%p544 bra $L__BB3_480;
// Shift a 1 bit into the bit accumulator %rs956.
shl.b16 %rs643, %rs956, 1;
or.b16 %rs956, %rs643, 1;
// %r6390 = bits still free in the current byte.
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p545, %r6390, 0;
// %r6983 carries the status flag; default is the incoming %r6598.
mov.u32 %r6983, %r6598;
@%p545 bra $L__BB3_476;
// Byte complete. If %r6381 is already above 191 the byte is not stored
// and the status flag becomes 1.
setp.gt.u32 %p546, %r6381, 191;
mov.u32 %r6983, 1;
mov.u32 %r6390, 0;
@%p546 bra $L__BB3_476;
// Store the byte through %rd19 (computed earlier as
// %rd1 + %rd5 + %r6381 + 17477) and advance the byte count.
and.b16 %rs645, %rs956, 255;
st.global.u8 [%rd19], %rs956;
add.s32 %r6381, %r6381, 1;
// After a 255 byte the next byte only takes 7 bits, otherwise 8.
setp.eq.s16 %p547, %rs645, 255;
selp.b32 %r6390, 7, 8, %p547;
mov.u16 %rs956, 0;
mov.u32 %r6983, %r6598;
$L__BB3_476:
// State update: %r6596 = min(%r6596 + 1, 12); the event counter is reset.
add.s32 %r4357, %r6596, 1;
min.u32 %r6596, %r4357, 12;
setp.lt.u32 %p548, %r6596, 3;
mov.u32 %r6595, 0;
// Exponent %r6984 from the state: 0..2 -> 0, 3..5 -> 1, 6..8 -> 2,
// 9..10 -> 3, 11 -> 4, anything else -> 5.
mov.u32 %r6984, %r6595;
@%p548 bra $L__BB3_479;
setp.lt.u32 %p549, %r6596, 6;
mov.u32 %r6984, 1;
@%p549 bra $L__BB3_479;
setp.lt.u32 %p550, %r6596, 9;
setp.eq.s32 %p551, %r6596, 11;
selp.b32 %r4359, 4, 5, %p551;
setp.lt.u32 %p552, %r6596, 11;
selp.b32 %r4360, 3, %r4359, %p552;
selp.b32 %r6984, 2, %r4360, %p550;
$L__BB3_479:
// New threshold %r6597 = 1 << exponent; publish the status flag in %r6598.
mov.u32 %r4362, 1;
shl.b32 %r6597, %r4362, %r6984;
mov.u32 %r6598, %r6983;
// Emit the value bits for the sample flagged by bit 0 of %r6910 into a
// forward-growing byte stream at %rd1 + %rd5 + %r7016.
$L__BB3_480:
// %r1199 = low 4 bits of the table entry %rs155 (one bit per sample).
and.b16 %rs646, %rs155, 15;
cvt.u32.u16 %r1199, %rs646;
// %p556 = !(bit 0 of %r6910); the xor with a false predicate is a no-op.
and.b32 %r4363, %r6910, 1;
setp.eq.b32 %p553, %r4363, 1;
mov.pred %p554, 0;
xor.pred %p555, %p553, %p554;
not.pred %p556, %p555;
// Status %r7005 defaults to the incoming %r7050.
mov.u32 %r7005, %r7050;
// Sample not flagged: nothing to emit.
@%p556 bra $L__BB3_487;
// Bit count %r6991 = %r1085 - (bit 0 of %r1199).
and.b32 %r4364, %r1199, 1;
sub.s32 %r6991, %r1085, %r4364;
setp.eq.s32 %p557, %r6991, 0;
mov.u32 %r7005, %r7050;
@%p557 bra $L__BB3_487;
// Payload %r6992 = %r6904 & ((1 << %r6991) - 1).
mov.u32 %r4365, -1;
shl.b32 %r4366, %r4365, %r6991;
not.b32 %r4367, %r4366;
and.b32 %r6992, %r6904, %r4367;
// Emit loop: one pass per (partial) output byte.
$L__BB3_483:
// Capacity guard: once %r7016 exceeds 17476, set status 1 and stop.
setp.gt.u32 %p558, %r7016, 17476;
mov.u32 %r7005, 1;
@%p558 bra $L__BB3_487;
// Room = %r7017 (capacity of this byte, 7 or 8) - %r7018 (bits used);
// n = min(room, bits remaining).
sub.s32 %r4369, %r7017, %r7018;
min.u32 %r4370, %r4369, %r6991;
// mask = (1 << n) - 1, with n == 32 handled explicitly by the selp.
setp.eq.s32 %p559, %r4370, 32;
mov.u32 %r4371, -1;
shl.b32 %r4372, %r4371, %r4370;
not.b32 %r4373, %r4372;
selp.b32 %r4374, -1, %r4373, %p559;
// OR the low n payload bits into the accumulator %r7019 at offset %r7018.
and.b32 %r4375, %r4374, %r6992;
shl.b32 %r4376, %r4375, %r7018;
or.b32 %r7019, %r4376, %r7019;
// Advance the bit offset and consume n bits.
add.s32 %r7018, %r4370, %r7018;
shr.u32 %r6992, %r6992, %r4370;
sub.s32 %r6991, %r6991, %r4370;
// Byte not yet full: skip the store.
setp.lt.u32 %p560, %r7018, %r7017;
@%p560 bra $L__BB3_486;
// Store the completed byte (st.global.u8 writes the low 8 bits of %r7019)
// and advance the write position.
cvt.u64.u32 %rd311, %r7016;
add.s64 %rd312, %rd311, %rd5;
add.s64 %rd313, %rd1, %rd312;
st.global.u8 [%rd313], %r7019;
add.s32 %r7016, %r7016, 1;
// After a 255 byte the next byte holds only 7 bits, otherwise 8.
setp.eq.s32 %p561, %r7019, 255;
selp.b32 %r7017, 7, 8, %p561;
mov.u32 %r7018, 0;
mov.u32 %r7019, %r7018;
$L__BB3_486:
// Loop while payload bits remain; on normal completion status = %r7050.
setp.ne.s32 %p562, %r6991, 0;
mov.u32 %r7005, %r7050;
@%p562 bra $L__BB3_483;
$L__BB3_487:
and.b32 %r1223, %r6910, 2;
setp.eq.s32 %p563, %r1223, 0;
mov.u32 %r7020, %r7005;
@%p563 bra $L__BB3_494;
shr.u32 %r4379, %r1199, 1;
and.b32 %r4380, %r4379, 1;
sub.s32 %r7006, %r1085, %r4380;
setp.eq.s32 %p564, %r7006, 0;
mov.u32 %r7020, %r7005;
@%p564 bra $L__BB3_494;
mov.u32 %r4381, -1;
shl.b32 %r4382, %r4381, %r7006;
not.b32 %r4383, %r4382;
and.b32 %r7007, %r6908, %r4383;
$L__BB3_490:
setp.gt.u32 %p565, %r7016, 17476;
mov.u32 %r7020, 1;
@%p565 bra $L__BB3_494;
sub.s32 %r4385, %r7017, %r7018;
min.u32 %r4386, %r4385, %r7006;
setp.eq.s32 %p566, %r4386, 32;
mov.u32 %r4387, -1;
shl.b32 %r4388, %r4387, %r4386;
not.b32 %r4389, %r4388;
selp.b32 %r4390, -1, %r4389, %p566;
and.b32 %r4391, %r4390, %r7007;
shl.b32 %r4392, %r4391, %r7018;
or.b32 %r7019, %r4392, %r7019;
add.s32 %r7018, %r4386, %r7018;
shr.u32 %r7007, %r7007, %r4386;
sub.s32 %r7006, %r7006, %r4386;
setp.lt.u32 %p567, %r7018, %r7017;
@%p567 bra $L__BB3_493;
cvt.u64.u32 %rd314, %r7016;
add.s64 %rd315, %rd314, %rd5;
add.s64 %rd316, %rd1, %rd315;
st.global.u8 [%rd316], %r7019;
add.s32 %r7016, %r7016, 1;
setp.eq.s32 %p568, %r7019, 255;
selp.b32 %r7017, 7, 8, %p568;
mov.u32 %r7018, 0;
mov.u32 %r7019, %r7018;
$L__BB3_493:
setp.ne.s32 %p569, %r7006, 0;
mov.u32 %r7020, %r7005;
@%p569 bra $L__BB3_490;
$L__BB3_494:
and.b32 %r1247, %r6910, 4;
setp.eq.s32 %p570, %r1247, 0;
mov.u32 %r7035, %r7020;
@%p570 bra $L__BB3_501;
shr.u32 %r4395, %r1199, 2;
and.b32 %r4396, %r4395, 1;
sub.s32 %r7021, %r1085, %r4396;
setp.eq.s32 %p571, %r7021, 0;
mov.u32 %r7035, %r7020;
@%p571 bra $L__BB3_501;
mov.u32 %r4397, -1;
shl.b32 %r4398, %r4397, %r7021;
not.b32 %r4399, %r4398;
and.b32 %r7022, %r6924, %r4399;
$L__BB3_497:
setp.gt.u32 %p572, %r7016, 17476;
mov.u32 %r7035, 1;
@%p572 bra $L__BB3_501;
sub.s32 %r4401, %r7017, %r7018;
min.u32 %r4402, %r4401, %r7021;
setp.eq.s32 %p573, %r4402, 32;
mov.u32 %r4403, -1;
shl.b32 %r4404, %r4403, %r4402;
not.b32 %r4405, %r4404;
selp.b32 %r4406, -1, %r4405, %p573;
and.b32 %r4407, %r4406, %r7022;
shl.b32 %r4408, %r4407, %r7018;
or.b32 %r7019, %r4408, %r7019;
add.s32 %r7018, %r4402, %r7018;
shr.u32 %r7022, %r7022, %r4402;
sub.s32 %r7021, %r7021, %r4402;
setp.lt.u32 %p574, %r7018, %r7017;
@%p574 bra $L__BB3_500;
cvt.u64.u32 %rd317, %r7016;
add.s64 %rd318, %rd317, %rd5;
add.s64 %rd319, %rd1, %rd318;
st.global.u8 [%rd319], %r7019;
add.s32 %r7016, %r7016, 1;
setp.eq.s32 %p575, %r7019, 255;
selp.b32 %r7017, 7, 8, %p575;
mov.u32 %r7018, 0;
mov.u32 %r7019, %r7018;
$L__BB3_500:
setp.ne.s32 %p576, %r7021, 0;
mov.u32 %r7035, %r7020;
@%p576 bra $L__BB3_497;
$L__BB3_501:
and.b32 %r1271, %r6910, 8;
setp.eq.s32 %p577, %r1271, 0;
mov.u32 %r7050, %r7035;
@%p577 bra $L__BB3_508;
shr.u32 %r4411, %r1199, 3;
sub.s32 %r7036, %r1085, %r4411;
setp.eq.s32 %p578, %r7036, 0;
mov.u32 %r7050, %r7035;
@%p578 bra $L__BB3_508;
mov.u32 %r4412, -1;
shl.b32 %r4413, %r4412, %r7036;
not.b32 %r4414, %r4413;
and.b32 %r7037, %r6923, %r4414;
$L__BB3_504:
setp.gt.u32 %p579, %r7016, 17476;
mov.u32 %r7050, 1;
@%p579 bra $L__BB3_508;
sub.s32 %r4416, %r7017, %r7018;
min.u32 %r4417, %r4416, %r7036;
setp.eq.s32 %p580, %r4417, 32;
mov.u32 %r4418, -1;
shl.b32 %r4419, %r4418, %r4417;
not.b32 %r4420, %r4419;
selp.b32 %r4421, -1, %r4420, %p580;
and.b32 %r4422, %r4421, %r7037;
shl.b32 %r4423, %r4422, %r7018;
or.b32 %r7019, %r4423, %r7019;
add.s32 %r7018, %r4417, %r7018;
shr.u32 %r7037, %r7037, %r4417;
sub.s32 %r7036, %r7036, %r4417;
setp.lt.u32 %p581, %r7018, %r7017;
@%p581 bra $L__BB3_507;
cvt.u64.u32 %rd320, %r7016;
add.s64 %rd321, %rd320, %rd5;
add.s64 %rd322, %rd1, %rd321;
st.global.u8 [%rd322], %r7019;
add.s32 %r7016, %r7016, 1;
setp.eq.s32 %p582, %r7019, 255;
selp.b32 %r7017, 7, 8, %p582;
mov.u32 %r7018, 0;
mov.u32 %r7019, %r7018;
$L__BB3_507:
setp.ne.s32 %p583, %r7036, 0;
mov.u32 %r7050, %r7035;
@%p583 bra $L__BB3_504;
// Update the two shared-memory byte arrays (cleanup_e_val / cleanup_cx_val)
// for the current position, derive %r6900 for the next step, and test whether
// another step fits before %r3200.
$L__BB3_508:
// %r1295 = &cleanup_e_val[%r6899].
mov.u32 %r4428, _ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE13cleanup_e_val;
add.s32 %r1295, %r4428, %r6899;
ld.shared.u8 %rs647, [%r1295];
mov.u32 %r6897, 0;
// e_val[%r6899] = max(existing byte, low byte of %r6907), unsigned compare.
cvt.u32.u16 %r4429, %rs647;
and.b32 %r4430, %r4429, 255;
and.b32 %r4431, %r6907, 255;
setp.lt.u32 %p584, %r4431, %r4430;
cvt.u16.u32 %rs648, %r6907;
selp.b16 %rs649, %rs647, %rs648, %p584;
st.shared.u8 [%r1295], %rs649;
// Pick the larger of e_val[%r6899 + 1] and e_val[%r6899 + 2] by re-loading
// the winning index into %rs178 (these loads precede the store to +1 below).
ld.shared.u8 %rs177, [%r1295+2];
ld.shared.u8 %rs650, [%r1295+1];
setp.gt.u16 %p585, %rs650, %rs177;
add.s32 %r7202, %r6899, 1;
add.s32 %r4432, %r6899, 2;
selp.b32 %r4433, %r7202, %r4432, %p585;
add.s32 %r4434, %r4428, %r4433;
ld.shared.u8 %rs178, [%r4434];
// %r6900 = that byte - 1.
cvt.u32.u16 %r4435, %rs178;
and.b32 %r4436, %r4435, 255;
add.s32 %r6900, %r4436, -1;
cvt.u16.u32 %rs179, %r6921;
// %rs652 = (%r6910 & 2) >> 1, i.e. flag bit 1 as 0/1.
cvt.u16.u32 %rs651, %r1223;
shr.u16 %rs652, %rs651, 1;
// %r1298 = &cleanup_cx_val[%r6901].
mov.u32 %r4437, _ZZ52 j2k_htj2k_encode_codeblocks_multi_input_cleanupE14cleanup_cx_val;
add.s32 %r1298, %r4437, %r6901;
// e_val[%r6899 + 1] = low byte of %r6921.
st.shared.u8 [%r1295+1], %r6921;
// cx_val[%r6901] |= flag bit 1.
ld.shared.u8 %rs653, [%r1298];
or.b16 %rs654, %rs653, %rs652;
st.shared.u8 [%r1298], %rs654;
add.s32 %r6901, %r6901, 1;
// Read the old cx_val bytes at +1 and +2 (relative to the pre-increment
// index) before +1 is overwritten with flag bit 3 ((%r6910 & 8) >> 3).
ld.shared.u8 %rs180, [%r1298+1];
ld.shared.u8 %r1300, [%r1298+2];
shr.u32 %r1301, %r1271, 3;
st.shared.u8 [%r1298+1], %r1301;
// If %r6881 + 2 >= %r3200, leave with %r7220 = 0 (the value just put in %r6897).
add.s32 %r4438, %r6881, 2;
setp.ge.u32 %p586, %r4438, %r3200;
mov.u32 %r7220, %r6897;
@%p586 bra $L__BB3_615;
cvt.u64.u32 %rd323, %r7203;
add.s64 %rd324, %rd323, %rd4;
shl.b64 %rd325, %rd324, 2;
add.s64 %rd326, %rd3, %rd325;
ld.global.u32 %r1302, [%rd326];
setp.eq.s32 %p587, %r1302, 0;
mov.u32 %r7052, 0;
mov.u32 %r7051, %r7052;
@%p587 bra $L__BB3_511;
and.b32 %r4440, %r1302, -2147483648;
abs.s32 %r4441, %r1302;
shl.b32 %r4442, %r4441, %r27;
or.b32 %r7051, %r4442, %r4440;
$L__BB3_511:
shl.b32 %r4446, %r7051, 1;
shr.u32 %r4447, %r4446, %r27;
and.b32 %r1305, %r4447, -2;
setp.eq.s32 %p588, %r1305, 0;
mov.u32 %r7053, %r7052;
mov.u32 %r7059, %r7052;
@%p588 bra $L__BB3_513;
add.s32 %r4449, %r1305, -1;
clz.b32 %r4450, %r4449;
mov.u32 %r4451, 32;
sub.s32 %r7052, %r4451, %r4450;
shr.u32 %r4452, %r7051, 31;
add.s32 %r4453, %r4452, %r1305;
add.s32 %r7053, %r4453, -2;
mov.u32 %r7059, 1;
$L__BB3_513:
mov.u32 %r7056, 0;
mov.u32 %r7055, %r7056;
@%p470 bra $L__BB3_516;
add.s32 %r4456, %r7203, %r3198;
cvt.u64.u32 %rd327, %r4456;
add.s64 %rd328, %rd327, %rd4;
shl.b64 %rd329, %rd328, 2;
add.s64 %rd330, %rd3, %rd329;
ld.global.u32 %r1311, [%rd330];
setp.eq.s32 %p590, %r1311, 0;
@%p590 bra $L__BB3_516;
and.b32 %r4457, %r1311, -2147483648;
abs.s32 %r4458, %r1311;
shl.b32 %r4459, %r4458, %r27;
or.b32 %r7055, %r4459, %r4457;
$L__BB3_516:
shl.b32 %r4462, %r7055, 1;
shr.u32 %r4463, %r4462, %r27;
and.b32 %r1314, %r4463, -2;
setp.eq.s32 %p591, %r1314, 0;
mov.u32 %r7057, %r7056;
mov.u32 %r7075, %r7052;
@%p591 bra $L__BB3_518;
or.b32 %r7059, %r7059, 2;
add.s32 %r4464, %r1314, -1;
clz.b32 %r4465, %r4464;
mov.u32 %r4466, 32;
sub.s32 %r7056, %r4466, %r4465;
max.s32 %r7075, %r7052, %r7056;
shr.u32 %r4467, %r7055, 31;
add.s32 %r4468, %r4467, %r1314;
add.s32 %r7057, %r4468, -2;
$L__BB3_518:
add.s32 %r7074, %r7203, 1;
add.s32 %r4473, %r6881, 3;
setp.ge.u32 %p592, %r4473, %r3200;
mov.u32 %r7077, 0;
mov.u32 %r7070, %r7077;
mov.u32 %r7071, %r7077;
mov.u32 %r7072, %r7077;
mov.u32 %r7073, %r7077;
@%p592 bra $L__BB3_529;
cvt.u64.u32 %rd331, %r7074;
add.s64 %rd332, %rd331, %rd4;
shl.b64 %rd333, %rd332, 2;
add.s64 %rd334, %rd3, %rd333;
ld.global.u32 %r1324, [%rd334];
setp.eq.s32 %p593, %r1324, 0;
mov.u32 %r7071, 0;
mov.u32 %r7060, %r7071;
@%p593 bra $L__BB3_521;
and.b32 %r4475, %r1324, -2147483648;
abs.s32 %r4476, %r1324;
shl.b32 %r4477, %r4476, %r27;
or.b32 %r7060, %r4477, %r4475;
$L__BB3_521:
shl.b32 %r4480, %r7060, 1;
shr.u32 %r4481, %r4480, %r27;
and.b32 %r1327, %r4481, -2;
setp.eq.s32 %p594, %r1327, 0;
mov.u32 %r7073, %r7071;
@%p594 bra $L__BB3_523;
or.b32 %r7059, %r7059, 4;
add.s32 %r4482, %r1327, -1;
clz.b32 %r4483, %r4482;
mov.u32 %r4484, 32;
sub.s32 %r7071, %r4484, %r4483;
max.s32 %r7075, %r7075, %r7071;
shr.u32 %r4485, %r7060, 31;
add.s32 %r4486, %r4485, %r1327;
add.s32 %r7073, %r4486, -2;
$L__BB3_523:
mov.u32 %r7070, 0;
mov.u32 %r7065, %r7070;
@%p470 bra $L__BB3_526;
add.s32 %r4489, %r7074, %r3198;
cvt.u64.u32 %rd335, %r4489;
add.s64 %rd336, %rd335, %rd4;
shl.b64 %rd337, %rd336, 2;
add.s64 %rd338, %rd3, %rd337;
ld.global.u32 %r1336, [%rd338];
setp.eq.s32 %p596, %r1336, 0;
@%p596 bra $L__BB3_526;
and.b32 %r4490, %r1336, -2147483648;
abs.s32 %r4491, %r1336;
shl.b32 %r4492, %r4491, %r27;
or.b32 %r7065, %r4492, %r4490;
$L__BB3_526:
shl.b32 %r4495, %r7065, 1;
shr.u32 %r4496, %r4495, %r27;
and.b32 %r1339, %r4496, -2;
setp.eq.s32 %p597, %r1339, 0;
mov.u32 %r7072, %r7070;
@%p597 bra $L__BB3_528;
or.b32 %r7059, %r7059, 8;
add.s32 %r4497, %r1339, -1;
clz.b32 %r4498, %r4497;
mov.u32 %r4499, 32;
sub.s32 %r7070, %r4499, %r4498;
max.s32 %r7075, %r7075, %r7070;
shr.u32 %r4500, %r7065, 31;
add.s32 %r4501, %r4500, %r1339;
add.s32 %r7072, %r4501, -2;
$L__BB3_528:
add.s32 %r7074, %r7203, 2;
$L__BB3_529:
mov.u32 %r7203, %r7074;
shr.u32 %r4503, %r1271, 2;
shr.u32 %r4504, %r1247, 1;
or.b32 %r4505, %r4503, %r4504;
cvt.u32.u16 %r4506, %rs180;
and.b32 %r4507, %r4506, 255;
shl.b32 %r4508, %r1300, 2;
add.s32 %r4509, %r4508, %r4507;
or.b32 %r1356, %r4505, %r4509;
add.s32 %r4510, %r7059, -1;
and.b32 %r4511, %r4510, %r7059;
setp.ne.s32 %p598, %r4511, 0;
setp.gt.u16 %p599, %rs178, 2;
and.pred %p600, %p599, %p598;
selp.b32 %r4512, %r6900, 1, %p600;
max.s32 %r1357, %r4512, %r7075;
sub.s32 %r7220, %r1357, %r4512;
setp.lt.s32 %p601, %r7220, 1;
@%p601 bra $L__BB3_531;
setp.eq.s32 %p602, %r7052, %r7075;
selp.u32 %r4513, 1, 0, %p602;
setp.eq.s32 %p603, %r7056, %r7075;
selp.u32 %r4514, -1, 0, %p603;
bfi.b32 %r4515, %r4514, %r4513, 1, 1;
setp.eq.s32 %p604, %r7071, %r7075;
selp.u16 %rs656, 1, 0, %p604;
mul.wide.u16 %r4516, %rs656, 4;
or.b32 %r4517, %r4515, %r4516;
setp.eq.s32 %p605, %r7070, %r7075;
selp.u16 %rs657, 1, 0, %p605;
mul.wide.u16 %r4518, %rs657, 8;
or.b32 %r7077, %r4517, %r4518;
$L__BB3_531:
shl.b32 %r4519, %r7059, 4;
shl.b32 %r4520, %r1356, 8;
or.b32 %r4521, %r4519, %r4520;
or.b32 %r4522, %r4521, %r7077;
mul.wide.u32 %rd340, %r4522, 2;
add.s64 %rd341, %rd18, %rd340;
ld.global.u16 %rs181, [%rd341];
shr.u16 %rs658, %rs181, 4;
and.b16 %rs182, %rs658, 7;
setp.eq.s16 %p606, %rs182, 0;
mov.u32 %r7089, %r6940;
@%p606 bra $L__BB3_538;
cvt.u32.u16 %r7078, %rs182;
shr.u16 %rs659, %rs181, 8;
cvt.u32.u16 %r7079, %rs659;
$L__BB3_533:
mov.u32 %r1363, %r7078;
setp.gt.u32 %p607, %r6829, 2879;
mov.u32 %r7089, 1;
@%p607 bra $L__BB3_538;
mov.u32 %r4524, 8;
sub.s32 %r4525, %r4524, %r6831;
sub.s32 %r4526, %r4525, %r6830;
min.u32 %r4527, %r4526, %r1363;
setp.eq.s32 %p608, %r4527, 32;
mov.u32 %r4528, -1;
shl.b32 %r4529, %r4528, %r4527;
not.b32 %r4530, %r4529;
selp.b32 %r4531, -1, %r4530, %p608;
and.b32 %r4532, %r4531, %r7079;
shl.b32 %r4533, %r4532, %r6830;
cvt.u16.u32 %rs660, %r4533;
or.b16 %rs1025, %rs1025, %rs660;
add.s32 %r6830, %r4527, %r6830;
sub.s32 %r7078, %r1363, %r4527;
shr.u32 %r7079, %r7079, %r4527;
setp.gt.u32 %p609, %r4526, %r1363;
@%p609 bra $L__BB3_537;
setp.ne.s32 %p610, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs661, %rs1025, 255;
setp.ne.s16 %p611, %rs661, 127;
and.pred %p612, %p610, %p611;
@%p612 bra $L__BB3_537;
mov.u32 %r4536, 20548;
sub.s32 %r4537, %r4536, %r6829;
cvt.u64.u32 %rd342, %r4537;
add.s64 %rd343, %rd342, %rd5;
add.s64 %rd344, %rd1, %rd343;
st.global.u8 [%rd344], %rs1025;
add.s32 %r6829, %r6829, 1;
setp.gt.u16 %p613, %rs661, 143;
selp.u32 %r6831, 1, 0, %p613;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_537:
setp.ne.s32 %p614, %r7078, 0;
mov.u32 %r7089, %r6940;
@%p614 bra $L__BB3_533;
$L__BB3_538:
setp.ne.s32 %p615, %r1356, 0;
@%p615 bra $L__BB3_586;
setp.eq.s32 %p616, %r7059, 0;
add.s32 %r4538, %r6381, 17477;
cvt.u64.u32 %rd345, %r4538;
add.s64 %rd346, %rd345, %rd5;
add.s64 %rd20, %rd1, %rd346;
@%p616 bra $L__BB3_578;
shl.b16 %rs956, %rs956, 1;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p617, %r6390, 0;
mov.u32 %r7125, %r6598;
@%p617 bra $L__BB3_543;
setp.gt.u32 %p618, %r6381, 191;
mov.u32 %r7125, 1;
mov.u32 %r6390, 0;
@%p618 bra $L__BB3_543;
st.global.u8 [%rd20], %rs956;
add.s32 %r6381, %r6381, 1;
mov.u16 %rs956, 0;
mov.u32 %r6390, 8;
mov.u32 %r7125, %r6598;
$L__BB3_543:
setp.lt.u32 %p619, %r6596, 3;
mov.u32 %r7093, 0;
@%p619 bra $L__BB3_546;
setp.lt.u32 %p620, %r6596, 6;
mov.u32 %r7093, 1;
@%p620 bra $L__BB3_546;
setp.lt.u32 %p621, %r6596, 9;
setp.eq.s32 %p622, %r6596, 11;
selp.b32 %r4544, 4, 5, %p622;
setp.lt.u32 %p623, %r6596, 11;
selp.b32 %r4545, 3, %r4544, %p623;
selp.b32 %r7093, 2, %r4545, %p621;
$L__BB3_546:
setp.eq.s32 %p624, %r7093, 0;
@%p624 bra $L__BB3_574;
add.s32 %r1387, %r7093, -1;
and.b32 %r1388, %r7093, 3;
setp.eq.s32 %p625, %r1388, 0;
mov.u32 %r7103, %r7093;
mov.u32 %r7104, %r7125;
@%p625 bra $L__BB3_559;
mov.u32 %r4547, 1;
shl.b32 %r4548, %r4547, %r1387;
and.b32 %r4549, %r4548, %r6595;
setp.ne.s32 %p626, %r4549, 0;
selp.u32 %r4550, 1, 0, %p626;
cvt.u32.u16 %r4551, %rs956;
bfi.b32 %r4552, %r4551, %r4550, 1, 8;
cvt.u16.u32 %rs956, %r4552;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p627, %r6390, 0;
mov.u32 %r7104, %r7125;
@%p627 bra $L__BB3_551;
setp.gt.u32 %p628, %r6381, 191;
mov.u32 %r6390, 0;
mov.u32 %r7104, %r4547;
@%p628 bra $L__BB3_551;
add.s32 %r4556, %r6381, 17477;
cvt.u64.u32 %rd347, %r4556;
add.s64 %rd348, %rd347, %rd5;
add.s64 %rd349, %rd1, %rd348;
st.global.u8 [%rd349], %rs956;
add.s32 %r6381, %r6381, 1;
mov.u16 %rs956, 0;
mov.u32 %r6390, 8;
mov.u32 %r7104, %r7125;
$L__BB3_551:
setp.eq.s32 %p629, %r1388, 1;
mov.u32 %r7125, %r7104;
mov.u32 %r7103, %r1387;
@%p629 bra $L__BB3_559;
add.s32 %r7103, %r7093, -2;
mov.u32 %r4557, 1;
shl.b32 %r4558, %r4557, %r7103;
and.b32 %r4559, %r4558, %r6595;
setp.ne.s32 %p630, %r4559, 0;
selp.u32 %r4560, 1, 0, %p630;
cvt.u32.u16 %r4561, %rs956;
bfi.b32 %r4562, %r4561, %r4560, 1, 8;
cvt.u16.u32 %rs956, %r4562;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p631, %r6390, 0;
mov.u32 %r7099, %r7104;
@%p631 bra $L__BB3_555;
setp.gt.u32 %p632, %r6381, 191;
mov.u32 %r6390, 0;
mov.u32 %r7099, %r4557;
@%p632 bra $L__BB3_555;
add.s32 %r4565, %r6381, 17477;
cvt.u64.u32 %rd350, %r4565;
add.s64 %rd351, %rd350, %rd5;
add.s64 %rd352, %rd1, %rd351;
and.b16 %rs668, %rs956, 255;
st.global.u8 [%rd352], %rs956;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p633, %rs668, 255;
selp.b32 %r6390, 7, 8, %p633;
mov.u16 %rs956, 0;
mov.u32 %r7099, %r7104;
$L__BB3_555:
setp.eq.s32 %p634, %r1388, 2;
mov.u32 %r7125, %r7099;
mov.u32 %r7104, %r7099;
@%p634 bra $L__BB3_559;
add.s32 %r7103, %r7093, -3;
mov.u32 %r4566, 1;
shl.b32 %r4567, %r4566, %r7103;
and.b32 %r4568, %r4567, %r6595;
setp.ne.s32 %p635, %r4568, 0;
selp.u32 %r4569, 1, 0, %p635;
cvt.u32.u16 %r4570, %rs956;
bfi.b32 %r4571, %r4570, %r4569, 1, 8;
cvt.u16.u32 %rs956, %r4571;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p636, %r6390, 0;
mov.u32 %r7125, %r7099;
mov.u32 %r7104, %r7099;
@%p636 bra $L__BB3_559;
setp.gt.u32 %p637, %r6381, 191;
mov.u32 %r6390, 0;
mov.u32 %r7125, %r4566;
mov.u32 %r7104, %r4566;
@%p637 bra $L__BB3_559;
add.s32 %r4576, %r6381, 17477;
cvt.u64.u32 %rd353, %r4576;
add.s64 %rd354, %rd353, %rd5;
add.s64 %rd355, %rd1, %rd354;
and.b16 %rs671, %rs956, 255;
st.global.u8 [%rd355], %rs956;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p638, %rs671, 255;
selp.b32 %r6390, 7, 8, %p638;
mov.u16 %rs956, 0;
mov.u32 %r7125, %r7099;
mov.u32 %r7104, %r7099;
$L__BB3_559:
setp.lt.u32 %p639, %r1387, 3;
@%p639 bra $L__BB3_574;
mov.u32 %r7125, %r7104;
$L__BB3_561:
add.s32 %r4577, %r7103, -1;
mov.u32 %r4578, 1;
shl.b32 %r4579, %r4578, %r4577;
and.b32 %r4580, %r4579, %r6595;
setp.ne.s32 %p640, %r4580, 0;
selp.u32 %r4581, 1, 0, %p640;
cvt.u32.u16 %r4582, %rs956;
bfi.b32 %r7113, %r4582, %r4581, 1, 8;
add.s32 %r7112, %r6390, -1;
setp.ne.s32 %p641, %r7112, 0;
mov.u32 %r7114, %r7125;
@%p641 bra $L__BB3_564;
setp.gt.u32 %p642, %r6381, 191;
mov.u32 %r7112, 0;
mov.u32 %r7114, %r4578;
@%p642 bra $L__BB3_564;
cvt.u16.u32 %rs672, %r7113;
and.b16 %rs673, %rs672, 255;
add.s32 %r4586, %r6381, 17477;
cvt.u64.u32 %rd356, %r4586;
add.s64 %rd357, %rd356, %rd5;
add.s64 %rd358, %rd1, %rd357;
st.global.u8 [%rd358], %rs672;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p643, %rs673, 255;
selp.b32 %r7112, 7, 8, %p643;
mov.u32 %r7113, 0;
mov.u32 %r7114, %r7125;
$L__BB3_564:
add.s32 %r4587, %r7103, -2;
shl.b32 %r4589, %r4578, %r4587;
and.b32 %r4590, %r4589, %r6595;
setp.ne.s32 %p644, %r4590, 0;
and.b32 %r4591, %r7113, 127;
selp.u32 %r4592, 1, 0, %p644;
bfi.b32 %r7117, %r4591, %r4592, 1, 7;
add.s32 %r7116, %r7112, -1;
setp.ne.s32 %p645, %r7116, 0;
mov.u32 %r7118, %r7114;
@%p645 bra $L__BB3_567;
setp.gt.u32 %p646, %r6381, 191;
mov.u32 %r7118, 1;
mov.u32 %r7116, 0;
@%p646 bra $L__BB3_567;
cvt.u16.u32 %rs674, %r7117;
and.b16 %rs675, %rs674, 255;
add.s32 %r4596, %r6381, 17477;
cvt.u64.u32 %rd359, %r4596;
add.s64 %rd360, %rd359, %rd5;
add.s64 %rd361, %rd1, %rd360;
st.global.u8 [%rd361], %rs674;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p647, %rs675, 255;
selp.b32 %r7116, 7, 8, %p647;
mov.u32 %r7117, 0;
mov.u32 %r7118, %r7114;
$L__BB3_567:
add.s32 %r4597, %r7103, -3;
mov.u32 %r4598, 1;
shl.b32 %r4599, %r4598, %r4597;
and.b32 %r4600, %r4599, %r6595;
setp.ne.s32 %p648, %r4600, 0;
and.b32 %r4601, %r7117, 127;
selp.u32 %r4602, 1, 0, %p648;
bfi.b32 %r7121, %r4601, %r4602, 1, 7;
add.s32 %r7120, %r7116, -1;
setp.ne.s32 %p649, %r7120, 0;
mov.u32 %r7122, %r7118;
@%p649 bra $L__BB3_570;
setp.gt.u32 %p650, %r6381, 191;
mov.u32 %r7120, 0;
mov.u32 %r7122, %r4598;
@%p650 bra $L__BB3_570;
cvt.u16.u32 %rs676, %r7121;
and.b16 %rs677, %rs676, 255;
add.s32 %r4606, %r6381, 17477;
cvt.u64.u32 %rd362, %r4606;
add.s64 %rd363, %rd362, %rd5;
add.s64 %rd364, %rd1, %rd363;
st.global.u8 [%rd364], %rs676;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p651, %rs677, 255;
selp.b32 %r7120, 7, 8, %p651;
mov.u32 %r7121, 0;
mov.u32 %r7122, %r7118;
$L__BB3_570:
add.s32 %r7103, %r7103, -4;
shl.b32 %r4608, %r4598, %r7103;
and.b32 %r4609, %r4608, %r6595;
setp.ne.s32 %p652, %r4609, 0;
and.b32 %r4610, %r7121, 127;
selp.u32 %r4611, 1, 0, %p652;
bfi.b32 %r4612, %r4610, %r4611, 1, 15;
cvt.u16.u32 %rs956, %r4612;
add.s32 %r6390, %r7120, -1;
setp.ne.s32 %p653, %r6390, 0;
mov.u32 %r7125, %r7122;
@%p653 bra $L__BB3_573;
setp.gt.u32 %p654, %r6381, 191;
mov.u32 %r7125, 1;
mov.u32 %r6390, 0;
@%p654 bra $L__BB3_573;
add.s32 %r4615, %r6381, 17477;
cvt.u64.u32 %rd365, %r4615;
add.s64 %rd366, %rd365, %rd5;
add.s64 %rd367, %rd1, %rd366;
and.b16 %rs679, %rs956, 255;
st.global.u8 [%rd367], %rs956;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p655, %rs679, 255;
selp.b32 %r6390, 7, 8, %p655;
mov.u16 %rs956, 0;
mov.u32 %r7125, %r7122;
$L__BB3_573:
setp.ne.s32 %p656, %r7103, 0;
@%p656 bra $L__BB3_561;
$L__BB3_574:
add.s32 %r4617, %r6596, -1;
setp.eq.s32 %p657, %r6596, 0;
mov.u32 %r6595, 0;
selp.b32 %r6596, 0, %r4617, %p657;
setp.lt.u32 %p658, %r6596, 3;
mov.u32 %r7129, %r6595;
@%p658 bra $L__BB3_577;
setp.lt.u32 %p659, %r6596, 6;
mov.u32 %r7129, 1;
@%p659 bra $L__BB3_577;
setp.lt.u32 %p660, %r6596, 9;
setp.eq.s32 %p661, %r6596, 11;
selp.b32 %r4619, 4, 5, %p661;
setp.lt.u32 %p662, %r6596, 11;
selp.b32 %r4620, 3, %r4619, %p662;
selp.b32 %r7129, 2, %r4620, %p660;
$L__BB3_577:
mov.u32 %r4622, 1;
shl.b32 %r6597, %r4622, %r7129;
mov.u32 %r6598, %r7125;
bra.uni $L__BB3_586;
$L__BB3_435:
setp.gt.u32 %p499, %r6381, 191;
mov.u32 %r6976, 1;
mov.u32 %r6390, 0;
@%p499 bra $L__BB3_437;
st.global.u8 [%rd19], %rs956;
add.s32 %r6381, %r6381, 1;
mov.u16 %rs956, 0;
mov.u32 %r6390, 8;
mov.u32 %r6976, %r6598;
bra.uni $L__BB3_437;
$L__BB3_578:
add.s32 %r6595, %r6595, 1;
setp.lt.u32 %p663, %r6595, %r6597;
@%p663 bra $L__BB3_586;
shl.b16 %rs680, %rs956, 1;
or.b16 %rs956, %rs680, 1;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p664, %r6390, 0;
mov.u32 %r7132, %r6598;
@%p664 bra $L__BB3_582;
setp.gt.u32 %p665, %r6381, 191;
mov.u32 %r7132, 1;
mov.u32 %r6390, 0;
@%p665 bra $L__BB3_582;
and.b16 %rs682, %rs956, 255;
st.global.u8 [%rd20], %rs956;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p666, %rs682, 255;
selp.b32 %r6390, 7, 8, %p666;
mov.u16 %rs956, 0;
mov.u32 %r7132, %r6598;
$L__BB3_582:
add.s32 %r4626, %r6596, 1;
min.u32 %r6596, %r4626, 12;
setp.lt.u32 %p667, %r6596, 3;
mov.u32 %r6595, 0;
mov.u32 %r7133, %r6595;
@%p667 bra $L__BB3_585;
setp.lt.u32 %p668, %r6596, 6;
mov.u32 %r7133, 1;
@%p668 bra $L__BB3_585;
setp.lt.u32 %p669, %r6596, 9;
setp.eq.s32 %p670, %r6596, 11;
selp.b32 %r4628, 4, 5, %p670;
setp.lt.u32 %p671, %r6596, 11;
selp.b32 %r4629, 3, %r4628, %p671;
selp.b32 %r7133, 2, %r4629, %p669;
$L__BB3_585:
mov.u32 %r4631, 1;
shl.b32 %r6597, %r4631, %r7133;
mov.u32 %r6598, %r7132;
$L__BB3_586:
and.b16 %rs683, %rs181, 15;
cvt.u32.u16 %r1471, %rs683;
and.b32 %r4632, %r7059, 1;
setp.eq.b32 %p672, %r4632, 1;
mov.pred %p673, 0;
xor.pred %p674, %p672, %p673;
not.pred %p675, %p674;
mov.u32 %r7154, %r7050;
@%p675 bra $L__BB3_593;
and.b32 %r4633, %r1471, 1;
sub.s32 %r7140, %r1357, %r4633;
setp.eq.s32 %p676, %r7140, 0;
mov.u32 %r7154, %r7050;
@%p676 bra $L__BB3_593;
mov.u32 %r4634, -1;
shl.b32 %r4635, %r4634, %r7140;
not.b32 %r4636, %r4635;
and.b32 %r7141, %r7053, %r4636;
$L__BB3_589:
setp.gt.u32 %p677, %r7016, 17476;
mov.u32 %r7154, 1;
@%p677 bra $L__BB3_593;
sub.s32 %r4638, %r7017, %r7018;
min.u32 %r4639, %r4638, %r7140;
setp.eq.s32 %p678, %r4639, 32;
mov.u32 %r4640, -1;
shl.b32 %r4641, %r4640, %r4639;
not.b32 %r4642, %r4641;
selp.b32 %r4643, -1, %r4642, %p678;
and.b32 %r4644, %r4643, %r7141;
shl.b32 %r4645, %r4644, %r7018;
or.b32 %r7019, %r4645, %r7019;
add.s32 %r7018, %r4639, %r7018;
shr.u32 %r7141, %r7141, %r4639;
sub.s32 %r7140, %r7140, %r4639;
setp.lt.u32 %p679, %r7018, %r7017;
@%p679 bra $L__BB3_592;
cvt.u64.u32 %rd368, %r7016;
add.s64 %rd369, %rd368, %rd5;
add.s64 %rd370, %rd1, %rd369;
st.global.u8 [%rd370], %r7019;
add.s32 %r7016, %r7016, 1;
setp.eq.s32 %p680, %r7019, 255;
selp.b32 %r7017, 7, 8, %p680;
mov.u32 %r7018, 0;
mov.u32 %r7019, %r7018;
$L__BB3_592:
setp.ne.s32 %p681, %r7140, 0;
mov.u32 %r7154, %r7050;
@%p681 bra $L__BB3_589;
$L__BB3_593:
and.b32 %r1495, %r7059, 2;
setp.eq.s32 %p682, %r1495, 0;
mov.u32 %r7169, %r7154;
@%p682 bra $L__BB3_600;
shr.u32 %r4648, %r1471, 1;
and.b32 %r4649, %r4648, 1;
sub.s32 %r7155, %r1357, %r4649;
setp.eq.s32 %p683, %r7155, 0;
mov.u32 %r7169, %r7154;
@%p683 bra $L__BB3_600;
mov.u32 %r4650, -1;
shl.b32 %r4651, %r4650, %r7155;
not.b32 %r4652, %r4651;
and.b32 %r7156, %r7057, %r4652;
$L__BB3_596:
setp.gt.u32 %p684, %r7016, 17476;
mov.u32 %r7169, 1;
@%p684 bra $L__BB3_600;
sub.s32 %r4654, %r7017, %r7018;
min.u32 %r4655, %r4654, %r7155;
setp.eq.s32 %p685, %r4655, 32;
mov.u32 %r4656, -1;
shl.b32 %r4657, %r4656, %r4655;
not.b32 %r4658, %r4657;
selp.b32 %r4659, -1, %r4658, %p685;
and.b32 %r4660, %r4659, %r7156;
shl.b32 %r4661, %r4660, %r7018;
or.b32 %r7019, %r4661, %r7019;
add.s32 %r7018, %r4655, %r7018;
shr.u32 %r7156, %r7156, %r4655;
sub.s32 %r7155, %r7155, %r4655;
setp.lt.u32 %p686, %r7018, %r7017;
@%p686 bra $L__BB3_599;
cvt.u64.u32 %rd371, %r7016;
add.s64 %rd372, %rd371, %rd5;
add.s64 %rd373, %rd1, %rd372;
st.global.u8 [%rd373], %r7019;
add.s32 %r7016, %r7016, 1;
setp.eq.s32 %p687, %r7019, 255;
selp.b32 %r7017, 7, 8, %p687;
mov.u32 %r7018, 0;
mov.u32 %r7019, %r7018;
$L__BB3_599:
setp.ne.s32 %p688, %r7155, 0;
mov.u32 %r7169, %r7154;
@%p688 bra $L__BB3_596;
$L__BB3_600:
and.b32 %r1519, %r7059, 4;
setp.eq.s32 %p689, %r1519, 0;
mov.u32 %r7184, %r7169;
@%p689 bra $L__BB3_607;
shr.u32 %r4664, %r1471, 2;
and.b32 %r4665, %r4664, 1;
sub.s32 %r7170, %r1357, %r4665;
setp.eq.s32 %p690, %r7170, 0;
mov.u32 %r7184, %r7169;
@%p690 bra $L__BB3_607;
mov.u32 %r4666, -1;
shl.b32 %r4667, %r4666, %r7170;
not.b32 %r4668, %r4667;
and.b32 %r7171, %r7073, %r4668;
$L__BB3_603:
setp.gt.u32 %p691, %r7016, 17476;
mov.u32 %r7184, 1;
@%p691 bra $L__BB3_607;
sub.s32 %r4670, %r7017, %r7018;
min.u32 %r4671, %r4670, %r7170;
setp.eq.s32 %p692, %r4671, 32;
mov.u32 %r4672, -1;
shl.b32 %r4673, %r4672, %r4671;
not.b32 %r4674, %r4673;
selp.b32 %r4675, -1, %r4674, %p692;
and.b32 %r4676, %r4675, %r7171;
shl.b32 %r4677, %r4676, %r7018;
or.b32 %r7019, %r4677, %r7019;
add.s32 %r7018, %r4671, %r7018;
shr.u32 %r7171, %r7171, %r4671;
sub.s32 %r7170, %r7170, %r4671;
setp.lt.u32 %p693, %r7018, %r7017;
@%p693 bra $L__BB3_606;
cvt.u64.u32 %rd374, %r7016;
add.s64 %rd375, %rd374, %rd5;
add.s64 %rd376, %rd1, %rd375;
st.global.u8 [%rd376], %r7019;
add.s32 %r7016, %r7016, 1;
setp.eq.s32 %p694, %r7019, 255;
selp.b32 %r7017, 7, 8, %p694;
mov.u32 %r7018, 0;
mov.u32 %r7019, %r7018;
$L__BB3_606:
setp.ne.s32 %p695, %r7170, 0;
mov.u32 %r7184, %r7169;
@%p695 bra $L__BB3_603;
$L__BB3_607:
and.b32 %r1543, %r7059, 8;
setp.eq.s32 %p696, %r1543, 0;
mov.u32 %r7050, %r7184;
@%p696 bra $L__BB3_614;
shr.u32 %r4680, %r1471, 3;
sub.s32 %r7185, %r1357, %r4680;
setp.eq.s32 %p697, %r7185, 0;
mov.u32 %r7050, %r7184;
@%p697 bra $L__BB3_614;
mov.u32 %r4681, -1;
shl.b32 %r4682, %r4681, %r7185;
not.b32 %r4683, %r4682;
and.b32 %r7186, %r7072, %r4683;
$L__BB3_610:
setp.gt.u32 %p698, %r7016, 17476;
mov.u32 %r7050, 1;
@%p698 bra $L__BB3_614;
sub.s32 %r4685, %r7017, %r7018;
min.u32 %r4686, %r4685, %r7185;
setp.eq.s32 %p699, %r4686, 32;
mov.u32 %r4687, -1;
shl.b32 %r4688, %r4687, %r4686;
not.b32 %r4689, %r4688;
selp.b32 %r4690, -1, %r4689, %p699;
and.b32 %r4691, %r4690, %r7186;
shl.b32 %r4692, %r4691, %r7018;
or.b32 %r7019, %r4692, %r7019;
add.s32 %r7018, %r4686, %r7018;
shr.u32 %r7186, %r7186, %r4686;
sub.s32 %r7185, %r7185, %r4686;
setp.lt.u32 %p700, %r7018, %r7017;
@%p700 bra $L__BB3_613;
cvt.u64.u32 %rd377, %r7016;
add.s64 %rd378, %rd377, %rd5;
add.s64 %rd379, %rd1, %rd378;
st.global.u8 [%rd379], %r7019;
add.s32 %r7016, %r7016, 1;
setp.eq.s32 %p701, %r7019, 255;
selp.b32 %r7017, 7, 8, %p701;
mov.u32 %r7018, 0;
mov.u32 %r7019, %r7018;
$L__BB3_613:
setp.ne.s32 %p702, %r7185, 0;
mov.u32 %r7050, %r7184;
@%p702 bra $L__BB3_610;
$L__BB3_614:
and.b32 %r4695, %r7056, 255;
and.b32 %r4696, %r6921, 255;
setp.lt.u32 %p703, %r4695, %r4696;
cvt.u16.u32 %rs684, %r7056;
selp.b16 %rs685, %rs179, %rs684, %p703;
st.shared.u8 [%r1295+1], %rs685;
ld.shared.u8 %rs686, [%r1295+3];
setp.gt.u16 %p704, %rs177, %rs686;
add.s32 %r7202, %r7202, 1;
add.s32 %r4697, %r6899, 3;
selp.b32 %r4698, %r7202, %r4697, %p704;
add.s32 %r4700, %r4428, %r4698;
ld.shared.u8 %r4701, [%r4700];
add.s32 %r6900, %r4701, -1;
shr.u32 %r4702, %r1495, 1;
or.b32 %r4703, %r1301, %r4702;
st.shared.u8 [%r1295+2], %r7070;
st.shared.u8 [%r1298+1], %r4703;
ld.shared.u8 %rs687, [%r1298+3];
mul.wide.u16 %r4704, %rs687, 4;
add.s32 %r4705, %r4704, %r1300;
shr.u32 %r4706, %r1543, 3;
st.shared.u8 [%r1298+2], %r4706;
shr.u32 %r4707, %r1543, 2;
shr.u32 %r4708, %r1519, 1;
or.b32 %r4709, %r4707, %r4708;
or.b32 %r6897, %r4709, %r4705;
add.s32 %r6901, %r6901, 1;
mov.u32 %r6940, %r7089;
$L__BB3_615:
mov.u32 %r6898, %r7203;
mov.u32 %r6899, %r7202;
max.s32 %r4710, %r7220, 0;
mul.lo.s32 %r4711, %r1086, 6;
setp.gt.s32 %p705, %r1086, 0;
selp.b32 %r4712, %r4711, 0, %p705;
cvt.u64.u32 %rd380, %r4712;
add.s64 %rd21, %rd17, %rd380;
ld.global.u8 %rs205, [%rd21+1];
add.s32 %r4713, %r4712, 2;
cvt.u64.u32 %rd381, %r4713;
add.s64 %rd382, %rd17, %rd381;
ld.global.u8 %rs206, [%rd382];
ld.global.u8 %rs207, [%rd382+1];
mul.lo.s32 %r4714, %r4710, 6;
cvt.u64.u32 %rd383, %r4714;
add.s64 %rd384, %rd17, %rd383;
ld.global.u8 %rs208, [%rd384];
ld.global.u8 %rs209, [%rd384+1];
add.s32 %r4715, %r4714, 2;
cvt.u64.u32 %rd385, %r4715;
add.s64 %rd386, %rd17, %rd385;
ld.global.u8 %rs210, [%rd386];
ld.global.u8 %rs211, [%rd386+1];
setp.eq.s16 %p706, %rs205, 0;
mov.u32 %r7232, %r6940;
@%p706 bra $L__BB3_622;
ld.global.u8 %r7222, [%rd21];
cvt.u32.u16 %r7221, %rs205;
$L__BB3_617:
mov.u32 %r1594, %r7221;
setp.gt.u32 %p707, %r6829, 2879;
mov.u32 %r7232, 1;
@%p707 bra $L__BB3_622;
mov.u32 %r4717, 8;
sub.s32 %r4718, %r4717, %r6831;
sub.s32 %r4719, %r4718, %r6830;
min.u32 %r4720, %r4719, %r1594;
setp.eq.s32 %p708, %r4720, 32;
mov.u32 %r4721, -1;
shl.b32 %r4722, %r4721, %r4720;
not.b32 %r4723, %r4722;
selp.b32 %r4724, -1, %r4723, %p708;
and.b32 %r4725, %r4724, %r7222;
shl.b32 %r4726, %r4725, %r6830;
cvt.u16.u32 %rs688, %r4726;
or.b16 %rs1025, %rs1025, %rs688;
add.s32 %r6830, %r4720, %r6830;
sub.s32 %r7221, %r1594, %r4720;
shr.u32 %r7222, %r7222, %r4720;
setp.gt.u32 %p709, %r4719, %r1594;
@%p709 bra $L__BB3_621;
setp.ne.s32 %p710, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs689, %rs1025, 255;
setp.ne.s16 %p711, %rs689, 127;
and.pred %p712, %p710, %p711;
@%p712 bra $L__BB3_621;
mov.u32 %r4729, 20548;
sub.s32 %r4730, %r4729, %r6829;
cvt.u64.u32 %rd387, %r4730;
add.s64 %rd388, %rd387, %rd5;
add.s64 %rd389, %rd1, %rd388;
st.global.u8 [%rd389], %rs1025;
add.s32 %r6829, %r6829, 1;
setp.gt.u16 %p713, %rs689, 143;
selp.u32 %r6831, 1, 0, %p713;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_621:
setp.ne.s32 %p714, %r7221, 0;
mov.u32 %r7232, %r6940;
@%p714 bra $L__BB3_617;
$L__BB3_622:
setp.eq.s16 %p715, %rs209, 0;
mov.u32 %r7244, %r7232;
@%p715 bra $L__BB3_629;
cvt.u32.u16 %r4731, %rs208;
and.b32 %r7234, %r4731, 255;
cvt.u32.u16 %r4732, %rs209;
and.b32 %r7233, %r4732, 255;
$L__BB3_624:
mov.u32 %r1613, %r7233;
setp.gt.u32 %p716, %r6829, 2879;
mov.u32 %r7244, 1;
@%p716 bra $L__BB3_629;
mov.u32 %r4734, 8;
sub.s32 %r4735, %r4734, %r6831;
sub.s32 %r4736, %r4735, %r6830;
min.u32 %r4737, %r4736, %r1613;
setp.eq.s32 %p717, %r4737, 32;
mov.u32 %r4738, -1;
shl.b32 %r4739, %r4738, %r4737;
not.b32 %r4740, %r4739;
selp.b32 %r4741, -1, %r4740, %p717;
and.b32 %r4742, %r4741, %r7234;
shl.b32 %r4743, %r4742, %r6830;
cvt.u16.u32 %rs693, %r4743;
or.b16 %rs1025, %rs1025, %rs693;
add.s32 %r6830, %r4737, %r6830;
sub.s32 %r7233, %r1613, %r4737;
shr.u32 %r7234, %r7234, %r4737;
setp.gt.u32 %p718, %r4736, %r1613;
@%p718 bra $L__BB3_628;
setp.ne.s32 %p719, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs694, %rs1025, 255;
setp.ne.s16 %p720, %rs694, 127;
and.pred %p721, %p719, %p720;
@%p721 bra $L__BB3_628;
mov.u32 %r4746, 20548;
sub.s32 %r4747, %r4746, %r6829;
cvt.u64.u32 %rd390, %r4747;
add.s64 %rd391, %rd390, %rd5;
add.s64 %rd392, %rd1, %rd391;
st.global.u8 [%rd392], %rs1025;
add.s32 %r6829, %r6829, 1;
setp.gt.u16 %p722, %rs694, 143;
selp.u32 %r6831, 1, 0, %p722;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_628:
setp.ne.s32 %p723, %r7233, 0;
mov.u32 %r7244, %r7232;
@%p723 bra $L__BB3_624;
$L__BB3_629:
setp.eq.s16 %p724, %rs207, 0;
mov.u32 %r7256, %r7244;
@%p724 bra $L__BB3_636;
cvt.u32.u16 %r4748, %rs207;
and.b32 %r7245, %r4748, 255;
cvt.u32.u16 %r4749, %rs206;
and.b32 %r7246, %r4749, 255;
$L__BB3_631:
mov.u32 %r1632, %r7245;
setp.gt.u32 %p725, %r6829, 2879;
mov.u32 %r7256, 1;
@%p725 bra $L__BB3_636;
mov.u32 %r4751, 8;
sub.s32 %r4752, %r4751, %r6831;
sub.s32 %r4753, %r4752, %r6830;
min.u32 %r4754, %r4753, %r1632;
setp.eq.s32 %p726, %r4754, 32;
mov.u32 %r4755, -1;
shl.b32 %r4756, %r4755, %r4754;
not.b32 %r4757, %r4756;
selp.b32 %r4758, -1, %r4757, %p726;
and.b32 %r4759, %r4758, %r7246;
shl.b32 %r4760, %r4759, %r6830;
cvt.u16.u32 %rs698, %r4760;
or.b16 %rs1025, %rs1025, %rs698;
add.s32 %r6830, %r4754, %r6830;
sub.s32 %r7245, %r1632, %r4754;
shr.u32 %r7246, %r7246, %r4754;
setp.gt.u32 %p727, %r4753, %r1632;
@%p727 bra $L__BB3_635;
setp.ne.s32 %p728, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs699, %rs1025, 255;
setp.ne.s16 %p729, %rs699, 127;
and.pred %p730, %p728, %p729;
@%p730 bra $L__BB3_635;
mov.u32 %r4763, 20548;
sub.s32 %r4764, %r4763, %r6829;
cvt.u64.u32 %rd393, %r4764;
add.s64 %rd394, %rd393, %rd5;
add.s64 %rd395, %rd1, %rd394;
st.global.u8 [%rd395], %rs1025;
add.s32 %r6829, %r6829, 1;
setp.gt.u16 %p731, %rs699, 143;
selp.u32 %r6831, 1, 0, %p731;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_635:
setp.ne.s32 %p732, %r7245, 0;
mov.u32 %r7256, %r7244;
@%p732 bra $L__BB3_631;
$L__BB3_636:
setp.eq.s16 %p733, %rs211, 0;
mov.u32 %r6832, %r7256;
@%p733 bra $L__BB3_643;
cvt.u32.u16 %r4765, %rs210;
and.b32 %r7258, %r4765, 255;
cvt.u32.u16 %r4766, %rs211;
and.b32 %r7257, %r4766, 255;
$L__BB3_638:
mov.u32 %r1651, %r7257;
setp.gt.u32 %p734, %r6829, 2879;
mov.u32 %r6832, 1;
@%p734 bra $L__BB3_643;
mov.u32 %r4768, 8;
sub.s32 %r4769, %r4768, %r6831;
sub.s32 %r4770, %r4769, %r6830;
min.u32 %r4771, %r4770, %r1651;
setp.eq.s32 %p735, %r4771, 32;
mov.u32 %r4772, -1;
shl.b32 %r4773, %r4772, %r4771;
not.b32 %r4774, %r4773;
selp.b32 %r4775, -1, %r4774, %p735;
and.b32 %r4776, %r4775, %r7258;
shl.b32 %r4777, %r4776, %r6830;
cvt.u16.u32 %rs703, %r4777;
or.b16 %rs1025, %rs1025, %rs703;
add.s32 %r6830, %r4771, %r6830;
sub.s32 %r7257, %r1651, %r4771;
shr.u32 %r7258, %r7258, %r4771;
setp.gt.u32 %p736, %r4770, %r1651;
@%p736 bra $L__BB3_642;
setp.ne.s32 %p737, %r6831, 0;
mov.u32 %r6831, 0;
and.b16 %rs704, %rs1025, 255;
setp.ne.s16 %p738, %rs704, 127;
and.pred %p739, %p737, %p738;
@%p739 bra $L__BB3_642;
mov.u32 %r4780, 20548;
sub.s32 %r4781, %r4780, %r6829;
cvt.u64.u32 %rd396, %r4781;
add.s64 %rd397, %rd396, %rd5;
add.s64 %rd398, %rd1, %rd397;
st.global.u8 [%rd398], %rs1025;
add.s32 %r6829, %r6829, 1;
setp.gt.u16 %p740, %rs704, 143;
selp.u32 %r6831, 1, 0, %p740;
mov.u16 %rs1025, 0;
mov.u32 %r6830, 0;
$L__BB3_642:
setp.ne.s32 %p741, %r7257, 0;
mov.u32 %r6832, %r7256;
@%p741 bra $L__BB3_638;
$L__BB3_643:
add.s32 %r6881, %r6881, 4;
setp.lt.u32 %p742, %r6881, %r3200;
@%p742 bra $L__BB3_403;
$L__BB3_644:
add.s32 %r6865, %r6865, 2;
setp.lt.u32 %p743, %r6865, %r3201;
@%p743 bra $L__BB3_401;
$L__BB3_645:
setp.eq.s32 %p744, %r6595, 0;
mov.u32 %r7298, %r6598;
@%p744 bra $L__BB3_649;
shl.b16 %rs707, %rs956, 1;
or.b16 %rs956, %rs707, 1;
add.s32 %r6390, %r6390, -1;
setp.ne.s32 %p745, %r6390, 0;
mov.u32 %r7298, %r6598;
@%p745 bra $L__BB3_649;
setp.gt.u32 %p746, %r6381, 191;
mov.u32 %r7298, 1;
mov.u32 %r6390, 0;
@%p746 bra $L__BB3_649;
add.s32 %r4784, %r6381, 17477;
cvt.u64.u32 %rd399, %r4784;
add.s64 %rd400, %rd399, %rd5;
add.s64 %rd401, %rd1, %rd400;
and.b16 %rs709, %rs956, 255;
st.global.u8 [%rd401], %rs956;
add.s32 %r6381, %r6381, 1;
setp.eq.s16 %p747, %rs709, 255;
selp.b32 %r6390, 7, 8, %p747;
mov.u16 %rs956, 0;
mov.u32 %r7298, %r6598;
$L__BB3_649:
cvt.u32.u16 %r4785, %rs956;
and.b32 %r4786, %r4785, 255;
shl.b32 %r4787, %r4786, %r6390;
cvt.u16.u32 %rs234, %r4787;
mov.u32 %r4788, -1;
shl.b32 %r4789, %r4788, %r6830;
not.b32 %r4790, %r4789;
mov.u32 %r4791, 255;
and.b32 %r4792, %r4790, 255;
setp.eq.s32 %p748, %r6830, 0;
selp.b32 %r1703, 0, %r4792, %p748;
shl.b32 %r1704, %r4791, %r6390;
and.b32 %r4793, %r1704, 255;
or.b32 %r4794, %r4793, %r1703;
setp.eq.s32 %p749, %r4794, 0;
mov.u32 %r7300, %r6832;
mov.u32 %r7302, %r7298;
@%p749 bra $L__BB3_655;
or.b16 %rs235, %rs1025, %rs234;
and.b16 %rs710, %rs235, 255;
xor.b16 %rs711, %rs235, %rs234;
cvt.u32.u16 %r4795, %rs711;
and.b32 %r4796, %r1704, %r4795;
and.b32 %r4797, %r4796, 255;
xor.b16 %rs712, %rs235, %rs1025;
cvt.u32.u16 %r4798, %rs712;
and.b32 %r4799, %r1703, %r4798;
or.b32 %r4800, %r4797, %r4799;
setp.eq.s32 %p750, %r4800, 0;
setp.ne.s16 %p751, %rs710, 255;
and.pred %p752, %p751, %p750;
setp.gt.u32 %p753, %r6829, 1;
and.pred %p754, %p753, %p752;
add.s32 %r4801, %r6381, 17477;
cvt.u64.u32 %rd402, %r4801;
add.s64 %rd403, %rd402, %rd5;
add.s64 %rd22, %rd1, %rd403;
@%p754 bra $L__BB3_653;
bra.uni $L__BB3_651;
$L__BB3_653:
setp.gt.u32 %p758, %r6381, 191;
mov.u32 %r7302, 1;
mov.u32 %r7300, %r6832;
@%p758 bra $L__BB3_655;
st.global.u8 [%rd22], %rs235;
add.s32 %r6381, %r6381, 1;
mov.u32 %r7300, %r6832;
mov.u32 %r7302, %r7298;
bra.uni $L__BB3_655;
$L__BB3_1243:
setp.gt.u32 %p1439, %r7382, 191;
setp.gt.u32 %p1440, %r7779, 2879;
or.pred %p1441, %p1440, %p1439;
mov.u32 %r8149, 1;
mov.u32 %r8151, %r8149;
@%p1441 bra $L__BB3_1247;
st.global.u8 [%rd44], %rs447;
add.s32 %r7382, %r7382, 1;
mov.u32 %r6251, 20548;
sub.s32 %r6252, %r6251, %r7779;
cvt.u64.u32 %rd675, %r6252;
add.s64 %rd676, %rd675, %rd5;
add.s64 %rd677, %rd1, %rd676;
st.global.u8 [%rd677], %rs1147;
add.s32 %r7779, %r7779, 1;
mov.u32 %r8149, %r8146;
mov.u32 %r8151, %r7776;
$L__BB3_1247:
setp.eq.s32 %p1443, %r7923, 0;
@%p1443 bra $L__BB3_1251;
sub.s32 %r6254, %r7924, %r7923;
mov.u32 %r6255, -1;
shl.b32 %r6256, %r6255, %r6254;
not.b32 %r6257, %r6256;
and.b32 %r6258, %r6257, 255;
shl.b32 %r6259, %r6258, %r7923;
or.b32 %r3190, %r6259, %r7922;
setp.eq.s32 %p1444, %r3190, 255;
mov.u32 %r8153, %r8093;
@%p1444 bra $L__BB3_1253;
setp.gt.u32 %p1445, %r7925, 17476;
mov.u32 %r8153, 1;
@%p1445 bra $L__BB3_1253;
cvt.u64.u32 %rd678, %r7925;
add.s64 %rd679, %rd678, %rd5;
add.s64 %rd680, %rd1, %rd679;
st.global.u8 [%rd680], %r3190;
add.s32 %r7925, %r7925, 1;
mov.u32 %r8153, %r8093;
bra.uni $L__BB3_1253;
$L__BB3_1251:
setp.ne.s32 %p1446, %r7924, 7;
mov.u32 %r8153, %r8093;
@%p1446 bra $L__BB3_1253;
setp.eq.s32 %p1447, %r7925, 0;
add.s32 %r6261, %r7925, -1;
selp.b32 %r7925, 0, %r6261, %p1447;
mov.u32 %r8153, %r8093;
$L__BB3_1253:
or.b32 %r6262, %r8151, %r8149;
or.b32 %r6263, %r6262, %r8153;
setp.eq.s32 %p1448, %r6263, 0;
@%p1448 bra $L__BB3_1255;
mov.u32 %r6264, 1;
st.global.u32 [%rd6], %r6264;
mov.u32 %r6265, 3;
st.global.u32 [%rd6+4], %r6265;
mov.u32 %r6266, 0;
st.global.u32 [%rd6+8], %r6266;
st.global.u32 [%rd6+12], %r6266;
st.global.u32 [%rd6+16], %r6266;
st.global.u32 [%rd6+20], %r6266;
st.global.u32 [%rd6+24], %r6266;
st.global.u32 [%rd6+28], %r6266;
bra.uni $L__BB3_1261;
$L__BB3_1255:
add.s32 %r6267, %r7779, %r7382;
add.s32 %r3195, %r6267, %r7925;
setp.lt.u32 %p1449, %r3195, 2;
setp.gt.u32 %p1450, %r3195, %r3204;
or.pred %p1451, %p1449, %p1450;
@%p1451 bra $L__BB3_1257;
bra.uni $L__BB3_1256;
$L__BB3_1257:
mov.u32 %r6274, 1;
st.global.u32 [%rd6], %r6274;
mov.u32 %r6275, 4;
st.global.u32 [%rd6+4], %r6275;
mov.u32 %r6276, 0;
st.global.u32 [%rd6+8], %r6276;
st.global.u32 [%rd6+12], %r6276;
st.global.u32 [%rd6+16], %r6276;
st.global.u32 [%rd6+20], %r6276;
st.global.u32 [%rd6+24], %r6276;
st.global.u32 [%rd6+28], %r6276;
bra.uni $L__BB3_1261;
$L__BB3_1256:
and.b32 %r6268, %r7382, 32767;
and.b32 %r6269, %r7779, 32767;
bfi.b32 %r6270, %r6269, %r6268, 15, 15;
or.b32 %r6271, %r6270, -2147483648;
mov.u32 %r6272, 0;
st.global.u32 [%rd6], %r6272;
st.global.u32 [%rd6+4], %r6272;
st.global.u32 [%rd6+8], %r3195;
mov.u32 %r6273, 1;
st.global.u32 [%rd6+12], %r6273;
add.s32 %r6285, %r3202, -1;
st.global.u32 [%rd6+16], %r6285;
st.global.u32 [%rd6+20], %r3195;
st.global.u32 [%rd6+24], %r6272;
st.global.u32 [%rd6+28], %r6271;
bra.uni $L__BB3_1261;
$L__BB3_651:
setp.gt.u32 %p755, %r6381, 191;
setp.gt.u32 %p756, %r6829, 2879;
or.pred %p757, %p756, %p755;
mov.u32 %r7300, 1;
mov.u32 %r7302, %r7300;
@%p757 bra $L__BB3_655;
st.global.u8 [%rd22], %rs234;
add.s32 %r6381, %r6381, 1;
mov.u32 %r4804, 20548;
sub.s32 %r4805, %r4804, %r6829;
cvt.u64.u32 %rd404, %r4805;
add.s64 %rd405, %rd404, %rd5;
add.s64 %rd406, %rd1, %rd405;
st.global.u8 [%rd406], %rs1025;
add.s32 %r6829, %r6829, 1;
mov.u32 %r7300, %r6832;
mov.u32 %r7302, %r7298;
$L__BB3_655:
setp.eq.s32 %p759, %r7018, 0;
@%p759 bra $L__BB3_659;
sub.s32 %r4807, %r7017, %r7018;
mov.u32 %r4808, -1;
shl.b32 %r4809, %r4808, %r4807;
not.b32 %r4810, %r4809;
and.b32 %r4811, %r4810, 255;
shl.b32 %r4812, %r4811, %r7018;
or.b32 %r1712, %r4812, %r7019;
setp.eq.s32 %p760, %r1712, 255;
mov.u32 %r7304, %r7050;
@%p760 bra $L__BB3_661;
setp.gt.u32 %p761, %r7016, 17476;
mov.u32 %r7304, 1;
@%p761 bra $L__BB3_661;
cvt.u64.u32 %rd407, %r7016;
add.s64 %rd408, %rd407, %rd5;
add.s64 %rd409, %rd1, %rd408;
st.global.u8 [%rd409], %r1712;
add.s32 %r7016, %r7016, 1;
mov.u32 %r7304, %r7050;
bra.uni $L__BB3_661;
$L__BB3_659:
setp.ne.s32 %p762, %r7017, 7;
mov.u32 %r7304, %r7050;
@%p762 bra $L__BB3_661;
setp.eq.s32 %p763, %r7016, 0;
add.s32 %r4814, %r7016, -1;
selp.b32 %r7016, 0, %r4814, %p763;
mov.u32 %r7304, %r7050;
$L__BB3_661:
or.b32 %r4815, %r7302, %r7300;
or.b32 %r4816, %r4815, %r7304;
setp.eq.s32 %p764, %r4816, 0;
@%p764 bra $L__BB3_663;
mov.u32 %r4817, 1;
st.global.u32 [%rd6], %r4817;
mov.u32 %r4818, 3;
st.global.u32 [%rd6+4], %r4818;
mov.u32 %r4819, 0;
st.global.u32 [%rd6+8], %r4819;
st.global.u32 [%rd6+12], %r4819;
st.global.u32 [%rd6+16], %r4819;
st.global.u32 [%rd6+20], %r4819;
st.global.u32 [%rd6+24], %r4819;
st.global.u32 [%rd6+28], %r4819;
bra.uni $L__BB3_1261;
$L__BB3_663:
add.s32 %r4820, %r6381, %r6829;
add.s32 %r1717, %r4820, %r7016;
setp.lt.u32 %p765, %r1717, 2;
setp.gt.u32 %p766, %r1717, %r3204;
or.pred %p767, %p765, %p766;
@%p767 bra $L__BB3_665;
bra.uni $L__BB3_664;
$L__BB3_665:
mov.u32 %r4827, 1;
st.global.u32 [%rd6], %r4827;
mov.u32 %r4828, 4;
st.global.u32 [%rd6+4], %r4828;
mov.u32 %r4829, 0;
st.global.u32 [%rd6+8], %r4829;
st.global.u32 [%rd6+12], %r4829;
st.global.u32 [%rd6+16], %r4829;
st.global.u32 [%rd6+20], %r4829;
st.global.u32 [%rd6+24], %r4829;
st.global.u32 [%rd6+28], %r4829;
bra.uni $L__BB3_1261;
$L__BB3_664:
and.b32 %r4821, %r6381, 32767;
and.b32 %r4822, %r6829, 32767;
bfi.b32 %r4823, %r4822, %r4821, 15, 15;
or.b32 %r4824, %r4823, -2147483648;
mov.u32 %r4825, 0;
st.global.u32 [%rd6], %r4825;
st.global.u32 [%rd6+4], %r4825;
st.global.u32 [%rd6+8], %r1717;
mov.u32 %r4826, 1;
st.global.u32 [%rd6+12], %r4826;
add.s32 %r6284, %r3202, -1;
st.global.u32 [%rd6+16], %r6284;
st.global.u32 [%rd6+20], %r1717;
st.global.u32 [%rd6+24], %r4825;
st.global.u32 [%rd6+28], %r4824;
bra.uni $L__BB3_1261;
}
// .globl j2k_htj2k_encode_codeblocks_multi_input_cleanup_64
.visible .entry j2k_htj2k_encode_codeblocks_multi_input_cleanup_64(
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64_param_0,
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64_param_1,
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64_param_2,
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64_param_3,
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64_param_4,
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64_param_5,
.param .u64 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64_param_6
)
.maxntid 128, 1, 1
{
.reg .pred %p<705>;
.reg .b16 %rs<581>;
.reg .b32 %r<3887>;
.reg .b64 %rd<339>;
// demoted variable
.shared .align 4 .b8 _ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E9block_max[512];
// demoted variable
.shared .align 1 .b8 _ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val[513];
// demoted variable
.shared .align 1 .b8 _ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val[513];
ld.param.u64 %rd33, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_64_param_0];
ld.param.u64 %rd28, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_64_param_1];
ld.param.u64 %rd29, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_64_param_2];
ld.param.u64 %rd31, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_64_param_4];
ld.param.u64 %rd32, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_64_param_5];
ld.param.u64 %rd34, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_64_param_6];
cvta.to.global.u64 %rd1, %rd33;
mov.u32 %r1498, %ctaid.x;
cvt.u64.u32 %rd2, %r1498;
setp.ge.u64 %p1, %rd2, %rd34;
@%p1 bra $L__BB4_605;
cvta.to.global.u64 %rd35, %rd28;
mul.lo.s64 %rd36, %rd2, 40;
add.s64 %rd37, %rd35, %rd36;
ld.global.u64 %rd38, [%rd37];
cvta.to.global.u64 %rd3, %rd38;
ld.global.v2.u32 {%r1499, %r1500}, [%rd37+8];
ld.global.v2.u32 {%r1502, %r1503}, [%rd37+16];
ld.global.v2.u32 {%r1504, %r1505}, [%rd37+24];
ld.global.v2.u32 {%r1506, %r1507}, [%rd37+32];
cvt.u64.u32 %rd4, %r1499;
mov.u32 %r10, %tid.x;
mov.u32 %r3035, 0;
setp.lt.u32 %p2, %r10, 4096;
@%p2 bra $L__BB4_2;
bra.uni $L__BB4_4;
$L__BB4_2:
mov.u32 %r11, %ntid.x;
mov.u32 %r3033, %r10;
$L__BB4_3:
cvt.u64.u32 %rd39, %r3033;
add.s64 %rd40, %rd39, %rd4;
shl.b64 %rd41, %rd40, 2;
add.s64 %rd42, %rd3, %rd41;
ld.global.u32 %r1510, [%rd42];
abs.s32 %r1511, %r1510;
max.u32 %r3035, %r3035, %r1511;
add.s32 %r3033, %r3033, %r11;
setp.lt.u32 %p3, %r3033, 4096;
@%p3 bra $L__BB4_3;
$L__BB4_4:
shl.b32 %r1512, %r10, 2;
mov.u32 %r1513, _ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E9block_max;
add.s32 %r17, %r1513, %r1512;
st.shared.u32 [%r17], %r3035;
bar.sync 0;
mov.u32 %r1514, %ntid.x;
shr.u32 %r3036, %r1514, 1;
setp.eq.s32 %p4, %r3036, 0;
@%p4 bra $L__BB4_8;
$L__BB4_5:
setp.ge.u32 %p5, %r10, %r3036;
@%p5 bra $L__BB4_7;
ld.shared.u32 %r1515, [%r17];
shl.b32 %r1516, %r3036, 2;
add.s32 %r1517, %r17, %r1516;
ld.shared.u32 %r1518, [%r1517];
setp.gt.u32 %p6, %r1515, %r1518;
add.s32 %r1519, %r3036, %r10;
selp.b32 %r1520, %r10, %r1519, %p6;
shl.b32 %r1521, %r1520, 2;
add.s32 %r1523, %r1513, %r1521;
ld.shared.u32 %r1524, [%r1523];
st.shared.u32 [%r17], %r1524;
$L__BB4_7:
bar.sync 0;
shr.u32 %r3036, %r3036, 1;
setp.ne.s32 %p7, %r3036, 0;
@%p7 bra $L__BB4_5;
$L__BB4_8:
ld.shared.u32 %r21, [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E9block_max];
setp.ne.s32 %p8, %r10, 0;
@%p8 bra $L__BB4_605;
mov.u32 %r1525, 1;
cvt.u64.u32 %rd5, %r1505;
cvta.to.global.u64 %rd43, %rd32;
shl.b64 %rd44, %rd2, 5;
add.s64 %rd6, %rd43, %rd44;
st.global.u32 [%rd6], %r1525;
mov.u32 %r1526, 0;
st.global.u32 [%rd6+4], %r1526;
st.global.u32 [%rd6+8], %r1526;
st.global.u32 [%rd6+12], %r1526;
st.global.u32 [%rd6+16], %r1526;
st.global.u32 [%rd6+20], %r1526;
st.global.u32 [%rd6+24], %r1526;
st.global.u32 [%rd6+28], %r1526;
add.s32 %r1527, %r1502, -1;
setp.ge.u32 %p9, %r1527, %r1500;
setp.eq.s32 %p10, %r1503, 0;
or.pred %p11, %p9, %p10;
setp.gt.u32 %p12, %r1502, 1024;
or.pred %p13, %p12, %p11;
@%p13 bra $L__BB4_604;
cvt.u16.u32 %rs213, %r1502;
mov.u16 %rs214, 4096;
div.u16 %rs215, %rs214, %rs213;
cvt.u32.u16 %r1528, %rs215;
setp.gt.u32 %p14, %r1503, %r1528;
add.s32 %r22, %r1504, -1;
setp.gt.u32 %p15, %r22, 29;
or.pred %p16, %p15, %p14;
setp.lt.u32 %p17, %r1506, 20549;
or.pred %p18, %p17, %p16;
@%p18 bra $L__BB4_604;
bra.uni $L__BB4_11;
$L__BB4_604:
mov.u32 %r2986, 2;
st.global.u32 [%rd6], %r2986;
st.global.u32 [%rd6+4], %r1525;
st.global.u32 [%rd6+8], %r1526;
st.global.u32 [%rd6+12], %r1526;
st.global.u32 [%rd6+16], %r1526;
st.global.u32 [%rd6+20], %r1526;
st.global.u32 [%rd6+24], %r1526;
st.global.u32 [%rd6+28], %r1526;
$L__BB4_605:
ret;
$L__BB4_11:
setp.ne.s32 %p19, %r1502, 64;
setp.ne.s32 %p20, %r1503, 64;
or.pred %p21, %p19, %p20;
setp.ne.s32 %p22, %r1500, 64;
or.pred %p23, %p22, %p21;
@%p23 bra $L__BB4_603;
bra.uni $L__BB4_12;
$L__BB4_603:
mov.u32 %r2983, 2;
st.global.u32 [%rd6], %r2983;
mov.u32 %r2984, 1;
st.global.u32 [%rd6+4], %r2984;
mov.u32 %r2985, 0;
st.global.u32 [%rd6+8], %r2985;
st.global.u32 [%rd6+12], %r2985;
st.global.u32 [%rd6+16], %r2985;
st.global.u32 [%rd6+20], %r2985;
st.global.u32 [%rd6+24], %r2985;
st.global.u32 [%rd6+28], %r2985;
bra.uni $L__BB4_605;
$L__BB4_12:
setp.eq.s32 %p24, %r1507, 1;
@%p24 bra $L__BB4_14;
bra.uni $L__BB4_13;
$L__BB4_14:
setp.eq.s32 %p25, %r21, 0;
@%p25 bra $L__BB4_602;
clz.b32 %r1532, %r21;
mov.u32 %r1533, 32;
sub.s32 %r1534, %r1533, %r1532;
setp.gt.u32 %p26, %r1534, %r1504;
@%p26 bra $L__BB4_601;
bra.uni $L__BB4_16;
$L__BB4_601:
mov.u32 %r2979, 1;
st.global.u32 [%rd6], %r2979;
mov.u32 %r2980, 2;
st.global.u32 [%rd6+4], %r2980;
mov.u32 %r2981, 0;
st.global.u32 [%rd6+8], %r2981;
st.global.u32 [%rd6+12], %r2981;
st.global.u32 [%rd6+16], %r2981;
st.global.u32 [%rd6+20], %r2981;
st.global.u32 [%rd6+24], %r2981;
st.global.u32 [%rd6+28], %r2981;
bra.uni $L__BB4_605;
$L__BB4_13:
mov.u32 %r1529, 2;
st.global.u32 [%rd6], %r1529;
mov.u32 %r1530, 5;
st.global.u32 [%rd6+4], %r1530;
mov.u32 %r1531, 0;
st.global.u32 [%rd6+8], %r1531;
st.global.u32 [%rd6+12], %r1531;
st.global.u32 [%rd6+16], %r1531;
st.global.u32 [%rd6+20], %r1531;
st.global.u32 [%rd6+24], %r1531;
st.global.u32 [%rd6+28], %r1531;
bra.uni $L__BB4_605;
$L__BB4_602:
mov.u32 %r2982, 0;
st.global.u32 [%rd6], %r2982;
st.global.u32 [%rd6+4], %r2982;
st.global.u32 [%rd6+8], %r2982;
st.global.u32 [%rd6+12], %r2982;
st.global.u32 [%rd6+16], %r1504;
st.global.u32 [%rd6+20], %r2982;
st.global.u32 [%rd6+24], %r2982;
st.global.u32 [%rd6+28], %r2982;
bra.uni $L__BB4_605;
$L__BB4_16:
add.s64 %rd45, %rd1, %rd5;
mov.u16 %rs218, 255;
st.global.u8 [%rd45+20548], %rs218;
mov.u32 %r1550, 0;
mov.u16 %rs474, 0;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val], %rs474;
mov.u32 %r3276, 1;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+1], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+1], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+2], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+2], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+3], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+3], %rs474;
mov.u32 %r3509, 4;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+4], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+4], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+5], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+5], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+6], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+6], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+7], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+7], %rs474;
mov.u32 %r3120, 8;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+8], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+8], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+9], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+9], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+10], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+10], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+11], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+11], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+12], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+12], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+13], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+13], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+14], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+14], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+15], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+15], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+16], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+16], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+17], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+17], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+18], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+18], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+19], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+19], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+20], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+20], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+21], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+21], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+22], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+22], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+23], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+23], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+24], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+24], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+25], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+25], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+26], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+26], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+27], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+27], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+28], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+28], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+29], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+29], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+30], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+30], %rs474;
mov.u32 %r1552, 31;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+31], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+31], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+32], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+32], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+33], %rs474;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+33], %rs474;
sub.s32 %r23, %r1552, %r1504;
shl.b64 %rd46, %rd4, 2;
add.s64 %rd336, %rd3, %rd46;
mov.u16 %rs532, 15;
mov.u32 %r3037, %r1550;
mov.u32 %r3277, %r1550;
mov.u32 %r3275, %r1550;
mov.u32 %r3274, %r1550;
mov.u32 %r3111, %r1550;
mov.u32 %r3511, %r1550;
mov.u32 %r3510, %r3276;
mov.u32 %r3508, %r3276;
mov.u32 %r3829, %r1550;
mov.u32 %r3656, %r1550;
mov.u32 %r3655, %r1550;
mov.u32 %r3051, %r1550;
mov.u32 %r3654, %r1550;
mov.u32 %r3653, %r3120;
bra.uni $L__BB4_17;
// Out-of-line byte flush, entered from $L__BB4_42 when the bit counter %r3120
// has just reached 0. Rejoins the main path at $L__BB4_47.
$L__BB4_45:
// Defaults for the "no room" case: %r3121 = 1, %r3120 = 0.
setp.gt.u32 %p54, %r3111, 191;
mov.u32 %r3121, 1;
mov.u32 %r3120, 0;
// Skip the store when the byte index %r3111 is already above 191.
@%p54 bra $L__BB4_47;
// Store the low byte of %rs474 at %rd9, advance %r3111, clear the accumulator,
// reload the bit counter with 8 and pass %r3277 through unchanged in %r3121.
st.global.u8 [%rd9], %rs474;
add.s32 %r3111, %r3111, 1;
mov.u16 %rs474, 0;
mov.u32 %r3120, 8;
mov.u32 %r3121, %r3277;
bra.uni $L__BB4_47;
// Out-of-line byte flush, entered from $L__BB4_143 when the bit counter %r3120
// has just reached 0. Rejoins the main path at $L__BB4_148.
$L__BB4_146:
// Defaults for the "no room" case: %r3263 = 1, %r3120 = 0.
setp.gt.u32 %p163, %r3111, 191;
mov.u32 %r3263, 1;
mov.u32 %r3120, 0;
// Skip the store when the byte index %r3111 is already above 191.
@%p163 bra $L__BB4_148;
// Store the low byte of %rs474 at %rd10, advance %r3111, clear the accumulator,
// reload the bit counter with 8 and pass %r3277 through unchanged in %r3263.
st.global.u8 [%rd10], %rs474;
add.s32 %r3111, %r3111, 1;
mov.u16 %rs474, 0;
mov.u32 %r3120, 8;
mov.u32 %r3263, %r3277;
bra.uni $L__BB4_148;
// Out-of-line byte flush that rejoins at $L__BB4_263. Both the branch that
// enters here and the definition of %rd11 are outside this view.
$L__BB4_261:
// Defaults for the "no room" case: %r3380 = 1, %r3120 = 0.
setp.gt.u32 %p297, %r3111, 191;
mov.u32 %r3380, 1;
mov.u32 %r3120, 0;
// Skip the store when the byte index %r3111 is already above 191.
@%p297 bra $L__BB4_263;
// Store the low byte of %rs474 at %rd11 and advance %r3111.
and.b16 %rs303, %rs474, 255;
st.global.u8 [%rd11], %rs474;
add.s32 %r3111, %r3111, 1;
// The next byte gets 7 bits if the byte just stored was 255, otherwise 8.
setp.eq.s16 %p298, %rs303, 255;
selp.b32 %r3120, 7, 8, %p298;
mov.u16 %rs474, 0;
mov.u32 %r3380, %r3277;
bra.uni $L__BB4_263;
// Out-of-line byte flush, entered from $L__BB4_82 when the bit counter %r3120
// has just reached 0. Rejoins the main path at $L__BB4_86.
$L__BB4_84:
// Defaults for the "no room" case: %r3128 = 1, %r3120 = 0.
setp.gt.u32 %p101, %r3111, 191;
mov.u32 %r3128, 1;
mov.u32 %r3120, 0;
// Skip the store when the byte index %r3111 is already above 191.
@%p101 bra $L__BB4_86;
// Store the low byte of %rs474 at %rd9 and advance %r3111.
and.b16 %rs252, %rs474, 255;
st.global.u8 [%rd9], %rs474;
add.s32 %r3111, %r3111, 1;
// The next byte gets 7 bits if the byte just stored was 255, otherwise 8.
setp.eq.s16 %p102, %rs252, 255;
selp.b32 %r3120, 7, 8, %p102;
mov.u16 %rs474, 0;
mov.u32 %r3128, %r3277;
bra.uni $L__BB4_86;
// Out-of-line byte flush that rejoins at $L__BB4_187. The branch that enters
// here lies after the end of this view.
$L__BB4_185:
// Defaults for the "no room" case: %r3270 = 1, %r3120 = 0.
setp.gt.u32 %p210, %r3111, 191;
mov.u32 %r3270, 1;
mov.u32 %r3120, 0;
// Skip the store when the byte index %r3111 is already above 191.
@%p210 bra $L__BB4_187;
// Store the low byte of %rs474 at %rd10 and advance %r3111.
and.b16 %rs283, %rs474, 255;
st.global.u8 [%rd10], %rs474;
add.s32 %r3111, %r3111, 1;
// The next byte gets 7 bits if the byte just stored was 255, otherwise 8.
setp.eq.s16 %p211, %rs283, 255;
selp.b32 %r3120, 7, 8, %p211;
mov.u16 %rs474, 0;
mov.u32 %r3270, %r3277;
bra.uni $L__BB4_187;
// Loads four 32-bit values at %rd336+0, +256, +4 and +260 and classifies each.
// For a value x the same steps are applied:
//   s = (x == 0) ? initial value : (|x| << %r23) | (x & 0x80000000)
//   v = ((s << 1) >> %r23) & ~1
//   if v != 0: set one bit in the mask %r3062 (1, 2, 4, 8 in load order),
//              e = 32 - clz(v - 1), m = (s >> 31) + v - 2
// Results: e in %r3055/%r3059/%r3064/%r3069, m in %r3056/%r3060/%r3065/%r3070,
// running signed maximum of the e values in %r3066.
// NOTE(review): +256 bytes is 64 32-bit words; presumably the next row of a
// 64-wide block -- confirm against the kernel source.
$L__BB4_17:
// Value 0: [%rd336]. A zero input takes %r1550 (defined before this view).
ld.global.u32 %r41, [%rd336];
setp.eq.s32 %p27, %r41, 0;
mov.u32 %r3054, %r1550;
@%p27 bra $L__BB4_19;
// Non-zero input: shifted magnitude OR-ed with the original sign bit.
and.b32 %r1554, %r41, -2147483648;
abs.s32 %r1555, %r41;
shl.b32 %r1556, %r1555, %r23;
or.b32 %r3054, %r1556, %r1554;
$L__BB4_19:
// %r44 = v for value 0; when it is 0 the outputs below all stay 0.
shl.b32 %r1560, %r3054, 1;
shr.u32 %r1561, %r1560, %r23;
and.b32 %r44, %r1561, -2;
setp.eq.s32 %p28, %r44, 0;
mov.u32 %r3058, 0;
mov.u32 %r3055, %r3058;
mov.u32 %r3056, %r3058;
mov.u32 %r3062, %r3058;
@%p28 bra $L__BB4_21;
add.s32 %r1563, %r44, -1;
clz.b32 %r1564, %r1563;
mov.u32 %r1565, 32;
sub.s32 %r3055, %r1565, %r1564;
shr.u32 %r1566, %r3054, 31;
add.s32 %r1567, %r1566, %r44;
add.s32 %r3056, %r1567, -2;
// Mask bit 0 marks value 0 as non-zero.
mov.u32 %r3062, 1;
$L__BB4_21:
// Value 1: [%rd336+256]. %r3058 was preset to 0 for the zero-input case.
ld.global.u32 %r50, [%rd336+256];
setp.eq.s32 %p29, %r50, 0;
@%p29 bra $L__BB4_23;
and.b32 %r1569, %r50, -2147483648;
abs.s32 %r1570, %r50;
shl.b32 %r1571, %r1570, %r23;
or.b32 %r3058, %r1571, %r1569;
$L__BB4_23:
shl.b32 %r1574, %r3058, 1;
shr.u32 %r1575, %r1574, %r23;
and.b32 %r53, %r1575, -2;
setp.eq.s32 %p30, %r53, 0;
mov.u32 %r3063, 0;
mov.u32 %r3059, %r3063;
mov.u32 %r3060, %r3063;
// The running maximum starts as the e of value 0.
mov.u32 %r3066, %r3055;
@%p30 bra $L__BB4_25;
// Mask bit 1 marks value 1 as non-zero.
or.b32 %r3062, %r3062, 2;
add.s32 %r1576, %r53, -1;
clz.b32 %r1577, %r1576;
mov.u32 %r1578, 32;
sub.s32 %r3059, %r1578, %r1577;
max.s32 %r3066, %r3055, %r3059;
shr.u32 %r1579, %r3058, 31;
add.s32 %r1580, %r1579, %r53;
add.s32 %r3060, %r1580, -2;
$L__BB4_25:
// Value 2: [%rd336+4].
ld.global.u32 %r62, [%rd336+4];
setp.eq.s32 %p31, %r62, 0;
@%p31 bra $L__BB4_27;
and.b32 %r1582, %r62, -2147483648;
abs.s32 %r1583, %r62;
shl.b32 %r1584, %r1583, %r23;
or.b32 %r3063, %r1584, %r1582;
$L__BB4_27:
shl.b32 %r1587, %r3063, 1;
shr.u32 %r1588, %r1587, %r23;
and.b32 %r65, %r1588, -2;
setp.eq.s32 %p32, %r65, 0;
mov.u32 %r3068, 0;
mov.u32 %r3064, %r3068;
mov.u32 %r3065, %r3068;
@%p32 bra $L__BB4_29;
// Mask bit 2 marks value 2 as non-zero.
or.b32 %r3062, %r3062, 4;
add.s32 %r1589, %r65, -1;
clz.b32 %r1590, %r1589;
mov.u32 %r1591, 32;
sub.s32 %r3064, %r1591, %r1590;
max.s32 %r3066, %r3066, %r3064;
shr.u32 %r1592, %r3063, 31;
add.s32 %r1593, %r1592, %r65;
add.s32 %r3065, %r1593, -2;
$L__BB4_29:
// Value 3: [%rd336+260].
ld.global.u32 %r74, [%rd336+260];
setp.eq.s32 %p33, %r74, 0;
@%p33 bra $L__BB4_31;
and.b32 %r1595, %r74, -2147483648;
abs.s32 %r1596, %r74;
shl.b32 %r1597, %r1596, %r23;
or.b32 %r3068, %r1597, %r1595;
$L__BB4_31:
shl.b32 %r1600, %r3068, 1;
shr.u32 %r1601, %r1600, %r23;
and.b32 %r77, %r1601, -2;
setp.eq.s32 %p34, %r77, 0;
// %r3073 is preset to 0 here; $L__BB4_33 overwrites it only when %r3066 >= 2.
mov.u32 %r3073, 0;
mov.u32 %r3069, %r3073;
mov.u32 %r3070, %r3073;
@%p34 bra $L__BB4_33;
// Mask bit 3 marks value 3 as non-zero.
or.b32 %r3062, %r3062, 8;
add.s32 %r1602, %r77, -1;
clz.b32 %r1603, %r1602;
mov.u32 %r1604, 32;
sub.s32 %r3069, %r1604, %r1603;
max.s32 %r3066, %r3066, %r3069;
shr.u32 %r1605, %r3068, 31;
add.s32 %r1606, %r1605, %r77;
add.s32 %r3070, %r1606, -2;
// Builds the 4-bit mask %r3073 that records which of the four e values
// (%r3055, %r3059, %r3064, %r3069) equal their maximum %r3066.
// When %r3066 < 2 the mask keeps the 0 written before this label.
$L__BB4_33:
// %r86 = max(%r3066 - 1, 0) for %r3066 > 1, else 0 (not read again in this view).
add.s32 %r1608, %r3066, -1;
setp.lt.s32 %p35, %r3066, 2;
setp.gt.s32 %p36, %r3066, 1;
selp.b32 %r86, %r1608, 0, %p36;
@%p35 bra $L__BB4_35;
// Bit 0: value 0 is at the maximum.
setp.eq.s32 %p37, %r3055, %r3066;
selp.u32 %r1609, 1, 0, %p37;
// Bit 1: value 1 is at the maximum (bfi inserts one bit at position 1).
setp.eq.s32 %p38, %r3059, %r3066;
selp.u32 %r1610, -1, 0, %p38;
bfi.b32 %r1611, %r1610, %r1609, 1, 1;
// Bit 2: value 2 is at the maximum.
setp.eq.s32 %p39, %r3064, %r3066;
selp.u16 %rs219, 1, 0, %p39;
mul.wide.u16 %r1612, %rs219, 4;
or.b32 %r1613, %r1611, %r1612;
// Bit 3: value 3 is at the maximum.
setp.eq.s32 %p40, %r3069, %r3066;
selp.u16 %rs220, 1, 0, %p40;
mul.wide.u16 %r1614, %rs220, 8;
or.b32 %r3073, %r1613, %r1614;
// Three steps for the first group of four values:
//   1. update the shared cleanup_e_val / cleanup_cx_val arrays at index %r3037 >> 1,
//   2. read a 16-bit entry from the global table behind %rd29,
//   3. append the entry's bit string to a byte stream that is written backwards.
// NOTE(review): the packer matches the VLC stream of an HTJ2K cleanup pass;
// the stream name is inferred, not shown here -- confirm.
$L__BB4_35:
// e_val[i] = max(e_val[i], low byte of %r3059); e_val[i+1] = low byte of %r3069.
shr.u32 %r1615, %r3037, 1;
mov.u32 %r1616, _ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val;
add.s32 %r1617, %r1616, %r1615;
ld.shared.u8 %rs221, [%r1617];
cvt.u32.u16 %r1618, %rs221;
and.b32 %r1619, %r1618, 255;
and.b32 %r1620, %r3059, 255;
setp.lt.u32 %p41, %r1620, %r1619;
cvt.u16.u32 %rs222, %r3059;
selp.b16 %rs223, %rs221, %rs222, %p41;
st.shared.u8 [%r1617], %rs223;
st.shared.u8 [%r1617+1], %r3069;
// cx_val[i] |= bit 1 of the mask %r3062; cx_val[i+1] = bit 3 of the mask.
mov.u32 %r1621, _ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val;
add.s32 %r1622, %r1621, %r1615;
and.b32 %r89, %r3062, 2;
cvt.u16.u32 %rs224, %r89;
shr.u16 %rs225, %rs224, 1;
ld.shared.u8 %rs226, [%r1622];
or.b16 %rs227, %rs226, %rs225;
st.shared.u8 [%r1622], %rs227;
and.b32 %r90, %r3062, 8;
shr.u32 %r91, %r90, 3;
st.shared.u8 [%r1622+1], %r91;
// Table index = (%r3051 << 8) | (%r3062 << 4) | %r3073; entries are 2 bytes.
shl.b32 %r1623, %r3062, 4;
shl.b32 %r1624, %r3051, 8;
or.b32 %r1625, %r1623, %r1624;
or.b32 %r1626, %r1625, %r3073;
cvta.to.global.u64 %rd47, %rd29;
mul.wide.u32 %rd48, %r1626, 2;
add.s64 %rd49, %rd47, %rd48;
ld.global.u16 %rs3, [%rd49];
// Entry layout as used here: bits 4..6 = bit count, bits 8..15 = bits to emit,
// bits 0..3 are consumed later at $L__BB4_90.
shr.u16 %rs228, %rs3, 4;
and.b16 %rs4, %rs228, 7;
setp.eq.s16 %p42, %rs4, 0;
mov.u32 %r3085, %r3511;
// Nothing to emit when the bit count is 0.
@%p42 bra $L__BB4_42;
cvt.u32.u16 %r3074, %rs4;
shr.u16 %rs229, %rs3, 8;
cvt.u32.u16 %r3075, %rs229;
// Packer loop: %r3074 = bits left, %r3075 = pending bits, %rs532 = partial byte,
// %r3509 = bits used in it, %r3510 = 1 when one bit of the byte is held back,
// %r3508 = bytes written so far.
$L__BB4_37:
mov.u16 %rs5, %rs532;
mov.u32 %r94, %r3074;
// Stop with %r3085 = 1 once %r3508 exceeds 2879.
setp.gt.u32 %p43, %r3508, 2879;
mov.u32 %r3085, 1;
@%p43 bra $L__BB4_42;
// Free bits = 8 - %r3510 - %r3509; take min(free, remaining) bits this round.
mov.u32 %r1628, 8;
sub.s32 %r1629, %r1628, %r3510;
sub.s32 %r1630, %r1629, %r3509;
min.u32 %r1631, %r1630, %r94;
// Mask of the low %r1631 bits (all ones if the count were 32).
setp.eq.s32 %p44, %r1631, 32;
mov.u32 %r1632, -1;
shl.b32 %r1633, %r1632, %r1631;
not.b32 %r1634, %r1633;
selp.b32 %r1635, -1, %r1634, %p44;
and.b32 %r1636, %r1635, %r3075;
shl.b32 %r1637, %r1636, %r3509;
cvt.u16.u32 %rs230, %r1637;
or.b16 %rs532, %rs5, %rs230;
add.s32 %r3509, %r1631, %r3509;
sub.s32 %r3074, %r94, %r1631;
shr.u32 %r3075, %r3075, %r1631;
// Byte not yet full: go check for more pending bits.
setp.gt.u32 %p45, %r1630, %r94;
@%p45 bra $L__BB4_41;
// Byte full. If a bit was being held back and the byte is not 127, release the
// held-back bit (%r3510 = 0) and keep filling instead of storing.
setp.ne.s32 %p46, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs231, %rs532, 255;
setp.ne.s16 %p47, %rs231, 127;
and.pred %p48, %p46, %p47;
@%p48 bra $L__BB4_41;
// Store the byte at %rd1 + %rd5 + (20548 - %r3508): the stream grows downwards.
cvt.u16.u32 %rs451, %r1637;
or.b16 %rs450, %rs5, %rs451;
mov.u32 %r1640, 20548;
sub.s32 %r1641, %r1640, %r3508;
cvt.u64.u32 %rd50, %r1641;
add.s64 %rd51, %rd50, %rd5;
add.s64 %rd52, %rd1, %rd51;
st.global.u8 [%rd52], %rs450;
add.s32 %r3508, %r3508, 1;
// Hold one bit back in the next byte when the stored byte is above 143 (0x8F).
setp.gt.u16 %p49, %rs231, 143;
selp.u32 %r3510, 1, 0, %p49;
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_41:
// Repeat while bits remain; on normal exit %r3085 carries %r3511 unchanged.
setp.ne.s32 %p50, %r3074, 0;
mov.u32 %r3085, %r3511;
@%p50 bra $L__BB4_37;
// Dispatch for the second bit stream, first group. It runs only when %r3051 == 0.
// NOTE(review): presumably the MEL run-length coder of HTJ2K -- confirm.
$L__BB4_42:
setp.ne.s32 %p51, %r3051, 0;
@%p51 bra $L__BB4_90;
// %rd9 = %rd1 + %rd5 + 17477 + %r3111: address of the next byte of this stream.
setp.eq.s32 %p52, %r3062, 0;
add.s32 %r1642, %r3111, 17477;
cvt.u64.u32 %rd53, %r1642;
add.s64 %rd54, %rd53, %rd5;
add.s64 %rd9, %rd1, %rd54;
// All four values were zero (%r3062 == 0): extend the current run at $L__BB4_82.
@%p52 bra $L__BB4_82;
// Otherwise append a 0 bit to %rs474 and use up one bit of the counter %r3120.
shl.b16 %rs474, %rs474, 1;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p53, %r3120, 0;
mov.u32 %r3121, %r3277;
// Counter reached 0: flush the byte at $L__BB4_45, which rejoins at $L__BB4_47.
@%p53 bra $L__BB4_47;
bra.uni $L__BB4_45;
// Maps the state %r3275 to a bit count %r3089:
//   0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, anything else -> 5.
$L__BB4_47:
setp.lt.u32 %p55, %r3275, 3;
mov.u32 %r3089, 0;
@%p55 bra $L__BB4_50;
setp.lt.u32 %p56, %r3275, 6;
mov.u32 %r3089, 1;
@%p56 bra $L__BB4_50;
// Branch-free selection for the remaining ranges.
setp.lt.u32 %p57, %r3275, 9;
setp.eq.s32 %p58, %r3275, 11;
selp.b32 %r1648, 4, 5, %p58;
setp.lt.u32 %p59, %r3275, 11;
selp.b32 %r1649, 3, %r1648, %p59;
selp.b32 %r3089, 2, %r1649, %p57;
// Emits the low %r3089 bits of the run counter %r3274, highest bit first.
// This part handles the first (%r3089 & 3) bits; the rest are emitted four at
// a time by the loop at $L__BB4_65.
// Per bit: %rs474 = (%rs474 << 1) | bit, %r3120 -= 1, and when %r3120 hits 0 the
// byte is stored at %rd1 + %rd5 + 17477 + %r3111, unless %r3111 > 191, in which
// case the flag register is set to 1 and nothing is stored.
// %r3099 tracks the bits still to emit; %r3100 / %r3095 / %r3121 carry the flag.
$L__BB4_50:
// No bits to emit when %r3089 == 0.
setp.eq.s32 %p60, %r3089, 0;
@%p60 bra $L__BB4_78;
and.b32 %r119, %r3089, 3;
setp.eq.s32 %p61, %r119, 0;
mov.u32 %r3099, %r3089;
mov.u32 %r3100, %r3121;
// A multiple of four goes straight to the unrolled loop.
@%p61 bra $L__BB4_63;
// First leftover bit: bit (%r3089 - 1) of %r3274.
add.s32 %r3005, %r3089, -1;
mov.u32 %r1651, 1;
shl.b32 %r1652, %r1651, %r3005;
and.b32 %r1653, %r1652, %r3274;
setp.ne.s32 %p62, %r1653, 0;
selp.u32 %r1654, 1, 0, %p62;
cvt.u32.u16 %r1655, %rs474;
bfi.b32 %r1656, %r1655, %r1654, 1, 8;
cvt.u16.u32 %rs474, %r1656;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p63, %r3120, 0;
mov.u32 %r3100, %r3121;
@%p63 bra $L__BB4_55;
setp.gt.u32 %p64, %r3111, 191;
mov.u32 %r3120, 0;
mov.u32 %r3100, %r1651;
@%p64 bra $L__BB4_55;
// Flush after the first bit: the counter is reloaded with 8 unconditionally
// here, while the later flushes choose 7 when the stored byte equals 255.
add.s32 %r1660, %r3111, 17477;
cvt.u64.u32 %rd55, %r1660;
add.s64 %rd56, %rd55, %rd5;
add.s64 %rd57, %rd1, %rd56;
st.global.u8 [%rd57], %rs474;
add.s32 %r3111, %r3111, 1;
mov.u16 %rs474, 0;
mov.u32 %r3120, 8;
mov.u32 %r3100, %r3121;
$L__BB4_55:
// Done with the leftovers if (%r3089 & 3) == 1.
and.b32 %r3007, %r3089, 3;
add.s32 %r3099, %r3089, -1;
setp.eq.s32 %p65, %r3007, 1;
mov.u32 %r3121, %r3100;
@%p65 bra $L__BB4_63;
// Second leftover bit: bit (%r3089 - 2) of %r3274.
add.s32 %r3099, %r3089, -2;
mov.u32 %r1661, 1;
shl.b32 %r1662, %r1661, %r3099;
and.b32 %r1663, %r1662, %r3274;
setp.ne.s32 %p66, %r1663, 0;
selp.u32 %r1664, 1, 0, %p66;
cvt.u32.u16 %r1665, %rs474;
bfi.b32 %r1666, %r1665, %r1664, 1, 8;
cvt.u16.u32 %rs474, %r1666;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p67, %r3120, 0;
mov.u32 %r3095, %r3100;
@%p67 bra $L__BB4_59;
setp.gt.u32 %p68, %r3111, 191;
mov.u32 %r3120, 0;
mov.u32 %r3095, %r1661;
@%p68 bra $L__BB4_59;
// Flush: next byte gets 7 bits if the stored byte is 255, otherwise 8.
add.s32 %r1669, %r3111, 17477;
cvt.u64.u32 %rd58, %r1669;
add.s64 %rd59, %rd58, %rd5;
add.s64 %rd60, %rd1, %rd59;
and.b16 %rs238, %rs474, 255;
st.global.u8 [%rd60], %rs474;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p69, %rs238, 255;
selp.b32 %r3120, 7, 8, %p69;
mov.u16 %rs474, 0;
mov.u32 %r3095, %r3100;
$L__BB4_59:
// Done with the leftovers if (%r3089 & 3) == 2.
and.b32 %r3008, %r3089, 3;
setp.eq.s32 %p70, %r3008, 2;
mov.u32 %r3121, %r3095;
mov.u32 %r3100, %r3095;
@%p70 bra $L__BB4_63;
// Third leftover bit: bit (%r3089 - 3) of %r3274.
add.s32 %r3099, %r3089, -3;
mov.u32 %r1670, 1;
shl.b32 %r1671, %r1670, %r3099;
and.b32 %r1672, %r1671, %r3274;
setp.ne.s32 %p71, %r1672, 0;
selp.u32 %r1673, 1, 0, %p71;
cvt.u32.u16 %r1674, %rs474;
bfi.b32 %r1675, %r1674, %r1673, 1, 8;
cvt.u16.u32 %rs474, %r1675;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p72, %r3120, 0;
mov.u32 %r3121, %r3095;
mov.u32 %r3100, %r3095;
@%p72 bra $L__BB4_63;
add.s32 %r3099, %r3089, -3;
setp.gt.u32 %p73, %r3111, 191;
mov.u32 %r3120, 0;
mov.u32 %r3121, %r1670;
mov.u32 %r3100, %r1670;
@%p73 bra $L__BB4_63;
// Flush: next byte gets 7 bits if the stored byte is 255, otherwise 8.
add.s32 %r3099, %r3089, -3;
add.s32 %r1680, %r3111, 17477;
cvt.u64.u32 %rd61, %r1680;
add.s64 %rd62, %rd61, %rd5;
add.s64 %rd63, %rd1, %rd62;
and.b16 %rs241, %rs474, 255;
st.global.u8 [%rd63], %rs474;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p74, %rs241, 255;
selp.b32 %r3120, 7, 8, %p74;
mov.u16 %rs474, 0;
mov.u32 %r3121, %r3095;
mov.u32 %r3100, %r3095;
// Four-way unrolled remainder of the bit emission started at $L__BB4_50.
// It is entered only when %r3089 - 1 >= 3. Each pass emits bits %r3099-1 down
// to %r3099-4 of %r3274, then subtracts 4 from %r3099 and repeats while it is
// non-zero.
// Steps 1-3 keep accumulator / bit counter / flag in temporaries
// (%r3109,%r3108,%r3110), (%r3113,%r3112,%r3114), (%r3117,%r3116,%r3118);
// step 4 writes them back to %rs474, %r3120 and %r3121.
$L__BB4_63:
add.s32 %r3009, %r3089, -1;
setp.lt.u32 %p75, %r3009, 3;
@%p75 bra $L__BB4_78;
mov.u32 %r3121, %r3100;
$L__BB4_65:
// Step 1: append bit (%r3099 - 1).
add.s32 %r1681, %r3099, -1;
mov.u32 %r1682, 1;
shl.b32 %r1683, %r1682, %r1681;
and.b32 %r1684, %r1683, %r3274;
setp.ne.s32 %p76, %r1684, 0;
selp.u32 %r1685, 1, 0, %p76;
cvt.u32.u16 %r1686, %rs474;
bfi.b32 %r3109, %r1686, %r1685, 1, 8;
add.s32 %r3108, %r3120, -1;
setp.ne.s32 %p77, %r3108, 0;
mov.u32 %r3110, %r3121;
@%p77 bra $L__BB4_68;
// Counter exhausted: set the flag to 1 and skip the store if %r3111 > 191.
setp.gt.u32 %p78, %r3111, 191;
mov.u32 %r3108, 0;
mov.u32 %r3110, %r1682;
@%p78 bra $L__BB4_68;
// Store the byte; the next byte gets 7 bits after a 255, otherwise 8.
cvt.u16.u32 %rs242, %r3109;
and.b16 %rs243, %rs242, 255;
add.s32 %r1690, %r3111, 17477;
cvt.u64.u32 %rd64, %r1690;
add.s64 %rd65, %rd64, %rd5;
add.s64 %rd66, %rd1, %rd65;
st.global.u8 [%rd66], %rs242;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p79, %rs243, 255;
selp.b32 %r3108, 7, 8, %p79;
mov.u32 %r3109, 0;
mov.u32 %r3110, %r3121;
$L__BB4_68:
// Step 2: append bit (%r3099 - 2); %r1682 still holds 1 from step 1.
add.s32 %r1691, %r3099, -2;
shl.b32 %r1693, %r1682, %r1691;
and.b32 %r1694, %r1693, %r3274;
setp.ne.s32 %p80, %r1694, 0;
and.b32 %r1695, %r3109, 127;
selp.u32 %r1696, 1, 0, %p80;
bfi.b32 %r3113, %r1695, %r1696, 1, 7;
add.s32 %r3112, %r3108, -1;
setp.ne.s32 %p81, %r3112, 0;
mov.u32 %r3114, %r3110;
@%p81 bra $L__BB4_71;
setp.gt.u32 %p82, %r3111, 191;
mov.u32 %r3114, 1;
mov.u32 %r3112, 0;
@%p82 bra $L__BB4_71;
cvt.u16.u32 %rs244, %r3113;
and.b16 %rs245, %rs244, 255;
add.s32 %r1700, %r3111, 17477;
cvt.u64.u32 %rd67, %r1700;
add.s64 %rd68, %rd67, %rd5;
add.s64 %rd69, %rd1, %rd68;
st.global.u8 [%rd69], %rs244;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p83, %rs245, 255;
selp.b32 %r3112, 7, 8, %p83;
mov.u32 %r3113, 0;
mov.u32 %r3114, %r3110;
$L__BB4_71:
// Step 3: append bit (%r3099 - 3).
add.s32 %r1701, %r3099, -3;
mov.u32 %r1702, 1;
shl.b32 %r1703, %r1702, %r1701;
and.b32 %r1704, %r1703, %r3274;
setp.ne.s32 %p84, %r1704, 0;
and.b32 %r1705, %r3113, 127;
selp.u32 %r1706, 1, 0, %p84;
bfi.b32 %r3117, %r1705, %r1706, 1, 7;
add.s32 %r3116, %r3112, -1;
setp.ne.s32 %p85, %r3116, 0;
mov.u32 %r3118, %r3114;
@%p85 bra $L__BB4_74;
setp.gt.u32 %p86, %r3111, 191;
mov.u32 %r3116, 0;
mov.u32 %r3118, %r1702;
@%p86 bra $L__BB4_74;
cvt.u16.u32 %rs246, %r3117;
and.b16 %rs247, %rs246, 255;
add.s32 %r1710, %r3111, 17477;
cvt.u64.u32 %rd70, %r1710;
add.s64 %rd71, %rd70, %rd5;
add.s64 %rd72, %rd1, %rd71;
st.global.u8 [%rd72], %rs246;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p87, %rs247, 255;
selp.b32 %r3116, 7, 8, %p87;
mov.u32 %r3117, 0;
mov.u32 %r3118, %r3114;
$L__BB4_74:
// Step 4: append bit (%r3099 - 4) and commit to %rs474 / %r3120 / %r3121.
add.s32 %r3099, %r3099, -4;
shl.b32 %r1712, %r1702, %r3099;
and.b32 %r1713, %r1712, %r3274;
setp.ne.s32 %p88, %r1713, 0;
and.b32 %r1714, %r3117, 127;
selp.u32 %r1715, 1, 0, %p88;
bfi.b32 %r1716, %r1714, %r1715, 1, 15;
cvt.u16.u32 %rs474, %r1716;
add.s32 %r3120, %r3116, -1;
setp.ne.s32 %p89, %r3120, 0;
mov.u32 %r3121, %r3118;
@%p89 bra $L__BB4_77;
setp.gt.u32 %p90, %r3111, 191;
mov.u32 %r3121, 1;
mov.u32 %r3120, 0;
@%p90 bra $L__BB4_77;
add.s32 %r1719, %r3111, 17477;
cvt.u64.u32 %rd73, %r1719;
add.s64 %rd74, %rd73, %rd5;
add.s64 %rd75, %rd1, %rd74;
and.b16 %rs249, %rs474, 255;
st.global.u8 [%rd75], %rs474;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p91, %rs249, 255;
selp.b32 %r3120, 7, 8, %p91;
mov.u16 %rs474, 0;
mov.u32 %r3121, %r3118;
$L__BB4_77:
// Loop until all bits of this group have been emitted.
setp.ne.s32 %p92, %r3099, 0;
@%p92 bra $L__BB4_65;
// State update after the run counter has been emitted:
// %r3275 = (%r3275 == 0) ? 0 : %r3275 - 1, %r3274 = 0, and the threshold
// %r3276 = 1 << count(%r3275), with the same state-to-count mapping as
// $L__BB4_47 (0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, else 5).
$L__BB4_78:
add.s32 %r1721, %r3275, -1;
setp.eq.s32 %p93, %r3275, 0;
mov.u32 %r3274, 0;
selp.b32 %r3275, 0, %r1721, %p93;
setp.lt.u32 %p94, %r3275, 3;
mov.u32 %r3125, %r3274;
@%p94 bra $L__BB4_81;
setp.lt.u32 %p95, %r3275, 6;
mov.u32 %r3125, 1;
@%p95 bra $L__BB4_81;
setp.lt.u32 %p96, %r3275, 9;
setp.eq.s32 %p97, %r3275, 11;
selp.b32 %r1723, 4, 5, %p97;
setp.lt.u32 %p98, %r3275, 11;
selp.b32 %r1724, 3, %r1723, %p98;
selp.b32 %r3125, 2, %r1724, %p96;
$L__BB4_81:
mov.u32 %r1726, 1;
shl.b32 %r3276, %r1726, %r3125;
// Publish the flag accumulated during emission back into %r3277.
mov.u32 %r3277, %r3121;
bra.uni $L__BB4_90;
// Path taken when all four values of the group are zero (%r3062 == 0):
// extend the current run. Nothing is emitted while the counter %r3274 stays
// below the threshold %r3276.
$L__BB4_82:
add.s32 %r3274, %r3274, 1;
setp.lt.u32 %p99, %r3274, %r3276;
@%p99 bra $L__BB4_90;
// Threshold reached: append a 1 bit to %rs474 and use up one counter bit.
shl.b16 %rs250, %rs474, 1;
or.b16 %rs474, %rs250, 1;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p100, %r3120, 0;
mov.u32 %r3128, %r3277;
// Counter reached 0: flush the byte at $L__BB4_84, which rejoins at $L__BB4_86.
@%p100 bra $L__BB4_86;
bra.uni $L__BB4_84;
// State update after a completed run: %r3275 = min(%r3275 + 1, 12), %r3274 = 0,
// and the threshold %r3276 = 1 << count(%r3275), with the mapping
// 0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, else 5.
$L__BB4_86:
add.s32 %r1730, %r3275, 1;
min.u32 %r3275, %r1730, 12;
setp.lt.u32 %p103, %r3275, 3;
mov.u32 %r3274, 0;
mov.u32 %r3129, %r3274;
@%p103 bra $L__BB4_89;
setp.lt.u32 %p104, %r3275, 6;
mov.u32 %r3129, 1;
@%p104 bra $L__BB4_89;
setp.lt.u32 %p105, %r3275, 9;
setp.eq.s32 %p106, %r3275, 11;
selp.b32 %r1732, 4, 5, %p106;
setp.lt.u32 %p107, %r3275, 11;
selp.b32 %r1733, 3, %r1732, %p107;
selp.b32 %r3129, 2, %r1733, %p105;
$L__BB4_89:
mov.u32 %r1735, 1;
shl.b32 %r3276, %r1735, %r3129;
// Publish the flag (possibly set to 1 by the flush at $L__BB4_84) into %r3277.
mov.u32 %r3277, %r3128;
// Forward bit packer for value 0 of the first group (mask bit 0 of %r3062).
// Bit count = max(%r3066, 1) - (bit 0 of the table entry's low nibble %r203);
// the bits are the low bits of %r3056. Bytes go to %rd1 + %rd5 + %r3654.
// State: %r3656 = partial byte, %r3655 = bits used, %r3653 = byte capacity
// (8, or 7 after a stored 255), %r3654 = bytes written. %r3150 becomes 1 when
// %r3654 exceeds 17476; otherwise it carries %r3829.
// NOTE(review): presumably the MagSgn stream of HTJ2K -- confirm.
$L__BB4_90:
max.s32 %r202, %r3066, 1;
and.b16 %rs253, %rs3, 15;
cvt.u32.u16 %r203, %rs253;
and.b32 %r204, %r3062, 1;
setp.eq.s32 %p108, %r204, 0;
mov.u32 %r3150, %r3829;
// Value 0 was zero: nothing to emit.
@%p108 bra $L__BB4_97;
and.b32 %r1736, %r203, 1;
sub.s32 %r3136, %r202, %r1736;
setp.eq.s32 %p109, %r3136, 0;
mov.u32 %r3150, %r3829;
@%p109 bra $L__BB4_97;
// Keep only the low %r3136 bits of %r3056.
mov.u32 %r1737, -1;
shl.b32 %r1738, %r1737, %r3136;
not.b32 %r1739, %r1738;
and.b32 %r3137, %r3056, %r1739;
$L__BB4_93:
setp.gt.u32 %p110, %r3654, 17476;
mov.u32 %r3150, 1;
@%p110 bra $L__BB4_97;
// Take min(free bits, remaining bits) and OR them in at position %r3655.
sub.s32 %r1741, %r3653, %r3655;
min.u32 %r1742, %r1741, %r3136;
setp.eq.s32 %p111, %r1742, 32;
mov.u32 %r1743, -1;
shl.b32 %r1744, %r1743, %r1742;
not.b32 %r1745, %r1744;
selp.b32 %r1746, -1, %r1745, %p111;
and.b32 %r1747, %r1746, %r3137;
shl.b32 %r1748, %r1747, %r3655;
or.b32 %r3656, %r1748, %r3656;
add.s32 %r3655, %r1742, %r3655;
shr.u32 %r3137, %r3137, %r1742;
sub.s32 %r3136, %r3136, %r1742;
setp.lt.u32 %p112, %r3655, %r3653;
@%p112 bra $L__BB4_96;
// Byte full: store it and pick the capacity of the next byte.
cvt.u64.u32 %rd76, %r3654;
add.s64 %rd77, %rd76, %rd5;
add.s64 %rd78, %rd1, %rd77;
st.global.u8 [%rd78], %r3656;
add.s32 %r3654, %r3654, 1;
setp.eq.s32 %p113, %r3656, 255;
selp.b32 %r3653, 7, 8, %p113;
mov.u32 %r3655, 0;
mov.u32 %r3656, %r3655;
$L__BB4_96:
setp.ne.s32 %p114, %r3136, 0;
mov.u32 %r3150, %r3829;
@%p114 bra $L__BB4_93;
// Forward bit packer for value 1 of the first group (mask bit 1 of %r3062).
// Bit count = %r202 - (bit 1 of %r203); the bits are the low bits of %r3060.
// Uses the shared packer state %r3653..%r3656. %r3165 becomes 1 when %r3654
// exceeds 17476; otherwise it carries %r3150.
$L__BB4_97:
and.b32 %r2990, %r3062, 2;
setp.eq.s32 %p115, %r2990, 0;
mov.u32 %r3165, %r3150;
// Value 1 was zero: nothing to emit.
@%p115 bra $L__BB4_104;
shr.u32 %r1751, %r203, 1;
and.b32 %r1752, %r1751, 1;
sub.s32 %r3151, %r202, %r1752;
setp.eq.s32 %p116, %r3151, 0;
mov.u32 %r3165, %r3150;
@%p116 bra $L__BB4_104;
// Keep only the low %r3151 bits of %r3060.
mov.u32 %r1753, -1;
shl.b32 %r1754, %r1753, %r3151;
not.b32 %r1755, %r1754;
and.b32 %r3152, %r3060, %r1755;
$L__BB4_100:
setp.gt.u32 %p117, %r3654, 17476;
mov.u32 %r3165, 1;
@%p117 bra $L__BB4_104;
// Take min(free bits, remaining bits) and OR them in at position %r3655.
sub.s32 %r1757, %r3653, %r3655;
min.u32 %r1758, %r1757, %r3151;
setp.eq.s32 %p118, %r1758, 32;
mov.u32 %r1759, -1;
shl.b32 %r1760, %r1759, %r1758;
not.b32 %r1761, %r1760;
selp.b32 %r1762, -1, %r1761, %p118;
and.b32 %r1763, %r1762, %r3152;
shl.b32 %r1764, %r1763, %r3655;
or.b32 %r3656, %r1764, %r3656;
add.s32 %r3655, %r1758, %r3655;
shr.u32 %r3152, %r3152, %r1758;
sub.s32 %r3151, %r3151, %r1758;
setp.lt.u32 %p119, %r3655, %r3653;
@%p119 bra $L__BB4_103;
// Byte full: store it; the next byte holds 7 bits after a 255, otherwise 8.
cvt.u64.u32 %rd79, %r3654;
add.s64 %rd80, %rd79, %rd5;
add.s64 %rd81, %rd1, %rd80;
st.global.u8 [%rd81], %r3656;
add.s32 %r3654, %r3654, 1;
setp.eq.s32 %p120, %r3656, 255;
selp.b32 %r3653, 7, 8, %p120;
mov.u32 %r3655, 0;
mov.u32 %r3656, %r3655;
$L__BB4_103:
setp.ne.s32 %p121, %r3151, 0;
mov.u32 %r3165, %r3150;
@%p121 bra $L__BB4_100;
// Forward bit packer for value 2 of the first group (mask bit 2 of %r3062).
// Bit count = %r202 - (bit 2 of %r203); the bits are the low bits of %r3065.
// Uses the shared packer state %r3653..%r3656. %r3180 becomes 1 when %r3654
// exceeds 17476; otherwise it carries %r3165.
$L__BB4_104:
and.b32 %r1767, %r3062, 4;
setp.eq.s32 %p122, %r1767, 0;
mov.u32 %r3180, %r3165;
// Value 2 was zero: nothing to emit.
@%p122 bra $L__BB4_111;
shr.u32 %r1768, %r203, 2;
and.b32 %r1769, %r1768, 1;
sub.s32 %r3166, %r202, %r1769;
setp.eq.s32 %p123, %r3166, 0;
mov.u32 %r3180, %r3165;
@%p123 bra $L__BB4_111;
// Keep only the low %r3166 bits of %r3065.
mov.u32 %r1770, -1;
shl.b32 %r1771, %r1770, %r3166;
not.b32 %r1772, %r1771;
and.b32 %r3167, %r3065, %r1772;
$L__BB4_107:
setp.gt.u32 %p124, %r3654, 17476;
mov.u32 %r3180, 1;
@%p124 bra $L__BB4_111;
// Take min(free bits, remaining bits) and OR them in at position %r3655.
sub.s32 %r1774, %r3653, %r3655;
min.u32 %r1775, %r1774, %r3166;
setp.eq.s32 %p125, %r1775, 32;
mov.u32 %r1776, -1;
shl.b32 %r1777, %r1776, %r1775;
not.b32 %r1778, %r1777;
selp.b32 %r1779, -1, %r1778, %p125;
and.b32 %r1780, %r1779, %r3167;
shl.b32 %r1781, %r1780, %r3655;
or.b32 %r3656, %r1781, %r3656;
add.s32 %r3655, %r1775, %r3655;
shr.u32 %r3167, %r3167, %r1775;
sub.s32 %r3166, %r3166, %r1775;
setp.lt.u32 %p126, %r3655, %r3653;
@%p126 bra $L__BB4_110;
// Byte full: store it; the next byte holds 7 bits after a 255, otherwise 8.
cvt.u64.u32 %rd82, %r3654;
add.s64 %rd83, %rd82, %rd5;
add.s64 %rd84, %rd1, %rd83;
st.global.u8 [%rd84], %r3656;
add.s32 %r3654, %r3654, 1;
setp.eq.s32 %p127, %r3656, 255;
selp.b32 %r3653, 7, 8, %p127;
mov.u32 %r3655, 0;
mov.u32 %r3656, %r3655;
$L__BB4_110:
setp.ne.s32 %p128, %r3166, 0;
mov.u32 %r3180, %r3165;
@%p128 bra $L__BB4_107;
// Forward bit packer for value 3 of the first group (mask bit 3 of %r3062).
// Bit count = %r202 - (%r203 >> 3); no extra mask is needed because %r203 is
// only four bits wide. The bits are the low bits of %r3070.
// Uses the shared packer state %r3653..%r3656. %r3195 becomes 1 when %r3654
// exceeds 17476; otherwise it carries %r3180.
$L__BB4_111:
and.b32 %r2991, %r3062, 8;
setp.eq.s32 %p129, %r2991, 0;
mov.u32 %r3195, %r3180;
// Value 3 was zero: nothing to emit.
@%p129 bra $L__BB4_118;
shr.u32 %r1784, %r203, 3;
sub.s32 %r3181, %r202, %r1784;
setp.eq.s32 %p130, %r3181, 0;
mov.u32 %r3195, %r3180;
@%p130 bra $L__BB4_118;
// Keep only the low %r3181 bits of %r3070.
mov.u32 %r1785, -1;
shl.b32 %r1786, %r1785, %r3181;
not.b32 %r1787, %r1786;
and.b32 %r3182, %r3070, %r1787;
$L__BB4_114:
setp.gt.u32 %p131, %r3654, 17476;
mov.u32 %r3195, 1;
@%p131 bra $L__BB4_118;
// Take min(free bits, remaining bits) and OR them in at position %r3655.
sub.s32 %r1789, %r3653, %r3655;
min.u32 %r1790, %r1789, %r3181;
setp.eq.s32 %p132, %r1790, 32;
mov.u32 %r1791, -1;
shl.b32 %r1792, %r1791, %r1790;
not.b32 %r1793, %r1792;
selp.b32 %r1794, -1, %r1793, %p132;
and.b32 %r1795, %r1794, %r3182;
shl.b32 %r1796, %r1795, %r3655;
or.b32 %r3656, %r1796, %r3656;
add.s32 %r3655, %r1790, %r3655;
shr.u32 %r3182, %r3182, %r1790;
sub.s32 %r3181, %r3181, %r1790;
setp.lt.u32 %p133, %r3655, %r3653;
@%p133 bra $L__BB4_117;
// Byte full: store it; the next byte holds 7 bits after a 255, otherwise 8.
cvt.u64.u32 %rd85, %r3654;
add.s64 %rd86, %rd85, %rd5;
add.s64 %rd87, %rd1, %rd86;
st.global.u8 [%rd87], %r3656;
add.s32 %r3654, %r3654, 1;
setp.eq.s32 %p134, %r3656, 255;
selp.b32 %r3653, 7, 8, %p134;
mov.u32 %r3655, 0;
mov.u32 %r3656, %r3655;
$L__BB4_117:
setp.ne.s32 %p135, %r3181, 0;
mov.u32 %r3195, %r3180;
@%p135 bra $L__BB4_114;
// Second group of four values: loads at %rd336+8, +264, +12 and +268, with the
// same classification as the first group at $L__BB4_17:
//   s = (x == 0) ? 0 : (|x| << %r23) | (x & 0x80000000)
//   v = ((s << 1) >> %r23) & ~1
//   if v != 0: set a bit in the mask %r3204 (1, 2, 4, 8 in load order),
//              e = 32 - clz(v - 1), m = (s >> 31) + v - 2
// Results: e in %r3197/%r3201/%r3206/%r3211, m in %r3198/%r3202/%r3207/%r3212,
// running signed maximum of the e values in %r3208.
$L__BB4_118:
// Value 0: [%rd336+8].
ld.global.u32 %r297, [%rd336+8];
setp.eq.s32 %p136, %r297, 0;
mov.u32 %r3197, 0;
mov.u32 %r3196, %r3197;
@%p136 bra $L__BB4_120;
and.b32 %r1800, %r297, -2147483648;
abs.s32 %r1801, %r297;
shl.b32 %r1802, %r1801, %r23;
or.b32 %r3196, %r1802, %r1800;
$L__BB4_120:
shl.b32 %r1806, %r3196, 1;
shr.u32 %r1807, %r1806, %r23;
and.b32 %r300, %r1807, -2;
setp.eq.s32 %p137, %r300, 0;
mov.u32 %r3198, %r3197;
mov.u32 %r3204, %r3197;
@%p137 bra $L__BB4_122;
add.s32 %r1809, %r300, -1;
clz.b32 %r1810, %r1809;
mov.u32 %r1811, 32;
sub.s32 %r3197, %r1811, %r1810;
shr.u32 %r1812, %r3196, 31;
add.s32 %r1813, %r1812, %r300;
add.s32 %r3198, %r1813, -2;
// Mask bit 0 marks value 0 as non-zero.
mov.u32 %r3204, 1;
$L__BB4_122:
// Value 1: [%rd336+264].
ld.global.u32 %r306, [%rd336+264];
setp.eq.s32 %p138, %r306, 0;
mov.u32 %r3201, 0;
mov.u32 %r3200, %r3201;
@%p138 bra $L__BB4_124;
and.b32 %r1815, %r306, -2147483648;
abs.s32 %r1816, %r306;
shl.b32 %r1817, %r1816, %r23;
or.b32 %r3200, %r1817, %r1815;
$L__BB4_124:
shl.b32 %r1820, %r3200, 1;
shr.u32 %r1821, %r1820, %r23;
and.b32 %r309, %r1821, -2;
setp.eq.s32 %p139, %r309, 0;
mov.u32 %r3202, %r3201;
// The running maximum starts as the e of value 0.
mov.u32 %r3208, %r3197;
@%p139 bra $L__BB4_126;
// Mask bit 1 marks value 1 as non-zero.
or.b32 %r3204, %r3204, 2;
add.s32 %r1822, %r309, -1;
clz.b32 %r1823, %r1822;
mov.u32 %r1824, 32;
sub.s32 %r3201, %r1824, %r1823;
max.s32 %r3208, %r3197, %r3201;
shr.u32 %r1825, %r3200, 31;
add.s32 %r1826, %r1825, %r309;
add.s32 %r3202, %r1826, -2;
$L__BB4_126:
// Value 2: [%rd336+12].
ld.global.u32 %r318, [%rd336+12];
setp.eq.s32 %p140, %r318, 0;
mov.u32 %r3206, 0;
mov.u32 %r3205, %r3206;
@%p140 bra $L__BB4_128;
and.b32 %r1828, %r318, -2147483648;
abs.s32 %r1829, %r318;
shl.b32 %r1830, %r1829, %r23;
or.b32 %r3205, %r1830, %r1828;
$L__BB4_128:
shl.b32 %r1833, %r3205, 1;
shr.u32 %r1834, %r1833, %r23;
and.b32 %r321, %r1834, -2;
setp.eq.s32 %p141, %r321, 0;
mov.u32 %r3207, %r3206;
@%p141 bra $L__BB4_130;
// Mask bit 2 marks value 2 as non-zero.
or.b32 %r3204, %r3204, 4;
add.s32 %r1835, %r321, -1;
clz.b32 %r1836, %r1835;
mov.u32 %r1837, 32;
sub.s32 %r3206, %r1837, %r1836;
max.s32 %r3208, %r3208, %r3206;
shr.u32 %r1838, %r3205, 31;
add.s32 %r1839, %r1838, %r321;
add.s32 %r3207, %r1839, -2;
$L__BB4_130:
// Value 3: [%rd336+268].
ld.global.u32 %r330, [%rd336+268];
setp.eq.s32 %p142, %r330, 0;
mov.u32 %r3211, 0;
mov.u32 %r3210, %r3211;
@%p142 bra $L__BB4_132;
and.b32 %r1841, %r330, -2147483648;
abs.s32 %r1842, %r330;
shl.b32 %r1843, %r1842, %r23;
or.b32 %r3210, %r1843, %r1841;
$L__BB4_132:
shl.b32 %r1846, %r3210, 1;
shr.u32 %r1847, %r1846, %r23;
and.b32 %r333, %r1847, -2;
setp.eq.s32 %p143, %r333, 0;
mov.u32 %r3212, %r3211;
@%p143 bra $L__BB4_134;
// Mask bit 3 marks value 3 as non-zero.
or.b32 %r3204, %r3204, 8;
add.s32 %r1848, %r333, -1;
clz.b32 %r1849, %r1848;
mov.u32 %r1850, 32;
sub.s32 %r3211, %r1850, %r1849;
max.s32 %r3208, %r3208, %r3211;
shr.u32 %r1851, %r3210, 31;
add.s32 %r1852, %r1851, %r333;
add.s32 %r3212, %r1852, -2;
// Prepares the table-lookup inputs for the second group:
//   %r342  = (%r3062 >> 1) | (%r3062 & 1), derived from the first group's mask;
//   %r3215 = 4-bit mask of which e values (%r3197, %r3201, %r3206, %r3211) equal
//            their maximum %r3208, or 0 when %r3208 < 2.
$L__BB4_134:
and.b32 %r3012, %r3062, 1;
shr.u32 %r1854, %r3062, 1;
or.b32 %r342, %r1854, %r3012;
// %r343 = %r3208 - 1 when %r3208 > 1, else 0 (not read again in this view).
add.s32 %r1855, %r3208, -1;
setp.lt.s32 %p144, %r3208, 2;
setp.gt.s32 %p145, %r3208, 1;
selp.b32 %r343, %r1855, 0, %p145;
mov.u32 %r3215, 0;
@%p144 bra $L__BB4_136;
// Bit 0: value 0 is at the maximum.
setp.eq.s32 %p146, %r3197, %r3208;
selp.u32 %r1856, 1, 0, %p146;
// Bit 1: value 1 is at the maximum (bfi inserts one bit at position 1).
setp.eq.s32 %p147, %r3201, %r3208;
selp.u32 %r1857, -1, 0, %p147;
bfi.b32 %r1858, %r1857, %r1856, 1, 1;
// Bit 2: value 2 is at the maximum.
setp.eq.s32 %p148, %r3206, %r3208;
selp.u16 %rs254, 1, 0, %p148;
mul.wide.u16 %r1859, %rs254, 4;
or.b32 %r1860, %r1858, %r1859;
// Bit 3: value 3 is at the maximum.
setp.eq.s32 %p149, %r3211, %r3208;
selp.u16 %rs255, 1, 0, %p149;
mul.wide.u16 %r1861, %rs255, 8;
or.b32 %r3215, %r1860, %r1861;
// Second-group counterpart of $L__BB4_35: updates the shared cleanup_e_val /
// cleanup_cx_val arrays at index (%r3037 >> 1) + 1 and + 2, reads a 16-bit entry
// from the table behind %rd29, and appends its bit string to the backward stream.
$L__BB4_136:
// Recompute the shared-array addresses and bit 3 of the first group's mask.
shr.u32 %r2998, %r3037, 1;
cvta.to.global.u64 %rd328, %rd29;
mov.u32 %r2997, _ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val;
add.s32 %r2996, %r2997, %r2998;
and.b32 %r2995, %r3062, 8;
shr.u32 %r2994, %r2995, 3;
mov.u32 %r2993, _ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val;
add.s32 %r2992, %r2993, %r2998;
// e_val[i+1] = max(low byte of %r3069, low byte of %r3201); e_val[i+2] = %r3211.
and.b32 %r1862, %r3201, 255;
and.b32 %r1863, %r3069, 255;
setp.lt.u32 %p150, %r1862, %r1863;
cvt.u16.u32 %rs256, %r3069;
cvt.u16.u32 %rs257, %r3201;
selp.b16 %rs258, %rs256, %rs257, %p150;
st.shared.u8 [%r2992+1], %rs258;
st.shared.u8 [%r2992+2], %r3211;
// cx_val[i+1] = (bit 3 of %r3062) | (bit 1 of %r3204); cx_val[i+2] = bit 3 of %r3204.
and.b32 %r346, %r3204, 2;
shr.u32 %r1867, %r346, 1;
or.b32 %r1868, %r2994, %r1867;
st.shared.u8 [%r2996+1], %r1868;
and.b32 %r347, %r3204, 8;
shr.u32 %r1871, %r347, 3;
st.shared.u8 [%r2996+2], %r1871;
// Table index = (%r342 << 8) | (%r3204 << 4) | %r3215; entries are 2 bytes.
shl.b32 %r1872, %r3204, 4;
shl.b32 %r1873, %r342, 8;
or.b32 %r1874, %r1872, %r1873;
or.b32 %r1875, %r1874, %r3215;
mul.wide.u32 %rd89, %r1875, 2;
add.s64 %rd90, %rd328, %rd89;
ld.global.u16 %rs25, [%rd90];
// Entry bits 4..6 = bit count, bits 8..15 = bits to emit.
shr.u16 %rs259, %rs25, 4;
and.b16 %rs26, %rs259, 7;
setp.eq.s16 %p151, %rs26, 0;
mov.u32 %r3227, %r3085;
// Nothing to emit when the bit count is 0.
@%p151 bra $L__BB4_143;
cvt.u32.u16 %r3216, %rs26;
shr.u16 %rs260, %rs25, 8;
cvt.u32.u16 %r3217, %rs260;
// Packer loop: %r3216 = bits left, %r3217 = pending bits, %rs532 = partial byte,
// %r3509 = bits used in it, %r3510 = 1 when one bit of the byte is held back,
// %r3508 = bytes written so far.
$L__BB4_138:
mov.u16 %rs27, %rs532;
mov.u32 %r350, %r3216;
// Stop with %r3227 = 1 once %r3508 exceeds 2879.
setp.gt.u32 %p152, %r3508, 2879;
mov.u32 %r3227, 1;
@%p152 bra $L__BB4_143;
// Free bits = 8 - %r3510 - %r3509; take min(free, remaining) bits this round.
mov.u32 %r1877, 8;
sub.s32 %r1878, %r1877, %r3510;
sub.s32 %r1879, %r1878, %r3509;
min.u32 %r1880, %r1879, %r350;
setp.eq.s32 %p153, %r1880, 32;
mov.u32 %r1881, -1;
shl.b32 %r1882, %r1881, %r1880;
not.b32 %r1883, %r1882;
selp.b32 %r1884, -1, %r1883, %p153;
and.b32 %r1885, %r1884, %r3217;
shl.b32 %r1886, %r1885, %r3509;
cvt.u16.u32 %rs261, %r1886;
or.b16 %rs532, %rs27, %rs261;
add.s32 %r3509, %r1880, %r3509;
sub.s32 %r3216, %r350, %r1880;
shr.u32 %r3217, %r3217, %r1880;
// Byte not yet full: go check for more pending bits.
setp.gt.u32 %p154, %r1879, %r350;
@%p154 bra $L__BB4_142;
// Byte full. If a bit was being held back and the byte is not 127, release the
// held-back bit (%r3510 = 0) and keep filling instead of storing.
setp.ne.s32 %p155, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs262, %rs532, 255;
setp.ne.s16 %p156, %rs262, 127;
and.pred %p157, %p155, %p156;
@%p157 bra $L__BB4_142;
// Store the byte at %rd1 + %rd5 + (20548 - %r3508): the stream grows downwards.
cvt.u16.u32 %rs453, %r1886;
or.b16 %rs452, %rs27, %rs453;
mov.u32 %r1889, 20548;
sub.s32 %r1890, %r1889, %r3508;
cvt.u64.u32 %rd91, %r1890;
add.s64 %rd92, %rd91, %rd5;
add.s64 %rd93, %rd1, %rd92;
st.global.u8 [%rd93], %rs452;
add.s32 %r3508, %r3508, 1;
// Hold one bit back in the next byte when the stored byte is above 143 (0x8F).
setp.gt.u16 %p158, %rs262, 143;
selp.u32 %r3510, 1, 0, %p158;
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_142:
// Repeat while bits remain; on normal exit %r3227 carries %r3085 unchanged.
setp.ne.s32 %p159, %r3216, 0;
mov.u32 %r3227, %r3085;
@%p159 bra $L__BB4_138;
// Second-group counterpart of $L__BB4_42: this stream is touched only when
// %r342 == 0.
$L__BB4_143:
setp.ne.s32 %p160, %r342, 0;
@%p160 bra $L__BB4_191;
// %rd10 = %rd1 + %rd5 + 17477 + %r3111: address of the next byte of this stream.
setp.eq.s32 %p161, %r3204, 0;
add.s32 %r1891, %r3111, 17477;
cvt.u64.u32 %rd94, %r1891;
add.s64 %rd95, %rd94, %rd5;
add.s64 %rd10, %rd1, %rd95;
// All four values were zero (%r3204 == 0): extend the current run at $L__BB4_183.
@%p161 bra $L__BB4_183;
// Otherwise append a 0 bit to %rs474 and use up one bit of the counter %r3120.
shl.b16 %rs474, %rs474, 1;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p162, %r3120, 0;
mov.u32 %r3263, %r3277;
// Counter reached 0: flush the byte at $L__BB4_146, which rejoins at $L__BB4_148.
@%p162 bra $L__BB4_148;
bra.uni $L__BB4_146;
// Maps the state %r3275 to a bit count %r3231:
//   0..2 -> 0, 3..5 -> 1, 6..8 -> 2, 9..10 -> 3, 11 -> 4, anything else -> 5.
$L__BB4_148:
setp.lt.u32 %p164, %r3275, 3;
mov.u32 %r3231, 0;
@%p164 bra $L__BB4_151;
setp.lt.u32 %p165, %r3275, 6;
mov.u32 %r3231, 1;
@%p165 bra $L__BB4_151;
// Branch-free selection for the remaining ranges.
setp.lt.u32 %p166, %r3275, 9;
setp.eq.s32 %p167, %r3275, 11;
selp.b32 %r1897, 4, 5, %p167;
setp.lt.u32 %p168, %r3275, 11;
selp.b32 %r1898, 3, %r1897, %p168;
selp.b32 %r3231, 2, %r1898, %p166;
// Emits the low %r3231 bits of the run counter %r3274, highest bit first.
// This part handles the first (%r3231 & 3) bits; the rest are emitted four at
// a time by the loop at $L__BB4_166.
// Per bit: %rs474 = (%rs474 << 1) | bit, %r3120 -= 1, and when %r3120 hits 0 the
// byte is stored at %rd1 + %rd5 + 17477 + %r3111, unless %r3111 > 191, in which
// case the flag register is set to 1 and nothing is stored.
// %r3241 tracks the bits still to emit; %r3242 / %r3237 / %r3263 carry the flag.
$L__BB4_151:
// No bits to emit when %r3231 == 0.
setp.eq.s32 %p169, %r3231, 0;
@%p169 bra $L__BB4_179;
and.b32 %r375, %r3231, 3;
setp.eq.s32 %p170, %r375, 0;
mov.u32 %r3241, %r3231;
mov.u32 %r3242, %r3263;
// A multiple of four goes straight to the unrolled loop.
@%p170 bra $L__BB4_164;
// First leftover bit: bit (%r3231 - 1) of %r3274.
add.s32 %r3015, %r3231, -1;
mov.u32 %r1900, 1;
shl.b32 %r1901, %r1900, %r3015;
and.b32 %r1902, %r1901, %r3274;
setp.ne.s32 %p171, %r1902, 0;
selp.u32 %r1903, 1, 0, %p171;
cvt.u32.u16 %r1904, %rs474;
bfi.b32 %r1905, %r1904, %r1903, 1, 8;
cvt.u16.u32 %rs474, %r1905;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p172, %r3120, 0;
mov.u32 %r3242, %r3263;
@%p172 bra $L__BB4_156;
setp.gt.u32 %p173, %r3111, 191;
mov.u32 %r3120, 0;
mov.u32 %r3242, %r1900;
@%p173 bra $L__BB4_156;
// Flush after the first bit: the counter is reloaded with 8 unconditionally
// here, while the later flushes choose 7 when the stored byte equals 255.
add.s32 %r1909, %r3111, 17477;
cvt.u64.u32 %rd96, %r1909;
add.s64 %rd97, %rd96, %rd5;
add.s64 %rd98, %rd1, %rd97;
st.global.u8 [%rd98], %rs474;
add.s32 %r3111, %r3111, 1;
mov.u16 %rs474, 0;
mov.u32 %r3120, 8;
mov.u32 %r3242, %r3263;
$L__BB4_156:
// Done with the leftovers if (%r3231 & 3) == 1.
and.b32 %r3017, %r3231, 3;
add.s32 %r3241, %r3231, -1;
setp.eq.s32 %p174, %r3017, 1;
mov.u32 %r3263, %r3242;
@%p174 bra $L__BB4_164;
// Second leftover bit: bit (%r3231 - 2) of %r3274.
add.s32 %r3241, %r3231, -2;
mov.u32 %r1910, 1;
shl.b32 %r1911, %r1910, %r3241;
and.b32 %r1912, %r1911, %r3274;
setp.ne.s32 %p175, %r1912, 0;
selp.u32 %r1913, 1, 0, %p175;
cvt.u32.u16 %r1914, %rs474;
bfi.b32 %r1915, %r1914, %r1913, 1, 8;
cvt.u16.u32 %rs474, %r1915;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p176, %r3120, 0;
mov.u32 %r3237, %r3242;
@%p176 bra $L__BB4_160;
setp.gt.u32 %p177, %r3111, 191;
mov.u32 %r3120, 0;
mov.u32 %r3237, %r1910;
@%p177 bra $L__BB4_160;
// Flush: next byte gets 7 bits if the stored byte is 255, otherwise 8.
add.s32 %r1918, %r3111, 17477;
cvt.u64.u32 %rd99, %r1918;
add.s64 %rd100, %rd99, %rd5;
add.s64 %rd101, %rd1, %rd100;
and.b16 %rs269, %rs474, 255;
st.global.u8 [%rd101], %rs474;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p178, %rs269, 255;
selp.b32 %r3120, 7, 8, %p178;
mov.u16 %rs474, 0;
mov.u32 %r3237, %r3242;
$L__BB4_160:
// Done with the leftovers if (%r3231 & 3) == 2.
and.b32 %r3018, %r3231, 3;
setp.eq.s32 %p179, %r3018, 2;
mov.u32 %r3263, %r3237;
mov.u32 %r3242, %r3237;
@%p179 bra $L__BB4_164;
// Third leftover bit: bit (%r3231 - 3) of %r3274.
add.s32 %r3241, %r3231, -3;
mov.u32 %r1919, 1;
shl.b32 %r1920, %r1919, %r3241;
and.b32 %r1921, %r1920, %r3274;
setp.ne.s32 %p180, %r1921, 0;
selp.u32 %r1922, 1, 0, %p180;
cvt.u32.u16 %r1923, %rs474;
bfi.b32 %r1924, %r1923, %r1922, 1, 8;
cvt.u16.u32 %rs474, %r1924;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p181, %r3120, 0;
mov.u32 %r3263, %r3237;
mov.u32 %r3242, %r3237;
@%p181 bra $L__BB4_164;
add.s32 %r3241, %r3231, -3;
setp.gt.u32 %p182, %r3111, 191;
mov.u32 %r3120, 0;
mov.u32 %r3263, %r1919;
mov.u32 %r3242, %r1919;
@%p182 bra $L__BB4_164;
// Flush: next byte gets 7 bits if the stored byte is 255, otherwise 8.
add.s32 %r3241, %r3231, -3;
add.s32 %r1929, %r3111, 17477;
cvt.u64.u32 %rd102, %r1929;
add.s64 %rd103, %rd102, %rd5;
add.s64 %rd104, %rd1, %rd103;
and.b16 %rs272, %rs474, 255;
st.global.u8 [%rd104], %rs474;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p183, %rs272, 255;
selp.b32 %r3120, 7, 8, %p183;
mov.u16 %rs474, 0;
mov.u32 %r3263, %r3237;
mov.u32 %r3242, %r3237;
// Four-way unrolled remainder of the bit emission started at $L__BB4_151.
// It is entered only when %r3231 - 1 >= 3. Each pass emits bits %r3241-1 down
// to %r3241-4 of %r3274, then subtracts 4 from %r3241 and repeats while it is
// non-zero.
// Steps 1-3 keep accumulator / bit counter / flag in temporaries
// (%r3251,%r3250,%r3252), (%r3255,%r3254,%r3256), (%r3259,%r3258,%r3260);
// step 4 writes them back to %rs474, %r3120 and %r3263.
$L__BB4_164:
add.s32 %r3019, %r3231, -1;
setp.lt.u32 %p184, %r3019, 3;
@%p184 bra $L__BB4_179;
mov.u32 %r3263, %r3242;
$L__BB4_166:
// Step 1: append bit (%r3241 - 1).
add.s32 %r1930, %r3241, -1;
mov.u32 %r1931, 1;
shl.b32 %r1932, %r1931, %r1930;
and.b32 %r1933, %r1932, %r3274;
setp.ne.s32 %p185, %r1933, 0;
selp.u32 %r1934, 1, 0, %p185;
cvt.u32.u16 %r1935, %rs474;
bfi.b32 %r3251, %r1935, %r1934, 1, 8;
add.s32 %r3250, %r3120, -1;
setp.ne.s32 %p186, %r3250, 0;
mov.u32 %r3252, %r3263;
@%p186 bra $L__BB4_169;
// Counter exhausted: set the flag to 1 and skip the store if %r3111 > 191.
setp.gt.u32 %p187, %r3111, 191;
mov.u32 %r3250, 0;
mov.u32 %r3252, %r1931;
@%p187 bra $L__BB4_169;
// Store the byte; the next byte gets 7 bits after a 255, otherwise 8.
cvt.u16.u32 %rs273, %r3251;
and.b16 %rs274, %rs273, 255;
add.s32 %r1939, %r3111, 17477;
cvt.u64.u32 %rd105, %r1939;
add.s64 %rd106, %rd105, %rd5;
add.s64 %rd107, %rd1, %rd106;
st.global.u8 [%rd107], %rs273;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p188, %rs274, 255;
selp.b32 %r3250, 7, 8, %p188;
mov.u32 %r3251, 0;
mov.u32 %r3252, %r3263;
$L__BB4_169:
// Step 2: append bit (%r3241 - 2); %r1931 still holds 1 from step 1.
add.s32 %r1940, %r3241, -2;
shl.b32 %r1942, %r1931, %r1940;
and.b32 %r1943, %r1942, %r3274;
setp.ne.s32 %p189, %r1943, 0;
and.b32 %r1944, %r3251, 127;
selp.u32 %r1945, 1, 0, %p189;
bfi.b32 %r3255, %r1944, %r1945, 1, 7;
add.s32 %r3254, %r3250, -1;
setp.ne.s32 %p190, %r3254, 0;
mov.u32 %r3256, %r3252;
@%p190 bra $L__BB4_172;
setp.gt.u32 %p191, %r3111, 191;
mov.u32 %r3256, 1;
mov.u32 %r3254, 0;
@%p191 bra $L__BB4_172;
cvt.u16.u32 %rs275, %r3255;
and.b16 %rs276, %rs275, 255;
add.s32 %r1949, %r3111, 17477;
cvt.u64.u32 %rd108, %r1949;
add.s64 %rd109, %rd108, %rd5;
add.s64 %rd110, %rd1, %rd109;
st.global.u8 [%rd110], %rs275;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p192, %rs276, 255;
selp.b32 %r3254, 7, 8, %p192;
mov.u32 %r3255, 0;
mov.u32 %r3256, %r3252;
$L__BB4_172:
// Step 3: append bit (%r3241 - 3).
add.s32 %r1950, %r3241, -3;
mov.u32 %r1951, 1;
shl.b32 %r1952, %r1951, %r1950;
and.b32 %r1953, %r1952, %r3274;
setp.ne.s32 %p193, %r1953, 0;
and.b32 %r1954, %r3255, 127;
selp.u32 %r1955, 1, 0, %p193;
bfi.b32 %r3259, %r1954, %r1955, 1, 7;
add.s32 %r3258, %r3254, -1;
setp.ne.s32 %p194, %r3258, 0;
mov.u32 %r3260, %r3256;
@%p194 bra $L__BB4_175;
setp.gt.u32 %p195, %r3111, 191;
mov.u32 %r3258, 0;
mov.u32 %r3260, %r1951;
@%p195 bra $L__BB4_175;
cvt.u16.u32 %rs277, %r3259;
and.b16 %rs278, %rs277, 255;
add.s32 %r1959, %r3111, 17477;
cvt.u64.u32 %rd111, %r1959;
add.s64 %rd112, %rd111, %rd5;
add.s64 %rd113, %rd1, %rd112;
st.global.u8 [%rd113], %rs277;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p196, %rs278, 255;
selp.b32 %r3258, 7, 8, %p196;
mov.u32 %r3259, 0;
mov.u32 %r3260, %r3256;
$L__BB4_175:
// Step 4: append bit (%r3241 - 4) and commit to %rs474 / %r3120 / %r3263.
add.s32 %r3241, %r3241, -4;
shl.b32 %r1961, %r1951, %r3241;
and.b32 %r1962, %r1961, %r3274;
setp.ne.s32 %p197, %r1962, 0;
and.b32 %r1963, %r3259, 127;
selp.u32 %r1964, 1, 0, %p197;
bfi.b32 %r1965, %r1963, %r1964, 1, 15;
cvt.u16.u32 %rs474, %r1965;
add.s32 %r3120, %r3258, -1;
setp.ne.s32 %p198, %r3120, 0;
mov.u32 %r3263, %r3260;
@%p198 bra $L__BB4_178;
setp.gt.u32 %p199, %r3111, 191;
mov.u32 %r3263, 1;
mov.u32 %r3120, 0;
@%p199 bra $L__BB4_178;
add.s32 %r1968, %r3111, 17477;
cvt.u64.u32 %rd114, %r1968;
add.s64 %rd115, %rd114, %rd5;
add.s64 %rd116, %rd1, %rd115;
and.b16 %rs280, %rs474, 255;
st.global.u8 [%rd116], %rs474;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p200, %rs280, 255;
selp.b32 %r3120, 7, 8, %p200;
mov.u16 %rs474, 0;
mov.u32 %r3263, %r3260;
$L__BB4_178:
// Loop until all bits of this group have been emitted.
setp.ne.s32 %p201, %r3241, 0;
@%p201 bra $L__BB4_166;
// State step-down: %r3275 = (%r3275 == 0) ? 0 : %r3275 - 1, and %r3274 is reset to 0.
// %r3267 is then derived from the new %r3275:
//   <3 -> 0, <6 -> 1, <9 -> 2, <11 -> 3, ==11 -> 4, otherwise 5,
// and %r3276 = 1 << %r3267. The carried flag %r3263 is forwarded in %r3277.
// NOTE(review): the mapping matches the MEL exponent table {0,0,0,1,1,1,2,2,2,3,3,4,5};
// presumably %r3275 = MEL state k, %r3274 = run, %r3276 = threshold - confirm.
$L__BB4_179:
add.s32 %r1970, %r3275, -1;
setp.eq.s32 %p202, %r3275, 0;
mov.u32 %r3274, 0;
selp.b32 %r3275, 0, %r1970, %p202;
setp.lt.u32 %p203, %r3275, 3;
mov.u32 %r3267, %r3274;
@%p203 bra $L__BB4_182;
setp.lt.u32 %p204, %r3275, 6;
mov.u32 %r3267, 1;
@%p204 bra $L__BB4_182;
setp.lt.u32 %p205, %r3275, 9;
setp.eq.s32 %p206, %r3275, 11;
selp.b32 %r1972, 4, 5, %p206;
setp.lt.u32 %p207, %r3275, 11;
selp.b32 %r1973, 3, %r1972, %p207;
selp.b32 %r3267, 2, %r1973, %p205;
$L__BB4_182:
mov.u32 %r1975, 1;
shl.b32 %r3276, %r1975, %r3267;
mov.u32 %r3277, %r3263;
bra.uni $L__BB4_191;
// Counter step: %r3274 is incremented; while it stays below %r3276 nothing
// else happens. Once it reaches %r3276, a 1 bit is appended to %rs474
// ((%rs474 << 1) | 1) and %r3120 is decremented. If %r3120 hits 0 control goes
// to $L__BB4_185, which lies outside this excerpt (presumably the byte flush -
// confirm). $L__BB4_187 then steps the state up: %r3275 = min(%r3275 + 1, 12),
// %r3274 = 0, and %r3276 = 1 << e where e is chosen from %r3275 as
//   <3 -> 0, <6 -> 1, <9 -> 2, <11 -> 3, ==11 -> 4, otherwise 5.
$L__BB4_183:
add.s32 %r3274, %r3274, 1;
setp.lt.u32 %p208, %r3274, %r3276;
@%p208 bra $L__BB4_191;
shl.b16 %rs281, %rs474, 1;
or.b16 %rs474, %rs281, 1;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p209, %r3120, 0;
mov.u32 %r3270, %r3277;
@%p209 bra $L__BB4_187;
bra.uni $L__BB4_185;
$L__BB4_187:
add.s32 %r1979, %r3275, 1;
min.u32 %r3275, %r1979, 12;
setp.lt.u32 %p212, %r3275, 3;
mov.u32 %r3274, 0;
mov.u32 %r3271, %r3274;
@%p212 bra $L__BB4_190;
setp.lt.u32 %p213, %r3275, 6;
mov.u32 %r3271, 1;
@%p213 bra $L__BB4_190;
setp.lt.u32 %p214, %r3275, 9;
setp.eq.s32 %p215, %r3275, 11;
selp.b32 %r1981, 4, 5, %p215;
setp.lt.u32 %p216, %r3275, 11;
selp.b32 %r1982, 3, %r1981, %p216;
selp.b32 %r3271, 2, %r1982, %p214;
$L__BB4_190:
mov.u32 %r1984, 1;
shl.b32 %r3276, %r1984, %r3271;
mov.u32 %r3277, %r3270;
// LSB-first bit packer, lane selected by bit 0 of %r3204.
// Shared setup: %r458 = max(%r3208, 1), %r459 = %rs25 & 15.
// If (%r3204 & 1) is set, %r3278 = %r458 - (%r459 & 1) bits taken from the low
// end of %r3198 are appended to the byte stream:
//   %r3656 = partial byte, %r3655 = bits already in it, %r3653 = capacity of
//   the current byte, %r3654 = number of bytes stored so far.
// A full byte is stored at %rd1 + %rd5 + %r3654; the next capacity is 7 if the
// stored value was 255, else 8. If %r3654 > 17476 the loop exits with the
// flag %r3292 = 1; otherwise %r3292 = %r3195.
// NOTE(review): presumably the HTJ2K MagSgn stream writer - confirm.
$L__BB4_191:
max.s32 %r458, %r3208, 1;
and.b16 %rs284, %rs25, 15;
cvt.u32.u16 %r459, %rs284;
and.b32 %r460, %r3204, 1;
setp.eq.s32 %p217, %r460, 0;
mov.u32 %r3292, %r3195;
@%p217 bra $L__BB4_198;
and.b32 %r1985, %r459, 1;
sub.s32 %r3278, %r458, %r1985;
setp.eq.s32 %p218, %r3278, 0;
mov.u32 %r3292, %r3195;
@%p218 bra $L__BB4_198;
// Keep only the low %r3278 bits of the value.
mov.u32 %r1986, -1;
shl.b32 %r1987, %r1986, %r3278;
not.b32 %r1988, %r1987;
and.b32 %r3279, %r3198, %r1988;
$L__BB4_194:
setp.gt.u32 %p219, %r3654, 17476;
mov.u32 %r3292, 1;
@%p219 bra $L__BB4_198;
// n = min(free bits in the current byte, bits remaining); move n bits.
sub.s32 %r1990, %r3653, %r3655;
min.u32 %r1991, %r1990, %r3278;
setp.eq.s32 %p220, %r1991, 32;
mov.u32 %r1992, -1;
shl.b32 %r1993, %r1992, %r1991;
not.b32 %r1994, %r1993;
selp.b32 %r1995, -1, %r1994, %p220;
and.b32 %r1996, %r1995, %r3279;
shl.b32 %r1997, %r1996, %r3655;
or.b32 %r3656, %r1997, %r3656;
add.s32 %r3655, %r1991, %r3655;
shr.u32 %r3279, %r3279, %r1991;
sub.s32 %r3278, %r3278, %r1991;
setp.lt.u32 %p221, %r3655, %r3653;
@%p221 bra $L__BB4_197;
// Byte full: store it and start a new one.
cvt.u64.u32 %rd117, %r3654;
add.s64 %rd118, %rd117, %rd5;
add.s64 %rd119, %rd1, %rd118;
st.global.u8 [%rd119], %r3656;
add.s32 %r3654, %r3654, 1;
setp.eq.s32 %p222, %r3656, 255;
selp.b32 %r3653, 7, 8, %p222;
mov.u32 %r3655, 0;
mov.u32 %r3656, %r3655;
$L__BB4_197:
setp.ne.s32 %p223, %r3278, 0;
mov.u32 %r3292, %r3195;
@%p223 bra $L__BB4_194;
// LSB-first bit packer, lane selected by bit 1 of %r3204.
// If (%r3204 & 2) is set, %r3293 = %r458 - ((%r459 >> 1) & 1) bits taken from
// the low end of %r3202 are appended to the byte stream
// (%r3656 = partial byte, %r3655 = bits in it, %r3653 = byte capacity,
//  %r3654 = bytes stored). Full bytes go to %rd1 + %rd5 + %r3654; the next
// capacity is 7 after a stored 255, else 8. If %r3654 > 17476 the flag
// %r3307 becomes 1; otherwise it keeps the incoming flag %r3292.
$L__BB4_198:
and.b32 %r3013, %r3204, 2;
setp.eq.s32 %p224, %r3013, 0;
mov.u32 %r3307, %r3292;
@%p224 bra $L__BB4_205;
shr.u32 %r2000, %r459, 1;
and.b32 %r2001, %r2000, 1;
sub.s32 %r3293, %r458, %r2001;
setp.eq.s32 %p225, %r3293, 0;
mov.u32 %r3307, %r3292;
@%p225 bra $L__BB4_205;
// Keep only the low %r3293 bits of the value.
mov.u32 %r2002, -1;
shl.b32 %r2003, %r2002, %r3293;
not.b32 %r2004, %r2003;
and.b32 %r3294, %r3202, %r2004;
$L__BB4_201:
setp.gt.u32 %p226, %r3654, 17476;
mov.u32 %r3307, 1;
@%p226 bra $L__BB4_205;
// n = min(free bits in the current byte, bits remaining); move n bits.
sub.s32 %r2006, %r3653, %r3655;
min.u32 %r2007, %r2006, %r3293;
setp.eq.s32 %p227, %r2007, 32;
mov.u32 %r2008, -1;
shl.b32 %r2009, %r2008, %r2007;
not.b32 %r2010, %r2009;
selp.b32 %r2011, -1, %r2010, %p227;
and.b32 %r2012, %r2011, %r3294;
shl.b32 %r2013, %r2012, %r3655;
or.b32 %r3656, %r2013, %r3656;
add.s32 %r3655, %r2007, %r3655;
shr.u32 %r3294, %r3294, %r2007;
sub.s32 %r3293, %r3293, %r2007;
setp.lt.u32 %p228, %r3655, %r3653;
@%p228 bra $L__BB4_204;
// Byte full: store it and start a new one.
cvt.u64.u32 %rd120, %r3654;
add.s64 %rd121, %rd120, %rd5;
add.s64 %rd122, %rd1, %rd121;
st.global.u8 [%rd122], %r3656;
add.s32 %r3654, %r3654, 1;
setp.eq.s32 %p229, %r3656, 255;
selp.b32 %r3653, 7, 8, %p229;
mov.u32 %r3655, 0;
mov.u32 %r3656, %r3655;
$L__BB4_204:
setp.ne.s32 %p230, %r3293, 0;
mov.u32 %r3307, %r3292;
@%p230 bra $L__BB4_201;
// LSB-first bit packer, lane selected by bit 2 of %r3204.
// If (%r3204 & 4) is set, %r3308 = %r458 - ((%r459 >> 2) & 1) bits taken from
// the low end of %r3207 are appended to the byte stream
// (%r3656 = partial byte, %r3655 = bits in it, %r3653 = byte capacity,
//  %r3654 = bytes stored). Full bytes go to %rd1 + %rd5 + %r3654; the next
// capacity is 7 after a stored 255, else 8. If %r3654 > 17476 the flag
// %r3322 becomes 1; otherwise it keeps the incoming flag %r3307.
$L__BB4_205:
and.b32 %r2016, %r3204, 4;
setp.eq.s32 %p231, %r2016, 0;
mov.u32 %r3322, %r3307;
@%p231 bra $L__BB4_212;
shr.u32 %r2017, %r459, 2;
and.b32 %r2018, %r2017, 1;
sub.s32 %r3308, %r458, %r2018;
setp.eq.s32 %p232, %r3308, 0;
mov.u32 %r3322, %r3307;
@%p232 bra $L__BB4_212;
// Keep only the low %r3308 bits of the value.
mov.u32 %r2019, -1;
shl.b32 %r2020, %r2019, %r3308;
not.b32 %r2021, %r2020;
and.b32 %r3309, %r3207, %r2021;
$L__BB4_208:
setp.gt.u32 %p233, %r3654, 17476;
mov.u32 %r3322, 1;
@%p233 bra $L__BB4_212;
// n = min(free bits in the current byte, bits remaining); move n bits.
sub.s32 %r2023, %r3653, %r3655;
min.u32 %r2024, %r2023, %r3308;
setp.eq.s32 %p234, %r2024, 32;
mov.u32 %r2025, -1;
shl.b32 %r2026, %r2025, %r2024;
not.b32 %r2027, %r2026;
selp.b32 %r2028, -1, %r2027, %p234;
and.b32 %r2029, %r2028, %r3309;
shl.b32 %r2030, %r2029, %r3655;
or.b32 %r3656, %r2030, %r3656;
add.s32 %r3655, %r2024, %r3655;
shr.u32 %r3309, %r3309, %r2024;
sub.s32 %r3308, %r3308, %r2024;
setp.lt.u32 %p235, %r3655, %r3653;
@%p235 bra $L__BB4_211;
// Byte full: store it and start a new one.
cvt.u64.u32 %rd123, %r3654;
add.s64 %rd124, %rd123, %rd5;
add.s64 %rd125, %rd1, %rd124;
st.global.u8 [%rd125], %r3656;
add.s32 %r3654, %r3654, 1;
setp.eq.s32 %p236, %r3656, 255;
selp.b32 %r3653, 7, 8, %p236;
mov.u32 %r3655, 0;
mov.u32 %r3656, %r3655;
$L__BB4_211:
setp.ne.s32 %p237, %r3308, 0;
mov.u32 %r3322, %r3307;
@%p237 bra $L__BB4_208;
// LSB-first bit packer, lane selected by bit 3 of %r3204.
// If (%r3204 & 8) is set, %r3323 = %r458 - (%r459 >> 3) bits taken from the
// low end of %r3212 are appended to the byte stream
// (%r3656 = partial byte, %r3655 = bits in it, %r3653 = byte capacity,
//  %r3654 = bytes stored). Full bytes go to %rd1 + %rd5 + %r3654; the next
// capacity is 7 after a stored 255, else 8. If %r3654 > 17476 the flag
// %r3829 becomes 1; otherwise it keeps the incoming flag %r3322.
$L__BB4_212:
and.b32 %r3014, %r3204, 8;
setp.eq.s32 %p238, %r3014, 0;
mov.u32 %r3829, %r3322;
@%p238 bra $L__BB4_219;
shr.u32 %r2033, %r459, 3;
sub.s32 %r3323, %r458, %r2033;
setp.eq.s32 %p239, %r3323, 0;
mov.u32 %r3829, %r3322;
@%p239 bra $L__BB4_219;
// Keep only the low %r3323 bits of the value.
mov.u32 %r2034, -1;
shl.b32 %r2035, %r2034, %r3323;
not.b32 %r2036, %r2035;
and.b32 %r3324, %r3212, %r2036;
$L__BB4_215:
setp.gt.u32 %p240, %r3654, 17476;
mov.u32 %r3829, 1;
@%p240 bra $L__BB4_219;
// n = min(free bits in the current byte, bits remaining); move n bits.
sub.s32 %r2038, %r3653, %r3655;
min.u32 %r2039, %r2038, %r3323;
setp.eq.s32 %p241, %r2039, 32;
mov.u32 %r2040, -1;
shl.b32 %r2041, %r2040, %r2039;
not.b32 %r2042, %r2041;
selp.b32 %r2043, -1, %r2042, %p241;
and.b32 %r2044, %r2043, %r3324;
shl.b32 %r2045, %r2044, %r3655;
or.b32 %r3656, %r2045, %r3656;
add.s32 %r3655, %r2039, %r3655;
shr.u32 %r3324, %r3324, %r2039;
sub.s32 %r3323, %r3323, %r2039;
setp.lt.u32 %p242, %r3655, %r3653;
@%p242 bra $L__BB4_218;
// Byte full: store it and start a new one.
cvt.u64.u32 %rd126, %r3654;
add.s64 %rd127, %rd126, %rd5;
add.s64 %rd128, %rd1, %rd127;
st.global.u8 [%rd128], %r3656;
add.s32 %r3654, %r3654, 1;
setp.eq.s32 %p243, %r3656, 255;
selp.b32 %r3653, 7, 8, %p243;
mov.u32 %r3655, 0;
mov.u32 %r3656, %r3655;
$L__BB4_218:
setp.ne.s32 %p244, %r3323, 0;
mov.u32 %r3829, %r3322;
@%p244 bra $L__BB4_215;
// Dispatch on %r86 and %r343 (signed compares):
//   either value < 1        -> $L__BB4_267 (skip the bit-emission step);
//   min(%r86, %r343) < 3    -> $L__BB4_259;
//   otherwise (both >= 3)   -> $L__BB4_221.
// %rd11 = %rd1 + %rd5 + 17477 + %r3111 is precomputed here and used by the
// store in $L__BB4_221.
$L__BB4_219:
setp.lt.s32 %p245, %r343, 1;
setp.lt.s32 %p246, %r86, 1;
or.pred %p247, %p246, %p245;
@%p247 bra $L__BB4_267;
min.s32 %r2048, %r86, %r343;
setp.lt.s32 %p248, %r2048, 3;
add.s32 %r2049, %r3111, 17477;
cvt.u64.u32 %rd129, %r2049;
add.s64 %rd130, %rd129, %rd5;
add.s64 %rd11, %rd1, %rd130;
@%p248 bra $L__BB4_259;
bra.uni $L__BB4_221;
// Counter step: %r3274 is incremented; while it stays below %r3276 control
// goes straight to $L__BB4_267. Once it reaches %r3276, a 1 bit is appended
// to %rs474 ((%rs474 << 1) | 1) and %r3120 is decremented. If %r3120 hits 0
// control goes to $L__BB4_261, which lies outside this excerpt (presumably the
// byte flush - confirm). $L__BB4_263 then steps the state up:
// %r3275 = min(%r3275 + 1, 12), %r3274 = 0, and %r3276 = 1 << e where e is
// chosen from %r3275 as <3 -> 0, <6 -> 1, <9 -> 2, <11 -> 3, ==11 -> 4, else 5.
$L__BB4_259:
add.s32 %r3274, %r3274, 1;
setp.lt.u32 %p295, %r3274, %r3276;
@%p295 bra $L__BB4_267;
shl.b16 %rs301, %rs474, 1;
or.b16 %rs474, %rs301, 1;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p296, %r3120, 0;
mov.u32 %r3380, %r3277;
@%p296 bra $L__BB4_263;
bra.uni $L__BB4_261;
$L__BB4_263:
add.s32 %r2137, %r3275, 1;
min.u32 %r3275, %r2137, 12;
setp.lt.u32 %p299, %r3275, 3;
mov.u32 %r3274, 0;
mov.u32 %r3381, %r3274;
@%p299 bra $L__BB4_266;
setp.lt.u32 %p300, %r3275, 6;
mov.u32 %r3381, 1;
@%p300 bra $L__BB4_266;
setp.lt.u32 %p301, %r3275, 9;
setp.eq.s32 %p302, %r3275, 11;
selp.b32 %r2139, 4, 5, %p302;
setp.lt.u32 %p303, %r3275, 11;
selp.b32 %r2140, 3, %r2139, %p303;
selp.b32 %r3381, 2, %r2140, %p301;
$L__BB4_266:
mov.u32 %r2142, 1;
shl.b32 %r3276, %r2142, %r3381;
mov.u32 %r3277, %r3380;
bra.uni $L__BB4_267;
// Appends a 0 bit to the byte accumulator (%rs474 <<= 1) and decrements
// %r3120. When %r3120 reaches 0: if %r3111 > 191 the flag %r3373 becomes 1 and
// nothing is stored; otherwise the byte is stored at %rd11, %r3111 is
// incremented, %rs474 is cleared and %r3120 is reloaded with 8 (no 7/8
// selection on this path; the byte just received a 0 in its lowest bit).
// $L__BB4_224 then derives %r3341 from %r3275:
//   <3 -> 0, <6 -> 1, <9 -> 2, <11 -> 3, ==11 -> 4, otherwise 5.
$L__BB4_221:
shl.b16 %rs474, %rs474, 1;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p249, %r3120, 0;
mov.u32 %r3373, %r3277;
@%p249 bra $L__BB4_224;
setp.gt.u32 %p250, %r3111, 191;
mov.u32 %r3373, 1;
mov.u32 %r3120, 0;
@%p250 bra $L__BB4_224;
st.global.u8 [%rd11], %rs474;
add.s32 %r3111, %r3111, 1;
mov.u16 %rs474, 0;
mov.u32 %r3120, 8;
mov.u32 %r3373, %r3277;
$L__BB4_224:
setp.lt.u32 %p251, %r3275, 3;
mov.u32 %r3341, 0;
@%p251 bra $L__BB4_227;
setp.lt.u32 %p252, %r3275, 6;
mov.u32 %r3341, 1;
@%p252 bra $L__BB4_227;
setp.lt.u32 %p253, %r3275, 9;
setp.eq.s32 %p254, %r3275, 11;
selp.b32 %r2055, 4, 5, %p254;
setp.lt.u32 %p255, %r3275, 11;
selp.b32 %r2056, 3, %r2055, %p255;
selp.b32 %r3341, 2, %r2056, %p253;
// Peeled remainder of the bit emitter: emits the top (%r3341 & 3) bits of the
// %r3341-bit field in %r3274, most significant first, before the 4x-unrolled
// loop at $L__BB4_242 takes over. Nothing is emitted when %r3341 == 0.
// Per bit: %rs474 = (%rs474 << 1) | bit, %r3120 is decremented; when it reaches
// 0 the byte is stored at %rd1 + %rd5 + 17477 + %r3111 unless %r3111 > 191, in
// which case the carried flag is set to 1 instead. %r3351 leaves this region
// holding the number of bits still to emit; %r3352 / %r3373 carry the flag.
$L__BB4_227:
setp.eq.s32 %p256, %r3341, 0;
@%p256 bra $L__BB4_255;
and.b32 %r561, %r3341, 3;
setp.eq.s32 %p257, %r561, 0;
mov.u32 %r3351, %r3341;
mov.u32 %r3352, %r3373;
@%p257 bra $L__BB4_240;
// First peeled bit: bit (%r3341 - 1).
add.s32 %r3026, %r3341, -1;
mov.u32 %r2058, 1;
shl.b32 %r2059, %r2058, %r3026;
and.b32 %r2060, %r2059, %r3274;
setp.ne.s32 %p258, %r2060, 0;
selp.u32 %r2061, 1, 0, %p258;
cvt.u32.u16 %r2062, %rs474;
bfi.b32 %r2063, %r2062, %r2061, 1, 8;
cvt.u16.u32 %rs474, %r2063;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p259, %r3120, 0;
mov.u32 %r3352, %r3373;
@%p259 bra $L__BB4_232;
setp.gt.u32 %p260, %r3111, 191;
mov.u32 %r3120, 0;
mov.u32 %r3352, %r2058;
@%p260 bra $L__BB4_232;
// This flush reloads %r3120 with 8 unconditionally (no compare against 255).
add.s32 %r2067, %r3111, 17477;
cvt.u64.u32 %rd131, %r2067;
add.s64 %rd132, %rd131, %rd5;
add.s64 %rd133, %rd1, %rd132;
st.global.u8 [%rd133], %rs474;
add.s32 %r3111, %r3111, 1;
mov.u16 %rs474, 0;
mov.u32 %r3120, 8;
mov.u32 %r3352, %r3373;
$L__BB4_232:
and.b32 %r3028, %r3341, 3;
add.s32 %r3351, %r3341, -1;
setp.eq.s32 %p261, %r3028, 1;
mov.u32 %r3373, %r3352;
@%p261 bra $L__BB4_240;
// Second peeled bit: bit (%r3341 - 2); flush reloads 7 after a 0xFF byte, else 8.
add.s32 %r3351, %r3341, -2;
mov.u32 %r2068, 1;
shl.b32 %r2069, %r2068, %r3351;
and.b32 %r2070, %r2069, %r3274;
setp.ne.s32 %p262, %r2070, 0;
selp.u32 %r2071, 1, 0, %p262;
cvt.u32.u16 %r2072, %rs474;
bfi.b32 %r2073, %r2072, %r2071, 1, 8;
cvt.u16.u32 %rs474, %r2073;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p263, %r3120, 0;
mov.u32 %r3347, %r3352;
@%p263 bra $L__BB4_236;
setp.gt.u32 %p264, %r3111, 191;
mov.u32 %r3120, 0;
mov.u32 %r3347, %r2068;
@%p264 bra $L__BB4_236;
add.s32 %r2076, %r3111, 17477;
cvt.u64.u32 %rd134, %r2076;
add.s64 %rd135, %rd134, %rd5;
add.s64 %rd136, %rd1, %rd135;
and.b16 %rs289, %rs474, 255;
st.global.u8 [%rd136], %rs474;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p265, %rs289, 255;
selp.b32 %r3120, 7, 8, %p265;
mov.u16 %rs474, 0;
mov.u32 %r3347, %r3352;
$L__BB4_236:
and.b32 %r3029, %r3341, 3;
setp.eq.s32 %p266, %r3029, 2;
mov.u32 %r3373, %r3347;
mov.u32 %r3352, %r3347;
@%p266 bra $L__BB4_240;
// Third peeled bit: bit (%r3341 - 3).
add.s32 %r3351, %r3341, -3;
mov.u32 %r2077, 1;
shl.b32 %r2078, %r2077, %r3351;
and.b32 %r2079, %r2078, %r3274;
setp.ne.s32 %p267, %r2079, 0;
selp.u32 %r2080, 1, 0, %p267;
cvt.u32.u16 %r2081, %rs474;
bfi.b32 %r2082, %r2081, %r2080, 1, 8;
cvt.u16.u32 %rs474, %r2082;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p268, %r3120, 0;
mov.u32 %r3373, %r3347;
mov.u32 %r3352, %r3347;
@%p268 bra $L__BB4_240;
add.s32 %r3351, %r3341, -3;
setp.gt.u32 %p269, %r3111, 191;
mov.u32 %r3120, 0;
mov.u32 %r3373, %r2077;
mov.u32 %r3352, %r2077;
@%p269 bra $L__BB4_240;
add.s32 %r3351, %r3341, -3;
add.s32 %r2087, %r3111, 17477;
cvt.u64.u32 %rd137, %r2087;
add.s64 %rd138, %rd137, %rd5;
add.s64 %rd139, %rd1, %rd138;
and.b16 %rs292, %rs474, 255;
st.global.u8 [%rd139], %rs474;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p270, %rs292, 255;
selp.b32 %r3120, 7, 8, %p270;
mov.u16 %rs474, 0;
mov.u32 %r3373, %r3347;
mov.u32 %r3352, %r3347;
// Guard + 4x-unrolled bit-emission loop ($L__BB4_242).
// Guard: the loop is skipped when (%r3341 - 1) < 3 as an unsigned compare.
// Each iteration appends bits (%r3351-1), (%r3351-2), (%r3351-3), (%r3351-4)
// of %r3274, in that order, to the low end of the byte accumulator %rs474.
// %r3120 is decremented per bit; when it reaches 0 the byte is flushed:
//   - if %r3111 > 191 nothing is stored and the carried flag becomes 1;
//   - otherwise the low byte is stored at %rd1 + %rd5 + 17477 + %r3111,
//     %r3111 is incremented, the accumulator is cleared and the counter is
//     reloaded with 7 if the stored byte was 0xFF, else 8.
// %r3373 carries the flag between iterations.
$L__BB4_240:
add.s32 %r3030, %r3341, -1;
setp.lt.u32 %p271, %r3030, 3;
@%p271 bra $L__BB4_255;
mov.u32 %r3373, %r3352;
$L__BB4_242:
// Step 1: bit (%r3351 - 1); %r3361 = (low 8 bits of %rs474 << 1) | bit.
add.s32 %r2088, %r3351, -1;
mov.u32 %r2089, 1;
shl.b32 %r2090, %r2089, %r2088;
and.b32 %r2091, %r2090, %r3274;
setp.ne.s32 %p272, %r2091, 0;
selp.u32 %r2092, 1, 0, %p272;
cvt.u32.u16 %r2093, %rs474;
bfi.b32 %r3361, %r2093, %r2092, 1, 8;
add.s32 %r3360, %r3120, -1;
setp.ne.s32 %p273, %r3360, 0;
mov.u32 %r3362, %r3373;
@%p273 bra $L__BB4_245;
setp.gt.u32 %p274, %r3111, 191;
mov.u32 %r3360, 0;
mov.u32 %r3362, %r2089;
@%p274 bra $L__BB4_245;
cvt.u16.u32 %rs293, %r3361;
and.b16 %rs294, %rs293, 255;
add.s32 %r2097, %r3111, 17477;
cvt.u64.u32 %rd140, %r2097;
add.s64 %rd141, %rd140, %rd5;
add.s64 %rd142, %rd1, %rd141;
st.global.u8 [%rd142], %rs293;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p275, %rs294, 255;
selp.b32 %r3360, 7, 8, %p275;
mov.u32 %r3361, 0;
mov.u32 %r3362, %r3373;
$L__BB4_245:
// Step 2: bit (%r3351 - 2).
add.s32 %r2098, %r3351, -2;
shl.b32 %r2100, %r2089, %r2098;
and.b32 %r2101, %r2100, %r3274;
setp.ne.s32 %p276, %r2101, 0;
and.b32 %r2102, %r3361, 127;
selp.u32 %r2103, 1, 0, %p276;
bfi.b32 %r3365, %r2102, %r2103, 1, 7;
add.s32 %r3364, %r3360, -1;
setp.ne.s32 %p277, %r3364, 0;
mov.u32 %r3366, %r3362;
@%p277 bra $L__BB4_248;
setp.gt.u32 %p278, %r3111, 191;
mov.u32 %r3366, 1;
mov.u32 %r3364, 0;
@%p278 bra $L__BB4_248;
cvt.u16.u32 %rs295, %r3365;
and.b16 %rs296, %rs295, 255;
add.s32 %r2107, %r3111, 17477;
cvt.u64.u32 %rd143, %r2107;
add.s64 %rd144, %rd143, %rd5;
add.s64 %rd145, %rd1, %rd144;
st.global.u8 [%rd145], %rs295;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p279, %rs296, 255;
selp.b32 %r3364, 7, 8, %p279;
mov.u32 %r3365, 0;
mov.u32 %r3366, %r3362;
$L__BB4_248:
// Step 3: bit (%r3351 - 3).
add.s32 %r2108, %r3351, -3;
mov.u32 %r2109, 1;
shl.b32 %r2110, %r2109, %r2108;
and.b32 %r2111, %r2110, %r3274;
setp.ne.s32 %p280, %r2111, 0;
and.b32 %r2112, %r3365, 127;
selp.u32 %r2113, 1, 0, %p280;
bfi.b32 %r3369, %r2112, %r2113, 1, 7;
add.s32 %r3368, %r3364, -1;
setp.ne.s32 %p281, %r3368, 0;
mov.u32 %r3370, %r3366;
@%p281 bra $L__BB4_251;
setp.gt.u32 %p282, %r3111, 191;
mov.u32 %r3368, 0;
mov.u32 %r3370, %r2109;
@%p282 bra $L__BB4_251;
cvt.u16.u32 %rs297, %r3369;
and.b16 %rs298, %rs297, 255;
add.s32 %r2117, %r3111, 17477;
cvt.u64.u32 %rd146, %r2117;
add.s64 %rd147, %rd146, %rd5;
add.s64 %rd148, %rd1, %rd147;
st.global.u8 [%rd148], %rs297;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p283, %rs298, 255;
selp.b32 %r3368, 7, 8, %p283;
mov.u32 %r3369, 0;
mov.u32 %r3370, %r3366;
$L__BB4_251:
// Step 4: %r3351 -= 4, then bit %r3351; result is written back to %rs474 / %r3120.
add.s32 %r3351, %r3351, -4;
shl.b32 %r2119, %r2109, %r3351;
and.b32 %r2120, %r2119, %r3274;
setp.ne.s32 %p284, %r2120, 0;
and.b32 %r2121, %r3369, 127;
selp.u32 %r2122, 1, 0, %p284;
bfi.b32 %r2123, %r2121, %r2122, 1, 15;
cvt.u16.u32 %rs474, %r2123;
add.s32 %r3120, %r3368, -1;
setp.ne.s32 %p285, %r3120, 0;
mov.u32 %r3373, %r3370;
@%p285 bra $L__BB4_254;
setp.gt.u32 %p286, %r3111, 191;
mov.u32 %r3373, 1;
mov.u32 %r3120, 0;
@%p286 bra $L__BB4_254;
add.s32 %r2126, %r3111, 17477;
cvt.u64.u32 %rd149, %r2126;
add.s64 %rd150, %rd149, %rd5;
add.s64 %rd151, %rd1, %rd150;
and.b16 %rs300, %rs474, 255;
st.global.u8 [%rd151], %rs474;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p287, %rs300, 255;
selp.b32 %r3120, 7, 8, %p287;
mov.u16 %rs474, 0;
mov.u32 %r3373, %r3370;
$L__BB4_254:
// Repeat until all bit positions down to 0 have been emitted.
setp.ne.s32 %p288, %r3351, 0;
@%p288 bra $L__BB4_242;
// State step-down: %r3275 = (%r3275 == 0) ? 0 : %r3275 - 1, and %r3274 is reset to 0.
// %r3377 is then derived from the new %r3275:
//   <3 -> 0, <6 -> 1, <9 -> 2, <11 -> 3, ==11 -> 4, otherwise 5,
// and %r3276 = 1 << %r3377. The carried flag %r3373 is forwarded in %r3277.
$L__BB4_255:
add.s32 %r2128, %r3275, -1;
setp.eq.s32 %p289, %r3275, 0;
mov.u32 %r3274, 0;
selp.b32 %r3275, 0, %r2128, %p289;
setp.lt.u32 %p290, %r3275, 3;
mov.u32 %r3377, %r3274;
@%p290 bra $L__BB4_258;
setp.lt.u32 %p291, %r3275, 6;
mov.u32 %r3377, 1;
@%p291 bra $L__BB4_258;
setp.lt.u32 %p292, %r3275, 9;
setp.eq.s32 %p293, %r3275, 11;
selp.b32 %r2130, 4, 5, %p293;
setp.lt.u32 %p294, %r3275, 11;
selp.b32 %r2131, 3, %r2130, %p294;
selp.b32 %r3377, 2, %r2131, %p292;
$L__BB4_258:
mov.u32 %r2133, 1;
shl.b32 %r3276, %r2133, %r3377;
mov.u32 %r3277, %r3373;
// Dispatch: when both %r86 > 2 and %r343 > 2 (signed) take $L__BB4_317,
// otherwise $L__BB4_268. %p305 (%r86 > 2) is reused by $L__BB4_268.
$L__BB4_267:
setp.gt.s32 %p304, %r343, 2;
setp.gt.s32 %p305, %r86, 2;
and.pred %p306, %p305, %p304;
@%p306 bra $L__BB4_317;
bra.uni $L__BB4_268;
// Path for %r86 > 2 and %r343 > 2.
// Loads bytes from the table at %rd31 (converted to a global address):
//   %rs99  = tbl[%r86*6 - 11]   %rs100 = tbl[%r86*6 - 10]   %rs101 = tbl[%r86*6 - 9]
//   %rs102 = tbl[%r343*6 - 12]  %rs103 = tbl[%r343*6 - 11]
//   %rs104 = tbl[%r343*6 - 10]  %rs105 = tbl[%r343*6 - 9]
// NOTE(review): this looks like a 6-byte-per-row table of (value, length)
// byte pairs indexed by (u - 2), presumably the HTJ2K UVLC table - confirm.
//
// The loop at $L__BB4_319 then emits %rs99 bits of the byte tbl[%r86*6 - 12],
// least significant first, into a stream that grows downward in memory:
//   %rs532 = partial byte, %r3509 = bits used in it,
//   %r3510 = 1 when the previously stored byte was > 143 (one bit less room),
//   %r3508 = bytes stored so far (store address %rd1 + %rd5 + 20548 - %r3508).
// If %r3508 > 2879 the loop exits with flag %r3475 = 1, else %r3475 = %r3227.
$L__BB4_317:
mul.lo.s32 %r2263, %r86, 6;
add.s32 %r2264, %r2263, -11;
cvt.u64.u32 %rd185, %r2264;
cvta.to.global.u64 %rd186, %rd31;
add.s64 %rd14, %rd186, %rd185;
ld.global.u8 %rs99, [%rd14];
add.s32 %r2265, %r2263, -10;
cvt.u64.u32 %rd187, %r2265;
add.s64 %rd188, %rd186, %rd187;
ld.global.u8 %rs100, [%rd188];
ld.global.u8 %rs101, [%rd188+1];
mul.lo.s32 %r2266, %r343, 6;
add.s32 %r2267, %r2266, -12;
cvt.u64.u32 %rd189, %r2267;
add.s64 %rd190, %rd186, %rd189;
ld.global.u8 %rs102, [%rd190];
ld.global.u8 %rs103, [%rd190+1];
add.s32 %r2268, %r2266, -10;
cvt.u64.u32 %rd191, %r2268;
add.s64 %rd192, %rd186, %rd191;
ld.global.u8 %rs104, [%rd192];
ld.global.u8 %rs105, [%rd192+1];
// A zero length means there is nothing to emit for this codeword.
setp.eq.s16 %p374, %rs99, 0;
mov.u32 %r3475, %r3227;
@%p374 bra $L__BB4_324;
ld.global.u8 %r3465, [%rd14+-1];
cvt.u32.u16 %r3464, %rs99;
$L__BB4_319:
mov.u16 %rs106, %rs532;
mov.u32 %r771, %r3464;
setp.gt.u32 %p375, %r3508, 2879;
mov.u32 %r3475, 1;
@%p375 bra $L__BB4_324;
// avail = 8 - %r3510 - %r3509; n = min(avail, bits remaining).
mov.u32 %r2270, 8;
sub.s32 %r2271, %r2270, %r3510;
sub.s32 %r2272, %r2271, %r3509;
min.u32 %r2273, %r2272, %r771;
setp.eq.s32 %p376, %r2273, 32;
mov.u32 %r2274, -1;
shl.b32 %r2275, %r2274, %r2273;
not.b32 %r2276, %r2275;
selp.b32 %r2277, -1, %r2276, %p376;
and.b32 %r2278, %r2277, %r3465;
shl.b32 %r2279, %r2278, %r3509;
cvt.u16.u32 %rs336, %r2279;
or.b16 %rs532, %rs106, %rs336;
add.s32 %r3509, %r2273, %r3509;
sub.s32 %r3464, %r771, %r2273;
shr.u32 %r3465, %r3465, %r2273;
setp.gt.u32 %p377, %r2272, %r771;
@%p377 bra $L__BB4_323;
// Byte is full. If %r3510 was set and the byte is not 127, only clear
// %r3510 (this frees one more bit) and keep filling; otherwise store it.
setp.ne.s32 %p378, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs337, %rs532, 255;
setp.ne.s16 %p379, %rs337, 127;
and.pred %p380, %p378, %p379;
@%p380 bra $L__BB4_323;
cvt.u16.u32 %rs461, %r2279;
or.b16 %rs460, %rs106, %rs461;
mov.u32 %r2282, 20548;
sub.s32 %r2283, %r2282, %r3508;
cvt.u64.u32 %rd193, %r2283;
add.s64 %rd194, %rd193, %rd5;
add.s64 %rd195, %rd1, %rd194;
st.global.u8 [%rd195], %rs460;
add.s32 %r3508, %r3508, 1;
setp.gt.u16 %p381, %rs337, 143;
selp.u32 %r3510, 1, 0, %p381;
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_323:
setp.ne.s32 %p382, %r3464, 0;
mov.u32 %r3475, %r3227;
@%p382 bra $L__BB4_319;
// Emits %rs103 bits of %rs102, least significant first, into the
// downward-growing byte stream; skipped entirely when %rs103 == 0.
//   %rs532 = partial byte, %r3509 = bits used in it,
//   %r3510 = 1 when the previously stored byte was > 143 (one bit less room),
//   %r3508 = bytes stored so far (store address %rd1 + %rd5 + 20548 - %r3508).
// If %r3508 > 2879 the loop exits with flag %r3487 = 1; otherwise the flag
// keeps the incoming value %r3475.
$L__BB4_324:
setp.eq.s16 %p383, %rs103, 0;
mov.u32 %r3487, %r3475;
@%p383 bra $L__BB4_331;
cvt.u32.u16 %r2284, %rs102;
and.b32 %r3477, %r2284, 255;
cvt.u32.u16 %r2285, %rs103;
and.b32 %r3476, %r2285, 255;
$L__BB4_326:
mov.u16 %rs110, %rs532;
mov.u32 %r790, %r3476;
setp.gt.u32 %p384, %r3508, 2879;
mov.u32 %r3487, 1;
@%p384 bra $L__BB4_331;
// avail = 8 - %r3510 - %r3509; n = min(avail, bits remaining).
mov.u32 %r2287, 8;
sub.s32 %r2288, %r2287, %r3510;
sub.s32 %r2289, %r2288, %r3509;
min.u32 %r2290, %r2289, %r790;
setp.eq.s32 %p385, %r2290, 32;
mov.u32 %r2291, -1;
shl.b32 %r2292, %r2291, %r2290;
not.b32 %r2293, %r2292;
selp.b32 %r2294, -1, %r2293, %p385;
and.b32 %r2295, %r2294, %r3477;
shl.b32 %r2296, %r2295, %r3509;
cvt.u16.u32 %rs341, %r2296;
or.b16 %rs532, %rs110, %rs341;
add.s32 %r3509, %r2290, %r3509;
sub.s32 %r3476, %r790, %r2290;
shr.u32 %r3477, %r3477, %r2290;
setp.gt.u32 %p386, %r2289, %r790;
@%p386 bra $L__BB4_330;
// Byte is full. If %r3510 was set and the byte is not 127, only clear
// %r3510 and keep filling; otherwise store it.
setp.ne.s32 %p387, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs342, %rs532, 255;
setp.ne.s16 %p388, %rs342, 127;
and.pred %p389, %p387, %p388;
@%p389 bra $L__BB4_330;
cvt.u16.u32 %rs463, %r2296;
or.b16 %rs462, %rs110, %rs463;
mov.u32 %r2299, 20548;
sub.s32 %r2300, %r2299, %r3508;
cvt.u64.u32 %rd196, %r2300;
add.s64 %rd197, %rd196, %rd5;
add.s64 %rd198, %rd1, %rd197;
st.global.u8 [%rd198], %rs462;
add.s32 %r3508, %r3508, 1;
setp.gt.u16 %p390, %rs342, 143;
selp.u32 %r3510, 1, 0, %p390;
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_330:
setp.ne.s32 %p391, %r3476, 0;
mov.u32 %r3487, %r3475;
@%p391 bra $L__BB4_326;
// Emits %rs101 bits of %rs100, least significant first, into the
// downward-growing byte stream; skipped entirely when %rs101 == 0.
//   %rs532 = partial byte, %r3509 = bits used in it,
//   %r3510 = 1 when the previously stored byte was > 143 (one bit less room),
//   %r3508 = bytes stored so far (store address %rd1 + %rd5 + 20548 - %r3508).
// If %r3508 > 2879 the loop exits with flag %r3499 = 1; otherwise the flag
// keeps the incoming value %r3487.
$L__BB4_331:
setp.eq.s16 %p392, %rs101, 0;
mov.u32 %r3499, %r3487;
@%p392 bra $L__BB4_338;
cvt.u32.u16 %r2301, %rs101;
and.b32 %r3488, %r2301, 255;
cvt.u32.u16 %r2302, %rs100;
and.b32 %r3489, %r2302, 255;
$L__BB4_333:
mov.u32 %r809, %r3488;
setp.gt.u32 %p393, %r3508, 2879;
mov.u32 %r3499, 1;
@%p393 bra $L__BB4_338;
// avail = 8 - %r3510 - %r3509; n = min(avail, bits remaining).
mov.u32 %r2304, 8;
sub.s32 %r2305, %r2304, %r3510;
sub.s32 %r2306, %r2305, %r3509;
min.u32 %r2307, %r2306, %r809;
setp.eq.s32 %p394, %r2307, 32;
mov.u32 %r2308, -1;
shl.b32 %r2309, %r2308, %r2307;
not.b32 %r2310, %r2309;
selp.b32 %r2311, -1, %r2310, %p394;
and.b32 %r2312, %r2311, %r3489;
shl.b32 %r2313, %r2312, %r3509;
cvt.u16.u32 %rs346, %r2313;
or.b16 %rs532, %rs532, %rs346;
add.s32 %r3509, %r2307, %r3509;
sub.s32 %r3488, %r809, %r2307;
shr.u32 %r3489, %r3489, %r2307;
setp.gt.u32 %p395, %r2306, %r809;
@%p395 bra $L__BB4_337;
// Byte is full. If %r3510 was set and the byte is not 127, only clear
// %r3510 and keep filling; otherwise store it.
setp.ne.s32 %p396, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs347, %rs532, 255;
setp.ne.s16 %p397, %rs347, 127;
and.pred %p398, %p396, %p397;
@%p398 bra $L__BB4_337;
mov.u32 %r2316, 20548;
sub.s32 %r2317, %r2316, %r3508;
cvt.u64.u32 %rd199, %r2317;
add.s64 %rd200, %rd199, %rd5;
add.s64 %rd201, %rd1, %rd200;
st.global.u8 [%rd201], %rs532;
add.s32 %r3508, %r3508, 1;
setp.gt.u16 %p399, %rs347, 143;
selp.u32 %r3510, 1, 0, %p399;
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_337:
setp.ne.s32 %p400, %r3488, 0;
mov.u32 %r3499, %r3487;
@%p400 bra $L__BB4_333;
// Emits %rs105 bits of %rs104, least significant first, into the
// downward-growing byte stream; skipped entirely when %rs105 == 0.
//   %rs532 = partial byte, %r3509 = bits used in it,
//   %r3510 = 1 when the previously stored byte was > 143 (one bit less room),
//   %r3508 = bytes stored so far (store address %rd1 + %rd5 + 20548 - %r3508).
// If %r3508 > 2879 the loop exits with flag %r3511 = 1; otherwise the flag
// keeps the incoming value %r3499. All exits continue at $L__BB4_345.
$L__BB4_338:
setp.eq.s16 %p401, %rs105, 0;
mov.u32 %r3511, %r3499;
@%p401 bra $L__BB4_345;
cvt.u32.u16 %r2318, %rs104;
and.b32 %r3501, %r2318, 255;
cvt.u32.u16 %r2319, %rs105;
and.b32 %r3500, %r2319, 255;
$L__BB4_340:
mov.u32 %r828, %r3500;
setp.gt.u32 %p402, %r3508, 2879;
mov.u32 %r3511, 1;
@%p402 bra $L__BB4_345;
// avail = 8 - %r3510 - %r3509; n = min(avail, bits remaining).
mov.u32 %r2321, 8;
sub.s32 %r2322, %r2321, %r3510;
sub.s32 %r2323, %r2322, %r3509;
min.u32 %r2324, %r2323, %r828;
setp.eq.s32 %p403, %r2324, 32;
mov.u32 %r2325, -1;
shl.b32 %r2326, %r2325, %r2324;
not.b32 %r2327, %r2326;
selp.b32 %r2328, -1, %r2327, %p403;
and.b32 %r2329, %r2328, %r3501;
shl.b32 %r2330, %r2329, %r3509;
cvt.u16.u32 %rs351, %r2330;
or.b16 %rs532, %rs532, %rs351;
add.s32 %r3509, %r2324, %r3509;
sub.s32 %r3500, %r828, %r2324;
shr.u32 %r3501, %r3501, %r2324;
setp.gt.u32 %p404, %r2323, %r828;
@%p404 bra $L__BB4_344;
// Byte is full. If %r3510 was set and the byte is not 127, only clear
// %r3510 and keep filling; otherwise store it.
setp.ne.s32 %p405, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs352, %rs532, 255;
setp.ne.s16 %p406, %rs352, 127;
and.pred %p407, %p405, %p406;
@%p407 bra $L__BB4_344;
mov.u32 %r2333, 20548;
sub.s32 %r2334, %r2333, %r3508;
cvt.u64.u32 %rd202, %r2334;
add.s64 %rd203, %rd202, %rd5;
add.s64 %rd204, %rd1, %rd203;
st.global.u8 [%rd204], %rs532;
add.s32 %r3508, %r3508, 1;
setp.gt.u16 %p408, %rs352, 143;
selp.u32 %r3510, 1, 0, %p408;
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_344:
setp.ne.s32 %p409, %r3500, 0;
mov.u32 %r3511, %r3499;
@%p409 bra $L__BB4_340;
bra.uni $L__BB4_345;
// Dispatch for the case where %r86 and %r343 are not both > 2:
//   %r86 > 2 (%p305) and %r343 > 0 -> $L__BB4_297, otherwise -> $L__BB4_269.
// %r644 = %r86 * 6 and %p307 (%r343 > 0) are reused by both targets.
$L__BB4_268:
setp.gt.s32 %p307, %r343, 0;
and.pred %p309, %p305, %p307;
mul.lo.s32 %r644, %r86, 6;
@%p309 bra $L__BB4_297;
bra.uni $L__BB4_269;
// Path for %r86 > 2 with %r343 > 0 (and not both > 2).
// Loads from the table at %rd31 (converted to a global address), using
// %r644 = %r86 * 6 as the byte offset:
//   %rs85 = tbl[%r644 + 1], %rs86 = tbl[%r644 + 2], %rs87 = tbl[%r644 + 3].
// The loop at $L__BB4_299 emits %rs85 bits of the byte tbl[%r644], least
// significant first, into the downward-growing byte stream:
//   %rs532 = partial byte, %r3509 = bits used in it,
//   %r3510 = 1 when the previously stored byte was > 143 (one bit less room),
//   %r3508 = bytes stored so far (store address %rd1 + %rd5 + 20548 - %r3508).
// If %r3508 > 2879 the loop exits with flag %r3443 = 1, else %r3443 = %r3227.
$L__BB4_297:
cvt.u64.u32 %rd172, %r644;
cvta.to.global.u64 %rd173, %rd31;
add.s64 %rd13, %rd173, %rd172;
ld.global.u8 %rs85, [%rd13+1];
add.s32 %r2214, %r644, 2;
cvt.u64.u32 %rd174, %r2214;
add.s64 %rd175, %rd173, %rd174;
ld.global.u8 %rs86, [%rd175];
ld.global.u8 %rs87, [%rd175+1];
setp.eq.s16 %p348, %rs85, 0;
mov.u32 %r3443, %r3227;
@%p348 bra $L__BB4_304;
ld.global.u8 %r3433, [%rd13];
cvt.u32.u16 %r3432, %rs85;
$L__BB4_299:
mov.u16 %rs88, %rs532;
mov.u32 %r719, %r3432;
setp.gt.u32 %p349, %r3508, 2879;
mov.u32 %r3443, 1;
@%p349 bra $L__BB4_304;
// avail = 8 - %r3510 - %r3509; n = min(avail, bits remaining).
mov.u32 %r2216, 8;
sub.s32 %r2217, %r2216, %r3510;
sub.s32 %r2218, %r2217, %r3509;
min.u32 %r2219, %r2218, %r719;
setp.eq.s32 %p350, %r2219, 32;
mov.u32 %r2220, -1;
shl.b32 %r2221, %r2220, %r2219;
not.b32 %r2222, %r2221;
selp.b32 %r2223, -1, %r2222, %p350;
and.b32 %r2224, %r2223, %r3433;
shl.b32 %r2225, %r2224, %r3509;
cvt.u16.u32 %rs323, %r2225;
or.b16 %rs532, %rs88, %rs323;
add.s32 %r3509, %r2219, %r3509;
sub.s32 %r3432, %r719, %r2219;
shr.u32 %r3433, %r3433, %r2219;
setp.gt.u32 %p351, %r2218, %r719;
@%p351 bra $L__BB4_303;
// Byte is full. If %r3510 was set and the byte is not 127, only clear
// %r3510 and keep filling; otherwise store it.
setp.ne.s32 %p352, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs324, %rs532, 255;
setp.ne.s16 %p353, %rs324, 127;
and.pred %p354, %p352, %p353;
@%p354 bra $L__BB4_303;
cvt.u16.u32 %rs459, %r2225;
or.b16 %rs458, %rs88, %rs459;
mov.u32 %r2228, 20548;
sub.s32 %r2229, %r2228, %r3508;
cvt.u64.u32 %rd176, %r2229;
add.s64 %rd177, %rd176, %rd5;
add.s64 %rd178, %rd1, %rd177;
st.global.u8 [%rd178], %rs458;
add.s32 %r3508, %r3508, 1;
setp.gt.u16 %p355, %rs324, 143;
selp.u32 %r3510, 1, 0, %p355;
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_303:
setp.ne.s32 %p356, %r3432, 0;
mov.u32 %r3443, %r3227;
@%p356 bra $L__BB4_299;
// Emits a single bit (%r3444 starts at 1): the least significant bit of
// (%r343 - 1), into the downward-growing byte stream.
//   %rs532 = partial byte, %r3509 = bits used in it,
//   %r3510 = 1 when the previously stored byte was > 143 (one bit less room),
//   %r3508 = bytes stored so far (store address %rd1 + %rd5 + 20548 - %r3508).
// %r3457 = %rs86 & 255 is prepared here for the loop at $L__BB4_312.
// If %r3508 > 2879 the loop exits with flag %r3455 = 1; otherwise the flag
// keeps the incoming value %r3443.
$L__BB4_304:
add.s32 %r3445, %r343, -1;
cvt.u32.u16 %r2232, %rs86;
and.b32 %r3457, %r2232, 255;
mov.u32 %r3444, 1;
$L__BB4_305:
mov.u32 %r739, %r3444;
mov.u32 %r3455, 1;
setp.gt.u32 %p357, %r3508, 2879;
@%p357 bra $L__BB4_310;
// avail = 8 - %r3510 - %r3509; n = min(avail, bits remaining).
mov.u32 %r2234, 8;
sub.s32 %r2235, %r2234, %r3510;
sub.s32 %r2236, %r2235, %r3509;
min.u32 %r2237, %r2236, %r739;
setp.eq.s32 %p358, %r2237, 32;
mov.u32 %r2238, -1;
shl.b32 %r2239, %r2238, %r2237;
not.b32 %r2240, %r2239;
selp.b32 %r2241, -1, %r2240, %p358;
and.b32 %r2242, %r2241, %r3445;
shl.b32 %r2243, %r2242, %r3509;
cvt.u16.u32 %rs327, %r2243;
or.b16 %rs532, %rs532, %rs327;
add.s32 %r3509, %r2237, %r3509;
sub.s32 %r3444, %r739, %r2237;
shr.u32 %r3445, %r3445, %r2237;
setp.gt.u32 %p359, %r2236, %r739;
@%p359 bra $L__BB4_309;
// Byte is full. If %r3510 was set and the byte is not 127, only clear
// %r3510 and keep filling; otherwise store it.
setp.ne.s32 %p360, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs328, %rs532, 255;
setp.ne.s16 %p361, %rs328, 127;
and.pred %p362, %p360, %p361;
@%p362 bra $L__BB4_309;
mov.u32 %r2246, 20548;
sub.s32 %r2247, %r2246, %r3508;
cvt.u64.u32 %rd179, %r2247;
add.s64 %rd180, %rd179, %rd5;
add.s64 %rd181, %rd1, %rd180;
st.global.u8 [%rd181], %rs532;
add.s32 %r3508, %r3508, 1;
setp.gt.u16 %p363, %rs328, 143;
selp.u32 %r3510, 1, 0, %p363;
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_309:
setp.ne.s32 %p364, %r3444, 0;
mov.u32 %r3455, %r3443;
@%p364 bra $L__BB4_305;
// Emits %rs87 bits of %r3457, least significant first, into the
// downward-growing byte stream; skipped entirely when %rs87 == 0.
//   %rs532 = partial byte, %r3509 = bits used in it,
//   %r3510 = 1 when the previously stored byte was > 143 (one bit less room),
//   %r3508 = bytes stored so far (store address %rd1 + %rd5 + 20548 - %r3508).
// If %r3508 > 2879 the loop exits with flag %r3511 = 1; otherwise the flag
// keeps the incoming value %r3455. All exits continue at $L__BB4_345.
$L__BB4_310:
setp.eq.s16 %p365, %rs87, 0;
mov.u32 %r3511, %r3455;
@%p365 bra $L__BB4_345;
cvt.u32.u16 %r3025, %rs87;
and.b32 %r3456, %r3025, 255;
$L__BB4_312:
mov.u32 %r756, %r3456;
setp.gt.u32 %p366, %r3508, 2879;
mov.u32 %r3511, 1;
@%p366 bra $L__BB4_345;
// avail = 8 - %r3510 - %r3509; n = min(avail, bits remaining).
mov.u32 %r2249, 8;
sub.s32 %r2250, %r2249, %r3510;
sub.s32 %r2251, %r2250, %r3509;
min.u32 %r2252, %r2251, %r756;
setp.eq.s32 %p367, %r2252, 32;
mov.u32 %r2253, -1;
shl.b32 %r2254, %r2253, %r2252;
not.b32 %r2255, %r2254;
selp.b32 %r2256, -1, %r2255, %p367;
and.b32 %r2257, %r2256, %r3457;
shl.b32 %r2258, %r2257, %r3509;
cvt.u16.u32 %rs332, %r2258;
or.b16 %rs532, %rs532, %rs332;
add.s32 %r3509, %r2252, %r3509;
sub.s32 %r3456, %r756, %r2252;
shr.u32 %r3457, %r3457, %r2252;
setp.gt.u32 %p368, %r2251, %r756;
@%p368 bra $L__BB4_316;
// Byte is full. If %r3510 was set and the byte is not 127, only clear
// %r3510 and keep filling; otherwise store it.
setp.ne.s32 %p369, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs333, %rs532, 255;
setp.ne.s16 %p370, %rs333, 127;
and.pred %p371, %p369, %p370;
@%p371 bra $L__BB4_316;
mov.u32 %r2261, 20548;
sub.s32 %r2262, %r2261, %r3508;
cvt.u64.u32 %rd182, %r2262;
add.s64 %rd183, %rd182, %rd5;
add.s64 %rd184, %rd1, %rd183;
st.global.u8 [%rd184], %rs532;
add.s32 %r3508, %r3508, 1;
setp.gt.u16 %p372, %rs333, 143;
selp.u32 %r3510, 1, 0, %p372;
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_316:
setp.eq.s32 %p373, %r3456, 0;
mov.u32 %r3511, %r3455;
@%p373 bra $L__BB4_345;
bra.uni $L__BB4_312;
// Remaining path (neither of the two special cases above).
// Row offsets into the table at %rd31 (converted to a global address):
//   a = (%r86 > 0) ? %r644 : 0       (%r644 = %r86 * 6, from $L__BB4_268)
//   b = (%r343 > 0) ? %r343 * 6 : 0  (%p307 from $L__BB4_268)
// Loads: %rs63 = tbl[a+1], %rs64 = tbl[a+2], %rs65 = tbl[a+3],
//        %rs66 = tbl[b],   %rs67 = tbl[b+1], %rs68 = tbl[b+2], %rs69 = tbl[b+3].
// The loop at $L__BB4_271 emits %rs63 bits of the byte tbl[a], least
// significant first, into the downward-growing byte stream:
//   %rs532 = partial byte, %r3509 = bits used in it,
//   %r3510 = 1 when the previously stored byte was > 143 (one bit less room),
//   %r3508 = bytes stored so far (store address %rd1 + %rd5 + 20548 - %r3508).
// If %r3508 > 2879 the loop exits with flag %r3399 = 1, else %r3399 = %r3227.
$L__BB4_269:
setp.gt.s32 %p311, %r86, 0;
selp.b32 %r2143, %r644, 0, %p311;
cvt.u64.u32 %rd152, %r2143;
cvta.to.global.u64 %rd153, %rd31;
add.s64 %rd12, %rd153, %rd152;
ld.global.u8 %rs63, [%rd12+1];
add.s32 %r2144, %r2143, 2;
cvt.u64.u32 %rd154, %r2144;
add.s64 %rd155, %rd153, %rd154;
ld.global.u8 %rs64, [%rd155];
ld.global.u8 %rs65, [%rd155+1];
mul.lo.s32 %r2145, %r343, 6;
selp.b32 %r2146, %r2145, 0, %p307;
cvt.u64.u32 %rd156, %r2146;
add.s64 %rd157, %rd153, %rd156;
ld.global.u8 %rs66, [%rd157];
ld.global.u8 %rs67, [%rd157+1];
add.s32 %r2147, %r2146, 2;
cvt.u64.u32 %rd158, %r2147;
add.s64 %rd159, %rd153, %rd158;
ld.global.u8 %rs68, [%rd159];
ld.global.u8 %rs69, [%rd159+1];
setp.eq.s16 %p312, %rs63, 0;
mov.u32 %r3399, %r3227;
@%p312 bra $L__BB4_276;
ld.global.u8 %r3389, [%rd12];
cvt.u32.u16 %r3388, %rs63;
$L__BB4_271:
mov.u16 %rs70, %rs532;
mov.u32 %r647, %r3388;
setp.gt.u32 %p313, %r3508, 2879;
mov.u32 %r3399, 1;
@%p313 bra $L__BB4_276;
// avail = 8 - %r3510 - %r3509; n = min(avail, bits remaining).
mov.u32 %r2149, 8;
sub.s32 %r2150, %r2149, %r3510;
sub.s32 %r2151, %r2150, %r3509;
min.u32 %r2152, %r2151, %r647;
setp.eq.s32 %p314, %r2152, 32;
mov.u32 %r2153, -1;
shl.b32 %r2154, %r2153, %r2152;
not.b32 %r2155, %r2154;
selp.b32 %r2156, -1, %r2155, %p314;
and.b32 %r2157, %r2156, %r3389;
shl.b32 %r2158, %r2157, %r3509;
cvt.u16.u32 %rs304, %r2158;
or.b16 %rs532, %rs70, %rs304;
add.s32 %r3509, %r2152, %r3509;
sub.s32 %r3388, %r647, %r2152;
shr.u32 %r3389, %r3389, %r2152;
setp.gt.u32 %p315, %r2151, %r647;
@%p315 bra $L__BB4_275;
// Byte is full. If %r3510 was set and the byte is not 127, only clear
// %r3510 and keep filling; otherwise store it.
setp.ne.s32 %p316, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs305, %rs532, 255;
setp.ne.s16 %p317, %rs305, 127;
and.pred %p318, %p316, %p317;
@%p318 bra $L__BB4_275;
cvt.u16.u32 %rs455, %r2158;
or.b16 %rs454, %rs70, %rs455;
mov.u32 %r2161, 20548;
sub.s32 %r2162, %r2161, %r3508;
cvt.u64.u32 %rd160, %r2162;
add.s64 %rd161, %rd160, %rd5;
add.s64 %rd162, %rd1, %rd161;
st.global.u8 [%rd162], %rs454;
add.s32 %r3508, %r3508, 1;
setp.gt.u16 %p319, %rs305, 143;
selp.u32 %r3510, 1, 0, %p319;
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_275:
setp.ne.s32 %p320, %r3388, 0;
mov.u32 %r3399, %r3227;
@%p320 bra $L__BB4_271;
// Emits %rs67 bits of %rs66, least significant first, into the
// downward-growing byte stream; skipped entirely when %rs67 == 0.
//   %rs532 = partial byte, %r3509 = bits used in it,
//   %r3510 = 1 when the previously stored byte was > 143 (one bit less room),
//   %r3508 = bytes stored so far (store address %rd1 + %rd5 + 20548 - %r3508).
// If %r3508 > 2879 the loop exits with flag %r3411 = 1; otherwise the flag
// keeps the incoming value %r3399.
$L__BB4_276:
setp.eq.s16 %p321, %rs67, 0;
mov.u32 %r3411, %r3399;
@%p321 bra $L__BB4_283;
cvt.u32.u16 %r2163, %rs66;
and.b32 %r3401, %r2163, 255;
cvt.u32.u16 %r2164, %rs67;
and.b32 %r3400, %r2164, 255;
$L__BB4_278:
mov.u16 %rs74, %rs532;
mov.u32 %r666, %r3400;
setp.gt.u32 %p322, %r3508, 2879;
mov.u32 %r3411, 1;
@%p322 bra $L__BB4_283;
// avail = 8 - %r3510 - %r3509; n = min(avail, bits remaining).
mov.u32 %r2166, 8;
sub.s32 %r2167, %r2166, %r3510;
sub.s32 %r2168, %r2167, %r3509;
min.u32 %r2169, %r2168, %r666;
setp.eq.s32 %p323, %r2169, 32;
mov.u32 %r2170, -1;
shl.b32 %r2171, %r2170, %r2169;
not.b32 %r2172, %r2171;
selp.b32 %r2173, -1, %r2172, %p323;
and.b32 %r2174, %r2173, %r3401;
shl.b32 %r2175, %r2174, %r3509;
cvt.u16.u32 %rs309, %r2175;
or.b16 %rs532, %rs74, %rs309;
add.s32 %r3509, %r2169, %r3509;
sub.s32 %r3400, %r666, %r2169;
shr.u32 %r3401, %r3401, %r2169;
setp.gt.u32 %p324, %r2168, %r666;
@%p324 bra $L__BB4_282;
// Byte is full. If %r3510 was set and the byte is not 127, only clear
// %r3510 and keep filling; otherwise store it.
setp.ne.s32 %p325, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs310, %rs532, 255;
setp.ne.s16 %p326, %rs310, 127;
and.pred %p327, %p325, %p326;
@%p327 bra $L__BB4_282;
cvt.u16.u32 %rs457, %r2175;
or.b16 %rs456, %rs74, %rs457;
mov.u32 %r2178, 20548;
sub.s32 %r2179, %r2178, %r3508;
cvt.u64.u32 %rd163, %r2179;
add.s64 %rd164, %rd163, %rd5;
add.s64 %rd165, %rd1, %rd164;
st.global.u8 [%rd165], %rs456;
add.s32 %r3508, %r3508, 1;
setp.gt.u16 %p328, %rs310, 143;
selp.u32 %r3510, 1, 0, %p328;
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_282:
setp.ne.s32 %p329, %r3400, 0;
mov.u32 %r3411, %r3399;
@%p329 bra $L__BB4_278;
// Emits %rs65 bits of %rs64, least significant first, into the
// downward-growing byte stream; skipped entirely when %rs65 == 0.
//   %rs532 = partial byte, %r3509 = bits used in it,
//   %r3510 = 1 when the previously stored byte was > 143 (one bit less room),
//   %r3508 = bytes stored so far (store address %rd1 + %rd5 + 20548 - %r3508).
// If %r3508 > 2879 the loop exits with flag %r3423 = 1; otherwise the flag
// keeps the incoming value %r3411.
$L__BB4_283:
setp.eq.s16 %p330, %rs65, 0;
mov.u32 %r3423, %r3411;
@%p330 bra $L__BB4_290;
cvt.u32.u16 %r2180, %rs65;
and.b32 %r3412, %r2180, 255;
cvt.u32.u16 %r2181, %rs64;
and.b32 %r3413, %r2181, 255;
$L__BB4_285:
mov.u32 %r685, %r3412;
setp.gt.u32 %p331, %r3508, 2879;
mov.u32 %r3423, 1;
@%p331 bra $L__BB4_290;
// avail = 8 - %r3510 - %r3509; n = min(avail, bits remaining).
mov.u32 %r2183, 8;
sub.s32 %r2184, %r2183, %r3510;
sub.s32 %r2185, %r2184, %r3509;
min.u32 %r2186, %r2185, %r685;
setp.eq.s32 %p332, %r2186, 32;
mov.u32 %r2187, -1;
shl.b32 %r2188, %r2187, %r2186;
not.b32 %r2189, %r2188;
selp.b32 %r2190, -1, %r2189, %p332;
and.b32 %r2191, %r2190, %r3413;
shl.b32 %r2192, %r2191, %r3509;
cvt.u16.u32 %rs314, %r2192;
or.b16 %rs532, %rs532, %rs314;
add.s32 %r3509, %r2186, %r3509;
sub.s32 %r3412, %r685, %r2186;
shr.u32 %r3413, %r3413, %r2186;
setp.gt.u32 %p333, %r2185, %r685;
@%p333 bra $L__BB4_289;
// Byte is full. If %r3510 was set and the byte is not 127, only clear
// %r3510 and keep filling; otherwise store it.
setp.ne.s32 %p334, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs315, %rs532, 255;
setp.ne.s16 %p335, %rs315, 127;
and.pred %p336, %p334, %p335;
@%p336 bra $L__BB4_289;
mov.u32 %r2195, 20548;
sub.s32 %r2196, %r2195, %r3508;
cvt.u64.u32 %rd166, %r2196;
add.s64 %rd167, %rd166, %rd5;
add.s64 %rd168, %rd1, %rd167;
st.global.u8 [%rd168], %rs532;
add.s32 %r3508, %r3508, 1;
setp.gt.u16 %p337, %rs315, 143;
selp.u32 %r3510, 1, 0, %p337;
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_289:
setp.ne.s32 %p338, %r3412, 0;
mov.u32 %r3423, %r3411;
@%p338 bra $L__BB4_285;
// Append the low (%rs69 & 255) bits of %rs68 to the same descending byte
// stream (stores at offset 20548 - %r3508 from %rd1 + %rd5), using the same
// packer state: %rs532 = byte in progress, %r3509 = bits used, %r3510 = 1
// while one bit is withheld, %r3508 = bytes stored.
// %r3511 is the status value leaving this section: 1 if %r3508 exceeded 2879
// while bits were still pending, otherwise the incoming %r3423.
$L__BB4_290:
setp.eq.s16 %p339, %rs69, 0;
mov.u32 %r3511, %r3423;
// Nothing to emit when the bit count is zero.
@%p339 bra $L__BB4_345;
// %r3425 = bit pattern (LSB first), %r3424 = bits still to emit.
cvt.u32.u16 %r2197, %rs68;
and.b32 %r3425, %r2197, 255;
cvt.u32.u16 %r2198, %rs69;
and.b32 %r3424, %r2198, 255;
$L__BB4_292:
mov.u32 %r704, %r3424;
// Stop with status 1 once more than 2879 bytes have been written.
setp.gt.u32 %p340, %r3508, 2879;
mov.u32 %r3511, 1;
@%p340 bra $L__BB4_345;
// %r2202 = free bit positions in the current byte = 8 - withheld - used.
mov.u32 %r2200, 8;
sub.s32 %r2201, %r2200, %r3510;
sub.s32 %r2202, %r2201, %r3509;
// %r2203 = bits moved this iteration = min(free, remaining).
min.u32 %r2203, %r2202, %r704;
// %r2207 = mask with the low %r2203 bits set (all ones when %r2203 == 32).
setp.eq.s32 %p341, %r2203, 32;
mov.u32 %r2204, -1;
shl.b32 %r2205, %r2204, %r2203;
not.b32 %r2206, %r2205;
selp.b32 %r2207, -1, %r2206, %p341;
// OR the selected bits into the byte at the current bit offset.
and.b32 %r2208, %r2207, %r3425;
shl.b32 %r2209, %r2208, %r3509;
cvt.u16.u32 %rs319, %r2209;
or.b16 %rs532, %rs532, %rs319;
// Advance the fill level and consume the bits from the pattern.
add.s32 %r3509, %r2203, %r3509;
sub.s32 %r3424, %r704, %r2203;
shr.u32 %r3425, %r3425, %r2203;
// Byte not yet full (free > remaining): go straight to the loop test.
setp.gt.u32 %p342, %r2202, %r704;
@%p342 bra $L__BB4_296;
// If a bit was being withheld and the byte is not 0x7F, release the withheld
// bit (clear %r3510) and keep filling the same byte instead of storing it.
setp.ne.s32 %p343, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs320, %rs532, 255;
setp.ne.s16 %p344, %rs320, 127;
and.pred %p345, %p343, %p344;
@%p345 bra $L__BB4_296;
// Store the byte at %rd1 + %rd5 + (20548 - %r3508) and bump the byte count.
mov.u32 %r2212, 20548;
sub.s32 %r2213, %r2212, %r3508;
cvt.u64.u32 %rd169, %r2213;
add.s64 %rd170, %rd169, %rd5;
add.s64 %rd171, %rd1, %rd170;
st.global.u8 [%rd171], %rs532;
add.s32 %r3508, %r3508, 1;
// A stored byte greater than 0x8F (143) withholds one bit of the next byte.
setp.gt.u16 %p346, %rs320, 143;
selp.u32 %r3510, 1, 0, %p346;
// Start a fresh, empty byte.
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_296:
// Leave when no bits remain (status = incoming %r3423), otherwise iterate.
setp.eq.s32 %p347, %r3424, 0;
mov.u32 %r3511, %r3423;
@%p347 bra $L__BB4_345;
bra.uni $L__BB4_292;
// Tail of the loop headed by $L__BB4_17 (header is outside this excerpt):
// advance the 64-bit cursor %rd336 by 16 bytes and the counter %r3037 by 4,
// and repeat while %r3037 < 64.
// %r3051 = (%r3204 >> 1) | (%r3204 & 1) is carried into the next iteration.
$L__BB4_345:
and.b32 %r3022, %r3204, 1;
add.s64 %rd336, %rd336, 16;
shr.u32 %r2335, %r3204, 1;
or.b32 %r3051, %r2335, %r3022;
add.s32 %r3037, %r3037, 4;
setp.lt.u32 %p410, %r3037, 64;
@%p410 bra $L__BB4_17;
// Loop finished. Set up for the section that follows:
//   - clear byte 33 of the shared cleanup_e_val array,
//   - %rd16 = %rd4 + 128 (element index base used at $L__BB4_347),
//   - %rd17 / %rd18 = global-space views of %rd31 and of kernel parameter 3.
// %rd18 is later used as the base of a table of 16-bit entries.
ld.param.u64 %rd329, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_64_param_3];
mov.u16 %rs355, 0;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+33], %rs355;
add.s64 %rd16, %rd4, 128;
cvta.to.global.u64 %rd17, %rd31;
cvta.to.global.u64 %rd18, %rd329;
mov.u32 %r3512, 2;
mov.u64 %rd337, 0;
// Per-pass setup; %rd337 is the pass index (its update is outside this
// excerpt). %rd338 = %rd3 + 4 * (%rd16 + 128 * %rd337), i.e. the address of
// a 32-bit element.
// NOTE(review): 128 elements per pass looks like two 64-sample rows - confirm.
$L__BB4_347:
shl.b64 %rd206, %rd337, 7;
add.s64 %rd207, %rd16, %rd206;
shl.b64 %rd208, %rd207, 2;
add.s64 %rd338, %rd3, %rd208;
// %rs537 = cleanup_e_val[1]; %rs539 = max(cleanup_e_val[0], cleanup_e_val[1]);
// cleanup_e_val[0] is then reset to 0.
ld.shared.u8 %rs537, [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val+1];
mov.u32 %r2338, 0;
ld.shared.u8 %rs358, [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val];
max.u16 %rs539, %rs358, %rs537;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val], %rs355;
// %r3543 = cleanup_cx_val[0] + 4 * cleanup_cx_val[1]; %rs535 keeps
// cleanup_cx_val[1]; cleanup_cx_val[0] is then reset to 0.
ld.shared.u8 %r2339, [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val];
ld.shared.u8 %rs535, [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val+1];
mul.wide.u16 %r2340, %rs535, 4;
add.s32 %r3543, %r2340, %r2339;
st.shared.u8 [_ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val], %rs355;
// Zero the loop-carried values (%rs536, %rs538, %r3528, %r3542) and enter
// the inner loop at $L__BB4_348.
mov.u16 %rs536, %rs355;
mov.u16 %rs538, %rs355;
mov.u32 %r3528, %r2338;
mov.u32 %r3542, %r2338;
bra.uni $L__BB4_348;
// Out-of-line flush for the bit accumulator %rs474, reached from the block
// after $L__BB4_373 when the remaining-bit counter %r3120 has just hit 0.
// %rd22 was computed there as %rd1 + %rd5 + (%r3111 + 17477).
// If %r3111 > 191 nothing is stored: status %r3613 = 1 and %r3120 stays 0.
// Otherwise the byte is stored, the byte index %r3111 advances, the
// accumulator is cleared with 8 bits free, and the status stays %r3277.
// The bit shifted in just before this flush was a 0, which is presumably why
// this path has no 0xFF check, unlike the other flush sites in this stream.
$L__BB4_376:
setp.gt.u32 %p439, %r3111, 191;
mov.u32 %r3613, 1;
mov.u32 %r3120, 0;
@%p439 bra $L__BB4_378;
st.global.u8 [%rd22], %rs474;
add.s32 %r3111, %r3111, 1;
mov.u16 %rs474, 0;
mov.u32 %r3120, 8;
mov.u32 %r3613, %r3277;
bra.uni $L__BB4_378;
// Same flush for the second call site (the block after $L__BB4_474): the
// destination is %rd23, the status is written to %r3755, and control
// continues at $L__BB4_479.
$L__BB4_477:
setp.gt.u32 %p554, %r3111, 191;
mov.u32 %r3755, 1;
mov.u32 %r3120, 0;
@%p554 bra $L__BB4_479;
st.global.u8 [%rd23], %rs474;
add.s32 %r3111, %r3111, 1;
mov.u16 %rs474, 0;
mov.u32 %r3120, 8;
mov.u32 %r3755, %r3277;
bra.uni $L__BB4_479;
// Inner-loop head. Loads four 32-bit samples at byte offsets 0, 256, 4 and
// 260 from %rd338 and derives, for each one:
//   - a sign-magnitude word  v = (|x| << %r23) | (x & 0x80000000), 0 if x == 0
//   - t = ((v << 1) >> %r23) & ~1
//   - if t != 0: exponent = 32 - clz(t - 1), value = (v >> 31) + t - 2,
//     and the sample's bit is set in the mask %r3554.
// %r3558 accumulates the maximum exponent of the four samples.
// NOTE(review): offset 256 = 64 words, presumably the sample one row below
// in a 64-wide block (a 2x2 quad) - confirm against the CUDA source.
// %r878 keeps the incoming value of the loop-carried index %r3542.
$L__BB4_348:
mov.u32 %r878, %r3542;
// ---- sample at [%rd338] -> mask bit 0, exponent %r3547, value %r3548 ----
ld.global.u32 %r882, [%rd338];
setp.eq.s32 %p411, %r882, 0;
// %r2338 holds 0 (set in $L__BB4_347), so a zero sample gives v = 0.
mov.u32 %r3546, %r2338;
@%p411 bra $L__BB4_350;
and.b32 %r2342, %r882, -2147483648;
abs.s32 %r2343, %r882;
shl.b32 %r2344, %r2343, %r23;
or.b32 %r3546, %r2344, %r2342;
$L__BB4_350:
shl.b32 %r2348, %r3546, 1;
shr.u32 %r2349, %r2348, %r23;
and.b32 %r885, %r2349, -2;
setp.eq.s32 %p412, %r885, 0;
// Defaults for an insignificant sample: exponent, value and mask all 0.
// %r3550 is the v word of the next sample, pre-cleared here.
mov.u32 %r3550, 0;
mov.u32 %r3547, %r3550;
mov.u32 %r3548, %r3550;
mov.u32 %r3554, %r3550;
@%p412 bra $L__BB4_352;
add.s32 %r2351, %r885, -1;
clz.b32 %r2352, %r2351;
mov.u32 %r2353, 32;
sub.s32 %r3547, %r2353, %r2352;
shr.u32 %r2354, %r3546, 31;
add.s32 %r2355, %r2354, %r885;
add.s32 %r3548, %r2355, -2;
mov.u32 %r3554, 1;
$L__BB4_352:
// ---- sample at [%rd338+256] -> mask bit 1, exponent %r3551, value %r3552 ----
ld.global.u32 %r891, [%rd338+256];
setp.eq.s32 %p413, %r891, 0;
@%p413 bra $L__BB4_354;
and.b32 %r2357, %r891, -2147483648;
abs.s32 %r2358, %r891;
shl.b32 %r2359, %r2358, %r23;
or.b32 %r3550, %r2359, %r2357;
$L__BB4_354:
shl.b32 %r2362, %r3550, 1;
shr.u32 %r2363, %r2362, %r23;
and.b32 %r894, %r2363, -2;
setp.eq.s32 %p414, %r894, 0;
// Defaults; %r3555 is the v word of the next sample. The running maximum
// %r3558 starts from the first sample's exponent.
mov.u32 %r3555, 0;
mov.u32 %r3551, %r3555;
mov.u32 %r3552, %r3555;
mov.u32 %r3558, %r3547;
@%p414 bra $L__BB4_356;
or.b32 %r3554, %r3554, 2;
add.s32 %r2364, %r894, -1;
clz.b32 %r2365, %r2364;
mov.u32 %r2366, 32;
sub.s32 %r3551, %r2366, %r2365;
max.s32 %r3558, %r3547, %r3551;
shr.u32 %r2367, %r3550, 31;
add.s32 %r2368, %r2367, %r894;
add.s32 %r3552, %r2368, -2;
$L__BB4_356:
// ---- sample at [%rd338+4] -> mask bit 2, exponent %r3556, value %r3557 ----
ld.global.u32 %r903, [%rd338+4];
setp.eq.s32 %p415, %r903, 0;
@%p415 bra $L__BB4_358;
and.b32 %r2370, %r903, -2147483648;
abs.s32 %r2371, %r903;
shl.b32 %r2372, %r2371, %r23;
or.b32 %r3555, %r2372, %r2370;
$L__BB4_358:
shl.b32 %r2375, %r3555, 1;
shr.u32 %r2376, %r2375, %r23;
and.b32 %r906, %r2376, -2;
setp.eq.s32 %p416, %r906, 0;
// Defaults; %r3560 is the v word of the next sample.
mov.u32 %r3560, 0;
mov.u32 %r3556, %r3560;
mov.u32 %r3557, %r3560;
@%p416 bra $L__BB4_360;
or.b32 %r3554, %r3554, 4;
add.s32 %r2377, %r906, -1;
clz.b32 %r2378, %r2377;
mov.u32 %r2379, 32;
sub.s32 %r3556, %r2379, %r2378;
max.s32 %r3558, %r3558, %r3556;
shr.u32 %r2380, %r3555, 31;
add.s32 %r2381, %r2380, %r906;
add.s32 %r3557, %r2381, -2;
$L__BB4_360:
// ---- sample at [%rd338+260] -> mask bit 3, exponent %r3561, value %r3562 ----
ld.global.u32 %r915, [%rd338+260];
setp.eq.s32 %p417, %r915, 0;
@%p417 bra $L__BB4_362;
and.b32 %r2383, %r915, -2147483648;
abs.s32 %r2384, %r915;
shl.b32 %r2385, %r2384, %r23;
or.b32 %r3560, %r2385, %r2383;
$L__BB4_362:
shl.b32 %r2388, %r3560, 1;
shr.u32 %r2389, %r2388, %r23;
and.b32 %r918, %r2389, -2;
setp.eq.s32 %p418, %r918, 0;
// Defaults; %r3565 is the exponent-match mask filled in after $L__BB4_364.
mov.u32 %r3565, 0;
mov.u32 %r3561, %r3565;
mov.u32 %r3562, %r3565;
@%p418 bra $L__BB4_364;
or.b32 %r3554, %r3554, 8;
add.s32 %r2390, %r918, -1;
clz.b32 %r2391, %r2390;
mov.u32 %r2392, 32;
sub.s32 %r3561, %r2392, %r2391;
max.s32 %r3558, %r3558, %r3561;
shr.u32 %r2393, %r3560, 31;
add.s32 %r2394, %r2393, %r918;
add.s32 %r3562, %r2394, -2;
// Build the lookup index for the current group of four samples and fetch its
// 16-bit table entry.
// Inputs: %r3554 = 4-bit significance mask, %r3558 = max sample exponent,
// %r3547/%r3551/%r3556/%r3561 = per-sample exponents, %rs539 = neighbour
// exponent value, %r3543 = context value.
$L__BB4_364:
// %p419: more than one bit set in the mask (mask & (mask - 1) != 0).
add.s32 %r2396, %r3554, -1;
and.b32 %r2397, %r2396, %r3554;
setp.ne.s32 %p419, %r2397, 0;
// %r2401 = (%rs539 & 255) - 1 when (%rs539 & 255) > 2 and several samples
// are significant; otherwise 1.
and.b16 %rs359, %rs539, 255;
setp.gt.u16 %p420, %rs359, 2;
and.pred %p421, %p420, %p419;
cvt.u32.u16 %r2398, %rs539;
and.b32 %r2399, %r2398, 255;
add.s32 %r2400, %r2399, -1;
selp.b32 %r2401, %r2400, 1, %p421;
// %r927 = max(%r2401, max exponent); %r928 = the excess over %r2401.
max.s32 %r927, %r2401, %r3558;
sub.s32 %r928, %r927, %r2401;
// Only when the excess is >= 1 is the match mask %r3565 computed (it stays 0
// otherwise): bit i is set when sample i's exponent equals the maximum.
setp.lt.s32 %p422, %r928, 1;
@%p422 bra $L__BB4_366;
setp.eq.s32 %p423, %r3547, %r3558;
selp.u32 %r2402, 1, 0, %p423;
setp.eq.s32 %p424, %r3551, %r3558;
selp.u32 %r2403, -1, 0, %p424;
// bfi inserts one bit of %r2403 at bit position 1 of %r2402.
bfi.b32 %r2404, %r2403, %r2402, 1, 1;
setp.eq.s32 %p425, %r3556, %r3558;
selp.u16 %rs360, 1, 0, %p425;
mul.wide.u16 %r2405, %rs360, 4;
or.b32 %r2406, %r2404, %r2405;
setp.eq.s32 %p426, %r3561, %r3558;
selp.u16 %rs361, 1, 0, %p426;
mul.wide.u16 %r2407, %rs361, 8;
or.b32 %r3565, %r2406, %r2407;
$L__BB4_366:
// index = (context << 8) | (mask << 4) | match-mask; entries are 2 bytes and
// the table base is %rd18 (global view of kernel parameter 3).
shl.b32 %r2408, %r3554, 4;
shl.b32 %r2409, %r3543, 8;
or.b32 %r2410, %r2408, %r2409;
or.b32 %r2411, %r2410, %r3565;
mul.wide.u32 %rd209, %r2411, 2;
add.s64 %rd210, %rd18, %rd209;
ld.global.u16 %rs134, [%rd210];
// Entry layout as used below: bits 4..6 = codeword length, bits 8..15 =
// codeword bits. Bits 0..3 are consumed later, at $L__BB4_421.
shr.u16 %rs362, %rs134, 4;
and.b16 %rs135, %rs362, 7;
// Zero-length codeword: skip the packer; status %r3577 = incoming %r3511.
setp.eq.s16 %p427, %rs135, 0;
mov.u32 %r3577, %r3511;
@%p427 bra $L__BB4_373;
// %r3566 = bits to emit, %r3567 = codeword bits (LSB first).
cvt.u32.u16 %r3566, %rs135;
shr.u16 %rs363, %rs134, 8;
cvt.u32.u16 %r3567, %rs363;
// Emit the %r3566 codeword bits held in %r3567 into the descending byte
// stream (stores at offset 20548 - %r3508 from %rd1 + %rd5).
// Packer state: %rs532 = byte in progress, %r3509 = bits used, %r3510 = 1
// while one bit is withheld, %r3508 = bytes stored.
// Status %r3577 becomes 1 if %r3508 exceeded 2879 while bits were pending,
// otherwise it is the incoming %r3511.
$L__BB4_368:
mov.u32 %r933, %r3566;
setp.gt.u32 %p428, %r3508, 2879;
mov.u32 %r3577, 1;
@%p428 bra $L__BB4_373;
// %r2415 = free bit positions = 8 - withheld - used; %r2416 = bits moved now.
mov.u32 %r2413, 8;
sub.s32 %r2414, %r2413, %r3510;
sub.s32 %r2415, %r2414, %r3509;
min.u32 %r2416, %r2415, %r933;
// %r2420 = mask with the low %r2416 bits set (all ones when %r2416 == 32).
setp.eq.s32 %p429, %r2416, 32;
mov.u32 %r2417, -1;
shl.b32 %r2418, %r2417, %r2416;
not.b32 %r2419, %r2418;
selp.b32 %r2420, -1, %r2419, %p429;
// OR the selected bits into the byte at the current bit offset.
and.b32 %r2421, %r2420, %r3567;
shl.b32 %r2422, %r2421, %r3509;
cvt.u16.u32 %rs364, %r2422;
or.b16 %rs532, %rs532, %rs364;
add.s32 %r3509, %r2416, %r3509;
sub.s32 %r3566, %r933, %r2416;
shr.u32 %r3567, %r3567, %r2416;
// Byte not yet full: go straight to the loop test.
setp.gt.u32 %p430, %r2415, %r933;
@%p430 bra $L__BB4_372;
// If a bit was being withheld and the byte is not 0x7F, release the withheld
// bit and keep filling the same byte instead of storing it.
setp.ne.s32 %p431, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs365, %rs532, 255;
setp.ne.s16 %p432, %rs365, 127;
and.pred %p433, %p431, %p432;
@%p433 bra $L__BB4_372;
// Store the byte and advance; a byte > 0x8F withholds one bit of the next.
mov.u32 %r2425, 20548;
sub.s32 %r2426, %r2425, %r3508;
cvt.u64.u32 %rd211, %r2426;
add.s64 %rd212, %rd211, %rd5;
add.s64 %rd213, %rd1, %rd212;
st.global.u8 [%rd213], %rs532;
add.s32 %r3508, %r3508, 1;
setp.gt.u16 %p434, %rs365, 143;
selp.u32 %r3510, 1, 0, %p434;
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_372:
setp.ne.s32 %p435, %r3566, 0;
mov.u32 %r3577, %r3511;
@%p435 bra $L__BB4_368;
$L__BB4_373:
// The run-length style coding below happens only when the context %r3543 is
// 0; otherwise go directly to the sample-bit emission at $L__BB4_421.
// NOTE(review): presumably the HTJ2K MEL coder - confirm against the source.
setp.ne.s32 %p436, %r3543, 0;
@%p436 bra $L__BB4_421;
setp.eq.s32 %p437, %r3554, 0;
// %rd22 = address of the next byte of the forward stream that starts at
// offset 17477: %rd1 + %rd5 + 17477 + %r3111.
add.s32 %r2427, %r3111, 17477;
cvt.u64.u32 %rd214, %r2427;
add.s64 %rd215, %rd214, %rd5;
add.s64 %rd22, %rd1, %rd215;
// All four samples insignificant: extend the current run ($L__BB4_413).
@%p437 bra $L__BB4_413;
// Otherwise shift a 0 bit into the accumulator %rs474 and consume one bit
// slot; flush out of line ($L__BB4_376) if that filled the byte.
shl.b16 %rs474, %rs474, 1;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p438, %r3120, 0;
mov.u32 %r3613, %r3277;
@%p438 bra $L__BB4_378;
bra.uni $L__BB4_376;
// Map the state index %r3275 to a bit count %r3581:
//   0..2 -> 0,  3..5 -> 1,  6..8 -> 2,  9..10 -> 3,  11 -> 4,  otherwise 5.
// The code after $L__BB4_381 emits that many bits of the run counter %r3274.
$L__BB4_378:
setp.lt.u32 %p440, %r3275, 3;
mov.u32 %r3581, 0;
@%p440 bra $L__BB4_381;
setp.lt.u32 %p441, %r3275, 6;
mov.u32 %r3581, 1;
@%p441 bra $L__BB4_381;
// Remaining cases resolved branch-free with a select chain.
setp.lt.u32 %p442, %r3275, 9;
setp.eq.s32 %p443, %r3275, 11;
selp.b32 %r2433, 4, 5, %p443;
setp.lt.u32 %p444, %r3275, 11;
selp.b32 %r2434, 3, %r2433, %p444;
selp.b32 %r3581, 2, %r2434, %p442;
$L__BB4_381:
setp.eq.s32 %p445, %r3581, 0;
@%p445 bra $L__BB4_409;
add.s32 %r957, %r3581, -1;
and.b32 %r958, %r3581, 3;
setp.eq.s32 %p446, %r958, 0;
mov.u32 %r3591, %r3581;
mov.u32 %r3592, %r3613;
@%p446 bra $L__BB4_394;
mov.u32 %r2436, 1;
shl.b32 %r2437, %r2436, %r957;
and.b32 %r2438, %r2437, %r3274;
setp.ne.s32 %p447, %r2438, 0;
selp.u32 %r2439, 1, 0, %p447;
cvt.u32.u16 %r2440, %rs474;
bfi.b32 %r2441, %r2440, %r2439, 1, 8;
cvt.u16.u32 %rs474, %r2441;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p448, %r3120, 0;
mov.u32 %r3592, %r3613;
@%p448 bra $L__BB4_386;
setp.gt.u32 %p449, %r3111, 191;
mov.u32 %r3120, 0;
mov.u32 %r3592, %r2436;
@%p449 bra $L__BB4_386;
add.s32 %r2445, %r3111, 17477;
cvt.u64.u32 %rd216, %r2445;
add.s64 %rd217, %rd216, %rd5;
add.s64 %rd218, %rd1, %rd217;
st.global.u8 [%rd218], %rs474;
add.s32 %r3111, %r3111, 1;
mov.u16 %rs474, 0;
mov.u32 %r3120, 8;
mov.u32 %r3592, %r3613;
$L__BB4_386:
setp.eq.s32 %p450, %r958, 1;
mov.u32 %r3613, %r3592;
mov.u32 %r3591, %r957;
@%p450 bra $L__BB4_394;
add.s32 %r3591, %r3581, -2;
mov.u32 %r2446, 1;
shl.b32 %r2447, %r2446, %r3591;
and.b32 %r2448, %r2447, %r3274;
setp.ne.s32 %p451, %r2448, 0;
selp.u32 %r2449, 1, 0, %p451;
cvt.u32.u16 %r2450, %rs474;
bfi.b32 %r2451, %r2450, %r2449, 1, 8;
cvt.u16.u32 %rs474, %r2451;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p452, %r3120, 0;
mov.u32 %r3587, %r3592;
@%p452 bra $L__BB4_390;
setp.gt.u32 %p453, %r3111, 191;
mov.u32 %r3120, 0;
mov.u32 %r3587, %r2446;
@%p453 bra $L__BB4_390;
add.s32 %r2454, %r3111, 17477;
cvt.u64.u32 %rd219, %r2454;
add.s64 %rd220, %rd219, %rd5;
add.s64 %rd221, %rd1, %rd220;
and.b16 %rs372, %rs474, 255;
st.global.u8 [%rd221], %rs474;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p454, %rs372, 255;
selp.b32 %r3120, 7, 8, %p454;
mov.u16 %rs474, 0;
mov.u32 %r3587, %r3592;
$L__BB4_390:
setp.eq.s32 %p455, %r958, 2;
mov.u32 %r3613, %r3587;
mov.u32 %r3592, %r3587;
@%p455 bra $L__BB4_394;
add.s32 %r3591, %r3581, -3;
mov.u32 %r2455, 1;
shl.b32 %r2456, %r2455, %r3591;
and.b32 %r2457, %r2456, %r3274;
setp.ne.s32 %p456, %r2457, 0;
selp.u32 %r2458, 1, 0, %p456;
cvt.u32.u16 %r2459, %rs474;
bfi.b32 %r2460, %r2459, %r2458, 1, 8;
cvt.u16.u32 %rs474, %r2460;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p457, %r3120, 0;
mov.u32 %r3613, %r3587;
mov.u32 %r3592, %r3587;
@%p457 bra $L__BB4_394;
setp.gt.u32 %p458, %r3111, 191;
mov.u32 %r3120, 0;
mov.u32 %r3613, %r2455;
mov.u32 %r3592, %r2455;
@%p458 bra $L__BB4_394;
add.s32 %r2465, %r3111, 17477;
cvt.u64.u32 %rd222, %r2465;
add.s64 %rd223, %rd222, %rd5;
add.s64 %rd224, %rd1, %rd223;
and.b16 %rs375, %rs474, 255;
st.global.u8 [%rd224], %rs474;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p459, %rs375, 255;
selp.b32 %r3120, 7, 8, %p459;
mov.u16 %rs474, 0;
mov.u32 %r3613, %r3587;
mov.u32 %r3592, %r3587;
$L__BB4_394:
setp.lt.u32 %p460, %r957, 3;
@%p460 bra $L__BB4_409;
mov.u32 %r3613, %r3592;
$L__BB4_396:
add.s32 %r2466, %r3591, -1;
mov.u32 %r2467, 1;
shl.b32 %r2468, %r2467, %r2466;
and.b32 %r2469, %r2468, %r3274;
setp.ne.s32 %p461, %r2469, 0;
selp.u32 %r2470, 1, 0, %p461;
cvt.u32.u16 %r2471, %rs474;
bfi.b32 %r3601, %r2471, %r2470, 1, 8;
add.s32 %r3600, %r3120, -1;
setp.ne.s32 %p462, %r3600, 0;
mov.u32 %r3602, %r3613;
@%p462 bra $L__BB4_399;
setp.gt.u32 %p463, %r3111, 191;
mov.u32 %r3600, 0;
mov.u32 %r3602, %r2467;
@%p463 bra $L__BB4_399;
cvt.u16.u32 %rs376, %r3601;
and.b16 %rs377, %rs376, 255;
add.s32 %r2475, %r3111, 17477;
cvt.u64.u32 %rd225, %r2475;
add.s64 %rd226, %rd225, %rd5;
add.s64 %rd227, %rd1, %rd226;
st.global.u8 [%rd227], %rs376;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p464, %rs377, 255;
selp.b32 %r3600, 7, 8, %p464;
mov.u32 %r3601, 0;
mov.u32 %r3602, %r3613;
$L__BB4_399:
add.s32 %r2476, %r3591, -2;
shl.b32 %r2478, %r2467, %r2476;
and.b32 %r2479, %r2478, %r3274;
setp.ne.s32 %p465, %r2479, 0;
and.b32 %r2480, %r3601, 127;
selp.u32 %r2481, 1, 0, %p465;
bfi.b32 %r3605, %r2480, %r2481, 1, 7;
add.s32 %r3604, %r3600, -1;
setp.ne.s32 %p466, %r3604, 0;
mov.u32 %r3606, %r3602;
@%p466 bra $L__BB4_402;
setp.gt.u32 %p467, %r3111, 191;
mov.u32 %r3606, 1;
mov.u32 %r3604, 0;
@%p467 bra $L__BB4_402;
cvt.u16.u32 %rs378, %r3605;
and.b16 %rs379, %rs378, 255;
add.s32 %r2485, %r3111, 17477;
cvt.u64.u32 %rd228, %r2485;
add.s64 %rd229, %rd228, %rd5;
add.s64 %rd230, %rd1, %rd229;
st.global.u8 [%rd230], %rs378;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p468, %rs379, 255;
selp.b32 %r3604, 7, 8, %p468;
mov.u32 %r3605, 0;
mov.u32 %r3606, %r3602;
$L__BB4_402:
add.s32 %r2486, %r3591, -3;
mov.u32 %r2487, 1;
shl.b32 %r2488, %r2487, %r2486;
and.b32 %r2489, %r2488, %r3274;
setp.ne.s32 %p469, %r2489, 0;
and.b32 %r2490, %r3605, 127;
selp.u32 %r2491, 1, 0, %p469;
bfi.b32 %r3609, %r2490, %r2491, 1, 7;
add.s32 %r3608, %r3604, -1;
setp.ne.s32 %p470, %r3608, 0;
mov.u32 %r3610, %r3606;
@%p470 bra $L__BB4_405;
setp.gt.u32 %p471, %r3111, 191;
mov.u32 %r3608, 0;
mov.u32 %r3610, %r2487;
@%p471 bra $L__BB4_405;
cvt.u16.u32 %rs380, %r3609;
and.b16 %rs381, %rs380, 255;
add.s32 %r2495, %r3111, 17477;
cvt.u64.u32 %rd231, %r2495;
add.s64 %rd232, %rd231, %rd5;
add.s64 %rd233, %rd1, %rd232;
st.global.u8 [%rd233], %rs380;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p472, %rs381, 255;
selp.b32 %r3608, 7, 8, %p472;
mov.u32 %r3609, 0;
mov.u32 %r3610, %r3606;
$L__BB4_405:
add.s32 %r3591, %r3591, -4;
shl.b32 %r2497, %r2487, %r3591;
and.b32 %r2498, %r2497, %r3274;
setp.ne.s32 %p473, %r2498, 0;
and.b32 %r2499, %r3609, 127;
selp.u32 %r2500, 1, 0, %p473;
bfi.b32 %r2501, %r2499, %r2500, 1, 15;
cvt.u16.u32 %rs474, %r2501;
add.s32 %r3120, %r3608, -1;
setp.ne.s32 %p474, %r3120, 0;
mov.u32 %r3613, %r3610;
@%p474 bra $L__BB4_408;
setp.gt.u32 %p475, %r3111, 191;
mov.u32 %r3613, 1;
mov.u32 %r3120, 0;
@%p475 bra $L__BB4_408;
add.s32 %r2504, %r3111, 17477;
cvt.u64.u32 %rd234, %r2504;
add.s64 %rd235, %rd234, %rd5;
add.s64 %rd236, %rd1, %rd235;
and.b16 %rs383, %rs474, 255;
st.global.u8 [%rd236], %rs474;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p476, %rs383, 255;
selp.b32 %r3120, 7, 8, %p476;
mov.u16 %rs474, 0;
mov.u32 %r3613, %r3610;
$L__BB4_408:
setp.ne.s32 %p477, %r3591, 0;
@%p477 bra $L__BB4_396;
// After a run has been terminated: step the state index %r3275 down by one
// (saturating at 0), reset the run counter %r3274 to 0, and recompute the
// run threshold %r3276 = 1 << exponent(%r3275), where the exponent mapping is
//   0..2 -> 0,  3..5 -> 1,  6..8 -> 2,  9..10 -> 3,  11 -> 4,  otherwise 5.
// The status gathered while emitting (%r3613) is committed to %r3277.
$L__BB4_409:
add.s32 %r2506, %r3275, -1;
setp.eq.s32 %p478, %r3275, 0;
mov.u32 %r3274, 0;
selp.b32 %r3275, 0, %r2506, %p478;
setp.lt.u32 %p479, %r3275, 3;
mov.u32 %r3617, %r3274;
@%p479 bra $L__BB4_412;
setp.lt.u32 %p480, %r3275, 6;
mov.u32 %r3617, 1;
@%p480 bra $L__BB4_412;
setp.lt.u32 %p481, %r3275, 9;
setp.eq.s32 %p482, %r3275, 11;
selp.b32 %r2508, 4, 5, %p482;
setp.lt.u32 %p483, %r3275, 11;
selp.b32 %r2509, 3, %r2508, %p483;
selp.b32 %r3617, 2, %r2509, %p481;
$L__BB4_412:
mov.u32 %r2511, 1;
shl.b32 %r3276, %r2511, %r3617;
mov.u32 %r3277, %r3613;
bra.uni $L__BB4_421;
// All four samples were insignificant: extend the current run.
// While the run counter %r3274 is below the threshold %r3276 nothing is
// emitted. When it reaches the threshold, a single 1 bit is shifted into the
// accumulator %rs474, the state index %r3275 steps up by one (capped at 12),
// the run counter resets, and the threshold is recomputed.
$L__BB4_413:
add.s32 %r3274, %r3274, 1;
setp.lt.u32 %p484, %r3274, %r3276;
@%p484 bra $L__BB4_421;
// Append a 1 bit and consume one bit slot of the current byte.
shl.b16 %rs384, %rs474, 1;
or.b16 %rs474, %rs384, 1;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p485, %r3120, 0;
mov.u32 %r3620, %r3277;
@%p485 bra $L__BB4_417;
// Byte complete. If the byte index %r3111 is past 191, do not store and
// report status 1 (with %r3120 left at 0).
setp.gt.u32 %p486, %r3111, 191;
mov.u32 %r3620, 1;
mov.u32 %r3120, 0;
@%p486 bra $L__BB4_417;
// Store at %rd22 (computed after $L__BB4_373). After a 0xFF byte the next
// byte gets only 7 bit slots, otherwise 8.
and.b16 %rs386, %rs474, 255;
st.global.u8 [%rd22], %rs474;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p487, %rs386, 255;
selp.b32 %r3120, 7, 8, %p487;
mov.u16 %rs474, 0;
mov.u32 %r3620, %r3277;
$L__BB4_417:
// %r3275 = min(%r3275 + 1, 12); run counter reset; exponent mapping:
//   0..2 -> 0,  3..5 -> 1,  6..8 -> 2,  9..10 -> 3,  11 -> 4,  otherwise 5.
add.s32 %r2515, %r3275, 1;
min.u32 %r3275, %r2515, 12;
setp.lt.u32 %p488, %r3275, 3;
mov.u32 %r3274, 0;
mov.u32 %r3621, %r3274;
@%p488 bra $L__BB4_420;
setp.lt.u32 %p489, %r3275, 6;
mov.u32 %r3621, 1;
@%p489 bra $L__BB4_420;
setp.lt.u32 %p490, %r3275, 9;
setp.eq.s32 %p491, %r3275, 11;
selp.b32 %r2517, 4, 5, %p491;
setp.lt.u32 %p492, %r3275, 11;
selp.b32 %r2518, 3, %r2517, %p492;
selp.b32 %r3621, 2, %r2518, %p490;
$L__BB4_420:
// New threshold = 1 << exponent; commit the status to %r3277.
mov.u32 %r2520, 1;
shl.b32 %r3276, %r2520, %r3621;
mov.u32 %r3277, %r3620;
and.b16 %rs387, %rs134, 15;
cvt.u32.u16 %r1041, %rs387;
and.b32 %r2521, %r3554, 1;
setp.eq.b32 %p493, %r2521, 1;
mov.pred %p494, 0;
xor.pred %p495, %p493, %p494;
not.pred %p496, %p495;
mov.u32 %r3642, %r3829;
@%p496 bra $L__BB4_428;
and.b32 %r2522, %r1041, 1;
sub.s32 %r3628, %r927, %r2522;
setp.eq.s32 %p497, %r3628, 0;
mov.u32 %r3642, %r3829;
@%p497 bra $L__BB4_428;
mov.u32 %r2523, -1;
shl.b32 %r2524, %r2523, %r3628;
not.b32 %r2525, %r2524;
and.b32 %r3629, %r3548, %r2525;
$L__BB4_424:
setp.gt.u32 %p498, %r3654, 17476;
mov.u32 %r3642, 1;
@%p498 bra $L__BB4_428;
sub.s32 %r2527, %r3653, %r3655;
min.u32 %r2528, %r2527, %r3628;
setp.eq.s32 %p499, %r2528, 32;
mov.u32 %r2529, -1;
shl.b32 %r2530, %r2529, %r2528;
not.b32 %r2531, %r2530;
selp.b32 %r2532, -1, %r2531, %p499;
and.b32 %r2533, %r2532, %r3629;
shl.b32 %r2534, %r2533, %r3655;
or.b32 %r3656, %r2534, %r3656;
add.s32 %r3655, %r2528, %r3655;
shr.u32 %r3629, %r3629, %r2528;
sub.s32 %r3628, %r3628, %r2528;
setp.lt.u32 %p500, %r3655, %r3653;
@%p500 bra $L__BB4_427;
cvt.u64.u32 %rd237, %r3654;
add.s64 %rd238, %rd237, %rd5;
add.s64 %rd239, %rd1, %rd238;
st.global.u8 [%rd239], %r3656;
add.s32 %r3654, %r3654, 1;
setp.eq.s32 %p501, %r3656, 255;
selp.b32 %r3653, 7, 8, %p501;
mov.u32 %r3655, 0;
mov.u32 %r3656, %r3655;
$L__BB4_427:
setp.ne.s32 %p502, %r3628, 0;
mov.u32 %r3642, %r3829;
@%p502 bra $L__BB4_424;
$L__BB4_428:
and.b32 %r1065, %r3554, 2;
setp.eq.s32 %p503, %r1065, 0;
mov.u32 %r3657, %r3642;
@%p503 bra $L__BB4_435;
shr.u32 %r2537, %r1041, 1;
and.b32 %r2538, %r2537, 1;
sub.s32 %r3643, %r927, %r2538;
setp.eq.s32 %p504, %r3643, 0;
mov.u32 %r3657, %r3642;
@%p504 bra $L__BB4_435;
mov.u32 %r2539, -1;
shl.b32 %r2540, %r2539, %r3643;
not.b32 %r2541, %r2540;
and.b32 %r3644, %r3552, %r2541;
$L__BB4_431:
setp.gt.u32 %p505, %r3654, 17476;
mov.u32 %r3657, 1;
@%p505 bra $L__BB4_435;
sub.s32 %r2543, %r3653, %r3655;
min.u32 %r2544, %r2543, %r3643;
setp.eq.s32 %p506, %r2544, 32;
mov.u32 %r2545, -1;
shl.b32 %r2546, %r2545, %r2544;
not.b32 %r2547, %r2546;
selp.b32 %r2548, -1, %r2547, %p506;
and.b32 %r2549, %r2548, %r3644;
shl.b32 %r2550, %r2549, %r3655;
or.b32 %r3656, %r2550, %r3656;
add.s32 %r3655, %r2544, %r3655;
shr.u32 %r3644, %r3644, %r2544;
sub.s32 %r3643, %r3643, %r2544;
setp.lt.u32 %p507, %r3655, %r3653;
@%p507 bra $L__BB4_434;
cvt.u64.u32 %rd240, %r3654;
add.s64 %rd241, %rd240, %rd5;
add.s64 %rd242, %rd1, %rd241;
st.global.u8 [%rd242], %r3656;
add.s32 %r3654, %r3654, 1;
setp.eq.s32 %p508, %r3656, 255;
selp.b32 %r3653, 7, 8, %p508;
mov.u32 %r3655, 0;
mov.u32 %r3656, %r3655;
$L__BB4_434:
setp.ne.s32 %p509, %r3643, 0;
mov.u32 %r3657, %r3642;
@%p509 bra $L__BB4_431;
$L__BB4_435:
and.b32 %r1089, %r3554, 4;
setp.eq.s32 %p510, %r1089, 0;
mov.u32 %r3672, %r3657;
@%p510 bra $L__BB4_442;
shr.u32 %r2553, %r1041, 2;
and.b32 %r2554, %r2553, 1;
sub.s32 %r3658, %r927, %r2554;
setp.eq.s32 %p511, %r3658, 0;
mov.u32 %r3672, %r3657;
@%p511 bra $L__BB4_442;
mov.u32 %r2555, -1;
shl.b32 %r2556, %r2555, %r3658;
not.b32 %r2557, %r2556;
and.b32 %r3659, %r3557, %r2557;
$L__BB4_438:
setp.gt.u32 %p512, %r3654, 17476;
mov.u32 %r3672, 1;
@%p512 bra $L__BB4_442;
sub.s32 %r2559, %r3653, %r3655;
min.u32 %r2560, %r2559, %r3658;
setp.eq.s32 %p513, %r2560, 32;
mov.u32 %r2561, -1;
shl.b32 %r2562, %r2561, %r2560;
not.b32 %r2563, %r2562;
selp.b32 %r2564, -1, %r2563, %p513;
and.b32 %r2565, %r2564, %r3659;
shl.b32 %r2566, %r2565, %r3655;
or.b32 %r3656, %r2566, %r3656;
add.s32 %r3655, %r2560, %r3655;
shr.u32 %r3659, %r3659, %r2560;
sub.s32 %r3658, %r3658, %r2560;
setp.lt.u32 %p514, %r3655, %r3653;
@%p514 bra $L__BB4_441;
cvt.u64.u32 %rd243, %r3654;
add.s64 %rd244, %rd243, %rd5;
add.s64 %rd245, %rd1, %rd244;
st.global.u8 [%rd245], %r3656;
add.s32 %r3654, %r3654, 1;
setp.eq.s32 %p515, %r3656, 255;
selp.b32 %r3653, 7, 8, %p515;
mov.u32 %r3655, 0;
mov.u32 %r3656, %r3655;
$L__BB4_441:
setp.ne.s32 %p516, %r3658, 0;
mov.u32 %r3672, %r3657;
@%p516 bra $L__BB4_438;
$L__BB4_442:
and.b32 %r1113, %r3554, 8;
setp.eq.s32 %p517, %r1113, 0;
mov.u32 %r3687, %r3672;
@%p517 bra $L__BB4_449;
shr.u32 %r2569, %r1041, 3;
sub.s32 %r3673, %r927, %r2569;
setp.eq.s32 %p518, %r3673, 0;
mov.u32 %r3687, %r3672;
@%p518 bra $L__BB4_449;
mov.u32 %r2570, -1;
shl.b32 %r2571, %r2570, %r3673;
not.b32 %r2572, %r2571;
and.b32 %r3674, %r3562, %r2572;
$L__BB4_445:
setp.gt.u32 %p519, %r3654, 17476;
mov.u32 %r3687, 1;
@%p519 bra $L__BB4_449;
sub.s32 %r2574, %r3653, %r3655;
min.u32 %r2575, %r2574, %r3673;
setp.eq.s32 %p520, %r2575, 32;
mov.u32 %r2576, -1;
shl.b32 %r2577, %r2576, %r2575;
not.b32 %r2578, %r2577;
selp.b32 %r2579, -1, %r2578, %p520;
and.b32 %r2580, %r2579, %r3674;
shl.b32 %r2581, %r2580, %r3655;
or.b32 %r3656, %r2581, %r3656;
add.s32 %r3655, %r2575, %r3655;
shr.u32 %r3674, %r3674, %r2575;
sub.s32 %r3673, %r3673, %r2575;
setp.lt.u32 %p521, %r3655, %r3653;
@%p521 bra $L__BB4_448;
cvt.u64.u32 %rd246, %r3654;
add.s64 %rd247, %rd246, %rd5;
add.s64 %rd248, %rd1, %rd247;
st.global.u8 [%rd248], %r3656;
add.s32 %r3654, %r3654, 1;
setp.eq.s32 %p522, %r3656, 255;
selp.b32 %r3653, 7, 8, %p522;
mov.u32 %r3655, 0;
mov.u32 %r3656, %r3655;
$L__BB4_448:
setp.ne.s32 %p523, %r3673, 0;
mov.u32 %r3687, %r3672;
@%p523 bra $L__BB4_445;
// Update the shared per-column arrays cleanup_e_val / cleanup_cx_val at
// index %r878 (the value of %r3542 on entry to this iteration) with results
// from the four samples just coded, pick up the neighbour values needed for
// the next four, and begin loading the next sample at [%rd338+8].
$L__BB4_449:
// %r3000 / %r2999 = shared-memory addresses of the two arrays.
mov.u32 %r3000, _ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E14cleanup_cx_val;
mov.u32 %r2999, _ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val;
// e_val[%r878] = the larger of %rs538 and the exponent %r3551 (the sample at
// [%rd338+256]), compared on their low 8 bits.
and.b32 %r2585, %r3551, 255;
cvt.u32.u16 %r2586, %rs538;
and.b32 %r2587, %r2586, 255;
setp.lt.u32 %p524, %r2585, %r2587;
cvt.u16.u32 %rs388, %r3551;
selp.b16 %rs389, %rs538, %rs388, %p524;
add.s32 %r1137, %r2999, %r878;
mov.u32 %r3689, 0;
st.shared.u8 [%r1137], %rs389;
// %rs157 = e_val[%r878+1] if %rs537 > e_val[%r878+2], else e_val[%r878+2].
// This load happens BEFORE e_val[%r878+1] is overwritten below.
// NOTE(review): presumably %rs537 mirrors e_val[%r878+1], making %rs157 the
// larger of the two - confirm with the update of %rs537 outside this excerpt.
ld.shared.u8 %rs156, [%r1137+2];
setp.gt.u16 %p525, %rs537, %rs156;
// The loop-carried index advances by 2.
add.s32 %r3542, %r878, 2;
add.s32 %r2589, %r878, 1;
selp.b32 %r2590, %r2589, %r3542, %p525;
add.s32 %r2591, %r2999, %r2590;
ld.shared.u8 %rs157, [%r2591];
// e_val[%r878+1] = exponent %r3561 (the sample at [%rd338+260]).
cvt.u16.u32 %rs158, %r3561;
st.shared.u8 [%r1137+1], %r3561;
// cx_val[%r878] = %rs536 | (mask bit 1); %r1065 holds (%r3554 & 2).
cvt.u16.u32 %rs391, %r1065;
shr.u16 %rs392, %rs391, 1;
or.b16 %rs393, %rs536, %rs392;
add.s32 %r1139, %r3000, %r878;
st.shared.u8 [%r1139], %rs393;
// %r1140 = cx_val[%r878+2], read for the next context computation.
ld.shared.u8 %r1140, [%r1139+2];
// cx_val[%r878+1] = mask bit 3; %r1113 holds (%r3554 & 8).
shr.u32 %r1141, %r1113, 3;
st.shared.u8 [%r1139+1], %r1141;
// Load the next sample and convert it to the sign-magnitude word
// v = (|x| << %r23) | (x & 0x80000000); v stays 0 for a zero sample.
ld.global.u32 %r1142, [%rd338+8];
setp.eq.s32 %p526, %r1142, 0;
mov.u32 %r3688, %r3689;
@%p526 bra $L__BB4_451;
and.b32 %r2593, %r1142, -2147483648;
abs.s32 %r2594, %r1142;
shl.b32 %r2595, %r2594, %r23;
or.b32 %r3688, %r2595, %r2593;
$L__BB4_451:
shl.b32 %r2599, %r3688, 1;
shr.u32 %r2600, %r2599, %r23;
and.b32 %r1145, %r2600, -2;
setp.eq.s32 %p527, %r1145, 0;
mov.u32 %r3690, %r3689;
mov.u32 %r3696, %r3689;
@%p527 bra $L__BB4_453;
add.s32 %r2602, %r1145, -1;
clz.b32 %r2603, %r2602;
mov.u32 %r2604, 32;
sub.s32 %r3689, %r2604, %r2603;
shr.u32 %r2605, %r3688, 31;
add.s32 %r2606, %r2605, %r1145;
add.s32 %r3690, %r2606, -2;
mov.u32 %r3696, 1;
$L__BB4_453:
ld.global.u32 %r1151, [%rd338+264];
setp.eq.s32 %p528, %r1151, 0;
mov.u32 %r3693, 0;
mov.u32 %r3692, %r3693;
@%p528 bra $L__BB4_455;
and.b32 %r2608, %r1151, -2147483648;
abs.s32 %r2609, %r1151;
shl.b32 %r2610, %r2609, %r23;
or.b32 %r3692, %r2610, %r2608;
$L__BB4_455:
shl.b32 %r2613, %r3692, 1;
shr.u32 %r2614, %r2613, %r23;
and.b32 %r1154, %r2614, -2;
setp.eq.s32 %p529, %r1154, 0;
mov.u32 %r3694, %r3693;
mov.u32 %r3700, %r3689;
@%p529 bra $L__BB4_457;
or.b32 %r3696, %r3696, 2;
add.s32 %r2615, %r1154, -1;
clz.b32 %r2616, %r2615;
mov.u32 %r2617, 32;
sub.s32 %r3693, %r2617, %r2616;
max.s32 %r3700, %r3689, %r3693;
shr.u32 %r2618, %r3692, 31;
add.s32 %r2619, %r2618, %r1154;
add.s32 %r3694, %r2619, -2;
$L__BB4_457:
ld.global.u32 %r1163, [%rd338+12];
setp.eq.s32 %p530, %r1163, 0;
mov.u32 %r3698, 0;
mov.u32 %r3697, %r3698;
@%p530 bra $L__BB4_459;
and.b32 %r2621, %r1163, -2147483648;
abs.s32 %r2622, %r1163;
shl.b32 %r2623, %r2622, %r23;
or.b32 %r3697, %r2623, %r2621;
$L__BB4_459:
shl.b32 %r2626, %r3697, 1;
shr.u32 %r2627, %r2626, %r23;
and.b32 %r1166, %r2627, -2;
setp.eq.s32 %p531, %r1166, 0;
mov.u32 %r3699, %r3698;
@%p531 bra $L__BB4_461;
or.b32 %r3696, %r3696, 4;
add.s32 %r2628, %r1166, -1;
clz.b32 %r2629, %r2628;
mov.u32 %r2630, 32;
sub.s32 %r3698, %r2630, %r2629;
max.s32 %r3700, %r3700, %r3698;
shr.u32 %r2631, %r3697, 31;
add.s32 %r2632, %r2631, %r1166;
add.s32 %r3699, %r2632, -2;
$L__BB4_461:
ld.global.u32 %r1175, [%rd338+268];
setp.eq.s32 %p532, %r1175, 0;
mov.u32 %r3703, 0;
mov.u32 %r3702, %r3703;
@%p532 bra $L__BB4_463;
and.b32 %r2634, %r1175, -2147483648;
abs.s32 %r2635, %r1175;
shl.b32 %r2636, %r2635, %r23;
or.b32 %r3702, %r2636, %r2634;
$L__BB4_463:
shl.b32 %r2639, %r3702, 1;
shr.u32 %r2640, %r2639, %r23;
and.b32 %r1178, %r2640, -2;
setp.eq.s32 %p533, %r1178, 0;
mov.u32 %r3704, %r3703;
@%p533 bra $L__BB4_465;
or.b32 %r3696, %r3696, 8;
add.s32 %r2641, %r1178, -1;
clz.b32 %r2642, %r2641;
mov.u32 %r2643, 32;
sub.s32 %r3703, %r2643, %r2642;
max.s32 %r3700, %r3700, %r3703;
shr.u32 %r2644, %r3702, 31;
add.s32 %r2645, %r2644, %r1178;
add.s32 %r3704, %r2645, -2;
$L__BB4_465:
shr.u32 %r2647, %r1113, 2;
shr.u32 %r2648, %r1089, 1;
or.b32 %r2649, %r2647, %r2648;
shl.b32 %r2650, %r1140, 2;
cvt.u32.u16 %r2651, %rs535;
and.b32 %r2652, %r2651, 255;
add.s32 %r2653, %r2650, %r2652;
or.b32 %r1187, %r2649, %r2653;
add.s32 %r2654, %r3696, -1;
and.b32 %r2655, %r2654, %r3696;
setp.ne.s32 %p534, %r2655, 0;
mov.u32 %r3707, 0;
setp.gt.u16 %p535, %rs157, 2;
and.pred %p536, %p535, %p534;
cvt.u32.u16 %r2656, %rs157;
and.b32 %r2657, %r2656, 255;
add.s32 %r2658, %r2657, -1;
selp.b32 %r2659, %r2658, 1, %p536;
max.s32 %r1188, %r2659, %r3700;
sub.s32 %r1189, %r1188, %r2659;
setp.lt.s32 %p537, %r1189, 1;
@%p537 bra $L__BB4_467;
setp.eq.s32 %p538, %r3689, %r3700;
selp.u32 %r2660, 1, 0, %p538;
setp.eq.s32 %p539, %r3693, %r3700;
selp.u32 %r2661, -1, 0, %p539;
bfi.b32 %r2662, %r2661, %r2660, 1, 1;
setp.eq.s32 %p540, %r3698, %r3700;
selp.u16 %rs395, 1, 0, %p540;
mul.wide.u16 %r2663, %rs395, 4;
or.b32 %r2664, %r2662, %r2663;
setp.eq.s32 %p541, %r3703, %r3700;
selp.u16 %rs396, 1, 0, %p541;
mul.wide.u16 %r2665, %rs396, 8;
or.b32 %r3707, %r2664, %r2665;
$L__BB4_467:
shl.b32 %r2666, %r3696, 4;
shl.b32 %r2667, %r1187, 8;
or.b32 %r2668, %r2666, %r2667;
or.b32 %r2669, %r2668, %r3707;
mul.wide.u32 %rd249, %r2669, 2;
add.s64 %rd250, %rd18, %rd249;
ld.global.u16 %rs159, [%rd250];
shr.u16 %rs397, %rs159, 4;
and.b16 %rs160, %rs397, 7;
setp.eq.s16 %p542, %rs160, 0;
mov.u32 %r3719, %r3577;
@%p542 bra $L__BB4_474;
cvt.u32.u16 %r3708, %rs160;
shr.u16 %rs398, %rs159, 8;
cvt.u32.u16 %r3709, %rs398;
$L__BB4_469:
mov.u32 %r1194, %r3708;
setp.gt.u32 %p543, %r3508, 2879;
mov.u32 %r3719, 1;
@%p543 bra $L__BB4_474;
mov.u32 %r2671, 8;
sub.s32 %r2672, %r2671, %r3510;
sub.s32 %r2673, %r2672, %r3509;
min.u32 %r2674, %r2673, %r1194;
setp.eq.s32 %p544, %r2674, 32;
mov.u32 %r2675, -1;
shl.b32 %r2676, %r2675, %r2674;
not.b32 %r2677, %r2676;
selp.b32 %r2678, -1, %r2677, %p544;
and.b32 %r2679, %r2678, %r3709;
shl.b32 %r2680, %r2679, %r3509;
cvt.u16.u32 %rs399, %r2680;
or.b16 %rs532, %rs532, %rs399;
add.s32 %r3509, %r2674, %r3509;
sub.s32 %r3708, %r1194, %r2674;
shr.u32 %r3709, %r3709, %r2674;
setp.gt.u32 %p545, %r2673, %r1194;
@%p545 bra $L__BB4_473;
setp.ne.s32 %p546, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs400, %rs532, 255;
setp.ne.s16 %p547, %rs400, 127;
and.pred %p548, %p546, %p547;
@%p548 bra $L__BB4_473;
mov.u32 %r2683, 20548;
sub.s32 %r2684, %r2683, %r3508;
cvt.u64.u32 %rd251, %r2684;
add.s64 %rd252, %rd251, %rd5;
add.s64 %rd253, %rd1, %rd252;
st.global.u8 [%rd253], %rs532;
add.s32 %r3508, %r3508, 1;
setp.gt.u16 %p549, %rs400, 143;
selp.u32 %r3510, 1, 0, %p549;
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_473:
setp.ne.s32 %p550, %r3708, 0;
mov.u32 %r3719, %r3577;
@%p550 bra $L__BB4_469;
$L__BB4_474:
setp.ne.s32 %p551, %r1187, 0;
@%p551 bra $L__BB4_522;
setp.eq.s32 %p552, %r3696, 0;
add.s32 %r2685, %r3111, 17477;
cvt.u64.u32 %rd254, %r2685;
add.s64 %rd255, %rd254, %rd5;
add.s64 %rd23, %rd1, %rd255;
@%p552 bra $L__BB4_514;
shl.b16 %rs474, %rs474, 1;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p553, %r3120, 0;
mov.u32 %r3755, %r3277;
@%p553 bra $L__BB4_479;
bra.uni $L__BB4_477;
$L__BB4_479:
setp.lt.u32 %p555, %r3275, 3;
mov.u32 %r3723, 0;
@%p555 bra $L__BB4_482;
setp.lt.u32 %p556, %r3275, 6;
mov.u32 %r3723, 1;
@%p556 bra $L__BB4_482;
setp.lt.u32 %p557, %r3275, 9;
setp.eq.s32 %p558, %r3275, 11;
selp.b32 %r2691, 4, 5, %p558;
setp.lt.u32 %p559, %r3275, 11;
selp.b32 %r2692, 3, %r2691, %p559;
selp.b32 %r3723, 2, %r2692, %p557;
$L__BB4_482:
setp.eq.s32 %p560, %r3723, 0;
@%p560 bra $L__BB4_510;
add.s32 %r1218, %r3723, -1;
and.b32 %r1219, %r3723, 3;
setp.eq.s32 %p561, %r1219, 0;
mov.u32 %r3733, %r3723;
mov.u32 %r3734, %r3755;
@%p561 bra $L__BB4_495;
mov.u32 %r2694, 1;
shl.b32 %r2695, %r2694, %r1218;
and.b32 %r2696, %r2695, %r3274;
setp.ne.s32 %p562, %r2696, 0;
selp.u32 %r2697, 1, 0, %p562;
cvt.u32.u16 %r2698, %rs474;
bfi.b32 %r2699, %r2698, %r2697, 1, 8;
cvt.u16.u32 %rs474, %r2699;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p563, %r3120, 0;
mov.u32 %r3734, %r3755;
@%p563 bra $L__BB4_487;
setp.gt.u32 %p564, %r3111, 191;
mov.u32 %r3120, 0;
mov.u32 %r3734, %r2694;
@%p564 bra $L__BB4_487;
add.s32 %r2703, %r3111, 17477;
cvt.u64.u32 %rd256, %r2703;
add.s64 %rd257, %rd256, %rd5;
add.s64 %rd258, %rd1, %rd257;
st.global.u8 [%rd258], %rs474;
add.s32 %r3111, %r3111, 1;
mov.u16 %rs474, 0;
mov.u32 %r3120, 8;
mov.u32 %r3734, %r3755;
$L__BB4_487:
setp.eq.s32 %p565, %r1219, 1;
mov.u32 %r3755, %r3734;
mov.u32 %r3733, %r1218;
@%p565 bra $L__BB4_495;
add.s32 %r3733, %r3723, -2;
mov.u32 %r2704, 1;
shl.b32 %r2705, %r2704, %r3733;
and.b32 %r2706, %r2705, %r3274;
setp.ne.s32 %p566, %r2706, 0;
selp.u32 %r2707, 1, 0, %p566;
cvt.u32.u16 %r2708, %rs474;
bfi.b32 %r2709, %r2708, %r2707, 1, 8;
cvt.u16.u32 %rs474, %r2709;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p567, %r3120, 0;
mov.u32 %r3729, %r3734;
@%p567 bra $L__BB4_491;
setp.gt.u32 %p568, %r3111, 191;
mov.u32 %r3120, 0;
mov.u32 %r3729, %r2704;
@%p568 bra $L__BB4_491;
add.s32 %r2712, %r3111, 17477;
cvt.u64.u32 %rd259, %r2712;
add.s64 %rd260, %rd259, %rd5;
add.s64 %rd261, %rd1, %rd260;
and.b16 %rs407, %rs474, 255;
st.global.u8 [%rd261], %rs474;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p569, %rs407, 255;
selp.b32 %r3120, 7, 8, %p569;
mov.u16 %rs474, 0;
mov.u32 %r3729, %r3734;
$L__BB4_491:
setp.eq.s32 %p570, %r1219, 2;
mov.u32 %r3755, %r3729;
mov.u32 %r3734, %r3729;
@%p570 bra $L__BB4_495;
add.s32 %r3733, %r3723, -3;
mov.u32 %r2713, 1;
shl.b32 %r2714, %r2713, %r3733;
and.b32 %r2715, %r2714, %r3274;
setp.ne.s32 %p571, %r2715, 0;
selp.u32 %r2716, 1, 0, %p571;
cvt.u32.u16 %r2717, %rs474;
bfi.b32 %r2718, %r2717, %r2716, 1, 8;
cvt.u16.u32 %rs474, %r2718;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p572, %r3120, 0;
mov.u32 %r3755, %r3729;
mov.u32 %r3734, %r3729;
@%p572 bra $L__BB4_495;
setp.gt.u32 %p573, %r3111, 191;
mov.u32 %r3120, 0;
mov.u32 %r3755, %r2713;
mov.u32 %r3734, %r2713;
@%p573 bra $L__BB4_495;
add.s32 %r2723, %r3111, 17477;
cvt.u64.u32 %rd262, %r2723;
add.s64 %rd263, %rd262, %rd5;
add.s64 %rd264, %rd1, %rd263;
and.b16 %rs410, %rs474, 255;
st.global.u8 [%rd264], %rs474;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p574, %rs410, 255;
selp.b32 %r3120, 7, 8, %p574;
mov.u16 %rs474, 0;
mov.u32 %r3755, %r3729;
mov.u32 %r3734, %r3729;
$L__BB4_495:
setp.lt.u32 %p575, %r1218, 3;
@%p575 bra $L__BB4_510;
mov.u32 %r3755, %r3734;
$L__BB4_497:
add.s32 %r2724, %r3733, -1;
mov.u32 %r2725, 1;
shl.b32 %r2726, %r2725, %r2724;
and.b32 %r2727, %r2726, %r3274;
setp.ne.s32 %p576, %r2727, 0;
selp.u32 %r2728, 1, 0, %p576;
cvt.u32.u16 %r2729, %rs474;
bfi.b32 %r3743, %r2729, %r2728, 1, 8;
add.s32 %r3742, %r3120, -1;
setp.ne.s32 %p577, %r3742, 0;
mov.u32 %r3744, %r3755;
@%p577 bra $L__BB4_500;
setp.gt.u32 %p578, %r3111, 191;
mov.u32 %r3742, 0;
mov.u32 %r3744, %r2725;
@%p578 bra $L__BB4_500;
cvt.u16.u32 %rs411, %r3743;
and.b16 %rs412, %rs411, 255;
add.s32 %r2733, %r3111, 17477;
cvt.u64.u32 %rd265, %r2733;
add.s64 %rd266, %rd265, %rd5;
add.s64 %rd267, %rd1, %rd266;
st.global.u8 [%rd267], %rs411;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p579, %rs412, 255;
selp.b32 %r3742, 7, 8, %p579;
mov.u32 %r3743, 0;
mov.u32 %r3744, %r3755;
$L__BB4_500:
add.s32 %r2734, %r3733, -2;
shl.b32 %r2736, %r2725, %r2734;
and.b32 %r2737, %r2736, %r3274;
setp.ne.s32 %p580, %r2737, 0;
and.b32 %r2738, %r3743, 127;
selp.u32 %r2739, 1, 0, %p580;
bfi.b32 %r3747, %r2738, %r2739, 1, 7;
add.s32 %r3746, %r3742, -1;
setp.ne.s32 %p581, %r3746, 0;
mov.u32 %r3748, %r3744;
@%p581 bra $L__BB4_503;
setp.gt.u32 %p582, %r3111, 191;
mov.u32 %r3748, 1;
mov.u32 %r3746, 0;
@%p582 bra $L__BB4_503;
cvt.u16.u32 %rs413, %r3747;
and.b16 %rs414, %rs413, 255;
add.s32 %r2743, %r3111, 17477;
cvt.u64.u32 %rd268, %r2743;
add.s64 %rd269, %rd268, %rd5;
add.s64 %rd270, %rd1, %rd269;
st.global.u8 [%rd270], %rs413;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p583, %rs414, 255;
selp.b32 %r3746, 7, 8, %p583;
mov.u32 %r3747, 0;
mov.u32 %r3748, %r3744;
$L__BB4_503:
add.s32 %r2744, %r3733, -3;
mov.u32 %r2745, 1;
shl.b32 %r2746, %r2745, %r2744;
and.b32 %r2747, %r2746, %r3274;
setp.ne.s32 %p584, %r2747, 0;
and.b32 %r2748, %r3747, 127;
selp.u32 %r2749, 1, 0, %p584;
bfi.b32 %r3751, %r2748, %r2749, 1, 7;
add.s32 %r3750, %r3746, -1;
setp.ne.s32 %p585, %r3750, 0;
mov.u32 %r3752, %r3748;
@%p585 bra $L__BB4_506;
setp.gt.u32 %p586, %r3111, 191;
mov.u32 %r3750, 0;
mov.u32 %r3752, %r2745;
@%p586 bra $L__BB4_506;
cvt.u16.u32 %rs415, %r3751;
and.b16 %rs416, %rs415, 255;
add.s32 %r2753, %r3111, 17477;
cvt.u64.u32 %rd271, %r2753;
add.s64 %rd272, %rd271, %rd5;
add.s64 %rd273, %rd1, %rd272;
st.global.u8 [%rd273], %rs415;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p587, %rs416, 255;
selp.b32 %r3750, 7, 8, %p587;
mov.u32 %r3751, 0;
mov.u32 %r3752, %r3748;
$L__BB4_506:
add.s32 %r3733, %r3733, -4;
shl.b32 %r2755, %r2745, %r3733;
and.b32 %r2756, %r2755, %r3274;
setp.ne.s32 %p588, %r2756, 0;
and.b32 %r2757, %r3751, 127;
selp.u32 %r2758, 1, 0, %p588;
bfi.b32 %r2759, %r2757, %r2758, 1, 15;
cvt.u16.u32 %rs474, %r2759;
add.s32 %r3120, %r3750, -1;
setp.ne.s32 %p589, %r3120, 0;
mov.u32 %r3755, %r3752;
@%p589 bra $L__BB4_509;
setp.gt.u32 %p590, %r3111, 191;
mov.u32 %r3755, 1;
mov.u32 %r3120, 0;
@%p590 bra $L__BB4_509;
add.s32 %r2762, %r3111, 17477;
cvt.u64.u32 %rd274, %r2762;
add.s64 %rd275, %rd274, %rd5;
add.s64 %rd276, %rd1, %rd275;
and.b16 %rs418, %rs474, 255;
st.global.u8 [%rd276], %rs474;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p591, %rs418, 255;
selp.b32 %r3120, 7, 8, %p591;
mov.u16 %rs474, 0;
mov.u32 %r3755, %r3752;
$L__BB4_509:
setp.ne.s32 %p592, %r3733, 0;
@%p592 bra $L__BB4_497;
$L__BB4_510:
add.s32 %r2764, %r3275, -1;
setp.eq.s32 %p593, %r3275, 0;
mov.u32 %r3274, 0;
selp.b32 %r3275, 0, %r2764, %p593;
setp.lt.u32 %p594, %r3275, 3;
mov.u32 %r3759, %r3274;
@%p594 bra $L__BB4_513;
setp.lt.u32 %p595, %r3275, 6;
mov.u32 %r3759, 1;
@%p595 bra $L__BB4_513;
setp.lt.u32 %p596, %r3275, 9;
setp.eq.s32 %p597, %r3275, 11;
selp.b32 %r2766, 4, 5, %p597;
setp.lt.u32 %p598, %r3275, 11;
selp.b32 %r2767, 3, %r2766, %p598;
selp.b32 %r3759, 2, %r2767, %p596;
$L__BB4_513:
mov.u32 %r2769, 1;
shl.b32 %r3276, %r2769, %r3759;
mov.u32 %r3277, %r3755;
bra.uni $L__BB4_522;
$L__BB4_514:
add.s32 %r3274, %r3274, 1;
setp.lt.u32 %p599, %r3274, %r3276;
@%p599 bra $L__BB4_522;
shl.b16 %rs419, %rs474, 1;
or.b16 %rs474, %rs419, 1;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p600, %r3120, 0;
mov.u32 %r3762, %r3277;
@%p600 bra $L__BB4_518;
setp.gt.u32 %p601, %r3111, 191;
mov.u32 %r3762, 1;
mov.u32 %r3120, 0;
@%p601 bra $L__BB4_518;
and.b16 %rs421, %rs474, 255;
st.global.u8 [%rd23], %rs474;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p602, %rs421, 255;
selp.b32 %r3120, 7, 8, %p602;
mov.u16 %rs474, 0;
mov.u32 %r3762, %r3277;
$L__BB4_518:
add.s32 %r2773, %r3275, 1;
min.u32 %r3275, %r2773, 12;
setp.lt.u32 %p603, %r3275, 3;
mov.u32 %r3274, 0;
mov.u32 %r3763, %r3274;
@%p603 bra $L__BB4_521;
setp.lt.u32 %p604, %r3275, 6;
mov.u32 %r3763, 1;
@%p604 bra $L__BB4_521;
setp.lt.u32 %p605, %r3275, 9;
setp.eq.s32 %p606, %r3275, 11;
selp.b32 %r2775, 4, 5, %p606;
setp.lt.u32 %p607, %r3275, 11;
selp.b32 %r2776, 3, %r2775, %p607;
selp.b32 %r3763, 2, %r2776, %p605;
$L__BB4_521:
mov.u32 %r2778, 1;
shl.b32 %r3276, %r2778, %r3763;
mov.u32 %r3277, %r3762;
$L__BB4_522:
and.b16 %rs422, %rs159, 15;
cvt.u32.u16 %r1302, %rs422;
and.b32 %r2779, %r3696, 1;
setp.eq.b32 %p608, %r2779, 1;
mov.pred %p609, 0;
xor.pred %p610, %p608, %p609;
not.pred %p611, %p610;
mov.u32 %r3784, %r3687;
@%p611 bra $L__BB4_529;
and.b32 %r2780, %r1302, 1;
sub.s32 %r3770, %r1188, %r2780;
setp.eq.s32 %p612, %r3770, 0;
mov.u32 %r3784, %r3687;
@%p612 bra $L__BB4_529;
mov.u32 %r2781, -1;
shl.b32 %r2782, %r2781, %r3770;
not.b32 %r2783, %r2782;
and.b32 %r3771, %r3690, %r2783;
$L__BB4_525:
setp.gt.u32 %p613, %r3654, 17476;
mov.u32 %r3784, 1;
@%p613 bra $L__BB4_529;
sub.s32 %r2785, %r3653, %r3655;
min.u32 %r2786, %r2785, %r3770;
setp.eq.s32 %p614, %r2786, 32;
mov.u32 %r2787, -1;
shl.b32 %r2788, %r2787, %r2786;
not.b32 %r2789, %r2788;
selp.b32 %r2790, -1, %r2789, %p614;
and.b32 %r2791, %r2790, %r3771;
shl.b32 %r2792, %r2791, %r3655;
or.b32 %r3656, %r2792, %r3656;
add.s32 %r3655, %r2786, %r3655;
shr.u32 %r3771, %r3771, %r2786;
sub.s32 %r3770, %r3770, %r2786;
setp.lt.u32 %p615, %r3655, %r3653;
@%p615 bra $L__BB4_528;
cvt.u64.u32 %rd277, %r3654;
add.s64 %rd278, %rd277, %rd5;
add.s64 %rd279, %rd1, %rd278;
st.global.u8 [%rd279], %r3656;
add.s32 %r3654, %r3654, 1;
setp.eq.s32 %p616, %r3656, 255;
selp.b32 %r3653, 7, 8, %p616;
mov.u32 %r3655, 0;
mov.u32 %r3656, %r3655;
$L__BB4_528:
setp.ne.s32 %p617, %r3770, 0;
mov.u32 %r3784, %r3687;
@%p617 bra $L__BB4_525;
$L__BB4_529:
and.b32 %r1326, %r3696, 2;
setp.eq.s32 %p618, %r1326, 0;
mov.u32 %r3799, %r3784;
@%p618 bra $L__BB4_536;
shr.u32 %r2795, %r1302, 1;
and.b32 %r2796, %r2795, 1;
sub.s32 %r3785, %r1188, %r2796;
setp.eq.s32 %p619, %r3785, 0;
mov.u32 %r3799, %r3784;
@%p619 bra $L__BB4_536;
mov.u32 %r2797, -1;
shl.b32 %r2798, %r2797, %r3785;
not.b32 %r2799, %r2798;
and.b32 %r3786, %r3694, %r2799;
$L__BB4_532:
setp.gt.u32 %p620, %r3654, 17476;
mov.u32 %r3799, 1;
@%p620 bra $L__BB4_536;
sub.s32 %r2801, %r3653, %r3655;
min.u32 %r2802, %r2801, %r3785;
setp.eq.s32 %p621, %r2802, 32;
mov.u32 %r2803, -1;
shl.b32 %r2804, %r2803, %r2802;
not.b32 %r2805, %r2804;
selp.b32 %r2806, -1, %r2805, %p621;
and.b32 %r2807, %r2806, %r3786;
shl.b32 %r2808, %r2807, %r3655;
or.b32 %r3656, %r2808, %r3656;
add.s32 %r3655, %r2802, %r3655;
shr.u32 %r3786, %r3786, %r2802;
sub.s32 %r3785, %r3785, %r2802;
setp.lt.u32 %p622, %r3655, %r3653;
@%p622 bra $L__BB4_535;
cvt.u64.u32 %rd280, %r3654;
add.s64 %rd281, %rd280, %rd5;
add.s64 %rd282, %rd1, %rd281;
st.global.u8 [%rd282], %r3656;
add.s32 %r3654, %r3654, 1;
setp.eq.s32 %p623, %r3656, 255;
selp.b32 %r3653, 7, 8, %p623;
mov.u32 %r3655, 0;
mov.u32 %r3656, %r3655;
$L__BB4_535:
setp.ne.s32 %p624, %r3785, 0;
mov.u32 %r3799, %r3784;
@%p624 bra $L__BB4_532;
$L__BB4_536:
and.b32 %r1350, %r3696, 4;
setp.eq.s32 %p625, %r1350, 0;
mov.u32 %r3814, %r3799;
@%p625 bra $L__BB4_543;
shr.u32 %r2811, %r1302, 2;
and.b32 %r2812, %r2811, 1;
sub.s32 %r3800, %r1188, %r2812;
setp.eq.s32 %p626, %r3800, 0;
mov.u32 %r3814, %r3799;
@%p626 bra $L__BB4_543;
mov.u32 %r2813, -1;
shl.b32 %r2814, %r2813, %r3800;
not.b32 %r2815, %r2814;
and.b32 %r3801, %r3699, %r2815;
$L__BB4_539:
setp.gt.u32 %p627, %r3654, 17476;
mov.u32 %r3814, 1;
@%p627 bra $L__BB4_543;
sub.s32 %r2817, %r3653, %r3655;
min.u32 %r2818, %r2817, %r3800;
setp.eq.s32 %p628, %r2818, 32;
mov.u32 %r2819, -1;
shl.b32 %r2820, %r2819, %r2818;
not.b32 %r2821, %r2820;
selp.b32 %r2822, -1, %r2821, %p628;
and.b32 %r2823, %r2822, %r3801;
shl.b32 %r2824, %r2823, %r3655;
or.b32 %r3656, %r2824, %r3656;
add.s32 %r3655, %r2818, %r3655;
shr.u32 %r3801, %r3801, %r2818;
sub.s32 %r3800, %r3800, %r2818;
setp.lt.u32 %p629, %r3655, %r3653;
@%p629 bra $L__BB4_542;
cvt.u64.u32 %rd283, %r3654;
add.s64 %rd284, %rd283, %rd5;
add.s64 %rd285, %rd1, %rd284;
st.global.u8 [%rd285], %r3656;
add.s32 %r3654, %r3654, 1;
setp.eq.s32 %p630, %r3656, 255;
selp.b32 %r3653, 7, 8, %p630;
mov.u32 %r3655, 0;
mov.u32 %r3656, %r3655;
$L__BB4_542:
setp.ne.s32 %p631, %r3800, 0;
mov.u32 %r3814, %r3799;
@%p631 bra $L__BB4_539;
$L__BB4_543:
and.b32 %r1374, %r3696, 8;
setp.eq.s32 %p632, %r1374, 0;
mov.u32 %r3829, %r3814;
@%p632 bra $L__BB4_550;
shr.u32 %r2827, %r1302, 3;
sub.s32 %r3815, %r1188, %r2827;
setp.eq.s32 %p633, %r3815, 0;
mov.u32 %r3829, %r3814;
@%p633 bra $L__BB4_550;
mov.u32 %r2828, -1;
shl.b32 %r2829, %r2828, %r3815;
not.b32 %r2830, %r2829;
and.b32 %r3816, %r3704, %r2830;
$L__BB4_546:
setp.gt.u32 %p634, %r3654, 17476;
mov.u32 %r3829, 1;
@%p634 bra $L__BB4_550;
sub.s32 %r2832, %r3653, %r3655;
min.u32 %r2833, %r2832, %r3815;
setp.eq.s32 %p635, %r2833, 32;
mov.u32 %r2834, -1;
shl.b32 %r2835, %r2834, %r2833;
not.b32 %r2836, %r2835;
selp.b32 %r2837, -1, %r2836, %p635;
and.b32 %r2838, %r2837, %r3816;
shl.b32 %r2839, %r2838, %r3655;
or.b32 %r3656, %r2839, %r3656;
add.s32 %r3655, %r2833, %r3655;
shr.u32 %r3816, %r3816, %r2833;
sub.s32 %r3815, %r3815, %r2833;
setp.lt.u32 %p636, %r3655, %r3653;
@%p636 bra $L__BB4_549;
cvt.u64.u32 %rd286, %r3654;
add.s64 %rd287, %rd286, %rd5;
add.s64 %rd288, %rd1, %rd287;
st.global.u8 [%rd288], %r3656;
add.s32 %r3654, %r3654, 1;
setp.eq.s32 %p637, %r3656, 255;
selp.b32 %r3653, 7, 8, %p637;
mov.u32 %r3655, 0;
mov.u32 %r3656, %r3655;
$L__BB4_549:
setp.ne.s32 %p638, %r3815, 0;
mov.u32 %r3829, %r3814;
@%p638 bra $L__BB4_546;
$L__BB4_550:
mov.u32 %r3001, _ZZ55 j2k_htj2k_encode_codeblocks_multi_input_cleanup_64E13cleanup_e_val;
and.b32 %r2842, %r3693, 255;
and.b32 %r2843, %r3561, 255;
setp.lt.u32 %p639, %r2842, %r2843;
cvt.u16.u32 %rs423, %r3693;
selp.b16 %rs424, %rs158, %rs423, %p639;
st.shared.u8 [%r1137+1], %rs424;
ld.shared.u8 %rs537, [%r1137+3];
setp.gt.u16 %p640, %rs156, %rs537;
add.s32 %r2844, %r878, 3;
selp.b32 %r2845, %r3542, %r2844, %p640;
add.s32 %r2847, %r3001, %r2845;
ld.shared.u8 %rs539, [%r2847];
cvt.u16.u32 %rs538, %r3703;
shr.u32 %r2848, %r1326, 1;
or.b32 %r2849, %r1141, %r2848;
st.shared.u8 [%r1137+2], %r3703;
st.shared.u8 [%r1139+1], %r2849;
ld.shared.u8 %rs535, [%r1139+3];
mul.wide.u16 %r2850, %rs535, 4;
add.s32 %r2851, %r2850, %r1140;
shr.u32 %r2852, %r1374, 3;
cvt.u16.u32 %rs536, %r2852;
st.shared.u8 [%r1139+2], %r2852;
shr.u32 %r2853, %r1374, 2;
shr.u32 %r2854, %r1350, 1;
or.b32 %r2855, %r2853, %r2854;
or.b32 %r3543, %r2855, %r2851;
mul.lo.s32 %r2856, %r928, 6;
setp.gt.s32 %p641, %r928, 0;
selp.b32 %r2857, %r2856, 0, %p641;
cvt.u64.u32 %rd289, %r2857;
add.s64 %rd24, %rd17, %rd289;
ld.global.u8 %rs186, [%rd24+1];
add.s32 %r2858, %r2857, 2;
cvt.u64.u32 %rd290, %r2858;
add.s64 %rd291, %rd17, %rd290;
ld.global.u8 %rs187, [%rd291];
ld.global.u8 %rs188, [%rd291+1];
mul.lo.s32 %r2859, %r1189, 6;
setp.gt.s32 %p642, %r1189, 0;
selp.b32 %r2860, %r2859, 0, %p642;
cvt.u64.u32 %rd292, %r2860;
add.s64 %rd293, %rd17, %rd292;
ld.global.u8 %rs189, [%rd293];
ld.global.u8 %rs190, [%rd293+1];
add.s32 %r2861, %r2860, 2;
cvt.u64.u32 %rd294, %r2861;
add.s64 %rd295, %rd17, %rd294;
ld.global.u8 %rs191, [%rd295];
ld.global.u8 %rs192, [%rd295+1];
setp.eq.s16 %p643, %rs186, 0;
mov.u32 %r3841, %r3719;
@%p643 bra $L__BB4_557;
ld.global.u8 %r3831, [%rd24];
cvt.u32.u16 %r3830, %rs186;
$L__BB4_552:
mov.u32 %r1401, %r3830;
setp.gt.u32 %p644, %r3508, 2879;
mov.u32 %r3841, 1;
@%p644 bra $L__BB4_557;
mov.u32 %r2863, 8;
sub.s32 %r2864, %r2863, %r3510;
sub.s32 %r2865, %r2864, %r3509;
min.u32 %r2866, %r2865, %r1401;
setp.eq.s32 %p645, %r2866, 32;
mov.u32 %r2867, -1;
shl.b32 %r2868, %r2867, %r2866;
not.b32 %r2869, %r2868;
selp.b32 %r2870, -1, %r2869, %p645;
and.b32 %r2871, %r2870, %r3831;
shl.b32 %r2872, %r2871, %r3509;
cvt.u16.u32 %rs425, %r2872;
or.b16 %rs532, %rs532, %rs425;
add.s32 %r3509, %r2866, %r3509;
sub.s32 %r3830, %r1401, %r2866;
shr.u32 %r3831, %r3831, %r2866;
setp.gt.u32 %p646, %r2865, %r1401;
@%p646 bra $L__BB4_556;
setp.ne.s32 %p647, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs426, %rs532, 255;
setp.ne.s16 %p648, %rs426, 127;
and.pred %p649, %p647, %p648;
@%p649 bra $L__BB4_556;
mov.u32 %r2875, 20548;
sub.s32 %r2876, %r2875, %r3508;
cvt.u64.u32 %rd296, %r2876;
add.s64 %rd297, %rd296, %rd5;
add.s64 %rd298, %rd1, %rd297;
st.global.u8 [%rd298], %rs532;
add.s32 %r3508, %r3508, 1;
setp.gt.u16 %p650, %rs426, 143;
selp.u32 %r3510, 1, 0, %p650;
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_556:
setp.ne.s32 %p651, %r3830, 0;
mov.u32 %r3841, %r3719;
@%p651 bra $L__BB4_552;
$L__BB4_557:
setp.eq.s16 %p652, %rs190, 0;
mov.u32 %r3853, %r3841;
@%p652 bra $L__BB4_564;
cvt.u32.u16 %r2877, %rs189;
and.b32 %r3843, %r2877, 255;
cvt.u32.u16 %r2878, %rs190;
and.b32 %r3842, %r2878, 255;
$L__BB4_559:
mov.u32 %r1420, %r3842;
setp.gt.u32 %p653, %r3508, 2879;
mov.u32 %r3853, 1;
@%p653 bra $L__BB4_564;
mov.u32 %r2880, 8;
sub.s32 %r2881, %r2880, %r3510;
sub.s32 %r2882, %r2881, %r3509;
min.u32 %r2883, %r2882, %r1420;
setp.eq.s32 %p654, %r2883, 32;
mov.u32 %r2884, -1;
shl.b32 %r2885, %r2884, %r2883;
not.b32 %r2886, %r2885;
selp.b32 %r2887, -1, %r2886, %p654;
and.b32 %r2888, %r2887, %r3843;
shl.b32 %r2889, %r2888, %r3509;
cvt.u16.u32 %rs430, %r2889;
or.b16 %rs532, %rs532, %rs430;
add.s32 %r3509, %r2883, %r3509;
sub.s32 %r3842, %r1420, %r2883;
shr.u32 %r3843, %r3843, %r2883;
setp.gt.u32 %p655, %r2882, %r1420;
@%p655 bra $L__BB4_563;
setp.ne.s32 %p656, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs431, %rs532, 255;
setp.ne.s16 %p657, %rs431, 127;
and.pred %p658, %p656, %p657;
@%p658 bra $L__BB4_563;
mov.u32 %r2892, 20548;
sub.s32 %r2893, %r2892, %r3508;
cvt.u64.u32 %rd299, %r2893;
add.s64 %rd300, %rd299, %rd5;
add.s64 %rd301, %rd1, %rd300;
st.global.u8 [%rd301], %rs532;
add.s32 %r3508, %r3508, 1;
setp.gt.u16 %p659, %rs431, 143;
selp.u32 %r3510, 1, 0, %p659;
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_563:
setp.ne.s32 %p660, %r3842, 0;
mov.u32 %r3853, %r3841;
@%p660 bra $L__BB4_559;
$L__BB4_564:
setp.eq.s16 %p661, %rs188, 0;
mov.u32 %r3865, %r3853;
@%p661 bra $L__BB4_571;
cvt.u32.u16 %r2894, %rs188;
and.b32 %r3854, %r2894, 255;
cvt.u32.u16 %r2895, %rs187;
and.b32 %r3855, %r2895, 255;
$L__BB4_566:
mov.u32 %r1439, %r3854;
setp.gt.u32 %p662, %r3508, 2879;
mov.u32 %r3865, 1;
@%p662 bra $L__BB4_571;
mov.u32 %r2897, 8;
sub.s32 %r2898, %r2897, %r3510;
sub.s32 %r2899, %r2898, %r3509;
min.u32 %r2900, %r2899, %r1439;
setp.eq.s32 %p663, %r2900, 32;
mov.u32 %r2901, -1;
shl.b32 %r2902, %r2901, %r2900;
not.b32 %r2903, %r2902;
selp.b32 %r2904, -1, %r2903, %p663;
and.b32 %r2905, %r2904, %r3855;
shl.b32 %r2906, %r2905, %r3509;
cvt.u16.u32 %rs435, %r2906;
or.b16 %rs532, %rs532, %rs435;
add.s32 %r3509, %r2900, %r3509;
sub.s32 %r3854, %r1439, %r2900;
shr.u32 %r3855, %r3855, %r2900;
setp.gt.u32 %p664, %r2899, %r1439;
@%p664 bra $L__BB4_570;
setp.ne.s32 %p665, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs436, %rs532, 255;
setp.ne.s16 %p666, %rs436, 127;
and.pred %p667, %p665, %p666;
@%p667 bra $L__BB4_570;
mov.u32 %r2909, 20548;
sub.s32 %r2910, %r2909, %r3508;
cvt.u64.u32 %rd302, %r2910;
add.s64 %rd303, %rd302, %rd5;
add.s64 %rd304, %rd1, %rd303;
st.global.u8 [%rd304], %rs532;
add.s32 %r3508, %r3508, 1;
setp.gt.u16 %p668, %rs436, 143;
selp.u32 %r3510, 1, 0, %p668;
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_570:
setp.ne.s32 %p669, %r3854, 0;
mov.u32 %r3865, %r3853;
@%p669 bra $L__BB4_566;
$L__BB4_571:
setp.eq.s16 %p670, %rs192, 0;
mov.u32 %r3511, %r3865;
@%p670 bra $L__BB4_578;
cvt.u32.u16 %r2911, %rs191;
and.b32 %r3867, %r2911, 255;
cvt.u32.u16 %r2912, %rs192;
and.b32 %r3866, %r2912, 255;
$L__BB4_573:
mov.u32 %r1458, %r3866;
setp.gt.u32 %p671, %r3508, 2879;
mov.u32 %r3511, 1;
@%p671 bra $L__BB4_578;
mov.u32 %r2914, 8;
sub.s32 %r2915, %r2914, %r3510;
sub.s32 %r2916, %r2915, %r3509;
min.u32 %r2917, %r2916, %r1458;
setp.eq.s32 %p672, %r2917, 32;
mov.u32 %r2918, -1;
shl.b32 %r2919, %r2918, %r2917;
not.b32 %r2920, %r2919;
selp.b32 %r2921, -1, %r2920, %p672;
and.b32 %r2922, %r2921, %r3867;
shl.b32 %r2923, %r2922, %r3509;
cvt.u16.u32 %rs440, %r2923;
or.b16 %rs532, %rs532, %rs440;
add.s32 %r3509, %r2917, %r3509;
sub.s32 %r3866, %r1458, %r2917;
shr.u32 %r3867, %r3867, %r2917;
setp.gt.u32 %p673, %r2916, %r1458;
@%p673 bra $L__BB4_577;
setp.ne.s32 %p674, %r3510, 0;
mov.u32 %r3510, 0;
and.b16 %rs441, %rs532, 255;
setp.ne.s16 %p675, %rs441, 127;
and.pred %p676, %p674, %p675;
@%p676 bra $L__BB4_577;
mov.u32 %r2926, 20548;
sub.s32 %r2927, %r2926, %r3508;
cvt.u64.u32 %rd305, %r2927;
add.s64 %rd306, %rd305, %rd5;
add.s64 %rd307, %rd1, %rd306;
st.global.u8 [%rd307], %rs532;
add.s32 %r3508, %r3508, 1;
setp.gt.u16 %p677, %rs441, 143;
selp.u32 %r3510, 1, 0, %p677;
mov.u16 %rs532, 0;
mov.u32 %r3509, 0;
$L__BB4_577:
setp.ne.s32 %p678, %r3866, 0;
mov.u32 %r3511, %r3865;
@%p678 bra $L__BB4_573;
$L__BB4_578:
add.s64 %rd338, %rd338, 16;
add.s32 %r3528, %r3528, 4;
setp.lt.u32 %p679, %r3528, 64;
@%p679 bra $L__BB4_348;
add.s32 %r3512, %r3512, 2;
setp.lt.u32 %p680, %r3512, 64;
add.s64 %rd337, %rd337, 1;
@%p680 bra $L__BB4_347;
setp.eq.s32 %p681, %r3274, 0;
mov.u32 %r3880, %r3277;
@%p681 bra $L__BB4_584;
shl.b16 %rs444, %rs474, 1;
or.b16 %rs474, %rs444, 1;
add.s32 %r3120, %r3120, -1;
setp.ne.s32 %p682, %r3120, 0;
mov.u32 %r3880, %r3277;
@%p682 bra $L__BB4_584;
setp.gt.u32 %p683, %r3111, 191;
mov.u32 %r3880, 1;
mov.u32 %r3120, 0;
@%p683 bra $L__BB4_584;
add.s32 %r2930, %r3111, 17477;
cvt.u64.u32 %rd308, %r2930;
add.s64 %rd309, %rd308, %rd5;
add.s64 %rd310, %rd1, %rd309;
and.b16 %rs446, %rs474, 255;
st.global.u8 [%rd310], %rs474;
add.s32 %r3111, %r3111, 1;
setp.eq.s16 %p684, %rs446, 255;
selp.b32 %r3120, 7, 8, %p684;
mov.u16 %rs474, 0;
mov.u32 %r3880, %r3277;
$L__BB4_584:
cvt.u32.u16 %r2931, %rs474;
and.b32 %r2932, %r2931, 255;
shl.b32 %r2933, %r2932, %r3120;
cvt.u16.u32 %rs211, %r2933;
mov.u32 %r2934, -1;
shl.b32 %r2935, %r2934, %r3509;
not.b32 %r2936, %r2935;
mov.u32 %r2937, 255;
and.b32 %r2938, %r2936, 255;
setp.eq.s32 %p685, %r3509, 0;
selp.b32 %r1483, 0, %r2938, %p685;
shl.b32 %r1484, %r2937, %r3120;
and.b32 %r2939, %r1484, 255;
or.b32 %r2940, %r2939, %r1483;
setp.eq.s32 %p686, %r2940, 0;
mov.u32 %r3882, %r3511;
mov.u32 %r3884, %r3880;
@%p686 bra $L__BB4_590;
or.b16 %rs212, %rs532, %rs211;
and.b16 %rs447, %rs212, 255;
xor.b16 %rs448, %rs212, %rs211;
cvt.u32.u16 %r2941, %rs448;
and.b32 %r2942, %r1484, %r2941;
and.b32 %r2943, %r2942, 255;
xor.b16 %rs449, %rs212, %rs532;
cvt.u32.u16 %r2944, %rs449;
and.b32 %r2945, %r1483, %r2944;
or.b32 %r2946, %r2943, %r2945;
setp.eq.s32 %p687, %r2946, 0;
setp.ne.s16 %p688, %rs447, 255;
and.pred %p689, %p688, %p687;
setp.gt.u32 %p690, %r3508, 1;
and.pred %p691, %p690, %p689;
add.s32 %r2947, %r3111, 17477;
cvt.u64.u32 %rd311, %r2947;
add.s64 %rd312, %rd311, %rd5;
add.s64 %rd27, %rd1, %rd312;
@%p691 bra $L__BB4_588;
bra.uni $L__BB4_586;
$L__BB4_588:
setp.gt.u32 %p695, %r3111, 191;
mov.u32 %r3884, 1;
mov.u32 %r3882, %r3511;
@%p695 bra $L__BB4_590;
st.global.u8 [%rd27], %rs212;
add.s32 %r3111, %r3111, 1;
mov.u32 %r3882, %r3511;
mov.u32 %r3884, %r3880;
bra.uni $L__BB4_590;
$L__BB4_586:
setp.gt.u32 %p692, %r3111, 191;
setp.gt.u32 %p693, %r3508, 2879;
or.pred %p694, %p693, %p692;
mov.u32 %r3882, 1;
mov.u32 %r3884, %r3882;
@%p694 bra $L__BB4_590;
st.global.u8 [%rd27], %rs211;
add.s32 %r3111, %r3111, 1;
mov.u32 %r2950, 20548;
sub.s32 %r2951, %r2950, %r3508;
cvt.u64.u32 %rd313, %r2951;
add.s64 %rd314, %rd313, %rd5;
add.s64 %rd315, %rd1, %rd314;
st.global.u8 [%rd315], %rs532;
add.s32 %r3508, %r3508, 1;
mov.u32 %r3882, %r3511;
mov.u32 %r3884, %r3880;
$L__BB4_590:
setp.eq.s32 %p696, %r3655, 0;
@%p696 bra $L__BB4_594;
sub.s32 %r2953, %r3653, %r3655;
mov.u32 %r2954, -1;
shl.b32 %r2955, %r2954, %r2953;
not.b32 %r2956, %r2955;
and.b32 %r2957, %r2956, 255;
shl.b32 %r2958, %r2957, %r3655;
or.b32 %r1492, %r2958, %r3656;
setp.eq.s32 %p697, %r1492, 255;
mov.u32 %r3886, %r3829;
@%p697 bra $L__BB4_596;
setp.gt.u32 %p698, %r3654, 17476;
mov.u32 %r3886, 1;
@%p698 bra $L__BB4_596;
cvt.u64.u32 %rd316, %r3654;
add.s64 %rd317, %rd316, %rd5;
add.s64 %rd318, %rd1, %rd317;
st.global.u8 [%rd318], %r1492;
add.s32 %r3654, %r3654, 1;
mov.u32 %r3886, %r3829;
bra.uni $L__BB4_596;
$L__BB4_594:
setp.ne.s32 %p699, %r3653, 7;
mov.u32 %r3886, %r3829;
@%p699 bra $L__BB4_596;
setp.eq.s32 %p700, %r3654, 0;
add.s32 %r2960, %r3654, -1;
selp.b32 %r3654, 0, %r2960, %p700;
mov.u32 %r3886, %r3829;
$L__BB4_596:
or.b32 %r2961, %r3884, %r3882;
or.b32 %r2962, %r2961, %r3886;
setp.eq.s32 %p701, %r2962, 0;
@%p701 bra $L__BB4_598;
ld.param.u64 %rd331, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_64_param_5];
cvta.to.global.u64 %rd330, %rd331;
mov.u32 %r3002, %ctaid.x;
mul.wide.u32 %rd320, %r3002, 32;
add.s64 %rd321, %rd330, %rd320;
mov.u32 %r2964, 1;
st.global.u32 [%rd321], %r2964;
mov.u32 %r2965, 3;
st.global.u32 [%rd321+4], %r2965;
mov.u32 %r2966, 0;
st.global.u32 [%rd321+8], %r2966;
st.global.u32 [%rd321+12], %r2966;
st.global.u32 [%rd321+16], %r2966;
st.global.u32 [%rd321+20], %r2966;
st.global.u32 [%rd321+24], %r2966;
st.global.u32 [%rd321+28], %r2966;
bra.uni $L__BB4_605;
$L__BB4_598:
add.s32 %r2967, %r3111, %r3508;
add.s32 %r1497, %r2967, %r3654;
setp.lt.u32 %p702, %r1497, 2;
setp.gt.u32 %p703, %r1497, %r1506;
or.pred %p704, %p702, %p703;
@%p704 bra $L__BB4_600;
bra.uni $L__BB4_599;
$L__BB4_600:
ld.param.u64 %rd335, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_64_param_5];
cvta.to.global.u64 %rd334, %rd335;
mov.u32 %r3004, %ctaid.x;
mul.wide.u32 %rd326, %r3004, 32;
add.s64 %rd327, %rd334, %rd326;
mov.u32 %r2976, 1;
st.global.u32 [%rd327], %r2976;
mov.u32 %r2977, 4;
st.global.u32 [%rd327+4], %r2977;
mov.u32 %r2978, 0;
st.global.u32 [%rd327+8], %r2978;
st.global.u32 [%rd327+12], %r2978;
st.global.u32 [%rd327+16], %r2978;
st.global.u32 [%rd327+20], %r2978;
st.global.u32 [%rd327+24], %r2978;
st.global.u32 [%rd327+28], %r2978;
bra.uni $L__BB4_605;
$L__BB4_599:
ld.param.u64 %rd333, [ j2k_htj2k_encode_codeblocks_multi_input_cleanup_64_param_5];
cvta.to.global.u64 %rd332, %rd333;
mov.u32 %r3003, %ctaid.x;
and.b32 %r2968, %r3111, 32767;
and.b32 %r2969, %r3508, 32767;
bfi.b32 %r2970, %r2969, %r2968, 15, 15;
or.b32 %r2971, %r2970, -2147483648;
mul.wide.u32 %rd323, %r3003, 32;
add.s64 %rd324, %rd332, %rd323;
mov.u32 %r2973, 0;
st.global.u32 [%rd324], %r2973;
st.global.u32 [%rd324+4], %r2973;
st.global.u32 [%rd324+8], %r1497;
mov.u32 %r2974, 1;
st.global.u32 [%rd324+12], %r2974;
add.s32 %r2989, %r1504, -1;
st.global.u32 [%rd324+16], %r2989;
st.global.u32 [%rd324+20], %r1497;
st.global.u32 [%rd324+24], %r2973;
st.global.u32 [%rd324+28], %r2971;
bra.uni $L__BB4_605;
}
// .globl j2k_htj2k_compact_codeblocks
// -----------------------------------------------------------------------------
// Kernel: j2k_htj2k_compact_codeblocks
//
// Copies one byte range per thread block from a source buffer into a
// destination buffer, driven by a 16-byte record selected by %ctaid.x.
//
// Parameters (all .u64):
//   param_0  generic pointer to the source bytes      (-> %rd2 after cvta)
//   param_1  generic pointer to the destination bytes (-> %rd1 after cvta)
//   param_2  generic pointer to the record array, 16 bytes per record:
//              +0  u32 source offset        (%r1)
//              +4  u32 destination offset   (%r2)
//              +8  u32 byte count           (%r3)
//              +12 u32 packed layout word   (%r4)
//   param_3  number of records; blocks with ctaid.x >= param_3 return at once.
//
// Layout word (%r4):
//   bit 31 clear -> plain copy: dst[r2 + i] = src[r1 + i] for i in [0, r3).
//   bit 31 set   -> bits 0..14 = lenA, bits 15..29 = lenB, and with
//                   lenHead = r3 - (lenA + lenB) the output byte i is read from
//                     i <  lenHead          : src[r1 + i]
//                     i <  lenHead + lenA   : src[r1 + 17477 + (i - lenHead)]
//                     otherwise             : src[r1 + 20549 - r3 + i]
//                   (the last lenB bytes of the range ending at r1 + 20548).
//                   If r3 < lenA + lenB nothing is written.
//                   If r3 > 1 the last two output bytes are patched:
//                     i == r3-2 : (byte & 0xF0) | ((lenA + lenB) & 15)
//                     i == r3-1 : (lenA + lenB) >> 4   (low 8 bits are stored)
//
// Work distribution: each thread starts at i = %tid.x and advances by %ntid.x
// until i >= r3. All element indices are formed with 32-bit adds and then
// zero-extended to 64 bits before being added to the base pointer.
//
// NOTE(review): 17477 and 20548/20549 look like the same per-codeblock scratch
// offsets the encode kernels in this file store to (base+17477+n forward,
// base+20548-n backward), and the layout word looks like the value those
// kernels write at record offset +28 -- confirm against the host-side code.
// NOTE(review): the two-byte tail patch presumably encodes the combined length
// of the second and third regions for the decoder -- confirm against the
// HTJ2K codestream spec before relying on this description.
// -----------------------------------------------------------------------------
.visible .entry j2k_htj2k_compact_codeblocks(
.param .u64 j2k_htj2k_compact_codeblocks_param_0,
.param .u64 j2k_htj2k_compact_codeblocks_param_1,
.param .u64 j2k_htj2k_compact_codeblocks_param_2,
.param .u64 j2k_htj2k_compact_codeblocks_param_3
)
{
.reg .pred %p<16>;
.reg .b16 %rs<11>;
.reg .b32 %r<48>;
.reg .b64 %rd<23>;
// Load the four parameters; %rd2 = source base, %rd1 = destination base.
ld.param.u64 %rd5, [ j2k_htj2k_compact_codeblocks_param_0];
ld.param.u64 %rd6, [ j2k_htj2k_compact_codeblocks_param_1];
ld.param.u64 %rd4, [ j2k_htj2k_compact_codeblocks_param_2];
ld.param.u64 %rd7, [ j2k_htj2k_compact_codeblocks_param_3];
cvta.to.global.u64 %rd1, %rd6;
cvta.to.global.u64 %rd2, %rd5;
// One record per thread block: bail out if ctaid.x is past the record count.
mov.u32 %r32, %ctaid.x;
cvt.u64.u32 %rd3, %r32;
setp.ge.u64 %p1, %rd3, %rd7;
@%p1 bra $L__BB5_21;
// Fetch the 16-byte record at param_2 + ctaid.x * 16.
cvta.to.global.u64 %rd8, %rd4;
shl.b64 %rd9, %rd3, 4;
add.s64 %rd10, %rd8, %rd9;
// %r1 = source offset, %r2 = destination offset, %r3 = byte count,
// %r4 = packed layout word.
ld.global.u32 %r1, [%rd10];
ld.global.u32 %r2, [%rd10+4];
ld.global.u32 %r3, [%rd10+8];
ld.global.u32 %r4, [%rd10+12];
// Bit 31 of the layout word (signed < 0) selects the three-region gather path.
setp.lt.s32 %p2, %r4, 0;
@%p2 bra $L__BB5_5;
// ---- Plain copy path ----
// Threads whose starting index is already >= count have nothing to do.
mov.u32 %r43, %tid.x;
setp.ge.u32 %p3, %r43, %r3;
@%p3 bra $L__BB5_21;
mov.u32 %r6, %ntid.x;
$L__BB5_4:
// dst[r2 + i] = src[r1 + i]; i += ntid.x
add.s32 %r33, %r43, %r1;
cvt.u64.u32 %rd11, %r33;
add.s64 %rd12, %rd2, %rd11;
ld.global.u8 %rs3, [%rd12];
add.s32 %r34, %r43, %r2;
cvt.u64.u32 %rd13, %r34;
add.s64 %rd14, %rd1, %rd13;
st.global.u8 [%rd14], %rs3;
add.s32 %r43, %r43, %r6;
setp.lt.u32 %p4, %r43, %r3;
@%p4 bra $L__BB5_4;
bra.uni $L__BB5_21;
$L__BB5_5:
// ---- Three-region gather path ----
// %r9 = lenA (bits 0..14), %r10 = lenB (bits 15..29), %r11 = lenA + lenB.
and.b32 %r9, %r4, 32767;
shr.u32 %r35, %r4, 15;
and.b32 %r10, %r35, 32767;
add.s32 %r11, %r10, %r9;
// Reject records whose total count is smaller than lenA + lenB (no output).
setp.lt.u32 %p5, %r3, %r11;
@%p5 bra $L__BB5_21;
// %r12 = lenHead = count - (lenA + lenB): bytes taken from the first region.
sub.s32 %r12, %r3, %r11;
mov.u32 %r44, %tid.x;
setp.ge.u32 %p6, %r44, %r3;
@%p6 bra $L__BB5_21;
// %p7: count > 1, i.e. there is room for the two-byte tail patch.
setp.gt.u32 %p7, %r3, 1;
// %r14 = lenHead + lenA: first output index served by the third region.
add.s32 %r14, %r12, %r9;
mov.u32 %r15, %ntid.x;
// %r16 = r1 + 17477 - lenHead, so %r16 + i addresses the second region.
add.s32 %r36, %r1, 17477;
sub.s32 %r16, %r36, %r12;
// %r17 = r1 + 20549 - lenA - lenB - lenHead = r1 + 20549 - count,
// so %r17 + i addresses the third region, which ends at r1 + 20548.
add.s32 %r37, %r1, 20549;
sub.s32 %r38, %r37, %r9;
sub.s32 %r39, %r38, %r10;
sub.s32 %r17, %r39, %r12;
@%p7 bra $L__BB5_14;
bra.uni $L__BB5_8;
$L__BB5_14:
// Gather loop with tail patch (count > 1).
// %rs1 = (lenA + lenB) & 15   -> low nibble for output byte count-2
// %rs2 = (lenA + lenB) >> 4   -> value for output byte count-1
// %r24 = count - 2, %r25 = count - 1: the two patched output indices.
cvt.u16.u32 %rs5, %r11;
and.b16 %rs1, %rs5, 15;
add.s32 %r24, %r3, -2;
shr.u32 %r41, %r11, 4;
cvt.u16.u32 %rs2, %r41;
add.s32 %r25, %r3, -1;
$L__BB5_15:
// Pick the source index %r47 for output index i = %r44.
setp.lt.u32 %p11, %r44, %r12;
@%p11 bra $L__BB5_19;
bra.uni $L__BB5_16;
$L__BB5_19:
// i < lenHead: first region, src[r1 + i].
add.s32 %r47, %r44, %r1;
bra.uni $L__BB5_20;
$L__BB5_16:
setp.lt.u32 %p12, %r44, %r14;
@%p12 bra $L__BB5_18;
bra.uni $L__BB5_17;
$L__BB5_18:
// lenHead <= i < lenHead + lenA: second region.
add.s32 %r47, %r16, %r44;
bra.uni $L__BB5_20;
$L__BB5_17:
// i >= lenHead + lenA: third region.
add.s32 %r47, %r17, %r44;
$L__BB5_20:
cvt.u64.u32 %rd19, %r47;
add.s64 %rd20, %rd2, %rd19;
ld.global.u8 %rs6, [%rd20];
// Candidate for byte count-2: keep the high nibble, replace the low nibble.
and.b16 %rs7, %rs6, 240;
or.b16 %rs8, %rs7, %rs1;
setp.eq.s32 %p13, %r44, %r24;
selp.b16 %rs9, %rs8, %rs6, %p13;
// Byte count-1 is replaced outright; this select takes priority over the
// previous one. Only the low 8 bits of %rs10 reach memory (st.global.u8).
setp.eq.s32 %p14, %r44, %r25;
selp.b16 %rs10, %rs2, %rs9, %p14;
add.s32 %r42, %r44, %r2;
cvt.u64.u32 %rd21, %r42;
add.s64 %rd22, %rd1, %rd21;
st.global.u8 [%rd22], %rs10;
add.s32 %r44, %r44, %r15;
setp.lt.u32 %p15, %r44, %r3;
@%p15 bra $L__BB5_15;
bra.uni $L__BB5_21;
$L__BB5_8:
// Gather loop without tail patch (count <= 1): same region selection as
// above, but the loaded byte is stored unmodified.
setp.lt.u32 %p8, %r44, %r12;
@%p8 bra $L__BB5_12;
bra.uni $L__BB5_9;
$L__BB5_12:
// First region.
add.s32 %r45, %r44, %r1;
bra.uni $L__BB5_13;
$L__BB5_9:
setp.lt.u32 %p9, %r44, %r14;
@%p9 bra $L__BB5_11;
bra.uni $L__BB5_10;
$L__BB5_11:
// Second region.
add.s32 %r45, %r16, %r44;
bra.uni $L__BB5_13;
$L__BB5_10:
// Third region.
add.s32 %r45, %r17, %r44;
$L__BB5_13:
cvt.u64.u32 %rd15, %r45;
add.s64 %rd16, %rd2, %rd15;
ld.global.u8 %rs4, [%rd16];
add.s32 %r40, %r44, %r2;
cvt.u64.u32 %rd17, %r40;
add.s64 %rd18, %rd1, %rd17;
st.global.u8 [%rd18], %rs4;
add.s32 %r44, %r44, %r15;
setp.lt.u32 %p10, %r44, %r3;
@%p10 bra $L__BB5_8;
$L__BB5_21:
// Common exit for every path.
ret;
}
// .globl j2k_htj2k_packetize_cleanup
.visible .entry j2k_htj2k_packetize_cleanup(
.param .u64 j2k_htj2k_packetize_cleanup_param_0,
.param .u64 j2k_htj2k_packetize_cleanup_param_1,
.param .u64 j2k_htj2k_packetize_cleanup_param_2,
.param .u64 j2k_htj2k_packetize_cleanup_param_3,
.param .u64 j2k_htj2k_packetize_cleanup_param_4,
.param .u64 j2k_htj2k_packetize_cleanup_param_5,
.param .u64 j2k_htj2k_packetize_cleanup_param_6,
.param .u64 j2k_htj2k_packetize_cleanup_param_7,
.param .u64 j2k_htj2k_packetize_cleanup_param_8,
.param .u64 j2k_htj2k_packetize_cleanup_param_9,
.param .u64 j2k_htj2k_packetize_cleanup_param_10,
.param .u64 j2k_htj2k_packetize_cleanup_param_11
)
{
.local .align 16 .b8 __local_depot6[49632];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<731>;
.reg .b16 %rs<137>;
.reg .b32 %r<3496>;
.reg .b64 %rd<394>;
// demoted variable
.shared .align 4 .u32 _ZZ32 j2k_htj2k_packetize_cleanupE11shared_code;
// demoted variable
.shared .align 4 .u32 _ZZ32 j2k_htj2k_packetize_cleanupE17shared_header_len;
// demoted variable
.shared .align 4 .u32 _ZZ32 j2k_htj2k_packetize_cleanupE15shared_body_len;
mov.u64 %SPL, __local_depot6;
ld.param.u64 %rd57, [ j2k_htj2k_packetize_cleanup_param_1];
ld.param.u64 %rd58, [ j2k_htj2k_packetize_cleanup_param_2];
ld.param.u64 %rd59, [ j2k_htj2k_packetize_cleanup_param_3];
ld.param.u64 %rd60, [ j2k_htj2k_packetize_cleanup_param_4];
ld.param.u64 %rd62, [ j2k_htj2k_packetize_cleanup_param_6];
ld.param.u64 %rd63, [ j2k_htj2k_packetize_cleanup_param_7];
ld.param.u64 %rd65, [ j2k_htj2k_packetize_cleanup_param_9];
ld.param.u64 %rd67, [ j2k_htj2k_packetize_cleanup_param_11];
add.u64 %rd1, %SPL, 64;
add.u64 %rd2, %SPL, 24844;
mov.u32 %r1511, %ctaid.x;
cvt.u64.u32 %rd3, %r1511;
setp.ge.u64 %p1, %rd3, %rd67;
@%p1 bra $L__BB6_608;
cvta.to.global.u64 %rd70, %rd65;
cvta.to.global.u64 %rd71, %rd58;
mul.lo.s64 %rd72, %rd3, 28;
add.s64 %rd73, %rd71, %rd72;
ld.global.u32 %r1, [%rd73+8];
ld.global.u32 %r2, [%rd73+12];
ld.global.u32 %r3, [%rd73+20];
ld.global.u32 %r4, [%rd73+24];
ld.global.u32 %rd74, [%rd73+16];
add.s64 %rd4, %rd70, %rd74;
mov.u32 %r1512, %tid.x;
setp.ne.s32 %p2, %r1512, 0;
@%p2 bra $L__BB6_593;
setp.eq.s32 %p3, %r2, 0;
mov.u32 %r2752, 0;
@%p3 bra $L__BB6_16;
cvta.to.global.u64 %rd5, %rd59;
cvta.to.global.u64 %rd6, %rd60;
mov.u32 %r1515, 0;
mov.u32 %r2748, %r1515;
mov.u32 %r2752, %r1515;
$L__BB6_4:
add.s32 %r1521, %r2748, %r1;
mul.wide.u32 %rd75, %r1521, 16;
add.s64 %rd7, %rd5, %rd75;
ld.global.u32 %r1522, [%rd7+8];
setp.eq.s32 %p4, %r1522, 0;
ld.global.u32 %r1523, [%rd7+12];
setp.eq.s32 %p5, %r1523, 0;
or.pred %p6, %p4, %p5;
mul.lo.s32 %r1524, %r1523, %r1522;
ld.global.u32 %r7, [%rd7+4];
setp.ne.s32 %p7, %r1524, %r7;
mov.u32 %r3482, 7;
mov.u32 %r3481, 1;
or.pred %p8, %p6, %p7;
mov.u32 %r3483, %r1515;
mov.u32 %r3484, %r1515;
mov.u32 %r3485, %r1515;
@%p8 bra $L__BB6_592;
setp.eq.s32 %p9, %r7, 0;
@%p9 bra $L__BB6_15;
ld.global.u32 %r8, [%rd7];
mov.u32 %r3483, 0;
mov.u32 %r2750, %r3483;
$L__BB6_7:
add.s32 %r1531, %r2750, %r8;
mul.wide.u32 %rd76, %r1531, 36;
add.s64 %rd8, %rd6, %rd76;
ld.global.u32 %rd9, [%rd8+4];
ld.global.u32 %rd10, [%rd8+8];
ld.global.u32 %rd11, [%rd8+12];
ld.global.u32 %r11, [%rd8+16];
setp.eq.s32 %p10, %r11, 0;
selp.b32 %r2752, %r2752, 1, %p10;
setp.gt.u32 %p11, %r11, 164;
mov.u32 %r3482, 1;
mov.u32 %r3481, 2;
mov.u32 %r3484, %r3483;
mov.u32 %r3485, %r3483;
@%p11 bra $L__BB6_592;
ld.global.u32 %r1537, [%rd8];
cvt.u64.u32 %rd77, %r1537;
cvt.u32.u64 %r1538, %rd9;
add.s32 %r1539, %r1538, %r1537;
setp.lt.u32 %p12, %r1539, %r1538;
add.s64 %rd78, %rd9, %rd77;
setp.gt.u64 %p13, %rd78, %rd57;
or.pred %p14, %p12, %p13;
mov.u32 %r3482, 2;
mov.u32 %r3481, 1;
mov.u32 %r3484, %r3483;
mov.u32 %r3485, %r3483;
@%p14 bra $L__BB6_592;
mov.u32 %r3481, 1;
setp.eq.s32 %p725, %r11, 0;
@%p725 bra $L__BB6_13;
setp.eq.s32 %p16, %r11, 1;
@%p16 bra $L__BB6_12;
bra.uni $L__BB6_11;
$L__BB6_12:
cvt.u32.u64 %r2718, %rd9;
cvt.u32.u64 %r1553, %rd10;
setp.eq.s32 %p26, %r1553, 0;
selp.b32 %r1555, %r2718, %r1553, %p26;
setp.ne.s32 %p27, %r1555, %r2718;
cvt.u32.u64 %r1556, %rd11;
setp.ne.s32 %p28, %r1556, 0;
or.pred %p29, %p28, %p27;
mov.u32 %r3482, 11;
mov.u32 %r3484, %r3483;
mov.u32 %r3485, %r3483;
@%p29 bra $L__BB6_592;
bra.uni $L__BB6_14;
$L__BB6_13:
cvt.u32.u64 %r2720, %rd9;
cvt.u32.u64 %r1563, %rd10;
or.b32 %r1564, %r1563, %r2720;
cvt.u32.u64 %r1565, %rd11;
or.b32 %r1566, %r1564, %r1565;
setp.ne.s32 %p30, %r1566, 0;
mov.u32 %r3482, 10;
mov.u32 %r3484, %r3483;
mov.u32 %r3485, %r3483;
@%p30 bra $L__BB6_592;
bra.uni $L__BB6_14;
$L__BB6_11:
cvt.u32.u64 %r1545, %rd10;
setp.eq.s32 %p17, %r1545, 0;
cvt.u32.u64 %r1546, %rd11;
setp.eq.s32 %p18, %r1546, 0;
or.pred %p19, %p17, %p18;
add.s64 %rd79, %rd11, %rd10;
setp.ne.s64 %p20, %rd79, %rd9;
or.pred %p21, %p19, %p20;
add.s32 %r1547, %r1545, -2;
setp.gt.u32 %p22, %r1547, 65532;
or.pred %p23, %p22, %p21;
setp.gt.u32 %p24, %r1546, 2046;
or.pred %p25, %p24, %p23;
mov.u32 %r3482, 12;
mov.u32 %r3484, %r3483;
mov.u32 %r3485, %r3483;
@%p25 bra $L__BB6_592;
$L__BB6_14:
add.s32 %r2750, %r2750, 1;
setp.lt.u32 %p31, %r2750, %r7;
@%p31 bra $L__BB6_7;
$L__BB6_15:
add.s32 %r2748, %r2748, 1;
setp.lt.u32 %p32, %r2748, %r2;
@%p32 bra $L__BB6_4;
$L__BB6_16:
setp.eq.s32 %p33, %r2752, 0;
@%p33 bra $L__BB6_590;
setp.eq.s32 %p726, %r2, 0;
mov.u32 %r3419, 0;
mov.u32 %r3457, 1;
mov.u32 %r3458, %r3457;
mov.u32 %r3422, %r3419;
mov.u32 %r3423, %r3419;
@%p726 bra $L__BB6_242;
ld.param.u64 %rd383, [ j2k_htj2k_packetize_cleanup_param_5];
ld.param.u64 %rd382, [ j2k_htj2k_packetize_cleanup_param_3];
mov.u32 %r2754, 0;
mov.u32 %r3458, 1;
cvta.to.global.u64 %rd80, %rd382;
add.s64 %rd13, %rd1, 24576;
add.s64 %rd14, %rd2, 24576;
cvta.to.global.u64 %rd148, %rd383;
mov.u32 %r3423, %r2754;
mov.u32 %r3422, %r2754;
mov.u32 %r3457, %r3458;
mov.u32 %r3419, %r2754;
$L__BB6_19:
add.s32 %r1578, %r2754, %r1;
mul.wide.u32 %rd81, %r1578, 16;
add.s64 %rd82, %rd80, %rd81;
ld.global.u32 %r23, [%rd82];
ld.global.u32 %r24, [%rd82+4];
ld.global.u32 %r25, [%rd82+8];
setp.eq.s32 %p35, %r25, 0;
ld.global.u32 %r26, [%rd82+12];
setp.eq.s32 %p36, %r26, 0;
or.pred %p37, %p35, %p36;
@%p37 bra $L__BB6_229;
bra.uni $L__BB6_20;
$L__BB6_229:
mov.u32 %r1975, 1;
st.local.u32 [%rd13+200], %r1975;
bra.uni $L__BB6_230;
$L__BB6_20:
mul.lo.s32 %r2766, %r25, %r26;
div.u32 %r1579, %r2766, %r25;
setp.ne.s32 %p38, %r1579, %r26;
@%p38 bra $L__BB6_81;
setp.gt.u32 %p39, %r2766, 2048;
@%p39 bra $L__BB6_228;
bra.uni $L__BB6_22;
$L__BB6_228:
mov.u32 %r1974, 1;
st.local.u32 [%rd13+200], %r1974;
bra.uni $L__BB6_230;
$L__BB6_22:
st.local.u32 [%rd13], %r25;
st.local.u32 [%rd13+64], %r26;
mov.u32 %r1581, 0;
st.local.u32 [%rd13+128], %r1581;
or.b32 %r1582, %r25, %r26;
and.b32 %r28, %r1582, -2;
setp.eq.s32 %p40, %r28, 0;
mov.u32 %r2760, 1;
mov.u32 %r2761, %r2766;
@%p40 bra $L__BB6_85;
add.s32 %r1583, %r25, 1;
shr.u32 %r29, %r1583, 1;
add.s32 %r1584, %r26, 1;
shr.u32 %r30, %r1584, 1;
mul.lo.s32 %r31, %r29, %r30;
setp.eq.s32 %p41, %r29, 0;
@%p41 bra $L__BB6_25;
div.u32 %r1585, %r31, %r29;
setp.ne.s32 %p42, %r1585, %r30;
@%p42 bra $L__BB6_81;
$L__BB6_25:
add.s32 %r2761, %r2766, %r31;
setp.lt.u32 %p43, %r2761, %r2766;
setp.gt.u32 %p44, %r2761, 2048;
or.pred %p45, %p43, %p44;
@%p45 bra $L__BB6_228;
st.local.u32 [%rd13+4], %r29;
st.local.u32 [%rd13+68], %r30;
st.local.u32 [%rd13+132], %r2766;
or.b32 %r1587, %r29, %r30;
and.b32 %r1588, %r1587, 2147483646;
setp.eq.s32 %p46, %r1588, 0;
mov.u32 %r2760, 2;
@%p46 bra $L__BB6_85;
add.s32 %r1589, %r29, 1;
shr.u32 %r33, %r1589, 1;
add.s32 %r1590, %r30, 1;
shr.u32 %r34, %r1590, 1;
mul.lo.s32 %r35, %r33, %r34;
setp.eq.s32 %p47, %r33, 0;
@%p47 bra $L__BB6_29;
div.u32 %r1591, %r35, %r33;
setp.ne.s32 %p48, %r1591, %r34;
@%p48 bra $L__BB6_81;
$L__BB6_29:
add.s32 %r36, %r2761, %r35;
setp.lt.u32 %p49, %r36, %r2761;
setp.gt.u32 %p50, %r36, 2048;
or.pred %p51, %p49, %p50;
@%p51 bra $L__BB6_228;
st.local.u32 [%rd13+8], %r33;
st.local.u32 [%rd13+72], %r34;
st.local.u32 [%rd13+136], %r2761;
or.b32 %r1593, %r33, %r34;
and.b32 %r1594, %r1593, 2147483646;
setp.eq.s32 %p52, %r1594, 0;
mov.u32 %r2760, 3;
mov.u32 %r2761, %r36;
@%p52 bra $L__BB6_85;
add.s32 %r1595, %r33, 1;
shr.u32 %r37, %r1595, 1;
add.s32 %r1596, %r34, 1;
shr.u32 %r38, %r1596, 1;
mul.lo.s32 %r39, %r37, %r38;
setp.eq.s32 %p53, %r37, 0;
@%p53 bra $L__BB6_33;
div.u32 %r1597, %r39, %r37;
setp.ne.s32 %p54, %r1597, %r38;
@%p54 bra $L__BB6_81;
$L__BB6_33:
add.s32 %r2761, %r36, %r39;
setp.lt.u32 %p55, %r2761, %r36;
setp.gt.u32 %p56, %r2761, 2048;
or.pred %p57, %p55, %p56;
@%p57 bra $L__BB6_228;
st.local.u32 [%rd13+12], %r37;
st.local.u32 [%rd13+76], %r38;
st.local.u32 [%rd13+140], %r36;
or.b32 %r1599, %r37, %r38;
and.b32 %r1600, %r1599, 2147483646;
setp.eq.s32 %p58, %r1600, 0;
mov.u32 %r2760, 4;
@%p58 bra $L__BB6_85;
add.s32 %r1601, %r37, 1;
shr.u32 %r41, %r1601, 1;
add.s32 %r1602, %r38, 1;
shr.u32 %r42, %r1602, 1;
mul.lo.s32 %r43, %r41, %r42;
setp.eq.s32 %p59, %r41, 0;
@%p59 bra $L__BB6_37;
div.u32 %r1603, %r43, %r41;
setp.ne.s32 %p60, %r1603, %r42;
@%p60 bra $L__BB6_81;
$L__BB6_37:
add.s32 %r44, %r2761, %r43;
setp.lt.u32 %p61, %r44, %r2761;
setp.gt.u32 %p62, %r44, 2048;
or.pred %p63, %p61, %p62;
@%p63 bra $L__BB6_228;
st.local.u32 [%rd13+16], %r41;
st.local.u32 [%rd13+80], %r42;
st.local.u32 [%rd13+144], %r2761;
or.b32 %r1605, %r41, %r42;
and.b32 %r1606, %r1605, 2147483646;
setp.eq.s32 %p64, %r1606, 0;
mov.u32 %r2760, 5;
mov.u32 %r2761, %r44;
@%p64 bra $L__BB6_85;
add.s32 %r1607, %r41, 1;
shr.u32 %r45, %r1607, 1;
add.s32 %r1608, %r42, 1;
shr.u32 %r46, %r1608, 1;
mul.lo.s32 %r47, %r45, %r46;
setp.eq.s32 %p65, %r45, 0;
@%p65 bra $L__BB6_41;
div.u32 %r1609, %r47, %r45;
setp.ne.s32 %p66, %r1609, %r46;
@%p66 bra $L__BB6_81;
$L__BB6_41:
add.s32 %r2761, %r44, %r47;
setp.lt.u32 %p67, %r2761, %r44;
setp.gt.u32 %p68, %r2761, 2048;
or.pred %p69, %p67, %p68;
@%p69 bra $L__BB6_228;
st.local.u32 [%rd13+20], %r45;
st.local.u32 [%rd13+84], %r46;
st.local.u32 [%rd13+148], %r44;
or.b32 %r1611, %r45, %r46;
and.b32 %r1612, %r1611, 2147483646;
setp.eq.s32 %p70, %r1612, 0;
mov.u32 %r2760, 6;
@%p70 bra $L__BB6_85;
add.s32 %r1613, %r45, 1;
shr.u32 %r49, %r1613, 1;
add.s32 %r1614, %r46, 1;
shr.u32 %r50, %r1614, 1;
mul.lo.s32 %r51, %r49, %r50;
setp.eq.s32 %p71, %r49, 0;
@%p71 bra $L__BB6_45;
div.u32 %r1615, %r51, %r49;
setp.ne.s32 %p72, %r1615, %r50;
@%p72 bra $L__BB6_81;
$L__BB6_45:
add.s32 %r52, %r2761, %r51;
setp.lt.u32 %p73, %r52, %r2761;
setp.gt.u32 %p74, %r52, 2048;
or.pred %p75, %p73, %p74;
@%p75 bra $L__BB6_228;
st.local.u32 [%rd13+24], %r49;
st.local.u32 [%rd13+88], %r50;
st.local.u32 [%rd13+152], %r2761;
or.b32 %r1617, %r49, %r50;
and.b32 %r1618, %r1617, 2147483646;
setp.eq.s32 %p76, %r1618, 0;
mov.u32 %r2760, 7;
mov.u32 %r2761, %r52;
@%p76 bra $L__BB6_85;
add.s32 %r1619, %r49, 1;
shr.u32 %r53, %r1619, 1;
add.s32 %r1620, %r50, 1;
shr.u32 %r54, %r1620, 1;
mul.lo.s32 %r55, %r53, %r54;
setp.eq.s32 %p77, %r53, 0;
@%p77 bra $L__BB6_49;
div.u32 %r1621, %r55, %r53;
setp.ne.s32 %p78, %r1621, %r54;
@%p78 bra $L__BB6_81;
$L__BB6_49:
add.s32 %r2761, %r52, %r55;
setp.lt.u32 %p79, %r2761, %r52;
setp.gt.u32 %p80, %r2761, 2048;
or.pred %p81, %p79, %p80;
@%p81 bra $L__BB6_228;
st.local.u32 [%rd13+28], %r53;
st.local.u32 [%rd13+92], %r54;
st.local.u32 [%rd13+156], %r52;
or.b32 %r1623, %r53, %r54;
and.b32 %r1624, %r1623, 2147483646;
setp.eq.s32 %p82, %r1624, 0;
mov.u32 %r2760, 8;
@%p82 bra $L__BB6_85;
add.s32 %r1625, %r53, 1;
shr.u32 %r57, %r1625, 1;
add.s32 %r1626, %r54, 1;
shr.u32 %r58, %r1626, 1;
mul.lo.s32 %r59, %r57, %r58;
setp.eq.s32 %p83, %r57, 0;
@%p83 bra $L__BB6_53;
div.u32 %r1627, %r59, %r57;
setp.ne.s32 %p84, %r1627, %r58;
@%p84 bra $L__BB6_81;
$L__BB6_53:
add.s32 %r60, %r2761, %r59;
setp.lt.u32 %p85, %r60, %r2761;
setp.gt.u32 %p86, %r60, 2048;
or.pred %p87, %p85, %p86;
@%p87 bra $L__BB6_228;
st.local.u32 [%rd13+32], %r57;
st.local.u32 [%rd13+96], %r58;
st.local.u32 [%rd13+160], %r2761;
or.b32 %r1629, %r57, %r58;
and.b32 %r1630, %r1629, 2147483646;
setp.eq.s32 %p88, %r1630, 0;
mov.u32 %r2760, 9;
mov.u32 %r2761, %r60;
@%p88 bra $L__BB6_85;
add.s32 %r1631, %r57, 1;
shr.u32 %r61, %r1631, 1;
add.s32 %r1632, %r58, 1;
shr.u32 %r62, %r1632, 1;
mul.lo.s32 %r63, %r61, %r62;
setp.eq.s32 %p89, %r61, 0;
@%p89 bra $L__BB6_57;
div.u32 %r1633, %r63, %r61;
setp.ne.s32 %p90, %r1633, %r62;
@%p90 bra $L__BB6_81;
$L__BB6_57:
add.s32 %r2761, %r60, %r63;
setp.lt.u32 %p91, %r2761, %r60;
setp.gt.u32 %p92, %r2761, 2048;
or.pred %p93, %p91, %p92;
@%p93 bra $L__BB6_228;
st.local.u32 [%rd13+36], %r61;
st.local.u32 [%rd13+100], %r62;
st.local.u32 [%rd13+164], %r60;
or.b32 %r1635, %r61, %r62;
and.b32 %r1636, %r1635, 2147483646;
setp.eq.s32 %p94, %r1636, 0;
mov.u32 %r2760, 10;
@%p94 bra $L__BB6_85;
add.s32 %r1637, %r61, 1;
shr.u32 %r65, %r1637, 1;
add.s32 %r1638, %r62, 1;
shr.u32 %r66, %r1638, 1;
mul.lo.s32 %r67, %r65, %r66;
setp.eq.s32 %p95, %r65, 0;
@%p95 bra $L__BB6_61;
div.u32 %r1639, %r67, %r65;
setp.ne.s32 %p96, %r1639, %r66;
@%p96 bra $L__BB6_81;
$L__BB6_61:
add.s32 %r68, %r2761, %r67;
setp.lt.u32 %p97, %r68, %r2761;
setp.gt.u32 %p98, %r68, 2048;
or.pred %p99, %p97, %p98;
@%p99 bra $L__BB6_228;
st.local.u32 [%rd13+40], %r65;
st.local.u32 [%rd13+104], %r66;
st.local.u32 [%rd13+168], %r2761;
or.b32 %r1641, %r65, %r66;
and.b32 %r1642, %r1641, 2147483646;
setp.eq.s32 %p100, %r1642, 0;
mov.u32 %r2760, 11;
mov.u32 %r2761, %r68;
@%p100 bra $L__BB6_85;
add.s32 %r1643, %r65, 1;
shr.u32 %r69, %r1643, 1;
add.s32 %r1644, %r66, 1;
shr.u32 %r70, %r1644, 1;
mul.lo.s32 %r71, %r69, %r70;
setp.eq.s32 %p101, %r69, 0;
@%p101 bra $L__BB6_65;
div.u32 %r1645, %r71, %r69;
setp.ne.s32 %p102, %r1645, %r70;
@%p102 bra $L__BB6_81;
$L__BB6_65:
add.s32 %r2761, %r68, %r71;
setp.lt.u32 %p103, %r2761, %r68;
setp.gt.u32 %p104, %r2761, 2048;
or.pred %p105, %p103, %p104;
@%p105 bra $L__BB6_228;
st.local.u32 [%rd13+44], %r69;
st.local.u32 [%rd13+108], %r70;
st.local.u32 [%rd13+172], %r68;
or.b32 %r1647, %r69, %r70;
and.b32 %r1648, %r1647, 2147483646;
setp.eq.s32 %p106, %r1648, 0;
mov.u32 %r2760, 12;
@%p106 bra $L__BB6_85;
add.s32 %r1649, %r69, 1;
shr.u32 %r73, %r1649, 1;
add.s32 %r1650, %r70, 1;
shr.u32 %r74, %r1650, 1;
mul.lo.s32 %r75, %r73, %r74;
setp.eq.s32 %p107, %r73, 0;
@%p107 bra $L__BB6_69;
div.u32 %r1651, %r75, %r73;
setp.ne.s32 %p108, %r1651, %r74;
@%p108 bra $L__BB6_81;
$L__BB6_69:
add.s32 %r76, %r2761, %r75;
setp.lt.u32 %p109, %r76, %r2761;
setp.gt.u32 %p110, %r76, 2048;
or.pred %p111, %p109, %p110;
@%p111 bra $L__BB6_228;
st.local.u32 [%rd13+48], %r73;
st.local.u32 [%rd13+112], %r74;
st.local.u32 [%rd13+176], %r2761;
or.b32 %r1653, %r73, %r74;
and.b32 %r1654, %r1653, 2147483646;
setp.eq.s32 %p112, %r1654, 0;
mov.u32 %r2760, 13;
mov.u32 %r2761, %r76;
@%p112 bra $L__BB6_85;
add.s32 %r1655, %r73, 1;
shr.u32 %r77, %r1655, 1;
add.s32 %r1656, %r74, 1;
shr.u32 %r78, %r1656, 1;
mul.lo.s32 %r79, %r77, %r78;
setp.eq.s32 %p113, %r77, 0;
@%p113 bra $L__BB6_73;
div.u32 %r1657, %r79, %r77;
setp.ne.s32 %p114, %r1657, %r78;
@%p114 bra $L__BB6_81;
$L__BB6_73:
add.s32 %r2761, %r76, %r79;
setp.lt.u32 %p115, %r2761, %r76;
setp.gt.u32 %p116, %r2761, 2048;
or.pred %p117, %p115, %p116;
@%p117 bra $L__BB6_228;
st.local.u32 [%rd13+52], %r77;
st.local.u32 [%rd13+116], %r78;
st.local.u32 [%rd13+180], %r76;
or.b32 %r1659, %r77, %r78;
and.b32 %r1660, %r1659, 2147483646;
setp.eq.s32 %p118, %r1660, 0;
mov.u32 %r2760, 14;
@%p118 bra $L__BB6_85;
add.s32 %r1661, %r77, 1;
shr.u32 %r81, %r1661, 1;
add.s32 %r1662, %r78, 1;
shr.u32 %r82, %r1662, 1;
mul.lo.s32 %r83, %r81, %r82;
setp.eq.s32 %p119, %r81, 0;
@%p119 bra $L__BB6_77;
div.u32 %r1663, %r83, %r81;
setp.ne.s32 %p120, %r1663, %r82;
@%p120 bra $L__BB6_81;
$L__BB6_77:
add.s32 %r84, %r2761, %r83;
setp.lt.u32 %p121, %r84, %r2761;
setp.gt.u32 %p122, %r84, 2048;
or.pred %p123, %p121, %p122;
@%p123 bra $L__BB6_228;
st.local.u32 [%rd13+56], %r81;
st.local.u32 [%rd13+120], %r82;
st.local.u32 [%rd13+184], %r2761;
or.b32 %r1665, %r81, %r82;
and.b32 %r1666, %r1665, 2147483646;
setp.eq.s32 %p124, %r1666, 0;
mov.u32 %r2760, 15;
mov.u32 %r2761, %r84;
@%p124 bra $L__BB6_85;
add.s32 %r1667, %r81, 1;
shr.u32 %r85, %r1667, 1;
add.s32 %r1668, %r82, 1;
shr.u32 %r86, %r1668, 1;
mul.lo.s32 %r87, %r85, %r86;
setp.eq.s32 %p125, %r85, 0;
@%p125 bra $L__BB6_82;
div.u32 %r1669, %r87, %r85;
setp.eq.s32 %p126, %r1669, %r86;
@%p126 bra $L__BB6_82;
bra.uni $L__BB6_81;
$L__BB6_82:
add.s32 %r2761, %r84, %r87;
setp.lt.u32 %p127, %r2761, %r84;
setp.gt.u32 %p128, %r2761, 2048;
or.pred %p129, %p127, %p128;
@%p129 bra $L__BB6_228;
st.local.u32 [%rd13+60], %r85;
st.local.u32 [%rd13+124], %r86;
st.local.u32 [%rd13+188], %r84;
or.b32 %r1672, %r85, %r86;
and.b32 %r1673, %r1672, 2147483646;
setp.eq.s32 %p130, %r1673, 0;
mov.u32 %r2760, 16;
@%p130 bra $L__BB6_85;
mov.u32 %r1674, 1;
st.local.u32 [%rd13+200], %r1674;
bra.uni $L__BB6_230;
$L__BB6_81:
mov.u32 %r1670, 1;
st.local.u32 [%rd13+200], %r1670;
$L__BB6_230:
mov.u32 %r1976, 1;
st.local.u32 [%rd13+200], %r1976;
st.local.u32 [%rd14+200], %r1976;
$L__BB6_231:
ld.local.u32 %r1982, [%rd14+200];
ld.local.u32 %r1983, [%rd13+200];
or.b32 %r1984, %r1982, %r1983;
setp.ne.s32 %p287, %r1984, 0;
mov.u32 %r3483, 0;
mov.u32 %r3482, 8;
mov.u32 %r3481, 2;
mov.u32 %r3484, %r3483;
mov.u32 %r3485, %r3483;
@%p287 bra $L__BB6_592;
setp.eq.s32 %p288, %r24, 0;
@%p288 bra $L__BB6_241;
mov.u32 %r2804, 0;
$L__BB6_234:
cvta.to.global.u64 %rd377, %rd60;
add.s32 %r1986, %r2804, %r23;
mul.wide.u32 %rd190, %r1986, 36;
add.s64 %rd191, %rd377, %rd190;
add.s64 %rd33, %rd191, 4;
ld.global.u32 %r271, [%rd191+4];
ld.global.u32 %r272, [%rd191+8];
ld.global.u32 %r273, [%rd191+12];
ld.global.u32 %r274, [%rd191+16];
ld.global.u32 %r3297, [%rd191+24];
div.u32 %r2937, %r2804, %r25;
mul.lo.s32 %r1987, %r2937, %r25;
sub.s32 %r2938, %r2804, %r1987;
ld.global.u32 %r1988, [%rd191+28];
setp.eq.s32 %p289, %r1988, 0;
@%p289 bra $L__BB6_263;
setp.ne.s32 %p290, %r274, 0;
shl.b32 %r3457, %r3457, 1;
cvt.u64.u32 %rd192, %r3419;
add.s64 %rd34, %rd4, %rd192;
@%p290 bra $L__BB6_259;
bra.uni $L__BB6_236;
$L__BB6_259:
or.b32 %r3457, %r3457, 1;
setp.eq.s32 %p296, %r3422, 0;
selp.b32 %r289, 8, 7, %p296;
add.s32 %r3458, %r3458, 1;
setp.lt.u32 %p297, %r3458, %r289;
@%p297 bra $L__BB6_373;
sub.s32 %r3458, %r3458, %r289;
setp.ge.u32 %p298, %r3419, %r3;
mov.u32 %r2815, 1;
@%p298 bra $L__BB6_262;
shr.u32 %r1996, %r3457, %r3458;
cvt.u16.u32 %rs3, %r1996;
and.b16 %rs4, %rs3, 255;
st.global.u8 [%rd34], %rs3;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p299, %rs4, 255;
selp.u32 %r3422, 1, 0, %p299;
mov.u32 %r2815, %r3423;
$L__BB6_262:
mov.u32 %r1997, -1;
shl.b32 %r1998, %r1997, %r3458;
not.b32 %r1999, %r1998;
setp.eq.s32 %p300, %r3458, 0;
selp.b32 %r2000, 0, %r1999, %p300;
and.b32 %r3457, %r2000, %r3457;
mov.u32 %r3423, %r2815;
bra.uni $L__BB6_373;
$L__BB6_263:
setp.ne.s32 %p301, %r274, 0;
ld.global.u32 %r2006, [%rd33+28];
setp.ne.s32 %p302, %r2006, %r4;
and.pred %p303, %p301, %p302;
mov.u32 %r3483, 0;
mov.u32 %r3482, 9;
mov.u32 %r3481, 1;
mov.u32 %r3484, %r3483;
mov.u32 %r3485, %r3483;
@%p303 bra $L__BB6_592;
ld.global.u32 %r298, [%rd33+16];
ld.local.u32 %r2823, [%rd13+192];
setp.eq.s32 %p304, %r2823, 0;
@%p304 bra $L__BB6_272;
add.s32 %r2008, %r2823, -1;
setp.lt.u32 %p305, %r2008, 3;
mov.u32 %r2820, 0;
mov.u32 %r2821, %r2937;
mov.u32 %r2822, %r2938;
@%p305 bra $L__BB6_268;
and.b32 %r2691, %r2823, 3;
sub.s32 %r2819, %r2823, %r2691;
mov.u32 %r2820, 0;
mov.u32 %r2821, %r2937;
mov.u32 %r2822, %r2938;
$L__BB6_267:
add.u64 %rd375, %SPL, 0;
mul.wide.u32 %rd194, %r2820, 4;
add.s64 %rd195, %rd1, %rd194;
ld.local.u32 %r2010, [%rd195+24576];
ld.local.u32 %r2011, [%rd195+24704];
add.s32 %r2012, %r2011, %r2822;
mad.lo.s32 %r2013, %r2010, %r2821, %r2012;
add.s64 %rd196, %rd375, %rd194;
st.local.u32 [%rd196], %r2013;
ld.local.u32 %r2014, [%rd195+24580];
shr.u32 %r2015, %r2821, 1;
ld.local.u32 %r2016, [%rd195+24708];
shr.u32 %r2017, %r2822, 1;
add.s32 %r2018, %r2016, %r2017;
mad.lo.s32 %r2019, %r2014, %r2015, %r2018;
st.local.u32 [%rd196+4], %r2019;
ld.local.u32 %r2020, [%rd195+24584];
shr.u32 %r2021, %r2821, 2;
ld.local.u32 %r2022, [%rd195+24712];
shr.u32 %r2023, %r2822, 2;
add.s32 %r2024, %r2022, %r2023;
mad.lo.s32 %r2025, %r2020, %r2021, %r2024;
st.local.u32 [%rd196+8], %r2025;
ld.local.u32 %r2026, [%rd195+24588];
shr.u32 %r2027, %r2821, 3;
ld.local.u32 %r2028, [%rd195+24716];
shr.u32 %r2029, %r2822, 3;
add.s32 %r2030, %r2028, %r2029;
mad.lo.s32 %r2031, %r2026, %r2027, %r2030;
st.local.u32 [%rd196+12], %r2031;
shr.u32 %r2822, %r2822, 4;
shr.u32 %r2821, %r2821, 4;
add.s32 %r2820, %r2820, 4;
add.s32 %r2819, %r2819, -4;
setp.ne.s32 %p306, %r2819, 0;
@%p306 bra $L__BB6_267;
$L__BB6_268:
and.b32 %r2688, %r2823, 3;
setp.eq.s32 %p307, %r2688, 0;
@%p307 bra $L__BB6_272;
and.b32 %r2689, %r2823, 3;
mul.wide.u32 %rd197, %r2820, 4;
add.s64 %rd198, %rd1, %rd197;
add.s64 %rd36, %rd198, 24704;
ld.local.u32 %r2032, [%rd198+24576];
ld.local.u32 %r2033, [%rd198+24704];
add.s32 %r2034, %r2033, %r2822;
mad.lo.s32 %r2035, %r2032, %r2821, %r2034;
add.u64 %rd200, %SPL, 0;
add.s64 %rd37, %rd200, %rd197;
st.local.u32 [%rd37], %r2035;
setp.eq.s32 %p308, %r2689, 1;
@%p308 bra $L__BB6_272;
and.b32 %r2690, %r2823, 3;
shr.u32 %r2036, %r2821, 1;
ld.local.u32 %r2037, [%rd36+-124];
ld.local.u32 %r2038, [%rd36+4];
shr.u32 %r2039, %r2822, 1;
add.s32 %r2040, %r2038, %r2039;
mad.lo.s32 %r2041, %r2037, %r2036, %r2040;
st.local.u32 [%rd37+4], %r2041;
setp.eq.s32 %p309, %r2690, 2;
@%p309 bra $L__BB6_272;
shr.u32 %r2042, %r2821, 2;
ld.local.u32 %r2043, [%rd36+-120];
ld.local.u32 %r2044, [%rd36+8];
shr.u32 %r2045, %r2822, 2;
add.s32 %r2046, %r2044, %r2045;
mad.lo.s32 %r2047, %r2043, %r2042, %r2046;
st.local.u32 [%rd37+8], %r2047;
$L__BB6_272:
@%p304 bra $L__BB6_318;
mov.u32 %r2824, 0;
$L__BB6_274:
add.u64 %rd378, %SPL, 0;
add.s32 %r2823, %r2823, -1;
mul.wide.u32 %rd202, %r2823, 4;
add.s64 %rd203, %rd378, %rd202;
ld.local.u32 %r2049, [%rd203];
mul.wide.u32 %rd204, %r2049, 4;
add.s64 %rd205, %rd1, %rd204;
add.s64 %rd39, %rd205, 8192;
ld.local.u32 %r2921, [%rd205+8192];
max.u32 %r322, %r2921, %r2824;
ld.local.u32 %r2050, [%rd205+16384];
setp.ne.s32 %p311, %r2050, 0;
@%p311 bra $L__BB6_317;
add.s32 %r2051, %r4, 1;
ld.local.u32 %r323, [%rd39+-8192];
min.u32 %r324, %r323, %r2051;
setp.le.u32 %p312, %r324, %r322;
mov.u32 %r2869, %r3457;
@%p312 bra $L__BB6_310;
add.s32 %r2695, %r4, 1;
min.u32 %r2694, %r323, %r2695;
sub.s32 %r2053, %r2694, %r322;
and.b32 %r325, %r2053, 3;
setp.eq.s32 %p313, %r325, 0;
mov.u32 %r2854, %r322;
mov.u32 %r2869, %r3457;
@%p313 bra $L__BB6_292;
shl.b32 %r2869, %r3457, 1;
setp.eq.s32 %p314, %r3422, 0;
selp.b32 %r327, 8, 7, %p314;
add.s32 %r3458, %r3458, 1;
setp.lt.u32 %p315, %r3458, %r327;
@%p315 bra $L__BB6_281;
sub.s32 %r3458, %r3458, %r327;
setp.ge.u32 %p316, %r3419, %r3;
mov.u32 %r2832, 1;
@%p316 bra $L__BB6_280;
shl.b32 %r2701, %r3457, 1;
shr.u32 %r2055, %r2701, %r3458;
cvt.u16.u32 %rs5, %r2055;
and.b16 %rs6, %rs5, 255;
cvt.u64.u32 %rd206, %r3419;
add.s64 %rd207, %rd4, %rd206;
st.global.u8 [%rd207], %rs5;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p317, %rs6, 255;
selp.u32 %r3422, 1, 0, %p317;
mov.u32 %r2832, %r3423;
$L__BB6_280:
shl.b32 %r2700, %r3457, 1;
mov.u32 %r2056, -1;
shl.b32 %r2057, %r2056, %r3458;
xor.b32 %r2058, %r2057, -2;
setp.eq.s32 %p318, %r3458, 0;
selp.b32 %r2059, 0, %r2058, %p318;
and.b32 %r2869, %r2059, %r2700;
mov.u32 %r3423, %r2832;
$L__BB6_281:
add.s32 %r2705, %r4, 1;
min.u32 %r2704, %r323, %r2705;
sub.s32 %r2703, %r2704, %r322;
and.b32 %r2702, %r2703, 3;
add.s32 %r2854, %r322, 1;
setp.eq.s32 %p319, %r2702, 1;
@%p319 bra $L__BB6_292;
shl.b32 %r2842, %r2869, 1;
setp.eq.s32 %p320, %r3422, 0;
selp.b32 %r343, 8, 7, %p320;
add.s32 %r3458, %r3458, 1;
setp.lt.u32 %p321, %r3458, %r343;
@%p321 bra $L__BB6_286;
sub.s32 %r3458, %r3458, %r343;
setp.ge.u32 %p322, %r3419, %r3;
mov.u32 %r2840, 1;
@%p322 bra $L__BB6_285;
shl.b32 %r2711, %r2869, 1;
shr.u32 %r2061, %r2711, %r3458;
cvt.u16.u32 %rs7, %r2061;
and.b16 %rs8, %rs7, 255;
cvt.u64.u32 %rd208, %r3419;
add.s64 %rd209, %rd4, %rd208;
st.global.u8 [%rd209], %rs7;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p323, %rs8, 255;
selp.u32 %r3422, 1, 0, %p323;
mov.u32 %r2840, %r3423;
$L__BB6_285:
shl.b32 %r2710, %r2869, 1;
mov.u32 %r2062, -1;
shl.b32 %r2063, %r2062, %r3458;
xor.b32 %r2064, %r2063, -2;
setp.eq.s32 %p324, %r3458, 0;
selp.b32 %r2065, 0, %r2064, %p324;
and.b32 %r2842, %r2065, %r2710;
mov.u32 %r3423, %r2840;
$L__BB6_286:
add.s32 %r2709, %r4, 1;
min.u32 %r2708, %r323, %r2709;
sub.s32 %r2707, %r2708, %r322;
and.b32 %r2706, %r2707, 3;
add.s32 %r2854, %r322, 2;
setp.eq.s32 %p325, %r2706, 2;
mov.u32 %r2869, %r2842;
@%p325 bra $L__BB6_292;
shl.b32 %r2869, %r2842, 1;
setp.eq.s32 %p326, %r3422, 0;
selp.b32 %r359, 8, 7, %p326;
add.s32 %r3458, %r3458, 1;
setp.lt.u32 %p327, %r3458, %r359;
@%p327 bra $L__BB6_291;
sub.s32 %r3458, %r3458, %r359;
setp.ge.u32 %p328, %r3419, %r3;
mov.u32 %r2848, 1;
@%p328 bra $L__BB6_290;
shl.b32 %r2713, %r2842, 1;
shr.u32 %r2067, %r2713, %r3458;
cvt.u16.u32 %rs9, %r2067;
and.b16 %rs10, %rs9, 255;
cvt.u64.u32 %rd210, %r3419;
add.s64 %rd211, %rd4, %rd210;
st.global.u8 [%rd211], %rs9;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p329, %rs10, 255;
selp.u32 %r3422, 1, 0, %p329;
mov.u32 %r2848, %r3423;
$L__BB6_290:
shl.b32 %r2712, %r2842, 1;
mov.u32 %r2068, -1;
shl.b32 %r2069, %r2068, %r3458;
xor.b32 %r2070, %r2069, -2;
setp.eq.s32 %p330, %r3458, 0;
selp.b32 %r2071, 0, %r2070, %p330;
and.b32 %r2869, %r2071, %r2712;
mov.u32 %r3423, %r2848;
$L__BB6_291:
add.s32 %r2854, %r322, 3;
$L__BB6_292:
add.s32 %r2697, %r4, 1;
min.u32 %r2696, %r323, %r2697;
not.b32 %r2072, %r322;
add.s32 %r2073, %r2696, %r2072;
setp.lt.u32 %p331, %r2073, 3;
@%p331 bra $L__BB6_310;
$L__BB6_293:
shl.b32 %r2875, %r2869, 1;
setp.eq.s32 %p332, %r3422, 0;
selp.b32 %r392, 8, 7, %p332;
add.s32 %r2876, %r3458, 1;
setp.lt.u32 %p333, %r2876, %r392;
@%p333 bra $L__BB6_297;
sub.s32 %r2876, %r2876, %r392;
setp.ge.u32 %p334, %r3419, %r3;
mov.u32 %r2873, 1;
@%p334 bra $L__BB6_296;
shr.u32 %r2075, %r2875, %r2876;
cvt.u16.u32 %rs11, %r2075;
and.b16 %rs12, %rs11, 255;
cvt.u64.u32 %rd212, %r3419;
add.s64 %rd213, %rd4, %rd212;
st.global.u8 [%rd213], %rs11;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p335, %rs12, 255;
selp.u32 %r3422, 1, 0, %p335;
mov.u32 %r2873, %r3423;
$L__BB6_296:
mov.u32 %r2076, -1;
shl.b32 %r2077, %r2076, %r2876;
xor.b32 %r2078, %r2077, -2;
setp.eq.s32 %p336, %r2876, 0;
selp.b32 %r2079, 0, %r2078, %p336;
and.b32 %r2875, %r2079, %r2875;
mov.u32 %r3423, %r2873;
$L__BB6_297:
shl.b32 %r2883, %r2875, 1;
setp.eq.s32 %p337, %r3422, 0;
selp.b32 %r407, 8, 7, %p337;
add.s32 %r2884, %r2876, 1;
setp.lt.u32 %p338, %r2884, %r407;
@%p338 bra $L__BB6_301;
sub.s32 %r2884, %r2884, %r407;
setp.ge.u32 %p339, %r3419, %r3;
mov.u32 %r2881, 1;
@%p339 bra $L__BB6_300;
shr.u32 %r2081, %r2883, %r2884;
cvt.u16.u32 %rs13, %r2081;
and.b16 %rs14, %rs13, 255;
cvt.u64.u32 %rd214, %r3419;
add.s64 %rd215, %rd4, %rd214;
st.global.u8 [%rd215], %rs13;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p340, %rs14, 255;
selp.u32 %r3422, 1, 0, %p340;
mov.u32 %r2881, %r3423;
$L__BB6_300:
mov.u32 %r2082, -1;
shl.b32 %r2083, %r2082, %r2884;
xor.b32 %r2084, %r2083, -2;
setp.eq.s32 %p341, %r2884, 0;
selp.b32 %r2085, 0, %r2084, %p341;
and.b32 %r2883, %r2085, %r2883;
mov.u32 %r3423, %r2881;
$L__BB6_301:
shl.b32 %r2891, %r2883, 1;
setp.eq.s32 %p342, %r3422, 0;
selp.b32 %r422, 8, 7, %p342;
add.s32 %r2892, %r2884, 1;
setp.lt.u32 %p343, %r2892, %r422;
@%p343 bra $L__BB6_305;
sub.s32 %r2892, %r2892, %r422;
setp.ge.u32 %p344, %r3419, %r3;
mov.u32 %r2889, 1;
@%p344 bra $L__BB6_304;
shr.u32 %r2087, %r2891, %r2892;
cvt.u16.u32 %rs15, %r2087;
and.b16 %rs16, %rs15, 255;
cvt.u64.u32 %rd216, %r3419;
add.s64 %rd217, %rd4, %rd216;
st.global.u8 [%rd217], %rs15;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p345, %rs16, 255;
selp.u32 %r3422, 1, 0, %p345;
mov.u32 %r2889, %r3423;
$L__BB6_304:
mov.u32 %r2088, -1;
shl.b32 %r2089, %r2088, %r2892;
xor.b32 %r2090, %r2089, -2;
setp.eq.s32 %p346, %r2892, 0;
selp.b32 %r2091, 0, %r2090, %p346;
and.b32 %r2891, %r2091, %r2891;
mov.u32 %r3423, %r2889;
$L__BB6_305:
shl.b32 %r2869, %r2891, 1;
setp.eq.s32 %p347, %r3422, 0;
selp.b32 %r437, 8, 7, %p347;
add.s32 %r3458, %r2892, 1;
setp.lt.u32 %p348, %r3458, %r437;
@%p348 bra $L__BB6_309;
sub.s32 %r3458, %r3458, %r437;
setp.ge.u32 %p349, %r3419, %r3;
mov.u32 %r2897, 1;
@%p349 bra $L__BB6_308;
shr.u32 %r2093, %r2869, %r3458;
cvt.u16.u32 %rs17, %r2093;
and.b16 %rs18, %rs17, 255;
cvt.u64.u32 %rd218, %r3419;
add.s64 %rd219, %rd4, %rd218;
st.global.u8 [%rd219], %rs17;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p350, %rs18, 255;
selp.u32 %r3422, 1, 0, %p350;
mov.u32 %r2897, %r3423;
$L__BB6_308:
mov.u32 %r2094, -1;
shl.b32 %r2095, %r2094, %r3458;
xor.b32 %r2096, %r2095, -2;
setp.eq.s32 %p351, %r3458, 0;
selp.b32 %r2097, 0, %r2096, %p351;
and.b32 %r2869, %r2097, %r2869;
mov.u32 %r3423, %r2897;
$L__BB6_309:
add.s32 %r2699, %r4, 1;
min.u32 %r2698, %r323, %r2699;
add.s32 %r2854, %r2854, 4;
setp.lt.u32 %p352, %r2854, %r2698;
@%p352 bra $L__BB6_293;
$L__BB6_310:
setp.ge.u32 %p353, %r323, %r2051;
@%p353 bra $L__BB6_316;
shl.b32 %r2099, %r2869, 1;
or.b32 %r2912, %r2099, 1;
setp.eq.s32 %p354, %r3422, 0;
selp.b32 %r458, 8, 7, %p354;
add.s32 %r3458, %r3458, 1;
setp.lt.u32 %p355, %r3458, %r458;
@%p355 bra $L__BB6_315;
sub.s32 %r3458, %r3458, %r458;
setp.ge.u32 %p356, %r3419, %r3;
mov.u32 %r2910, 1;
@%p356 bra $L__BB6_314;
shr.u32 %r2101, %r2912, %r3458;
cvt.u16.u32 %rs19, %r2101;
and.b16 %rs20, %rs19, 255;
cvt.u64.u32 %rd220, %r3419;
add.s64 %rd221, %rd4, %rd220;
st.global.u8 [%rd221], %rs19;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p357, %rs20, 255;
selp.u32 %r3422, 1, 0, %p357;
mov.u32 %r2910, %r3423;
$L__BB6_314:
shl.b32 %r2728, %r2869, 1;
or.b32 %r2727, %r2728, 1;
mov.u32 %r2102, -1;
shl.b32 %r2103, %r2102, %r3458;
not.b32 %r2104, %r2103;
setp.eq.s32 %p358, %r3458, 0;
selp.b32 %r2105, 0, %r2104, %p358;
and.b32 %r2912, %r2105, %r2727;
mov.u32 %r3423, %r2910;
$L__BB6_315:
mov.u32 %r2106, 1;
st.local.u32 [%rd39+8192], %r2106;
mov.u32 %r2869, %r2912;
$L__BB6_316:
add.s32 %r2693, %r4, 1;
min.u32 %r2921, %r323, %r2693;
st.local.u32 [%rd39], %r2921;
mov.u32 %r3457, %r2869;
$L__BB6_317:
setp.ne.s32 %p359, %r2823, 0;
mov.u32 %r2824, %r2921;
@%p359 bra $L__BB6_274;
$L__BB6_318:
setp.eq.s32 %p360, %r274, 0;
@%p360 bra $L__BB6_240;
ld.local.u32 %r2939, [%rd14+192];
setp.eq.s32 %p361, %r2939, 0;
@%p361 bra $L__BB6_327;
add.s32 %r2108, %r2939, -1;
and.b32 %r490, %r2939, 3;
setp.lt.u32 %p362, %r2108, 3;
mov.u32 %r2936, 0;
@%p362 bra $L__BB6_323;
sub.s32 %r2935, %r2939, %r490;
mov.u32 %r2936, 0;
$L__BB6_322:
add.u64 %rd380, %SPL, 0;
mul.wide.u32 %rd223, %r2936, 4;
add.s64 %rd224, %rd2, %rd223;
ld.local.u32 %r2110, [%rd224+24576];
ld.local.u32 %r2111, [%rd224+24704];
add.s32 %r2112, %r2111, %r2938;
mad.lo.s32 %r2113, %r2110, %r2937, %r2112;
add.s64 %rd225, %rd380, %rd223;
st.local.u32 [%rd225], %r2113;
ld.local.u32 %r2114, [%rd224+24580];
shr.u32 %r2115, %r2937, 1;
ld.local.u32 %r2116, [%rd224+24708];
shr.u32 %r2117, %r2938, 1;
add.s32 %r2118, %r2116, %r2117;
mad.lo.s32 %r2119, %r2114, %r2115, %r2118;
st.local.u32 [%rd225+4], %r2119;
ld.local.u32 %r2120, [%rd224+24584];
shr.u32 %r2121, %r2937, 2;
ld.local.u32 %r2122, [%rd224+24712];
shr.u32 %r2123, %r2938, 2;
add.s32 %r2124, %r2122, %r2123;
mad.lo.s32 %r2125, %r2120, %r2121, %r2124;
st.local.u32 [%rd225+8], %r2125;
ld.local.u32 %r2126, [%rd224+24588];
shr.u32 %r2127, %r2937, 3;
ld.local.u32 %r2128, [%rd224+24716];
shr.u32 %r2129, %r2938, 3;
add.s32 %r2130, %r2128, %r2129;
mad.lo.s32 %r2131, %r2126, %r2127, %r2130;
st.local.u32 [%rd225+12], %r2131;
shr.u32 %r2938, %r2938, 4;
shr.u32 %r2937, %r2937, 4;
add.s32 %r2936, %r2936, 4;
add.s32 %r2935, %r2935, -4;
setp.ne.s32 %p363, %r2935, 0;
@%p363 bra $L__BB6_322;
$L__BB6_323:
and.b32 %r2714, %r2939, 3;
setp.eq.s32 %p364, %r2714, 0;
@%p364 bra $L__BB6_327;
and.b32 %r2715, %r2939, 3;
mul.wide.u32 %rd226, %r2936, 4;
add.s64 %rd227, %rd2, %rd226;
add.s64 %rd41, %rd227, 24704;
ld.local.u32 %r2132, [%rd227+24576];
ld.local.u32 %r2133, [%rd227+24704];
add.s32 %r2134, %r2133, %r2938;
mad.lo.s32 %r2135, %r2132, %r2937, %r2134;
add.u64 %rd229, %SPL, 0;
add.s64 %rd42, %rd229, %rd226;
st.local.u32 [%rd42], %r2135;
setp.eq.s32 %p365, %r2715, 1;
@%p365 bra $L__BB6_327;
and.b32 %r2716, %r2939, 3;
shr.u32 %r2136, %r2937, 1;
ld.local.u32 %r2137, [%rd41+-124];
ld.local.u32 %r2138, [%rd41+4];
shr.u32 %r2139, %r2938, 1;
add.s32 %r2140, %r2138, %r2139;
mad.lo.s32 %r2141, %r2137, %r2136, %r2140;
st.local.u32 [%rd42+4], %r2141;
setp.eq.s32 %p366, %r2716, 2;
@%p366 bra $L__BB6_327;
shr.u32 %r2142, %r2937, 2;
ld.local.u32 %r2143, [%rd41+-120];
ld.local.u32 %r2144, [%rd41+8];
shr.u32 %r2145, %r2938, 2;
add.s32 %r2146, %r2144, %r2145;
mad.lo.s32 %r2147, %r2143, %r2142, %r2146;
st.local.u32 [%rd42+8], %r2147;
$L__BB6_327:
@%p361 bra $L__BB6_373;
mov.u32 %r2940, 0;
$L__BB6_329:
add.u64 %rd388, %SPL, 0;
add.s32 %r2939, %r2939, -1;
mul.wide.u32 %rd231, %r2939, 4;
add.s64 %rd232, %rd388, %rd231;
ld.local.u32 %r2149, [%rd232];
mul.wide.u32 %rd233, %r2149, 4;
add.s64 %rd234, %rd2, %rd233;
add.s64 %rd44, %rd234, 8192;
ld.local.u32 %r3037, [%rd234+8192];
max.u32 %r512, %r3037, %r2940;
ld.local.u32 %r2150, [%rd234+16384];
setp.ne.s32 %p368, %r2150, 0;
@%p368 bra $L__BB6_372;
add.s32 %r2732, %r298, 1;
ld.local.u32 %r513, [%rd44+-8192];
min.u32 %r514, %r513, %r2732;
setp.le.u32 %p369, %r514, %r512;
mov.u32 %r2985, %r3457;
@%p369 bra $L__BB6_365;
min.u32 %r2733, %r513, %r2732;
sub.s32 %r2152, %r2733, %r512;
and.b32 %r515, %r2152, 3;
setp.eq.s32 %p370, %r515, 0;
mov.u32 %r2970, %r512;
mov.u32 %r2985, %r3457;
@%p370 bra $L__BB6_347;
shl.b32 %r2985, %r3457, 1;
setp.eq.s32 %p371, %r3422, 0;
selp.b32 %r517, 8, 7, %p371;
add.s32 %r3458, %r3458, 1;
setp.lt.u32 %p372, %r3458, %r517;
@%p372 bra $L__BB6_336;
sub.s32 %r3458, %r3458, %r517;
setp.ge.u32 %p373, %r3419, %r3;
mov.u32 %r2948, 1;
@%p373 bra $L__BB6_335;
shl.b32 %r2737, %r3457, 1;
shr.u32 %r2154, %r2737, %r3458;
cvt.u16.u32 %rs21, %r2154;
and.b16 %rs22, %rs21, 255;
cvt.u64.u32 %rd235, %r3419;
add.s64 %rd236, %rd4, %rd235;
st.global.u8 [%rd236], %rs21;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p374, %rs22, 255;
selp.u32 %r3422, 1, 0, %p374;
mov.u32 %r2948, %r3423;
$L__BB6_335:
shl.b32 %r2736, %r3457, 1;
mov.u32 %r2155, -1;
shl.b32 %r2156, %r2155, %r3458;
xor.b32 %r2157, %r2156, -2;
setp.eq.s32 %p375, %r3458, 0;
selp.b32 %r2158, 0, %r2157, %p375;
and.b32 %r2985, %r2158, %r2736;
mov.u32 %r3423, %r2948;
$L__BB6_336:
add.s32 %r2970, %r512, 1;
setp.eq.s32 %p376, %r515, 1;
@%p376 bra $L__BB6_347;
shl.b32 %r2958, %r2985, 1;
setp.eq.s32 %p377, %r3422, 0;
selp.b32 %r533, 8, 7, %p377;
add.s32 %r3458, %r3458, 1;
setp.lt.u32 %p378, %r3458, %r533;
@%p378 bra $L__BB6_341;
sub.s32 %r3458, %r3458, %r533;
setp.ge.u32 %p379, %r3419, %r3;
mov.u32 %r2956, 1;
@%p379 bra $L__BB6_340;
shl.b32 %r2739, %r2985, 1;
shr.u32 %r2160, %r2739, %r3458;
cvt.u16.u32 %rs23, %r2160;
and.b16 %rs24, %rs23, 255;
cvt.u64.u32 %rd237, %r3419;
add.s64 %rd238, %rd4, %rd237;
st.global.u8 [%rd238], %rs23;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p380, %rs24, 255;
selp.u32 %r3422, 1, 0, %p380;
mov.u32 %r2956, %r3423;
$L__BB6_340:
shl.b32 %r2738, %r2985, 1;
mov.u32 %r2161, -1;
shl.b32 %r2162, %r2161, %r3458;
xor.b32 %r2163, %r2162, -2;
setp.eq.s32 %p381, %r3458, 0;
selp.b32 %r2164, 0, %r2163, %p381;
and.b32 %r2958, %r2164, %r2738;
mov.u32 %r3423, %r2956;
$L__BB6_341:
add.s32 %r2970, %r512, 2;
setp.eq.s32 %p382, %r515, 2;
mov.u32 %r2985, %r2958;
@%p382 bra $L__BB6_347;
shl.b32 %r2985, %r2958, 1;
setp.eq.s32 %p383, %r3422, 0;
selp.b32 %r549, 8, 7, %p383;
add.s32 %r3458, %r3458, 1;
setp.lt.u32 %p384, %r3458, %r549;
@%p384 bra $L__BB6_346;
sub.s32 %r3458, %r3458, %r549;
setp.ge.u32 %p385, %r3419, %r3;
mov.u32 %r2964, 1;
@%p385 bra $L__BB6_345;
shl.b32 %r2741, %r2958, 1;
shr.u32 %r2166, %r2741, %r3458;
cvt.u16.u32 %rs25, %r2166;
and.b16 %rs26, %rs25, 255;
cvt.u64.u32 %rd239, %r3419;
add.s64 %rd240, %rd4, %rd239;
st.global.u8 [%rd240], %rs25;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p386, %rs26, 255;
selp.u32 %r3422, 1, 0, %p386;
mov.u32 %r2964, %r3423;
$L__BB6_345:
shl.b32 %r2740, %r2958, 1;
mov.u32 %r2167, -1;
shl.b32 %r2168, %r2167, %r3458;
xor.b32 %r2169, %r2168, -2;
setp.eq.s32 %p387, %r3458, 0;
selp.b32 %r2170, 0, %r2169, %p387;
and.b32 %r2985, %r2170, %r2740;
mov.u32 %r3423, %r2964;
$L__BB6_346:
add.s32 %r2970, %r512, 3;
$L__BB6_347:
min.u32 %r2734, %r513, %r2732;
not.b32 %r2171, %r512;
add.s32 %r2172, %r2734, %r2171;
setp.lt.u32 %p388, %r2172, 3;
@%p388 bra $L__BB6_365;
// Loop head: one "append a 0 bit" step of the bit writer. The same step is
// repeated three more times below before the loop latch.
//   %r2991 = pending bits with a 0 bit appended (%r2985 << 1)
//   %r2992 = pending-bit count after the append (%r3458 + 1)
//   %r582  = number of bits that make up the next output byte:
//            8 when %r3422 == 0, otherwise 7
$L__BB6_348:
shl.b32 %r2991, %r2985, 1;
setp.eq.s32 %p389, %r3422, 0;
selp.b32 %r582, 8, 7, %p389;
add.s32 %r2992, %r3458, 1;
// Fewer pending bits than a full byte: nothing to flush yet.
setp.lt.u32 %p390, %r2992, %r582;
@%p390 bra $L__BB6_352;
// A byte is ready; %r2992 becomes the number of bits left over after it.
sub.s32 %r2992, %r2992, %r582;
// Bounds check: when the write index %r3419 has reached %r3 the store is
// skipped and %r2989 stays 1 (copied into %r3423 below).
setp.ge.u32 %p391, %r3419, %r3;
mov.u32 %r2989, 1;
@%p391 bra $L__BB6_351;
// Byte to emit = pending bits above the leftover ones (low 8 bits stored).
shr.u32 %r2174, %r2991, %r2992;
cvt.u16.u32 %rs27, %r2174;
and.b16 %rs28, %rs27, 255;
cvt.u64.u32 %rd241, %r3419;
add.s64 %rd242, %rd4, %rd241;
st.global.u8 [%rd242], %rs27;
add.s32 %r3419, %r3419, 1;
// %r3422 = 1 if the byte just written was 0xFF, else 0 (selects 7 vs 8 above).
setp.eq.s16 %p392, %rs28, 255;
selp.u32 %r3422, 1, 0, %p392;
// Store happened: keep the previous value of %r3423.
mov.u32 %r2989, %r3423;
$L__BB6_351:
// Keep only the leftover bits: mask = (0xFFFFFFFF << n) ^ 0xFFFFFFFE, i.e.
// bits 1..n-1 (bit 0 is already 0 after the shift-left above); mask is 0
// when n == 0.
mov.u32 %r2175, -1;
shl.b32 %r2176, %r2175, %r2992;
xor.b32 %r2177, %r2176, -2;
setp.eq.s32 %p393, %r2992, 0;
selp.b32 %r2178, 0, %r2177, %p393;
and.b32 %r2991, %r2178, %r2991;
mov.u32 %r3423, %r2989;
$L__BB6_352:
shl.b32 %r2999, %r2991, 1;
setp.eq.s32 %p394, %r3422, 0;
selp.b32 %r597, 8, 7, %p394;
add.s32 %r3000, %r2992, 1;
setp.lt.u32 %p395, %r3000, %r597;
@%p395 bra $L__BB6_356;
sub.s32 %r3000, %r3000, %r597;
setp.ge.u32 %p396, %r3419, %r3;
mov.u32 %r2997, 1;
@%p396 bra $L__BB6_355;
shr.u32 %r2180, %r2999, %r3000;
cvt.u16.u32 %rs29, %r2180;
and.b16 %rs30, %rs29, 255;
cvt.u64.u32 %rd243, %r3419;
add.s64 %rd244, %rd4, %rd243;
st.global.u8 [%rd244], %rs29;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p397, %rs30, 255;
selp.u32 %r3422, 1, 0, %p397;
mov.u32 %r2997, %r3423;
$L__BB6_355:
mov.u32 %r2181, -1;
shl.b32 %r2182, %r2181, %r3000;
xor.b32 %r2183, %r2182, -2;
setp.eq.s32 %p398, %r3000, 0;
selp.b32 %r2184, 0, %r2183, %p398;
and.b32 %r2999, %r2184, %r2999;
mov.u32 %r3423, %r2997;
$L__BB6_356:
shl.b32 %r3007, %r2999, 1;
setp.eq.s32 %p399, %r3422, 0;
selp.b32 %r612, 8, 7, %p399;
add.s32 %r3008, %r3000, 1;
setp.lt.u32 %p400, %r3008, %r612;
@%p400 bra $L__BB6_360;
sub.s32 %r3008, %r3008, %r612;
setp.ge.u32 %p401, %r3419, %r3;
mov.u32 %r3005, 1;
@%p401 bra $L__BB6_359;
shr.u32 %r2186, %r3007, %r3008;
cvt.u16.u32 %rs31, %r2186;
and.b16 %rs32, %rs31, 255;
cvt.u64.u32 %rd245, %r3419;
add.s64 %rd246, %rd4, %rd245;
st.global.u8 [%rd246], %rs31;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p402, %rs32, 255;
selp.u32 %r3422, 1, 0, %p402;
mov.u32 %r3005, %r3423;
$L__BB6_359:
mov.u32 %r2187, -1;
shl.b32 %r2188, %r2187, %r3008;
xor.b32 %r2189, %r2188, -2;
setp.eq.s32 %p403, %r3008, 0;
selp.b32 %r2190, 0, %r2189, %p403;
and.b32 %r3007, %r2190, %r3007;
mov.u32 %r3423, %r3005;
$L__BB6_360:
shl.b32 %r2985, %r3007, 1;
setp.eq.s32 %p404, %r3422, 0;
selp.b32 %r627, 8, 7, %p404;
add.s32 %r3458, %r3008, 1;
setp.lt.u32 %p405, %r3458, %r627;
@%p405 bra $L__BB6_364;
sub.s32 %r3458, %r3458, %r627;
setp.ge.u32 %p406, %r3419, %r3;
mov.u32 %r3013, 1;
@%p406 bra $L__BB6_363;
shr.u32 %r2192, %r2985, %r3458;
cvt.u16.u32 %rs33, %r2192;
and.b16 %rs34, %rs33, 255;
cvt.u64.u32 %rd247, %r3419;
add.s64 %rd248, %rd4, %rd247;
st.global.u8 [%rd248], %rs33;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p407, %rs34, 255;
selp.u32 %r3422, 1, 0, %p407;
mov.u32 %r3013, %r3423;
$L__BB6_363:
mov.u32 %r2193, -1;
shl.b32 %r2194, %r2193, %r3458;
xor.b32 %r2195, %r2194, -2;
setp.eq.s32 %p408, %r3458, 0;
selp.b32 %r2196, 0, %r2195, %p408;
and.b32 %r2985, %r2196, %r2985;
mov.u32 %r3423, %r3013;
// Loop latch for the loop headed by $L__BB6_348: the counter %r2970 advances
// by 4 per trip (the body performs four append steps) and the loop repeats
// while %r2970 < min(%r513, %r2732).
$L__BB6_364:
min.u32 %r2735, %r513, %r2732;
add.s32 %r2970, %r2970, 4;
setp.lt.u32 %p409, %r2970, %r2735;
@%p409 bra $L__BB6_348;
// If %r513 < %r298 + 1, append a single 1 bit to the bit writer and store 1
// to local memory at [%rd44+8192]; otherwise skip straight to $L__BB6_371.
// NOTE(review): this looks like the terminating 1 bit of a tag-tree style
// code, with a per-node flag at +8192 - confirm against the CUDA source.
$L__BB6_365:
add.s32 %r2729, %r298, 1;
setp.ge.u32 %p410, %r513, %r2729;
@%p410 bra $L__BB6_371;
// %r3028 = pending bits with a 1 bit appended.
shl.b32 %r2197, %r2985, 1;
or.b32 %r3028, %r2197, 1;
// Next byte takes 8 bits, or 7 when %r3422 != 0.
setp.eq.s32 %p411, %r3422, 0;
selp.b32 %r648, 8, 7, %p411;
add.s32 %r3458, %r3458, 1;
setp.lt.u32 %p412, %r3458, %r648;
@%p412 bra $L__BB6_370;
// A byte is ready; %r3458 becomes the leftover bit count.
sub.s32 %r3458, %r3458, %r648;
// Skip the store (and leave %r3026 = 1) when the write index %r3419 >= %r3.
setp.ge.u32 %p413, %r3419, %r3;
mov.u32 %r3026, 1;
@%p413 bra $L__BB6_369;
// Recompute (pending << 1) | 1 and store its bits above the leftover ones.
shl.b32 %r2745, %r2985, 1;
or.b32 %r2744, %r2745, 1;
shr.u32 %r2199, %r2744, %r3458;
cvt.u16.u32 %rs35, %r2199;
and.b16 %rs36, %rs35, 255;
cvt.u64.u32 %rd249, %r3419;
add.s64 %rd250, %rd4, %rd249;
st.global.u8 [%rd250], %rs35;
add.s32 %r3419, %r3419, 1;
// %r3422 = 1 if the stored byte was 0xFF, else 0.
setp.eq.s16 %p414, %rs36, 255;
selp.u32 %r3422, 1, 0, %p414;
mov.u32 %r3026, %r3423;
$L__BB6_369:
// Keep only the low %r3458 leftover bits (mask is 0 when none are left).
shl.b32 %r2743, %r2985, 1;
or.b32 %r2742, %r2743, 1;
mov.u32 %r2200, -1;
shl.b32 %r2201, %r2200, %r3458;
not.b32 %r2202, %r2201;
setp.eq.s32 %p415, %r3458, 0;
selp.b32 %r2203, 0, %r2202, %p415;
and.b32 %r3028, %r2203, %r2742;
mov.u32 %r3423, %r3026;
$L__BB6_370:
// Record the value 1 in local memory at byte offset 8192 from %rd44.
mov.u32 %r2204, 1;
st.local.u32 [%rd44+8192], %r2204;
mov.u32 %r2985, %r3028;
$L__BB6_371:
// Store min(%r513, %r298 + 1) to local memory at [%rd44] and carry the
// pending bits forward in %r3457.
add.s32 %r2731, %r298, 1;
min.u32 %r3037, %r513, %r2731;
st.local.u32 [%rd44], %r3037;
mov.u32 %r3457, %r2985;
$L__BB6_372:
// Back-edge of the enclosing loop: repeat from $L__BB6_329 while %r2939 != 0,
// passing %r3037 on in %r2940.
setp.ne.s32 %p416, %r2939, 0;
mov.u32 %r2940, %r3037;
@%p416 bra $L__BB6_329;
// Start of a variable-length code selected by %r274.
// %rd45 = %rd4 + %r3419 is the address of the next output byte; each case
// below uses it for its first store only, before %r3419 is advanced.
// NOTE(review): the case split (1, 2, <6, <37, else) matches the JPEG 2000
// "number of coding passes" codeword table - confirm against the CUDA source.
$L__BB6_373:
cvt.u64.u32 %rd251, %r3419;
add.s64 %rd45, %rd4, %rd251;
setp.eq.s32 %p417, %r274, 1;
@%p417 bra $L__BB6_501;
bra.uni $L__BB6_374;
// Case %r274 == 1: append a single 0 bit.
//   %r3285 = pending bits, %r3286 = pending-bit count (the names the join
//   point $L__BB6_505 continues with).
$L__BB6_501:
shl.b32 %r3285, %r3457, 1;
// Next byte takes 8 bits, or 7 when %r3422 != 0.
setp.eq.s32 %p575, %r3422, 0;
selp.b32 %r1129, 8, 7, %p575;
add.s32 %r3286, %r3458, 1;
setp.lt.u32 %p576, %r3286, %r1129;
@%p576 bra $L__BB6_505;
// A byte is ready; %r3286 becomes the leftover bit count.
sub.s32 %r3286, %r3286, %r1129;
// Skip the store (and leave %r3283 = 1) when the write index %r3419 >= %r3.
setp.ge.u32 %p577, %r3419, %r3;
mov.u32 %r3283, 1;
@%p577 bra $L__BB6_504;
shr.u32 %r2432, %r3285, %r3286;
cvt.u16.u32 %rs99, %r2432;
and.b16 %rs100, %rs99, 255;
st.global.u8 [%rd45], %rs99;
add.s32 %r3419, %r3419, 1;
// %r3422 = 1 if the stored byte was 0xFF, else 0.
setp.eq.s16 %p578, %rs100, 255;
selp.u32 %r3422, 1, 0, %p578;
mov.u32 %r3283, %r3423;
$L__BB6_504:
// Keep only leftover bits 1..n-1 (bit 0 is 0 after the shift-left); the mask
// is 0 when n == 0.
mov.u32 %r2433, -1;
shl.b32 %r2434, %r2433, %r3286;
xor.b32 %r2435, %r2434, -2;
setp.eq.s32 %p579, %r3286, 0;
selp.b32 %r2436, 0, %r2435, %p579;
and.b32 %r3285, %r2436, %r3285;
mov.u32 %r3423, %r3283;
bra.uni $L__BB6_505;
// Case %r274 == 2: append the two bits 1, 0. Any other value continues at
// $L__BB6_375.
$L__BB6_374:
setp.eq.s32 %p418, %r274, 2;
@%p418 bra $L__BB6_493;
bra.uni $L__BB6_375;
// First bit (1): %r3274 = (%r3457 << 1) | 1, %r3275 = pending-bit count.
$L__BB6_493:
shl.b32 %r2418, %r3457, 1;
or.b32 %r3274, %r2418, 1;
// Next byte takes 8 bits, or 7 when %r3422 != 0.
setp.eq.s32 %p565, %r3422, 0;
selp.b32 %r1104, 8, 7, %p565;
add.s32 %r3275, %r3458, 1;
setp.lt.u32 %p566, %r3275, %r1104;
@%p566 bra $L__BB6_497;
sub.s32 %r3275, %r3275, %r1104;
// Skip the store (and leave %r3272 = 1) when the write index %r3419 >= %r3.
setp.ge.u32 %p567, %r3419, %r3;
mov.u32 %r3272, 1;
@%p567 bra $L__BB6_496;
shr.u32 %r2420, %r3274, %r3275;
cvt.u16.u32 %rs95, %r2420;
and.b16 %rs96, %rs95, 255;
// %rd45 still addresses byte %r3419 here (no store has happened since it
// was computed).
st.global.u8 [%rd45], %rs95;
add.s32 %r3419, %r3419, 1;
// %r3422 = 1 if the stored byte was 0xFF, else 0.
setp.eq.s16 %p568, %rs96, 255;
selp.u32 %r3422, 1, 0, %p568;
mov.u32 %r3272, %r3423;
$L__BB6_496:
// Keep only the low %r3275 leftover bits (mask is 0 when none are left).
mov.u32 %r2421, -1;
shl.b32 %r2422, %r2421, %r3275;
not.b32 %r2423, %r2422;
setp.eq.s32 %p569, %r3275, 0;
selp.b32 %r2424, 0, %r2423, %p569;
and.b32 %r3274, %r3274, %r2424;
mov.u32 %r3423, %r3272;
// Second bit (0): %r3285 = %r3274 << 1, %r3286 = pending-bit count.
$L__BB6_497:
shl.b32 %r3285, %r3274, 1;
setp.eq.s32 %p570, %r3422, 0;
selp.b32 %r1119, 8, 7, %p570;
add.s32 %r3286, %r3275, 1;
setp.lt.u32 %p571, %r3286, %r1119;
@%p571 bra $L__BB6_505;
sub.s32 %r3286, %r3286, %r1119;
// Skip the store (and leave %r3280 = 1) when the write index %r3419 >= %r3.
setp.ge.u32 %p572, %r3419, %r3;
mov.u32 %r3280, 1;
@%p572 bra $L__BB6_500;
shr.u32 %r2426, %r3285, %r3286;
cvt.u16.u32 %rs97, %r2426;
and.b16 %rs98, %rs97, 255;
// The address is recomputed because %r3419 may have advanced above.
cvt.u64.u32 %rd304, %r3419;
add.s64 %rd305, %rd4, %rd304;
st.global.u8 [%rd305], %rs97;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p573, %rs98, 255;
selp.u32 %r3422, 1, 0, %p573;
mov.u32 %r3280, %r3423;
$L__BB6_500:
// Keep only the low %r3286 leftover bits (mask is 0 when none are left).
mov.u32 %r2427, -1;
shl.b32 %r2428, %r2427, %r3286;
not.b32 %r2429, %r2428;
setp.eq.s32 %p574, %r3286, 0;
selp.b32 %r2430, 0, %r2429, %p574;
and.b32 %r3285, %r3285, %r2430;
mov.u32 %r3423, %r3280;
bra.uni $L__BB6_505;
// Count-and-flush step of the bit writer without a shift: the pending-bit
// count %r3458 is incremented and, if a full byte is available, it is written
// from %r3457. Control then continues at $L__BB6_240.
// NOTE(review): presumably the bit was already appended to %r3457 (with bit 0
// clear) before this label is reached - the producer is not visible here.
$L__BB6_236:
add.s32 %r3458, %r3458, 1;
// Next byte takes 8 bits, or 7 when %r3422 != 0.
setp.eq.s32 %p291, %r3422, 0;
selp.b32 %r280, 8, 7, %p291;
setp.lt.u32 %p292, %r3458, %r280;
@%p292 bra $L__BB6_240;
// A byte is ready; %r3458 becomes the leftover bit count.
sub.s32 %r3458, %r3458, %r280;
// Skip the store (and leave %r2812 = 1) when the write index %r3419 >= %r3.
setp.ge.u32 %p293, %r3419, %r3;
mov.u32 %r2812, 1;
@%p293 bra $L__BB6_239;
shr.u32 %r1990, %r3457, %r3458;
cvt.u16.u32 %rs1, %r1990;
and.b16 %rs2, %rs1, 255;
// %rd34 is the store address; it is computed outside this block.
st.global.u8 [%rd34], %rs1;
add.s32 %r3419, %r3419, 1;
// %r3422 = 1 if the stored byte was 0xFF, else 0.
setp.eq.s16 %p294, %rs2, 255;
selp.u32 %r3422, 1, 0, %p294;
mov.u32 %r2812, %r3423;
$L__BB6_239:
// Mask = (0xFFFFFFFF << n) ^ 0xFFFFFFFE, i.e. bits 1..n-1 of %r3457 are kept
// and bit 0 is cleared; the mask is 0 when n == 0.
mov.u32 %r1991, -1;
shl.b32 %r1992, %r1991, %r3458;
xor.b32 %r1993, %r1992, -2;
setp.eq.s32 %p295, %r3458, 0;
selp.b32 %r1994, 0, %r1993, %p295;
and.b32 %r3457, %r1994, %r3457;
mov.u32 %r3423, %r2812;
bra.uni $L__BB6_240;
// Case %r274 < 6 (values 1 and 2 were handled earlier): the code starting at
// $L__BB6_477 appends 1, 1 and then bits 1 and 0 of (%r274 - 3).
// Larger values continue at $L__BB6_376.
$L__BB6_375:
setp.lt.u32 %p419, %r274, 6;
@%p419 bra $L__BB6_477;
bra.uni $L__BB6_376;
// First 1 bit: %r3247 = (%r3457 << 1) | 1, %r3248 = pending-bit count.
$L__BB6_477:
shl.b32 %r2389, %r3457, 1;
or.b32 %r3247, %r2389, 1;
// Next byte takes 8 bits, or 7 when %r3422 != 0.
setp.eq.s32 %p545, %r3422, 0;
selp.b32 %r1048, 8, 7, %p545;
add.s32 %r3248, %r3458, 1;
setp.lt.u32 %p546, %r3248, %r1048;
@%p546 bra $L__BB6_481;
// A byte is ready; %r3248 becomes the leftover bit count.
sub.s32 %r3248, %r3248, %r1048;
// Skip the store (and leave %r3245 = 1) when the write index %r3419 >= %r3.
setp.ge.u32 %p547, %r3419, %r3;
mov.u32 %r3245, 1;
@%p547 bra $L__BB6_480;
shr.u32 %r2391, %r3247, %r3248;
cvt.u16.u32 %rs87, %r2391;
and.b16 %rs88, %rs87, 255;
st.global.u8 [%rd45], %rs87;
add.s32 %r3419, %r3419, 1;
// %r3422 = 1 if the stored byte was 0xFF, else 0.
setp.eq.s16 %p548, %rs88, 255;
selp.u32 %r3422, 1, 0, %p548;
mov.u32 %r3245, %r3423;
$L__BB6_480:
// Keep only the low %r3248 leftover bits (mask is 0 when none are left).
mov.u32 %r2392, -1;
shl.b32 %r2393, %r2392, %r3248;
not.b32 %r2394, %r2393;
setp.eq.s32 %p549, %r3248, 0;
selp.b32 %r2395, 0, %r2394, %p549;
and.b32 %r3247, %r3247, %r2395;
mov.u32 %r3423, %r3245;
$L__BB6_481:
shl.b32 %r2396, %r3247, 1;
or.b32 %r3255, %r2396, 1;
setp.eq.s32 %p550, %r3422, 0;
selp.b32 %r1063, 8, 7, %p550;
add.s32 %r3256, %r3248, 1;
setp.lt.u32 %p551, %r3256, %r1063;
@%p551 bra $L__BB6_485;
sub.s32 %r3256, %r3256, %r1063;
setp.ge.u32 %p552, %r3419, %r3;
mov.u32 %r3253, 1;
@%p552 bra $L__BB6_484;
shr.u32 %r2398, %r3255, %r3256;
cvt.u16.u32 %rs89, %r2398;
and.b16 %rs90, %rs89, 255;
cvt.u64.u32 %rd298, %r3419;
add.s64 %rd299, %rd4, %rd298;
st.global.u8 [%rd299], %rs89;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p553, %rs90, 255;
selp.u32 %r3422, 1, 0, %p553;
mov.u32 %r3253, %r3423;
$L__BB6_484:
mov.u32 %r2399, -1;
shl.b32 %r2400, %r2399, %r3256;
not.b32 %r2401, %r2400;
setp.eq.s32 %p554, %r3256, 0;
selp.b32 %r2402, 0, %r2401, %p554;
and.b32 %r3255, %r3255, %r2402;
mov.u32 %r3423, %r3253;
// Append bit 1 of (%r274 - 3) to the pending bits.
//   %r1077 = %r274 - 3
//   bfi.b32 d, a, b, 1, 31 inserts the low 31 bits of a at bit position 1
//   of b, so %r3263 = (%r3255 << 1) | bit, with the bit (0 or 1) in %r2404.
$L__BB6_485:
add.s32 %r1077, %r274, -3;
shr.u32 %r2403, %r1077, 1;
and.b32 %r2404, %r2403, 1;
bfi.b32 %r3263, %r3255, %r2404, 1, 31;
// Next byte takes 8 bits, or 7 when %r3422 != 0.
setp.eq.s32 %p555, %r3422, 0;
selp.b32 %r1079, 8, 7, %p555;
add.s32 %r3264, %r3256, 1;
setp.lt.u32 %p556, %r3264, %r1079;
@%p556 bra $L__BB6_489;
// A byte is ready; %r3264 becomes the leftover bit count.
sub.s32 %r3264, %r3264, %r1079;
// Skip the store (and leave %r3261 = 1) when the write index %r3419 >= %r3.
setp.ge.u32 %p557, %r3419, %r3;
mov.u32 %r3261, 1;
@%p557 bra $L__BB6_488;
shr.u32 %r2406, %r3263, %r3264;
cvt.u16.u32 %rs91, %r2406;
and.b16 %rs92, %rs91, 255;
cvt.u64.u32 %rd300, %r3419;
add.s64 %rd301, %rd4, %rd300;
st.global.u8 [%rd301], %rs91;
add.s32 %r3419, %r3419, 1;
// %r3422 = 1 if the stored byte was 0xFF, else 0.
setp.eq.s16 %p558, %rs92, 255;
selp.u32 %r3422, 1, 0, %p558;
mov.u32 %r3261, %r3423;
$L__BB6_488:
// Keep only the low %r3264 leftover bits (mask is 0 when none are left).
mov.u32 %r2407, -1;
shl.b32 %r2408, %r2407, %r3264;
not.b32 %r2409, %r2408;
setp.eq.s32 %p559, %r3264, 0;
selp.b32 %r2410, 0, %r2409, %p559;
and.b32 %r3263, %r3263, %r2410;
mov.u32 %r3423, %r3261;
$L__BB6_489:
and.b32 %r2411, %r1077, 1;
bfi.b32 %r3285, %r3263, %r2411, 1, 31;
setp.eq.s32 %p560, %r3422, 0;
selp.b32 %r1094, 8, 7, %p560;
add.s32 %r3286, %r3264, 1;
setp.lt.u32 %p561, %r3286, %r1094;
@%p561 bra $L__BB6_505;
sub.s32 %r3286, %r3286, %r1094;
setp.ge.u32 %p562, %r3419, %r3;
mov.u32 %r3269, 1;
@%p562 bra $L__BB6_492;
shr.u32 %r2413, %r3285, %r3286;
cvt.u16.u32 %rs93, %r2413;
and.b16 %rs94, %rs93, 255;
cvt.u64.u32 %rd302, %r3419;
add.s64 %rd303, %rd4, %rd302;
st.global.u8 [%rd303], %rs93;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p563, %rs94, 255;
selp.u32 %r3422, 1, 0, %p563;
mov.u32 %r3269, %r3423;
$L__BB6_492:
mov.u32 %r2414, -1;
shl.b32 %r2415, %r2414, %r3286;
not.b32 %r2416, %r2415;
setp.eq.s32 %p564, %r3286, 0;
selp.b32 %r2417, 0, %r2416, %p564;
and.b32 %r3285, %r3285, %r2417;
mov.u32 %r3423, %r3269;
bra.uni $L__BB6_505;
// Reached for %r274 >= 6. Both remaining cases start with a 1 bit, so the
// shared first step is computed once here:
//   %r3057 = (%r3457 << 1) | 1   pending bits with a 1 appended
//   %r3058 = %r3458 + 1          pending-bit count
//   %r685  = 8 when %r3422 == 0, otherwise 7 (bits in the next byte)
// %r274 < 37 goes to $L__BB6_441, everything else to $L__BB6_377.
$L__BB6_376:
setp.lt.u32 %p420, %r274, 37;
shl.b32 %r2205, %r3457, 1;
or.b32 %r3057, %r2205, 1;
add.s32 %r3058, %r3458, 1;
setp.eq.s32 %p421, %r3422, 0;
selp.b32 %r685, 8, 7, %p421;
@%p420 bra $L__BB6_441;
bra.uni $L__BB6_377;
$L__BB6_441:
setp.lt.u32 %p501, %r3058, %r685;
@%p501 bra $L__BB6_445;
sub.s32 %r3058, %r3058, %r685;
setp.ge.u32 %p502, %r3419, %r3;
mov.u32 %r3178, 1;
@%p502 bra $L__BB6_444;
shr.u32 %r2324, %r3057, %r3058;
cvt.u16.u32 %rs69, %r2324;
and.b16 %rs70, %rs69, 255;
st.global.u8 [%rd45], %rs69;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p503, %rs70, 255;
selp.u32 %r3422, 1, 0, %p503;
mov.u32 %r3178, %r3423;
$L__BB6_444:
mov.u32 %r2325, -1;
shl.b32 %r2326, %r2325, %r3058;
not.b32 %r2327, %r2326;
setp.eq.s32 %p504, %r3058, 0;
selp.b32 %r2328, 0, %r2327, %p504;
and.b32 %r3057, %r3057, %r2328;
mov.u32 %r3423, %r3178;
$L__BB6_445:
shl.b32 %r2329, %r3057, 1;
or.b32 %r3188, %r2329, 1;
setp.eq.s32 %p505, %r3422, 0;
selp.b32 %r932, 8, 7, %p505;
add.s32 %r3189, %r3058, 1;
setp.lt.u32 %p506, %r3189, %r932;
@%p506 bra $L__BB6_449;
sub.s32 %r3189, %r3189, %r932;
setp.ge.u32 %p507, %r3419, %r3;
mov.u32 %r3186, 1;
@%p507 bra $L__BB6_448;
shr.u32 %r2331, %r3188, %r3189;
cvt.u16.u32 %rs71, %r2331;
and.b16 %rs72, %rs71, 255;
cvt.u64.u32 %rd282, %r3419;
add.s64 %rd283, %rd4, %rd282;
st.global.u8 [%rd283], %rs71;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p508, %rs72, 255;
selp.u32 %r3422, 1, 0, %p508;
mov.u32 %r3186, %r3423;
$L__BB6_448:
mov.u32 %r2332, -1;
shl.b32 %r2333, %r2332, %r3189;
not.b32 %r2334, %r2333;
setp.eq.s32 %p509, %r3189, 0;
selp.b32 %r2335, 0, %r2334, %p509;
and.b32 %r3188, %r3188, %r2335;
mov.u32 %r3423, %r3186;
$L__BB6_449:
shl.b32 %r2336, %r3188, 1;
or.b32 %r3196, %r2336, 1;
setp.eq.s32 %p510, %r3422, 0;
selp.b32 %r947, 8, 7, %p510;
add.s32 %r3197, %r3189, 1;
setp.lt.u32 %p511, %r3197, %r947;
@%p511 bra $L__BB6_453;
sub.s32 %r3197, %r3197, %r947;
setp.ge.u32 %p512, %r3419, %r3;
mov.u32 %r3194, 1;
@%p512 bra $L__BB6_452;
shr.u32 %r2338, %r3196, %r3197;
cvt.u16.u32 %rs73, %r2338;
and.b16 %rs74, %rs73, 255;
cvt.u64.u32 %rd284, %r3419;
add.s64 %rd285, %rd4, %rd284;
st.global.u8 [%rd285], %rs73;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p513, %rs74, 255;
selp.u32 %r3422, 1, 0, %p513;
mov.u32 %r3194, %r3423;
$L__BB6_452:
mov.u32 %r2339, -1;
shl.b32 %r2340, %r2339, %r3197;
not.b32 %r2341, %r2340;
setp.eq.s32 %p514, %r3197, 0;
selp.b32 %r2342, 0, %r2341, %p514;
and.b32 %r3196, %r3196, %r2342;
mov.u32 %r3423, %r3194;
$L__BB6_453:
shl.b32 %r2343, %r3196, 1;
or.b32 %r3204, %r2343, 1;
setp.eq.s32 %p515, %r3422, 0;
selp.b32 %r962, 8, 7, %p515;
add.s32 %r3205, %r3197, 1;
setp.lt.u32 %p516, %r3205, %r962;
@%p516 bra $L__BB6_457;
sub.s32 %r3205, %r3205, %r962;
setp.ge.u32 %p517, %r3419, %r3;
mov.u32 %r3202, 1;
@%p517 bra $L__BB6_456;
shr.u32 %r2345, %r3204, %r3205;
cvt.u16.u32 %rs75, %r2345;
and.b16 %rs76, %rs75, 255;
cvt.u64.u32 %rd286, %r3419;
add.s64 %rd287, %rd4, %rd286;
st.global.u8 [%rd287], %rs75;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p518, %rs76, 255;
selp.u32 %r3422, 1, 0, %p518;
mov.u32 %r3202, %r3423;
$L__BB6_456:
mov.u32 %r2346, -1;
shl.b32 %r2347, %r2346, %r3205;
not.b32 %r2348, %r2347;
setp.eq.s32 %p519, %r3205, 0;
selp.b32 %r2349, 0, %r2348, %p519;
and.b32 %r3204, %r3204, %r2349;
mov.u32 %r3423, %r3202;
$L__BB6_457:
add.s32 %r976, %r274, -6;
shr.u32 %r2350, %r976, 4;
and.b32 %r2351, %r2350, 1;
bfi.b32 %r3212, %r3204, %r2351, 1, 31;
setp.eq.s32 %p520, %r3422, 0;
selp.b32 %r978, 8, 7, %p520;
add.s32 %r3213, %r3205, 1;
setp.lt.u32 %p521, %r3213, %r978;
@%p521 bra $L__BB6_461;
sub.s32 %r3213, %r3213, %r978;
setp.ge.u32 %p522, %r3419, %r3;
mov.u32 %r3210, 1;
@%p522 bra $L__BB6_460;
shr.u32 %r2353, %r3212, %r3213;
cvt.u16.u32 %rs77, %r2353;
and.b16 %rs78, %rs77, 255;
cvt.u64.u32 %rd288, %r3419;
add.s64 %rd289, %rd4, %rd288;
st.global.u8 [%rd289], %rs77;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p523, %rs78, 255;
selp.u32 %r3422, 1, 0, %p523;
mov.u32 %r3210, %r3423;
$L__BB6_460:
mov.u32 %r2354, -1;
shl.b32 %r2355, %r2354, %r3213;
not.b32 %r2356, %r2355;
setp.eq.s32 %p524, %r3213, 0;
selp.b32 %r2357, 0, %r2356, %p524;
and.b32 %r3212, %r3212, %r2357;
mov.u32 %r3423, %r3210;
$L__BB6_461:
shr.u32 %r2358, %r976, 3;
and.b32 %r2359, %r2358, 1;
bfi.b32 %r3220, %r3212, %r2359, 1, 31;
setp.eq.s32 %p525, %r3422, 0;
selp.b32 %r993, 8, 7, %p525;
add.s32 %r3221, %r3213, 1;
setp.lt.u32 %p526, %r3221, %r993;
@%p526 bra $L__BB6_465;
sub.s32 %r3221, %r3221, %r993;
setp.ge.u32 %p527, %r3419, %r3;
mov.u32 %r3218, 1;
@%p527 bra $L__BB6_464;
shr.u32 %r2361, %r3220, %r3221;
cvt.u16.u32 %rs79, %r2361;
and.b16 %rs80, %rs79, 255;
cvt.u64.u32 %rd290, %r3419;
add.s64 %rd291, %rd4, %rd290;
st.global.u8 [%rd291], %rs79;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p528, %rs80, 255;
selp.u32 %r3422, 1, 0, %p528;
mov.u32 %r3218, %r3423;
$L__BB6_464:
mov.u32 %r2362, -1;
shl.b32 %r2363, %r2362, %r3221;
not.b32 %r2364, %r2363;
setp.eq.s32 %p529, %r3221, 0;
selp.b32 %r2365, 0, %r2364, %p529;
and.b32 %r3220, %r3220, %r2365;
mov.u32 %r3423, %r3218;
$L__BB6_465:
shr.u32 %r2366, %r976, 2;
and.b32 %r2367, %r2366, 1;
bfi.b32 %r3228, %r3220, %r2367, 1, 31;
setp.eq.s32 %p530, %r3422, 0;
selp.b32 %r1008, 8, 7, %p530;
add.s32 %r3229, %r3221, 1;
setp.lt.u32 %p531, %r3229, %r1008;
@%p531 bra $L__BB6_469;
sub.s32 %r3229, %r3229, %r1008;
setp.ge.u32 %p532, %r3419, %r3;
mov.u32 %r3226, 1;
@%p532 bra $L__BB6_468;
shr.u32 %r2369, %r3228, %r3229;
cvt.u16.u32 %rs81, %r2369;
and.b16 %rs82, %rs81, 255;
cvt.u64.u32 %rd292, %r3419;
add.s64 %rd293, %rd4, %rd292;
st.global.u8 [%rd293], %rs81;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p533, %rs82, 255;
selp.u32 %r3422, 1, 0, %p533;
mov.u32 %r3226, %r3423;
$L__BB6_468:
mov.u32 %r2370, -1;
shl.b32 %r2371, %r2370, %r3229;
not.b32 %r2372, %r2371;
setp.eq.s32 %p534, %r3229, 0;
selp.b32 %r2373, 0, %r2372, %p534;
and.b32 %r3228, %r3228, %r2373;
mov.u32 %r3423, %r3226;
$L__BB6_469:
shr.u32 %r2374, %r976, 1;
and.b32 %r2375, %r2374, 1;
bfi.b32 %r3236, %r3228, %r2375, 1, 31;
setp.eq.s32 %p535, %r3422, 0;
selp.b32 %r1023, 8, 7, %p535;
add.s32 %r3237, %r3229, 1;
setp.lt.u32 %p536, %r3237, %r1023;
@%p536 bra $L__BB6_473;
sub.s32 %r3237, %r3237, %r1023;
setp.ge.u32 %p537, %r3419, %r3;
mov.u32 %r3234, 1;
@%p537 bra $L__BB6_472;
shr.u32 %r2377, %r3236, %r3237;
cvt.u16.u32 %rs83, %r2377;
and.b16 %rs84, %rs83, 255;
cvt.u64.u32 %rd294, %r3419;
add.s64 %rd295, %rd4, %rd294;
st.global.u8 [%rd295], %rs83;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p538, %rs84, 255;
selp.u32 %r3422, 1, 0, %p538;
mov.u32 %r3234, %r3423;
$L__BB6_472:
mov.u32 %r2378, -1;
shl.b32 %r2379, %r2378, %r3237;
not.b32 %r2380, %r2379;
setp.eq.s32 %p539, %r3237, 0;
selp.b32 %r2381, 0, %r2380, %p539;
and.b32 %r3236, %r3236, %r2381;
mov.u32 %r3423, %r3234;
$L__BB6_473:
and.b32 %r2382, %r976, 1;
bfi.b32 %r3285, %r3236, %r2382, 1, 31;
setp.eq.s32 %p540, %r3422, 0;
selp.b32 %r1038, 8, 7, %p540;
add.s32 %r3286, %r3237, 1;
setp.lt.u32 %p541, %r3286, %r1038;
@%p541 bra $L__BB6_505;
sub.s32 %r3286, %r3286, %r1038;
setp.ge.u32 %p542, %r3419, %r3;
mov.u32 %r3242, 1;
@%p542 bra $L__BB6_476;
shr.u32 %r2384, %r3285, %r3286;
cvt.u16.u32 %rs85, %r2384;
and.b16 %rs86, %rs85, 255;
cvt.u64.u32 %rd296, %r3419;
add.s64 %rd297, %rd4, %rd296;
st.global.u8 [%rd297], %rs85;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p543, %rs86, 255;
selp.u32 %r3422, 1, 0, %p543;
mov.u32 %r3242, %r3423;
$L__BB6_476:
mov.u32 %r2385, -1;
shl.b32 %r2386, %r2385, %r3286;
not.b32 %r2387, %r2386;
setp.eq.s32 %p544, %r3286, 0;
selp.b32 %r2388, 0, %r2387, %p544;
and.b32 %r3285, %r3285, %r2388;
mov.u32 %r3423, %r3242;
bra.uni $L__BB6_505;
$L__BB6_377:
setp.lt.u32 %p422, %r3058, %r685;
@%p422 bra $L__BB6_381;
sub.s32 %r3058, %r3058, %r685;
setp.ge.u32 %p423, %r3419, %r3;
mov.u32 %r3055, 1;
@%p423 bra $L__BB6_380;
shr.u32 %r2207, %r3057, %r3058;
cvt.u16.u32 %rs37, %r2207;
and.b16 %rs38, %rs37, 255;
st.global.u8 [%rd45], %rs37;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p424, %rs38, 255;
selp.u32 %r3422, 1, 0, %p424;
mov.u32 %r3055, %r3423;
$L__BB6_380:
mov.u32 %r2208, -1;
shl.b32 %r2209, %r2208, %r3058;
not.b32 %r2210, %r2209;
setp.eq.s32 %p425, %r3058, 0;
selp.b32 %r2211, 0, %r2210, %p425;
and.b32 %r3057, %r3057, %r2211;
mov.u32 %r3423, %r3055;
$L__BB6_381:
shl.b32 %r2212, %r3057, 1;
or.b32 %r3065, %r2212, 1;
setp.eq.s32 %p426, %r3422, 0;
selp.b32 %r699, 8, 7, %p426;
add.s32 %r3066, %r3058, 1;
setp.lt.u32 %p427, %r3066, %r699;
@%p427 bra $L__BB6_385;
sub.s32 %r3066, %r3066, %r699;
setp.ge.u32 %p428, %r3419, %r3;
mov.u32 %r3063, 1;
@%p428 bra $L__BB6_384;
shr.u32 %r2214, %r3065, %r3066;
cvt.u16.u32 %rs39, %r2214;
and.b16 %rs40, %rs39, 255;
cvt.u64.u32 %rd252, %r3419;
add.s64 %rd253, %rd4, %rd252;
st.global.u8 [%rd253], %rs39;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p429, %rs40, 255;
selp.u32 %r3422, 1, 0, %p429;
mov.u32 %r3063, %r3423;
$L__BB6_384:
mov.u32 %r2215, -1;
shl.b32 %r2216, %r2215, %r3066;
not.b32 %r2217, %r2216;
setp.eq.s32 %p430, %r3066, 0;
selp.b32 %r2218, 0, %r2217, %p430;
and.b32 %r3065, %r3065, %r2218;
mov.u32 %r3423, %r3063;
$L__BB6_385:
shl.b32 %r2219, %r3065, 1;
or.b32 %r3073, %r2219, 1;
setp.eq.s32 %p431, %r3422, 0;
selp.b32 %r714, 8, 7, %p431;
add.s32 %r3074, %r3066, 1;
setp.lt.u32 %p432, %r3074, %r714;
@%p432 bra $L__BB6_389;
sub.s32 %r3074, %r3074, %r714;
setp.ge.u32 %p433, %r3419, %r3;
mov.u32 %r3071, 1;
@%p433 bra $L__BB6_388;
shr.u32 %r2221, %r3073, %r3074;
cvt.u16.u32 %rs41, %r2221;
and.b16 %rs42, %rs41, 255;
cvt.u64.u32 %rd254, %r3419;
add.s64 %rd255, %rd4, %rd254;
st.global.u8 [%rd255], %rs41;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p434, %rs42, 255;
selp.u32 %r3422, 1, 0, %p434;
mov.u32 %r3071, %r3423;
$L__BB6_388:
mov.u32 %r2222, -1;
shl.b32 %r2223, %r2222, %r3074;
not.b32 %r2224, %r2223;
setp.eq.s32 %p435, %r3074, 0;
selp.b32 %r2225, 0, %r2224, %p435;
and.b32 %r3073, %r3073, %r2225;
mov.u32 %r3423, %r3071;
$L__BB6_389:
shl.b32 %r2226, %r3073, 1;
or.b32 %r3081, %r2226, 1;
setp.eq.s32 %p436, %r3422, 0;
selp.b32 %r729, 8, 7, %p436;
add.s32 %r3082, %r3074, 1;
setp.lt.u32 %p437, %r3082, %r729;
@%p437 bra $L__BB6_393;
sub.s32 %r3082, %r3082, %r729;
setp.ge.u32 %p438, %r3419, %r3;
mov.u32 %r3079, 1;
@%p438 bra $L__BB6_392;
shr.u32 %r2228, %r3081, %r3082;
cvt.u16.u32 %rs43, %r2228;
and.b16 %rs44, %rs43, 255;
cvt.u64.u32 %rd256, %r3419;
add.s64 %rd257, %rd4, %rd256;
st.global.u8 [%rd257], %rs43;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p439, %rs44, 255;
selp.u32 %r3422, 1, 0, %p439;
mov.u32 %r3079, %r3423;
$L__BB6_392:
mov.u32 %r2229, -1;
shl.b32 %r2230, %r2229, %r3082;
not.b32 %r2231, %r2230;
setp.eq.s32 %p440, %r3082, 0;
selp.b32 %r2232, 0, %r2231, %p440;
and.b32 %r3081, %r3081, %r2232;
mov.u32 %r3423, %r3079;
$L__BB6_393:
shl.b32 %r2233, %r3081, 1;
or.b32 %r3089, %r2233, 1;
setp.eq.s32 %p441, %r3422, 0;
selp.b32 %r744, 8, 7, %p441;
add.s32 %r3090, %r3082, 1;
setp.lt.u32 %p442, %r3090, %r744;
@%p442 bra $L__BB6_397;
sub.s32 %r3090, %r3090, %r744;
setp.ge.u32 %p443, %r3419, %r3;
mov.u32 %r3087, 1;
@%p443 bra $L__BB6_396;
shr.u32 %r2235, %r3089, %r3090;
cvt.u16.u32 %rs45, %r2235;
and.b16 %rs46, %rs45, 255;
cvt.u64.u32 %rd258, %r3419;
add.s64 %rd259, %rd4, %rd258;
st.global.u8 [%rd259], %rs45;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p444, %rs46, 255;
selp.u32 %r3422, 1, 0, %p444;
mov.u32 %r3087, %r3423;
$L__BB6_396:
mov.u32 %r2236, -1;
shl.b32 %r2237, %r2236, %r3090;
not.b32 %r2238, %r2237;
setp.eq.s32 %p445, %r3090, 0;
selp.b32 %r2239, 0, %r2238, %p445;
and.b32 %r3089, %r3089, %r2239;
mov.u32 %r3423, %r3087;
$L__BB6_397:
shl.b32 %r2240, %r3089, 1;
or.b32 %r3097, %r2240, 1;
setp.eq.s32 %p446, %r3422, 0;
selp.b32 %r759, 8, 7, %p446;
add.s32 %r3098, %r3090, 1;
setp.lt.u32 %p447, %r3098, %r759;
@%p447 bra $L__BB6_401;
sub.s32 %r3098, %r3098, %r759;
setp.ge.u32 %p448, %r3419, %r3;
mov.u32 %r3095, 1;
@%p448 bra $L__BB6_400;
shr.u32 %r2242, %r3097, %r3098;
cvt.u16.u32 %rs47, %r2242;
and.b16 %rs48, %rs47, 255;
cvt.u64.u32 %rd260, %r3419;
add.s64 %rd261, %rd4, %rd260;
st.global.u8 [%rd261], %rs47;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p449, %rs48, 255;
selp.u32 %r3422, 1, 0, %p449;
mov.u32 %r3095, %r3423;
$L__BB6_400:
mov.u32 %r2243, -1;
shl.b32 %r2244, %r2243, %r3098;
not.b32 %r2245, %r2244;
setp.eq.s32 %p450, %r3098, 0;
selp.b32 %r2246, 0, %r2245, %p450;
and.b32 %r3097, %r3097, %r2246;
mov.u32 %r3423, %r3095;
$L__BB6_401:
shl.b32 %r2247, %r3097, 1;
or.b32 %r3105, %r2247, 1;
setp.eq.s32 %p451, %r3422, 0;
selp.b32 %r774, 8, 7, %p451;
add.s32 %r3106, %r3098, 1;
setp.lt.u32 %p452, %r3106, %r774;
@%p452 bra $L__BB6_405;
sub.s32 %r3106, %r3106, %r774;
setp.ge.u32 %p453, %r3419, %r3;
mov.u32 %r3103, 1;
@%p453 bra $L__BB6_404;
shr.u32 %r2249, %r3105, %r3106;
cvt.u16.u32 %rs49, %r2249;
and.b16 %rs50, %rs49, 255;
cvt.u64.u32 %rd262, %r3419;
add.s64 %rd263, %rd4, %rd262;
st.global.u8 [%rd263], %rs49;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p454, %rs50, 255;
selp.u32 %r3422, 1, 0, %p454;
mov.u32 %r3103, %r3423;
$L__BB6_404:
mov.u32 %r2250, -1;
shl.b32 %r2251, %r2250, %r3106;
not.b32 %r2252, %r2251;
setp.eq.s32 %p455, %r3106, 0;
selp.b32 %r2253, 0, %r2252, %p455;
and.b32 %r3105, %r3105, %r2253;
mov.u32 %r3423, %r3103;
$L__BB6_405:
shl.b32 %r2254, %r3105, 1;
or.b32 %r3113, %r2254, 1;
setp.eq.s32 %p456, %r3422, 0;
selp.b32 %r789, 8, 7, %p456;
add.s32 %r3114, %r3106, 1;
setp.lt.u32 %p457, %r3114, %r789;
@%p457 bra $L__BB6_409;
sub.s32 %r3114, %r3114, %r789;
setp.ge.u32 %p458, %r3419, %r3;
mov.u32 %r3111, 1;
@%p458 bra $L__BB6_408;
shr.u32 %r2256, %r3113, %r3114;
cvt.u16.u32 %rs51, %r2256;
and.b16 %rs52, %rs51, 255;
cvt.u64.u32 %rd264, %r3419;
add.s64 %rd265, %rd4, %rd264;
st.global.u8 [%rd265], %rs51;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p459, %rs52, 255;
selp.u32 %r3422, 1, 0, %p459;
mov.u32 %r3111, %r3423;
$L__BB6_408:
mov.u32 %r2257, -1;
shl.b32 %r2258, %r2257, %r3114;
not.b32 %r2259, %r2258;
setp.eq.s32 %p460, %r3114, 0;
selp.b32 %r2260, 0, %r2259, %p460;
and.b32 %r3113, %r3113, %r2260;
mov.u32 %r3423, %r3111;
$L__BB6_409:
shl.b32 %r2261, %r3113, 1;
or.b32 %r3121, %r2261, 1;
setp.eq.s32 %p461, %r3422, 0;
selp.b32 %r804, 8, 7, %p461;
add.s32 %r3122, %r3114, 1;
setp.lt.u32 %p462, %r3122, %r804;
@%p462 bra $L__BB6_413;
sub.s32 %r3122, %r3122, %r804;
setp.ge.u32 %p463, %r3419, %r3;
mov.u32 %r3119, 1;
@%p463 bra $L__BB6_412;
shr.u32 %r2263, %r3121, %r3122;
cvt.u16.u32 %rs53, %r2263;
and.b16 %rs54, %rs53, 255;
cvt.u64.u32 %rd266, %r3419;
add.s64 %rd267, %rd4, %rd266;
st.global.u8 [%rd267], %rs53;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p464, %rs54, 255;
selp.u32 %r3422, 1, 0, %p464;
mov.u32 %r3119, %r3423;
$L__BB6_412:
mov.u32 %r2264, -1;
shl.b32 %r2265, %r2264, %r3122;
not.b32 %r2266, %r2265;
setp.eq.s32 %p465, %r3122, 0;
selp.b32 %r2267, 0, %r2266, %p465;
and.b32 %r3121, %r3121, %r2267;
mov.u32 %r3423, %r3119;
// Append bit 6 of (%r274 - 37) to the pending bits; the following blocks
// append bits 5 down to 0 of the same value in the same way.
//   %r818 = %r274 - 37
//   bfi.b32 d, a, b, 1, 31 gives %r3129 = (%r3121 << 1) | bit, with the bit
//   (0 or 1) in %r2269.
// NOTE(review): preceded by a run of 1 bits, this looks like the 7-bit
// payload of the longest JPEG 2000 number-of-coding-passes codeword - confirm.
$L__BB6_413:
add.s32 %r818, %r274, -37;
shr.u32 %r2268, %r818, 6;
and.b32 %r2269, %r2268, 1;
bfi.b32 %r3129, %r3121, %r2269, 1, 31;
// Next byte takes 8 bits, or 7 when %r3422 != 0.
setp.eq.s32 %p466, %r3422, 0;
selp.b32 %r820, 8, 7, %p466;
add.s32 %r3130, %r3122, 1;
setp.lt.u32 %p467, %r3130, %r820;
@%p467 bra $L__BB6_417;
// A byte is ready; %r3130 becomes the leftover bit count.
sub.s32 %r3130, %r3130, %r820;
// Skip the store (and leave %r3127 = 1) when the write index %r3419 >= %r3.
setp.ge.u32 %p468, %r3419, %r3;
mov.u32 %r3127, 1;
@%p468 bra $L__BB6_416;
shr.u32 %r2271, %r3129, %r3130;
cvt.u16.u32 %rs55, %r2271;
and.b16 %rs56, %rs55, 255;
cvt.u64.u32 %rd268, %r3419;
add.s64 %rd269, %rd4, %rd268;
st.global.u8 [%rd269], %rs55;
add.s32 %r3419, %r3419, 1;
// %r3422 = 1 if the stored byte was 0xFF, else 0.
setp.eq.s16 %p469, %rs56, 255;
selp.u32 %r3422, 1, 0, %p469;
mov.u32 %r3127, %r3423;
$L__BB6_416:
// Keep only the low %r3130 leftover bits (mask is 0 when none are left).
mov.u32 %r2272, -1;
shl.b32 %r2273, %r2272, %r3130;
not.b32 %r2274, %r2273;
setp.eq.s32 %p470, %r3130, 0;
selp.b32 %r2275, 0, %r2274, %p470;
and.b32 %r3129, %r3129, %r2275;
mov.u32 %r3423, %r3127;
$L__BB6_417:
shr.u32 %r2276, %r818, 5;
and.b32 %r2277, %r2276, 1;
bfi.b32 %r3137, %r3129, %r2277, 1, 31;
setp.eq.s32 %p471, %r3422, 0;
selp.b32 %r835, 8, 7, %p471;
add.s32 %r3138, %r3130, 1;
setp.lt.u32 %p472, %r3138, %r835;
@%p472 bra $L__BB6_421;
sub.s32 %r3138, %r3138, %r835;
setp.ge.u32 %p473, %r3419, %r3;
mov.u32 %r3135, 1;
@%p473 bra $L__BB6_420;
shr.u32 %r2279, %r3137, %r3138;
cvt.u16.u32 %rs57, %r2279;
and.b16 %rs58, %rs57, 255;
cvt.u64.u32 %rd270, %r3419;
add.s64 %rd271, %rd4, %rd270;
st.global.u8 [%rd271], %rs57;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p474, %rs58, 255;
selp.u32 %r3422, 1, 0, %p474;
mov.u32 %r3135, %r3423;
$L__BB6_420:
mov.u32 %r2280, -1;
shl.b32 %r2281, %r2280, %r3138;
not.b32 %r2282, %r2281;
setp.eq.s32 %p475, %r3138, 0;
selp.b32 %r2283, 0, %r2282, %p475;
and.b32 %r3137, %r3137, %r2283;
mov.u32 %r3423, %r3135;
$L__BB6_421:
shr.u32 %r2284, %r818, 4;
and.b32 %r2285, %r2284, 1;
bfi.b32 %r3145, %r3137, %r2285, 1, 31;
setp.eq.s32 %p476, %r3422, 0;
selp.b32 %r850, 8, 7, %p476;
add.s32 %r3146, %r3138, 1;
setp.lt.u32 %p477, %r3146, %r850;
@%p477 bra $L__BB6_425;
sub.s32 %r3146, %r3146, %r850;
setp.ge.u32 %p478, %r3419, %r3;
mov.u32 %r3143, 1;
@%p478 bra $L__BB6_424;
shr.u32 %r2287, %r3145, %r3146;
cvt.u16.u32 %rs59, %r2287;
and.b16 %rs60, %rs59, 255;
cvt.u64.u32 %rd272, %r3419;
add.s64 %rd273, %rd4, %rd272;
st.global.u8 [%rd273], %rs59;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p479, %rs60, 255;
selp.u32 %r3422, 1, 0, %p479;
mov.u32 %r3143, %r3423;
$L__BB6_424:
mov.u32 %r2288, -1;
shl.b32 %r2289, %r2288, %r3146;
not.b32 %r2290, %r2289;
setp.eq.s32 %p480, %r3146, 0;
selp.b32 %r2291, 0, %r2290, %p480;
and.b32 %r3145, %r3145, %r2291;
mov.u32 %r3423, %r3143;
$L__BB6_425:
shr.u32 %r2292, %r818, 3;
and.b32 %r2293, %r2292, 1;
bfi.b32 %r3153, %r3145, %r2293, 1, 31;
setp.eq.s32 %p481, %r3422, 0;
selp.b32 %r865, 8, 7, %p481;
add.s32 %r3154, %r3146, 1;
setp.lt.u32 %p482, %r3154, %r865;
@%p482 bra $L__BB6_429;
sub.s32 %r3154, %r3154, %r865;
setp.ge.u32 %p483, %r3419, %r3;
mov.u32 %r3151, 1;
@%p483 bra $L__BB6_428;
shr.u32 %r2295, %r3153, %r3154;
cvt.u16.u32 %rs61, %r2295;
and.b16 %rs62, %rs61, 255;
cvt.u64.u32 %rd274, %r3419;
add.s64 %rd275, %rd4, %rd274;
st.global.u8 [%rd275], %rs61;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p484, %rs62, 255;
selp.u32 %r3422, 1, 0, %p484;
mov.u32 %r3151, %r3423;
$L__BB6_428:
mov.u32 %r2296, -1;
shl.b32 %r2297, %r2296, %r3154;
not.b32 %r2298, %r2297;
setp.eq.s32 %p485, %r3154, 0;
selp.b32 %r2299, 0, %r2298, %p485;
and.b32 %r3153, %r3153, %r2299;
mov.u32 %r3423, %r3151;
$L__BB6_429:
shr.u32 %r2300, %r818, 2;
and.b32 %r2301, %r2300, 1;
bfi.b32 %r3161, %r3153, %r2301, 1, 31;
setp.eq.s32 %p486, %r3422, 0;
selp.b32 %r880, 8, 7, %p486;
add.s32 %r3162, %r3154, 1;
setp.lt.u32 %p487, %r3162, %r880;
@%p487 bra $L__BB6_433;
sub.s32 %r3162, %r3162, %r880;
setp.ge.u32 %p488, %r3419, %r3;
mov.u32 %r3159, 1;
@%p488 bra $L__BB6_432;
shr.u32 %r2303, %r3161, %r3162;
cvt.u16.u32 %rs63, %r2303;
and.b16 %rs64, %rs63, 255;
cvt.u64.u32 %rd276, %r3419;
add.s64 %rd277, %rd4, %rd276;
st.global.u8 [%rd277], %rs63;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p489, %rs64, 255;
selp.u32 %r3422, 1, 0, %p489;
mov.u32 %r3159, %r3423;
$L__BB6_432:
mov.u32 %r2304, -1;
shl.b32 %r2305, %r2304, %r3162;
not.b32 %r2306, %r2305;
setp.eq.s32 %p490, %r3162, 0;
selp.b32 %r2307, 0, %r2306, %p490;
and.b32 %r3161, %r3161, %r2307;
mov.u32 %r3423, %r3159;
$L__BB6_433:
shr.u32 %r2308, %r818, 1;
and.b32 %r2309, %r2308, 1;
bfi.b32 %r3169, %r3161, %r2309, 1, 31;
setp.eq.s32 %p491, %r3422, 0;
selp.b32 %r895, 8, 7, %p491;
add.s32 %r3170, %r3162, 1;
setp.lt.u32 %p492, %r3170, %r895;
@%p492 bra $L__BB6_437;
sub.s32 %r3170, %r3170, %r895;
setp.ge.u32 %p493, %r3419, %r3;
mov.u32 %r3167, 1;
@%p493 bra $L__BB6_436;
shr.u32 %r2311, %r3169, %r3170;
cvt.u16.u32 %rs65, %r2311;
and.b16 %rs66, %rs65, 255;
cvt.u64.u32 %rd278, %r3419;
add.s64 %rd279, %rd4, %rd278;
st.global.u8 [%rd279], %rs65;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p494, %rs66, 255;
selp.u32 %r3422, 1, 0, %p494;
mov.u32 %r3167, %r3423;
$L__BB6_436:
mov.u32 %r2312, -1;
shl.b32 %r2313, %r2312, %r3170;
not.b32 %r2314, %r2313;
setp.eq.s32 %p495, %r3170, 0;
selp.b32 %r2315, 0, %r2314, %p495;
and.b32 %r3169, %r3169, %r2315;
mov.u32 %r3423, %r3167;
$L__BB6_437:
and.b32 %r2316, %r818, 1;
bfi.b32 %r3285, %r3169, %r2316, 1, 31;
setp.eq.s32 %p496, %r3422, 0;
selp.b32 %r910, 8, 7, %p496;
add.s32 %r3286, %r3170, 1;
setp.lt.u32 %p497, %r3286, %r910;
@%p497 bra $L__BB6_505;
sub.s32 %r3286, %r3286, %r910;
setp.ge.u32 %p498, %r3419, %r3;
mov.u32 %r3175, 1;
@%p498 bra $L__BB6_440;
shr.u32 %r2318, %r3285, %r3286;
cvt.u16.u32 %rs67, %r2318;
and.b16 %rs68, %rs67, 255;
cvt.u64.u32 %rd280, %r3419;
add.s64 %rd281, %rd4, %rd280;
st.global.u8 [%rd281], %rs67;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p499, %rs68, 255;
selp.u32 %r3422, 1, 0, %p499;
mov.u32 %r3175, %r3423;
$L__BB6_440:
mov.u32 %r2319, -1;
shl.b32 %r2320, %r2319, %r3286;
not.b32 %r2321, %r2320;
setp.eq.s32 %p500, %r3286, 0;
selp.b32 %r2322, 0, %r2321, %p500;
and.b32 %r3285, %r3285, %r2322;
mov.u32 %r3423, %r3175;
// Join point after the %r274-dependent code above. Sets up the threshold
// loop at $L__BB6_509:
//   %r1143 = %r271 when (%r272 == 0 and %r274 == 1), otherwise %r272
//   %r3290 = 3 * ((%r274 - 1) / 3) + 1
//   %r3291 = floor(log2(%r3290)), or 0 when %r274 == 0 or %r3290 < 2
//   %r3298 = %r3291 + %r3297
// NOTE(review): this resembles the JPEG 2000 Lblock length-indicator
// computation - confirm against the CUDA source.
$L__BB6_505:
setp.eq.s32 %p580, %r272, 0;
mov.u32 %r3291, 0;
and.pred %p582, %p580, %p417;
selp.b32 %r1143, %r271, %r272, %p582;
add.s32 %r2438, %r274, -1;
// Unsigned divide by 3 via multiply-high: (x * 0xAAAAAAAB) >> 33.
mul.wide.u32 %rd306, %r2438, -1431655765;
shr.u64 %rd307, %rd306, 33;
cvt.u32.u64 %r2439, %rd307;
mad.lo.s32 %r3290, %r2439, 3, 1;
setp.lt.u32 %p583, %r3290, 2;
setp.eq.s32 %p584, %r274, 0;
or.pred %p585, %p584, %p583;
@%p585 bra $L__BB6_508;
mov.u32 %r3291, 0;
// Count how many times %r3290 can be halved before it drops below 2.
$L__BB6_507:
shr.u32 %r1147, %r3290, 1;
add.s32 %r3291, %r3291, 1;
setp.gt.u32 %p586, %r3290, 3;
mov.u32 %r3290, %r1147;
@%p586 bra $L__BB6_507;
$L__BB6_508:
add.s32 %r3298, %r3291, %r3297;
bra.uni $L__BB6_509;
$L__BB6_589:
// Loop latch: after a 1 bit has been emitted, widen both bit widths by one and retest.
add.s32 %r3297, %r3297, 1;
add.s32 %r3298, %r3298, 1;
$L__BB6_509:
// Width test. Emit another 1 bit (at $L__BB6_585) while either
//   (a) %r3298 < 32 and (1 << %r3298) <= %r1143, or
//   (b) %r274 >= 2, w = %r3297 + (%r274 > 2 ? 1 : 0) < 32 and (1 << w) <= %r273.
// Otherwise fall out to $L__BB6_512.
// NOTE(review): this looks like a unary-coded width increment - confirm against the kernel source.
mov.u32 %r2441, 1;
shl.b32 %r2442, %r2441, %r3298;
setp.le.u32 %p588, %r2442, %r1143;
setp.lt.u32 %p589, %r3298, 32;
and.pred %p590, %p589, %p588;
@%p590 bra $L__BB6_585;
setp.lt.u32 %p591, %r274, 2;
@%p591 bra $L__BB6_512;
setp.gt.u32 %p730, %r274, 2;
selp.u32 %r2747, 1, 0, %p730;
add.s32 %r2443, %r3297, %r2747;
setp.lt.u32 %p592, %r2443, 32;
mov.u32 %r2444, 1;
shl.b32 %r2445, %r2444, %r2443;
setp.le.u32 %p593, %r2445, %r273;
and.pred %p594, %p592, %p593;
@%p594 bra $L__BB6_585;
bra.uni $L__BB6_512;
$L__BB6_585:
// Append a 1 bit: accumulator %r3285 = (%r3285 << 1) | 1, pending count %r3286 += 1.
shl.b32 %r2572, %r3285, 1;
or.b32 %r3285, %r2572, 1;
// Byte threshold is 8 bits, or 7 when the previously written byte was 0xFF (%r3422 != 0).
setp.eq.s32 %p683, %r3422, 0;
selp.b32 %r1425, 8, 7, %p683;
add.s32 %r3286, %r3286, 1;
setp.lt.u32 %p684, %r3286, %r1425;
@%p684 bra $L__BB6_589;
sub.s32 %r3286, %r3286, %r1425;
// If the output index %r3419 has reached the limit %r3, skip the store and set %r3423 = 1.
setp.ge.u32 %p685, %r3419, %r3;
mov.u32 %r3450, 1;
@%p685 bra $L__BB6_588;
// Store the completed byte at %rd4 + %r3419, advance the index, record whether it was 0xFF.
cvt.u64.u32 %rd393, %r3419;
add.s64 %rd392, %rd4, %rd393;
shr.u32 %r2574, %r3285, %r3286;
cvt.u16.u32 %rs131, %r2574;
and.b16 %rs132, %rs131, 255;
st.global.u8 [%rd392], %rs131;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p686, %rs132, 255;
selp.u32 %r3422, 1, 0, %p686;
mov.u32 %r3450, %r3423;
$L__BB6_588:
// Keep only the leftover low bits in the accumulator.
mov.u32 %r2575, -1;
shl.b32 %r2576, %r2575, %r3286;
not.b32 %r2577, %r2576;
setp.eq.s32 %p687, %r3286, 0;
selp.b32 %r2578, 0, %r2577, %p687;
and.b32 %r3285, %r2578, %r3285;
mov.u32 %r3423, %r3450;
bra.uni $L__BB6_589;
$L__BB6_512:
// Append a 0 bit: accumulator %r3457 = %r3285 << 1, pending count %r3458 = %r3286 + 1.
shl.b32 %r3457, %r3285, 1;
// Byte threshold is 8 bits, or 7 when the previously written byte was 0xFF (%r3422 != 0).
setp.eq.s32 %p595, %r3422, 0;
selp.b32 %r1160, 8, 7, %p595;
add.s32 %r3458, %r3286, 1;
setp.lt.u32 %p596, %r3458, %r1160;
@%p596 bra $L__BB6_516;
sub.s32 %r3458, %r3458, %r1160;
// If the output index %r3419 has reached the limit %r3, skip the store and set %r3423 = 1.
setp.ge.u32 %p597, %r3419, %r3;
mov.u32 %r3301, 1;
@%p597 bra $L__BB6_515;
// Store the completed byte at %rd4 + %r3419, advance the index, record whether it was 0xFF.
cvt.u64.u32 %rd391, %r3419;
add.s64 %rd390, %rd4, %rd391;
shr.u32 %r2447, %r3457, %r3458;
cvt.u16.u32 %rs101, %r2447;
and.b16 %rs102, %rs101, 255;
st.global.u8 [%rd390], %rs101;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p598, %rs102, 255;
selp.u32 %r3422, 1, 0, %p598;
mov.u32 %r3301, %r3423;
$L__BB6_515:
// Keep only the leftover low bits. The mask is (-1 << n) ^ -2, i.e. the low-n-bit
// mask with bit 0 cleared; bit 0 of %r3457 is already zero after the shift above.
mov.u32 %r2448, -1;
shl.b32 %r2449, %r2448, %r3458;
xor.b32 %r2450, %r2449, -2;
setp.eq.s32 %p599, %r3458, 0;
selp.b32 %r2451, 0, %r2450, %p599;
and.b32 %r3457, %r2451, %r3457;
mov.u32 %r3423, %r3301;
setp.eq.s32 %p600, %r3298, 0;
@%p600 bra $L__BB6_550;
add.s32 %r1174, %r3298, -1;
and.b32 %r1175, %r3298, 3;
setp.eq.s32 %p601, %r1175, 0;
mov.u32 %r3331, %r3298;
@%p601 bra $L__BB6_532;
shr.u32 %r2453, %r1143, %r1174;
and.b32 %r2454, %r2453, 1;
bfi.b32 %r3457, %r3457, %r2454, 1, 31;
setp.eq.s32 %p602, %r3422, 0;
selp.b32 %r1177, 8, 7, %p602;
add.s32 %r3458, %r3458, 1;
setp.lt.u32 %p603, %r3458, %r1177;
@%p603 bra $L__BB6_522;
sub.s32 %r3458, %r3458, %r1177;
setp.ge.u32 %p604, %r3419, %r3;
mov.u32 %r3309, 1;
@%p604 bra $L__BB6_521;
shr.u32 %r2456, %r3457, %r3458;
cvt.u16.u32 %rs103, %r2456;
and.b16 %rs104, %rs103, 255;
cvt.u64.u32 %rd309, %r3419;
add.s64 %rd310, %rd4, %rd309;
st.global.u8 [%rd310], %rs103;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p605, %rs104, 255;
selp.u32 %r3422, 1, 0, %p605;
mov.u32 %r3309, %r3423;
$L__BB6_521:
mov.u32 %r2457, -1;
shl.b32 %r2458, %r2457, %r3458;
not.b32 %r2459, %r2458;
setp.eq.s32 %p606, %r3458, 0;
selp.b32 %r2460, 0, %r2459, %p606;
and.b32 %r3457, %r3457, %r2460;
mov.u32 %r3423, %r3309;
$L__BB6_522:
setp.eq.s32 %p607, %r1175, 1;
mov.u32 %r3331, %r1174;
@%p607 bra $L__BB6_532;
add.s32 %r3331, %r3298, -2;
shr.u32 %r2461, %r1143, %r3331;
and.b32 %r2462, %r2461, 1;
bfi.b32 %r3457, %r3457, %r2462, 1, 31;
setp.eq.s32 %p608, %r3422, 0;
selp.b32 %r1193, 8, 7, %p608;
add.s32 %r3458, %r3458, 1;
setp.lt.u32 %p609, %r3458, %r1193;
@%p609 bra $L__BB6_527;
sub.s32 %r3458, %r3458, %r1193;
setp.ge.u32 %p610, %r3419, %r3;
mov.u32 %r3317, 1;
@%p610 bra $L__BB6_526;
shr.u32 %r2464, %r3457, %r3458;
cvt.u16.u32 %rs105, %r2464;
and.b16 %rs106, %rs105, 255;
cvt.u64.u32 %rd311, %r3419;
add.s64 %rd312, %rd4, %rd311;
st.global.u8 [%rd312], %rs105;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p611, %rs106, 255;
selp.u32 %r3422, 1, 0, %p611;
mov.u32 %r3317, %r3423;
$L__BB6_526:
mov.u32 %r2465, -1;
shl.b32 %r2466, %r2465, %r3458;
not.b32 %r2467, %r2466;
setp.eq.s32 %p612, %r3458, 0;
selp.b32 %r2468, 0, %r2467, %p612;
and.b32 %r3457, %r3457, %r2468;
mov.u32 %r3423, %r3317;
$L__BB6_527:
setp.eq.s32 %p613, %r1175, 2;
@%p613 bra $L__BB6_532;
add.s32 %r3331, %r3298, -3;
shr.u32 %r2469, %r1143, %r3331;
and.b32 %r2470, %r2469, 1;
bfi.b32 %r3457, %r3457, %r2470, 1, 31;
setp.eq.s32 %p614, %r3422, 0;
selp.b32 %r1209, 8, 7, %p614;
add.s32 %r3458, %r3458, 1;
setp.lt.u32 %p615, %r3458, %r1209;
@%p615 bra $L__BB6_532;
sub.s32 %r3458, %r3458, %r1209;
setp.ge.u32 %p616, %r3419, %r3;
mov.u32 %r3325, 1;
@%p616 bra $L__BB6_531;
shr.u32 %r2472, %r3457, %r3458;
cvt.u16.u32 %rs107, %r2472;
and.b16 %rs108, %rs107, 255;
cvt.u64.u32 %rd313, %r3419;
add.s64 %rd314, %rd4, %rd313;
st.global.u8 [%rd314], %rs107;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p617, %rs108, 255;
selp.u32 %r3422, 1, 0, %p617;
mov.u32 %r3325, %r3423;
$L__BB6_531:
mov.u32 %r2473, -1;
shl.b32 %r2474, %r2473, %r3458;
not.b32 %r2475, %r2474;
setp.eq.s32 %p618, %r3458, 0;
selp.b32 %r2476, 0, %r2475, %p618;
and.b32 %r3457, %r3457, %r2476;
mov.u32 %r3423, %r3325;
$L__BB6_532:
setp.lt.u32 %p619, %r1174, 3;
@%p619 bra $L__BB6_550;
$L__BB6_533:
// Loop body unrolled four times: emits bits %r3331-1, %r3331-2, %r3331-3 and %r3331-4
// of %r1143 (most significant first), then repeats until %r3331 reaches 0.
// Each step appends the bit with bfi ((acc << 1) | bit), bumps the pending count and
// flushes a byte at %rd4 + %r3419 once 8 bits (7 after a 0xFF byte, %r3422 != 0) are
// pending; when %r3419 >= %r3 the store is skipped and %r3423 is set to 1.
// Step 1: bit %r3331 - 1.
add.s32 %r2477, %r3331, -1;
shr.u32 %r2478, %r1143, %r2477;
and.b32 %r2479, %r2478, 1;
bfi.b32 %r3347, %r3457, %r2479, 1, 31;
setp.eq.s32 %p620, %r3422, 0;
selp.b32 %r1236, 8, 7, %p620;
add.s32 %r3348, %r3458, 1;
setp.lt.u32 %p621, %r3348, %r1236;
@%p621 bra $L__BB6_537;
sub.s32 %r3348, %r3348, %r1236;
setp.ge.u32 %p622, %r3419, %r3;
mov.u32 %r3345, 1;
@%p622 bra $L__BB6_536;
shr.u32 %r2481, %r3347, %r3348;
cvt.u16.u32 %rs109, %r2481;
and.b16 %rs110, %rs109, 255;
cvt.u64.u32 %rd315, %r3419;
add.s64 %rd316, %rd4, %rd315;
st.global.u8 [%rd316], %rs109;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p623, %rs110, 255;
selp.u32 %r3422, 1, 0, %p623;
mov.u32 %r3345, %r3423;
$L__BB6_536:
// Keep only the leftover low bits in the accumulator.
mov.u32 %r2482, -1;
shl.b32 %r2483, %r2482, %r3348;
not.b32 %r2484, %r2483;
setp.eq.s32 %p624, %r3348, 0;
selp.b32 %r2485, 0, %r2484, %p624;
and.b32 %r3347, %r3347, %r2485;
mov.u32 %r3423, %r3345;
$L__BB6_537:
// Step 2: bit %r3331 - 2.
add.s32 %r2486, %r3331, -2;
shr.u32 %r2487, %r1143, %r2486;
and.b32 %r2488, %r2487, 1;
bfi.b32 %r3355, %r3347, %r2488, 1, 31;
setp.eq.s32 %p625, %r3422, 0;
selp.b32 %r1251, 8, 7, %p625;
add.s32 %r3356, %r3348, 1;
setp.lt.u32 %p626, %r3356, %r1251;
@%p626 bra $L__BB6_541;
sub.s32 %r3356, %r3356, %r1251;
setp.ge.u32 %p627, %r3419, %r3;
mov.u32 %r3353, 1;
@%p627 bra $L__BB6_540;
shr.u32 %r2490, %r3355, %r3356;
cvt.u16.u32 %rs111, %r2490;
and.b16 %rs112, %rs111, 255;
cvt.u64.u32 %rd317, %r3419;
add.s64 %rd318, %rd4, %rd317;
st.global.u8 [%rd318], %rs111;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p628, %rs112, 255;
selp.u32 %r3422, 1, 0, %p628;
mov.u32 %r3353, %r3423;
$L__BB6_540:
mov.u32 %r2491, -1;
shl.b32 %r2492, %r2491, %r3356;
not.b32 %r2493, %r2492;
setp.eq.s32 %p629, %r3356, 0;
selp.b32 %r2494, 0, %r2493, %p629;
and.b32 %r3355, %r3355, %r2494;
mov.u32 %r3423, %r3353;
$L__BB6_541:
// Step 3: bit %r3331 - 3.
add.s32 %r2495, %r3331, -3;
shr.u32 %r2496, %r1143, %r2495;
and.b32 %r2497, %r2496, 1;
bfi.b32 %r3363, %r3355, %r2497, 1, 31;
setp.eq.s32 %p630, %r3422, 0;
selp.b32 %r1266, 8, 7, %p630;
add.s32 %r3364, %r3356, 1;
setp.lt.u32 %p631, %r3364, %r1266;
@%p631 bra $L__BB6_545;
sub.s32 %r3364, %r3364, %r1266;
setp.ge.u32 %p632, %r3419, %r3;
mov.u32 %r3361, 1;
@%p632 bra $L__BB6_544;
shr.u32 %r2499, %r3363, %r3364;
cvt.u16.u32 %rs113, %r2499;
and.b16 %rs114, %rs113, 255;
cvt.u64.u32 %rd319, %r3419;
add.s64 %rd320, %rd4, %rd319;
st.global.u8 [%rd320], %rs113;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p633, %rs114, 255;
selp.u32 %r3422, 1, 0, %p633;
mov.u32 %r3361, %r3423;
$L__BB6_544:
mov.u32 %r2500, -1;
shl.b32 %r2501, %r2500, %r3364;
not.b32 %r2502, %r2501;
setp.eq.s32 %p634, %r3364, 0;
selp.b32 %r2503, 0, %r2502, %p634;
and.b32 %r3363, %r3363, %r2503;
mov.u32 %r3423, %r3361;
$L__BB6_545:
// Step 4: bit %r3331 - 4; %r3331 itself is decremented by 4 here.
add.s32 %r3331, %r3331, -4;
shr.u32 %r2504, %r1143, %r3331;
and.b32 %r2505, %r2504, 1;
bfi.b32 %r3457, %r3363, %r2505, 1, 31;
setp.eq.s32 %p635, %r3422, 0;
selp.b32 %r1282, 8, 7, %p635;
add.s32 %r3458, %r3364, 1;
setp.lt.u32 %p636, %r3458, %r1282;
@%p636 bra $L__BB6_549;
sub.s32 %r3458, %r3458, %r1282;
setp.ge.u32 %p637, %r3419, %r3;
mov.u32 %r3369, 1;
@%p637 bra $L__BB6_548;
shr.u32 %r2507, %r3457, %r3458;
cvt.u16.u32 %rs115, %r2507;
and.b16 %rs116, %rs115, 255;
cvt.u64.u32 %rd321, %r3419;
add.s64 %rd322, %rd4, %rd321;
st.global.u8 [%rd322], %rs115;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p638, %rs116, 255;
selp.u32 %r3422, 1, 0, %p638;
mov.u32 %r3369, %r3423;
$L__BB6_548:
mov.u32 %r2508, -1;
shl.b32 %r2509, %r2508, %r3458;
not.b32 %r2510, %r2509;
setp.eq.s32 %p639, %r3458, 0;
selp.b32 %r2511, 0, %r2510, %p639;
and.b32 %r3457, %r3457, %r2511;
mov.u32 %r3423, %r3369;
$L__BB6_549:
// Continue while bits remain.
setp.ne.s32 %p640, %r3331, 0;
@%p640 bra $L__BB6_533;
$L__BB6_550:
// Second value: when %r274 >= 2, emit the low %r1301 bits of %r273, most significant
// first, where %r1301 = %r3297 + (%r274 > 2 ? 1 : 0). Nothing is emitted when
// %r274 < 2 or %r1301 == 0.
// This part handles the (%r1301 & 3) leading bits; the rest is emitted four at a
// time by the loop at $L__BB6_568. %r3404 tracks the index of the last bit emitted.
// Each step: accumulator %r3457 = (%r3457 << 1) | bit, pending count %r3458 += 1, then
// flush a byte at %rd4 + %r3419 when 8 bits (7 after a 0xFF byte, %r3422 != 0) are pending;
// if %r3419 >= %r3 the store is skipped and %r3423 is set to 1.
setp.lt.u32 %p728, %r274, 2;
@%p728 bra $L__BB6_240;
setp.gt.u32 %p729, %r274, 2;
selp.u32 %r2746, 1, 0, %p729;
add.s32 %r1301, %r3297, %r2746;
setp.eq.s32 %p642, %r1301, 0;
@%p642 bra $L__BB6_240;
add.s32 %r1302, %r1301, -1;
and.b32 %r1303, %r1301, 3;
setp.eq.s32 %p643, %r1303, 0;
mov.u32 %r3404, %r1301;
@%p643 bra $L__BB6_567;
// First peeled bit: bit (%r1301 - 1) of %r273.
shr.u32 %r2513, %r273, %r1302;
and.b32 %r2514, %r2513, 1;
bfi.b32 %r3457, %r3457, %r2514, 1, 31;
setp.eq.s32 %p644, %r3422, 0;
selp.b32 %r1305, 8, 7, %p644;
add.s32 %r3458, %r3458, 1;
setp.lt.u32 %p645, %r3458, %r1305;
@%p645 bra $L__BB6_557;
sub.s32 %r3458, %r3458, %r1305;
setp.ge.u32 %p646, %r3419, %r3;
mov.u32 %r3382, 1;
@%p646 bra $L__BB6_556;
shr.u32 %r2516, %r3457, %r3458;
cvt.u16.u32 %rs117, %r2516;
and.b16 %rs118, %rs117, 255;
cvt.u64.u32 %rd323, %r3419;
add.s64 %rd324, %rd4, %rd323;
st.global.u8 [%rd324], %rs117;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p647, %rs118, 255;
selp.u32 %r3422, 1, 0, %p647;
mov.u32 %r3382, %r3423;
$L__BB6_556:
// Keep only the leftover low bits in the accumulator.
mov.u32 %r2517, -1;
shl.b32 %r2518, %r2517, %r3458;
not.b32 %r2519, %r2518;
setp.eq.s32 %p648, %r3458, 0;
selp.b32 %r2520, 0, %r2519, %p648;
and.b32 %r3457, %r3457, %r2520;
mov.u32 %r3423, %r3382;
$L__BB6_557:
setp.eq.s32 %p649, %r1303, 1;
mov.u32 %r3404, %r1302;
@%p649 bra $L__BB6_567;
// Second peeled bit: bit (%r1301 - 2) of %r273.
add.s32 %r3404, %r1301, -2;
shr.u32 %r2521, %r273, %r3404;
and.b32 %r2522, %r2521, 1;
bfi.b32 %r3457, %r3457, %r2522, 1, 31;
setp.eq.s32 %p650, %r3422, 0;
selp.b32 %r1321, 8, 7, %p650;
add.s32 %r3458, %r3458, 1;
setp.lt.u32 %p651, %r3458, %r1321;
@%p651 bra $L__BB6_562;
sub.s32 %r3458, %r3458, %r1321;
setp.ge.u32 %p652, %r3419, %r3;
mov.u32 %r3390, 1;
@%p652 bra $L__BB6_561;
shr.u32 %r2524, %r3457, %r3458;
cvt.u16.u32 %rs119, %r2524;
and.b16 %rs120, %rs119, 255;
cvt.u64.u32 %rd325, %r3419;
add.s64 %rd326, %rd4, %rd325;
st.global.u8 [%rd326], %rs119;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p653, %rs120, 255;
selp.u32 %r3422, 1, 0, %p653;
mov.u32 %r3390, %r3423;
$L__BB6_561:
mov.u32 %r2525, -1;
shl.b32 %r2526, %r2525, %r3458;
not.b32 %r2527, %r2526;
setp.eq.s32 %p654, %r3458, 0;
selp.b32 %r2528, 0, %r2527, %p654;
and.b32 %r3457, %r3457, %r2528;
mov.u32 %r3423, %r3390;
$L__BB6_562:
setp.eq.s32 %p655, %r1303, 2;
@%p655 bra $L__BB6_567;
// Third peeled bit: bit (%r1301 - 3) of %r273.
add.s32 %r3404, %r1301, -3;
shr.u32 %r2529, %r273, %r3404;
and.b32 %r2530, %r2529, 1;
bfi.b32 %r3457, %r3457, %r2530, 1, 31;
setp.eq.s32 %p656, %r3422, 0;
selp.b32 %r1337, 8, 7, %p656;
add.s32 %r3458, %r3458, 1;
setp.lt.u32 %p657, %r3458, %r1337;
@%p657 bra $L__BB6_567;
sub.s32 %r3458, %r3458, %r1337;
setp.ge.u32 %p658, %r3419, %r3;
mov.u32 %r3398, 1;
@%p658 bra $L__BB6_566;
shr.u32 %r2532, %r3457, %r3458;
cvt.u16.u32 %rs121, %r2532;
and.b16 %rs122, %rs121, 255;
cvt.u64.u32 %rd327, %r3419;
add.s64 %rd328, %rd4, %rd327;
st.global.u8 [%rd328], %rs121;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p659, %rs122, 255;
selp.u32 %r3422, 1, 0, %p659;
mov.u32 %r3398, %r3423;
$L__BB6_566:
mov.u32 %r2533, -1;
shl.b32 %r2534, %r2533, %r3458;
not.b32 %r2535, %r2534;
setp.eq.s32 %p660, %r3458, 0;
selp.b32 %r2536, 0, %r2535, %p660;
and.b32 %r3457, %r3457, %r2536;
mov.u32 %r3423, %r3398;
$L__BB6_567:
// Fewer than 4 bits in total (%r1301 - 1 < 3): nothing is left for the unrolled loop.
setp.lt.u32 %p661, %r1302, 3;
@%p661 bra $L__BB6_240;
$L__BB6_568:
// Loop body unrolled four times: emits bits %r3404-1, %r3404-2, %r3404-3 and %r3404-4
// of %r273 (most significant first), then repeats until %r3404 reaches 0.
// Each step appends the bit with bfi ((acc << 1) | bit), bumps the pending count and
// flushes a byte at %rd4 + %r3419 once 8 bits (7 after a 0xFF byte, %r3422 != 0) are
// pending; when %r3419 >= %r3 the store is skipped and %r3423 is set to 1.
// Step 1: bit %r3404 - 1.
add.s32 %r2537, %r3404, -1;
shr.u32 %r2538, %r273, %r2537;
and.b32 %r2539, %r2538, 1;
bfi.b32 %r3420, %r3457, %r2539, 1, 31;
setp.eq.s32 %p662, %r3422, 0;
selp.b32 %r1364, 8, 7, %p662;
add.s32 %r3421, %r3458, 1;
setp.lt.u32 %p663, %r3421, %r1364;
@%p663 bra $L__BB6_572;
sub.s32 %r3421, %r3421, %r1364;
setp.ge.u32 %p664, %r3419, %r3;
mov.u32 %r3418, 1;
@%p664 bra $L__BB6_571;
shr.u32 %r2541, %r3420, %r3421;
cvt.u16.u32 %rs123, %r2541;
and.b16 %rs124, %rs123, 255;
cvt.u64.u32 %rd329, %r3419;
add.s64 %rd330, %rd4, %rd329;
st.global.u8 [%rd330], %rs123;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p665, %rs124, 255;
selp.u32 %r3422, 1, 0, %p665;
mov.u32 %r3418, %r3423;
$L__BB6_571:
// Keep only the leftover low bits in the accumulator.
mov.u32 %r2542, -1;
shl.b32 %r2543, %r2542, %r3421;
not.b32 %r2544, %r2543;
setp.eq.s32 %p666, %r3421, 0;
selp.b32 %r2545, 0, %r2544, %p666;
and.b32 %r3420, %r3420, %r2545;
mov.u32 %r3423, %r3418;
$L__BB6_572:
// Step 2: bit %r3404 - 2.
add.s32 %r2546, %r3404, -2;
shr.u32 %r2547, %r273, %r2546;
and.b32 %r2548, %r2547, 1;
bfi.b32 %r3428, %r3420, %r2548, 1, 31;
setp.eq.s32 %p667, %r3422, 0;
selp.b32 %r1379, 8, 7, %p667;
add.s32 %r3429, %r3421, 1;
setp.lt.u32 %p668, %r3429, %r1379;
@%p668 bra $L__BB6_576;
sub.s32 %r3429, %r3429, %r1379;
setp.ge.u32 %p669, %r3419, %r3;
mov.u32 %r3426, 1;
@%p669 bra $L__BB6_575;
shr.u32 %r2550, %r3428, %r3429;
cvt.u16.u32 %rs125, %r2550;
and.b16 %rs126, %rs125, 255;
cvt.u64.u32 %rd331, %r3419;
add.s64 %rd332, %rd4, %rd331;
st.global.u8 [%rd332], %rs125;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p670, %rs126, 255;
selp.u32 %r3422, 1, 0, %p670;
mov.u32 %r3426, %r3423;
$L__BB6_575:
mov.u32 %r2551, -1;
shl.b32 %r2552, %r2551, %r3429;
not.b32 %r2553, %r2552;
setp.eq.s32 %p671, %r3429, 0;
selp.b32 %r2554, 0, %r2553, %p671;
and.b32 %r3428, %r3428, %r2554;
mov.u32 %r3423, %r3426;
$L__BB6_576:
// Step 3: bit %r3404 - 3.
add.s32 %r2555, %r3404, -3;
shr.u32 %r2556, %r273, %r2555;
and.b32 %r2557, %r2556, 1;
bfi.b32 %r3436, %r3428, %r2557, 1, 31;
setp.eq.s32 %p672, %r3422, 0;
selp.b32 %r1394, 8, 7, %p672;
add.s32 %r3437, %r3429, 1;
setp.lt.u32 %p673, %r3437, %r1394;
@%p673 bra $L__BB6_580;
sub.s32 %r3437, %r3437, %r1394;
setp.ge.u32 %p674, %r3419, %r3;
mov.u32 %r3434, 1;
@%p674 bra $L__BB6_579;
shr.u32 %r2559, %r3436, %r3437;
cvt.u16.u32 %rs127, %r2559;
and.b16 %rs128, %rs127, 255;
cvt.u64.u32 %rd333, %r3419;
add.s64 %rd334, %rd4, %rd333;
st.global.u8 [%rd334], %rs127;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p675, %rs128, 255;
selp.u32 %r3422, 1, 0, %p675;
mov.u32 %r3434, %r3423;
$L__BB6_579:
mov.u32 %r2560, -1;
shl.b32 %r2561, %r2560, %r3437;
not.b32 %r2562, %r2561;
setp.eq.s32 %p676, %r3437, 0;
selp.b32 %r2563, 0, %r2562, %p676;
and.b32 %r3436, %r3436, %r2563;
mov.u32 %r3423, %r3434;
$L__BB6_580:
// Step 4: bit %r3404 - 4; %r3404 itself is decremented by 4 here.
add.s32 %r3404, %r3404, -4;
shr.u32 %r2564, %r273, %r3404;
and.b32 %r2565, %r2564, 1;
bfi.b32 %r3457, %r3436, %r2565, 1, 31;
setp.eq.s32 %p677, %r3422, 0;
selp.b32 %r1410, 8, 7, %p677;
add.s32 %r3458, %r3437, 1;
setp.lt.u32 %p678, %r3458, %r1410;
@%p678 bra $L__BB6_584;
sub.s32 %r3458, %r3458, %r1410;
setp.ge.u32 %p679, %r3419, %r3;
mov.u32 %r3442, 1;
@%p679 bra $L__BB6_583;
shr.u32 %r2567, %r3457, %r3458;
cvt.u16.u32 %rs129, %r2567;
and.b16 %rs130, %rs129, 255;
cvt.u64.u32 %rd335, %r3419;
add.s64 %rd336, %rd4, %rd335;
st.global.u8 [%rd336], %rs129;
add.s32 %r3419, %r3419, 1;
setp.eq.s16 %p680, %rs130, 255;
selp.u32 %r3422, 1, 0, %p680;
mov.u32 %r3442, %r3423;
$L__BB6_583:
mov.u32 %r2568, -1;
shl.b32 %r2569, %r2568, %r3458;
not.b32 %r2570, %r2569;
setp.eq.s32 %p681, %r3458, 0;
selp.b32 %r2571, 0, %r2570, %p681;
and.b32 %r3457, %r3457, %r2571;
mov.u32 %r3423, %r3442;
$L__BB6_584:
// Done when no bits remain; otherwise run another group of four.
setp.eq.s32 %p682, %r3404, 0;
@%p682 bra $L__BB6_240;
bra.uni $L__BB6_568;
$L__BB6_240:
// Latch of the inner loop: advance counter %r2804 and repeat from $L__BB6_234
// (outside this excerpt) while %r2804 < %r24.
add.s32 %r2804, %r2804, 1;
setp.lt.u32 %p688, %r2804, %r24;
@%p688 bra $L__BB6_234;
$L__BB6_241:
// Latch of the outer loop: advance counter %r2754 and repeat from $L__BB6_19
// (outside this excerpt) while %r2754 < %r2; otherwise continue at $L__BB6_242.
add.s32 %r2754, %r2754, 1;
setp.lt.u32 %p689, %r2754, %r2;
@%p689 bra $L__BB6_19;
bra.uni $L__BB6_242;
$L__BB6_85:
// Store %r2760, %r2761 and %r1581 into the local record at %rd13 (byte offsets 192,
// 196, 200), then zero the first %r2761 32-bit entries of three local tables located
// at %rd1, %rd1 + 8192 and %rd1 + 16384.
// NOTE(review): by analogy with the %rd14 record these are presumably level count,
// node count and an error flag - confirm against the kernel source.
st.local.u32 [%rd13+192], %r2760;
st.local.u32 [%rd13+196], %r2761;
st.local.u32 [%rd13+200], %r1581;
setp.eq.s32 %p131, %r2761, 0;
@%p131 bra $L__BB6_93;
// %r91 = count mod 4 (remainder handled after the loop); fewer than 4 entries skips the loop.
add.s32 %r1677, %r2761, -1;
and.b32 %r91, %r2761, 3;
setp.lt.u32 %p132, %r1677, 3;
mov.u32 %r2764, 0;
@%p132 bra $L__BB6_89;
sub.s32 %r2763, %r2761, %r91;
mov.u32 %r2764, 0;
$L__BB6_88:
// Main zeroing loop, four entries of each table per iteration (index %r2764).
mov.u32 %r2724, 0;
mul.wide.u32 %rd83, %r2764, 4;
add.s64 %rd84, %rd1, %rd83;
st.local.u32 [%rd84], %r2724;
st.local.u32 [%rd84+8192], %r2724;
st.local.u32 [%rd84+16384], %r2724;
st.local.u32 [%rd84+4], %r2724;
st.local.u32 [%rd84+8196], %r2724;
st.local.u32 [%rd84+16388], %r2724;
st.local.u32 [%rd84+8], %r2724;
st.local.u32 [%rd84+8200], %r2724;
st.local.u32 [%rd84+16392], %r2724;
st.local.u32 [%rd84+12], %r2724;
st.local.u32 [%rd84+8204], %r2724;
st.local.u32 [%rd84+16396], %r2724;
add.s32 %r2764, %r2764, 4;
add.s32 %r2763, %r2763, -4;
setp.ne.s32 %p133, %r2763, 0;
@%p133 bra $L__BB6_88;
$L__BB6_89:
// Remainder: zero the last 1 to 3 entries of each table.
setp.eq.s32 %p134, %r91, 0;
@%p134 bra $L__BB6_93;
mul.wide.u32 %rd85, %r2764, 4;
add.s64 %rd15, %rd1, %rd85;
mov.u32 %r1680, 0;
st.local.u32 [%rd15], %r1680;
st.local.u32 [%rd15+8192], %r1680;
st.local.u32 [%rd15+16384], %r1680;
setp.eq.s32 %p135, %r91, 1;
@%p135 bra $L__BB6_93;
st.local.u32 [%rd15+4], %r1680;
st.local.u32 [%rd15+8196], %r1680;
st.local.u32 [%rd15+16388], %r1680;
setp.eq.s32 %p136, %r91, 2;
@%p136 bra $L__BB6_93;
mov.u32 %r1682, 0;
st.local.u32 [%rd15+8], %r1682;
st.local.u32 [%rd15+8200], %r1682;
st.local.u32 [%rd15+16392], %r1682;
or.b32 %r2723, %r25, %r26;
and.b32 %r2722, %r2723, -2;
setp.eq.s32 %p727, %r2722, 0;
mov.u32 %r1684, 0;
st.local.u32 [%rd14], %r25;
st.local.u32 [%rd14+64], %r26;
st.local.u32 [%rd14+128], %r1684;
mov.u32 %r2765, 1;
@%p727 bra $L__BB6_157;
add.s32 %r1685, %r25, 1;
shr.u32 %r98, %r1685, 1;
add.s32 %r1686, %r26, 1;
shr.u32 %r99, %r1686, 1;
mul.lo.s32 %r100, %r98, %r99;
setp.eq.s32 %p138, %r98, 0;
@%p138 bra $L__BB6_96;
div.u32 %r1687, %r100, %r98;
setp.ne.s32 %p139, %r1687, %r99;
@%p139 bra $L__BB6_152;
$L__BB6_96:
add.s32 %r101, %r2766, %r100;
setp.lt.u32 %p140, %r101, %r2766;
setp.gt.u32 %p141, %r101, 2048;
or.pred %p142, %p140, %p141;
@%p142 bra $L__BB6_156;
bra.uni $L__BB6_97;
$L__BB6_156:
mov.u32 %r1777, 1;
st.local.u32 [%rd14+200], %r1777;
bra.uni $L__BB6_230;
$L__BB6_97:
// Unrolled level construction for the record at %rd14, levels 1 to 4 (and the start of 5).
// For level k the code stores the two dimensions at [%rd14 + 4k] and [%rd14 + 64 + 4k]
// and the running total before this level at [%rd14 + 128 + 4k]; %r2765 holds the number
// of levels recorded so far. If both dimensions are <= 1 construction ends at $L__BB6_157.
// Otherwise both dimensions are halved (rounding up), their product is checked for
// 32-bit overflow via a division (mismatch -> $L__BB6_152, outside this excerpt), and
// the running total is checked for wrap-around and for exceeding 2048 (-> $L__BB6_156).
// Level 1.
st.local.u32 [%rd14+4], %r98;
st.local.u32 [%rd14+68], %r99;
st.local.u32 [%rd14+132], %r2766;
or.b32 %r1689, %r98, %r99;
and.b32 %r1690, %r1689, 2147483646;
setp.eq.s32 %p143, %r1690, 0;
mov.u32 %r2765, 2;
mov.u32 %r2766, %r101;
@%p143 bra $L__BB6_157;
add.s32 %r1691, %r98, 1;
shr.u32 %r102, %r1691, 1;
add.s32 %r1692, %r99, 1;
shr.u32 %r103, %r1692, 1;
mul.lo.s32 %r104, %r102, %r103;
setp.eq.s32 %p144, %r102, 0;
@%p144 bra $L__BB6_100;
div.u32 %r1693, %r104, %r102;
setp.ne.s32 %p145, %r1693, %r103;
@%p145 bra $L__BB6_152;
$L__BB6_100:
// Level 2.
add.s32 %r2766, %r101, %r104;
setp.lt.u32 %p146, %r2766, %r101;
setp.gt.u32 %p147, %r2766, 2048;
or.pred %p148, %p146, %p147;
@%p148 bra $L__BB6_156;
st.local.u32 [%rd14+8], %r102;
st.local.u32 [%rd14+72], %r103;
st.local.u32 [%rd14+136], %r101;
or.b32 %r1695, %r102, %r103;
and.b32 %r1696, %r1695, 2147483646;
setp.eq.s32 %p149, %r1696, 0;
mov.u32 %r2765, 3;
@%p149 bra $L__BB6_157;
add.s32 %r1697, %r102, 1;
shr.u32 %r106, %r1697, 1;
add.s32 %r1698, %r103, 1;
shr.u32 %r107, %r1698, 1;
mul.lo.s32 %r108, %r106, %r107;
setp.eq.s32 %p150, %r106, 0;
@%p150 bra $L__BB6_104;
div.u32 %r1699, %r108, %r106;
setp.ne.s32 %p151, %r1699, %r107;
@%p151 bra $L__BB6_152;
$L__BB6_104:
// Level 3.
add.s32 %r109, %r2766, %r108;
setp.lt.u32 %p152, %r109, %r2766;
setp.gt.u32 %p153, %r109, 2048;
or.pred %p154, %p152, %p153;
@%p154 bra $L__BB6_156;
st.local.u32 [%rd14+12], %r106;
st.local.u32 [%rd14+76], %r107;
st.local.u32 [%rd14+140], %r2766;
or.b32 %r1701, %r106, %r107;
and.b32 %r1702, %r1701, 2147483646;
setp.eq.s32 %p155, %r1702, 0;
mov.u32 %r2765, 4;
mov.u32 %r2766, %r109;
@%p155 bra $L__BB6_157;
add.s32 %r1703, %r106, 1;
shr.u32 %r110, %r1703, 1;
add.s32 %r1704, %r107, 1;
shr.u32 %r111, %r1704, 1;
mul.lo.s32 %r112, %r110, %r111;
setp.eq.s32 %p156, %r110, 0;
@%p156 bra $L__BB6_108;
div.u32 %r1705, %r112, %r110;
setp.ne.s32 %p157, %r1705, %r111;
@%p157 bra $L__BB6_152;
$L__BB6_108:
// Level 4.
add.s32 %r2766, %r109, %r112;
setp.lt.u32 %p158, %r2766, %r109;
setp.gt.u32 %p159, %r2766, 2048;
or.pred %p160, %p158, %p159;
@%p160 bra $L__BB6_156;
st.local.u32 [%rd14+16], %r110;
st.local.u32 [%rd14+80], %r111;
st.local.u32 [%rd14+144], %r109;
or.b32 %r1707, %r110, %r111;
and.b32 %r1708, %r1707, 2147483646;
setp.eq.s32 %p161, %r1708, 0;
mov.u32 %r2765, 5;
@%p161 bra $L__BB6_157;
add.s32 %r1709, %r110, 1;
shr.u32 %r114, %r1709, 1;
add.s32 %r1710, %r111, 1;
shr.u32 %r115, %r1710, 1;
mul.lo.s32 %r116, %r114, %r115;
setp.eq.s32 %p162, %r114, 0;
@%p162 bra $L__BB6_112;
div.u32 %r1711, %r116, %r114;
setp.ne.s32 %p163, %r1711, %r115;
@%p163 bra $L__BB6_152;
$L__BB6_112:
// Unrolled level construction for the record at %rd14, levels 5 to 8 (and the start of 9).
// Per level: check the running total (wrap-around or > 2048 -> $L__BB6_156), store the
// two dimensions at [%rd14 + 4k] / [%rd14 + 64 + 4k] and the previous running total at
// [%rd14 + 128 + 4k], set the level count %r2765, stop at $L__BB6_157 when both
// dimensions are <= 1, else halve both (rounding up) and verify the product did not
// overflow 32 bits (mismatch -> $L__BB6_152, outside this excerpt).
// Level 5.
add.s32 %r117, %r2766, %r116;
setp.lt.u32 %p164, %r117, %r2766;
setp.gt.u32 %p165, %r117, 2048;
or.pred %p166, %p164, %p165;
@%p166 bra $L__BB6_156;
st.local.u32 [%rd14+20], %r114;
st.local.u32 [%rd14+84], %r115;
st.local.u32 [%rd14+148], %r2766;
or.b32 %r1713, %r114, %r115;
and.b32 %r1714, %r1713, 2147483646;
setp.eq.s32 %p167, %r1714, 0;
mov.u32 %r2765, 6;
mov.u32 %r2766, %r117;
@%p167 bra $L__BB6_157;
add.s32 %r1715, %r114, 1;
shr.u32 %r118, %r1715, 1;
add.s32 %r1716, %r115, 1;
shr.u32 %r119, %r1716, 1;
mul.lo.s32 %r120, %r118, %r119;
setp.eq.s32 %p168, %r118, 0;
@%p168 bra $L__BB6_116;
div.u32 %r1717, %r120, %r118;
setp.ne.s32 %p169, %r1717, %r119;
@%p169 bra $L__BB6_152;
$L__BB6_116:
// Level 6.
add.s32 %r2766, %r117, %r120;
setp.lt.u32 %p170, %r2766, %r117;
setp.gt.u32 %p171, %r2766, 2048;
or.pred %p172, %p170, %p171;
@%p172 bra $L__BB6_156;
st.local.u32 [%rd14+24], %r118;
st.local.u32 [%rd14+88], %r119;
st.local.u32 [%rd14+152], %r117;
or.b32 %r1719, %r118, %r119;
and.b32 %r1720, %r1719, 2147483646;
setp.eq.s32 %p173, %r1720, 0;
mov.u32 %r2765, 7;
@%p173 bra $L__BB6_157;
add.s32 %r1721, %r118, 1;
shr.u32 %r122, %r1721, 1;
add.s32 %r1722, %r119, 1;
shr.u32 %r123, %r1722, 1;
mul.lo.s32 %r124, %r122, %r123;
setp.eq.s32 %p174, %r122, 0;
@%p174 bra $L__BB6_120;
div.u32 %r1723, %r124, %r122;
setp.ne.s32 %p175, %r1723, %r123;
@%p175 bra $L__BB6_152;
$L__BB6_120:
// Level 7.
add.s32 %r125, %r2766, %r124;
setp.lt.u32 %p176, %r125, %r2766;
setp.gt.u32 %p177, %r125, 2048;
or.pred %p178, %p176, %p177;
@%p178 bra $L__BB6_156;
st.local.u32 [%rd14+28], %r122;
st.local.u32 [%rd14+92], %r123;
st.local.u32 [%rd14+156], %r2766;
or.b32 %r1725, %r122, %r123;
and.b32 %r1726, %r1725, 2147483646;
setp.eq.s32 %p179, %r1726, 0;
mov.u32 %r2765, 8;
mov.u32 %r2766, %r125;
@%p179 bra $L__BB6_157;
add.s32 %r1727, %r122, 1;
shr.u32 %r126, %r1727, 1;
add.s32 %r1728, %r123, 1;
shr.u32 %r127, %r1728, 1;
mul.lo.s32 %r128, %r126, %r127;
setp.eq.s32 %p180, %r126, 0;
@%p180 bra $L__BB6_124;
div.u32 %r1729, %r128, %r126;
setp.ne.s32 %p181, %r1729, %r127;
@%p181 bra $L__BB6_152;
$L__BB6_124:
// Level 8.
add.s32 %r2766, %r125, %r128;
setp.lt.u32 %p182, %r2766, %r125;
setp.gt.u32 %p183, %r2766, 2048;
or.pred %p184, %p182, %p183;
@%p184 bra $L__BB6_156;
st.local.u32 [%rd14+32], %r126;
st.local.u32 [%rd14+96], %r127;
st.local.u32 [%rd14+160], %r125;
or.b32 %r1731, %r126, %r127;
and.b32 %r1732, %r1731, 2147483646;
setp.eq.s32 %p185, %r1732, 0;
mov.u32 %r2765, 9;
@%p185 bra $L__BB6_157;
add.s32 %r1733, %r126, 1;
shr.u32 %r130, %r1733, 1;
add.s32 %r1734, %r127, 1;
shr.u32 %r131, %r1734, 1;
mul.lo.s32 %r132, %r130, %r131;
setp.eq.s32 %p186, %r130, 0;
@%p186 bra $L__BB6_128;
div.u32 %r1735, %r132, %r130;
setp.ne.s32 %p187, %r1735, %r131;
@%p187 bra $L__BB6_152;
$L__BB6_128:
// Unrolled level construction for the record at %rd14, levels 9 to 12 (and the start of 13).
// Per level: check the running total (wrap-around or > 2048 -> $L__BB6_156), store the
// two dimensions at [%rd14 + 4k] / [%rd14 + 64 + 4k] and the previous running total at
// [%rd14 + 128 + 4k], set the level count %r2765, stop at $L__BB6_157 when both
// dimensions are <= 1, else halve both (rounding up) and verify the product did not
// overflow 32 bits (mismatch -> $L__BB6_152, outside this excerpt).
// Level 9.
add.s32 %r133, %r2766, %r132;
setp.lt.u32 %p188, %r133, %r2766;
setp.gt.u32 %p189, %r133, 2048;
or.pred %p190, %p188, %p189;
@%p190 bra $L__BB6_156;
st.local.u32 [%rd14+36], %r130;
st.local.u32 [%rd14+100], %r131;
st.local.u32 [%rd14+164], %r2766;
or.b32 %r1737, %r130, %r131;
and.b32 %r1738, %r1737, 2147483646;
setp.eq.s32 %p191, %r1738, 0;
mov.u32 %r2765, 10;
mov.u32 %r2766, %r133;
@%p191 bra $L__BB6_157;
add.s32 %r1739, %r130, 1;
shr.u32 %r134, %r1739, 1;
add.s32 %r1740, %r131, 1;
shr.u32 %r135, %r1740, 1;
mul.lo.s32 %r136, %r134, %r135;
setp.eq.s32 %p192, %r134, 0;
@%p192 bra $L__BB6_132;
div.u32 %r1741, %r136, %r134;
setp.ne.s32 %p193, %r1741, %r135;
@%p193 bra $L__BB6_152;
$L__BB6_132:
// Level 10.
add.s32 %r2766, %r133, %r136;
setp.lt.u32 %p194, %r2766, %r133;
setp.gt.u32 %p195, %r2766, 2048;
or.pred %p196, %p194, %p195;
@%p196 bra $L__BB6_156;
st.local.u32 [%rd14+40], %r134;
st.local.u32 [%rd14+104], %r135;
st.local.u32 [%rd14+168], %r133;
or.b32 %r1743, %r134, %r135;
and.b32 %r1744, %r1743, 2147483646;
setp.eq.s32 %p197, %r1744, 0;
mov.u32 %r2765, 11;
@%p197 bra $L__BB6_157;
add.s32 %r1745, %r134, 1;
shr.u32 %r138, %r1745, 1;
add.s32 %r1746, %r135, 1;
shr.u32 %r139, %r1746, 1;
mul.lo.s32 %r140, %r138, %r139;
setp.eq.s32 %p198, %r138, 0;
@%p198 bra $L__BB6_136;
div.u32 %r1747, %r140, %r138;
setp.ne.s32 %p199, %r1747, %r139;
@%p199 bra $L__BB6_152;
$L__BB6_136:
// Level 11.
add.s32 %r141, %r2766, %r140;
setp.lt.u32 %p200, %r141, %r2766;
setp.gt.u32 %p201, %r141, 2048;
or.pred %p202, %p200, %p201;
@%p202 bra $L__BB6_156;
st.local.u32 [%rd14+44], %r138;
st.local.u32 [%rd14+108], %r139;
st.local.u32 [%rd14+172], %r2766;
or.b32 %r1749, %r138, %r139;
and.b32 %r1750, %r1749, 2147483646;
setp.eq.s32 %p203, %r1750, 0;
mov.u32 %r2765, 12;
mov.u32 %r2766, %r141;
@%p203 bra $L__BB6_157;
add.s32 %r1751, %r138, 1;
shr.u32 %r142, %r1751, 1;
add.s32 %r1752, %r139, 1;
shr.u32 %r143, %r1752, 1;
mul.lo.s32 %r144, %r142, %r143;
setp.eq.s32 %p204, %r142, 0;
@%p204 bra $L__BB6_140;
div.u32 %r1753, %r144, %r142;
setp.ne.s32 %p205, %r1753, %r143;
@%p205 bra $L__BB6_152;
$L__BB6_140:
// Level 12.
add.s32 %r2766, %r141, %r144;
setp.lt.u32 %p206, %r2766, %r141;
setp.gt.u32 %p207, %r2766, 2048;
or.pred %p208, %p206, %p207;
@%p208 bra $L__BB6_156;
st.local.u32 [%rd14+48], %r142;
st.local.u32 [%rd14+112], %r143;
st.local.u32 [%rd14+176], %r141;
or.b32 %r1755, %r142, %r143;
and.b32 %r1756, %r1755, 2147483646;
setp.eq.s32 %p209, %r1756, 0;
mov.u32 %r2765, 13;
@%p209 bra $L__BB6_157;
add.s32 %r1757, %r142, 1;
shr.u32 %r146, %r1757, 1;
add.s32 %r1758, %r143, 1;
shr.u32 %r147, %r1758, 1;
mul.lo.s32 %r148, %r146, %r147;
setp.eq.s32 %p210, %r146, 0;
@%p210 bra $L__BB6_144;
div.u32 %r1759, %r148, %r146;
setp.ne.s32 %p211, %r1759, %r147;
@%p211 bra $L__BB6_152;
$L__BB6_144:
// Unrolled level construction for the record at %rd14, levels 13 to 15 (the last slots).
// Per level: check the running total (wrap-around or > 2048 -> $L__BB6_156), store the
// two dimensions at [%rd14 + 4k] / [%rd14 + 64 + 4k] and the previous running total at
// [%rd14 + 128 + 4k], set the level count %r2765, stop at $L__BB6_157 when both
// dimensions are <= 1, else halve both (rounding up) and verify the product did not
// overflow 32 bits (mismatch -> $L__BB6_152, outside this excerpt).
// Level 13.
add.s32 %r149, %r2766, %r148;
setp.lt.u32 %p212, %r149, %r2766;
setp.gt.u32 %p213, %r149, 2048;
or.pred %p214, %p212, %p213;
@%p214 bra $L__BB6_156;
st.local.u32 [%rd14+52], %r146;
st.local.u32 [%rd14+116], %r147;
st.local.u32 [%rd14+180], %r2766;
or.b32 %r1761, %r146, %r147;
and.b32 %r1762, %r1761, 2147483646;
setp.eq.s32 %p215, %r1762, 0;
mov.u32 %r2765, 14;
mov.u32 %r2766, %r149;
@%p215 bra $L__BB6_157;
add.s32 %r1763, %r146, 1;
shr.u32 %r150, %r1763, 1;
add.s32 %r1764, %r147, 1;
shr.u32 %r151, %r1764, 1;
mul.lo.s32 %r152, %r150, %r151;
setp.eq.s32 %p216, %r150, 0;
@%p216 bra $L__BB6_148;
div.u32 %r1765, %r152, %r150;
setp.ne.s32 %p217, %r1765, %r151;
@%p217 bra $L__BB6_152;
$L__BB6_148:
// Level 14.
add.s32 %r2766, %r149, %r152;
setp.lt.u32 %p218, %r2766, %r149;
setp.gt.u32 %p219, %r2766, 2048;
or.pred %p220, %p218, %p219;
@%p220 bra $L__BB6_156;
st.local.u32 [%rd14+56], %r150;
st.local.u32 [%rd14+120], %r151;
st.local.u32 [%rd14+184], %r149;
or.b32 %r1767, %r150, %r151;
and.b32 %r1768, %r1767, 2147483646;
setp.eq.s32 %p221, %r1768, 0;
mov.u32 %r2765, 15;
@%p221 bra $L__BB6_157;
add.s32 %r1769, %r150, 1;
shr.u32 %r154, %r1769, 1;
add.s32 %r1770, %r151, 1;
shr.u32 %r155, %r1770, 1;
mul.lo.s32 %r156, %r154, %r155;
setp.eq.s32 %p222, %r154, 0;
@%p222 bra $L__BB6_153;
div.u32 %r1771, %r156, %r154;
setp.eq.s32 %p223, %r1771, %r155;
@%p223 bra $L__BB6_153;
bra.uni $L__BB6_152;
$L__BB6_153:
// Level 15 (byte offsets 60 / 124 / 188).
add.s32 %r157, %r2766, %r156;
setp.lt.u32 %p224, %r157, %r2766;
setp.gt.u32 %p225, %r157, 2048;
or.pred %p226, %p224, %p225;
@%p226 bra $L__BB6_156;
st.local.u32 [%rd14+60], %r154;
st.local.u32 [%rd14+124], %r155;
st.local.u32 [%rd14+188], %r2766;
or.b32 %r1774, %r154, %r155;
and.b32 %r1775, %r1774, 2147483646;
setp.eq.s32 %p227, %r1775, 0;
mov.u32 %r2765, 16;
mov.u32 %r2766, %r157;
@%p227 bra $L__BB6_157;
// 16 levels recorded and the dimensions are still above 1: store 1 at [%rd14+200]
// and continue at $L__BB6_230 (outside this excerpt).
mov.u32 %r1776, 1;
st.local.u32 [%rd14+200], %r1776;
bra.uni $L__BB6_230;
$L__BB6_157:
// Level construction succeeded: store the level count %r2765 at [%rd14+192], the
// running total %r2766 at [%rd14+196] and %r1684 at [%rd14+200], then zero the first
// %r2766 32-bit entries of three local tables at %rd2, %rd2 + 8192 and %rd2 + 16384.
// NOTE(review): %r1684 is 0 when this point is reached through the $L__BB6_93 path
// visible nearby; other predecessors are not shown - confirm.
st.local.u32 [%rd14+192], %r2765;
st.local.u32 [%rd14+196], %r2766;
st.local.u32 [%rd14+200], %r1684;
setp.eq.s32 %p228, %r2766, 0;
@%p228 bra $L__BB6_165;
// %r160 = count mod 4 (remainder handled after the loop); fewer than 4 entries skips the loop.
add.s32 %r1780, %r2766, -1;
and.b32 %r160, %r2766, 3;
setp.lt.u32 %p229, %r1780, 3;
mov.u32 %r2769, 0;
@%p229 bra $L__BB6_161;
sub.s32 %r2768, %r2766, %r160;
mov.u32 %r1781, 0;
mov.u32 %r2769, %r1781;
$L__BB6_160:
// Main zeroing loop, four entries of each table per iteration (index %r2769).
mul.wide.u32 %rd86, %r2769, 4;
add.s64 %rd87, %rd2, %rd86;
st.local.u32 [%rd87], %r1781;
st.local.u32 [%rd87+8192], %r1781;
st.local.u32 [%rd87+16384], %r1781;
st.local.u32 [%rd87+4], %r1781;
st.local.u32 [%rd87+8196], %r1781;
st.local.u32 [%rd87+16388], %r1781;
st.local.u32 [%rd87+8], %r1781;
st.local.u32 [%rd87+8200], %r1781;
st.local.u32 [%rd87+16392], %r1781;
st.local.u32 [%rd87+12], %r1781;
st.local.u32 [%rd87+8204], %r1781;
st.local.u32 [%rd87+16396], %r1781;
add.s32 %r2769, %r2769, 4;
add.s32 %r2768, %r2768, -4;
setp.ne.s32 %p230, %r2768, 0;
@%p230 bra $L__BB6_160;
$L__BB6_161:
// Remainder: zero the last 1 to 3 entries of each table.
setp.eq.s32 %p231, %r160, 0;
@%p231 bra $L__BB6_165;
mul.wide.u32 %rd88, %r2769, 4;
add.s64 %rd16, %rd2, %rd88;
mov.u32 %r1783, 0;
st.local.u32 [%rd16], %r1783;
st.local.u32 [%rd16+8192], %r1783;
st.local.u32 [%rd16+16384], %r1783;
setp.eq.s32 %p232, %r160, 1;
@%p232 bra $L__BB6_165;
st.local.u32 [%rd16+4], %r1783;
st.local.u32 [%rd16+8196], %r1783;
st.local.u32 [%rd16+16388], %r1783;
setp.eq.s32 %p233, %r160, 2;
@%p233 bra $L__BB6_165;
mov.u32 %r1785, 0;
st.local.u32 [%rd16+8], %r1785;
st.local.u32 [%rd16+8200], %r1785;
st.local.u32 [%rd16+16392], %r1785;
$L__BB6_165:
// Gather per-record values into the local tables. For i in [0, %r24):
//   rec = global(%rd60) + (i + %r23) * 36          (36-byte records)
//   local %rd1[i] = (u32 at rec+28 == 0) ? u32 at rec+32 : 0x7FFFFFFF
//   local %rd2[i] = u32 at rec+20
// The loop is unrolled by four with a 1-to-3 entry remainder.
// NOTE(review): the meaning of the fields at +20 / +28 / +32 is not visible in this
// excerpt - check the record struct in the kernel source.
setp.eq.s32 %p234, %r24, 0;
@%p234 bra $L__BB6_173;
add.s32 %r1787, %r24, -1;
and.b32 %r167, %r24, 3;
setp.lt.u32 %p235, %r1787, 3;
mov.u32 %r2772, 0;
@%p235 bra $L__BB6_169;
sub.s32 %r2771, %r24, %r167;
mov.u32 %r2772, 0;
$L__BB6_168:
// Entry i.
cvta.to.global.u64 %rd384, %rd60;
add.s32 %r1789, %r2772, %r23;
mul.wide.u32 %rd89, %r1789, 36;
add.s64 %rd90, %rd384, %rd89;
ld.global.u32 %r1790, [%rd90+20];
ld.global.u32 %r1791, [%rd90+28];
setp.eq.s32 %p236, %r1791, 0;
ld.global.u32 %r1792, [%rd90+32];
selp.b32 %r1793, %r1792, 2147483647, %p236;
mul.wide.u32 %rd91, %r2772, 4;
add.s64 %rd92, %rd1, %rd91;
st.local.u32 [%rd92], %r1793;
add.s64 %rd93, %rd2, %rd91;
st.local.u32 [%rd93], %r1790;
// Entry i + 1.
add.s32 %r1794, %r1789, 1;
mul.wide.u32 %rd94, %r1794, 36;
add.s64 %rd95, %rd384, %rd94;
ld.global.u32 %r1795, [%rd95+20];
ld.global.u32 %r1796, [%rd95+28];
setp.eq.s32 %p237, %r1796, 0;
ld.global.u32 %r1797, [%rd95+32];
selp.b32 %r1798, %r1797, 2147483647, %p237;
st.local.u32 [%rd92+4], %r1798;
st.local.u32 [%rd93+4], %r1795;
// Entry i + 2.
add.s32 %r1799, %r1789, 2;
mul.wide.u32 %rd96, %r1799, 36;
add.s64 %rd97, %rd384, %rd96;
ld.global.u32 %r1800, [%rd97+20];
ld.global.u32 %r1801, [%rd97+28];
setp.eq.s32 %p238, %r1801, 0;
ld.global.u32 %r1802, [%rd97+32];
selp.b32 %r1803, %r1802, 2147483647, %p238;
st.local.u32 [%rd92+8], %r1803;
st.local.u32 [%rd93+8], %r1800;
// Entry i + 3.
add.s32 %r1804, %r1789, 3;
mul.wide.u32 %rd98, %r1804, 36;
add.s64 %rd99, %rd384, %rd98;
ld.global.u32 %r1805, [%rd99+20];
ld.global.u32 %r1806, [%rd99+28];
setp.eq.s32 %p239, %r1806, 0;
ld.global.u32 %r1807, [%rd99+32];
selp.b32 %r1808, %r1807, 2147483647, %p239;
st.local.u32 [%rd92+12], %r1808;
st.local.u32 [%rd93+12], %r1805;
add.s32 %r2772, %r2772, 4;
add.s32 %r2771, %r2771, -4;
setp.ne.s32 %p240, %r2771, 0;
@%p240 bra $L__BB6_168;
$L__BB6_169:
// Remainder: %r24 & 3 entries are left, starting at index %r2772.
and.b32 %r2725, %r24, 3;
setp.eq.s32 %p241, %r2725, 0;
@%p241 bra $L__BB6_173;
and.b32 %r2726, %r24, 3;
add.s32 %r1809, %r2772, %r23;
cvta.to.global.u64 %rd100, %rd60;
mul.wide.u32 %rd101, %r1809, 36;
add.s64 %rd102, %rd100, %rd101;
ld.global.u32 %r1810, [%rd102+20];
ld.global.u32 %r1811, [%rd102+28];
setp.eq.s32 %p242, %r1811, 0;
ld.global.u32 %r1812, [%rd102+32];
selp.b32 %r1813, %r1812, 2147483647, %p242;
mul.wide.u32 %rd103, %r2772, 4;
add.s64 %rd18, %rd1, %rd103;
st.local.u32 [%rd18], %r1813;
add.s64 %rd19, %rd2, %rd103;
st.local.u32 [%rd19], %r1810;
setp.eq.s32 %p243, %r2726, 1;
@%p243 bra $L__BB6_173;
// Second remainder entry (addresses are recomputed rather than reused).
mul.wide.u32 %rd371, %r2772, 4;
add.s64 %rd370, %rd1, %rd371;
add.s32 %r2686, %r2772, %r23;
cvta.to.global.u64 %rd369, %rd60;
and.b32 %r2685, %r24, 3;
add.s32 %r1815, %r2686, 1;
mul.wide.u32 %rd105, %r1815, 36;
add.s64 %rd106, %rd369, %rd105;
ld.global.u32 %r1816, [%rd106+20];
ld.global.u32 %r1817, [%rd106+28];
setp.eq.s32 %p244, %r1817, 0;
ld.global.u32 %r1818, [%rd106+32];
selp.b32 %r1819, %r1818, 2147483647, %p244;
st.local.u32 [%rd370+4], %r1819;
st.local.u32 [%rd19+4], %r1816;
setp.eq.s32 %p245, %r2685, 2;
@%p245 bra $L__BB6_173;
// Third remainder entry.
mul.wide.u32 %rd374, %r2772, 4;
add.s64 %rd373, %rd1, %rd374;
add.s32 %r2687, %r2772, %r23;
cvta.to.global.u64 %rd372, %rd60;
add.s32 %r1821, %r2687, 2;
mul.wide.u32 %rd108, %r1821, 36;
add.s64 %rd109, %rd372, %rd108;
ld.global.u32 %r1822, [%rd109+20];
ld.global.u32 %r1823, [%rd109+28];
setp.eq.s32 %p246, %r1823, 0;
ld.global.u32 %r1824, [%rd109+32];
selp.b32 %r1825, %r1824, 2147483647, %p246;
st.local.u32 [%rd373+8], %r1825;
st.local.u32 [%rd19+8], %r1822;
ld.local.u32 %r2786, [%rd13+192];
setp.lt.u32 %p247, %r2786, 2;
@%p247 bra $L__BB6_193;
mov.u32 %r2774, 1;
$L__BB6_175:
add.s32 %r1827, %r2774, -1;
mul.wide.u32 %rd111, %r1827, 4;
add.s64 %rd20, %rd13, %rd111;
ld.local.u32 %r177, [%rd20];
mul.wide.u32 %rd112, %r2774, 4;
add.s64 %rd22, %rd13, %rd112;
ld.local.u32 %r178, [%rd22];
ld.local.u32 %r179, [%rd22+64];
setp.eq.s32 %p248, %r179, 0;
@%p248 bra $L__BB6_192;
ld.local.u32 %r180, [%rd20+64];
mov.u32 %r2775, 0;
$L__BB6_177:
setp.eq.s32 %p249, %r178, 0;
@%p249 bra $L__BB6_190;
mov.u32 %r2776, 0;
$L__BB6_179:
shl.b32 %r2659, %r2775, 1;
add.s32 %r2658, %r2659, 2;
min.u32 %r2657, %r2658, %r180;
shl.b32 %r2642, %r2775, 1;
setp.ge.u32 %p250, %r2642, %r2657;
mov.u32 %r2784, -1;
@%p250 bra $L__BB6_189;
not.b32 %r2645, %r177;
shl.b32 %r2644, %r2776, 1;
shl.b32 %r2777, %r2775, 1;
mov.u32 %r1834, -2;
mov.u32 %r1835, -3;
sub.s32 %r1836, %r1835, %r2644;
max.u32 %r1837, %r1836, %r2645;
not.b32 %r1838, %r1837;
sub.s32 %r1839, %r1838, %r2644;
and.b32 %r190, %r1839, 3;
sub.s32 %r1840, %r1834, %r2644;
sub.s32 %r193, %r1840, %r1837;
mov.u32 %r2784, -1;
$L__BB6_181:
shl.b32 %r2648, %r2776, 1;
add.s32 %r2647, %r2648, 2;
min.u32 %r2646, %r2647, %r177;
setp.ge.u32 %p251, %r2648, %r2646;
@%p251 bra $L__BB6_188;
shl.b32 %r2779, %r2776, 1;
setp.eq.s32 %p252, %r190, 0;
ld.local.u32 %r196, [%rd20+128];
mul.lo.s32 %r197, %r2777, %r177;
@%p252 bra $L__BB6_186;
add.s32 %r2779, %r2644, 1;
shl.b32 %r2650, %r2776, 1;
setp.eq.s32 %p253, %r190, 1;
add.s32 %r1842, %r2650, %r197;
add.s32 %r1843, %r1842, %r196;
mul.wide.u32 %rd113, %r1843, 4;
add.s64 %rd114, %rd1, %rd113;
ld.local.u32 %r1844, [%rd114];
min.u32 %r2784, %r2784, %r1844;
@%p253 bra $L__BB6_186;
shl.b32 %r2631, %r2776, 1;
add.s32 %r2779, %r2631, 2;
add.s32 %r2629, %r2631, 1;
setp.eq.s32 %p254, %r190, 2;
add.s32 %r1845, %r2629, %r197;
add.s32 %r1846, %r1845, %r196;
mul.wide.u32 %rd115, %r1846, 4;
add.s64 %rd116, %rd1, %rd115;
ld.local.u32 %r1847, [%rd116];
min.u32 %r2784, %r2784, %r1847;
@%p254 bra $L__BB6_186;
shl.b32 %r2634, %r2776, 1;
add.s32 %r2779, %r2634, 3;
add.s32 %r2632, %r2634, 2;
add.s32 %r1848, %r2632, %r197;
add.s32 %r1849, %r1848, %r196;
mul.wide.u32 %rd117, %r1849, 4;
add.s64 %rd118, %rd1, %rd117;
ld.local.u32 %r1850, [%rd118];
min.u32 %r2784, %r2784, %r1850;
$L__BB6_186:
setp.lt.u32 %p255, %r193, 3;
@%p255 bra $L__BB6_188;
$L__BB6_187:
shl.b32 %r2637, %r2776, 1;
add.s32 %r2636, %r2637, 2;
min.u32 %r2635, %r2636, %r177;
add.s32 %r1851, %r2779, %r197;
add.s32 %r1852, %r1851, %r196;
mul.wide.u32 %rd119, %r1852, 4;
add.s64 %rd120, %rd1, %rd119;
ld.local.u32 %r1853, [%rd120];
min.u32 %r1854, %r2784, %r1853;
add.s32 %r1855, %r1852, 1;
mul.wide.u32 %rd121, %r1855, 4;
add.s64 %rd122, %rd1, %rd121;
ld.local.u32 %r1856, [%rd122];
min.u32 %r1857, %r1854, %r1856;
add.s32 %r1858, %r1852, 2;
mul.wide.u32 %rd123, %r1858, 4;
add.s64 %rd124, %rd1, %rd123;
ld.local.u32 %r1859, [%rd124];
min.u32 %r1860, %r1857, %r1859;
add.s32 %r1861, %r1852, 3;
mul.wide.u32 %rd125, %r1861, 4;
add.s64 %rd126, %rd1, %rd125;
ld.local.u32 %r1862, [%rd126];
min.u32 %r2784, %r1860, %r1862;
add.s32 %r2779, %r2779, 4;
setp.lt.u32 %p256, %r2779, %r2635;
@%p256 bra $L__BB6_187;
$L__BB6_188:
shl.b32 %r2654, %r2775, 1;
add.s32 %r2653, %r2654, 2;
min.u32 %r2652, %r2653, %r180;
add.s32 %r2777, %r2777, 1;
setp.lt.u32 %p257, %r2777, %r2652;
@%p257 bra $L__BB6_181;
$L__BB6_189:
mul.wide.u32 %rd357, %r2774, 4;
add.s64 %rd356, %rd13, %rd357;
mul.lo.s32 %r2638, %r2775, %r178;
add.s32 %r1863, %r2776, %r2638;
ld.local.u32 %r1864, [%rd356+128];
add.s32 %r1865, %r1863, %r1864;
mul.wide.u32 %rd127, %r1865, 4;
add.s64 %rd128, %rd1, %rd127;
st.local.u32 [%rd128], %r2784;
add.s32 %r2776, %r2776, 1;
setp.lt.u32 %p258, %r2776, %r178;
@%p258 bra $L__BB6_179;
$L__BB6_190:
add.s32 %r2775, %r2775, 1;
setp.lt.u32 %p259, %r2775, %r179;
@%p259 bra $L__BB6_177;
ld.local.u32 %r2786, [%rd13+192];
$L__BB6_192:
cvt.u64.u32 %rd358, %r2774;
cvt.u32.u64 %r1866, %rd358;
add.s32 %r2774, %r1866, 1;
setp.lt.u32 %p260, %r2774, %r2786;
@%p260 bra $L__BB6_175;
// Same level-by-level 2x2 unsigned-min reduction as the loop nest at
// $L__BB6_173, applied to the local u32 array at %rd2 with the descriptor in
// local memory at %rd14:
//   u32 [%rd14 +   0 + 4*k]  columns of level k (also used as its row stride)
//   u32 [%rd14 +  64 + 4*k]  rows of level k
//   u32 [%rd14 + 128 + 4*k]  element offset of level k inside the %rd2 array
//   u32 [%rd14 + 192]        number of levels N
// Cell (x, y) of level k gets the unsigned minimum over columns 2x..2x+1,
// rows 2y..2y+1 of level k-1 (clipped); an empty window yields 0xFFFFFFFF.
$L__BB6_193:
ld.local.u32 %r2800, [%rd14+192];
// Fewer than two levels: nothing to reduce.
setp.lt.u32 %p261, %r2800, 2;
@%p261 bra $L__BB6_213;
// %r2788 = level k, starting at 1.
mov.u32 %r2788, 1;
$L__BB6_195:
// %rd23 -> descriptor slot of level k-1, %rd25 -> slot of level k.
// %r219 = columns(k-1), %r220 = columns(k), %r221 = rows(k).
add.s32 %r1868, %r2788, -1;
mul.wide.u32 %rd130, %r1868, 4;
add.s64 %rd23, %rd14, %rd130;
ld.local.u32 %r219, [%rd23];
mul.wide.u32 %rd131, %r2788, 4;
add.s64 %rd25, %rd14, %rd131;
ld.local.u32 %r220, [%rd25];
ld.local.u32 %r221, [%rd25+64];
// A level with zero rows is skipped entirely.
setp.eq.s32 %p262, %r221, 0;
@%p262 bra $L__BB6_212;
// %r222 = rows(k-1); %r2789 = output row y.
ld.local.u32 %r222, [%rd23+64];
mov.u32 %r2789, 0;
$L__BB6_197:
// Zero columns: no cells in this row.
setp.eq.s32 %p263, %r220, 0;
@%p263 bra $L__BB6_210;
// %r2790 = output column x.
mov.u32 %r2790, 0;
$L__BB6_199:
// Row window is [2y, min(2y+2, rows(k-1))); if empty, store 0xFFFFFFFF.
shl.b32 %r2675, %r2789, 1;
add.s32 %r2674, %r2675, 2;
min.u32 %r2673, %r2674, %r222;
setp.ge.u32 %p264, %r2675, %r2673;
mov.u32 %r2798, -1;
@%p264 bra $L__BB6_209;
// Precompute column-window trip counts with complement arithmetic:
//   %r1877 = ~(2x+2), so %r1878 = ~min(2x+2, columns(k-1))
//   %r1880 = min(2x+2, columns(k-1)) - 2x = window width
//   %r232  = width & 3 (columns peeled one at a time)
//   %r235  = width - 1 (the 4x-unrolled loop runs only if this is >= 3)
// %r2791 = source row, starting at 2y; %r2798 = running minimum.
not.b32 %r2678, %r219;
shl.b32 %r2677, %r2790, 1;
shl.b32 %r2791, %r2789, 1;
mov.u32 %r1875, -2;
mov.u32 %r1876, -3;
sub.s32 %r1877, %r1876, %r2677;
max.u32 %r1878, %r1877, %r2678;
not.b32 %r1879, %r1878;
sub.s32 %r1880, %r1879, %r2677;
and.b32 %r232, %r1880, 3;
sub.s32 %r1881, %r1875, %r2677;
sub.s32 %r235, %r1881, %r1878;
mov.u32 %r2798, -1;
$L__BB6_201:
// Column window is [2x, min(2x+2, columns(k-1))); skip the row if empty.
shl.b32 %r2681, %r2790, 1;
add.s32 %r2680, %r2681, 2;
min.u32 %r2679, %r2680, %r219;
setp.ge.u32 %p265, %r2681, %r2679;
@%p265 bra $L__BB6_208;
// %r2793 = source column; %r238 = offset of level k-1;
// %r239 = source row * columns(k-1).
shl.b32 %r2793, %r2790, 1;
setp.eq.s32 %p266, %r232, 0;
ld.local.u32 %r238, [%rd23+128];
mul.lo.s32 %r239, %r2791, %r219;
@%p266 bra $L__BB6_206;
// Peeled column 2x.
add.s32 %r2793, %r2677, 1;
shl.b32 %r2683, %r2790, 1;
setp.eq.s32 %p267, %r232, 1;
add.s32 %r1883, %r2683, %r239;
add.s32 %r1884, %r1883, %r238;
mul.wide.u32 %rd132, %r1884, 4;
add.s64 %rd133, %rd2, %rd132;
ld.local.u32 %r1885, [%rd133];
min.u32 %r2798, %r2798, %r1885;
@%p267 bra $L__BB6_206;
// Peeled column 2x+1.
shl.b32 %r2662, %r2790, 1;
add.s32 %r2793, %r2662, 2;
add.s32 %r2660, %r2662, 1;
setp.eq.s32 %p268, %r232, 2;
add.s32 %r1886, %r2660, %r239;
add.s32 %r1887, %r1886, %r238;
mul.wide.u32 %rd134, %r1887, 4;
add.s64 %rd135, %rd2, %rd134;
ld.local.u32 %r1888, [%rd135];
min.u32 %r2798, %r2798, %r1888;
@%p268 bra $L__BB6_206;
// Peeled column 2x+2 (only when width & 3 == 3).
shl.b32 %r2665, %r2790, 1;
add.s32 %r2793, %r2665, 3;
add.s32 %r2663, %r2665, 2;
add.s32 %r1889, %r2663, %r239;
add.s32 %r1890, %r1889, %r238;
mul.wide.u32 %rd136, %r1890, 4;
add.s64 %rd137, %rd2, %rd136;
ld.local.u32 %r1891, [%rd137];
min.u32 %r2798, %r2798, %r1891;
$L__BB6_206:
// Enter the 4x-unrolled loop only when width - 1 >= 3.
setp.lt.u32 %p269, %r235, 3;
@%p269 bra $L__BB6_208;
$L__BB6_207:
// 4x-unrolled: fold four consecutive source cells into the minimum.
shl.b32 %r2668, %r2790, 1;
add.s32 %r2667, %r2668, 2;
min.u32 %r2666, %r2667, %r219;
add.s32 %r1892, %r2793, %r239;
add.s32 %r1893, %r1892, %r238;
mul.wide.u32 %rd138, %r1893, 4;
add.s64 %rd139, %rd2, %rd138;
ld.local.u32 %r1894, [%rd139];
min.u32 %r1895, %r2798, %r1894;
add.s32 %r1896, %r1893, 1;
mul.wide.u32 %rd140, %r1896, 4;
add.s64 %rd141, %rd2, %rd140;
ld.local.u32 %r1897, [%rd141];
min.u32 %r1898, %r1895, %r1897;
add.s32 %r1899, %r1893, 2;
mul.wide.u32 %rd142, %r1899, 4;
add.s64 %rd143, %rd2, %rd142;
ld.local.u32 %r1900, [%rd143];
min.u32 %r1901, %r1898, %r1900;
add.s32 %r1902, %r1893, 3;
mul.wide.u32 %rd144, %r1902, 4;
add.s64 %rd145, %rd2, %rd144;
ld.local.u32 %r1903, [%rd145];
min.u32 %r2798, %r1901, %r1903;
add.s32 %r2793, %r2793, 4;
setp.lt.u32 %p270, %r2793, %r2666;
@%p270 bra $L__BB6_207;
$L__BB6_208:
// Next source row, while it is below min(2y+2, rows(k-1)).
shl.b32 %r2671, %r2789, 1;
add.s32 %r2670, %r2671, 2;
min.u32 %r2669, %r2670, %r222;
add.s32 %r2791, %r2791, 1;
setp.lt.u32 %p271, %r2791, %r2669;
@%p271 bra $L__BB6_201;
$L__BB6_209:
// Store the minimum at [%rd2 + 4*(offset(k) + y*columns(k) + x)].
mul.wide.u32 %rd367, %r2788, 4;
add.s64 %rd366, %rd14, %rd367;
mul.lo.s32 %r2672, %r2789, %r220;
add.s32 %r1904, %r2790, %r2672;
ld.local.u32 %r1905, [%rd366+128];
add.s32 %r1906, %r1904, %r1905;
mul.wide.u32 %rd146, %r1906, 4;
add.s64 %rd147, %rd2, %rd146;
st.local.u32 [%rd147], %r2798;
// Next output column.
add.s32 %r2790, %r2790, 1;
setp.lt.u32 %p272, %r2790, %r220;
@%p272 bra $L__BB6_199;
$L__BB6_210:
// Next output row.
add.s32 %r2789, %r2789, 1;
setp.lt.u32 %p273, %r2789, %r221;
@%p273 bra $L__BB6_197;
// Reload the level count for the outer loop test.
ld.local.u32 %r2800, [%rd14+192];
$L__BB6_212:
// Next level (the u32->u64->u32 round trip leaves k unchanged).
cvt.u64.u32 %rd368, %r2788;
cvt.u32.u64 %r1907, %rd368;
add.s32 %r2788, %r1907, 1;
setp.lt.u32 %p274, %r2788, %r2800;
@%p274 bra $L__BB6_195;
// Validate the 16-byte global record at %rd148 + 16*(%r2754 + %r1) before it
// is used to seed the local arrays. Any failed check stores 1 into the u32 at
// [%rd13+200] and [%rd14+200] and continues at $L__BB6_231.
// %rd63, %rd148, %r2754 and %r1 are defined before this chunk.
// NOTE(review): the word at +200 looks like an "invalid/error" flag in each
// descriptor, and %rd63 an element count — confirm.
$L__BB6_213:
// %rd63 == 0: skip this stage altogether.
setp.eq.s64 %p275, %rd63, 0;
@%p275 bra $L__BB6_231;
// Check 1: the record index (32-bit sum, zero-extended) must be below %rd63.
add.s32 %r2655, %r2754, %r1;
cvt.u64.u32 %rd364, %r2655;
setp.lt.u64 %p276, %rd364, %rd63;
@%p276 bra $L__BB6_216;
bra.uni $L__BB6_215;
$L__BB6_216:
// Load the record: %rd26 = u32 at +0, %rd27 = u32 at +4 (both zero-extended),
// %r1909 / %rd28 = u32 at +8.
add.s32 %r2656, %r2754, %r1;
cvt.u64.u32 %rd365, %r2656;
shl.b64 %rd149, %rd365, 4;
add.s64 %rd150, %rd148, %rd149;
ld.global.u32 %rd26, [%rd150];
ld.global.u32 %rd27, [%rd150+4];
ld.global.u32 %r1909, [%rd150+8];
cvt.u64.u32 %rd28, %r1909;
// Check 2: the +8 field must equal the u32 at [%rd13+196].
ld.local.u32 %r1910, [%rd13+196];
setp.eq.s32 %p277, %r1909, %r1910;
@%p277 bra $L__BB6_218;
bra.uni $L__BB6_217;
$L__BB6_218:
// Check 3: (+8 field) + (+0 field) and (+8 field) + (+4 field) must each be
// <= kernel parameter 8 (64-bit unsigned compare).
ld.param.u64 %rd359, [ j2k_htj2k_packetize_cleanup_param_8];
add.s64 %rd151, %rd28, %rd26;
setp.gt.u64 %p278, %rd151, %rd359;
add.s64 %rd152, %rd28, %rd27;
setp.gt.u64 %p279, %rd152, %rd359;
or.pred %p280, %p278, %p279;
@%p280 bra $L__BB6_227;
bra.uni $L__BB6_219;
// Check 3 failed: set both flags.
$L__BB6_227:
mov.u32 %r1973, 1;
st.local.u32 [%rd13+200], %r1973;
st.local.u32 [%rd14+200], %r1973;
bra.uni $L__BB6_231;
// Not reached from the code above; entered from outside this chunk.
// Sets only the %rd14 flag and continues at $L__BB6_230.
$L__BB6_152:
mov.u32 %r1772, 1;
st.local.u32 [%rd14+200], %r1772;
bra.uni $L__BB6_230;
// Check 1 failed (index >= %rd63): set both flags.
$L__BB6_215:
mov.u32 %r1908, 1;
st.local.u32 [%rd13+200], %r1908;
st.local.u32 [%rd14+200], %r1908;
bra.uni $L__BB6_231;
// Check 2 failed (field mismatch): set both flags.
$L__BB6_217:
mov.u32 %r1911, 1;
st.local.u32 [%rd13+200], %r1911;
st.local.u32 [%rd14+200], %r1911;
bra.uni $L__BB6_231;
// Copy n = %rd28 (low 32 bits) pairs of u32 from the global array at %rd62
// (8 bytes per element) into the local arrays. For i in [0, n):
//   local [%rd1 +  8192 + 4*i] = src[%rd26 + i].u32[0]
//   local [%rd1 + 16384 + 4*i] = src[%rd26 + i].u32[1]
//   local [%rd2 +  8192 + 4*i] = src[%rd27 + i].u32[0]
//   local [%rd2 + 16384 + 4*i] = src[%rd27 + i].u32[1]
// Source indices are formed with 32-bit adds on the low halves of %rd26/%rd27.
// The loop is unrolled by 4 with a 1..3 element tail; every exit continues at
// $L__BB6_231. %rd62 is defined before this chunk.
$L__BB6_219:
cvt.u32.u64 %r1912, %rd28;
// n == 0: nothing to copy.
setp.eq.s32 %p281, %r1912, 0;
@%p281 bra $L__BB6_231;
// %r258 = n & 3 (tail length); n - 1 < 3 means n is 1..3, so skip the unrolled loop.
add.s32 %r1915, %r1912, -1;
and.b32 %r258, %r1912, 3;
setp.lt.u32 %p282, %r1915, 3;
mov.u32 %r2803, 0;
@%p282 bra $L__BB6_223;
// %r2802 = n rounded down to a multiple of 4; %r2803 = element index i.
sub.s32 %r2802, %r1912, %r258;
mov.u32 %r2803, 0;
$L__BB6_222:
// Element i: load both source pairs, then store the four words.
cvta.to.global.u64 %rd385, %rd62;
cvt.u32.u64 %r1918, %rd26;
add.s32 %r1919, %r2803, %r1918;
mul.wide.u32 %rd153, %r1919, 8;
add.s64 %rd154, %rd385, %rd153;
ld.global.u32 %r1920, [%rd154];
ld.global.u32 %r1921, [%rd154+4];
cvt.u32.u64 %r1922, %rd27;
add.s32 %r1923, %r2803, %r1922;
mul.wide.u32 %rd155, %r1923, 8;
add.s64 %rd156, %rd385, %rd155;
ld.global.u32 %r1924, [%rd156];
ld.global.u32 %r1925, [%rd156+4];
mul.wide.u32 %rd157, %r2803, 4;
add.s64 %rd158, %rd1, %rd157;
st.local.u32 [%rd158+8192], %r1920;
st.local.u32 [%rd158+16384], %r1921;
add.s64 %rd159, %rd2, %rd157;
st.local.u32 [%rd159+8192], %r1924;
st.local.u32 [%rd159+16384], %r1925;
// Element i+1.
add.s32 %r1926, %r2803, 1;
add.s32 %r1927, %r1926, %r1918;
mul.wide.u32 %rd160, %r1927, 8;
add.s64 %rd161, %rd385, %rd160;
ld.global.u32 %r1928, [%rd161];
ld.global.u32 %r1929, [%rd161+4];
add.s32 %r1930, %r1926, %r1922;
mul.wide.u32 %rd162, %r1930, 8;
add.s64 %rd163, %rd385, %rd162;
ld.global.u32 %r1931, [%rd163];
ld.global.u32 %r1932, [%rd163+4];
st.local.u32 [%rd158+8196], %r1928;
st.local.u32 [%rd158+16388], %r1929;
st.local.u32 [%rd159+8196], %r1931;
st.local.u32 [%rd159+16388], %r1932;
// Element i+2.
add.s32 %r1933, %r2803, 2;
add.s32 %r1934, %r1933, %r1918;
mul.wide.u32 %rd164, %r1934, 8;
add.s64 %rd165, %rd385, %rd164;
ld.global.u32 %r1935, [%rd165];
ld.global.u32 %r1936, [%rd165+4];
add.s32 %r1937, %r1933, %r1922;
mul.wide.u32 %rd166, %r1937, 8;
add.s64 %rd167, %rd385, %rd166;
ld.global.u32 %r1938, [%rd167];
ld.global.u32 %r1939, [%rd167+4];
st.local.u32 [%rd158+8200], %r1935;
st.local.u32 [%rd158+16392], %r1936;
st.local.u32 [%rd159+8200], %r1938;
st.local.u32 [%rd159+16392], %r1939;
// Element i+3.
add.s32 %r1940, %r2803, 3;
add.s32 %r1941, %r1940, %r1918;
mul.wide.u32 %rd168, %r1941, 8;
add.s64 %rd169, %rd385, %rd168;
ld.global.u32 %r1942, [%rd169];
ld.global.u32 %r1943, [%rd169+4];
add.s32 %r1944, %r1940, %r1922;
mul.wide.u32 %rd170, %r1944, 8;
add.s64 %rd171, %rd385, %rd170;
ld.global.u32 %r1945, [%rd171];
ld.global.u32 %r1946, [%rd171+4];
st.local.u32 [%rd158+8204], %r1942;
st.local.u32 [%rd158+16396], %r1943;
st.local.u32 [%rd159+8204], %r1945;
st.local.u32 [%rd159+16396], %r1946;
// Advance by four; loop until the multiple-of-4 part is consumed.
add.s32 %r2803, %r2803, 4;
add.s32 %r2802, %r2802, -4;
setp.ne.s32 %p283, %r2802, 0;
@%p283 bra $L__BB6_222;
// Tail: n & 3 remaining elements.
$L__BB6_223:
setp.eq.s32 %p284, %r258, 0;
@%p284 bra $L__BB6_231;
// First tail element (index i). %rd30 / %rd31 keep the +8192 destinations so
// later tail stores can use small offsets (+8196 from them reaches the
// +16384 region at the next element).
cvt.u32.u64 %r1947, %rd27;
cvt.u32.u64 %r1948, %rd26;
add.s32 %r1949, %r2803, %r1948;
cvta.to.global.u64 %rd172, %rd62;
mul.wide.u32 %rd173, %r1949, 8;
add.s64 %rd174, %rd172, %rd173;
ld.global.u32 %r1950, [%rd174];
ld.global.u32 %r1951, [%rd174+4];
add.s32 %r1952, %r2803, %r1947;
mul.wide.u32 %rd175, %r1952, 8;
add.s64 %rd176, %rd172, %rd175;
ld.global.u32 %r1953, [%rd176];
ld.global.u32 %r1954, [%rd176+4];
mul.wide.u32 %rd177, %r2803, 4;
add.s64 %rd178, %rd1, %rd177;
add.s64 %rd30, %rd178, 8192;
st.local.u32 [%rd178+8192], %r1950;
st.local.u32 [%rd178+16384], %r1951;
add.s64 %rd179, %rd2, %rd177;
add.s64 %rd31, %rd179, 8192;
st.local.u32 [%rd179+8192], %r1953;
st.local.u32 [%rd179+16384], %r1954;
setp.eq.s32 %p285, %r258, 1;
@%p285 bra $L__BB6_231;
// Second tail element (index i+1).
cvta.to.global.u64 %rd386, %rd62;
add.s32 %r1955, %r2803, 1;
add.s32 %r1958, %r1955, %r1948;
mul.wide.u32 %rd181, %r1958, 8;
add.s64 %rd182, %rd386, %rd181;
ld.global.u32 %r1959, [%rd182];
ld.global.u32 %r1960, [%rd182+4];
add.s32 %r1961, %r1955, %r1947;
mul.wide.u32 %rd183, %r1961, 8;
add.s64 %rd184, %rd386, %rd183;
ld.global.u32 %r1962, [%rd184];
ld.global.u32 %r1963, [%rd184+4];
st.local.u32 [%rd30+4], %r1959;
st.local.u32 [%rd30+8196], %r1960;
st.local.u32 [%rd31+4], %r1962;
st.local.u32 [%rd31+8196], %r1963;
setp.eq.s32 %p286, %r258, 2;
@%p286 bra $L__BB6_231;
// Third tail element (index i+2).
cvta.to.global.u64 %rd387, %rd62;
add.s32 %r1964, %r2803, 2;
add.s32 %r1967, %r1964, %r1948;
mul.wide.u32 %rd186, %r1967, 8;
add.s64 %rd187, %rd387, %rd186;
ld.global.u32 %r1968, [%rd187];
ld.global.u32 %r1969, [%rd187+4];
add.s32 %r1970, %r1964, %r1947;
mul.wide.u32 %rd188, %r1970, 8;
add.s64 %rd189, %rd387, %rd188;
ld.global.u32 %r1971, [%rd189];
ld.global.u32 %r1972, [%rd189+4];
st.local.u32 [%rd30+8], %r1968;
st.local.u32 [%rd30+8200], %r1969;
st.local.u32 [%rd31+8], %r1971;
st.local.u32 [%rd31+8200], %r1972;
bra.uni $L__BB6_231;
// Exit path entered from outside this chunk. Prepares the five result
// registers consumed at $L__BB6_592:
//   %r3481 -> shared_code and result word +0
//   %r3482 -> result word +4
//   %r3483 -> shared_header_len
//   %r3484 -> shared_body_len
//   %r3485 -> result word +8
// NOTE(review): %r3 is used elsewhere in this function as the upper bound for
// byte offsets into [%rd4], so it presumably is that buffer's capacity — confirm.
$L__BB6_590:
// %r3 == 0: report code 1 with detail 3 and all lengths zero.
setp.eq.s32 %p709, %r3, 0;
mov.u32 %r3483, 0;
mov.u32 %r3482, 3;
mov.u32 %r3481, 1;
mov.u32 %r3484, %r3483;
mov.u32 %r3485, %r3483;
@%p709 bra $L__BB6_592;
// Otherwise write a single zero byte at [%rd4] and report code 0, detail 0,
// header length 1, body length 0, total 1.
mov.u16 %rs135, 0;
st.global.u8 [%rd4], %rs135;
mov.u32 %r3483, 1;
mov.u32 %r3481, 0;
mov.u32 %r3482, %r3481;
mov.u32 %r3484, %r3481;
mov.u32 %r3485, %r3483;
bra.uni $L__BB6_592;
// Finish the byte stream written at [%rd4] and compute the result lengths.
// Entered from outside this chunk with:
//   %r3419 = number of bytes already written to [%rd4]
//   %r3457 / %r3458 = pending value and its count (flushed if count != 0)
//   %r3422 = selects 8 (when zero) or 7 as the width the pending value is
//            left-aligned to
//   %r3423 = failure indicator carried in from earlier code
//   %r3    = upper bound for byte offsets into [%rd4]
// NOTE(review): the 7/8 selection and the zero byte appended after a trailing
// 0xFF resemble JPEG 2000 bit stuffing — confirm against the CUDA source.
// On failure, control goes to $L__BB6_592 with %r3481 = 1 and a detail code
// in %r3482 (4, 5 or 6); on success it falls into $L__BB6_592 with %r3481 = 0.
$L__BB6_242:
// No pending bits: keep the incoming failure indicator.
setp.eq.s32 %p690, %r3458, 0;
mov.u32 %r3472, %r3423;
@%p690 bra $L__BB6_245;
// Left-align the pending value: shift by (%r3422 == 0 ? 8 : 7) - %r3458.
setp.eq.s32 %p691, %r3422, 0;
selp.b32 %r2580, 8, 7, %p691;
sub.s32 %r2581, %r2580, %r3458;
shl.b32 %r1458, %r3457, %r2581;
// No room for the flushed byte: mark failure (%r3472 = 1).
setp.ge.u32 %p692, %r3419, %r3;
mov.u32 %r3472, 1;
@%p692 bra $L__BB6_245;
// Store the low 8 bits and advance the write position.
cvt.u64.u32 %rd337, %r3419;
add.s64 %rd338, %rd4, %rd337;
st.global.u8 [%rd338], %r1458;
add.s32 %r3419, %r3419, 1;
mov.u32 %r3472, %r3423;
$L__BB6_245:
// Any failure so far: exit with code 1, detail 4, lengths zero.
setp.ne.s32 %p693, %r3472, 0;
mov.u32 %r3483, 0;
mov.u32 %r3482, 4;
mov.u32 %r3481, 1;
mov.u32 %r3484, %r3483;
mov.u32 %r3485, %r3483;
@%p693 bra $L__BB6_592;
// %r3473 = final length of the bytes written so far; %r3478 = running sum
// for the loop below. With nothing written, both stay 0.
setp.eq.s32 %p694, %r3419, 0;
mov.u32 %r3478, 0;
mov.u32 %r3473, %r3478;
@%p694 bra $L__BB6_250;
// Inspect the last written byte; only a trailing 0xFF needs extra handling.
add.s32 %r2588, %r3419, -1;
cvt.u64.u32 %rd339, %r2588;
add.s64 %rd340, %rd4, %rd339;
ld.global.u8 %rs133, [%rd340];
setp.ne.s16 %p695, %rs133, 255;
mov.u32 %r3473, %r3419;
@%p695 bra $L__BB6_250;
// Trailing 0xFF: a zero byte must follow. No room -> exit with detail 5
// (%r3481 is still 1 from $L__BB6_245).
setp.ge.u32 %p696, %r3419, %r3;
mov.u32 %r3483, 0;
mov.u32 %r3482, 5;
mov.u32 %r3484, %r3483;
mov.u32 %r3485, %r3483;
@%p696 bra $L__BB6_592;
cvt.u64.u32 %rd341, %r3419;
add.s64 %rd342, %rd4, %rd341;
mov.u16 %rs134, 0;
st.global.u8 [%rd342], %rs134;
add.s32 %r3473, %r3419, 1;
$L__BB6_250:
// Sum the lengths of all contributing entries into %r3478.
// Outer loop: %r2 groups; group g is the 16-byte record at
// param_3 + 16*(g + %r1), with u32 +0 = first entry index, u32 +4 = entry count.
setp.eq.s32 %p724, %r2, 0;
@%p724 bra $L__BB6_258;
ld.param.u64 %rd363, [ j2k_htj2k_packetize_cleanup_param_3];
cvta.to.global.u64 %rd47, %rd363;
mov.u32 %r3474, 0;
cvta.to.global.u64 %rd49, %rd60;
mov.u32 %r3478, %r3474;
$L__BB6_252:
add.s32 %r2597, %r3474, %r1;
mul.wide.u32 %rd343, %r2597, 16;
add.s64 %rd48, %rd47, %rd343;
ld.global.u32 %r1466, [%rd48+4];
// Empty group: skip.
setp.eq.s32 %p698, %r1466, 0;
@%p698 bra $L__BB6_257;
ld.global.u32 %r1467, [%rd48];
mov.u32 %r3476, 0;
$L__BB6_254:
// Inner loop: 36-byte entries in the array at %rd60. An entry contributes
// only if both its u32 at +16 and its u32 at +4 (the length) are non-zero.
add.s32 %r2599, %r3476, %r1467;
mul.wide.u32 %rd344, %r2599, 36;
add.s64 %rd345, %rd49, %rd344;
ld.global.u32 %r2600, [%rd345+16];
setp.eq.s32 %p699, %r2600, 0;
ld.global.u32 %r1470, [%rd345+4];
setp.eq.s32 %p700, %r1470, 0;
or.pred %p701, %p700, %p699;
@%p701 bra $L__BB6_256;
// Accumulate with wrap-around detection: fail (detail 6) if the sum
// wrapped, if sum + %r3473 wrapped, or if sum + %r3473 exceeds %r3.
add.s32 %r3478, %r1470, %r3478;
setp.lt.u32 %p702, %r3478, %r1470;
add.s32 %r2606, %r3478, %r3473;
setp.lt.u32 %p703, %r2606, %r3478;
or.pred %p704, %p702, %p703;
setp.gt.u32 %p705, %r2606, %r3;
or.pred %p706, %p705, %p704;
mov.u32 %r3483, 0;
mov.u32 %r3482, 6;
mov.u32 %r3484, %r3483;
mov.u32 %r3485, %r3483;
@%p706 bra $L__BB6_592;
$L__BB6_256:
// Next entry in the group.
add.s32 %r3476, %r3476, 1;
setp.lt.u32 %p707, %r3476, %r1466;
@%p707 bra $L__BB6_254;
$L__BB6_257:
// Next group.
add.s32 %r3474, %r3474, 1;
setp.lt.u32 %p708, %r3474, %r2;
@%p708 bra $L__BB6_252;
$L__BB6_258:
// Success: code 0, detail 0, header length = %r3473, body length = %r3478,
// total = their sum.
add.s32 %r3485, %r3478, %r3473;
mov.u32 %r3481, 0;
mov.u32 %r3482, %r3481;
mov.u32 %r3483, %r3473;
mov.u32 %r3484, %r3478;
// Publish the outcome. Inputs: %r3481 (code), %r3482 (detail),
// %r3483 (header length), %r3484 (body length), %r3485 (value stored at +8;
// on the success paths visible in this chunk it is header + body).
// Shared memory receives code / header length / body length, which are read
// back after the barrier at $L__BB6_593. The 16-byte global record at
// param_10 + 16 * %ctaid.x receives { code, detail, %r3485, 0 }.
$L__BB6_592:
mov.u32 %r2639, %ctaid.x;
ld.param.u64 %rd360, [ j2k_htj2k_packetize_cleanup_param_10];
mov.u32 %r2617, 0;
st.shared.u32 [_ZZ32 j2k_htj2k_packetize_cleanupE11shared_code], %r3481;
st.shared.u32 [_ZZ32 j2k_htj2k_packetize_cleanupE17shared_header_len], %r3483;
st.shared.u32 [_ZZ32 j2k_htj2k_packetize_cleanupE15shared_body_len], %r3484;
// One result record per block, indexed by %ctaid.x.
cvta.to.global.u64 %rd346, %rd360;
mul.wide.u32 %rd347, %r2639, 16;
add.s64 %rd348, %rd346, %rd347;
st.global.u32 [%rd348], %r3481;
st.global.u32 [%rd348+4], %r3482;
st.global.u32 [%rd348+8], %r3485;
// The fourth word is always written as 0.
st.global.u32 [%rd348+12], %r2617;
// After a block-wide barrier, copy the body bytes into the output buffer.
// Each thread handles body offsets k = %tid.x, %tid.x + %ntid.x, ... below
// shared_body_len. For each k it walks the same groups/entries that were
// summed earlier (records at param_3, 36-byte entries at %rd60), subtracting
// entry lengths until k falls inside an entry, then copies one byte:
//   [%rd4 + shared_header_len + k] = param_0[entry.u32[+0] + remaining offset]
// Nothing is copied when shared_code != 0 or shared_body_len == 0.
$L__BB6_593:
bar.sync 0;
// Read back the published code and body length.
ld.shared.u32 %r2619, [_ZZ32 j2k_htj2k_packetize_cleanupE11shared_code];
setp.ne.s32 %p710, %r2619, 0;
ld.shared.u32 %r1487, [_ZZ32 j2k_htj2k_packetize_cleanupE15shared_body_len];
setp.eq.s32 %p711, %r1487, 0;
or.pred %p712, %p710, %p711;
@%p712 bra $L__BB6_608;
// Threads whose first offset is already past the body have no work.
mov.u32 %r2640, %tid.x;
setp.ge.u32 %p713, %r2640, %r1487;
@%p713 bra $L__BB6_608;
// %r1488 = stride between the offsets handled by one thread.
setp.eq.s32 %p714, %r2, 0;
mov.u32 %r1488, %ntid.x;
@%p714 bra $L__BB6_606;
ld.param.u64 %rd362, [ j2k_htj2k_packetize_cleanup_param_0];
ld.param.u64 %rd361, [ j2k_htj2k_packetize_cleanup_param_3];
ld.shared.u32 %r1489, [_ZZ32 j2k_htj2k_packetize_cleanupE17shared_header_len];
// %r3486 = body offset k for this iteration.
mov.u32 %r3486, %tid.x;
cvta.to.global.u64 %rd50, %rd361;
cvta.to.global.u64 %rd53, %rd60;
cvta.to.global.u64 %rd354, %rd362;
$L__BB6_597:
// %rd51 = destination address: %rd4 + header_len + k (32-bit sum).
// %r3494 = group index g; %r3491 = offset still to be consumed.
add.s32 %r2622, %r3486, %r1489;
cvt.u64.u32 %rd349, %r2622;
add.s64 %rd51, %rd4, %rd349;
mov.u32 %r3494, 0;
mov.u32 %r3491, %r3486;
$L__BB6_598:
// Group record at param_3 + 16*(g + %r1): u32 +0 = first entry, +4 = count.
add.s32 %r2623, %r3494, %r1;
mul.wide.u32 %rd350, %r2623, 16;
add.s64 %rd52, %rd50, %rd350;
ld.global.u32 %r1494, [%rd52+4];
setp.eq.s32 %p715, %r1494, 0;
@%p715 bra $L__BB6_604;
// %r3489 = current entry index; %r3490 = entries visited in this group.
ld.global.u32 %r3489, [%rd52];
mov.u32 %r3490, 0;
$L__BB6_600:
// %rd55 = address of the entry's length field (+4).
// Entries whose u32 at +16 or length at +4 is zero are skipped.
cvt.u64.u32 %rd54, %r3489;
mul.wide.u32 %rd351, %r3489, 36;
add.s64 %rd352, %rd53, %rd351;
add.s64 %rd55, %rd352, 4;
ld.global.u32 %r2625, [%rd352+16];
setp.eq.s32 %p716, %r2625, 0;
ld.global.u32 %r1499, [%rd352+4];
setp.eq.s32 %p717, %r1499, 0;
or.pred %p718, %p717, %p716;
@%p718 bra $L__BB6_603;
// Offset falls inside this entry: go copy the byte.
setp.lt.u32 %p719, %r3491, %r1499;
@%p719 bra $L__BB6_609;
// Otherwise consume this entry's length and keep searching.
sub.s32 %r3491, %r3491, %r1499;
$L__BB6_603:
cvt.u32.u64 %r2628, %rd54;
add.s32 %r3489, %r2628, 1;
add.s32 %r3490, %r3490, 1;
setp.lt.u32 %p720, %r3490, %r1494;
@%p720 bra $L__BB6_600;
bra.uni $L__BB6_604;
$L__BB6_609:
// [%rd55 - 4] is the entry's u32 at +0, used as a byte offset into param_0.
ld.global.u32 %r2626, [%rd55+-4];
add.s32 %r2627, %r2626, %r3491;
cvt.u64.u32 %rd353, %r2627;
add.s64 %rd355, %rd354, %rd353;
ld.global.u8 %rs136, [%rd355];
st.global.u8 [%rd51], %rs136;
// Force the group loop to terminate: g = %r2, so g + 1 fails the test below.
mov.u32 %r3494, %r2;
$L__BB6_604:
// Next group.
add.s32 %r3494, %r3494, 1;
setp.lt.u32 %p721, %r3494, %r2;
@%p721 bra $L__BB6_598;
// Next body offset for this thread.
add.s32 %r3486, %r3486, %r1488;
setp.lt.u32 %p722, %r3486, %r1487;
@%p722 bra $L__BB6_597;
bra.uni $L__BB6_608;
$L__BB6_606:
// %r2 == 0: there are no groups, so no byte is copied; the loop below only
// advances the offset until it reaches the body length.
mov.u32 %r3495, %tid.x;
$L__BB6_607:
add.s32 %r3495, %r3495, %r1488;
setp.lt.u32 %p723, %r3495, %r1487;
@%p723 bra $L__BB6_607;
$L__BB6_608:
ret;
}