//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-29745058
// Cuda compilation tools, release 11.3, V11.3.58
// Based on NVVM 7.0.1
//
.version 7.3
.target sm_52
.address_size 64
// .globl sin_kernel
// 24-byte, 4-byte-aligned table in global memory. sin_kernel's large-argument
// path reads it as six consecutive 32-bit words (ld.global.nc.u32) and
// multiplies each word by the input's mantissa.
// NOTE(review): the name suggests the words hold the binary expansion of 2/pi,
// least-significant word first - confirm against the CUDA math headers.
.global .align 4 .b8 __cudart_i2opi_f[24] = {65, 144, 67, 60, 153, 149, 98, 219, 192, 221, 52, 245, 209, 87, 39, 252, 41, 21, 68, 78, 110, 131, 249, 162};
// sin_kernel: element-wise kernel over 32-bit floats; one thread per element.
//
//   param_0 (u64): base address the result is stored to   (out[i], f32)
//   param_1 (u64): base address the operand is loaded from (in[i],  f32)
//   param_2 (u32): element count, compared against the thread index as s32
//
// Each thread computes i = ctaid.x * ntid.x + tid.x, returns immediately when
// i >= count, otherwise loads x = in[i], reduces x to a remainder r and a
// quadrant number q, evaluates one of two polynomials in r selected by the
// low bit of q, negates the result when bit 1 of q is set, and stores it to
// out[i]. Per the kernel name this is sin(x).
//
// Argument reduction has three cases:
//   |x| <= 105615.0 : three-step fma reduction against a split constant
//   |x| == +inf     : r = x * 0
//   otherwise       : multi-word integer multiply of the mantissa by the
//                     __cudart_i2opi_f table, using 28 bytes of local scratch
.visible .entry sin_kernel(
.param .u64 sin_kernel_param_0,
.param .u64 sin_kernel_param_1,
.param .u32 sin_kernel_param_2
)
{
// 7 x 32-bit words of per-thread scratch for the large-argument product.
.local .align 4 .b8 __local_depot0[28];
.reg .b64 %SP;
.reg .b64 %SPL;
.reg .pred %p<12>;
.reg .f32 %f<38>;
.reg .b32 %r<53>;
.reg .f64 %fd<3>;
.reg .b64 %rd<33>;
mov.u64 %SPL, __local_depot0;
ld.param.u64 %rd10, [sin_kernel_param_0];
ld.param.u64 %rd11, [sin_kernel_param_1];
ld.param.u32 %r19, [sin_kernel_param_2];
// %rd1 = base address of the local scratch buffer.
add.u64 %rd1, %SPL, 0;
// %r1 = ctaid.x * ntid.x + tid.x (global element index).
mov.u32 %r20, %ntid.x;
mov.u32 %r21, %ctaid.x;
mov.u32 %r22, %tid.x;
mad.lo.s32 %r1, %r21, %r20, %r22;
// Bounds guard: threads at or past the element count do nothing.
setp.ge.s32 %p1, %r1, %r19;
@%p1 bra $L__BB0_14;
// %f1 = x = in[i]; %rd2 keeps the sign-extended index for the final store.
cvta.to.global.u64 %rd13, %rd11;
cvt.s64.s32 %rd2, %r1;
mul.wide.s32 %rd14, %r1, 4;
add.s64 %rd15, %rd13, %rd14;
ld.global.f32 %f1, [%rd15];
// Fast-path reduction, computed unconditionally:
//   q (%r52) = round-to-nearest-int(x * 0.63661977)   (0f3F22F983 ~ 2/pi)
//   r (%f35) = x + q*c0 + q*c1 + q*c2, with c0 ~ -1.5707963 (0fBFC90FDA) and
//   c1, c2 successively smaller negative correction terms, so the three fmas
//   subtract q times a constant split into high/mid/low parts.
mul.f32 %f14, %f1, 0f3F22F983;
cvt.rni.s32.f32 %r52, %f14;
cvt.rn.f32.s32 %f15, %r52;
mov.f32 %f16, 0fBFC90FDA;
fma.rn.f32 %f17, %f15, %f16, %f1;
mov.f32 %f18, 0fB3A22168;
fma.rn.f32 %f19, %f15, %f18, %f17;
mov.f32 %f20, 0fA7C234C5;
fma.rn.f32 %f35, %f15, %f20, %f19;
// Keep the fast-path result when |x| <= 105615.0 (0f47CE4780). setp.leu is
// also true for unordered operands, so a NaN input takes this branch too.
abs.f32 %f3, %f1;
setp.leu.f32 %p2, %f3, 0f47CE4780;
@%p2 bra $L__BB0_9;
// |x| == +inf (0f7F800000) is handled separately from other large values.
setp.eq.f32 %p3, %f3, 0f7F800000;
@%p3 bra $L__BB0_8;
bra.uni $L__BB0_3;
$L__BB0_8:
// Infinite input: r = x * 0.0. %r52 keeps the value produced by the
// cvt.rni above.
mov.f32 %f23, 0f00000000;
mul.rn.f32 %f35, %f1, %f23;
bra.uni $L__BB0_9;
$L__BB0_3:
// Large finite input: work on the raw bit pattern of x.
//   %r3 = bits of x
//   %r4 = e = (8-bit exponent field, bits 23..30) - 128
//   %r5 = mantissa shifted to the top of the word with bit 31 forced to 1
//   %r6 = e >> 5 (whole-word part of the shift; e & 31 is taken further down)
mov.b32 %r3, %f1;
bfe.u32 %r24, %r3, 23, 8;
add.s32 %r4, %r24, -128;
shl.b32 %r25, %r3, 8;
or.b32 %r5, %r25, -2147483648;
shr.u32 %r6, %r4, 5;
// Loop state: %rd32 = running carry, %r49 = iteration counter,
// %rd30 = read cursor into the table, %rd31 = write cursor into scratch.
mov.u64 %rd32, 0;
mov.u32 %r49, 0;
mov.u64 %rd30, __cudart_i2opi_f;
mov.u64 %rd31, %rd1;
$L__BB0_4:
.pragma "nounroll";
// One limb of a multi-word multiply: 64-bit product = table_word * %r5 + carry.
// The low 32 bits are stored to scratch; the high 32 bits become the next carry.
ld.global.nc.u32 %r26, [%rd30];
mad.wide.u32 %rd18, %r26, %r5, %rd32;
shr.u64 %rd32, %rd18, 32;
st.local.u32 [%rd31], %rd18;
add.s64 %rd31, %rd31, 4;
add.s64 %rd30, %rd30, 4;
add.s32 %r49, %r49, 1;
// Six iterations, one per 32-bit word of the 24-byte table.
setp.ne.s32 %p4, %r49, 6;
@%p4 bra $L__BB0_4;
// The final carry becomes the seventh (most significant) scratch word.
st.local.u32 [%rd1+24], %rd32;
// %rd23 = scratch + 4 * (2 - (e >> 5)), so the loads below fetch
//   hi (%r50) = scratch word [6 - (e >> 5)]
//   lo (%r51) = scratch word [5 - (e >> 5)]
cvt.u64.u32 %rd19, %r6;
mov.u64 %rd20, 2;
sub.s64 %rd21, %rd20, %rd19;
shl.b64 %rd22, %rd21, 2;
add.s64 %rd23, %rd1, %rd22;
add.s64 %rd9, %rd23, 16;
ld.local.u32 %r50, [%rd23+16];
ld.local.u32 %r51, [%rd23+12];
// %r11 = e & 31 = remaining bit shift inside a word; skip the realignment
// when it is zero (a right shift by 32 - 0 would be needed otherwise).
and.b32 %r11, %r4, 31;
setp.eq.s32 %p5, %r11, 0;
@%p5 bra $L__BB0_7;
// Shift the hi:lo pair left by %r11 bits, pulling the vacated low bits from
// the next lower word each time; [%rd9-8] is scratch word [4 - (e >> 5)].
mov.u32 %r27, 32;
sub.s32 %r28, %r27, %r11;
shr.u32 %r29, %r51, %r28;
shl.b32 %r30, %r50, %r11;
add.s32 %r50, %r29, %r30;
ld.local.u32 %r31, [%rd9+-8];
shr.u32 %r32, %r31, %r28;
shl.b32 %r33, %r51, %r11;
add.s32 %r51, %r32, %r33;
$L__BB0_7:
// %r34 = sign bit of x.
and.b32 %r34, %r3, -2147483648;
// %r37 = (hi << 2) | (lo >> 30): the fraction with hi's top two bits removed.
shr.u32 %r35, %r51, 30;
shl.b32 %r36, %r50, 2;
or.b32 %r37, %r35, %r36;
// %r38 = top bit of that fraction (fraction >= 1/2), used as a round-up bit.
// Quadrant = (hi >> 30) + round-up bit, negated when x is negative.
shr.u32 %r38, %r37, 31;
shr.u32 %r39, %r50, 30;
add.s32 %r40, %r38, %r39;
neg.s32 %r41, %r40;
setp.eq.s32 %p6, %r34, 0;
selp.b32 %r52, %r40, %r41, %p6;
// When the round-up bit is set, flip the result sign (%r43) and take the
// one's complement of the 64-bit fraction (xor with an all-ones mask).
setp.ne.s32 %p7, %r38, 0;
xor.b32 %r42, %r34, -2147483648;
selp.b32 %r43, %r42, %r34, %p7;
selp.b32 %r44, -1, 0, %p7;
xor.b32 %r45, %r37, %r44;
shl.b32 %r46, %r51, 2;
xor.b32 %r47, %r46, %r44;
// Pack the two halves into one 64-bit value: %r45 in bits 63..32,
// %r47 in bits 31..0.
cvt.u64.u32 %rd24, %r45;
cvt.u64.u32 %rd25, %r47;
bfi.b64 %rd26, %rd24, %rd25, 32, 32;
// Convert to double and scale by 0d3BF921FB54442D19 (~ (pi/2) * 2^-64) to
// get the reduced argument, then round to f32.
cvt.rn.f64.s64 %fd1, %rd26;
mul.f64 %fd2, %fd1, 0d3BF921FB54442D19;
cvt.rn.f32.f64 %f21, %fd2;
// Apply the sign chosen above: negate when %r43 is non-zero.
setp.eq.s32 %p8, %r43, 0;
neg.f32 %f22, %f21;
selp.f32 %f35, %f21, %f22, %p8;
$L__BB0_9:
// Polynomial evaluation. %f35 = r, %r52 = q on every path reaching here.
//   %p9 = (q is even)
//   %f7 = r when q is even, 1.0 when q is odd
//   %f8 = r * r
and.b32 %r18, %r52, 1;
setp.eq.s32 %p9, %r18, 0;
selp.f32 %f7, %f35, 0f3F800000, %p9;
mul.rn.f32 %f8, %f35, %f35;
// Leading coefficient for the even-q polynomial ...
mov.f32 %f36, 0fB94D4153;
@%p9 bra $L__BB0_11;
// ... odd q instead starts from an extra fma step (one more term).
mov.f32 %f25, 0fBAB607ED;
mov.f32 %f26, 0f37CBAC00;
fma.rn.f32 %f36, %f26, %f8, %f25;
$L__BB0_11:
// Horner steps in r^2; each selp picks the even-q or odd-q coefficient.
selp.f32 %f27, 0f3C0885E4, 0f3D2AAABB, %p9;
fma.rn.f32 %f28, %f36, %f8, %f27;
selp.f32 %f29, 0fBE2AAAA8, 0fBEFFFFFF, %p9;
fma.rn.f32 %f30, %f28, %f8, %f29;
// result = %f7 + poly * (r^2 * %f7)
//   even q: r + r^3 * P(r^2)   (odd polynomial in r)
//   odd q : 1 + r^2 * Q(r^2)   (even polynomial in r)
mov.f32 %f31, 0f00000000;
fma.rn.f32 %f32, %f8, %f7, %f31;
fma.rn.f32 %f37, %f30, %f32, %f7;
// Bit 1 of q set: negate the result (multiply by -1.0, add 0.0).
and.b32 %r48, %r52, 2;
setp.eq.s32 %p11, %r48, 0;
@%p11 bra $L__BB0_13;
mov.f32 %f34, 0fBF800000;
fma.rn.f32 %f37, %f37, %f34, %f31;
$L__BB0_13:
// out[i] = result, where the byte offset is i * 4.
cvta.to.global.u64 %rd27, %rd10;
shl.b64 %rd28, %rd2, 2;
add.s64 %rd29, %rd27, %rd28;
st.global.f32 [%rd29], %f37;
$L__BB0_14:
ret;
}