cudarc 0.9.9

Safe wrappers around CUDA APIs
Documentation
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-29745058
// Cuda compilation tools, release 11.3, V11.3.58
// Based on NVVM 7.0.1
//

.version 7.3
.target sm_52
.address_size 64

	// .globl	sin_kernel
// 24-byte, 4-byte-aligned table in global memory. sin_kernel reads it as six
// consecutive 32-bit words (see the ld.global.nc.u32 loop) and multiplies each
// word by the input's 32-bit significand.
// Interpreted as little-endian u32 words, from last to first, the bytes spell
//   0xA2F9836E 4E441529 FC2757D1 F534DDC0 DB629599 3C439041
// which are the leading 192 fractional bits of 2/pi, stored least-significant
// word first.
.global .align 4 .b8 __cudart_i2opi_f[24] = {65, 144, 67, 60, 153, 149, 98, 219, 192, 221, 52, 245, 209, 87, 39, 252, 41, 21, 68, 78, 110, 131, 249, 162};

// sin_kernel: elementwise single-precision sine.
//
// Each thread handles one element: it loads an f32 from the buffer addressed
// by param_1, evaluates sine via argument reduction to a quadrant plus a
// polynomial, and stores the f32 result at the same index in the buffer
// addressed by param_0.
//
// Parameters:
//   sin_kernel_param_0  u64  destination address (written with st.global.f32)
//   sin_kernel_param_1  u64  source address (read with ld.global.f32)
//   sin_kernel_param_2  u32  element count; compared as a signed 32-bit value
//
// Indexing uses only the x components: index = ctaid.x * ntid.x + tid.x.
// Threads whose index is >= the element count return without touching memory.
//
// Local memory: a 28-byte scratch area (seven 32-bit words) holds the
// multi-word product used by the large-argument reduction path.
.visible .entry sin_kernel(
	.param .u64 sin_kernel_param_0,
	.param .u64 sin_kernel_param_1,
	.param .u32 sin_kernel_param_2
)
{
	.local .align 4 .b8 	__local_depot0[28];
	.reg .b64 	%SP;
	.reg .b64 	%SPL;
	.reg .pred 	%p<12>;
	.reg .f32 	%f<38>;
	.reg .b32 	%r<53>;
	.reg .f64 	%fd<3>;
	.reg .b64 	%rd<33>;


	// %rd10 = destination pointer, %rd11 = source pointer, %r19 = element count.
	// %rd1 = base of the 28-byte local scratch area.
	mov.u64 	%SPL, __local_depot0;
	ld.param.u64 	%rd10, [sin_kernel_param_0];
	ld.param.u64 	%rd11, [sin_kernel_param_1];
	ld.param.u32 	%r19, [sin_kernel_param_2];
	add.u64 	%rd1, %SPL, 0;
	// %r1 = ctaid.x * ntid.x + tid.x (this thread's element index).
	mov.u32 	%r20, %ntid.x;
	mov.u32 	%r21, %ctaid.x;
	mov.u32 	%r22, %tid.x;
	mad.lo.s32 	%r1, %r21, %r20, %r22;
	// Bounds guard: skip straight to ret when index >= count (signed compare).
	setp.ge.s32 	%p1, %r1, %r19;
	@%p1 bra 	$L__BB0_14;

	// %f1 = source[index]; the byte offset is index * 4.
	cvta.to.global.u64 	%rd13, %rd11;
	cvt.s64.s32 	%rd2, %r1;
	mul.wide.s32 	%rd14, %r1, 4;
	add.s64 	%rd15, %rd13, %rd14;
	ld.global.f32 	%f1, [%rd15];
	// Fast reduction, computed unconditionally:
	//   %r52 = round-to-nearest-integer(x * 0f3F22F983)   (constant ~= 2/pi)
	//   %f35 = x + q*0fBFC90FDA + q*0fB3A22168 + q*0fA7C234C5, q = float(%r52)
	// The three constants (~ -1.5707963, -7.55e-8, -5.39e-15) act as a
	// three-piece split of -pi/2, so %f35 is x minus q*(pi/2).
	mul.f32 	%f14, %f1, 0f3F22F983;
	cvt.rni.s32.f32 	%r52, %f14;
	cvt.rn.f32.s32 	%f15, %r52;
	mov.f32 	%f16, 0fBFC90FDA;
	fma.rn.f32 	%f17, %f15, %f16, %f1;
	mov.f32 	%f18, 0fB3A22168;
	fma.rn.f32 	%f19, %f15, %f18, %f17;
	mov.f32 	%f20, 0fA7C234C5;
	fma.rn.f32 	%f35, %f15, %f20, %f19;
	// Keep the fast result when |x| <= 0f47CE4780 (= 105615.0). "leu" is also
	// true for unordered operands, so a NaN input takes this branch as well.
	abs.f32 	%f3, %f1;
	setp.leu.f32 	%p2, %f3, 0f47CE4780;
	@%p2 bra 	$L__BB0_9;

	// Large |x|: +/-infinity is handled at $L__BB0_8, every other value goes
	// through the multi-word reduction at $L__BB0_3.
	setp.eq.f32 	%p3, %f3, 0f7F800000;
	@%p3 bra 	$L__BB0_8;
	bra.uni 	$L__BB0_3;

$L__BB0_8:
	// |x| is infinite: x * 0.0 replaces the reduced argument. Infinity times
	// zero is NaN under IEEE-754, and that NaN flows through the polynomial
	// below into the stored result.
	mov.f32 	%f23, 0f00000000;
	mul.rn.f32 	%f35, %f1, %f23;
	bra.uni 	$L__BB0_9;

$L__BB0_3:
	// Multi-word reduction for large finite |x|.
	//   %r3  = raw bits of x
	//   %r4  = (8-bit exponent field) - 128
	//   %r5  = significand shifted to the top of the word with bit 31 forced on
	//   %r6  = %r4 / 32, a word offset used after the loop
	//   %rd32 = running carry, %r49 = loop counter
	//   %rd30 = read cursor into __cudart_i2opi_f, %rd31 = write cursor into scratch
	mov.b32 	%r3, %f1;
	bfe.u32 	%r24, %r3, 23, 8;
	add.s32 	%r4, %r24, -128;
	shl.b32 	%r25, %r3, 8;
	or.b32  	%r5, %r25, -2147483648;
	shr.u32 	%r6, %r4, 5;
	mov.u64 	%rd32, 0;
	mov.u32 	%r49, 0;
	mov.u64 	%rd30, __cudart_i2opi_f;
	mov.u64 	%rd31, %rd1;

$L__BB0_4:
	// Six iterations: multiply one 32-bit table word by %r5, add the carry
	// from the previous iteration, store the low 32 bits into the scratch
	// area and keep the high 32 bits as the next carry.
	.pragma "nounroll";
	ld.global.nc.u32 	%r26, [%rd30];
	mad.wide.u32 	%rd18, %r26, %r5, %rd32;
	shr.u64 	%rd32, %rd18, 32;
	st.local.u32 	[%rd31], %rd18;
	add.s64 	%rd31, %rd31, 4;
	add.s64 	%rd30, %rd30, 4;
	add.s32 	%r49, %r49, 1;
	setp.ne.s32 	%p4, %r49, 6;
	@%p4 bra 	$L__BB0_4;

	// The final carry becomes scratch word 6 (byte offset 24), giving a
	// seven-word product.
	st.local.u32 	[%rd1+24], %rd32;
	// Pick the product words selected by the exponent:
	//   %r50 = scratch word (6 - %r6), %r51 = scratch word (5 - %r6).
	cvt.u64.u32 	%rd19, %r6;
	mov.u64 	%rd20, 2;
	sub.s64 	%rd21, %rd20, %rd19;
	shl.b64 	%rd22, %rd21, 2;
	add.s64 	%rd23, %rd1, %rd22;
	add.s64 	%rd9, %rd23, 16;
	ld.local.u32 	%r50, [%rd23+16];
	ld.local.u32 	%r51, [%rd23+12];
	// %r11 = remaining bit shift (exponent mod 32); nothing to do when it is 0.
	and.b32  	%r11, %r4, 31;
	setp.eq.s32 	%p5, %r11, 0;
	@%p5 bra 	$L__BB0_7;

	// Shift the %r50:%r51 pair left by %r11 bits, pulling the vacated low
	// bits from the next lower word (scratch word 4 - %r6).
	mov.u32 	%r27, 32;
	sub.s32 	%r28, %r27, %r11;
	shr.u32 	%r29, %r51, %r28;
	shl.b32 	%r30, %r50, %r11;
	add.s32 	%r50, %r29, %r30;
	ld.local.u32 	%r31, [%rd9+-8];
	shr.u32 	%r32, %r31, %r28;
	shl.b32 	%r33, %r51, %r11;
	add.s32 	%r51, %r32, %r33;

$L__BB0_7:
	// %r34 = sign bit of x.
	// %r37 = upper 32 bits of (%r50:%r51) << 2, i.e. the fraction that follows
	//        the top two bits of %r50.
	// %r38 = top bit of that fraction (set when the fraction is >= 1/2).
	// %r40 = (top two bits of %r50) + %r38: quadrant count rounded to nearest.
	and.b32  	%r34, %r3, -2147483648;
	shr.u32 	%r35, %r51, 30;
	shl.b32 	%r36, %r50, 2;
	or.b32  	%r37, %r35, %r36;
	shr.u32 	%r38, %r37, 31;
	shr.u32 	%r39, %r50, 30;
	add.s32 	%r40, %r38, %r39;
	neg.s32 	%r41, %r40;
	// Quadrant %r52 takes the sign of x.
	setp.eq.s32 	%p6, %r34, 0;
	selp.b32 	%r52, %r40, %r41, %p6;
	// When the quadrant was rounded up (%p7), flip the sign of the remainder
	// (%r43) and bitwise-complement the 64-bit fraction (xor with all ones).
	setp.ne.s32 	%p7, %r38, 0;
	xor.b32  	%r42, %r34, -2147483648;
	selp.b32 	%r43, %r42, %r34, %p7;
	selp.b32 	%r44, -1, 0, %p7;
	xor.b32  	%r45, %r37, %r44;
	shl.b32 	%r46, %r51, 2;
	xor.b32  	%r47, %r46, %r44;
	// %rd26 = (%r45 << 32) | %r47: the fraction as one 64-bit integer.
	cvt.u64.u32 	%rd24, %r45;
	cvt.u64.u32 	%rd25, %r47;
	bfi.b64 	%rd26, %rd24, %rd25, 32, 32;
	// Convert to double and scale by 0d3BF921FB54442D19 (~= (pi/2) * 2^-64),
	// round to f32, then apply the remainder sign held in %r43.
	cvt.rn.f64.s64 	%fd1, %rd26;
	mul.f64 	%fd2, %fd1, 0d3BF921FB54442D19;
	cvt.rn.f32.f64 	%f21, %fd2;
	setp.eq.s32 	%p8, %r43, 0;
	neg.f32 	%f22, %f21;
	selp.f32 	%f35, %f21, %f22, %p8;

$L__BB0_9:
	// Polynomial stage. %f35 = reduced argument r, %r52 = quadrant q.
	// %p9 is true when q is even. %f7 = r for even q, 1.0 for odd q.
	// %f8 = r * r.
	and.b32  	%r18, %r52, 1;
	setp.eq.s32 	%p9, %r18, 0;
	selp.f32 	%f7, %f35, 0f3F800000, %p9;
	mul.rn.f32 	%f8, %f35, %f35;
	// Leading coefficient for the even-q polynomial (~ -1.9515e-4).
	mov.f32 	%f36, 0fB94D4153;
	@%p9 bra 	$L__BB0_11;

	// Odd q: the polynomial has one extra term, so the leading value is
	// 0f37CBAC00 * r^2 + 0fBAB607ED (~ 2.4433e-5 * r^2 - 1.3887e-3).
	mov.f32 	%f25, 0fBAB607ED;
	mov.f32 	%f26, 0f37CBAC00;
	fma.rn.f32 	%f36, %f26, %f8, %f25;

$L__BB0_11:
	// Horner steps in r^2 with per-parity coefficients:
	//   even q: ~ 8.3322e-3, then ~ -1/6     (sine-style series)
	//   odd  q: ~ 4.1667e-2, then ~ -1/2     (cosine-style series)
	// Result: %f37 = %f7 + poly(r^2) * (r^2 * %f7).
	selp.f32 	%f27, 0f3C0885E4, 0f3D2AAABB, %p9;
	fma.rn.f32 	%f28, %f36, %f8, %f27;
	selp.f32 	%f29, 0fBE2AAAA8, 0fBEFFFFFF, %p9;
	fma.rn.f32 	%f30, %f28, %f8, %f29;
	mov.f32 	%f31, 0f00000000;
	fma.rn.f32 	%f32, %f8, %f7, %f31;
	fma.rn.f32 	%f37, %f30, %f32, %f7;
	// Bit 1 of q selects the sign: negate the result when it is set.
	and.b32  	%r48, %r52, 2;
	setp.eq.s32 	%p11, %r48, 0;
	@%p11 bra 	$L__BB0_13;

	// result = result * -1.0 + 0.0
	mov.f32 	%f34, 0fBF800000;
	fma.rn.f32 	%f37, %f37, %f34, %f31;

$L__BB0_13:
	// destination[index] = result; %rd2 is the sign-extended index.
	cvta.to.global.u64 	%rd27, %rd10;
	shl.b64 	%rd28, %rd2, 2;
	add.s64 	%rd29, %rd27, %rd28;
	st.global.f32 	[%rd29], %f37;

$L__BB0_14:
	// Common exit, also reached directly by out-of-range threads.
	ret;

}