image-transitions 0.1.3

A library to create image transitions on the GPU.
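The listing below is the PTX for the crate's cross_fade kernel, as emitted by the NVIDIA NVVM compiler (CUDA 11.7 toolchain, target sm_61, 64-bit addressing):
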
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-31294372
// Cuda compilation tools, release 11.7, V11.7.64
// Based on NVVM 7.0.1
//

.version 7.7
.target sm_61
.address_size 64

	// .globl	cross_fade

.visible .entry cross_fade(
	.param .u64 cross_fade_param_0,
	.param .u64 cross_fade_param_1,
	.param .u64 cross_fade_param_2,
	.param .u64 cross_fade_param_3,
	.param .u16 cross_fade_param_4,
	.param .u64 cross_fade_param_5,
	.param .u64 cross_fade_param_6,
	.param .u64 cross_fade_param_7
)
{
	.reg .pred 	%p<9>;
	.reg .b16 	%rs<9>;
	.reg .b32 	%r<8>;
	.reg .f64 	%fd<12>;
	.reg .b64 	%rd<31>;


	ld.param.u64 	%rd13, [cross_fade_param_0];
	ld.param.u64 	%rd14, [cross_fade_param_1];
	ld.param.u64 	%rd15, [cross_fade_param_2];
	ld.param.u64 	%rd16, [cross_fade_param_3];
	ld.param.u16 	%rs1, [cross_fade_param_4];
	ld.param.u64 	%rd17, [cross_fade_param_5];
	ld.param.u64 	%rd18, [cross_fade_param_6];
	ld.param.u64 	%rd19, [cross_fade_param_7];
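	// Nothing to do if the per-thread iteration count (param_6, %rd18) is zero.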
	setp.eq.s64 	%p1, %rd18, 0;
	@%p1 bra 	$L__BB0_12;

	cvt.u64.u16 	%rd20, %rs1;
	mul.lo.s64 	%rd1, %rd20, %rd14;
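	// %rd1 = steps * param_1: the total number of output bytes.
	// The mad below forms the global thread index (%rd2 = blockDim.x * blockIdx.x + threadIdx.x).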
	mov.u32 	%r1, %tid.x;
	mov.u32 	%r2, %ctaid.x;
	mov.u32 	%r3, %ntid.x;
	mad.lo.s32 	%r4, %r3, %r2, %r1;
	cvt.u64.u32 	%rd2, %r4;
	setp.eq.s64 	%p2, %rd14, 0;
	@%p2 bra 	$L__BB0_10;

	cvta.to.global.u64 	%rd3, %rd19;
	cvta.to.global.u64 	%rd4, %rd15;
	cvta.to.global.u64 	%rd5, %rd13;
	add.s16 	%rs2, %rs1, -1;
	cvt.rn.f64.u16 	%fd1, %rs2;
	mov.u64 	%rd29, 0;
	cvt.u32.u64 	%r5, %rd14;
	mov.f64 	%fd5, 0d3FF0000000000000;

$L__BB0_3:
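	// Per-thread loop: iteration i handles output byte i * param_5 + thread index,
	// finishing when that index reaches steps * param_1 or i reaches param_6.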
	add.s64 	%rd7, %rd29, 1;
	mul.lo.s64 	%rd22, %rd29, %rd17;
	add.s64 	%rd8, %rd22, %rd2;
	setp.ge.u64 	%p3, %rd8, %rd1;
	@%p3 bra 	$L__BB0_12;
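
	// Use a 32-bit divide ($L__BB0_6) when both the byte index and param_1 fit
	// in 32 bits; otherwise fall through to the full 64-bit divide.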

	or.b64  	%rd23, %rd8, %rd14;
	and.b64  	%rd24, %rd23, -4294967296;
	setp.eq.s64 	%p4, %rd24, 0;
	@%p4 bra 	$L__BB0_6;

	div.u64 	%rd30, %rd8, %rd14;
	bra.uni 	$L__BB0_7;

$L__BB0_6:
	cvt.u32.u64 	%r6, %rd8;
	div.u32 	%r7, %r6, %r5;
	cvt.u64.u32 	%rd30, %r7;

$L__BB0_7:
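	// %rd30 = byte index / param_1 (the output frame number); %rd12 = the
	// remainder, i.e. the offset within the frame. Offsets >= param_3 trap in $L__BB0_8.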
	mul.lo.s64 	%rd25, %rd30, %rd14;
	sub.s64 	%rd12, %rd8, %rd25;
	setp.lt.u64 	%p5, %rd12, %rd16;
	@%p5 bra 	$L__BB0_9;
	bra.uni 	$L__BB0_8;

$L__BB0_9:
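	// In-bounds: t = frame / (steps - 1);
	// out[index] = floor((1 - t) * first[offset] + t * second[offset]), clamped to [0, 255].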
	cvt.rn.f64.u64 	%fd2, %rd30;
	div.rn.f64 	%fd3, %fd2, %fd1;
	add.s64 	%rd26, %rd5, %rd12;
	ld.global.nc.u8 	%rs3, [%rd26];
	cvt.rn.f64.u16 	%fd4, %rs3;
	sub.f64 	%fd6, %fd5, %fd3;
	add.s64 	%rd27, %rd4, %rd12;
	ld.global.nc.u8 	%rs5, [%rd27];
	cvt.rn.f64.u16 	%fd7, %rs5;
	mul.f64 	%fd8, %fd3, %fd7;
	fma.rn.f64 	%fd9, %fd6, %fd4, %fd8;
	cvt.rmi.f64.f64 	%fd10, %fd9;
	setp.gt.f64 	%p6, %fd10, 0d406FE00000000000;
	max.f64 	%fd11, %fd10, 0d0000000000000000;
	cvt.rzi.u16.f64 	%rs7, %fd11;
	selp.b16 	%rs8, -1, %rs7, %p6;
	add.s64 	%rd28, %rd3, %rd8;
	st.global.u8 	[%rd28], %rs8;
	setp.lt.u64 	%p7, %rd7, %rd18;
	mov.u64 	%rd29, %rd7;
	@%p7 bra 	$L__BB0_3;

$L__BB0_12:
	ret;

$L__BB0_10:
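	// Taken when param_1 is zero: return if the (empty) total output length does
	// not exceed the thread index; otherwise trap.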
	setp.le.u64 	%p8, %rd1, %rd2;
	@%p8 bra 	$L__BB0_12;

	trap;

$L__BB0_8:
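	// Frame offset out of range (>= param_3): abort the kernel.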
	trap;

}
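
Read back from the PTX, the kernel writes steps frames into one contiguous output buffer: frame i occupies bytes [i * param_1, (i + 1) * param_1) and is a per-byte linear blend of the two input images, with weight i / (steps - 1) on the second image. The sketch below restates that logic in CUDA C++; it is not the library's source, and every parameter name is an assumption inferred from how the corresponding argument is used above.

// Reconstruction, in CUDA C++, of what the cross_fade kernel above computes.
// Parameter names and meanings are assumptions read off the PTX.
#include <stdint.h>
#include <stddef.h>
#include <math.h>

extern "C" __global__ void cross_fade(const uint8_t *first,   // param_0: first image
                                      size_t first_len,       // param_1: bytes per frame
                                      const uint8_t *second,  // param_2: second image
                                      size_t second_len,      // param_3: bytes in the second image
                                      uint16_t steps,         // param_4: number of output frames
                                      size_t stride,          // param_5: index stride between a thread's iterations
                                      size_t iters,           // param_6: iterations per thread
                                      uint8_t *out)           // param_7: steps * first_len bytes
{
    size_t thread_idx = (size_t)blockIdx.x * blockDim.x + threadIdx.x;
    size_t total      = (size_t)steps * first_len;
    double last       = (double)(steps - 1);

    for (size_t i = 0; i < iters; ++i) {
        size_t idx = i * stride + thread_idx;  // output byte handled this iteration
        if (idx >= total)
            return;

        size_t frame  = idx / first_len;       // which output frame this byte belongs to
        size_t offset = idx % first_len;       // byte offset within that frame
        if (offset >= second_len)              // the PTX traps on this condition
            __trap();

        double t = (double)frame / last;       // 0.0 for the first frame, 1.0 for the last
        double v = floor((1.0 - t) * first[offset] + t * second[offset]);

        // Saturate to the u8 range, matching the PTX (255 above 255.0, 0 below 0.0).
        out[idx] = v > 255.0 ? 255 : (uint8_t)fmax(v, 0.0);
    }
}

How threads are mapped onto output bytes (param_5 and param_6) is decided on the host side when the kernel is launched, so it is not visible in this listing.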