// ptoxide 0.1.0
//
// A virtual machine to execute CUDA PTX without a GPU
// Documentation
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-33281558
// Cuda compilation tools, release 12.3, V12.3.52
// Based on NVVM 7.0.1
//

.version 8.3
.target sm_89
.address_size 64

	// .globl	_Z10add_simplePfS_S_

// -----------------------------------------------------------------------
// _Z10add_simplePfS_S_  (Itanium-mangled: add_simple(float*, float*, float*))
//
// Each thread loads one 32-bit float from the buffer in param_0 and one
// from the buffer in param_1, adds them, and stores the sum to the buffer
// in param_2, all at element index %tid.x.
//
// Params: param_0, param_1 = 64-bit addresses of the two inputs
//         param_2          = 64-bit address of the output
// Notes:  only %tid.x is used as the index, and there is no bounds check
//         because the kernel takes no length argument.
// -----------------------------------------------------------------------
.visible .entry _Z10add_simplePfS_S_(
	.param .u64 _Z10add_simplePfS_S__param_0,
	.param .u64 _Z10add_simplePfS_S__param_1,
	.param .u64 _Z10add_simplePfS_S__param_2
)
{
	.reg .f32 	%f<4>;
	.reg .b32 	%r<2>;
	.reg .b64 	%rd<11>;

	// %rd1 = byte offset of this thread's element (tid.x * 4, widened to 64 bits).
	mov.u32 	%r1, %tid.x;
	mul.wide.u32 	%rd1, %r1, 4;

	// %f1 = first input element (param_0).
	ld.param.u64 	%rd2, [_Z10add_simplePfS_S__param_0];
	cvta.to.global.u64 	%rd3, %rd2;
	add.s64 	%rd4, %rd3, %rd1;
	ld.global.f32 	%f1, [%rd4];

	// %f2 = second input element (param_1).
	ld.param.u64 	%rd5, [_Z10add_simplePfS_S__param_1];
	cvta.to.global.u64 	%rd6, %rd5;
	add.s64 	%rd7, %rd6, %rd1;
	ld.global.f32 	%f2, [%rd7];

	// %f3 = %f1 + %f2
	add.f32 	%f3, %f1, %f2;

	// Store the sum to the output element (param_2); both loads precede it.
	ld.param.u64 	%rd8, [_Z10add_simplePfS_S__param_2];
	cvta.to.global.u64 	%rd9, %rd8;
	add.s64 	%rd10, %rd9, %rd1;
	st.global.f32 	[%rd10], %f3;
	ret;

}