#pragma OPENCL EXTENSION cl_khr_fp16 : enable
//------------------------------------------------------------------------------
// tri
//------------------------------------------------------------------------------
__kernel void kernel_tri_f32(
global float * src0,
ulong offset0,
global float * dst,
ulong offsetd,
int n,
int ne0,
int ne1,
int tri_type
) {
src0 = (global float*)((global char*)src0 + offset0) dst = (global float*)((global char*)dst + offsetd)
int idx = get_global_id(0) if (idx >= n) return
int i0 = idx % ne0 int i1 = (idx / ne0) % ne1
int keep = 0 if (tri_type == 0) keep = (i0 >= i1) else if (tri_type == 1) keep = (i0 > i1) else if (tri_type == 2) keep = (i0 <= i1) else keep = (i0 < i1)
dst[idx] = keep ? src0[idx] : 0.0f}