// llama-cpp-sys-4 0.2.45

#pragma OPENCL EXTENSION cl_khr_fp16 : enable

//------------------------------------------------------------------------------
// tri
//------------------------------------------------------------------------------
__kernel void kernel_tri_f32(
        global float * src0,
        ulong offset0,
        global float * dst,
        ulong offsetd,
        int n,
        int ne0,
        int ne1,
        int tri_type
) {
    src0 = (global float*)((global char*)src0 + offset0);
    dst = (global float*)((global char*)dst + offsetd);

    int idx = get_global_id(0);
    if (idx >= n) return;

    int i0 = idx % ne0;
    int i1 = (idx / ne0) % ne1;

    int keep = 0;
    if (tri_type == 0) keep = (i0 >= i1);
    else if (tri_type == 1) keep = (i0 >  i1);
    else if (tri_type == 2) keep = (i0 <= i1);
    else                    keep = (i0 <  i1);

    dst[idx] = keep ? src0[idx] : 0.0f;
}