slosh2d 0.4.1

Cross-platform GPU 2D Material Point Method implementation.
// Buffer scanned in place by `prefix_sum`: each workgroup reads up to 256
// consecutive elements from it and writes the scanned values back.
@binding(0) @group(0) var<storage, read_write> entryPointParams_data_0 : array<u32>;

// Written by `prefix_sum` at the workgroup index with the value held in the
// last scan slot after the up-sweep phase.
@binding(1) @group(0) var<storage, read_write> entryPointParams_aux_0 : array<u32>;

// Buffer updated in place by `add_data_grp`, indexed by global invocation id.
@binding(2) @group(0) var<storage, read_write> entryPointParams_data_1 : array<u32>;

// Read by `add_data_grp` at the workgroup index; the value is added to every
// in-range element handled by that workgroup.
@binding(3) @group(0) var<storage, read_write> entryPointParams_aux_1 : array<u32>;

// Rounds `val_0` up to a power of two by smearing the highest set bit of
// `val_0 - 1` into every lower bit position and then adding one.
//
// For 1 <= val_0 <= 2^31 the result is the smallest power of two that is
// >= val_0. Because u32 arithmetic wraps, an input of 0 and inputs above
// 2^31 both produce 0.
fn next_power_of_two_0( val_0 : u32) -> u32
{
    // Subtract one first so that exact powers of two map to themselves.
    var bits : u32 = val_0 - 1u;

    // OR in copies shifted right by 1, 2, 4, 8 and 16 bits.
    for (var shift : u32 = 1u; shift <= 16u; shift = shift * 2u)
    {
        bits = bits | (bits >> shift);
    }

    return bits + 1u;
}

// Workgroup-shared scratch array used by `prefix_sum`; it holds 256 u32
// slots, matching that entry point's workgroup size of 256.
var<workgroup> workspace_0 : array<u32, i32(256)>;

// Returns the runtime element count of `entryPointParams_data_0`,
// converted to i32.
fn getCount_0() -> i32
{
    return i32(arrayLength(&entryPointParams_data_0));
}

// Per-workgroup exclusive prefix sum over `entryPointParams_data_0`.
//
// Each workgroup handles the 256-element slice starting at
// `workgroup_id.x * 256`. The slice is copied into `workspace_0`, scanned in
// place with an up-sweep (reduction) followed by a down-sweep, and written
// back. The slice total (the last scan slot after the up-sweep) is stored in
// `entryPointParams_aux_0[workgroup_id.x]`.
@compute
@workgroup_size(256, 1, 1)
fn prefix_sum(@builtin(workgroup_id) block_id_0 : vec3<u32>, @builtin(local_invocation_id) thread_id_0 : vec3<u32>)
{
    let group_idx = block_id_0.x;
    let lane = thread_id_0.x;
    let total = u32(getCount_0());
    let group_base = group_idx * 256u;

    // The whole workgroup starts past the end of the data: nothing to do.
    if (group_base >= total)
    {
        return;
    }

    // Scan length for this workgroup: the remaining element count rounded up
    // to a power of two, limited to the range [1, 256].
    let scan_len = clamp(next_power_of_two_0(total - group_base), 1u, 256u);
    let global_idx = lane + group_base;
    let in_range = global_idx < total;

    // Stage the input; lanes beyond the end of the buffer contribute zero.
    var loaded : u32 = 0u;
    if (in_range)
    {
        loaded = entryPointParams_data_0[global_idx];
    }
    workspace_0[lane] = loaded;

    // Up-sweep: the number of active pairs halves and the distance between
    // partners doubles on every pass.
    var up_stride : u32 = 1u;
    for (var pair_count : u32 = scan_len / 2u; pair_count > 0u; pair_count = pair_count / 2u)
    {
        workgroupBarrier();
        if (lane < pair_count)
        {
            let lo_idx = (lane * 2u) * up_stride + up_stride - 1u;
            let hi_idx = (lane * 2u + 1u) * up_stride + up_stride - 1u;
            workspace_0[hi_idx] = workspace_0[lo_idx] + workspace_0[hi_idx];
        }
        up_stride = up_stride * 2u;
    }

    // Lane 0 publishes the value in the last scan slot and then clears that
    // slot to seed the down-sweep.
    if (lane == 0u)
    {
        let last_idx = scan_len - 1u;
        entryPointParams_aux_0[group_idx] = workspace_0[last_idx];
        workspace_0[last_idx] = 0u;
    }

    // Down-sweep: the number of active pairs doubles and the partner
    // distance halves on every pass.
    var down_stride : u32 = scan_len / 2u;
    for (var pair_count : u32 = 1u; pair_count < scan_len; pair_count = pair_count * 2u)
    {
        workgroupBarrier();
        if (lane < pair_count)
        {
            let lo_idx = (lane * 2u) * down_stride + down_stride - 1u;
            let hi_idx = (lane * 2u + 1u) * down_stride + down_stride - 1u;
            let lo_val = workspace_0[lo_idx];
            let hi_val = workspace_0[hi_idx];
            // The low slot takes the high slot's value; the high slot takes
            // the sum of both.
            workspace_0[lo_idx] = hi_val;
            workspace_0[hi_idx] = lo_val + hi_val;
        }
        down_stride = down_stride / 2u;
    }

    // Make the final down-sweep writes visible before copying results out.
    workgroupBarrier();
    if (in_range)
    {
        entryPointParams_data_0[global_idx] = workspace_0[lane];
    }
}

// Returns the runtime element count of `entryPointParams_data_1`,
// converted to i32.
fn getCount_1() -> i32
{
    return i32(arrayLength(&entryPointParams_data_1));
}

// Adds the per-workgroup value `entryPointParams_aux_1[workgroup_id.x]` to
// every element of `entryPointParams_data_1` handled by that workgroup.
// Invocations whose global index is at or beyond the buffer length do nothing.
@compute
@workgroup_size(256, 1, 1)
fn add_data_grp(@builtin(workgroup_id) block_id_1 : vec3<u32>, @builtin(global_invocation_id) thread_id_1 : vec3<u32>)
{
    let elem_idx = thread_id_1.x;
    let group_idx = block_id_1.x;
    let total = u32(getCount_1());

    // Skip invocations that fall past the end of the data buffer.
    if (elem_idx >= total)
    {
        return;
    }

    entryPointParams_data_1[elem_idx] += entryPointParams_aux_1[group_idx];
}