#version 450
// Compute shader that reduces samples of `in_depth` to one (min, max) pair per
// workgroup and writes that pair to `reduced`.

// Workgroup layout: 16 x 8 x 1 invocations (128 per workgroup).
layout(local_size_x = 16, local_size_y = 8, local_size_z = 1) in;
// Source texture. main() reads only the .x channel of mip level 0 via texelFetch.
// NOTE(review): the name suggests a depth buffer -- confirm against the host code.
layout(set = 0, binding = 0) uniform sampler2D in_depth;
// Destination image (two-channel 16-bit float, write-only). main() stores one
// texel per workgroup at gl_WorkGroupID.xy with x = min and y = max.
layout(set = 0, binding = 1, rg16f) writeonly uniform image2D reduced;
// Workgroup-shared scratch arrays holding the running minimum / maximum per
// invocation during the reduction in main().
// NOTE(review): the arrays hold 2 * (x * y) floats, but main() only indexes
// entries below x * y -- the factor 2 looks like unused headroom; confirm.
shared lowp float local_tile_min[gl_WorkGroupSize.x*gl_WorkGroupSize.y*2];
shared lowp float local_tile_max[gl_WorkGroupSize.x*gl_WorkGroupSize.y*2];
// Entry point: computes the minimum and maximum of the `.x` channel over the
// texels sampled by this workgroup and stores them as (min, max) in `reduced`
// at gl_WorkGroupID.xy.
//
// Steps:
//   1. Each invocation fetches two texels from mip 0 of `in_depth` and writes
//      their min / max into its own slot of the shared scratch arrays.
//   2. A halving-stride reduction in shared memory folds all slots into slot 0.
//   3. Invocation 0 writes the result.
//
// Preconditions: the number of invocations per workgroup
// (gl_WorkGroupSize.x * gl_WorkGroupSize.y) must be a power of two, otherwise
// the halving loop skips elements. No bounds check is performed on the fetch
// coordinates.
void main()
{
    const uint slot = gl_LocalInvocationIndex;

    // Sample positions: (gx, 2*gy) and (gx + 8, 2*gy).
    // NOTE(review): the y coordinate is always even, so odd rows are never
    // read, and the +8 column offset re-reads columns that other invocations
    // (including those of the neighbouring workgroup in x) already sample.
    // An offset of ivec2(0, 1) would cover a dense 16x16 tile instead --
    // confirm the intended footprint before changing it.
    const ivec2 firstCoord  = ivec2(gl_GlobalInvocationID.xy) * ivec2(1, 2);
    const ivec2 secondCoord = firstCoord + ivec2(8, 0);

    // Fetch each texel once and reuse it for both the min and the max.
    const float firstSample  = texelFetch(in_depth, firstCoord, 0).x;
    const float secondSample = texelFetch(in_depth, secondCoord, 0).x;

    // Load into group memory.
    local_tile_min[slot] = min(firstSample, secondSample);
    local_tile_max[slot] = max(firstSample, secondSample);

    // Make every invocation's slot visible before any other invocation reads it.
    groupMemoryBarrier();
    barrier();

    // Halving-stride reduction. The start stride is half the invocation count
    // (64 for the 16x8 layout) rather than a hard-coded literal so it follows
    // the declared workgroup size.
    for (uint stride = (gl_WorkGroupSize.x * gl_WorkGroupSize.y) / 2u; stride > 0u; stride /= 2u) {
        if (slot < stride) {
            local_tile_min[slot] = min(local_tile_min[slot], local_tile_min[slot + stride]);
            local_tile_max[slot] = max(local_tile_max[slot], local_tile_max[slot + stride]);
        }
        // barrier() must be reached by every invocation of the workgroup, so
        // it lives outside the `slot < stride` branch. The loop bounds are
        // identical for all invocations, keeping this in uniform control flow.
        groupMemoryBarrier();
        barrier();
    }

    // Slot 0 now holds the workgroup-wide result; a single invocation writes it.
    // Only the first two components matter for the rg16f image.
    if (slot == 0u)
    {
        imageStore(reduced, ivec2(gl_WorkGroupID.xy), vec2(local_tile_min[0], local_tile_max[0]).xyxx);
    }
}