#version 450
// Each workgroup is 32 x 32 invocations.
layout(local_size_x = 32, local_size_y = 32) in;
// Read-only image sampled by main() through the L() macro below.
layout(set = 0, binding = 0, rgba8) uniform readonly image2D u_src;
// Write-only image that main() stores its result into.
layout(set = 0, binding = 1, rgba8) uniform writeonly image2D u_dst;
// Clamp to edge: load the texel of u_src at integer coordinate `u`, after
// clamping `u` to [0, imageSize(u_src) - 1] so that coordinates outside the
// image read the nearest edge texel instead of going out of bounds.
#define L(u) imageLoad(u_src, clamp(u, ivec2(0), ivec2(imageSize(u_src) - 1)))
// From page 220, Chapter 7.7.7 Conversion Rules for sRGBA and sBGRA Textures
// https://developer.apple.com/metal/Metal-Shading-Language-Specification.pdf
// Decodes an sRGB-encoded color to linear, per channel.
// Channels below 0.04045 use the linear segment (c / 12.92); channels at or
// above it use the power segment ((c + 0.055) / 1.055) ^ 2.4.
vec3 srgb_to_linear(vec3 c) {
    vec3 linear_segment = c / 12.92;
    vec3 power_segment = pow((c + 0.055) / 1.055, vec3(2.4));
    // step() yields 0.0 below the threshold and 1.0 otherwise, so mix()
    // picks one of the two segments for each channel.
    vec3 use_power = step(0.04045, c);
    return mix(linear_segment, power_segment, use_power);
}
// Encodes a linear color to sRGB, per channel.
// NaN channels are replaced with 0 and the input is clamped to [0, 1] first.
// Channels below 0.0031308 use the linear segment (c * 12.92); channels at or
// above it use 1.055 * c ^ (1 / 2.4) - 0.055.
vec3 linear_to_srgb(vec3 c) {
    // Sanitize: zero out NaNs, then clamp into the representable range.
    vec3 nan_free = mix(c, vec3(0.0), isnan(c));
    vec3 safe = clamp(nan_free, vec3(0.0), vec3(1.0));

    vec3 linear_segment = safe * 12.92;
    vec3 power_segment = 1.055 * pow(safe, vec3(1.0 / 2.4)) - 0.055;
    // step() yields 0.0 below the threshold and 1.0 otherwise, so mix()
    // picks one of the two segments for each channel.
    vec3 use_power = step(0.0031308, safe);
    return mix(linear_segment, power_segment, use_power);
}
// The behavior I'm observing on macOS with a GPUFamily2 v1 capable GPU
// is that the implementation performs the conversion from sRGB to linear
// on load, but it's up to me to convert back to sRGB before calling store.
// 2x box downsample: each invocation averages one 2x2 block of u_src texels
// and writes the result to the matching texel of u_dst.
void main() {
    ivec2 dst_uv = ivec2(gl_GlobalInvocationID.xy);

    // When the destination size is not a multiple of the workgroup size, a
    // dispatch that covers the whole image also launches invocations that
    // fall outside u_dst. Skip them instead of relying on how the
    // implementation treats out-of-bounds image stores.
    if (any(greaterThanEqual(dst_uv, imageSize(u_dst)))) {
        return;
    }

    // Top-left corner of the 2x2 source block for this destination texel.
    ivec2 src_uv = 2 * dst_uv;

    // Samples are named by their (x, y) offset inside the block. L() clamps
    // to the edge, so odd-sized sources repeat their last row/column.
    vec4 s00 = L(src_uv + ivec2(0, 0));
    vec4 s10 = L(src_uv + ivec2(1, 0));
    vec4 s01 = L(src_uv + ivec2(0, 1));
    vec4 s11 = L(src_uv + ivec2(1, 1));
    vec4 avg = (s00 + s10 + s01 + s11) / 4.0;

    // The averaged color is sRGB-encoded before the store; alpha is stored
    // unchanged.
    imageStore(u_dst, dst_uv, vec4(linear_to_srgb(avg.rgb), avg.a));
}