// Screen-space ambient occlusion (SSAO).
//
// Reads the 1-sample depth buffer written by the geometry prepass, reconstructs
// world-space positions and normals per-pixel, then samples a fixed 16-point
// hemisphere kernel (with per-pixel random rotation to reduce banding) to
// estimate how much of the hemisphere is occluded by nearby geometry.
//
// Output: grey vec4 where 1.0 = unoccluded, 0.0 = fully occluded.
// Applied to the scene via a multiplicative blend: scene_color * ao.
// Uniform parameters for the SSAO pass.
//
// Layout note: the trailing `_pad*` floats pad the struct to a 16-byte
// multiple so it matches the host-side uniform-buffer layout.
struct SsaoUniforms {
    // Combined projection * view matrix, for projecting hemisphere samples back to screen.
    proj_view: mat4x4<f32>,
    // Inverse of proj_view, for reconstructing world-space position from depth + UV.
    proj_view_inv: mat4x4<f32>,
    // World-space camera position (to orient normals toward the viewer).
    // Only .xyz is read; vec4 for alignment.
    cam_pos: vec4<f32>,
    // Camera near/far plane distances, used to linearize depth.
    near: f32,
    far: f32,
    // World-space hemisphere sample radius.
    radius: f32,
    // Small depth bias to prevent surfaces from self-occluding.
    bias: f32,
    // Output strength multiplier: higher → darker crevices.
    strength: f32,
    _pad0: f32,
    _pad1: f32,
    _pad2: f32,
}
// Single-sample depth buffer written by the geometry prepass.
@group(0) @binding(0) var depth_tex: texture_depth_2d;
// SSAO parameters (see SsaoUniforms above).
@group(0) @binding(1) var<uniform> su: SsaoUniforms;
// Vertex output: clip-space position only. The fragment stage derives its
// UV from @builtin(position), so no interpolated UV is needed.
struct VOut {
    @builtin(position) pos: vec4<f32>,
}
// Full-screen triangle: no vertex buffer needed.
//
// Derives the three clip-space corners (-1,-1), (3,-1), (-1,3) from the
// vertex index with bit arithmetic instead of an array lookup:
//   vi = 0 → (-1, -1),  vi = 1 → (3, -1),  vi = 2 → (-1, 3)
@vertex
fn vs_ssao(@builtin(vertex_index) vi: u32) -> VOut {
    let x = f32((vi << 1u) & 2u) * 2.0 - 1.0;
    let y = f32(vi & 2u) * 2.0 - 1.0;
    return VOut(vec4<f32>(x, y, 0.0, 1.0));
}
// Depth fetch with edge clamping: out-of-bounds pixel coordinates are
// snapped to the nearest valid texel of `depth_tex` (mip level 0).
fn load_depth(px: vec2<i32>, dims: vec2<i32>) -> f32 {
    let clamped = clamp(px, vec2<i32>(0, 0), dims - vec2<i32>(1, 1));
    return textureLoad(depth_tex, clamped, 0);
}
// Non-linear perspective depth (0..1) → linear view-space distance.
// d = 0 maps to `near`, d = 1 maps to `far`.
// Uses the identity far - d * (far - near) == mix(far, near, d).
fn linearize(d: f32) -> f32 {
    let n = su.near;
    let f = su.far;
    return (n * f) / mix(f, n, d);
}
// Reconstruct world-space position from a screen UV (0..1) and raw depth value.
// wgpu NDC: x ∈ [-1,1] (left→right), y ∈ [-1,1] (bottom→top), depth ∈ [0,1].
// Screen UV: (0,0) = top-left, hence the y flip when building NDC.
fn world_from_depth(uv: vec2<f32>, depth: f32) -> vec3<f32> {
    let clip_x = uv.x * 2.0 - 1.0;
    let clip_y = 1.0 - uv.y * 2.0;
    let homog = su.proj_view_inv * vec4<f32>(clip_x, clip_y, depth, 1.0);
    // Perspective divide back to a 3-D point.
    return homog.xyz / homog.w;
}
// Cheap 2-D → 1-D hash for the per-pixel kernel rotation (reduces banding).
// Classic sin-dot-fract construction: good enough for dithering, not a PRNG.
fn hash21(p: vec2<f32>) -> f32 {
    let t = dot(p, vec2<f32>(127.1, 311.7));
    return fract(sin(t) * 43758.5453);
}
// Rotate a 2-D vector counter-clockwise by `a` radians.
fn rot2(v: vec2<f32>, a: f32) -> vec2<f32> {
    let c = cos(a);
    let s = sin(a);
    // Column-major 2x2 rotation matrix: columns (c, s) and (-s, c),
    // i.e. (c*x - s*y, s*x + c*y) — identical to the expanded form.
    return mat2x2<f32>(c, s, -s, c) * v;
}
// Fragment: one SSAO estimate per pixel.
//
// Steps:
//   1. Reconstruct the world-space position and normal from the depth buffer.
//   2. Build an orthonormal TBN frame around the normal, randomly rotated
//      per pixel to break up the fixed kernel pattern.
//   3. Project each of the 16 hemisphere samples back into the depth buffer
//      and count how many are hidden behind nearer geometry.
//
// Returns grey: 1.0 = unoccluded, 0.0 = fully occluded. Applied to the
// scene with a multiplicative blend.
//
// Fix vs previous revision: the occlusion test compared raw non-linear
// depth against `su.bias`, while `linear_sample` was computed and never
// used. The comparison is now done in linear view space so that `bias` is
// in the same units as `radius` and is distance-independent.
@fragment
fn fs_ssao(@builtin(position) frag_pos: vec4<f32>) -> @location(0) vec4<f32> {
    let dims = vec2<i32>(textureDimensions(depth_tex));
    let fdims = vec2<f32>(f32(dims.x), f32(dims.y));
    let px = vec2<i32>(i32(frag_pos.x), i32(frag_pos.y));
    let uv = frag_pos.xy / fdims;
    let depth0 = load_depth(px, dims);
    // Sky / background: nothing to occlude, return unoccluded white.
    if depth0 >= 1.0 { return vec4<f32>(1., 1., 1., 1.); }
    let pos0 = world_from_depth(uv, depth0);

    // ── Normal reconstruction from neighbouring depth samples ────────────────
    // Pick the neighbour that is closest in depth on each axis (avoids
    // artefacts at depth discontinuities such as silhouettes).
    let dx = vec2<i32>(1, 0);
    let dy = vec2<i32>(0, 1);
    let depth_r = load_depth(px + dx, dims);
    let depth_l = load_depth(px - dx, dims);
    let depth_u = load_depth(px + dy, dims);
    let depth_d = load_depth(px - dy, dims);
    let use_right = abs(depth_r - depth0) < abs(depth_l - depth0);
    let use_down = abs(depth_u - depth0) < abs(depth_d - depth0);
    let h_uv = select((frag_pos.xy + vec2(-1., 0.)) / fdims,
        (frag_pos.xy + vec2( 1., 0.)) / fdims, use_right);
    let h_depth = select(depth_l, depth_r, use_right);
    let h_sign = select(-1.0, 1.0, use_right);
    let v_uv = select((frag_pos.xy + vec2(0., -1.)) / fdims,
        (frag_pos.xy + vec2( 0., 1.)) / fdims, use_down);
    let v_depth = select(depth_d, depth_u, use_down);
    let v_sign = select(-1.0, 1.0, use_down);
    // Signs keep the tangent/bitangent orientation consistent regardless of
    // which neighbour was chosen.
    let tang = (world_from_depth(h_uv, h_depth) - pos0) * h_sign;
    let btng = (world_from_depth(v_uv, v_depth) - pos0) * v_sign;
    var normal = normalize(cross(tang, btng));
    // Ensure normal faces the camera.
    if dot(normal, pos0 - su.cam_pos.xyz) > 0.0 { normal = -normal; }

    // ── Orthonormal TBN frame for rotating the hemisphere ────────────────────
    // Choose a reference axis that is not parallel to the normal.
    var tangent: vec3<f32>;
    if abs(normal.y) < 0.9 {
        tangent = normalize(cross(normal, vec3<f32>(0., 1., 0.)));
    } else {
        tangent = normalize(cross(normal, vec3<f32>(1., 0., 0.)));
    }
    let bitangent = cross(normal, tangent);
    // Per-pixel random rotation angle to break up the fixed kernel pattern.
    let rot_angle = hash21(frag_pos.xy) * 6.283185;
    let linear0 = linearize(depth0);

    // ── Fixed 16-point hemisphere kernel (z > 0 → away from surface) ─────────
    // Samples are intentionally non-uniform in distance (closer ones carry more
    // weight for small crevices; farther ones catch larger occluders).
    var kernel = array<vec3<f32>, 16>(
        vec3<f32>( 0.53813, 0.18508, 0.19261),
        vec3<f32>( 0.13488, -0.87838, 0.40077),
        vec3<f32>( 0.35758, -0.38415, 0.28973),
        vec3<f32>(-0.22072, 0.12715, 0.11035),
        vec3<f32>(-0.26987, 0.53448, 0.32635),
        vec3<f32>(-0.07967, 0.04402, 0.26865),
        vec3<f32>(-0.09560, -0.30776, 0.24669),
        vec3<f32>( 0.16201, 0.11422, 0.29748),
        vec3<f32>(-0.38296, 0.56558, 0.46939),
        vec3<f32>(-0.10660, -0.64196, 0.44051),
        vec3<f32>( 0.01006, 0.09939, 0.18907),
        vec3<f32>( 0.09448, 0.60592, 0.58505),
        vec3<f32>( 0.55916, 0.67533, 0.08070),
        vec3<f32>(-0.18608, 0.16559, 0.07534),
        vec3<f32>( 0.14520, -0.39572, 0.08600),
        vec3<f32>(-0.32639, 0.26070, 0.09231),
    );
    // Precomputed ((i+1)/16)² — accelerating sample distance keeps
    // short-range samples dense (better for tight crevices) without losing
    // long-range coverage.
    var scale_sq = array<f32, 16>(
        0.0039, 0.0156, 0.0352, 0.0625,
        0.0977, 0.1406, 0.1914, 0.2500,
        0.3164, 0.3906, 0.4727, 0.5625,
        0.6602, 0.7656, 0.8789, 1.0000,
    );

    var occlusion = 0.0;
    let num_samples = 16;
    for (var i = 0; i < num_samples; i++) {
        var s = kernel[i];
        // Rotate the in-plane (xy) components by the per-pixel random angle.
        let rxy = rot2(s.xy, rot_angle);
        s = vec3<f32>(rxy, s.z);
        // Transform from TBN to world space.
        let world_dir = tangent * s.x + bitangent * s.y + normal * s.z;
        // Accelerated radial scale: samples cluster near the surface.
        let scale = mix(0.1, 1.0, scale_sq[i]);
        let sample_world = pos0 + world_dir * su.radius * scale;
        // Project sample point into clip space.
        let clip = su.proj_view * vec4<f32>(sample_world, 1.0);
        if clip.w <= 0.0 { continue; }
        let ndc_xy = clip.xy / clip.w;
        let sample_depth = clip.z / clip.w; // depth in [0,1] for LH proj
        // Convert NDC to screen UV (y flipped: NDC +1 = top = UV 0).
        let sample_uv = ndc_xy * vec2<f32>(0.5, -0.5) + 0.5;
        // Discard out-of-screen and out-of-depth-range samples.
        if any(sample_uv < vec2<f32>(0.0)) || any(sample_uv > vec2<f32>(1.0)) { continue; }
        if sample_depth < 0.0 || sample_depth > 1.0 { continue; }
        // Fetch geometry depth at the projected location.
        let sample_px = vec2<i32>(i32(sample_uv.x * fdims.x), i32(sample_uv.y * fdims.y));
        let geom_depth = load_depth(sample_px, dims);
        // Range check: suppress contributions from surfaces far away in depth
        // (avoids halos at depth discontinuities).
        let linear_geom = linearize(geom_depth);
        let linear_sample = linearize(sample_depth);
        let range_check = smoothstep(0.0, 1.0, su.radius / abs(linear0 - linear_geom));
        // Occluded when geometry is closer to the camera than the sample
        // point. Compare in linear view space so su.bias shares units with
        // su.radius and biases uniformly at all distances (raw non-linear
        // depth would over-bias near and under-bias far).
        if linear_geom < linear_sample - su.bias {
            occlusion += range_check;
        }
    }
    occlusion /= f32(num_samples);
    // ao = 1 (no occlusion) → multiply scene by 1 (no change).
    // ao = 0 (full occlusion) → multiply scene by 0 (black).
    let ao = 1.0 - clamp(occlusion * su.strength, 0.0, 1.0);
    return vec4<f32>(ao, ao, ao, 1.0);
}