// GPU.5 — multi-grid scene marcher.
//
// For each grid in 0..grid_count:
// build the grid-local ray from grid_cameras[i]
// outer DDA over chunks (skip via chunk_occupancy)
// inner DDA over voxels (bounded to current chunk)
// on hit at world-t < best_t: update best_color + best_t
// emit best_color (or sky if no hit).
//
// All grids' chunks share one set of storage buffers; per-grid
// offsets live in `grid_static_meta`. Per-grid camera state lives
// in the `grid_cameras` storage array (computed CPU-side each
// frame via inverse `GridTransform`).
//
// `t` is in WORLD units. Comparing best_t across grids works
// because each grid's per-grid camera is the WORLD camera
// transformed into grid-local — the `t` along the local ray equals
// the world-space `t` (rigid transforms preserve distance).
// Occupancy words per voxel column at mip 0 (one bit per voxel along z).
// NOTE(review): nothing in this section references this constant — the
// per-mip code derives the count from CHUNK_Z instead; confirm it is
// still needed.
const OCC_WORDS_PER_COLUMN: u32 = 8u; // CHUNK_Z (256) / 32
// Chunk extent along z in mip-0 voxels. The x/y extent is the per-grid
// `vsid` from `GridStaticMeta`.
const CHUNK_Z: u32 = 256u;
// Upper bound on inner (per-voxel) DDA steps taken inside one chunk.
// NOTE(review): presumably sized for a 256 x 256 x CHUNK_Z chunk — confirm
// against the largest supported `vsid`.
const MAX_INNER_STEPS: u32 = 768u;
// Length of the per-mip relative-offset arrays in `GridStaticMeta`.
const MAX_GPU_MIPS: u32 = 6u; // GPU.11 — must match scene::MAX_GPU_MIPS
// "No hit / infinitely far" ray parameter. Used as the initial best_t, as
// the miss value of `GridHit.t`, and to disable DDA axes the ray is
// parallel to.
const T_INF: f32 = 1.0e30;
// One camera: a ray origin plus the three basis vectors the per-pixel ray
// direction is built from in `render_scene`:
//   dir = normalize(forward + sx * right - sy * down)
// where sx/sy are the pixel's NDC coordinates scaled by the half-FOV
// tangents. Each vec3 is followed by an explicit f32 pad, giving four
// 16-byte rows.
struct PerGridCamera {
// Ray origin (passed to `march_grid` as `ray_origin`).
pos: vec3<f32>,
_pad0: f32,
// Basis vector scaled by the horizontal screen coordinate.
right: vec3<f32>,
_pad1: f32,
// Basis vector scaled by the vertical screen coordinate; it is
// subtracted, and that coordinate is positive at the top of the screen.
down: vec3<f32>,
_pad2: f32,
// View axis through the screen centre.
forward: vec3<f32>,
_pad3: f32,
};
// Per-grid table of base offsets into the shared storage buffers, plus the
// grid's chunk geometry. One entry per grid in `grid_static_meta`.
struct GridStaticMeta {
// Word index of this grid's first occupancy word (global index fed to
// `occ_word`).
occupancy_offset: u32,
// Base index of this grid's entries in `all_color_offsets`.
color_offsets_offset: u32,
// Base index of this grid's entries in `all_colors`.
colors_offset: u32,
// Base index of this grid's per-slot entries in `all_chunk_colors_base`.
chunk_colors_base_offset: u32,
// Base word of this grid's one-bit-per-slot "chunk has content" bitmap in
// `all_chunk_occupancy`.
chunk_occupancy_offset: u32,
// Base of this grid's entries in `all_slot_chunk_idx`; divided by 4 before
// indexing (see `chunk_has_content`).
slot_chunk_idx_offset: u32,
// Chunk extent along x and y in mip-0 voxels (z extent is CHUNK_Z).
vsid: u32,
// Not read anywhere in this section.
total_slots: u32,
// Slot-pool size per axis; used as a power-of-two mask in `slot_idx_of`.
pool_dims: vec3<u32>,
_pad0: u32,
// GPU.11 — per-slot strides spanning the whole mip ladder, plus
// per-mip within-slot relative offsets. mip_*_rel[0] == 0 so
// mip-0 reads index exactly as the pre-mip layout did.
occ_words_per_slot: u32,
offsets_words_per_slot: u32,
// Number of mip levels available for this grid; `pick_mip` clamps to
// `mip_count - 1`.
mip_count: u32,
_pad1: u32,
// Within-slot word offset of each mip's occupancy sub-block.
mip_occ_rel: array<u32, MAX_GPU_MIPS>,
// Within-slot offset of each mip's colour-offset sub-table.
mip_coff_rel: array<u32, MAX_GPU_MIPS>,
// GPU.13.0 — occupied chunk-AABB (inclusive) in chunk-index space.
// `vec3<i32>` aligns to 16 here (mip_coff_rel ends 16-aligned), so
// these mirror the host's `[i32;3] + pad` pair exactly (112→144).
aabb_min: vec3<i32>,
aabb_max: vec3<i32>,
};
// Per-frame parameters shared by every invocation.
struct Uniforms {
// Vertical field of view in radians; `tan(fov_y_rad * 0.5)` is the
// half-height of the image plane.
fov_y_rad: f32,
// Number of grids marched per pixel (valid entries in `grid_cameras` and
// `grid_static_meta`).
grid_count: u32,
// Upper bound on outer (per-chunk) DDA steps per grid.
max_outer_steps: u32,
_pad0: u32,
// Framebuffer size in pixels; `.x` is also the row stride of `output`
// and `depth_buffer`.
screen_size: vec2<u32>,
_pad1: vec2<u32>,
// GPU.8 fog. `fog_color.rgb` is the colour we blend toward at
// far distances. `fog_color.w` is `fog_near`, packed into the
// colour's fourth lane so no separate padded field is needed.
fog_color: vec4<f32>,
// World-t at and beyond which only the fog colour shows.
fog_far: f32,
// GPU.9: gate the depth-buffer write. When the sprite pass is
// active this is 1 and `render_scene` records `best_t` per
// pixel; otherwise 0 and the no-sprite path stays unchanged.
write_depth: u32,
// Occupancy paging: words per storage page, and the number of
// real pages. `occ_num_pages == 1` (multi-GiB GPUs) takes a
// branch-free single-page read.
occ_page_words: u32,
occ_num_pages: u32,
// GPU.11.1 — scene-grid LOD. A chunk entered at world-t `t` is
// marched at mip level `floor(log2(max(t, msd) / msd))`, clamped
// to the grid's `mip_count`. `0` disables LOD (always mip-0).
// Tunable for the axis-aligned-mip-beams mitigation (11.2).
mip_scan_dist: f32,
_pad2: u32,
_pad3: u32,
_pad4: u32,
// World camera used purely to derive the per-pixel sky direction.
// Always valid (even with grid_count == 0, where no grid ray
// exists), so a grid-less scene still paints a proper sky instead
// of a degenerate (0,0,1) → atan2(0,0) → black sample.
sky_cam: PerGridCamera,
// Per-face directional shading (voxlap setsideshades), as the
// alpha-brightness reduction applied at a voxel hit. Each value is
// the u8 shade intensity (0..255) subtracted from the voxel's
// brightness byte before the /128 divide — matching the CPU
// `grouscan_shade`. side_shades0 = (top, bot, left, right),
// side_shades1 = (up, down, _, _). All-zero = no shading.
side_shades0: vec4<i32>,
side_shades1: vec4<i32>,
};
// Resource bindings (all in group 0). Binding numbers are part of the
// host-side bind-group layout and must not be renumbered here alone.
// Per-frame parameters.
@group(0) @binding(0) var<uniform> u: Uniforms;
// Occupancy is split across up to MAX_OCC_PAGES (=4) storage
// bindings so no single binding exceeds the device limit. Page 0 is
// binding 1; pages 1..3 are bindings 12..14. `occ_word()` maps a
// global word index to its page. See scene::split_occupancy_pages.
@group(0) @binding(1) var<storage, read> occ_page0: array<u32>;
// Per-column offset of the column's first colour within its slot's colour
// block (read in `voxel_color_in`).
@group(0) @binding(2) var<storage, read> all_color_offsets: array<u32>;
// Packed voxel colours, one u32 each: brightness in bits 24..31, then
// r, g, b in descending byte order (unpacked in `voxel_color_in`).
@group(0) @binding(3) var<storage, read> all_colors: array<u32>;
// Per-slot base of that slot's colour block within the grid's colours.
@group(0) @binding(4) var<storage, read> all_chunk_colors_base: array<u32>;
// One bit per slot: set when the slot's chunk has content worth marching.
@group(0) @binding(5) var<storage, read> all_chunk_occupancy: array<u32>;
// Per-grid offsets and geometry, indexed by grid id.
@group(0) @binding(6) var<storage, read> grid_static_meta: array<GridStaticMeta>;
// GPU.7: per-slot chunk_idx, vec3<i32> with std430 16-byte stride.
@group(0) @binding(7) var<storage, read> all_slot_chunk_idx: array<vec3<i32>>;
// Framebuffer as a storage BUFFER (packed `rgba8unorm` per pixel),
// not a storage texture: Chrome's Dawn lays out write storage
// textures with GPU-optimal tiling that the sampled read-back
// disagrees with, producing a 128×256-tiled image. A linear buffer
// + an explicit `screen_size.x` stride is layout-unambiguous on every
// backend (the depth buffer already uses this).
@group(0) @binding(8) var<storage, read_write> output: array<u32>;
// GPU.8: panoramic sky.
@group(0) @binding(9) var sky_texture: texture_2d<f32>;
@group(0) @binding(10) var sky_sampler: sampler;
// GPU.9: per-pixel world-t depth (f32 bits as u32). Written here
// when `u.write_depth != 0`, read+tested by the sprite splatter.
@group(0) @binding(11) var<storage, read_write> depth_buffer: array<u32>;
// Occupancy pages 1..3 (page 0 is binding 1). Unused pages bind a
// 1-word dummy and are never indexed.
@group(0) @binding(12) var<storage, read> occ_page1: array<u32>;
@group(0) @binding(13) var<storage, read> occ_page2: array<u32>;
@group(0) @binding(14) var<storage, read> occ_page3: array<u32>;
// Per-grid world->grid cameras, one per grid (`grid_count` of them).
// Moved out of the uniform (was a fixed `array<…, 16>`) into a runtime-
// sized storage array so a scene can hold any number of grids — the cap
// is now the device's storage limit, not a baked-in 16. The shader only
// indexes `0..grid_count`, so a grid-less scene binds a 1-element dummy.
@group(0) @binding(15) var<storage, read> grid_cameras: array<PerGridCamera>;
// Fetch the occupancy word at global word index `i`.
//
// With a single page the index is used directly on page 0 and no
// division is done. Otherwise `i` is split into (page, word-in-page)
// using `u.occ_page_words`; any page number other than 0, 1 or 2 reads
// page 3.
fn occ_word(i: u32) -> u32 {
    if (u.occ_num_pages <= 1u) {
        return occ_page0[i];
    }
    let words_per_page = u.occ_page_words;
    let page_no = i / words_per_page;
    let in_page = i % words_per_page;
    var word: u32;
    if (page_no == 0u) {
        word = occ_page0[in_page];
    } else if (page_no == 1u) {
        word = occ_page1[in_page];
    } else if (page_no == 2u) {
        word = occ_page2[in_page];
    } else {
        word = occ_page3[in_page];
    }
    return word;
}
// GPU.11.1 — number of 32-bit occupancy words one voxel column occupies
// at `mip`: the column holds `CHUNK_Z >> mip` voxels at one bit each,
// and never takes less than a whole word. Mirrors
// `decompress::occ_words_per_column_for_mip`.
fn occ_words_per_col_for_mip(mip: u32) -> u32 {
    let column_height = CHUNK_Z >> mip;
    return max(column_height / 32u, 1u);
}
// GPU.11.1 — global word index of the first (textured) occupancy word of
// column `(p_voxel.x, p_voxel.y)` at `mip`, inside slot `meta_id` of
// grid `g`.
//
// Layout walked here: grid base -> slot (stride `occ_words_per_slot`)
// -> mip sub-block (`mip_occ_rel[mip]`, 0 for mip-0) -> column
// (row-major, `vsid >> mip` columns per row).
//
// `grid_static_meta[g]` is indexed in place rather than copied to a
// local: WGSL does not allow dynamic indexing of an array member of a
// struct held in a value `let`.
fn col_word_base_mip(g: u32, meta_id: u32, mip: u32, p_voxel: vec3<i32>) -> u32 {
    let row_len = grid_static_meta[g].vsid >> mip;
    let column = u32(p_voxel.y) * row_len + u32(p_voxel.x);
    let mip_block_start = grid_static_meta[g].occupancy_offset
        + meta_id * grid_static_meta[g].occ_words_per_slot
        + grid_static_meta[g].mip_occ_rel[mip];
    return mip_block_start + column * occ_words_per_col_for_mip(mip);
}
// Size in words of one mip's TEXTURED occupancy block within a slot:
// `(vsid >> mip)^2` columns times the words per column. The SOLID block
// is stored immediately after the textured one (cliff-face fix), so a
// column's solid word base is its textured base plus this value.
fn mip_occ_block_words(g: u32, mip: u32) -> u32 {
    let side = grid_static_meta[g].vsid >> mip;
    let columns = side * side;
    return columns * occ_words_per_col_for_mip(mip);
}
// GPU — is voxel `p_voxel` (mip-level coordinates) solid?
//
// Tests the SOLID bitmap — textured surfaces plus bedrock interior — so
// vertical wall/cliff faces are opaque. The solid block follows the
// textured block (the one used for colour rank) within each mip's
// sub-block; one bit per voxel along z, 32 voxels per word.
fn voxel_solid_in(g: u32, meta_id: u32, mip: u32, p_voxel: vec3<i32>) -> bool {
    let z = u32(p_voxel.z);
    let textured_base = col_word_base_mip(g, meta_id, mip, p_voxel);
    let solid_word = occ_word(textured_base + mip_occ_block_words(g, mip) + (z >> 5u));
    let z_mask = 1u << (z & 31u);
    return (solid_word & z_mask) != 0u;
}
// Side-shade intensity for the face a ray hit, mirroring the CPU's
// gcsub-lane selection (= voxlap's gixy-sign select).
//
// `axis` is the hit face's normal axis (0 = x, 1 = y, 2 = z; any value
// other than 0 or 2 is treated as y). Each axis owns a pair of shades,
// and the ray's direction sign along that axis picks one of the pair:
//   z: (top, bot)    — a ray going +z (down, voxlap z-down) hits a
//                      floor -> bot, otherwise a ceiling -> top
//   x: (left, right)
//   y: (up, down)
// A non-negative direction component selects the second of the pair.
fn side_shade_for(axis: i32, ray_dir: vec3<f32>) -> f32 {
    var shade_pair: vec2<i32>;
    var along_axis: f32;
    if (axis == 2) {
        shade_pair = u.side_shades0.xy;
        along_axis = ray_dir.z;
    } else if (axis == 0) {
        shade_pair = u.side_shades0.zw;
        along_axis = ray_dir.x;
    } else {
        shade_pair = u.side_shades1.xy;
        along_axis = ray_dir.y;
    }
    return f32(select(shade_pair.x, shade_pair.y, along_axis >= 0.0));
}
// Shaded colour of the solid voxel `p_voxel` (mip-level coordinates) in
// slot `meta_id` of grid `g`. `face_shade` is the side-shade intensity
// of the hit face (see `side_shade_for`).
//
// Colours are stored per column, one per TEXTURED voxel, ordered by z.
// The index within the column is the voxel's rank: the number of
// textured bits at smaller z in the same column.
fn voxel_color_in(g: u32, meta_id: u32, mip: u32, p_voxel: vec3<i32>, face_shade: f32) -> vec3<f32> {
    let z = u32(p_voxel.z);
    let hit_word_idx = z >> 5u;
    let hit_bit = 1u << (z & 31u);
    let textured_base = col_word_base_mip(g, meta_id, mip, p_voxel);

    // Whole words before the hit word count in full ...
    var rank: u32 = 0u;
    for (var w: u32 = 0u; w < hit_word_idx; w = w + 1u) {
        rank = rank + countOneBits(occ_word(textured_base + w));
    }
    // ... and the hit word contributes only the bits below the hit bit.
    // `hit_bit - 1u` is 0 when the hit is bit 0, so that case needs no
    // special handling.
    let hit_word = occ_word(textured_base + hit_word_idx);
    rank = rank + countOneBits(hit_word & (hit_bit - 1u));

    // A bedrock hit (solid but not textured) has no colour of its own and
    // borrows that of the nearest textured voxel at smaller z, which is
    // entry `rank - 1`. With no such voxel (rank == 0) it falls back to
    // entry 0.
    var color_index = rank;
    if ((hit_word & hit_bit) == 0u && rank > 0u) {
        color_index = rank - 1u;
    }

    // Cumulative-within-slot colour offsets: the mip's sub-table lives at
    // `mip_coff_rel[mip]`, and its values already include every finer
    // mip's colour count, so slot base + column offset + index addresses
    // the slot's concatenated colour block directly.
    let column = u32(p_voxel.x) + u32(p_voxel.y) * (grid_static_meta[g].vsid >> mip);
    let offsets_table = grid_static_meta[g].color_offsets_offset
        + meta_id * grid_static_meta[g].offsets_words_per_slot
        + grid_static_meta[g].mip_coff_rel[mip];
    let column_offset = all_color_offsets[offsets_table + column];
    let slot_colors_base =
        all_chunk_colors_base[grid_static_meta[g].chunk_colors_base_offset + meta_id];
    let packed = all_colors[grid_static_meta[g].colors_offset + slot_colors_base
        + column_offset + color_index];

    // Unpack: brightness in the top byte, then r, g, b.
    let a = f32((packed >> 24u) & 0xffu);
    let rgb = vec3<f32>(
        f32((packed >> 16u) & 0xffu),
        f32((packed >> 8u) & 0xffu),
        f32(packed & 0xffu),
    );

    // Side-shade: reduce the brightness byte by the hit face's shade
    // before the /128 divide (CPU grouscan_shade equivalent). With no
    // baked light (flat a=0x80) this is pure runtime side-shading; with
    // baked light it stacks, exactly like voxlap.
    let brightness = max(0.0, a - face_shade) * (1.0 / 128.0);
    return rgb * (brightness / 255.0);
}
// GPU.7 — map a chunk index to its slot in grid `g`'s modular pool.
//
// `pool_dims` are powers of two (asserted on the host), so masking each
// axis with `pool_dims - 1` wraps the chunk index into the pool; the
// three wrapped coordinates are then flattened x-fastest.
//
// Different chunk indices can wrap to the same slot, so the result only
// names a candidate: callers must verify the slot's identity against
// `all_slot_chunk_idx` (see `chunk_has_content`).
fn slot_idx_of(g: u32, chunk_idx: vec3<i32>) -> u32 {
    let dims = grid_static_meta[g].pool_dims;
    let wrapped = vec3<u32>(chunk_idx & (vec3<i32>(dims) - vec3<i32>(1)));
    return wrapped.x + dims.x * (wrapped.y + dims.y * wrapped.z);
}
// One-axis form of `aabb_passed`: has coordinate `p`, travelling with
// step sign `dir_step`, gone beyond the inclusive range [lo, hi] for
// good? A moving axis is only "passed" beyond the far end in its travel
// direction; a stationary axis is passed if it is outside at all.
fn aabb_axis_passed(p: i32, dir_step: i32, lo: i32, hi: i32) -> bool {
    if (dir_step > 0) {
        return p > hi;
    }
    if (dir_step < 0) {
        return p < lo;
    }
    return p < lo || p > hi;
}
// GPU.13.0 — has the outer DDA at chunk `p` left grid `g`'s occupied
// chunk-AABB for good?
//
// A 3D-DDA ray is inside the box only while all three axes are within
// `[aabb_min, aabb_max]`. Once it crosses the far slab on any axis (in
// its travel direction) it can never re-enter, and an axis the ray is
// parallel to (`step == 0`) that is already outside means the ray misses
// the box entirely. In both cases no resident chunk lies ahead and the
// caller can stop marching this grid.
//
// The empty-grid sentinel (min = i32::MAX, max = i32::MIN) makes the
// test true immediately, so an empty grid contributes nothing.
fn aabb_passed(g: u32, p: vec3<i32>, step: vec3<i32>) -> bool {
    let lo = grid_static_meta[g].aabb_min;
    let hi = grid_static_meta[g].aabb_max;
    return aabb_axis_passed(p.x, step.x, lo.x, hi.x)
        || aabb_axis_passed(p.y, step.y, lo.y, hi.y)
        || aabb_axis_passed(p.z, step.z, lo.z, hi.z);
}
// Does slot `slot_idx` of grid `g` currently hold chunk `chunk_idx`, and
// is that chunk flagged as having content?
//
// Two checks, in order:
//  1. Identity — the chunk index recorded for the slot must equal the
//     chunk the outer DDA is visiting. An empty slot's sentinel
//     (i32::MIN, i32::MIN, i32::MIN) fails this check.
//  2. Content — the slot's bit in the grid's chunk-occupancy bitmap.
fn chunk_has_content(g: u32, slot_idx: u32, chunk_idx: vec3<i32>) -> bool {
    // `slot_chunk_idx_offset` is divided by 4 to get an element index,
    // since WGSL `array<vec3<i32>>` uses a 16-byte stride.
    let first_entry = grid_static_meta[g].slot_chunk_idx_offset / 4u;
    let resident = all_slot_chunk_idx[first_entry + slot_idx];
    if (any(resident != chunk_idx)) {
        return false;
    }
    let bitmap_word =
        all_chunk_occupancy[grid_static_meta[g].chunk_occupancy_offset + (slot_idx >> 5u)];
    let slot_bit = 1u << (slot_idx & 31u);
    return (bitmap_word & slot_bit) != 0u;
}
// Voxlap-convention sky sample for a (normalized) view direction.
//
// The bundled `assets/sky.png` is `width = elevation` (horizon →
// zenith), `height = azimuth` (wraps 360°) — the OPPOSITE axes of a
// standard equirectangular panorama. We sample `(elevation, azimuth)`
// in `(u, v)` to match the CPU rasterizer's orientation, and rely on
// the sampler's `Repeat` mode on both axes (elevation values stay in
// [0, 1] so Repeat is a no-op there; azimuth needs the wrap).
//
// `dir` is expected to be unit length. The polar cosine is clamped to
// [-1, 1] before `acos`: rounding in `normalize` can push a component a
// hair past ±1 for rays along ±z, and `acos` is not meaningful outside
// that range.
fn sky_color(dir: vec3<f32>) -> vec3<f32> {
    let pi = 3.1415926535897932;
    // Azimuth around the z axis, remapped from [-pi, pi] to [0, 1].
    let azimuth = atan2(dir.x, dir.y) * (0.5 / pi) + 0.5;
    // Angle from the -z axis, remapped from [0, pi] to [0, 1].
    let cos_polar = clamp(-dir.z, -1.0, 1.0);
    let elevation = clamp(acos(cos_polar) * (1.0 / pi), 0.0, 1.0);
    // Explicit LOD 0: no derivatives are available in a compute shader.
    return textureSampleLevel(
        sky_texture,
        sky_sampler,
        vec2<f32>(elevation, azimuth),
        0.0,
    ).rgb;
}
// GPU.8 — blend a hit colour toward the fog colour by distance.
//
// `t` is the world-space hit distance. At or below `fog_near`
// (`u.fog_color.w`) the hit colour is returned unchanged; at or beyond
// `u.fog_far` only the fog colour remains; in between, smoothstep gives
// a soft transition.
// NOTE(review): WGSL leaves `smoothstep` unspecified when low >= high —
// confirm the host always supplies fog_near < fog_far.
fn apply_fog(hit_color: vec3<f32>, t: f32) -> vec3<f32> {
    let fog = u.fog_color;
    let fog_amount = smoothstep(fog.w, u.fog_far, t);
    return mix(hit_color, fog.rgb, fog_amount);
}
// Replace the DDA crossing distance with T_INF on every axis along
// which the ray does not move (`dir` component exactly 0). Those
// components of `t_max` come from a division by zero, and forcing them
// to T_INF keeps the axis from being picked as the next crossing.
fn shield_parallel(t_max: vec3<f32>, dir: vec3<f32>) -> vec3<f32> {
    let is_parallel = dir == vec3<f32>(0.0);
    return select(t_max, vec3<f32>(T_INF), is_parallel);
}
// GPU.11.1 — mip level for a chunk the ray enters at world-t `t`.
//
// Level 0 out to `u.mip_scan_dist`, then one coarser level per
// doubling of distance, clamped to the last level of the grid's ladder
// (`mip_count - 1`). LOD is off (always level 0) when `mip_scan_dist`
// is zero or negative, or when the grid has at most one level.
fn pick_mip(t: f32, mip_count: u32) -> u32 {
    let scan_dist = u.mip_scan_dist;
    if (scan_dist <= 0.0 || mip_count <= 1u) {
        return 0u;
    }
    // Clamping `t` up to `scan_dist` keeps the ratio >= 1, so the log is
    // never negative.
    let octave = u32(floor(log2(max(t, scan_dist) / scan_dist)));
    return min(octave, mip_count - 1u);
}
// March one grid; return (hit, t, color). `best_t` is the world-t
// threshold the caller already found in earlier grids; we early-out
// once our outer t passes it.
//
// Result record of `march_grid`.
struct GridHit {
// True only when a solid voxel was found closer than the caller's best_t.
hit: bool,
// World-t of the hit; stays T_INF when `hit` is false.
t: f32,
// Shaded, fogged colour of the hit voxel; zero when `hit` is false.
color: vec3<f32>,
};
// March one ray through grid `g` and report the nearest solid voxel that
// is closer than `best_t`.
//
// Parameters:
//   g          — grid index into `grid_static_meta`.
//   ray_origin — ray start in this grid's local voxel coordinates.
//   ray_dir    — ray direction in the same space; the caller passes a
//                normalized vector.
//   best_t     — nearest hit found so far; anything at or beyond it is
//                ignored.
//
// Structure: an outer 3D-DDA walks chunk by chunk (chunk size
// vsid x vsid x CHUNK_Z). For every chunk that is resident and flagged as
// having content, an inner 3D-DDA walks that chunk's voxels at the mip
// level chosen from the chunk-entry distance. The first solid voxel ends
// the march, so the function returns at most one hit.
//
// Returns a `GridHit` with `hit == false`, `t == T_INF` when nothing
// closer than `best_t` was found within `u.max_outer_steps` chunk steps.
fn march_grid(
g: u32,
ray_origin: vec3<f32>,
ray_dir: vec3<f32>,
best_t: f32,
) -> GridHit {
let m = grid_static_meta[g];
// Outer DDA setup: chunk containing the origin, per-axis step sign,
// t-distance between successive chunk boundaries, and t of the first
// boundary crossing on each axis.
let chunk_dim = vec3<f32>(f32(m.vsid), f32(m.vsid), f32(CHUNK_Z));
var p_chunk = vec3<i32>(floor(ray_origin / chunk_dim));
let step_chunk = vec3<i32>(sign(ray_dir));
let t_delta_chunk = abs(chunk_dim / ray_dir);
let next_boundary_chunk = vec3<f32>(
select(f32(p_chunk.x), f32(p_chunk.x + 1), step_chunk.x > 0) * chunk_dim.x,
select(f32(p_chunk.y), f32(p_chunk.y + 1), step_chunk.y > 0) * chunk_dim.y,
select(f32(p_chunk.z), f32(p_chunk.z + 1), step_chunk.z > 0) * chunk_dim.z,
);
// Axes the ray is parallel to divide by zero here; shield_parallel
// overwrites those lanes with T_INF so they are never selected.
var t_max_chunk = shield_parallel(
(next_boundary_chunk - ray_origin) / ray_dir,
ray_dir,
);
// t at which the ray entered the current chunk (0 for the first one).
var t_enter: f32 = 0.0;
// Axis crossed to enter the current chunk (= the face normal of a
// voxel that is already solid at the chunk-entry point). Seeds
// `hit_axis` for the `iv==0` case so a surface flush with the chunk
// boundary gets its real face axis, not a hardcoded z. Defaults to z
// only for the first chunk (t_enter==0, ray starts inside it).
var entry_axis: i32 = 2;
// Miss result; returned unchanged on every early-out.
var out: GridHit;
out.hit = false;
out.t = T_INF;
out.color = vec3<f32>(0.0);
for (var step: u32 = 0u; step < u.max_outer_steps; step = step + 1u) {
if (t_enter > best_t) {
return out; // no closer hit possible in this grid
}
// GPU.13.0 — once the ray has left the occupied chunk-AABB
// along its travel direction, no resident chunk lies ahead:
// stop instead of stepping empty space to max_outer_steps.
if (aabb_passed(g, p_chunk, step_chunk)) {
return out;
}
let slot_id = slot_idx_of(g, p_chunk);
if (chunk_has_content(g, slot_id, p_chunk)) {
// GPU.11.1 — pick the mip for this chunk by entry distance.
// Voxels are `vsize` world units; the chunk holds
// `vsid>>mip` × `vsid>>mip` × `CHUNK_Z>>mip` of them.
let mip = pick_mip(t_enter, m.mip_count);
let vsize = f32(1u << mip);
let vsid_mip = i32(m.vsid >> mip);
let cz_mip = i32(CHUNK_Z >> mip);
// Inner DDA setup: voxel containing the chunk-entry point, in
// mip-level coordinates relative to the chunk origin.
let entry_world = ray_origin + t_enter * ray_dir;
let chunk_origin_world = vec3<f32>(p_chunk) * chunk_dim;
let entry_in_chunk = entry_world - chunk_origin_world;
var p_voxel = vec3<i32>(floor(entry_in_chunk / vsize));
// The entry point sits on a chunk face, so rounding can land it one
// voxel outside; clamp it back into the chunk.
p_voxel = clamp(
p_voxel,
vec3<i32>(0),
vec3<i32>(vsid_mip - 1, vsid_mip - 1, cz_mip - 1),
);
// Voxel boundaries are at integer-mip-coord * vsize.
let next_voxel_world = vec3<f32>(
select(f32(p_voxel.x), f32(p_voxel.x + 1), step_chunk.x > 0) * vsize
+ chunk_origin_world.x,
select(f32(p_voxel.y), f32(p_voxel.y + 1), step_chunk.y > 0) * vsize
+ chunk_origin_world.y,
select(f32(p_voxel.z), f32(p_voxel.z + 1), step_chunk.z > 0) * vsize
+ chunk_origin_world.z,
);
var t_max_voxel = shield_parallel(
(next_voxel_world - ray_origin) / ray_dir,
ray_dir,
);
let t_delta_voxel = abs(vsize / ray_dir);
// t at which the ray entered the current voxel.
var t_hit: f32 = t_enter;
// Axis of the last voxel step = the hit face normal (for
// side-shading). An iv==0 hit (solid at the chunk-entry point)
// takes no inner step, so seed with the chunk-entry axis — the
// face the ray crossed to enter this chunk. Surfaces hit after
// any inner travel overwrite this with the real stepped axis.
var hit_axis: i32 = entry_axis;
for (var iv: u32 = 0u; iv < MAX_INNER_STEPS; iv = iv + 1u) {
if (voxel_solid_in(g, slot_id, mip, p_voxel)) {
// First solid voxel along the ray: the march ends here either
// way. It only counts as a hit if it beats the caller's best_t;
// otherwise the untouched miss result is returned.
if (t_hit < best_t) {
out.hit = true;
out.t = t_hit;
let shade = side_shade_for(hit_axis, ray_dir);
out.color = apply_fog(
voxel_color_in(g, slot_id, mip, p_voxel, shade),
t_hit,
);
return out;
} else {
return out;
}
}
// Step to the neighbouring voxel across the nearest boundary
// (ties fall through to y, then z). Leaving the chunk on the
// stepped axis ends the inner walk.
if (t_max_voxel.x < t_max_voxel.y && t_max_voxel.x < t_max_voxel.z) {
t_hit = t_max_voxel.x;
p_voxel.x = p_voxel.x + step_chunk.x;
t_max_voxel.x = t_max_voxel.x + t_delta_voxel.x;
hit_axis = 0;
if (p_voxel.x < 0 || p_voxel.x >= vsid_mip) {
break;
}
} else if (t_max_voxel.y < t_max_voxel.z) {
t_hit = t_max_voxel.y;
p_voxel.y = p_voxel.y + step_chunk.y;
t_max_voxel.y = t_max_voxel.y + t_delta_voxel.y;
hit_axis = 1;
if (p_voxel.y < 0 || p_voxel.y >= vsid_mip) {
break;
}
} else {
t_hit = t_max_voxel.z;
p_voxel.z = p_voxel.z + step_chunk.z;
t_max_voxel.z = t_max_voxel.z + t_delta_voxel.z;
hit_axis = 2;
if (p_voxel.z < 0 || p_voxel.z >= cz_mip) {
break;
}
}
}
}
// Outer step: advance to the next chunk across the nearest chunk
// boundary, recording the entry t and the axis crossed.
if (t_max_chunk.x < t_max_chunk.y && t_max_chunk.x < t_max_chunk.z) {
t_enter = t_max_chunk.x;
p_chunk.x = p_chunk.x + step_chunk.x;
t_max_chunk.x = t_max_chunk.x + t_delta_chunk.x;
entry_axis = 0;
} else if (t_max_chunk.y < t_max_chunk.z) {
t_enter = t_max_chunk.y;
p_chunk.y = p_chunk.y + step_chunk.y;
t_max_chunk.y = t_max_chunk.y + t_delta_chunk.y;
entry_axis = 1;
} else {
t_enter = t_max_chunk.z;
p_chunk.z = p_chunk.z + step_chunk.z;
t_max_chunk.z = t_max_chunk.z + t_delta_chunk.z;
entry_axis = 2;
}
}
// Outer step budget exhausted without a hit.
return out;
}
// Entry point: one invocation per pixel, in 8x8 workgroups.
//
// Marches every grid with that grid's own camera, keeps the nearest hit
// across grids, and writes the packed colour to `output` (row stride
// `screen_size.x`). Pixels with no hit get the sky colour. When
// `u.write_depth` is non-zero the nearest world-t is also stored in
// `depth_buffer` as raw f32 bits (T_INF for sky pixels).
@compute @workgroup_size(8, 8)
fn render_scene(@builtin(global_invocation_id) gid: vec3<u32>) {
    let dims = u.screen_size;
    // The dispatch is rounded up to whole workgroups; drop the overhang.
    if (gid.x >= dims.x || gid.y >= dims.y) {
        return;
    }
    let pix_idx = gid.y * dims.x + gid.x;

    // Image-plane coordinates of the pixel centre: x in [-half_w, half_w]
    // left to right, y in [-half_h, half_h] with the positive end at the
    // top of the screen.
    let half_h = tan(u.fov_y_rad * 0.5);
    let half_w = half_h * (f32(dims.x) / f32(dims.y));
    let ndc_x = (f32(gid.x) + 0.5) / f32(dims.x) * 2.0 - 1.0;
    let ndc_y_top_pos = 1.0 - (f32(gid.y) + 0.5) / f32(dims.y) * 2.0;
    let plane_x = ndc_x * half_w;
    let plane_y = ndc_y_top_pos * half_h;

    var best_t: f32 = T_INF;
    var hit_color = vec3<f32>(0.0);
    var any_hit = false;
    for (var g: u32 = 0u; g < u.grid_count; g = g + 1u) {
        // Same pixel, expressed in this grid's local frame.
        let cam = grid_cameras[g];
        let ray_dir = normalize(cam.forward + plane_x * cam.right - plane_y * cam.down);
        let candidate = march_grid(g, cam.pos, ray_dir, best_t);
        if (candidate.hit && candidate.t < best_t) {
            best_t = candidate.t;
            hit_color = candidate.color;
            any_hit = true;
        }
    }

    var pixel_rgb = hit_color;
    if (!any_hit) {
        // Sky direction = the per-pixel ray of the dedicated world/sky
        // camera. Valid regardless of grid_count (a grid-less scene has
        // no grid ray), so a sprite-only / empty scene paints a real sky.
        let sky_dir = normalize(
            u.sky_cam.forward + plane_x * u.sky_cam.right - plane_y * u.sky_cam.down
        );
        pixel_rgb = sky_color(sky_dir);
    }

    output[pix_idx] = pack4x8unorm(vec4<f32>(pixel_rgb, 1.0));
    if (u.write_depth != 0u) {
        depth_buffer[pix_idx] = bitcast<u32>(best_t);
    }
}