Skip to main content

roxlap_gpu/
lib.rs

1//! WGPU-backed compute-shader renderer scaffold for the roxlap
2//! voxel engine. GPU.1 in `PORTING-GPU.md`.
3//!
4//! GPU.1's job: stand up the device + surface + swapchain on a
5//! host window (any [`raw-window-handle`](raw_window_handle)
6//! provider), present a clear-to-colour frame each render call,
7//! and give the host a one-call opt-in. No voxel marching yet — the
8//! [`examples/probe.rs`](../examples/probe.rs) standalone holds
9//! the empirical FPS baseline from GPU.0.
10//!
11//! Later sub-substages flesh `GpuRenderer::render` out: GPU.2
12//! uploads voxel data, GPU.3 dispatches the inner-DDA compute
13//! shader, GPU.4 layers in chunk skipping, GPU.5 plugs the renderer
14//! into `roxlap-scene::Scene`, …
15//!
16//! ## Host integration shape (GPU.1)
17//!
18//! ```no_run
19//! use std::sync::Arc;
20//! use roxlap_gpu::{GpuRenderer, GpuRendererSettings};
21//! # use winit::window::Window;
22//! # fn pick(w: Arc<Window>, size: (u32, u32)) -> Option<GpuRenderer> {
23//! match GpuRenderer::new_blocking(w, size, GpuRendererSettings::default()) {
24//!     Ok(r) => Some(r),
25//!     Err(e) => {
26//!         eprintln!("GPU init failed: {e}; falling back to CPU");
27//!         None
28//!     }
29//! }
30//! # }
31//! ```
32
33#![allow(clippy::must_use_candidate, clippy::too_many_lines)]
34
35pub mod camera;
36pub mod decompress;
37pub mod grid;
38// Headless rendering is a native-only test/bench aid: it blocks on
39// `pollster` + `device.poll(Wait)`, neither of which exists on wasm.
40#[cfg(not(target_arch = "wasm32"))]
41pub mod headless;
42pub mod resident;
43pub mod scene;
44pub mod sprite_model;
45
46pub use camera::Camera;
47pub use decompress::{decompress_chunk, ChunkUpload, BEDROCK_RGB, CHUNK_Z};
48pub use grid::{bounding_box_of, GpuGridResident, GridUpload};
49#[cfg(not(target_arch = "wasm32"))]
50pub use headless::HeadlessGpu;
51pub use resident::GpuChunkResident;
52pub use scene::{
53    GpuSceneResident, GridRuntimeTransform, GridStaticMeta, RefreshOutcome, SceneUpload,
54};
55pub use sprite_model::{
56    build_sprite_model, build_sprite_model_with_materials, sprite_model_from_clip_frame,
57    sprite_model_from_clip_frame_with_materials, sprite_model_from_voxel_frame,
58    sprite_model_from_voxel_frame_with_materials, SpriteInstance, SpriteInstanceTransform,
59    SpriteModel, SpriteModelRegistry, SpriteRegistryResident,
60};
61
62use std::sync::Arc;
63
64use bytemuck::{Pod, Zeroable};
65use raw_window_handle::{HasDisplayHandle, HasWindowHandle};
66
/// Caller-controllable knobs for [`GpuRenderer::new`]. Defaults
/// target "highest-performance GPU, prefer Mailbox/Immediate over
/// vsync" — i.e. the same configuration the GPU.0 probe used to
/// measure the FPS ceiling.
///
/// The type is `Copy`, so hosts can tweak a [`Default`] value with struct
/// update syntax and pass it by value.
#[derive(Debug, Clone, Copy)]
pub struct GpuRendererSettings {
    /// Which class of adapter to ask for. The [`Default`] impl selects
    /// [`PowerPreference::High`].
    pub power_preference: PowerPreference,
    /// Initial clear colour cycled by GPU.1's empty render path.
    /// The voxel-rendering substages overwrite this entirely.
    /// Three `f64` channels; the default is `[0.06, 0.08, 0.12]`.
    pub clear_colour: [f64; 3],
    /// Prefer mailbox/immediate when offered; falls back to FIFO if
    /// the surface only supports it (Wayland under Mesa often does).
    /// Defaults to `true`.
    pub uncapped_present: bool,
}
81
/// Adapter-selection hint carried by
/// [`GpuRendererSettings::power_preference`].
///
/// NOTE(review): presumably translated into the matching
/// `wgpu::PowerPreference` when the adapter is requested — confirm against
/// the renderer's constructor.
#[derive(Debug, Clone, Copy)]
pub enum PowerPreference {
    /// Presumably the low-power (integrated) adapter — TODO confirm.
    Low,
    /// The high-performance choice; this is what
    /// [`GpuRendererSettings::default`] selects.
    High,
}
87
88impl Default for GpuRendererSettings {
89    fn default() -> Self {
90        Self {
91            power_preference: PowerPreference::High,
92            clear_colour: [0.06, 0.08, 0.12],
93            uncapped_present: true,
94        }
95    }
96}
97
/// Errors `GpuRenderer::new` surfaces to the host. The host's
/// expected flow is "try this, fall back to the CPU path on Err".
///
/// Implements [`std::fmt::Display`] and [`std::error::Error`]; the two
/// wrapping variants expose the underlying `wgpu` error as their
/// `source()`.
#[derive(Debug)]
pub enum GpuInitError {
    /// Creating the presentation surface failed (reported as
    /// "create_surface failed: …").
    CreateSurface(wgpu::CreateSurfaceError),
    /// No compatible adapter was available; carries no underlying error.
    NoAdapter,
    /// Requesting the logical device failed (reported as
    /// "request_device failed: …").
    RequestDevice(wgpu::RequestDeviceError),
}
106
107impl std::fmt::Display for GpuInitError {
108    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
109        match self {
110            Self::CreateSurface(e) => write!(f, "create_surface failed: {e}"),
111            Self::NoAdapter => write!(
112                f,
113                "no compatible adapter — does this system have a Vulkan/Metal/DX12 driver?"
114            ),
115            Self::RequestDevice(e) => write!(f, "request_device failed: {e}"),
116        }
117    }
118}
119
120impl std::error::Error for GpuInitError {
121    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
122        match self {
123            Self::CreateSurface(e) => Some(e),
124            Self::RequestDevice(e) => Some(e),
125            Self::NoAdapter => None,
126        }
127    }
128}
129
130impl From<wgpu::CreateSurfaceError> for GpuInitError {
131    fn from(value: wgpu::CreateSurfaceError) -> Self {
132        Self::CreateSurface(value)
133    }
134}
135
136impl From<wgpu::RequestDeviceError> for GpuInitError {
137    fn from(value: wgpu::RequestDeviceError) -> Self {
138        Self::RequestDevice(value)
139    }
140}
141
// NOTE(review): the paragraph below appears to describe `GpuRenderer`
// (declared later in this file), not `GpuLine`. It is kept as a plain
// comment so rustdoc does not attach it to `GpuLine`, where its
// `Self::render` / `Self::render_chunk` links cannot resolve.
//
// WGPU-backed renderer. Owns the device, queue, and surface
// bound to the host's window. `GpuRenderer::render` is the GPU.1
// clear-to-colour path; `GpuRenderer::render_chunk` is GPU.3's
// single-chunk DDA marcher.
//
// The window is consumed only at construction — `wgpu`'s
// `Surface<'static>` keeps its own `Arc` clone of the handle, so
// the renderer holds no window field of its own.

/// A world-space line segment for [`GpuRenderer::draw_lines_deferred`].
/// `color` is straight RGBA in `0..=1` (the alpha drives the over-blend);
/// `width_px` is the screen-space thickness; `depth_test` occludes the
/// segment behind nearer marched geometry.
#[derive(Clone, Copy, Debug)]
pub struct GpuLine {
    /// Segment start, in world space.
    pub a: [f32; 3],
    /// Segment end, in world space.
    pub b: [f32; 3],
    /// Straight (non-premultiplied) RGBA in `0..=1`.
    pub color: [f32; 4],
    /// Screen-space thickness in pixels. The vertex builder treats values
    /// below `1.0` as `1.0`.
    pub width_px: f32,
    /// `true` to hide the segment behind nearer marched geometry.
    pub depth_test: bool,
}
162
/// World camera basis for projecting [`GpuLine`] endpoints — the same
/// pinhole the scene-DDA pass marches with (`right`/`down`/`forward`
/// orthonormal, `pos` in world voxel units).
///
/// The overlay vertex builders turn a world point into camera space by
/// dotting `point - pos` with each of the three axes.
#[derive(Clone, Copy, Debug)]
pub struct GpuLineCamera {
    /// Camera origin, in world voxel units.
    pub pos: [f32; 3],
    /// Camera-space +x axis, expressed in world space.
    pub right: [f32; 3],
    /// Camera-space +y axis (pointing down the screen), in world space.
    pub down: [f32; 3],
    /// View direction; the component along it is the perspective divisor.
    pub forward: [f32; 3],
}
173
/// Near plane (camera-forward distance) below which a [`GpuLine`] endpoint
/// is clipped, so the pinhole divide stays finite. The image-quad clipper
/// (`clip_near_image`) uses the same plane.
const LINE_NEAR_Z: f32 = 0.0625;
/// Depth-test slack (euclidean world distance) so a line resting on the
/// surface it traces doesn't z-fight the marched geometry.
///
/// NOTE(review): presumably uploaded as `LineParams::depth_bias` — confirm
/// at the draw call.
const LINE_DEPTH_BIAS: f32 = 0.5;
180
/// One expanded-quad vertex (`build_line_vertices` output). `pos` is NDC;
/// `depth` is the euclidean world distance of the source endpoint (the
/// marcher's `best_t` metric); `depth_test` is `1.0`/`0.0`.
///
/// `#[repr(C)]` + `Pod` so a slice of these can be byte-cast straight into
/// a vertex buffer; the struct is eight tightly packed `f32`s.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct LineVertex {
    /// NDC xy (x already mirrored when the viewport flip is on).
    pos: [f32; 2],
    /// Euclidean camera distance of the (near-clipped) endpoint.
    depth: f32,
    /// `1.0` when the owning line requested depth testing, else `0.0`.
    depth_test: f32,
    /// The owning line's straight RGBA colour, copied per vertex.
    color: [f32; 4],
}
192
/// `line.wgsl` / `image.wgsl` fragment uniform (std140; padded to 32 bytes
/// so the uniform's struct stride is a 16-byte multiple).
///
/// Layout: five 4-byte scalars (20 bytes) followed by three padding words.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct LineParams {
    /// Target width — presumably in pixels, used to index the depth buffer;
    /// TODO confirm against the shader.
    screen_w: u32,
    /// Target height (same caveat as `screen_w`).
    screen_h: u32,
    /// Slack applied by the manual depth test.
    depth_bias: f32,
    /// `1` when no scene depth is bound (a dummy buffer stands in), which
    /// keeps the shader from indexing it.
    no_depth: u32,
    /// 1 when the viewport flip is on. The depth buffer is written
    /// unflipped (the blit mirrors at read time), but these passes flip the
    /// vertex NDC X, so the fragment must mirror its depth lookup to match.
    flip_x: u32,
    /// Explicit tail padding up to 32 bytes.
    _pad: [u32; 3],
}
208
/// Lazy-built debug-line pipeline (L3.2). The bind group is rebuilt each
/// draw (it references the current `scene_dda.depth_buffer`, which the
/// swapchain resize recreates); the pipeline / layout / uniform persist.
struct LineResources {
    /// Render pipeline for the expanded line quads.
    pipeline: wgpu::RenderPipeline,
    /// Layout used to build the per-draw bind group.
    bgl: wgpu::BindGroupLayout,
    /// Persistent uniform buffer — presumably holds one `LineParams`;
    /// TODO confirm at the draw call.
    uniform_buf: wgpu::Buffer,
    /// 1-word stand-in bound when no scene depth exists (sprite-only /
    /// empty scene); `no_depth = 1` keeps the shader from indexing it.
    dummy_depth: wgpu::Buffer,
}
220
221/// Project + expand world-space [`GpuLine`]s into screen-space quad
222/// vertices (6 per visible segment) for `line.wgsl`. Mirrors the
223/// scene-DDA pinhole (`forward + ndc_x·half_w·right − ndc_y·half_h·down`)
224/// so lines land on the marched geometry, carrying each endpoint's
225/// euclidean world distance as the depth-test key (= the marcher's
226/// `best_t`). Segments fully behind the near plane are dropped; the rest
227/// are clipped to it.
228fn build_line_vertices(
229    cam: &GpuLineCamera,
230    lines: &[GpuLine],
231    w: u32,
232    h: u32,
233    fov_y: f32,
234    flip_x: bool,
235) -> Vec<LineVertex> {
236    let aspect = w as f32 / h as f32;
237    let half_h = (fov_y * 0.5).tan();
238    let half_w = half_h * aspect;
239    let (wf, hf) = (w as f32, h as f32);
240
241    let cam_coords = |p: [f32; 3]| -> [f32; 3] {
242        let d = [p[0] - cam.pos[0], p[1] - cam.pos[1], p[2] - cam.pos[2]];
243        [
244            cam.right[0] * d[0] + cam.right[1] * d[1] + cam.right[2] * d[2],
245            cam.down[0] * d[0] + cam.down[1] * d[1] + cam.down[2] * d[2],
246            cam.forward[0] * d[0] + cam.forward[1] * d[1] + cam.forward[2] * d[2],
247        ]
248    };
249    // Camera-space point → (NDC xy, euclidean depth). NDC y is up (+1 top),
250    // matching WebGPU clip space; depth is the marcher's world-t metric.
251    let project = |q: [f32; 3]| -> ([f32; 2], f32) {
252        let inv = 1.0 / q[2];
253        let nx = q[0] * inv / half_w;
254        let ny = -q[1] * inv / half_h;
255        let depth = (q[0] * q[0] + q[1] * q[1] + q[2] * q[2]).sqrt();
256        ([nx, ny], depth)
257    };
258
259    let mut out = Vec::with_capacity(lines.len() * 6);
260    for line in lines {
261        let ca = cam_coords(line.a);
262        let cb = cam_coords(line.b);
263        let (cfa, cfb) = (ca[2], cb[2]);
264        if cfa < LINE_NEAR_Z && cfb < LINE_NEAR_Z {
265            continue;
266        }
267        // Near-clip in segment-parameter space on the forward component.
268        let (mut t0, mut t1) = (0.0f32, 1.0f32);
269        let dz = cfb - cfa;
270        if dz.abs() > f32::EPSILON {
271            let tn = (LINE_NEAR_Z - cfa) / dz;
272            if dz > 0.0 {
273                t0 = t0.max(tn);
274            } else {
275                t1 = t1.min(tn);
276            }
277        }
278        if t0 > t1 {
279            continue;
280        }
281        let lerp3 = |t: f32| {
282            [
283                ca[0] + (cb[0] - ca[0]) * t,
284                ca[1] + (cb[1] - ca[1]) * t,
285                ca[2] + (cb[2] - ca[2]) * t,
286            ]
287        };
288        let (n0, d0) = project(lerp3(t0));
289        let (n1, d1) = project(lerp3(t1));
290
291        // Expand in pixel space for a uniform screen-space thickness.
292        let to_px = |n: [f32; 2]| [(n[0] * 0.5 + 0.5) * wf, (0.5 - n[1] * 0.5) * hf];
293        let to_ndc = |p: [f32; 2]| [p[0] / wf * 2.0 - 1.0, 1.0 - p[1] / hf * 2.0];
294        let p0 = to_px(n0);
295        let p1 = to_px(n1);
296        let (dx, dy) = (p1[0] - p0[0], p1[1] - p0[1]);
297        let len = (dx * dx + dy * dy).sqrt().max(1e-6);
298        let half = line.width_px.max(1.0) * 0.5;
299        let (ex, ey) = (-dy / len * half, dx / len * half);
300
301        let c0a = to_ndc([p0[0] + ex, p0[1] + ey]);
302        let c0b = to_ndc([p0[0] - ex, p0[1] - ey]);
303        let c1a = to_ndc([p1[0] + ex, p1[1] + ey]);
304        let c1b = to_ndc([p1[0] - ex, p1[1] - ey]);
305        let dt = if line.depth_test { 1.0 } else { 0.0 };
306        // Mirror the overlay's NDC x to match the flipped scene blit.
307        let vert = |pos: [f32; 2], depth: f32| LineVertex {
308            pos: [if flip_x { -pos[0] } else { pos[0] }, pos[1]],
309            depth,
310            depth_test: dt,
311            color: line.color,
312        };
313        // Two triangles, cull disabled so winding is irrelevant.
314        out.push(vert(c0a, d0));
315        out.push(vert(c0b, d0));
316        out.push(vert(c1a, d1));
317        out.push(vert(c1a, d1));
318        out.push(vert(c0b, d0));
319        out.push(vert(c1b, d1));
320    }
321    out
322}
323
/// A world-space 2D image-sprite quad for [`GpuRenderer::draw_images_deferred`].
/// `corners` are the four world points `TL, TR, BL, BR` (UVs `(0,0) (1,0)
/// (0,1) (1,1)`); `image` indexes a texture uploaded via
/// [`GpuRenderer::upload_image`]; `tint` is straight RGBA in `0..=1`
/// (multiplied into every texel); `depth_test` occludes the quad behind
/// nearer marched geometry. The facade resolves orientation + back-face
/// culling, so this is pure geometry.
#[derive(Clone, Copy, Debug)]
pub struct GpuImageQuad {
    /// World-space corners in `TL, TR, BL, BR` order.
    pub corners: [[f32; 3]; 4],
    /// Id of the texture to sample, as returned by
    /// [`GpuRenderer::upload_image`].
    pub image: usize,
    /// Straight RGBA multiplier in `0..=1`, applied to every texel.
    pub tint: [f32; 4],
    /// `true` to hide the quad behind nearer marched geometry.
    pub depth_test: bool,
    /// Texels with alpha below this (`0..=1`) are discarded in the FS.
    /// `0.0` keeps the plain over-blend.
    pub alpha_cutoff: f32,
}
341
/// One expanded textured-quad vertex (`build_image_vertices` output).
/// `ndc` is the projected NDC xy; `w` is the source `forward` depth, fed
/// back into a homogeneous clip position so the rasterizer interpolates
/// `uv` perspective-correctly; `depth` is the euclidean world distance
/// (the marcher's `best_t`) for the manual depth test.
///
/// `#[repr(C)]` + `Pod`: twelve tightly packed `f32`s per vertex.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct ImageVertex {
    /// Projected NDC xy (x already mirrored when the viewport flip is on).
    ndc: [f32; 2],
    /// Camera-forward distance of the vertex (the perspective divisor).
    w: f32,
    /// Euclidean camera distance, the manual depth-test key.
    depth: f32,
    /// `1.0` when the owning quad requested depth testing, else `0.0`.
    depth_test: f32,
    /// The owning quad's `alpha_cutoff`, copied per vertex.
    cutoff: f32,
    /// Texture coordinate (interpolated at near-plane crossings).
    uv: [f32; 2],
    /// The owning quad's tint, copied per vertex.
    tint: [f32; 4],
}
358
/// Lazy-built image-sprite pipeline (mirrors [`LineResources`]). The
/// per-draw bind group adds the quad's texture + a sampler to the line
/// pass's uniform + scene-depth bindings.
struct ImageResources {
    /// Render pipeline for the textured quads.
    pipeline: wgpu::RenderPipeline,
    /// Layout used to build the per-draw bind group.
    bgl: wgpu::BindGroupLayout,
    /// Persistent uniform buffer — presumably holds one `LineParams`, as in
    /// the line pass; TODO confirm at the draw call.
    uniform_buf: wgpu::Buffer,
    /// Stand-in depth buffer — presumably bound when no scene depth exists,
    /// like `LineResources::dummy_depth`; TODO confirm.
    dummy_depth: wgpu::Buffer,
    /// Sampler bound next to each quad's texture view.
    sampler: wgpu::Sampler,
}
369
/// A retained image-sprite texture (uploaded via
/// [`GpuRenderer::upload_image`], referenced by [`GpuImageQuad::image`]).
struct ImageResident {
    /// View bound into the image pass's per-draw bind group.
    view: wgpu::TextureView,
    // Held so the view stays valid + the texture shows in profiler dumps.
    _texture: wgpu::Texture,
}
377
/// Camera-space textured-quad vertex (near-clip working set): the
/// `(right, down, forward)` components + the texture `uv`.
#[derive(Clone, Copy)]
struct ImgClipV {
    /// `[right, down, forward]` camera-space components; index 2 is the
    /// value the near-plane test compares against `LINE_NEAR_Z`.
    cam: [f32; 3],
    /// Texture coordinate carried through clipping.
    uv: [f32; 2],
}
385
386/// Clip a convex camera-space polygon against the near plane
387/// (`forward >= LINE_NEAR_Z`), interpolating UVs at each crossing.
388fn clip_near_image(poly: &[ImgClipV]) -> Vec<ImgClipV> {
389    let n = poly.len();
390    let mut out: Vec<ImgClipV> = Vec::with_capacity(n + 1);
391    for i in 0..n {
392        let cur = poly[i];
393        let prev = poly[(i + n - 1) % n];
394        let cur_in = cur.cam[2] >= LINE_NEAR_Z;
395        let prev_in = prev.cam[2] >= LINE_NEAR_Z;
396        if cur_in != prev_in {
397            let t = (LINE_NEAR_Z - prev.cam[2]) / (cur.cam[2] - prev.cam[2]);
398            out.push(ImgClipV {
399                cam: [
400                    prev.cam[0] + (cur.cam[0] - prev.cam[0]) * t,
401                    prev.cam[1] + (cur.cam[1] - prev.cam[1]) * t,
402                    LINE_NEAR_Z,
403                ],
404                uv: [
405                    prev.uv[0] + (cur.uv[0] - prev.uv[0]) * t,
406                    prev.uv[1] + (cur.uv[1] - prev.uv[1]) * t,
407                ],
408            });
409        }
410        if cur_in {
411            out.push(cur);
412        }
413    }
414    out
415}
416
417/// Project + near-clip a world-space [`GpuImageQuad`] into perspective-correct
418/// textured-quad vertices for `image.wgsl`. Mirrors the scene-DDA pinhole
419/// (the same one [`build_line_vertices`] uses), carrying each vertex's
420/// euclidean world distance as the depth-test key. Quads fully behind the
421/// near plane produce no vertices.
422fn build_image_vertices(
423    cam: &GpuLineCamera,
424    quad: &GpuImageQuad,
425    w: u32,
426    h: u32,
427    fov_y: f32,
428    flip_x: bool,
429) -> Vec<ImageVertex> {
430    let aspect = w as f32 / h as f32;
431    let half_h = (fov_y * 0.5).tan();
432    let half_w = half_h * aspect;
433    let dt = if quad.depth_test { 1.0 } else { 0.0 };
434
435    let cam_coords = |p: [f32; 3]| -> [f32; 3] {
436        let d = [p[0] - cam.pos[0], p[1] - cam.pos[1], p[2] - cam.pos[2]];
437        [
438            cam.right[0] * d[0] + cam.right[1] * d[1] + cam.right[2] * d[2],
439            cam.down[0] * d[0] + cam.down[1] * d[1] + cam.down[2] * d[2],
440            cam.forward[0] * d[0] + cam.forward[1] * d[1] + cam.forward[2] * d[2],
441        ]
442    };
443    let project = |v: ImgClipV| -> ImageVertex {
444        let (cx, cy, cz) = (v.cam[0], v.cam[1], v.cam[2]);
445        let nx = cx / (cz * half_w);
446        ImageVertex {
447            // Mirror NDC x to match the flipped scene blit.
448            ndc: [if flip_x { -nx } else { nx }, -cy / (cz * half_h)],
449            w: cz,
450            depth: (cx * cx + cy * cy + cz * cz).sqrt(),
451            depth_test: dt,
452            cutoff: quad.alpha_cutoff,
453            uv: v.uv,
454            tint: quad.tint,
455        }
456    };
457
458    // Per-corner UV: TL(0,0) TR(1,0) BL(0,1) BR(1,1).
459    let uvs = [[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
460    let verts: Vec<ImgClipV> = quad
461        .corners
462        .iter()
463        .zip(uvs)
464        .map(|(c, uv)| ImgClipV {
465            cam: cam_coords(*c),
466            uv,
467        })
468        .collect();
469
470    let mut out = Vec::with_capacity(12);
471    for tri in [[0usize, 1, 2], [1, 3, 2]] {
472        let poly = [verts[tri[0]], verts[tri[1]], verts[tri[2]]];
473        let clipped = clip_near_image(&poly);
474        if clipped.len() < 3 {
475            continue;
476        }
477        for i in 1..clipped.len() - 1 {
478            out.push(project(clipped[0]));
479            out.push(project(clipped[i]));
480            out.push(project(clipped[i + 1]));
481        }
482    }
483    out
484}
485
/// WGPU-backed renderer. Owns the device, queue, and surface
/// bound to the host's window. [`Self::render`] is the GPU.1
/// clear-to-colour path; [`Self::render_chunk`] is GPU.3's
/// single-chunk DDA marcher.
///
/// The window is consumed only at construction — `wgpu`'s
/// `Surface<'static>` keeps its own `Arc` clone of the handle, so
/// the renderer holds no window field of its own.
///
/// Most per-pass GPU state (`chunk_dda`, `grid_dda`, `scene_dda`, the line
/// and image overlay pipelines, the egui painter) is held in `Option`s and
/// built lazily on first use, as the field docs below describe.
#[allow(clippy::struct_excessive_bools)] // independent per-frame flags, not a state enum
pub struct GpuRenderer {
    /// Presentation surface bound to the host's window.
    surface: wgpu::Surface<'static>,
    /// Swapchain configuration for `surface` — NOTE(review): presumably
    /// updated and re-applied on resize; confirm.
    surface_config: wgpu::SurfaceConfiguration,
    /// Logical device every pipeline and buffer below is created on.
    device: wgpu::Device,
    /// Submission queue paired with `device`.
    queue: wgpu::Queue,
    /// Adapter description — presumably captured at init for diagnostics;
    /// TODO confirm.
    adapter_info: String,
    /// Clear colour for the GPU.1 clear path — presumably seeded from
    /// [`GpuRendererSettings::clear_colour`]; TODO confirm.
    clear_colour: [f64; 3],
    /// Frame counter — NOTE(review): presumably bumped once per rendered
    /// frame; confirm.
    frame_count: u32,
    /// Mirror the marched scene horizontally on present (the scene blit
    /// samples `width-1-x`, and line/image overlays mirror their NDC x).
    /// The egui pass is unaffected. See [`Self::set_flip_x`].
    flip_x: bool,
    /// Lazy-built on first [`Self::render_chunk`] call; rebuilt when
    /// the swapchain resizes (storage texture must match).
    chunk_dda: Option<ChunkDdaResources>,
    /// Lazy-built on first [`Self::render_grid`] call; same resize
    /// trigger as `chunk_dda`. The two paths share the same blit
    /// pipeline structure but bind different storage layouts.
    grid_dda: Option<GridDdaResources>,
    /// Lazy-built on first [`Self::render_scene`] call. Holds the
    /// multi-grid pipeline + per-grid camera uniforms.
    scene_dda: Option<SceneDdaResources>,
    /// TV.6 — global voxel-material palette mirrored to the scene pass (256
    /// entries, default all-opaque), set via [`Self::set_scene_materials`].
    scene_materials: Box<[MaterialGpu; 256]>,
    /// TV.6 — terrain colour→material map (`[rgb, material_id]` rows) +
    /// whether any mapped material is translucent (the shader gate).
    scene_terrain_map: Vec<[u32; 2]>,
    scene_terrain_translucent: bool,
    /// Whether the *current* deferred frame ran a scene pass that wrote
    /// `scene_dda.depth_buffer`. [`Self::render_scene`] sets it; the
    /// color-only [`Self::render_clear_deferred`] clears it. Without this,
    /// depth-tested overlays (`draw_lines_deferred` / `draw_images_deferred`)
    /// drawn over an empty/cleared scene would test against the *previous*
    /// scene's stale depth and clip incorrectly.
    scene_depth_valid: bool,
    /// GPU.8 — panoramic sky texture + sampler. Created at
    /// `new` as a 1×1 mid-grey default; [`Self::set_sky_panorama`]
    /// replaces it. The scene-DDA bind group references this each
    /// frame.
    sky_texture: wgpu::Texture,
    sky_view: wgpu::TextureView,
    sky_sampler: wgpu::Sampler,
    /// GPU.8 fog state. `color` is BGRA-style premultiplied (each
    /// channel in `[0, 1]`); `near` is the world-t distance at which
    /// fog starts kicking in; `far` is the distance at which it's
    /// fully opaque. The shader does
    /// `mix(hit, fog, smoothstep(near, far, t))`.
    ///
    /// NOTE(review): `fog_color` stores only three channels, so the "BGRA"
    /// wording presumably describes channel order, not an alpha — confirm.
    fog_color: [f32; 3],
    fog_near: f32,
    fog_far: f32,
    /// GPU.10 — sprites rendered as DDA-marched voxel models (the
    /// precise path; the GPU.9 compute splatter it replaced was
    /// retired in 10.5). Holds the concatenated model registry + the
    /// per-frame instance array; set via [`Self::set_sprite_instances`].
    sprite_registry: Option<sprite_model::SpriteRegistryResident>,
    /// Lazy-built pipeline + uniform for the model-DDA pass.
    sprite_model_dda: Option<SpriteModelDdaResources>,
    /// TV — global voxel-material palette mirrored to the sprite pass (256
    /// entries, default all-opaque), set via [`Self::set_sprite_materials`].
    /// `sprite_has_translucent` gates the shader's accumulate path.
    sprite_materials: Box<[MaterialGpu; 256]>,
    sprite_has_translucent: bool,
    /// XS.4 — whether this device grants enough storage buffers per shader
    /// stage for GPU sprite shadows (the cross-pass occupancy bindings push a
    /// pass past the baseline 16). `false` ⇒ GPU sprites render unshadowed (the
    /// pre-XS.4 path); the CPU backend always has sprite shadows. Computed once
    /// at init from the granted device limits (see
    /// [`SPRITE_SHADOW_MIN_STORAGE_BUFFERS`]).
    sprite_shadows_capable: bool,
    /// GPU.10.4 — LOD aggressiveness: step a sprite to the next mip
    /// once a mip-0 voxel projects below this many screen pixels.
    /// Defaults to 4.0 (the empirical sweet spot); the host can tune
    /// via [`Self::set_sprite_lod_px`].
    sprite_lod_px: f32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` is marched at the mip level
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// ladder. `0` disables LOD (always mip-0). Tunable via
    /// [`Self::set_scene_mip_scan_dist`] — the axis-aligned-mip-beams
    /// mitigation (GPU.11.2) pushes it outward if banding appears.
    scene_mip_scan_dist: f32,
    /// Per-face grid side-shades (voxlap setsideshades), packed for the
    /// scene-DDA uniform: `[0]=(top,bot,left,right)`, `[1]=(up,down,_,_)`.
    /// Each is the u8 shade intensity. `[[0;4];2]` = no shading. Set via
    /// [`Self::set_scene_side_shades`].
    scene_side_shades: [[i32; 4]; 2],
    /// DL — per-frame dynamic lights (sun + point lights), already
    /// transformed into each grid's local frame by the facade. Set via
    /// [`Self::set_scene_lights`]; [`SceneLights::default`] = no lights
    /// (the pre-DL render). Consumed by `render_scene` each frame.
    scene_lights: SceneLights,
    /// Vertical FOV (radians) the last `render_scene` marched with —
    /// cached so [`Self::pixel_ray`] reconstructs the matching view ray
    /// for picking. `0` until the first scene render.
    last_fov_y_rad: f32,
    /// The acquired-but-not-yet-presented swapchain frame from the most
    /// recent deferred render ([`Self::render_scene`] /
    /// [`Self::render_clear_deferred`]). [`Self::present`] shows it as
    /// is; [`Self::paint_egui`] overlays egui first. Lets a host slot a
    /// UI pass between the marcher and present. `None` between present
    /// and the next render.
    pending_frame: Option<(wgpu::SurfaceTexture, wgpu::TextureView)>,
    /// Lazy-built debug-line pipeline (L3.2) — built on the first
    /// [`Self::draw_lines_deferred`] call.
    line_resources: Option<LineResources>,
    /// Persistent debug-line vertex buffer (L3.3) — grown on demand and
    /// reused across frames so a per-frame overlay (hundreds of segments)
    /// costs one `write_buffer`, not a fresh allocation. `line_vbuf_cap`
    /// is its capacity in bytes.
    line_vbuf: Option<wgpu::Buffer>,
    line_vbuf_cap: u64,
    /// Lazy-built image-sprite pipeline — built on the first
    /// [`Self::draw_images_deferred`] call.
    image_resources: Option<ImageResources>,
    /// Persistent image-sprite vertex buffer, grown on demand and reused
    /// across frames (like [`Self::line_vbuf`]). `image_vbuf_cap` is
    /// presumably its capacity in bytes, mirroring `line_vbuf_cap` —
    /// TODO confirm.
    image_vbuf: Option<wgpu::Buffer>,
    image_vbuf_cap: u64,
    /// Retained image-sprite textures, indexed by the id
    /// [`Self::upload_image`] returns. A dropped slot is `None` and is
    /// re-used by a later upload.
    images: Vec<Option<ImageResident>>,
    /// Lazy-built `egui-wgpu` paint pipeline; created on the first
    /// [`Self::paint_egui`] call (`hud` feature).
    #[cfg(feature = "hud")]
    egui_renderer: Option<egui_wgpu::Renderer>,
}
615
/// Per-renderer chunk-DDA pipeline state. The compute shader writes
/// into the storage texture; a fullscreen-triangle render pass
/// nearest-neighbour blits it to the swapchain.
struct ChunkDdaResources {
    /// Size the storage texture was built for — presumably `(width, height)`
    /// in pixels, compared against the swapchain to detect a resize;
    /// TODO confirm.
    storage_size: (u32, u32),
    /// View of the storage texture the compute pass writes.
    storage_view: wgpu::TextureView,
    /// Uniform buffer for the DDA compute pass.
    uniform_buf: wgpu::Buffer,
    /// Bind-group layout of the DDA compute pass.
    bgl_dda: wgpu::BindGroupLayout,
    /// The DDA compute pipeline.
    pipeline_dda: wgpu::ComputePipeline,
    /// Bind group for the blit pass.
    blit_bg: wgpu::BindGroup,
    /// Fullscreen-triangle pipeline that blits to the swapchain.
    pipeline_blit: wgpu::RenderPipeline,
    // wgpu BindGroups internally Arc their resources, but we keep
    // the handle so the sampler shows up in profiler dumps.
    _sampler: wgpu::Sampler,
}
631
/// Per-renderer grid-DDA pipeline state (the `GpuRenderer::grid_dda` slot).
/// Field-for-field the same shape as `ChunkDdaResources`: the grid path
/// shares the chunk path's blit pipeline structure but binds different
/// storage layouts.
struct GridDdaResources {
    /// Size the storage texture was built for — presumably `(width, height)`
    /// in pixels; TODO confirm.
    storage_size: (u32, u32),
    /// View of the storage texture — presumably written by the compute pass
    /// and read by the blit, as in the chunk path; TODO confirm.
    storage_view: wgpu::TextureView,
    /// Uniform buffer for the grid DDA compute pass.
    uniform_buf: wgpu::Buffer,
    /// Bind-group layout of the grid DDA compute pass.
    bgl_dda: wgpu::BindGroupLayout,
    /// The grid DDA compute pipeline.
    pipeline_dda: wgpu::ComputePipeline,
    /// Bind group for the blit pass.
    blit_bg: wgpu::BindGroup,
    /// Pipeline that blits the result to the swapchain.
    pipeline_blit: wgpu::RenderPipeline,
    // Retained but unread (underscore-prefixed) — presumably for the same
    // profiler-visibility reason as `ChunkDdaResources::_sampler`.
    _sampler: wgpu::Sampler,
}
642
/// Per-renderer scene-DDA pipeline state (the `GpuRenderer::scene_dda`
/// slot): the multi-grid compute pipeline, its framebuffer and depth
/// buffers, the blit to the swapchain, and the material/terrain tables
/// bound to the pass.
struct SceneDdaResources {
    /// Size the buffers below were built for — presumably `(width, height)`
    /// in pixels; TODO confirm.
    storage_size: (u32, u32),
    /// Framebuffer as a packed-`rgba8unorm` storage **buffer** (row
    /// stride = width), written by the scene + sprite compute passes
    /// and read by the blit. A buffer (not a storage texture) dodges
    /// Chrome-Dawn's tiled write-texture layout (which produced a
    /// 128×256-tiled image); linear + explicit stride is portable.
    framebuffer: wgpu::Buffer,
    /// Uniform buffer for the scene DDA compute pass.
    uniform_buf: wgpu::Buffer,
    /// Bind-group layout of the scene DDA compute pass.
    bgl_dda: wgpu::BindGroupLayout,
    /// The scene DDA compute pipeline.
    pipeline_dda: wgpu::ComputePipeline,
    /// Bind group for the blit pass.
    blit_bg: wgpu::BindGroup,
    /// Pipeline that blits `framebuffer` to the swapchain.
    pipeline_blit: wgpu::RenderPipeline,
    /// Blit uniform: `[width, height, flip_x, _pad]`. Retained so the flip
    /// flag (offset 8) can be re-written per frame.
    blit_dims: wgpu::Buffer,
    /// GPU.9 — per-pixel world-t depth (f32 bits as u32), sized
    /// `width * height * 4`. The scene pass writes it when sprites
    /// are present; the sprite model-DDA pass reads + composites
    /// against it.
    depth_buffer: wgpu::Buffer,
    /// Picking — a `COPY_DST | MAP_READ` staging copy of `depth_buffer`
    /// so the host can read back the per-pixel world-t after a frame
    /// (e.g. click → which voxel). Same size as `depth_buffer`.
    depth_readback: wgpu::Buffer,
    /// TV.6 — global voxel-material palette (256 `MaterialGpu`, binding 16),
    /// seeded from `scene_materials`, rewritten by [`GpuRenderer::set_scene_materials`].
    materials_pal_buf: wgpu::Buffer,
    /// TV.6 — terrain colour→material map (`[rgb, material_id]` rows, binding
    /// 17); ≥1 element (wgpu rejects a zero-sized storage binding).
    terrain_map_buf: wgpu::Buffer,
    /// XS.4.3 — placeholder bound at the sprite-cast bindings (19..21) on a
    /// capable device when no sprite registry exists (or this frame has no
    /// sprites). `sprite_cast_count == 0` keeps the shader from indexing it.
    /// `None` on non-capable devices (those bindings aren't in the BGL).
    sprite_cast_dummy: Option<wgpu::Buffer>,
}
680
/// GPU.10.0 — single-sprite model-DDA pipeline: one thread per pixel
/// marches the model voxel volume and composites against the scene
/// depth buffer.
struct SpriteModelDdaResources {
    /// Bind-group layout of the model-DDA compute pass.
    bgl: wgpu::BindGroupLayout,
    /// The model-DDA compute pipeline.
    pipeline: wgpu::ComputePipeline,
    /// Per-frame uniform buffer — presumably holds one
    /// `SpriteModelUniform`; TODO confirm at the dispatch site.
    uniform_buf: wgpu::Buffer,
    /// TV — global voxel-material palette (256 `MaterialGpu`, binding 12),
    /// seeded from the renderer's `sprite_materials` and rewritten by
    /// [`GpuRenderer::set_sprite_materials`].
    materials_buf: wgpu::Buffer,
}
693
/// Per-frame uniform for the model-DDA pass. Mirrors `Uniform` in
/// `sprite_model_dda.wgsl` (std140). Per-model + per-instance data
/// live in storage buffers; this holds the camera, fog, screen/tile
/// dimensions and instance count, plus the DL.4 lighting, DL.6 style and
/// XS.4.2 shadow-receive parameters.
///
/// Every `vec3` is followed by an explicit pad word and each `[f32; 4]`
/// starts on a 16-byte offset, so the struct has no implicit padding (which
/// `Pod` requires); the fields below add up to 240 bytes.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SpriteModelUniform {
    /// Camera position, padded to 16 bytes by `_p0`.
    cam_pos: [f32; 3],
    _p0: f32,
    /// Camera right axis, padded by `_p1`.
    cam_right: [f32; 3],
    _p1: f32,
    /// Camera down axis, padded by `_p2`.
    cam_down: [f32; 3],
    _p2: f32,
    /// Camera forward axis, padded by `_p3`.
    cam_forward: [f32; 3],
    _p3: f32,
    /// Fog colour — NOTE(review): use of the fourth component is not
    /// visible here; confirm against the shader.
    fog_color: [f32; 4],
    /// Target size — presumably `(width, height)` in pixels; TODO confirm.
    screen_size: [u32; 2],
    /// Number of sprite instances this frame.
    instance_count: u32,
    /// Fog distance — presumably the fully-fogged distance, matching
    /// `GpuRenderer::fog_far`; TODO confirm.
    fog_far: f32,
    /// Vertical field of view, in radians.
    fov_y_rad: f32,
    /// Tile-grid width — presumably the count of screen tiles per row used
    /// by instance binning; TODO confirm.
    tiles_x: u32,
    /// Screen-tile edge — presumably `SPRITE_TILE_SIZE`; TODO confirm.
    tile_size: u32,
    /// TV — 1 if any palette material is translucent: gates the shader's
    /// accumulate path. 0 ⇒ the unchanged nearest-hit opaque path.
    has_translucent: u32,
    // ── DL.4 — dynamic lighting for sprites (world space; all-zero ⇒
    // unchanged flat-lit sprites). No sprite shadows (deferred). ──
    // NOTE(review): the XS.4.2 fields further down add shadow-receive
    // parameters, so "no sprite shadows" presumably describes DL.4 only.
    /// World-space unit direction TO the sun (xyz; w unused).
    sun_dir: [f32; 4],
    /// `rgb` = sun colour, `w` = sun intensity.
    sun_color: [f32; 4],
    /// `rgb` = ambient multiplier on the sprite's albedo, `w` unused.
    ambient_color: [f32; 4],
    /// bit0 = sun enabled, bit2 = dynamic lighting active (use the lit path).
    sun_flags: u32,
    /// Number of point lights — NOTE(review): the lights themselves are
    /// presumably in a storage buffer; confirm.
    point_light_count: u32,
    _pad_dl: [u32; 2],
    // ── DL.6 — stylized sprite lighting (cel + ramp + flat per voxel) ──
    /// `rgb` = cool unlit end of the sun ramp; `w` unused.
    shadow_tint: [f32; 4],
    /// Cel band count; 0 = smooth.
    style_bands: u32,
    // ── XS.4.2 — GPU sprite-shadow (receive) params. Mirror the scene pass's
    // paging + shadow uniform fields so the sprite pass's duplicated terrain
    // occupancy march reads the exact same ABI. All zero ⇒ no sprite shadows
    // (the capability fallback / pre-XS.4 path). ──
    occ_num_pages: u32,
    occ_page_words: u32,
    grid_count: u32,
    max_outer_steps: u32,
    shadow_max_steps: u32,
    shadow_bias: f32,
    shadow_max_dist: f32,
    /// Fraction of a caster's light removed in shadow (`in_shadow = 1 - this`).
    shadow_strength: f32,
    /// Tail padding that rounds the struct up to a 16-byte multiple.
    _pad_xs: [u32; 3],
}
751
/// GPU.10.3 — sprite screen-tile edge in pixels for instance binning.
///
/// NOTE(review): presumably the value uploaded as
/// `SpriteModelUniform::tile_size` — confirm at the dispatch site.
const SPRITE_TILE_SIZE: u32 = 16;
754
/// One material in the GPU sprite material palette (binding 12). Mirrors
/// `Mat` in `sprite_model_dda.wgsl` (std430, 8 bytes). TV stage.
///
/// `#[repr(C)]` + `Pod` with two 4-byte fields: the declaration order
/// below is the byte layout of each palette entry, so it must not be
/// reordered independently of the shader. `material_palette` builds a
/// 256-entry table of these, one per `u8` material id.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct MaterialGpu {
    /// Opacity / additive intensity, normalised to `0..=1`
    /// (`material_palette` divides the table's integer alpha by 255).
    alpha: f32,
    /// [`roxlap_formats::material::BlendMode`] discriminant.
    mode: u32,
}
765
766/// Convert the global [`MaterialTable`](roxlap_formats::material::MaterialTable)
767/// into the GPU palette + a flag of whether any material is non-opaque (the
768/// shader gate — an all-opaque palette runs the unchanged first-hit path).
769fn material_palette(
770    table: &roxlap_formats::material::MaterialTable,
771) -> (Box<[MaterialGpu; 256]>, bool) {
772    let mut out = Box::new(
773        [MaterialGpu {
774            alpha: 1.0,
775            mode: 0,
776        }; 256],
777    );
778    let mut any_translucent = false;
779    for (id, slot) in out.iter_mut().enumerate() {
780        let m = table.get(id as u8);
781        slot.alpha = f32::from(m.alpha) / 255.0;
782        slot.mode = u32::from(m.mode.as_u8());
783        if !m.is_opaque() {
784            any_translucent = true;
785        }
786    }
787    (out, any_translucent)
788}
789
790// ───────────────────────── DL — dynamic lighting ─────────────────────────
791// Stage DL (GPU-only). The scene-DDA pass gains a runtime sun + point
792// lights + stylized hard shadows. The host passes lights already
793// transformed into each grid's local frame (mirroring the per-grid
794// cameras); the shader works entirely in grid-local space. DL.0 wires the
795// buffers + uniform fields + bindings; the shader receives them but does
796// not yet read them (the hit-site shading lands in DL.1+).
797
/// Max point lights honoured per frame. Excess are dropped with a warning
/// (never silently truncated). The per-grid buffer is sized
/// `grid_count * point_count`, where `point_count` is the per-grid light
/// count after this cap has been applied by `pack_scene_lights`.
pub const MAX_POINT_LIGHTS: usize = 32;
/// Max simultaneous shadow casters (the sun counts as one). Lights flagged
/// to cast beyond this are demoted to shadowless with a warning. Enforced
/// in DL.3 (shadow stage); declared here so the budget is one constant.
/// `pack_scene_lights` applies the budget: a shadow-casting sun takes the
/// first slot, then point lights claim the remaining slots in list order.
pub const MAX_SHADOW_CASTERS: usize = 4;
806
/// A point light in a grid's **local** space, as handed to
/// [`GpuRenderer::set_scene_lights`]. The facade transforms world-space
/// [`roxlap_render::PointLight`]s into each grid's frame.
#[derive(Clone, Copy, Debug)]
pub struct GpuLight {
    /// Grid-local position (voxel units).
    pub position: [f32; 3],
    /// Hard cutoff distance, world/voxel units.
    pub radius: f32,
    /// Linear RGB, `0..1`.
    pub color: [f32; 3],
    /// Light intensity; uploaded unchanged alongside `color`.
    pub intensity: f32,
    /// Request that this light cast shadows. The scene pass may demote it
    /// to shadowless (with a warning) when the [`MAX_SHADOW_CASTERS`]
    /// budget is already spent.
    pub casts_shadow: bool,
}

/// The whole per-frame light environment, already transformed per grid.
/// `grid_sun_dirs` and `grid_point_lights` are indexed by grid (outer
/// length == `grid_count`); empty ⇒ that light type is off. Set each frame
/// via [`GpuRenderer::set_scene_lights`]; [`Default`] = no lights (the
/// pre-DL render).
///
/// Derives [`Debug`] (like [`GpuLight`]) so hosts can log the light rig
/// they hand to the renderer.
#[derive(Clone, Debug, Default)]
pub struct SceneLights {
    /// Whether a dynamic-lighting rig is active this frame. `false` (the
    /// default) ⇒ the shader takes the unchanged baked-only path
    /// (byte-identical to pre-DL). `true` ⇒ the lit path runs (ambient
    /// term + sun + point lights), even with no sun/points set, so the
    /// `ambient` multiplier still applies.
    pub enabled: bool,
    /// Per-grid unit direction **to** the sun (grid-local). Empty ⇒ no sun.
    /// A grid without an entry gets a zero direction.
    pub grid_sun_dirs: Vec<[f32; 3]>,
    /// Sun colour (rgb).
    pub sun_color: [f32; 3],
    /// Sun intensity.
    pub sun_intensity: f32,
    /// Whether the sun casts shadows. When a sun is present and this is
    /// set, it consumes one [`MAX_SHADOW_CASTERS`] slot.
    pub sun_casts_shadow: bool,
    /// Per-grid point lights (grid-local). Outer len == `grid_count`; the
    /// inner len (the point count) is the same for every grid. The count
    /// is read from the first row and capped at [`MAX_POINT_LIGHTS`];
    /// missing entries in other rows are uploaded as zeroed lights.
    pub grid_point_lights: Vec<Vec<GpuLight>>,
    /// Multiplier on the baked ambient byte.
    pub ambient: [f32; 3],
    // NOTE(review): the four shadow_* fields are presumably forwarded
    // verbatim to the same-named uniform fields (shadow strength,
    // normal-offset bias in voxel units, sun shadow-ray length cap,
    // shadow-ray step budget) — confirm at the uniform fill site.
    pub shadow_strength: f32,
    pub shadow_bias: f32,
    pub shadow_max_dist: f32,
    pub shadow_max_steps: u32,
    /// DL.4 — **world-space** unit direction to the sun, for the sprite
    /// pass (sprites render in world space, not grid-local). `[0;3]` ⇒ no
    /// sun. Empty `grid_sun_dirs` and a zero `world_sun_dir` both mean
    /// "no sun" for their respective passes.
    pub world_sun_dir: [f32; 3],
    /// DL.4 — world-space point lights for the sprite pass (positions in
    /// world coords; same colour/intensity/radius as the per-grid copies).
    pub world_points: Vec<GpuLight>,
    /// DL.6 — stylized cel banding: `0` = smooth, `≥1` = quantize the
    /// diffuse to `bands + 1` levels + gradient-map the sun key.
    pub style_bands: u32,
    /// DL.6 — cool shadow/ambient tint (the stylized ramp's unlit end).
    pub shadow_tint: [f32; 3],
}
863
/// One point light packed for the GPU (binding 18, std430, 48 bytes).
/// Mirrors `PointLight` in `scene_dda.wgsl`.
///
/// `#[repr(C)]` + `Pod`: the field order below is the uploaded byte layout
/// (12 + 4 + 12 + 4 + 4 + 12 = 48 bytes), so it must stay in step with the
/// shader-side struct.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct GpuPointLight {
    /// Light position, copied from `GpuLight::position`.
    pos: [f32; 3],
    /// Cutoff distance, copied from `GpuLight::radius`.
    radius: f32,
    /// Colour, copied from `GpuLight::color`.
    color: [f32; 3],
    /// Intensity, copied from `GpuLight::intensity`.
    intensity: f32,
    /// `1` if the light casts a shadow *after* the `MAX_SHADOW_CASTERS`
    /// budget was applied in `pack_scene_lights`, else `0`.
    casts_shadow: u32,
    /// Explicit tail padding up to the 48-byte record size; always zero.
    _pad: [u32; 3],
}
876
877/// Build the per-grid point-light storage buffer (binding 18), grid-major:
878/// grid `g`'s lights occupy `[g*count .. (g+1)*count]`. Pads to one zeroed
879/// element when empty (wgpu rejects a zero-sized storage binding).
880fn upload_grid_point_lights(device: &wgpu::Device, lights: &[GpuPointLight]) -> wgpu::Buffer {
881    use wgpu::util::DeviceExt;
882    let one = [GpuPointLight::zeroed()];
883    let src: &[GpuPointLight] = if lights.is_empty() { &one } else { lights };
884    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
885        label: Some("roxlap-gpu scene_dda.grid_point_lights"),
886        contents: bytemuck::cast_slice(src),
887        usage: wgpu::BufferUsages::STORAGE,
888    })
889}
890
891/// DL — pack `lights` for the scene-DDA pass, shared by the surface and
892/// headless paths. Injects each grid's sun direction into
893/// `cam_vec[g].sun_dir` (binding 15), builds the grid-major point-light
894/// buffer (binding 18), and returns `(point_light_buffer, sun_flags,
895/// point_count)`. `sun_flags`: bit0 = sun enabled, bit1 = sun casts shadow,
896/// bit2 = dynamic lighting active. Over-cap point lights are dropped with a
897/// warning (never silently truncated).
898fn pack_scene_lights(
899    device: &wgpu::Device,
900    lights: &SceneLights,
901    grid_count: usize,
902    cam_vec: &mut [SceneDdaPerGridCamera],
903) -> (wgpu::Buffer, u32, u32) {
904    let sun_enabled = !lights.grid_sun_dirs.is_empty();
905    if sun_enabled {
906        for (g, cam) in cam_vec.iter_mut().enumerate() {
907            let d = lights.grid_sun_dirs.get(g).copied().unwrap_or([0.0; 3]);
908            cam.sun_dir = [d[0], d[1], d[2], 0.0];
909        }
910    }
911    // Point-light count per grid (same across grids); capped + warned.
912    let mut point_count = lights
913        .grid_point_lights
914        .first()
915        .map_or(0, std::vec::Vec::len);
916    if point_count > MAX_POINT_LIGHTS {
917        eprintln!(
918            "roxlap-gpu: {point_count} point lights > MAX_POINT_LIGHTS ({MAX_POINT_LIGHTS}); dropping the excess"
919        );
920        point_count = MAX_POINT_LIGHTS;
921    }
922    // MAX_SHADOW_CASTERS cap (locked decision #5): the sun (if it casts) is
923    // the first caster; keep at most MAX_SHADOW_CASTERS shadow casters total
924    // and demote the rest to shadowless — never silently. The point list is
925    // identical across grids (only positions differ), so decide per index
926    // once from the representative (grid-0) row.
927    let mut budget = MAX_SHADOW_CASTERS;
928    if sun_enabled && lights.sun_casts_shadow {
929        budget = budget.saturating_sub(1);
930    }
931    let mut allow_shadow = vec![false; point_count];
932    let mut demoted = 0usize;
933    if let Some(rep) = lights.grid_point_lights.first() {
934        for (i, slot) in allow_shadow.iter_mut().enumerate() {
935            if rep.get(i).is_some_and(|l| l.casts_shadow) {
936                if budget > 0 {
937                    *slot = true;
938                    budget -= 1;
939                } else {
940                    demoted += 1;
941                }
942            }
943        }
944    }
945    if demoted > 0 {
946        eprintln!(
947            "roxlap-gpu: {demoted} shadow-casting point lights > MAX_SHADOW_CASTERS ({MAX_SHADOW_CASTERS}); demoting the excess to shadowless"
948        );
949    }
950    // Grid-major point-light buffer: grid g at [g*count .. (g+1)*count].
951    let mut packed: Vec<GpuPointLight> = Vec::with_capacity(grid_count * point_count);
952    for g in 0..grid_count {
953        let row = lights.grid_point_lights.get(g);
954        for (i, &allow) in allow_shadow.iter().enumerate() {
955            let p = row.and_then(|r| r.get(i));
956            packed.push(p.map_or(GpuPointLight::zeroed(), |l| GpuPointLight {
957                pos: l.position,
958                radius: l.radius,
959                color: l.color,
960                intensity: l.intensity,
961                casts_shadow: u32::from(l.casts_shadow && allow),
962                _pad: [0; 3],
963            }));
964        }
965    }
966    let buf = upload_grid_point_lights(device, &packed);
967    let sun_flags = u32::from(sun_enabled)
968        | (u32::from(sun_enabled && lights.sun_casts_shadow) << 1)
969        | (u32::from(lights.enabled) << 2);
970    (buf, sun_flags, point_count as u32)
971}
972
973/// Build the per-grid camera storage buffer bound at `scene_dda.wgsl`
974/// binding 15 (read-only). One [`SceneDdaPerGridCamera`] per grid; the
975/// shader only indexes `0..grid_count`. An empty scene pads to one
976/// zeroed element (wgpu rejects a zero-sized storage binding). This
977/// replaces the old fixed `[…; 16]` uniform array, so a scene can hold
978/// any number of grids — the only ceiling is the device's storage size.
979fn upload_grid_cameras(device: &wgpu::Device, cams: &[SceneDdaPerGridCamera]) -> wgpu::Buffer {
980    use wgpu::util::DeviceExt;
981    let one = [SceneDdaPerGridCamera::zeroed()];
982    let src: &[SceneDdaPerGridCamera] = if cams.is_empty() { &one } else { cams };
983    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
984        label: Some("roxlap-gpu scene_dda.grid_cameras"),
985        contents: bytemuck::cast_slice(src),
986        usage: wgpu::BufferUsages::STORAGE,
987    })
988}
989
// The scene_dda bind group + layout wire occupancy pages 1..=3 at
// bindings 12..=14 explicitly; keep that in lockstep with the page
// count. Bump the bindings (here, in the WGSL, and in the bind
// group) if MAX_OCC_PAGES changes.
// Compile-time guard: the assertion is evaluated in a `const` item, so a
// different `scene::MAX_OCC_PAGES` fails the build instead of producing a
// mismatched bind group at run time.
const _: () = assert!(scene::MAX_OCC_PAGES == 4);
995
/// Per-grid camera record for the scene-DDA pass: the element type of the
/// binding-15 storage buffer built by `upload_grid_cameras`, and also
/// embedded by value as `SceneDdaUniform::sky_cam`.
///
/// `#[repr(C)]` + `Pod`: every `[f32; 3]` is followed by an explicit `f32`
/// pad so each vector occupies 16 bytes; the whole record is 144 bytes.
/// Field order is the uploaded byte layout — keep it in step with the WGSL.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaPerGridCamera {
    /// Camera position (filled from `Camera::position`).
    pos: [f32; 3],
    _pad0: f32,
    /// Camera right axis (filled from `Camera::right`).
    right: [f32; 3],
    _pad1: f32,
    /// Camera down axis (filled from `Camera::down`).
    down: [f32; 3],
    _pad2: f32,
    /// Camera forward axis (filled from `Camera::forward`).
    forward: [f32; 3],
    _pad3: f32,
    /// DL — unit direction TO the sun in this grid's local frame (xyz; w
    /// unused). Packed here rather than a separate per-grid storage buffer
    /// because the device's `max_storage_buffers_per_shader_stage` (16) is
    /// already saturated. Zero ⇒ no sun (the uniform's `sun_flags` gates).
    sun_dir: [f32; 4],
    /// XS.3 — this grid's world transform, for cross-grid shadows: a shadow
    /// ray (grid-local in the grid being shaded) is lifted to world space and
    /// tested against every grid. `world_origin` (xyz) is the grid origin;
    /// `rot0/1/2` (xyz) are the local→world rotation columns (world images of
    /// grid-local axes x/y/z). Packed here for the same buffer-limit reason.
    world_origin: [f32; 4],
    rot0: [f32; 4],
    rot1: [f32; 4],
    rot2: [f32; 4],
}
1022
1023impl SceneDdaPerGridCamera {
1024    fn from_camera(c: &Camera) -> Self {
1025        Self {
1026            pos: c.position,
1027            _pad0: 0.0,
1028            right: c.right,
1029            _pad1: 0.0,
1030            down: c.down,
1031            _pad2: 0.0,
1032            forward: c.forward,
1033            _pad3: 0.0,
1034            sun_dir: [0.0; 4],
1035            // Identity world transform by default; the per-grid build
1036            // (`grid_cameras`) overwrites it with the grid's real transform.
1037            world_origin: [0.0; 4],
1038            rot0: [1.0, 0.0, 0.0, 0.0],
1039            rot1: [0.0, 1.0, 0.0, 0.0],
1040            rot2: [0.0, 0.0, 1.0, 0.0],
1041        }
1042    }
1043
1044    /// XS.3 — stamp this grid's world transform (for cross-grid shadows).
1045    /// `rot_cols[i]` is the world image of grid-local axis `i` (the
1046    /// local→world rotation's columns).
1047    fn set_world_transform(&mut self, t: &GridWorldTransform) {
1048        self.world_origin = [t.origin[0], t.origin[1], t.origin[2], 0.0];
1049        self.rot0 = [t.rot_cols[0][0], t.rot_cols[0][1], t.rot_cols[0][2], 0.0];
1050        self.rot1 = [t.rot_cols[1][0], t.rot_cols[1][1], t.rot_cols[1][2], 0.0];
1051        self.rot2 = [t.rot_cols[2][0], t.rot_cols[2][1], t.rot_cols[2][2], 0.0];
1052    }
1053}
1054
/// XS.3 — a grid's world transform for cross-grid shadows: world origin +
/// the local→world rotation columns (`rot_cols[i]` = world image of grid-local
/// axis `i`). Built host-side per frame from the grid's `GridTransform` and
/// handed to [`SceneRenderer::render_scene`] alongside the per-grid cameras.
///
/// Derives [`Debug`] and [`PartialEq`] so hosts can log and compare
/// transforms. [`Default`] is the identity transform.
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct GridWorldTransform {
    /// World-space position of the grid origin.
    pub origin: [f32; 3],
    /// Local→world rotation columns; `rot_cols[i]` is the world image of
    /// grid-local axis `i`.
    pub rot_cols: [[f32; 3]; 3],
}

impl Default for GridWorldTransform {
    /// Identity: origin at zero, rotation columns equal to the unit axes.
    fn default() -> Self {
        Self {
            origin: [0.0; 3],
            rot_cols: [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
        }
    }
}
1073
/// Uniform block for the scene-DDA pass.
///
/// `#[repr(C)]` + `Pod`: the field order below, including the explicit
/// `_pad*` members, is the uploaded byte layout and must stay in step with
/// the shader-side declaration. New fields are appended in stage order
/// (GPU.8, GPU.9, TV.6, DL, XS…), each group padded to 16 bytes.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaUniform {
    /// Vertical field of view, radians.
    fov_y_rad: f32,
    /// Number of grids the shader may index in the per-grid buffers.
    grid_count: u32,
    // NOTE(review): presumably the step budget of the outer (chunk-level)
    // march — confirm against `scene_dda.wgsl`.
    max_outer_steps: u32,
    _pad0: u32,
    // NOTE(review): presumably render-target `(width, height)` in pixels —
    // confirm at the fill site.
    screen_size: [u32; 2],
    _pad1: [u32; 2],
    /// GPU.8 — `[r, g, b, fog_near]`. The `near` distance is packed
    /// into the colour's alpha channel to keep std140 alignment
    /// tidy (a bare `f32` after the `vec4` would force extra pads).
    fog_color: [f32; 4],
    /// GPU.8 — fog far distance (the `near` distance rides in
    /// `fog_color`'s fourth lane).
    fog_far: f32,
    /// GPU.9 — `1` when the sprite pass is active (scene pass then
    /// records `best_t` into the depth buffer), `0` otherwise.
    write_depth: u32,
    /// Occupancy paging: words per storage page (see
    /// `scene::split_occupancy_pages`). Only consulted by the shader
    /// when `occ_num_pages > 1`.
    occ_page_words: u32,
    /// Number of real occupancy pages (1 on multi-GiB GPUs → the
    /// shader takes a branch-free single-page read).
    occ_num_pages: u32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` marches at mip
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// count. `0` disables LOD (always mip-0).
    mip_scan_dist: f32,
    /// TV.6 — `1` if any terrain material is translucent (gates the
    /// accumulate path; `0` ⇒ unchanged opaque first-hit march).
    terrain_has_translucent: u32,
    /// TV.6 — number of `(rgb, material_id)` entries in the terrain map.
    terrain_map_count: u32,
    _pad4: u32,
    /// World camera used only to derive the per-pixel sky direction —
    /// always valid, so a `grid_count == 0` (sprite-only / empty) scene
    /// still paints a proper sky instead of a degenerate `(0,0,1)`
    /// (whose `atan2(0,0)` sky lookup samples black).
    sky_cam: SceneDdaPerGridCamera,
    /// Per-face side-shade intensities (voxlap setsideshades), each the
    /// u8 shade subtracted from a voxel's brightness byte at a hit.
    /// `side_shades0 = (top, bot, left, right)`,
    /// `side_shades1 = (up, down, _, _)`. All-zero = no shading.
    side_shades0: [i32; 4],
    side_shades1: [i32; 4],
    // ── DL — dynamic lighting (appended; all-zero ⇒ pre-DL render) ──
    /// `rgb` = sun colour, `w` = sun intensity.
    sun_color: [f32; 4],
    /// `rgb` = ambient multiplier on the baked byte, `w` = shadow strength.
    ambient_color: [f32; 4],
    /// Bit 0 = sun enabled, bit 1 = sun casts shadow, bit 2 = dynamic
    /// lighting active (the layout `pack_scene_lights` produces).
    sun_flags: u32,
    /// Number of point lights per grid (rows in the binding-18 buffer).
    point_light_count: u32,
    /// Shadow-ray step budget (DL.3).
    shadow_max_steps: u32,
    _pad5: u32,
    /// Shadow-ray origin bias along the surface normal (voxel units).
    shadow_bias: f32,
    /// Sun shadow-ray length cap (world units).
    shadow_max_dist: f32,
    _pad6: [f32; 2],
    /// DL.6 — stylized ramp's cool shadow tint (rgb; w unused).
    shadow_tint: [f32; 4],
    /// DL.6 — cel band count; 0 = smooth (no banding / gradient map).
    style_bands: u32,
    /// XS.4.3 — visible sprite-instance count for the scene pass's
    /// sprite-cast shadow march (sprites cast onto terrain). `0` ⇒ no sprite
    /// casters (the loop is skipped); only consulted by the capable variant.
    sprite_cast_count: u32,
    _pad7: [u32; 2],
}
1147
/// Uniform block for the grid-DDA pass (see `GpuRenderer::grid_dda`).
///
/// `#[repr(C)]` + `Pod`, 112 bytes: the field order, including the `_pad*`
/// members that round each `[_; 3]` up to 16 bytes, is the uploaded byte
/// layout. `camera_forward` needs no pad because `fov_y_rad` fills its
/// fourth lane.
// NOTE(review): presumably mirrors a uniform struct in the grid-DDA WGSL
// shader — confirm the shader file and keep the two in step.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct GridDdaUniform {
    camera_pos: [f32; 3],
    _pad0: f32,
    camera_right: [f32; 3],
    _pad1: f32,
    camera_down: [f32; 3],
    _pad2: f32,
    camera_forward: [f32; 3],
    /// Vertical field of view, radians; occupies the fourth lane after
    /// `camera_forward`.
    fov_y_rad: f32,
    // NOTE(review): presumably render-target `(width, height)` in pixels.
    screen_size: [u32; 2],
    // NOTE(review): `vsid` is presumably the voxlap per-chunk side length
    // in voxels — confirm against the shader.
    vsid: u32,
    // NOTE(review): presumably the step budget of the outer chunk march.
    max_outer_steps: u32,
    // NOTE(review): presumably the grid extent in chunks per axis.
    chunks_dims: [u32; 3],
    _pad3: u32,
    // NOTE(review): presumably the chunk coordinate of the grid's first
    // chunk (signed, so grids may start at negative chunk indices).
    origin_chunk: [i32; 3],
    _pad4: u32,
}
1167
/// Uniform block for the single-chunk DDA pass (see
/// `GpuRenderer::chunk_dda`).
///
/// `#[repr(C)]` + `Pod`, 80 bytes: the field order, including the `_pad*`
/// members that round each `[f32; 3]` up to 16 bytes, is the uploaded byte
/// layout. `camera_forward` needs no pad because `fov_y_rad` fills its
/// fourth lane.
// NOTE(review): presumably mirrors a uniform struct in the chunk-DDA WGSL
// shader — confirm the shader file and keep the two in step.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct ChunkDdaUniform {
    camera_pos: [f32; 3],
    _pad0: f32,
    camera_right: [f32; 3],
    _pad1: f32,
    camera_down: [f32; 3],
    _pad2: f32,
    camera_forward: [f32; 3],
    /// Vertical field of view, radians; occupies the fourth lane after
    /// `camera_forward`.
    fov_y_rad: f32,
    // NOTE(review): presumably render-target `(width, height)` in pixels.
    screen_size: [u32; 2],
    // NOTE(review): `vsid` is presumably the voxlap chunk side length in
    // voxels — confirm against the shader.
    vsid: u32,
    // NOTE(review): presumably the ray-march distance cap — units to be
    // confirmed against the shader.
    max_scan_dist: u32,
}
1183
1184impl GpuRenderer {
1185    /// Stand up the device + surface + swapchain on `window`. Async
1186    /// because `wgpu::Adapter`/`Device` requests are.
1187    ///
1188    /// `window` is any [`raw-window-handle`] provider (winit, SDL,
1189    /// GLFW, …) wrapped in an `Arc`; `size` is its initial physical
1190    /// framebuffer size in pixels — passed explicitly so the renderer
1191    /// stays decoupled from any one windowing library's size API.
1192    ///
1193    /// [`raw-window-handle`]: raw_window_handle
1194    ///
1195    /// # Errors
1196    /// Returns [`GpuInitError`] if surface creation, adapter
1197    /// selection, or device request fails. Hosts treat any error as
1198    /// "fall back to the CPU path".
1199    pub async fn new<W>(
1200        window: Arc<W>,
1201        size: (u32, u32),
1202        settings: GpuRendererSettings,
1203    ) -> Result<Self, GpuInitError>
1204    where
1205        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
1206    {
1207        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle());
1208        let surface = instance.create_surface(window.clone())?;
1209        let adapter = Self::request_adapter(&instance, Some(&surface), settings).await?;
1210        let (device, queue) = Self::request_device(&adapter).await?;
1211        Ok(Self::finish_init(
1212            &adapter, device, queue, surface, size, settings,
1213        ))
1214    }
1215
1216    /// wasm/WebGPU: build the renderer against an HTML `canvas`. No
1217    /// `Send + Sync` bound — wgpu's surface/device/queue are `!Send` on
1218    /// the `+atomics` shared-memory wasm build, and the browser host is
1219    /// single-threaded (`Rc<RefCell<…>>`). The native generic-`W` entry
1220    /// (which carries the bound) isn't reachable on wasm.
1221    ///
1222    /// Probes for an adapter **before** `create_surface`: on wasm,
1223    /// creating the surface calls `canvas.getContext("webgpu")`, which
1224    /// permanently locks the canvas's context type. If we bound it and
1225    /// then found no adapter, a CPU/WebGL2 fallback on the *same* canvas
1226    /// (the facade clones the handle, but it's the same DOM element)
1227    /// would fail with "no webgl2 context". Probing first leaves the
1228    /// canvas pristine when WebGPU is unavailable.
1229    ///
1230    /// # Errors
1231    /// See [`Self::new`].
1232    #[cfg(target_arch = "wasm32")]
1233    pub async fn new_from_canvas(
1234        canvas: web_sys::HtmlCanvasElement,
1235        size: (u32, u32),
1236        settings: GpuRendererSettings,
1237    ) -> Result<Self, GpuInitError> {
1238        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle());
1239        // Probe adapter AND device before binding the canvas — both
1240        // `requestAdapter` and `requestDevice` can fail on wasm, and
1241        // `create_surface` permanently locks the canvas to a WebGPU
1242        // context. Creating the surface last keeps the canvas pristine
1243        // for the CPU/WebGL2 fallback on any GPU-init failure.
1244        let adapter = Self::request_adapter(&instance, None, settings).await?;
1245        let (device, queue) = Self::request_device(&adapter).await?;
1246        let surface = instance.create_surface(wgpu::SurfaceTarget::Canvas(canvas))?;
1247        Ok(Self::finish_init(
1248            &adapter, device, queue, surface, size, settings,
1249        ))
1250    }
1251
1252    /// Pick a GPU adapter at the settings' power preference. `None`
1253    /// `compatible_surface` is used on the wasm canvas path so the probe
1254    /// doesn't bind the canvas's context (see [`Self::new_from_canvas`]);
1255    /// WebGPU exposes a single surface-independent adapter, so this is
1256    /// safe there.
1257    async fn request_adapter(
1258        instance: &wgpu::Instance,
1259        compatible_surface: Option<&wgpu::Surface<'static>>,
1260        settings: GpuRendererSettings,
1261    ) -> Result<wgpu::Adapter, GpuInitError> {
1262        let power_preference = match settings.power_preference {
1263            PowerPreference::Low => wgpu::PowerPreference::LowPower,
1264            PowerPreference::High => wgpu::PowerPreference::HighPerformance,
1265        };
1266        instance
1267            .request_adapter(&wgpu::RequestAdapterOptions {
1268                power_preference,
1269                compatible_surface,
1270                force_fallback_adapter: false,
1271            })
1272            .await
1273            .map_err(|_| GpuInitError::NoAdapter)
1274    }
1275
1276    /// Request the device + queue from `adapter`. Pulled out of
1277    /// [`Self::finish_init`] so the wasm canvas path can validate the
1278    /// device **before** `create_surface` binds the canvas's WebGPU
1279    /// context — if the device request fails (e.g. a browser that
1280    /// rejects a wgpu-sent limit), the canvas stays pristine for the
1281    /// CPU/WebGL2 fallback instead of being poisoned.
1282    async fn request_device(
1283        adapter: &wgpu::Adapter,
1284    ) -> Result<(wgpu::Device, wgpu::Queue), GpuInitError> {
1285        Ok(adapter
1286            .request_device(&wgpu::DeviceDescriptor {
1287                label: Some("roxlap-gpu device"),
1288                required_features: wgpu::Features::empty(),
1289                required_limits: pick_required_limits(&adapter.limits()),
1290                experimental_features: wgpu::ExperimentalFeatures::disabled(),
1291                memory_hints: wgpu::MemoryHints::default(),
1292                trace: wgpu::Trace::Off,
1293            })
1294            .await?)
1295    }
1296
    /// Shared swapchain → sky/sampler setup, run after the adapter +
    /// device + surface exist (the surface comes from a window handle on
    /// native, or an HTML canvas on wasm — created last on wasm so a
    /// failed device request never touches the canvas).
    ///
    /// In order: formats the adapter description, picks the swapchain
    /// format and present mode, configures the surface at `size` (each
    /// dimension clamped to at least 1), creates the default 1×1 sky
    /// texture and its sampler, records whether the device can run GPU
    /// sprite shadows, and returns the renderer with every other field at
    /// its default.
    ///
    /// # Panics
    /// Panics if the surface reports no supported formats or no alpha
    /// modes for `adapter`: `caps.formats[0]` and `caps.alpha_modes[0]`
    /// index those lists directly.
    // NOTE(review): wgpu documents `Surface::get_capabilities` as returning
    // empty lists when the surface is incompatible with the adapter. The
    // wasm path requests its adapter with no `compatible_surface`, so
    // confirm that case cannot reach the indexing below.
    fn finish_init(
        adapter: &wgpu::Adapter,
        device: wgpu::Device,
        queue: wgpu::Queue,
        surface: wgpu::Surface<'static>,
        size: (u32, u32),
        settings: GpuRendererSettings,
    ) -> Self {
        let info = adapter.get_info();
        let adapter_info = format!(
            "{name} ({backend:?}, {device_type:?})",
            name = info.name,
            backend = info.backend,
            device_type = info.device_type,
        );

        let caps = surface.get_capabilities(adapter);
        // Pick a NON-sRGB, 8-bit swapchain format. Voxlap colours are
        // already sRGB-encoded (the slab bytes are display-ready,
        // matching what the CPU softbuffer path writes straight to the
        // framebuffer with no conversion); an sRGB swapchain would
        // re-apply the gamma curve, washing the look out. We also
        // *prefer 8-bit BGRA/RGBA* over any other non-sRGB format: some
        // adapters (e.g. NVK) advertise a 16-bit-unorm format first,
        // and wgpu 29 gates `create_view` on 16-bit-norm formats behind
        // the `TEXTURE_FORMAT_16BIT_NORM` device feature (which we don't
        // enable, to stay WebGPU-portable). Falls back to the first
        // non-sRGB format, then `caps.formats[0]`.
        let surface_format = caps
            .formats
            .iter()
            .copied()
            .find(|f| {
                matches!(
                    f,
                    wgpu::TextureFormat::Bgra8Unorm | wgpu::TextureFormat::Rgba8Unorm
                )
            })
            .or_else(|| caps.formats.iter().copied().find(|f| !f.is_srgb()))
            // `unwrap_or` evaluates its argument eagerly, so `formats[0]`
            // is indexed even when a preferred format was found.
            .unwrap_or(caps.formats[0]);
        // Vsync (`Fifo`) unless the host explicitly asked for an uncapped
        // present mode.
        let present_mode = if settings.uncapped_present {
            pick_present_mode(&caps.present_modes)
        } else {
            wgpu::PresentMode::Fifo
        };
        // GPU.11.2 — surface the present mode: `Fifo` is vsync-capped
        // (FPS pinned to refresh rate → compute optimisations like the
        // mip LOD won't show up in the FPS counter). Mailbox/Immediate
        // are uncapped. Wayland under Mesa frequently offers only Fifo.
        eprintln!(
            "roxlap-gpu: present mode = {present_mode:?} (available: {:?})",
            caps.present_modes,
        );
        let (init_w, init_h) = size;
        let surface_config = wgpu::SurfaceConfiguration {
            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
            format: surface_format,
            // A zero-sized surface is clamped to 1×1.
            width: init_w.max(1),
            height: init_h.max(1),
            present_mode,
            alpha_mode: caps.alpha_modes[0],
            view_formats: vec![],
            desired_maximum_frame_latency: 2,
        };
        surface.configure(&device, &surface_config);

        // GPU.8 default sky: a 1×1 mid-grey texture. Hosts replace
        // it via `set_sky_panorama` with a real equirectangular
        // panorama; the default stops the shader sampling
        // uninitialised memory before that happens.
        let default_sky_pixel = [0x80u8, 0x80, 0x80, 0xff];
        let (sky_texture, sky_view) = create_sky_texture(&device, 1, 1, &default_sky_pixel);
        // One RGBA8 texel: 4 bytes per row, a single row.
        queue.write_texture(
            wgpu::TexelCopyTextureInfo {
                texture: &sky_texture,
                mip_level: 0,
                origin: wgpu::Origin3d::ZERO,
                aspect: wgpu::TextureAspect::All,
            },
            &default_sky_pixel,
            wgpu::TexelCopyBufferLayout {
                offset: 0,
                bytes_per_row: Some(4),
                rows_per_image: Some(1),
            },
            wgpu::Extent3d {
                width: 1,
                height: 1,
                depth_or_array_layers: 1,
            },
        );
        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
            label: Some("roxlap-gpu sky_sampler"),
            // Voxlap-convention panorama: u = elevation [0, 1]
            // (Repeat is a no-op since values don't go outside),
            // v = azimuth (wraps 360° — Repeat is required).
            address_mode_u: wgpu::AddressMode::Repeat,
            address_mode_v: wgpu::AddressMode::Repeat,
            address_mode_w: wgpu::AddressMode::ClampToEdge,
            mag_filter: wgpu::FilterMode::Linear,
            min_filter: wgpu::FilterMode::Linear,
            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
            ..Default::default()
        });

        // XS.4 — did the device grant enough storage buffers per stage for the
        // GPU sprite-shadow cross-pass bindings? If not, sprites render
        // unshadowed (the CPU backend still has full sprite shadows).
        let sprite_shadows_capable = device.limits().max_storage_buffers_per_shader_stage
            >= SPRITE_SHADOW_MIN_STORAGE_BUFFERS;

        // Everything below that is `None`, empty, `false` or zero starts
        // unset in this constructor.
        Self {
            surface,
            surface_config,
            device,
            queue,
            adapter_info,
            clear_colour: settings.clear_colour,
            frame_count: 0,
            flip_x: false,
            chunk_dda: None,
            grid_dda: None,
            scene_dda: None,
            // All-opaque palette (alpha 1, mode 0) until materials are set.
            scene_materials: Box::new(
                [MaterialGpu {
                    alpha: 1.0,
                    mode: 0,
                }; 256],
            ),
            scene_terrain_map: Vec::new(),
            scene_terrain_translucent: false,
            scene_depth_valid: false,
            sky_texture,
            sky_view,
            sky_sampler,
            // Fog disabled by default — voxlap's CPU rasterizer
            // also runs without fog in the scene-demo, so matching
            // it means no GPU fog out of the box. Hosts can opt in
            // via `set_fog` (e.g. for atmospheric far-LOD masking).
            fog_color: [0.66, 0.74, 0.88],
            fog_near: 0.0,
            fog_far: 1.0e30,
            sprite_registry: None,
            sprite_model_dda: None,
            sprite_shadows_capable,
            // All-opaque palette (alpha 1, mode 0) until materials are set.
            sprite_materials: Box::new(
                [MaterialGpu {
                    alpha: 1.0,
                    mode: 0,
                }; 256],
            ),
            sprite_has_translucent: false,
            // GPU.10.4 — default LOD threshold: step to a coarser mip
            // once a voxel projects below 4 px. Empirically the best
            // quality/cost tradeoff; the host can override.
            sprite_lod_px: 4.0,
            // GPU.11.1 — matches the CPU demo's mip_scan_dist=64.
            scene_mip_scan_dist: 64.0,
            scene_side_shades: [[0; 4]; 2],
            scene_lights: SceneLights::default(),
            last_fov_y_rad: 0.0,
            pending_frame: None,
            line_resources: None,
            line_vbuf: None,
            line_vbuf_cap: 0,
            image_resources: None,
            image_vbuf: None,
            image_vbuf_cap: 0,
            images: Vec::new(),
            #[cfg(feature = "hud")]
            egui_renderer: None,
        }
    }
1474
1475    /// Synchronous wrapper for hosts that don't have an async
1476    /// runtime. Internally `pollster::block_on`s [`Self::new`].
1477    ///
1478    /// # Errors
1479    /// See [`Self::new`].
1480    #[cfg(not(target_arch = "wasm32"))]
1481    pub fn new_blocking<W>(
1482        window: Arc<W>,
1483        size: (u32, u32),
1484        settings: GpuRendererSettings,
1485    ) -> Result<Self, GpuInitError>
1486    where
1487        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
1488    {
1489        pollster::block_on(Self::new(window, size, settings))
1490    }
1491
1492    /// Human-readable adapter description — name + backend +
1493    /// device type. The demo host prints this in the title bar.
1494    pub fn adapter_info(&self) -> &str {
1495        &self.adapter_info
1496    }
1497
1498    /// Borrow the underlying wgpu device — hosts use this to build
1499    /// chunk uploads (`GpuChunkResident::upload(gpu.device(), …)`).
1500    pub fn device(&self) -> &wgpu::Device {
1501        &self.device
1502    }
1503
1504    /// XS.4 — whether this device can run GPU sprite shadows (it granted
1505    /// enough storage buffers per shader stage for the cross-pass occupancy
1506    /// bindings). `false` ⇒ GPU sprites render unshadowed; the CPU backend
1507    /// always has sprite shadows. Lets the facade/host report the fallback.
1508    #[must_use]
1509    pub fn sprite_shadows_capable(&self) -> bool {
1510        self.sprite_shadows_capable
1511    }
1512
1513    /// Borrow the wgpu queue — hosts use this for read-back paths
1514    /// (`GpuChunkResident::read_voxel_blocking(gpu.device(), gpu.queue(), …)`).
1515    pub fn queue(&self) -> &wgpu::Queue {
1516        &self.queue
1517    }
1518
1519    /// GPU.8 — upload an equirectangular panorama as the scene's
1520    /// sky texture. `rgba` is row-major, `width × height` pixels,
1521    /// 4 bytes per pixel (R, G, B, A). The shader samples it with
1522    /// `u = atan2(dir.x, dir.y) / (2π) + 0.5` (azimuth) and
1523    /// `v = acos(-dir.z) / π` (elevation), matching standard
1524    /// equirectangular layout (top of image = zenith for voxlap's
1525    /// `+z = down` basis).
1526    /// Mirror the marched scene (and its line/image overlays) horizontally
1527    /// on present, leaving the egui overlay upright. See [`Self::flip_x`].
1528    pub fn set_flip_x(&mut self, flip: bool) {
1529        self.flip_x = flip;
1530    }
1531
1532    ///
1533    /// # Panics
1534    /// If `rgba.len() != (width * height * 4) as usize`.
1535    pub fn set_sky_panorama(&mut self, rgba: &[u8], width: u32, height: u32) {
1536        assert_eq!(
1537            rgba.len(),
1538            (width as usize) * (height as usize) * 4,
1539            "set_sky_panorama: expected w*h*4 bytes, got {}",
1540            rgba.len(),
1541        );
1542        let (tex, view) = create_sky_texture(&self.device, width, height, rgba);
1543        // Upload pixel data via `queue.write_texture` so we don't
1544        // have to map the buffer manually.
1545        self.queue.write_texture(
1546            wgpu::TexelCopyTextureInfo {
1547                texture: &tex,
1548                mip_level: 0,
1549                origin: wgpu::Origin3d::ZERO,
1550                aspect: wgpu::TextureAspect::All,
1551            },
1552            rgba,
1553            wgpu::TexelCopyBufferLayout {
1554                offset: 0,
1555                bytes_per_row: Some(width * 4),
1556                rows_per_image: Some(height),
1557            },
1558            wgpu::Extent3d {
1559                width,
1560                height,
1561                depth_or_array_layers: 1,
1562            },
1563        );
1564        self.sky_texture = tex;
1565        self.sky_view = view;
1566    }
1567
1568    /// GPU.8 — set the fog blend. `color` is per-channel [0, 1];
1569    /// `near`/`far` are world-space ray distances in voxel units.
1570    /// Hits with `t < near` show their full colour; hits with
1571    /// `t > far` show `color` exclusively; in between is a
1572    /// smoothstep blend.
1573    pub fn set_fog(&mut self, color: [f32; 3], near: f32, far: f32) {
1574        self.fog_color = color;
1575        self.fog_near = near;
1576        self.fog_far = far.max(near + 1.0);
1577    }
1578
1579    /// Re-configure the swapchain to a new physical size. Call from
1580    /// `WindowEvent::Resized`. Drops the chunk-DDA storage texture
1581    /// so [`Self::render_chunk`] rebuilds it at the new size.
1582    pub fn resize(&mut self, width: u32, height: u32) {
1583        if width == 0 || height == 0 {
1584            return;
1585        }
1586        self.surface_config.width = width;
1587        self.surface_config.height = height;
1588        self.surface.configure(&self.device, &self.surface_config);
1589        self.chunk_dda = None;
1590        self.grid_dda = None;
1591        self.scene_dda = None;
1592    }
1593
1594    /// Acquire the next swapchain frame, or `None` to skip this frame.
1595    /// wgpu 29's `get_current_texture` returns a
1596    /// [`wgpu::CurrentSurfaceTexture`] status enum (was
1597    /// `Result<_, SurfaceError>`): an outdated/lost surface reconfigures
1598    /// and skips, transient statuses just skip.
1599    fn acquire_frame(&self) -> Option<wgpu::SurfaceTexture> {
1600        use wgpu::CurrentSurfaceTexture as C;
1601        match self.surface.get_current_texture() {
1602            C::Success(t) | C::Suboptimal(t) => Some(t),
1603            C::Outdated | C::Lost => {
1604                self.surface.configure(&self.device, &self.surface_config);
1605                None
1606            }
1607            C::Timeout | C::Occluded | C::Validation => None,
1608        }
1609    }
1610
1611    /// GPU.1 render: single render pass clearing the swapchain to a
1612    /// slowly drifting colour, then presenting. Voxels arrive in
1613    /// GPU.3+.
1614    pub fn render(&mut self) {
1615        let Some(surf_tex) = self.acquire_frame() else {
1616            return;
1617        };
1618        let view = surf_tex
1619            .texture
1620            .create_view(&wgpu::TextureViewDescriptor::default());
1621
1622        // Slow colour drift so the user can tell the GPU path is
1623        // actually presenting frames vs. e.g. a frozen window.
1624        // Wrap at 2π/0.005 frames (~1257) so the cast stays exact.
1625        let phase = f64::from(self.frame_count % 1257) * 0.005;
1626        let [r, g, b] = self.clear_colour;
1627        let drift = (phase.sin() * 0.04 + 0.04).clamp(0.0, 0.1);
1628        let clear = wgpu::Color {
1629            r: (r + drift).clamp(0.0, 1.0),
1630            g: (g + drift * 0.5).clamp(0.0, 1.0),
1631            b: (b + drift * 0.25).clamp(0.0, 1.0),
1632            a: 1.0,
1633        };
1634
1635        let mut encoder = self
1636            .device
1637            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1638                label: Some("roxlap-gpu encoder"),
1639            });
1640        {
1641            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1642                label: Some("roxlap-gpu clear"),
1643                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1644                    view: &view,
1645                    depth_slice: None,
1646                    resolve_target: None,
1647                    ops: wgpu::Operations {
1648                        load: wgpu::LoadOp::Clear(clear),
1649                        store: wgpu::StoreOp::Store,
1650                    },
1651                })],
1652                depth_stencil_attachment: None,
1653                timestamp_writes: None,
1654                occlusion_query_set: None,
1655                multiview_mask: None,
1656            });
1657        }
1658        self.queue.submit(std::iter::once(encoder.finish()));
1659        surf_tex.present();
1660        self.frame_count = self.frame_count.wrapping_add(1);
1661    }
1662
1663    /// GPU.3 single-chunk render. Dispatches `chunk_dda.wgsl`
1664    /// against `resident`'s storage buffers, then blits the
1665    /// low-res storage texture to the swapchain. `camera.position`
1666    /// is in **chunk-local** voxel units (host translates from
1667    /// world coords). `max_scan_dist` caps the per-pixel DDA loop —
1668    /// scene-demo wires `+` / `-` through this each frame.
1669    ///
1670    /// # Panics
1671    /// Internally `expect`s the chunk-DDA resources to be built —
1672    /// they are constructed at the top of this function if missing.
1673    /// Cannot fire in normal control flow.
1674    pub fn render_chunk(
1675        &mut self,
1676        resident: &GpuChunkResident,
1677        camera: &Camera,
1678        max_scan_dist: u32,
1679    ) {
1680        let Some(surf_tex) = self.acquire_frame() else {
1681            return;
1682        };
1683        let surf_view = surf_tex
1684            .texture
1685            .create_view(&wgpu::TextureViewDescriptor::default());
1686
1687        let surface_w = self.surface_config.width;
1688        let surface_h = self.surface_config.height;
1689        let surface_format = self.surface_config.format;
1690
1691        // Lazy-build chunk-DDA resources; rebuild when the swapchain
1692        // grew or shrank.
1693        let needs_build = match &self.chunk_dda {
1694            Some(r) => r.storage_size != (surface_w, surface_h),
1695            None => true,
1696        };
1697        if needs_build {
1698            self.chunk_dda = Some(self.build_chunk_dda(surface_w, surface_h, surface_format));
1699        }
1700        let dda = self.chunk_dda.as_ref().expect("just built");
1701
1702        // Update uniforms.
1703        let uniform = ChunkDdaUniform {
1704            camera_pos: camera.position,
1705            _pad0: 0.0,
1706            camera_right: camera.right,
1707            _pad1: 0.0,
1708            camera_down: camera.down,
1709            _pad2: 0.0,
1710            camera_forward: camera.forward,
1711            fov_y_rad: camera.fov_y_rad,
1712            screen_size: [surface_w, surface_h],
1713            vsid: resident.vsid,
1714            max_scan_dist,
1715        };
1716        self.queue
1717            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1718
1719        // Per-frame DDA bind group — references the chunk's buffers
1720        // so we rebuild every frame (the resident can change between
1721        // calls).
1722        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1723            label: Some("roxlap-gpu chunk_dda.bg"),
1724            layout: &dda.bgl_dda,
1725            entries: &[
1726                wgpu::BindGroupEntry {
1727                    binding: 0,
1728                    resource: dda.uniform_buf.as_entire_binding(),
1729                },
1730                wgpu::BindGroupEntry {
1731                    binding: 1,
1732                    resource: resident.occupancy.as_entire_binding(),
1733                },
1734                wgpu::BindGroupEntry {
1735                    binding: 2,
1736                    resource: resident.color_offsets.as_entire_binding(),
1737                },
1738                wgpu::BindGroupEntry {
1739                    binding: 3,
1740                    resource: resident.colors.as_entire_binding(),
1741                },
1742                wgpu::BindGroupEntry {
1743                    binding: 4,
1744                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1745                },
1746            ],
1747        });
1748
1749        let mut encoder = self
1750            .device
1751            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1752                label: Some("roxlap-gpu chunk encoder"),
1753            });
1754        {
1755            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1756                label: Some("roxlap-gpu chunk_dda compute"),
1757                timestamp_writes: None,
1758            });
1759            cpass.set_pipeline(&dda.pipeline_dda);
1760            cpass.set_bind_group(0, &dda_bg, &[]);
1761            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1762        }
1763        {
1764            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1765                label: Some("roxlap-gpu chunk_dda blit"),
1766                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1767                    view: &surf_view,
1768                    depth_slice: None,
1769                    resolve_target: None,
1770                    ops: wgpu::Operations {
1771                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1772                        store: wgpu::StoreOp::Store,
1773                    },
1774                })],
1775                depth_stencil_attachment: None,
1776                timestamp_writes: None,
1777                occlusion_query_set: None,
1778                multiview_mask: None,
1779            });
1780            rpass.set_pipeline(&dda.pipeline_blit);
1781            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1782            rpass.draw(0..3, 0..1);
1783        }
1784        self.queue.submit(std::iter::once(encoder.finish()));
1785        surf_tex.present();
1786        self.frame_count = self.frame_count.wrapping_add(1);
1787    }
1788
1789    fn build_chunk_dda(
1790        &self,
1791        width: u32,
1792        height: u32,
1793        surface_format: wgpu::TextureFormat,
1794    ) -> ChunkDdaResources {
1795        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1796            label: Some("roxlap-gpu chunk_dda.storage"),
1797            size: wgpu::Extent3d {
1798                width,
1799                height,
1800                depth_or_array_layers: 1,
1801            },
1802            mip_level_count: 1,
1803            sample_count: 1,
1804            dimension: wgpu::TextureDimension::D2,
1805            format: wgpu::TextureFormat::Rgba8Unorm,
1806            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1807            view_formats: &[],
1808        });
1809        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1810
1811        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1812            label: Some("roxlap-gpu chunk_dda.uniform"),
1813            size: std::mem::size_of::<ChunkDdaUniform>() as u64,
1814            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1815            mapped_at_creation: false,
1816        });
1817
1818        let dda_shader = self
1819            .device
1820            .create_shader_module(wgpu::ShaderModuleDescriptor {
1821                label: Some("chunk_dda.wgsl"),
1822                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/chunk_dda.wgsl").into()),
1823            });
1824        let bgl_dda = self
1825            .device
1826            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1827                label: Some("roxlap-gpu chunk_dda.bgl"),
1828                entries: &[
1829                    bgl_uniform_entry(0),
1830                    bgl_storage_entry(1, true),
1831                    bgl_storage_entry(2, true),
1832                    bgl_storage_entry(3, true),
1833                    wgpu::BindGroupLayoutEntry {
1834                        binding: 4,
1835                        visibility: wgpu::ShaderStages::COMPUTE,
1836                        ty: wgpu::BindingType::StorageTexture {
1837                            access: wgpu::StorageTextureAccess::WriteOnly,
1838                            format: wgpu::TextureFormat::Rgba8Unorm,
1839                            view_dimension: wgpu::TextureViewDimension::D2,
1840                        },
1841                        count: None,
1842                    },
1843                ],
1844            });
1845        let dda_pl = self
1846            .device
1847            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1848                label: Some("roxlap-gpu chunk_dda.layout"),
1849                bind_group_layouts: &[Some(&bgl_dda)],
1850                immediate_size: 0,
1851            });
1852        let pipeline_dda = self
1853            .device
1854            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1855                label: Some("roxlap-gpu chunk_dda.pipeline"),
1856                layout: Some(&dda_pl),
1857                module: &dda_shader,
1858                entry_point: Some("render_chunk"),
1859                compilation_options: wgpu::PipelineCompilationOptions::default(),
1860                cache: None,
1861            });
1862
1863        // Fullscreen-triangle blit upscales the storage texture into
1864        // the swapchain. Nearest filter keeps the retro pixel look.
1865        let blit_shader = self
1866            .device
1867            .create_shader_module(wgpu::ShaderModuleDescriptor {
1868                label: Some("blit.wgsl"),
1869                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1870            });
1871        let bgl_blit = self
1872            .device
1873            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1874                label: Some("roxlap-gpu chunk_dda.blit_bgl"),
1875                entries: &[
1876                    wgpu::BindGroupLayoutEntry {
1877                        binding: 0,
1878                        visibility: wgpu::ShaderStages::FRAGMENT,
1879                        ty: wgpu::BindingType::Texture {
1880                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1881                            view_dimension: wgpu::TextureViewDimension::D2,
1882                            multisampled: false,
1883                        },
1884                        count: None,
1885                    },
1886                    wgpu::BindGroupLayoutEntry {
1887                        binding: 1,
1888                        visibility: wgpu::ShaderStages::FRAGMENT,
1889                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1890                        count: None,
1891                    },
1892                ],
1893            });
1894        let blit_pl = self
1895            .device
1896            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1897                label: Some("roxlap-gpu chunk_dda.blit_layout"),
1898                bind_group_layouts: &[Some(&bgl_blit)],
1899                immediate_size: 0,
1900            });
1901        let pipeline_blit = self
1902            .device
1903            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1904                label: Some("roxlap-gpu chunk_dda.blit_pipeline"),
1905                layout: Some(&blit_pl),
1906                vertex: wgpu::VertexState {
1907                    module: &blit_shader,
1908                    entry_point: Some("vs_main"),
1909                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1910                    buffers: &[],
1911                },
1912                fragment: Some(wgpu::FragmentState {
1913                    module: &blit_shader,
1914                    entry_point: Some("fs_main"),
1915                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1916                    targets: &[Some(wgpu::ColorTargetState {
1917                        format: surface_format,
1918                        blend: None,
1919                        write_mask: wgpu::ColorWrites::ALL,
1920                    })],
1921                }),
1922                primitive: wgpu::PrimitiveState::default(),
1923                depth_stencil: None,
1924                multisample: wgpu::MultisampleState::default(),
1925                multiview_mask: None,
1926                cache: None,
1927            });
1928        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1929            label: Some("roxlap-gpu chunk_dda.blit_sampler"),
1930            address_mode_u: wgpu::AddressMode::ClampToEdge,
1931            address_mode_v: wgpu::AddressMode::ClampToEdge,
1932            address_mode_w: wgpu::AddressMode::ClampToEdge,
1933            mag_filter: wgpu::FilterMode::Nearest,
1934            min_filter: wgpu::FilterMode::Nearest,
1935            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1936            ..Default::default()
1937        });
1938        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1939            label: Some("roxlap-gpu chunk_dda.blit_bg"),
1940            layout: &bgl_blit,
1941            entries: &[
1942                wgpu::BindGroupEntry {
1943                    binding: 0,
1944                    resource: wgpu::BindingResource::TextureView(&storage_view),
1945                },
1946                wgpu::BindGroupEntry {
1947                    binding: 1,
1948                    resource: wgpu::BindingResource::Sampler(&sampler),
1949                },
1950            ],
1951        });
1952
1953        ChunkDdaResources {
1954            storage_size: (width, height),
1955            storage_view,
1956            uniform_buf,
1957            bgl_dda,
1958            pipeline_dda,
1959            blit_bg,
1960            pipeline_blit,
1961            _sampler: sampler,
1962        }
1963    }
1964
1965    /// GPU.4 render — outer DDA over chunk indices + inner DDA into
1966    /// non-empty chunks. `camera.position` is in **grid-local**
1967    /// voxel units. `max_outer_steps` caps how many chunks the
1968    /// outer DDA may traverse per ray (scene-demo wires `+ / -`
1969    /// through this).
1970    ///
1971    /// # Panics
1972    /// Internally `expect`s the grid-DDA resources to be built;
1973    /// they are constructed at the top of this function if missing.
1974    pub fn render_grid(&mut self, grid: &GpuGridResident, camera: &Camera, max_outer_steps: u32) {
1975        let Some(surf_tex) = self.acquire_frame() else {
1976            return;
1977        };
1978        let surf_view = surf_tex
1979            .texture
1980            .create_view(&wgpu::TextureViewDescriptor::default());
1981
1982        let surface_w = self.surface_config.width;
1983        let surface_h = self.surface_config.height;
1984        let surface_format = self.surface_config.format;
1985
1986        let needs_build = match &self.grid_dda {
1987            Some(r) => r.storage_size != (surface_w, surface_h),
1988            None => true,
1989        };
1990        if needs_build {
1991            self.grid_dda = Some(self.build_grid_dda(surface_w, surface_h, surface_format));
1992        }
1993        let dda = self.grid_dda.as_ref().expect("just built");
1994
1995        let uniform = GridDdaUniform {
1996            camera_pos: camera.position,
1997            _pad0: 0.0,
1998            camera_right: camera.right,
1999            _pad1: 0.0,
2000            camera_down: camera.down,
2001            _pad2: 0.0,
2002            camera_forward: camera.forward,
2003            fov_y_rad: camera.fov_y_rad,
2004            screen_size: [surface_w, surface_h],
2005            vsid: grid.vsid,
2006            max_outer_steps,
2007            chunks_dims: grid.chunks_dims,
2008            _pad3: 0,
2009            origin_chunk: grid.origin_chunk,
2010            _pad4: 0,
2011        };
2012        self.queue
2013            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
2014
2015        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2016            label: Some("roxlap-gpu grid_dda.bg"),
2017            layout: &dda.bgl_dda,
2018            entries: &[
2019                wgpu::BindGroupEntry {
2020                    binding: 0,
2021                    resource: dda.uniform_buf.as_entire_binding(),
2022                },
2023                wgpu::BindGroupEntry {
2024                    binding: 1,
2025                    resource: grid.occupancy.as_entire_binding(),
2026                },
2027                wgpu::BindGroupEntry {
2028                    binding: 2,
2029                    resource: grid.color_offsets.as_entire_binding(),
2030                },
2031                wgpu::BindGroupEntry {
2032                    binding: 3,
2033                    resource: grid.colors.as_entire_binding(),
2034                },
2035                wgpu::BindGroupEntry {
2036                    binding: 4,
2037                    resource: grid.chunk_colors_base.as_entire_binding(),
2038                },
2039                wgpu::BindGroupEntry {
2040                    binding: 5,
2041                    resource: grid.chunk_occupancy.as_entire_binding(),
2042                },
2043                wgpu::BindGroupEntry {
2044                    binding: 6,
2045                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
2046                },
2047            ],
2048        });
2049
2050        let mut encoder = self
2051            .device
2052            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2053                label: Some("roxlap-gpu grid encoder"),
2054            });
2055        {
2056            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
2057                label: Some("roxlap-gpu grid_dda compute"),
2058                timestamp_writes: None,
2059            });
2060            cpass.set_pipeline(&dda.pipeline_dda);
2061            cpass.set_bind_group(0, &dda_bg, &[]);
2062            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
2063        }
2064        {
2065            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2066                label: Some("roxlap-gpu grid_dda blit"),
2067                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2068                    view: &surf_view,
2069                    depth_slice: None,
2070                    resolve_target: None,
2071                    ops: wgpu::Operations {
2072                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
2073                        store: wgpu::StoreOp::Store,
2074                    },
2075                })],
2076                depth_stencil_attachment: None,
2077                timestamp_writes: None,
2078                occlusion_query_set: None,
2079                multiview_mask: None,
2080            });
2081            rpass.set_pipeline(&dda.pipeline_blit);
2082            rpass.set_bind_group(0, &dda.blit_bg, &[]);
2083            rpass.draw(0..3, 0..1);
2084        }
2085        self.queue.submit(std::iter::once(encoder.finish()));
2086        surf_tex.present();
2087        self.frame_count = self.frame_count.wrapping_add(1);
2088    }
2089
2090    fn build_grid_dda(
2091        &self,
2092        width: u32,
2093        height: u32,
2094        surface_format: wgpu::TextureFormat,
2095    ) -> GridDdaResources {
2096        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
2097            label: Some("roxlap-gpu grid_dda.storage"),
2098            size: wgpu::Extent3d {
2099                width,
2100                height,
2101                depth_or_array_layers: 1,
2102            },
2103            mip_level_count: 1,
2104            sample_count: 1,
2105            dimension: wgpu::TextureDimension::D2,
2106            format: wgpu::TextureFormat::Rgba8Unorm,
2107            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
2108            view_formats: &[],
2109        });
2110        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
2111
2112        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2113            label: Some("roxlap-gpu grid_dda.uniform"),
2114            size: std::mem::size_of::<GridDdaUniform>() as u64,
2115            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2116            mapped_at_creation: false,
2117        });
2118
2119        let dda_shader = self
2120            .device
2121            .create_shader_module(wgpu::ShaderModuleDescriptor {
2122                label: Some("grid_dda.wgsl"),
2123                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/grid_dda.wgsl").into()),
2124            });
2125        let bgl_dda = self
2126            .device
2127            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2128                label: Some("roxlap-gpu grid_dda.bgl"),
2129                entries: &[
2130                    bgl_uniform_entry(0),
2131                    bgl_storage_entry(1, true),
2132                    bgl_storage_entry(2, true),
2133                    bgl_storage_entry(3, true),
2134                    bgl_storage_entry(4, true),
2135                    bgl_storage_entry(5, true),
2136                    wgpu::BindGroupLayoutEntry {
2137                        binding: 6,
2138                        visibility: wgpu::ShaderStages::COMPUTE,
2139                        ty: wgpu::BindingType::StorageTexture {
2140                            access: wgpu::StorageTextureAccess::WriteOnly,
2141                            format: wgpu::TextureFormat::Rgba8Unorm,
2142                            view_dimension: wgpu::TextureViewDimension::D2,
2143                        },
2144                        count: None,
2145                    },
2146                ],
2147            });
2148        let dda_pl = self
2149            .device
2150            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2151                label: Some("roxlap-gpu grid_dda.layout"),
2152                bind_group_layouts: &[Some(&bgl_dda)],
2153                immediate_size: 0,
2154            });
2155        let pipeline_dda = self
2156            .device
2157            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
2158                label: Some("roxlap-gpu grid_dda.pipeline"),
2159                layout: Some(&dda_pl),
2160                module: &dda_shader,
2161                entry_point: Some("render_grid"),
2162                compilation_options: wgpu::PipelineCompilationOptions::default(),
2163                cache: None,
2164            });
2165
2166        let blit_shader = self
2167            .device
2168            .create_shader_module(wgpu::ShaderModuleDescriptor {
2169                label: Some("blit.wgsl"),
2170                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
2171            });
2172        let bgl_blit = self
2173            .device
2174            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2175                label: Some("roxlap-gpu grid_dda.blit_bgl"),
2176                entries: &[
2177                    wgpu::BindGroupLayoutEntry {
2178                        binding: 0,
2179                        visibility: wgpu::ShaderStages::FRAGMENT,
2180                        ty: wgpu::BindingType::Texture {
2181                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
2182                            view_dimension: wgpu::TextureViewDimension::D2,
2183                            multisampled: false,
2184                        },
2185                        count: None,
2186                    },
2187                    wgpu::BindGroupLayoutEntry {
2188                        binding: 1,
2189                        visibility: wgpu::ShaderStages::FRAGMENT,
2190                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
2191                        count: None,
2192                    },
2193                ],
2194            });
2195        let blit_pl = self
2196            .device
2197            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2198                label: Some("roxlap-gpu grid_dda.blit_layout"),
2199                bind_group_layouts: &[Some(&bgl_blit)],
2200                immediate_size: 0,
2201            });
2202        let pipeline_blit = self
2203            .device
2204            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
2205                label: Some("roxlap-gpu grid_dda.blit_pipeline"),
2206                layout: Some(&blit_pl),
2207                vertex: wgpu::VertexState {
2208                    module: &blit_shader,
2209                    entry_point: Some("vs_main"),
2210                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2211                    buffers: &[],
2212                },
2213                fragment: Some(wgpu::FragmentState {
2214                    module: &blit_shader,
2215                    entry_point: Some("fs_main"),
2216                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2217                    targets: &[Some(wgpu::ColorTargetState {
2218                        format: surface_format,
2219                        blend: None,
2220                        write_mask: wgpu::ColorWrites::ALL,
2221                    })],
2222                }),
2223                primitive: wgpu::PrimitiveState::default(),
2224                depth_stencil: None,
2225                multisample: wgpu::MultisampleState::default(),
2226                multiview_mask: None,
2227                cache: None,
2228            });
2229        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
2230            label: Some("roxlap-gpu grid_dda.blit_sampler"),
2231            address_mode_u: wgpu::AddressMode::ClampToEdge,
2232            address_mode_v: wgpu::AddressMode::ClampToEdge,
2233            address_mode_w: wgpu::AddressMode::ClampToEdge,
2234            mag_filter: wgpu::FilterMode::Nearest,
2235            min_filter: wgpu::FilterMode::Nearest,
2236            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
2237            ..Default::default()
2238        });
2239        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2240            label: Some("roxlap-gpu grid_dda.blit_bg"),
2241            layout: &bgl_blit,
2242            entries: &[
2243                wgpu::BindGroupEntry {
2244                    binding: 0,
2245                    resource: wgpu::BindingResource::TextureView(&storage_view),
2246                },
2247                wgpu::BindGroupEntry {
2248                    binding: 1,
2249                    resource: wgpu::BindingResource::Sampler(&sampler),
2250                },
2251            ],
2252        });
2253
2254        GridDdaResources {
2255            storage_size: (width, height),
2256            storage_view,
2257            uniform_buf,
2258            bgl_dda,
2259            pipeline_dda,
2260            blit_bg,
2261            pipeline_blit,
2262            _sampler: sampler,
2263        }
2264    }
2265
2266    /// GPU.5 render — multi-grid scene marcher. `cameras[i]` is the
2267    /// world camera transformed into grid `i`'s local frame (the grid
2268    /// marcher works in grid-local space; caller-supplied, see
2269    /// scene-demo's `redraw_gpu` for the glam-based transform).
2270    /// `fov_y_rad` is the shared vertical FOV; `max_outer_steps` caps
2271    /// per-ray chunk-DDA work for each grid.
2272    ///
2273    /// `sprite_camera` is the **world** camera: instanced sprites carry
2274    /// world-space positions/transforms, so they must project through
2275    /// the untransformed world camera — not `cameras[0]`, which is only
2276    /// the world camera when grid 0 is at identity. The frame is left
2277    /// pending for `present` / `paint_egui` (deferred present).
2278    ///
2279    /// # Panics
2280    /// If `cameras.len() != scene.grid_count`.
2281    pub fn render_scene(
2282        &mut self,
2283        scene: &GpuSceneResident,
2284        cameras: &[Camera],
2285        // XS.3 — per-grid world transforms (parallel to `cameras`) for
2286        // cross-grid shadows. Empty ⇒ identity (shadows stay intra-grid).
2287        grid_world: &[GridWorldTransform],
2288        sprite_camera: &Camera,
2289        fov_y_rad: f32,
2290        max_outer_steps: u32,
2291    ) {
2292        assert_eq!(
2293            cameras.len(),
2294            scene.grid_count as usize,
2295            "render_scene: {} cameras supplied, scene has {} grids",
2296            cameras.len(),
2297            scene.grid_count,
2298        );
2299        self.last_fov_y_rad = fov_y_rad; // cached for pixel_ray (picking)
2300
2301        // Deferred present: drop any frame a prior render left
2302        // un-presented (a host that skipped present/paint_egui) so we
2303        // never hold two outstanding swapchain textures.
2304        self.pending_frame = None;
2305        let Some(surf_tex) = self.acquire_frame() else {
2306            return;
2307        };
2308        let surf_view = surf_tex
2309            .texture
2310            .create_view(&wgpu::TextureViewDescriptor::default());
2311
2312        let surface_w = self.surface_config.width;
2313        let surface_h = self.surface_config.height;
2314        let surface_format = self.surface_config.format;
2315
2316        let needs_build = match &self.scene_dda {
2317            Some(r) => r.storage_size != (surface_w, surface_h),
2318            None => true,
2319        };
2320        if needs_build {
2321            self.scene_dda = Some(self.build_scene_dda(surface_w, surface_h, surface_format));
2322        }
2323        // GPU.9 — materialise the sprite pipeline the first frame
2324        // sprites are present (before the immutable `dda` borrow).
2325        // GPU.10.0 — build the model-DDA pipeline the first frame a
2326        // sprite registry is present.
2327        if self.sprite_registry.is_some() && self.sprite_model_dda.is_none() {
2328            self.sprite_model_dda = Some(self.build_sprite_model_dda());
2329        }
2330        // GPU.10.3 — frustum-cull + screen-tile-bin the sprite instances
2331        // (needs &mut self for buffer growth, so before the immutable
2332        // scene_dda borrow). Captures (visible_count, tiles_x); None when
2333        // nothing is in view.
2334        let sprite_pass: Option<(u32, u32)> = if let Some(reg) = self.sprite_registry.as_mut() {
2335            if reg.instance_capacity > 0 {
2336                // World camera — sprite positions/transforms are world-
2337                // space (independent of any grid's transform).
2338                let cam = sprite_camera;
2339                #[allow(clippy::cast_precision_loss)]
2340                let aspect = surface_w as f32 / surface_h as f32;
2341                let half_h = (fov_y_rad * 0.5).tan();
2342                let frustum = sprite_model::ViewFrustum {
2343                    pos: cam.position,
2344                    right: cam.right,
2345                    down: cam.down,
2346                    forward: cam.forward,
2347                    half_w: half_h * aspect,
2348                    half_h,
2349                    far: 1.0e9,
2350                };
2351                let (visible, tiles_x, _tiles_y) = reg.cull_bin_upload(
2352                    &self.device,
2353                    &self.queue,
2354                    &frustum,
2355                    surface_w,
2356                    surface_h,
2357                    SPRITE_TILE_SIZE,
2358                    self.sprite_lod_px,
2359                );
2360                (visible > 0).then_some((visible, tiles_x))
2361            } else {
2362                None
2363            }
2364        } else {
2365            None
2366        };
2367        let dda = self.scene_dda.as_ref().expect("just built");
2368
2369        // Refresh the blit's flip flag each frame (offset 8, after the
2370        // width/height), so toggling the flip applies without a resize.
2371        self.queue.write_buffer(
2372            &dda.blit_dims,
2373            8,
2374            bytemuck::bytes_of(&[u32::from(self.flip_x), 0u32]),
2375        );
2376
2377        // Pack per-grid cameras into a runtime-sized storage buffer
2378        // (binding 15) — no fixed cap on grid count.
2379        let mut cam_vec: Vec<SceneDdaPerGridCamera> = cameras
2380            .iter()
2381            .map(SceneDdaPerGridCamera::from_camera)
2382            .collect();
2383        // XS.3 — stamp each grid's world transform for cross-grid shadows.
2384        for (c, t) in cam_vec.iter_mut().zip(grid_world.iter()) {
2385            c.set_world_transform(t);
2386        }
2387
2388        // DL — pack the per-frame lights (already grid-local). The per-grid
2389        // sun direction rides in each `PerGridCamera.sun_dir` (binding 15);
2390        // point lights go in one new storage buffer (binding 18). All-zero
2391        // ⇒ the pre-DL render. Shared with the headless path.
2392        let lights = &self.scene_lights;
2393        let (grid_point_lights, sun_flags, point_count) = pack_scene_lights(
2394            &self.device,
2395            lights,
2396            scene.grid_count as usize,
2397            &mut cam_vec,
2398        );
2399        let grid_cameras = upload_grid_cameras(&self.device, &cam_vec);
2400
2401        let uniform = SceneDdaUniform {
2402            fov_y_rad,
2403            grid_count: scene.grid_count,
2404            max_outer_steps,
2405            _pad0: 0,
2406            screen_size: [surface_w, surface_h],
2407            _pad1: [0; 2],
2408            fog_color: [
2409                self.fog_color[0],
2410                self.fog_color[1],
2411                self.fog_color[2],
2412                self.fog_near,
2413            ],
2414            fog_far: self.fog_far,
2415            // L3.1: always write scene depth. Costs one storage store per
2416            // pixel, and the depth is needed for sprite z-test, sprite-less
2417            // `pick_depth`, and `draw_lines` occlusion alike.
2418            write_depth: 1,
2419            occ_page_words: scene.occupancy_page_words,
2420            occ_num_pages: scene.occupancy_num_pages,
2421            mip_scan_dist: self.scene_mip_scan_dist,
2422            terrain_has_translucent: u32::from(self.scene_terrain_translucent),
2423            terrain_map_count: self.scene_terrain_map.len() as u32,
2424            _pad4: 0,
2425            // Sky direction comes from the world (sprite) camera, so a
2426            // grid-less sprite-only scene still paints a real sky.
2427            sky_cam: SceneDdaPerGridCamera::from_camera(sprite_camera),
2428            side_shades0: self.scene_side_shades[0],
2429            side_shades1: self.scene_side_shades[1],
2430            sun_color: [
2431                lights.sun_color[0],
2432                lights.sun_color[1],
2433                lights.sun_color[2],
2434                lights.sun_intensity,
2435            ],
2436            ambient_color: [
2437                lights.ambient[0],
2438                lights.ambient[1],
2439                lights.ambient[2],
2440                lights.shadow_strength,
2441            ],
2442            sun_flags,
2443            point_light_count: point_count,
2444            shadow_max_steps: lights.shadow_max_steps,
2445            _pad5: 0,
2446            shadow_bias: lights.shadow_bias,
2447            shadow_max_dist: lights.shadow_max_dist,
2448            _pad6: [0.0; 2],
2449            shadow_tint: [
2450                lights.shadow_tint[0],
2451                lights.shadow_tint[1],
2452                lights.shadow_tint[2],
2453                0.0,
2454            ],
2455            style_bands: lights.style_bands,
2456            // XS.4.3 — visible sprite casters for the scene-pass cast march
2457            // (only when the device is sprite-shadow capable; else the cast
2458            // bindings/loop are absent).
2459            sprite_cast_count: if self.sprite_shadows_capable {
2460                sprite_pass.map_or(0, |(visible, _)| visible)
2461            } else {
2462                0
2463            },
2464            _pad7: [0; 2],
2465        };
2466        self.queue
2467            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
2468
2469        let mut dda_entries = vec![
2470            wgpu::BindGroupEntry {
2471                binding: 0,
2472                resource: dda.uniform_buf.as_entire_binding(),
2473            },
2474            // Occupancy page 0 at binding 1; pages 1..MAX_OCC_PAGES
2475            // at bindings 12.. (see GPU.X occupancy paging).
2476            wgpu::BindGroupEntry {
2477                binding: 1,
2478                resource: scene.occupancy_pages[0].as_entire_binding(),
2479            },
2480            wgpu::BindGroupEntry {
2481                binding: 2,
2482                resource: scene.all_color_offsets.as_entire_binding(),
2483            },
2484            wgpu::BindGroupEntry {
2485                binding: 3,
2486                resource: scene.all_colors.as_entire_binding(),
2487            },
2488            wgpu::BindGroupEntry {
2489                binding: 4,
2490                resource: scene.all_chunk_colors_base.as_entire_binding(),
2491            },
2492            wgpu::BindGroupEntry {
2493                binding: 5,
2494                resource: scene.all_chunk_occupancy.as_entire_binding(),
2495            },
2496            wgpu::BindGroupEntry {
2497                binding: 6,
2498                resource: scene.grid_static_meta.as_entire_binding(),
2499            },
2500            wgpu::BindGroupEntry {
2501                binding: 7,
2502                resource: scene.all_slot_chunk_idx.as_entire_binding(),
2503            },
2504            wgpu::BindGroupEntry {
2505                binding: 8,
2506                resource: dda.framebuffer.as_entire_binding(),
2507            },
2508            wgpu::BindGroupEntry {
2509                binding: 9,
2510                resource: wgpu::BindingResource::TextureView(&self.sky_view),
2511            },
2512            wgpu::BindGroupEntry {
2513                binding: 10,
2514                resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
2515            },
2516            wgpu::BindGroupEntry {
2517                binding: 11,
2518                resource: dda.depth_buffer.as_entire_binding(),
2519            },
2520            wgpu::BindGroupEntry {
2521                binding: 12,
2522                resource: scene.occupancy_pages[1].as_entire_binding(),
2523            },
2524            wgpu::BindGroupEntry {
2525                binding: 13,
2526                resource: scene.occupancy_pages[2].as_entire_binding(),
2527            },
2528            wgpu::BindGroupEntry {
2529                binding: 14,
2530                resource: scene.occupancy_pages[3].as_entire_binding(),
2531            },
2532            wgpu::BindGroupEntry {
2533                binding: 15,
2534                resource: grid_cameras.as_entire_binding(),
2535            },
2536            wgpu::BindGroupEntry {
2537                binding: 16,
2538                resource: dda.materials_pal_buf.as_entire_binding(),
2539            },
2540            wgpu::BindGroupEntry {
2541                binding: 17,
2542                resource: dda.terrain_map_buf.as_entire_binding(),
2543            },
2544            // DL — per-grid point lights (18). The per-grid sun dir
2545            // rides in PerGridCamera.sun_dir (binding 15).
2546            wgpu::BindGroupEntry {
2547                binding: 18,
2548                resource: grid_point_lights.as_entire_binding(),
2549            },
2550        ];
2551        // XS.4.3 — sprite-cast bindings (19..21). On a capable device the BGL
2552        // has them, so bind the sprite registry when present (terrain shadow
2553        // rays test sprite volumes), else the dummy (sprite_cast_count == 0).
2554        if self.sprite_shadows_capable {
2555            let dummy = dda
2556                .sprite_cast_dummy
2557                .as_ref()
2558                .expect("capable scene_dda has a sprite-cast dummy");
2559            let (insts, models, occ) = match &self.sprite_registry {
2560                Some(reg) => (&reg.instances, &reg.model_meta, &reg.occupancy),
2561                None => (dummy, dummy, dummy),
2562            };
2563            dda_entries.push(wgpu::BindGroupEntry {
2564                binding: 19,
2565                resource: insts.as_entire_binding(),
2566            });
2567            dda_entries.push(wgpu::BindGroupEntry {
2568                binding: 20,
2569                resource: models.as_entire_binding(),
2570            });
2571            dda_entries.push(wgpu::BindGroupEntry {
2572                binding: 21,
2573                resource: occ.as_entire_binding(),
2574            });
2575        }
2576        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2577            label: Some("roxlap-gpu scene_dda.bg"),
2578            layout: &dda.bgl_dda,
2579            entries: &dda_entries,
2580        });
2581
2582        // GPU.9 — when sprites are present, build both splatter bind
2583        // groups up front (the splat pass writes the key buffer; the
2584        // resolve pass reads keys + scene depth and writes colour).
2585        // GPU.10.3 — model-DDA bind group + per-frame uniform, using the
2586        // cull/bin results captured above. Per-model + per-instance data
2587        // + the tile lists live in the registry buffers.
2588        let sprite_model_bg = match (&self.sprite_model_dda, &self.sprite_registry, sprite_pass) {
2589            (Some(smd), Some(reg), Some((visible, tiles_x))) => {
2590                // World camera (see the cull pass above) — sprites
2591                // project through it regardless of grid 0's transform.
2592                let cam = sprite_camera;
2593                // DL.4 — world-space lights for the sprite pass (sprites are
2594                // world-space, not grid-local). Shadow inputs: see XS.4.2 below.
2595                let dl = &self.scene_lights;
2596                let sprite_sun_enabled = dl.world_sun_dir != [0.0; 3];
2597                let sprite_pts: Vec<GpuPointLight> = dl
2598                    .world_points
2599                    .iter()
2600                    .take(MAX_POINT_LIGHTS)
2601                    .map(|l| GpuPointLight {
2602                        pos: l.position,
2603                        radius: l.radius,
2604                        color: l.color,
2605                        intensity: l.intensity,
2606                        // XS.4.2 — honour the light's caster flag so a
2607                        // receiving sprite is shadowed by it (capable devices).
2608                        casts_shadow: u32::from(l.casts_shadow),
2609                        _pad: [0; 3],
2610                    })
2611                    .collect();
2612                let sprite_point_count = sprite_pts.len() as u32;
2613                let sprite_point_buf = upload_grid_point_lights(&self.device, &sprite_pts);
2614                // sun_flags bit0 = sun enabled, bit1 = sun casts shadow (XS.4.2),
2615                // bit2 = dynamic lighting active.
2616                let sprite_sun_flags = u32::from(sprite_sun_enabled)
2617                    | (u32::from(dl.sun_casts_shadow) << 1)
2618                    | (u32::from(dl.enabled) << 2);
2619                let uni = SpriteModelUniform {
2620                    cam_pos: cam.position,
2621                    _p0: 0.0,
2622                    cam_right: cam.right,
2623                    _p1: 0.0,
2624                    cam_down: cam.down,
2625                    _p2: 0.0,
2626                    cam_forward: cam.forward,
2627                    _p3: 0.0,
2628                    fog_color: [
2629                        self.fog_color[0],
2630                        self.fog_color[1],
2631                        self.fog_color[2],
2632                        self.fog_near,
2633                    ],
2634                    screen_size: [surface_w, surface_h],
2635                    instance_count: visible,
2636                    fog_far: self.fog_far,
2637                    fov_y_rad,
2638                    tiles_x,
2639                    tile_size: SPRITE_TILE_SIZE,
2640                    has_translucent: u32::from(self.sprite_has_translucent),
2641                    sun_dir: [
2642                        dl.world_sun_dir[0],
2643                        dl.world_sun_dir[1],
2644                        dl.world_sun_dir[2],
2645                        0.0,
2646                    ],
2647                    sun_color: [
2648                        dl.sun_color[0],
2649                        dl.sun_color[1],
2650                        dl.sun_color[2],
2651                        dl.sun_intensity,
2652                    ],
2653                    ambient_color: [dl.ambient[0], dl.ambient[1], dl.ambient[2], 0.0],
2654                    sun_flags: sprite_sun_flags,
2655                    point_light_count: sprite_point_count,
2656                    _pad_dl: [0; 2],
2657                    shadow_tint: [dl.shadow_tint[0], dl.shadow_tint[1], dl.shadow_tint[2], 0.0],
2658                    style_bands: dl.style_bands,
2659                    // XS.4.2 — sprite-shadow (receive) ABI, mirroring the scene
2660                    // pass. Only consulted when the device is sprite-shadow
2661                    // capable (the shadowed shader variant is built); otherwise
2662                    // the stub `sprite_shadow_occluded` ignores them.
2663                    occ_num_pages: scene.occupancy_num_pages,
2664                    occ_page_words: scene.occupancy_page_words,
2665                    grid_count: scene.grid_count,
2666                    max_outer_steps,
2667                    shadow_max_steps: dl.shadow_max_steps,
2668                    shadow_bias: dl.shadow_bias,
2669                    shadow_max_dist: dl.shadow_max_dist,
2670                    shadow_strength: dl.shadow_strength,
2671                    _pad_xs: [0; 3],
2672                };
2673                self.queue
2674                    .write_buffer(&smd.uniform_buf, 0, bytemuck::bytes_of(&uni));
2675                let mut sprite_entries = vec![
2676                    wgpu::BindGroupEntry {
2677                        binding: 0,
2678                        resource: smd.uniform_buf.as_entire_binding(),
2679                    },
2680                    wgpu::BindGroupEntry {
2681                        binding: 1,
2682                        resource: reg.occupancy.as_entire_binding(),
2683                    },
2684                    wgpu::BindGroupEntry {
2685                        binding: 2,
2686                        resource: reg.colors.as_entire_binding(),
2687                    },
2688                    wgpu::BindGroupEntry {
2689                        binding: 3,
2690                        resource: reg.color_offsets.as_entire_binding(),
2691                    },
2692                    wgpu::BindGroupEntry {
2693                        binding: 4,
2694                        resource: reg.model_meta.as_entire_binding(),
2695                    },
2696                    wgpu::BindGroupEntry {
2697                        binding: 5,
2698                        resource: reg.instances.as_entire_binding(),
2699                    },
2700                    wgpu::BindGroupEntry {
2701                        binding: 6,
2702                        resource: dda.depth_buffer.as_entire_binding(),
2703                    },
2704                    wgpu::BindGroupEntry {
2705                        binding: 7,
2706                        resource: dda.framebuffer.as_entire_binding(),
2707                    },
2708                    wgpu::BindGroupEntry {
2709                        binding: 8,
2710                        resource: reg.tile_ranges.as_entire_binding(),
2711                    },
2712                    wgpu::BindGroupEntry {
2713                        binding: 9,
2714                        resource: reg.tile_instances.as_entire_binding(),
2715                    },
2716                    wgpu::BindGroupEntry {
2717                        binding: 10,
2718                        resource: reg.dirs.as_entire_binding(),
2719                    },
2720                    wgpu::BindGroupEntry {
2721                        binding: 11,
2722                        resource: reg.colmul.as_entire_binding(),
2723                    },
2724                    wgpu::BindGroupEntry {
2725                        binding: 12,
2726                        resource: smd.materials_buf.as_entire_binding(),
2727                    },
2728                    wgpu::BindGroupEntry {
2729                        binding: 13,
2730                        resource: reg.materials_vox.as_entire_binding(),
2731                    },
2732                    // DL.7 — world point lights (15). (Binding 14 univec
2733                    // normal table dropped — face-normal lighting now.)
2734                    wgpu::BindGroupEntry {
2735                        binding: 15,
2736                        resource: sprite_point_buf.as_entire_binding(),
2737                    },
2738                ];
2739                // XS.4.2 — when capable, bind the terrain occupancy set (the
2740                // same resident buffers + the per-frame grid cameras the scene
2741                // pass uses) so sprite shadow rays march terrain. Must match
2742                // the BGL built in `build_sprite_model_dda`.
2743                if self.sprite_shadows_capable {
2744                    let terrain: [(u32, &wgpu::Buffer); 8] = [
2745                        (16, &scene.occupancy_pages[0]),
2746                        (17, &scene.occupancy_pages[1]),
2747                        (18, &scene.occupancy_pages[2]),
2748                        (19, &scene.occupancy_pages[3]),
2749                        (20, &scene.all_chunk_occupancy),
2750                        (21, &scene.all_slot_chunk_idx),
2751                        (22, &scene.grid_static_meta),
2752                        (23, &grid_cameras),
2753                    ];
2754                    for (binding, buf) in terrain {
2755                        sprite_entries.push(wgpu::BindGroupEntry {
2756                            binding,
2757                            resource: buf.as_entire_binding(),
2758                        });
2759                    }
2760                }
2761                Some(self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2762                    label: Some("roxlap-gpu sprite_model_dda.bg"),
2763                    layout: &smd.bgl,
2764                    entries: &sprite_entries,
2765                }))
2766            }
2767            _ => None,
2768        };
2769
2770        let mut encoder = self
2771            .device
2772            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2773                label: Some("roxlap-gpu scene encoder"),
2774            });
2775        {
2776            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
2777                label: Some("roxlap-gpu scene_dda compute"),
2778                timestamp_writes: None,
2779            });
2780            cpass.set_pipeline(&dda.pipeline_dda);
2781            cpass.set_bind_group(0, &dda_bg, &[]);
2782            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
2783        }
2784        // GPU.10 — sprite model-DDA pass: one thread per pixel marches
2785        // the tile's instances + composites against scene depth, after
2786        // the scene pass wrote the depth buffer and before the blit.
2787        if let (Some(smd), Some(bg)) = (&self.sprite_model_dda, &sprite_model_bg) {
2788            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
2789                label: Some("roxlap-gpu sprite_model_dda"),
2790                timestamp_writes: None,
2791            });
2792            cpass.set_pipeline(&smd.pipeline);
2793            cpass.set_bind_group(0, bg, &[]);
2794            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
2795        }
2796        {
2797            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2798                label: Some("roxlap-gpu scene_dda blit"),
2799                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2800                    view: &surf_view,
2801                    depth_slice: None,
2802                    resolve_target: None,
2803                    ops: wgpu::Operations {
2804                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
2805                        store: wgpu::StoreOp::Store,
2806                    },
2807                })],
2808                depth_stencil_attachment: None,
2809                timestamp_writes: None,
2810                occlusion_query_set: None,
2811                multiview_mask: None,
2812            });
2813            rpass.set_pipeline(&dda.pipeline_blit);
2814            rpass.set_bind_group(0, &dda.blit_bg, &[]);
2815            rpass.draw(0..3, 0..1);
2816        }
2817        self.queue.submit(std::iter::once(encoder.finish()));
2818        // This frame wrote `scene_dda.depth_buffer`, so depth-tested
2819        // overlays may test against it.
2820        self.scene_depth_valid = true;
2821        // Deferred present — the host calls `present` or `paint_egui`.
2822        self.pending_frame = Some((surf_tex, surf_view));
2823        self.frame_count = self.frame_count.wrapping_add(1);
2824    }
2825
2826    /// Deferred counterpart of [`Self::render`]: clears the frame to the
2827    /// clear colour but parks it for [`Self::present`] /
2828    /// [`Self::paint_egui`] instead of presenting. The facade calls this
2829    /// while no grid is resident so a HUD can still go over an empty scene.
2830    pub fn render_clear_deferred(&mut self) {
2831        // With no scene pass this frame, anything left in
2832        // `scene_dda.depth_buffer` by an earlier scene is stale, so
2833        // depth-tested overlays must not test against it.
2834        self.scene_depth_valid = false;
2835        self.pending_frame = None;
2836        let Some(frame) = self.acquire_frame() else {
2837            return;
2838        };
2839        let frame_view = frame
2840            .texture
2841            .create_view(&wgpu::TextureViewDescriptor::default());
2842        let [r, g, b] = self.clear_colour;
2843        let colour_targets = [Some(wgpu::RenderPassColorAttachment {
2844            view: &frame_view,
2845            depth_slice: None,
2846            resolve_target: None,
2847            ops: wgpu::Operations {
2848                load: wgpu::LoadOp::Clear(wgpu::Color { r, g, b, a: 1.0 }),
2849                store: wgpu::StoreOp::Store,
2850            },
2851        })];
2852        let enc_desc = wgpu::CommandEncoderDescriptor {
2853            label: Some("roxlap-gpu clear (deferred)"),
2854        };
2855        let mut enc = self.device.create_command_encoder(&enc_desc);
2856        // Nothing is drawn: the pass is opened and dropped straight away,
2857        // leaving only its clear load op.
2858        drop(enc.begin_render_pass(&wgpu::RenderPassDescriptor {
2859            label: Some("roxlap-gpu clear (deferred)"),
2860            color_attachments: &colour_targets,
2861            depth_stencil_attachment: None,
2862            timestamp_writes: None,
2863            occlusion_query_set: None,
2864            multiview_mask: None,
2865        }));
2866        self.queue.submit(std::iter::once(enc.finish()));
2867        self.pending_frame = Some((frame, frame_view));
2868    }
2869
2870    /// Present the frame stashed by the last deferred render
2871    /// ([`Self::render_scene`] / [`Self::render_clear_deferred`]). No-op
2872    /// if nothing is pending (e.g. the surface was lost mid-render).
2873    pub fn present(&mut self) {
2874        if let Some((surf_tex, _view)) = self.pending_frame.take() {
2875            surf_tex.present();
2876        }
2877    }
2878
2879    /// Block until the GPU has drained every submitted command (queue
2880    /// idle), dropping any not-yet-presented swapchain frame first. Call at
2881    /// shutdown — before the [`GpuRenderer`] (and its window) drop — so the
2882    /// device is torn down with no work in flight and no half-presented
2883    /// frame, instead of yanking the swapchain mid-submission (which leaves
2884    /// the driver/compositor compositing stale buffers — the "leftover
2885    /// triangles / flicker after an unclean exit" symptom). No-op on wasm
2886    /// (`poll(Wait)` is unavailable there; the browser reclaims the device).
2887    pub fn wait_idle(&mut self) {
2888        // Release the acquired-but-unpresented frame so its swapchain image
2889        // isn't held across teardown.
2890        self.pending_frame = None;
2891        #[cfg(not(target_arch = "wasm32"))]
2892        {
2893            self.device.poll(wgpu::PollType::wait_indefinitely()).ok();
2894        }
2895    }
2896
2897    /// Draw depth-tested world-space [`GpuLine`]s over the pending frame
2898    /// (L3.2). Projects each endpoint with `cam` (the marcher's pinhole) +
2899    /// the last frame's FOV / surface size, expands to screen-space quads,
2900    /// and runs a `LoadOp::Load` pass into the pending swapchain view — so
2901    /// the lines land on the marched frame and a later `present` /
2902    /// `paint_egui` still finishes it (the pending frame is left intact).
2903    /// Depth-tested lines are occluded by nearer marched geometry (compared
2904    /// against the scene-DDA depth buffer's `best_t`); call after `render`,
2905    /// before `present` / `paint_egui`. No-op if no frame is pending.
2906    pub fn draw_lines_deferred(&mut self, cam: &GpuLineCamera, lines: &[GpuLine]) {
2907        if self.pending_frame.is_none() || lines.is_empty() {
2908            return;
2909        }
2910        let (w, h) = (self.surface_config.width, self.surface_config.height);
2911        let fov = self.last_fov_y_rad;
2912        if w == 0 || h == 0 || fov <= 0.0 {
2913            return; // no frame marched yet — no projection to reuse
2914        }
2915        let verts = build_line_vertices(cam, lines, w, h, fov, self.flip_x);
2916        if verts.is_empty() {
2917            return;
2918        }
2919        self.ensure_line_resources();
2920        let res = self.line_resources.as_ref().expect("just built");
2921
2922        // Skip the depth test when there's no current scene depth to read —
2923        // either no buffer at all (sprite-only / never-rendered) or this
2924        // frame was a color-only clear so the buffer is stale (an empty
2925        // scene drawn after a grid scene). The 1-word dummy / stale buffer
2926        // is still bound to satisfy the layout; `no_depth = 1` keeps the
2927        // shader from indexing it.
2928        let no_depth = u32::from(self.scene_dda.is_none() || !self.scene_depth_valid);
2929        let params = LineParams {
2930            screen_w: w,
2931            screen_h: h,
2932            depth_bias: LINE_DEPTH_BIAS,
2933            no_depth,
2934            flip_x: u32::from(self.flip_x),
2935            _pad: [0; 3],
2936        };
2937        self.queue
2938            .write_buffer(&res.uniform_buf, 0, bytemuck::bytes_of(&params));
2939
2940        let depth_resource = match &self.scene_dda {
2941            Some(dda) => dda.depth_buffer.as_entire_binding(),
2942            None => res.dummy_depth.as_entire_binding(),
2943        };
2944        let bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2945            label: Some("roxlap-gpu line.bg"),
2946            layout: &res.bgl,
2947            entries: &[
2948                wgpu::BindGroupEntry {
2949                    binding: 0,
2950                    resource: res.uniform_buf.as_entire_binding(),
2951                },
2952                wgpu::BindGroupEntry {
2953                    binding: 1,
2954                    resource: depth_resource,
2955                },
2956            ],
2957        });
2958
2959        // Grow-only persistent vertex buffer (L3.3): one `write_buffer`
2960        // per overlay, reused across frames. Power-of-two capacity keeps
2961        // re-allocation rare as the segment count drifts.
2962        let needed = std::mem::size_of_val(verts.as_slice()) as u64;
2963        if self.line_vbuf_cap < needed {
2964            let cap = needed.next_power_of_two().max(4096);
2965            self.line_vbuf = Some(self.device.create_buffer(&wgpu::BufferDescriptor {
2966                label: Some("roxlap-gpu line.vbuf"),
2967                size: cap,
2968                usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
2969                mapped_at_creation: false,
2970            }));
2971            self.line_vbuf_cap = cap;
2972        }
2973        let vbuf = self.line_vbuf.as_ref().expect("ensured above");
2974        self.queue
2975            .write_buffer(vbuf, 0, bytemuck::cast_slice(&verts));
2976
2977        let view = &self.pending_frame.as_ref().expect("checked above").1;
2978        let mut encoder = self
2979            .device
2980            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2981                label: Some("roxlap-gpu lines"),
2982            });
2983        {
2984            // `LoadOp::Load` keeps the marcher's frame; the lines draw over
2985            // it. Manual depth test in the FS (no depth-stencil attachment).
2986            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2987                label: Some("roxlap-gpu line paint"),
2988                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2989                    view,
2990                    depth_slice: None,
2991                    resolve_target: None,
2992                    ops: wgpu::Operations {
2993                        load: wgpu::LoadOp::Load,
2994                        store: wgpu::StoreOp::Store,
2995                    },
2996                })],
2997                depth_stencil_attachment: None,
2998                timestamp_writes: None,
2999                occlusion_query_set: None,
3000                multiview_mask: None,
3001            });
3002            pass.set_pipeline(&res.pipeline);
3003            pass.set_bind_group(0, &bg, &[]);
3004            pass.set_vertex_buffer(0, vbuf.slice(..));
3005            pass.draw(0..verts.len() as u32, 0..1);
3006        }
3007        self.queue.submit(std::iter::once(encoder.finish()));
3008        // pending_frame left intact — present/paint_egui finishes the frame.
3009    }
3010
3011    /// Lazy-build the [`LineResources`] (`line.wgsl` pipeline + uniform +
3012    /// dummy depth buffer). The colour target uses the surface format with
3013    /// straight-alpha over-blending; no depth-stencil attachment (the depth
3014    /// test is manual in the fragment shader against the scene depth buffer).
3015    fn ensure_line_resources(&mut self) {
3016        if self.line_resources.is_some() {
3017            return;
3018        }
3019        let shader = self
3020            .device
3021            .create_shader_module(wgpu::ShaderModuleDescriptor {
3022                label: Some("line.wgsl"),
3023                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/line.wgsl").into()),
3024            });
3025        let bgl = self
3026            .device
3027            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3028                label: Some("roxlap-gpu line.bgl"),
3029                entries: &[
3030                    wgpu::BindGroupLayoutEntry {
3031                        binding: 0,
3032                        visibility: wgpu::ShaderStages::FRAGMENT,
3033                        ty: wgpu::BindingType::Buffer {
3034                            ty: wgpu::BufferBindingType::Uniform,
3035                            has_dynamic_offset: false,
3036                            min_binding_size: None,
3037                        },
3038                        count: None,
3039                    },
3040                    wgpu::BindGroupLayoutEntry {
3041                        binding: 1,
3042                        visibility: wgpu::ShaderStages::FRAGMENT,
3043                        ty: wgpu::BindingType::Buffer {
3044                            ty: wgpu::BufferBindingType::Storage { read_only: true },
3045                            has_dynamic_offset: false,
3046                            min_binding_size: None,
3047                        },
3048                        count: None,
3049                    },
3050                ],
3051            });
3052        let layout = self
3053            .device
3054            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3055                label: Some("roxlap-gpu line.layout"),
3056                bind_group_layouts: &[Some(&bgl)],
3057                immediate_size: 0,
3058            });
3059        let pipeline = self
3060            .device
3061            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
3062                label: Some("roxlap-gpu line.pipeline"),
3063                layout: Some(&layout),
3064                vertex: wgpu::VertexState {
3065                    module: &shader,
3066                    entry_point: Some("vs_main"),
3067                    compilation_options: wgpu::PipelineCompilationOptions::default(),
3068                    buffers: &[wgpu::VertexBufferLayout {
3069                        array_stride: std::mem::size_of::<LineVertex>() as u64,
3070                        step_mode: wgpu::VertexStepMode::Vertex,
3071                        attributes: &wgpu::vertex_attr_array![
3072                            0 => Float32x2, // pos (NDC)
3073                            1 => Float32,   // depth
3074                            2 => Float32,   // depth_test
3075                            3 => Float32x4, // color
3076                        ],
3077                    }],
3078                },
3079                fragment: Some(wgpu::FragmentState {
3080                    module: &shader,
3081                    entry_point: Some("fs_main"),
3082                    compilation_options: wgpu::PipelineCompilationOptions::default(),
3083                    targets: &[Some(wgpu::ColorTargetState {
3084                        format: self.surface_config.format,
3085                        blend: Some(wgpu::BlendState::ALPHA_BLENDING),
3086                        write_mask: wgpu::ColorWrites::ALL,
3087                    })],
3088                }),
3089                primitive: wgpu::PrimitiveState {
3090                    cull_mode: None,
3091                    ..Default::default()
3092                },
3093                depth_stencil: None,
3094                multisample: wgpu::MultisampleState::default(),
3095                multiview_mask: None,
3096                cache: None,
3097            });
3098        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
3099            label: Some("roxlap-gpu line.uniform"),
3100            size: std::mem::size_of::<LineParams>() as u64,
3101            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
3102            mapped_at_creation: false,
3103        });
3104        let dummy_depth = self.device.create_buffer(&wgpu::BufferDescriptor {
3105            label: Some("roxlap-gpu line.dummy_depth"),
3106            size: 4,
3107            usage: wgpu::BufferUsages::STORAGE,
3108            mapped_at_creation: false,
3109        });
3110        self.line_resources = Some(LineResources {
3111            pipeline,
3112            bgl,
3113            uniform_buf,
3114            dummy_depth,
3115        });
3116    }
3117
3118    /// Upload (or replace) an RGBA8 image as a sampled texture, returning
3119    /// a stable id for [`GpuImageQuad::image`]. `rgba` is row-major,
3120    /// `width * height * 4` bytes, straight (un-premultiplied) alpha.
3121    /// Reuses a dropped slot when one exists. Returns `0` for malformed
3122    /// input (an id that draws nothing).
3123    pub fn upload_image(&mut self, rgba: &[u8], width: u32, height: u32) -> usize {
3124        if width == 0 || height == 0 || rgba.len() != (width as usize) * (height as usize) * 4 {
3125            return 0;
3126        }
3127        let texture = self.device.create_texture(&wgpu::TextureDescriptor {
3128            label: Some("roxlap-gpu image_sprite"),
3129            size: wgpu::Extent3d {
3130                width,
3131                height,
3132                depth_or_array_layers: 1,
3133            },
3134            mip_level_count: 1,
3135            sample_count: 1,
3136            dimension: wgpu::TextureDimension::D2,
3137            format: wgpu::TextureFormat::Rgba8Unorm,
3138            usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
3139            view_formats: &[],
3140        });
3141        self.queue.write_texture(
3142            wgpu::TexelCopyTextureInfo {
3143                texture: &texture,
3144                mip_level: 0,
3145                origin: wgpu::Origin3d::ZERO,
3146                aspect: wgpu::TextureAspect::All,
3147            },
3148            rgba,
3149            wgpu::TexelCopyBufferLayout {
3150                offset: 0,
3151                bytes_per_row: Some(width * 4),
3152                rows_per_image: Some(height),
3153            },
3154            wgpu::Extent3d {
3155                width,
3156                height,
3157                depth_or_array_layers: 1,
3158            },
3159        );
3160        let view = texture.create_view(&wgpu::TextureViewDescriptor::default());
3161        let resident = ImageResident {
3162            view,
3163            _texture: texture,
3164        };
3165        if let Some(slot) = self.images.iter().position(Option::is_none) {
3166            self.images[slot] = Some(resident);
3167            slot
3168        } else {
3169            self.images.push(Some(resident));
3170            self.images.len() - 1
3171        }
3172    }
3173
3174    /// Release an image uploaded with [`Self::upload_image`] (the slot
3175    /// becomes reusable).
3176    pub fn drop_image(&mut self, id: usize) {
3177        if let Some(slot) = self.images.get_mut(id) {
3178            *slot = None;
3179        }
3180    }
3181
3182    /// Draw world-space 2D image sprites ([`GpuImageQuad`]) over the
3183    /// pending frame — the textured-quad sibling of
3184    /// [`Self::draw_lines_deferred`]. Projects each quad with `cam` (the
3185    /// marcher's pinhole) + the last frame's FOV / surface size, expands +
3186    /// near-clips to triangles, and runs one `LoadOp::Load` pass with a
3187    /// draw per quad (each binds its own texture). UVs are perspective-correct;
3188    /// depth-tested quads are occluded by nearer marched geometry. Call
3189    /// after `render`, before `present` / `paint_egui`. No-op if no frame
3190    /// is pending.
3191    pub fn draw_images_deferred(&mut self, cam: &GpuLineCamera, quads: &[GpuImageQuad]) {
3192        if self.pending_frame.is_none() || quads.is_empty() {
3193            return;
3194        }
3195        let (w, h) = (self.surface_config.width, self.surface_config.height);
3196        let fov = self.last_fov_y_rad;
3197        if w == 0 || h == 0 || fov <= 0.0 {
3198            return;
3199        }
3200
3201        // Concatenate every quad's verts into one buffer, recording each
3202        // quad's (range, texture) so they share a single render pass.
3203        let mut verts: Vec<ImageVertex> = Vec::new();
3204        let mut draws: Vec<(u32, u32, usize)> = Vec::new();
3205        for quad in quads {
3206            if !matches!(self.images.get(quad.image), Some(Some(_))) {
3207                continue; // dropped / never-uploaded id
3208            }
3209            let v = build_image_vertices(cam, quad, w, h, fov, self.flip_x);
3210            if v.is_empty() {
3211                continue;
3212            }
3213            let start = verts.len() as u32;
3214            verts.extend_from_slice(&v);
3215            draws.push((start, verts.len() as u32, quad.image));
3216        }
3217        if draws.is_empty() {
3218            return;
3219        }
3220
3221        self.ensure_image_resources();
3222        // See `draw_lines_deferred`: skip depth when there's no valid
3223        // current-frame scene depth (none built, or a color-only clear).
3224        let no_depth = u32::from(self.scene_dda.is_none() || !self.scene_depth_valid);
3225        let params = LineParams {
3226            screen_w: w,
3227            screen_h: h,
3228            depth_bias: LINE_DEPTH_BIAS,
3229            no_depth,
3230            flip_x: u32::from(self.flip_x),
3231            _pad: [0; 3],
3232        };
3233        {
3234            let res = self.image_resources.as_ref().expect("just built");
3235            self.queue
3236                .write_buffer(&res.uniform_buf, 0, bytemuck::bytes_of(&params));
3237        }
3238
3239        // Grow-only persistent vertex buffer (mirrors the line vbuf).
3240        let needed = std::mem::size_of_val(verts.as_slice()) as u64;
3241        if self.image_vbuf_cap < needed {
3242            let cap = needed.next_power_of_two().max(4096);
3243            self.image_vbuf = Some(self.device.create_buffer(&wgpu::BufferDescriptor {
3244                label: Some("roxlap-gpu image.vbuf"),
3245                size: cap,
3246                usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
3247                mapped_at_creation: false,
3248            }));
3249            self.image_vbuf_cap = cap;
3250        }
3251        let vbuf = self.image_vbuf.as_ref().expect("ensured above");
3252        self.queue
3253            .write_buffer(vbuf, 0, bytemuck::cast_slice(&verts));
3254
3255        // One bind group per draw (the texture view differs per quad).
3256        let res = self.image_resources.as_ref().expect("just built");
3257        let depth_resource = match &self.scene_dda {
3258            Some(dda) => dda.depth_buffer.as_entire_binding(),
3259            None => res.dummy_depth.as_entire_binding(),
3260        };
3261        let bind_groups: Vec<wgpu::BindGroup> = draws
3262            .iter()
3263            .map(|&(_, _, image_id)| {
3264                let resident = self.images[image_id].as_ref().expect("checked present");
3265                self.device.create_bind_group(&wgpu::BindGroupDescriptor {
3266                    label: Some("roxlap-gpu image.bg"),
3267                    layout: &res.bgl,
3268                    entries: &[
3269                        wgpu::BindGroupEntry {
3270                            binding: 0,
3271                            resource: res.uniform_buf.as_entire_binding(),
3272                        },
3273                        wgpu::BindGroupEntry {
3274                            binding: 1,
3275                            resource: depth_resource.clone(),
3276                        },
3277                        wgpu::BindGroupEntry {
3278                            binding: 2,
3279                            resource: wgpu::BindingResource::TextureView(&resident.view),
3280                        },
3281                        wgpu::BindGroupEntry {
3282                            binding: 3,
3283                            resource: wgpu::BindingResource::Sampler(&res.sampler),
3284                        },
3285                    ],
3286                })
3287            })
3288            .collect();
3289
3290        let view = &self.pending_frame.as_ref().expect("checked above").1;
3291        let mut encoder = self
3292            .device
3293            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
3294                label: Some("roxlap-gpu images"),
3295            });
3296        {
3297            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
3298                label: Some("roxlap-gpu image paint"),
3299                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
3300                    view,
3301                    depth_slice: None,
3302                    resolve_target: None,
3303                    ops: wgpu::Operations {
3304                        load: wgpu::LoadOp::Load,
3305                        store: wgpu::StoreOp::Store,
3306                    },
3307                })],
3308                depth_stencil_attachment: None,
3309                timestamp_writes: None,
3310                occlusion_query_set: None,
3311                multiview_mask: None,
3312            });
3313            pass.set_pipeline(&res.pipeline);
3314            pass.set_vertex_buffer(0, vbuf.slice(..));
3315            for (&(start, end, _), bg) in draws.iter().zip(&bind_groups) {
3316                pass.set_bind_group(0, bg, &[]);
3317                pass.draw(start..end, 0..1);
3318            }
3319        }
3320        self.queue.submit(std::iter::once(encoder.finish()));
3321        // pending_frame left intact — present/paint_egui finishes it.
3322    }
3323
3324    /// Lazy-build the [`ImageResources`] (`image.wgsl` pipeline + uniform +
3325    /// nearest sampler + dummy depth). Straight-alpha over-blend, no
3326    /// depth-stencil attachment (the depth test is manual in the FS).
3327    fn ensure_image_resources(&mut self) {
3328        if self.image_resources.is_some() {
3329            return;
3330        }
3331        let shader = self
3332            .device
3333            .create_shader_module(wgpu::ShaderModuleDescriptor {
3334                label: Some("image.wgsl"),
3335                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/image.wgsl").into()),
3336            });
3337        let bgl = self
3338            .device
3339            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3340                label: Some("roxlap-gpu image.bgl"),
3341                entries: &[
3342                    wgpu::BindGroupLayoutEntry {
3343                        binding: 0,
3344                        visibility: wgpu::ShaderStages::FRAGMENT,
3345                        ty: wgpu::BindingType::Buffer {
3346                            ty: wgpu::BufferBindingType::Uniform,
3347                            has_dynamic_offset: false,
3348                            min_binding_size: None,
3349                        },
3350                        count: None,
3351                    },
3352                    wgpu::BindGroupLayoutEntry {
3353                        binding: 1,
3354                        visibility: wgpu::ShaderStages::FRAGMENT,
3355                        ty: wgpu::BindingType::Buffer {
3356                            ty: wgpu::BufferBindingType::Storage { read_only: true },
3357                            has_dynamic_offset: false,
3358                            min_binding_size: None,
3359                        },
3360                        count: None,
3361                    },
3362                    wgpu::BindGroupLayoutEntry {
3363                        binding: 2,
3364                        visibility: wgpu::ShaderStages::FRAGMENT,
3365                        ty: wgpu::BindingType::Texture {
3366                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
3367                            view_dimension: wgpu::TextureViewDimension::D2,
3368                            multisampled: false,
3369                        },
3370                        count: None,
3371                    },
3372                    wgpu::BindGroupLayoutEntry {
3373                        binding: 3,
3374                        visibility: wgpu::ShaderStages::FRAGMENT,
3375                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
3376                        count: None,
3377                    },
3378                ],
3379            });
3380        let layout = self
3381            .device
3382            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3383                label: Some("roxlap-gpu image.layout"),
3384                bind_group_layouts: &[Some(&bgl)],
3385                immediate_size: 0,
3386            });
3387        let pipeline = self
3388            .device
3389            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
3390                label: Some("roxlap-gpu image.pipeline"),
3391                layout: Some(&layout),
3392                vertex: wgpu::VertexState {
3393                    module: &shader,
3394                    entry_point: Some("vs_main"),
3395                    compilation_options: wgpu::PipelineCompilationOptions::default(),
3396                    buffers: &[wgpu::VertexBufferLayout {
3397                        array_stride: std::mem::size_of::<ImageVertex>() as u64,
3398                        step_mode: wgpu::VertexStepMode::Vertex,
3399                        attributes: &wgpu::vertex_attr_array![
3400                            0 => Float32x2, // ndc
3401                            1 => Float32,   // w
3402                            2 => Float32,   // depth
3403                            3 => Float32,   // depth_test
3404                            4 => Float32,   // cutoff
3405                            5 => Float32x2, // uv
3406                            6 => Float32x4, // tint
3407                        ],
3408                    }],
3409                },
3410                fragment: Some(wgpu::FragmentState {
3411                    module: &shader,
3412                    entry_point: Some("fs_main"),
3413                    compilation_options: wgpu::PipelineCompilationOptions::default(),
3414                    targets: &[Some(wgpu::ColorTargetState {
3415                        format: self.surface_config.format,
3416                        blend: Some(wgpu::BlendState::ALPHA_BLENDING),
3417                        write_mask: wgpu::ColorWrites::ALL,
3418                    })],
3419                }),
3420                primitive: wgpu::PrimitiveState {
3421                    cull_mode: None,
3422                    ..Default::default()
3423                },
3424                depth_stencil: None,
3425                multisample: wgpu::MultisampleState::default(),
3426                multiview_mask: None,
3427                cache: None,
3428            });
3429        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
3430            label: Some("roxlap-gpu image.uniform"),
3431            size: std::mem::size_of::<LineParams>() as u64,
3432            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
3433            mapped_at_creation: false,
3434        });
3435        let dummy_depth = self.device.create_buffer(&wgpu::BufferDescriptor {
3436            label: Some("roxlap-gpu image.dummy_depth"),
3437            size: 4,
3438            usage: wgpu::BufferUsages::STORAGE,
3439            mapped_at_creation: false,
3440        });
3441        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
3442            label: Some("roxlap-gpu image.sampler"),
3443            // Nearest + clamp: pixel-art references want crisp texels and
3444            // no wrap bleed at the quad edges.
3445            address_mode_u: wgpu::AddressMode::ClampToEdge,
3446            address_mode_v: wgpu::AddressMode::ClampToEdge,
3447            address_mode_w: wgpu::AddressMode::ClampToEdge,
3448            mag_filter: wgpu::FilterMode::Nearest,
3449            min_filter: wgpu::FilterMode::Nearest,
3450            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
3451            ..Default::default()
3452        });
3453        self.image_resources = Some(ImageResources {
3454            pipeline,
3455            bgl,
3456            uniform_buf,
3457            dummy_depth,
3458            sampler,
3459        });
3460    }
3461
3462    /// Project a world point to window pixels under the marcher's
3463    /// vertical-FOV pinhole (the inverse of [`Self::pixel_ray`]), using
3464    /// the last-rendered frame's size + FOV. `None` before the first
3465    /// scene render or for a point at/behind the near plane.
3466    #[must_use]
3467    pub fn project_point(
3468        &self,
3469        cam_pos: [f32; 3],
3470        right: [f32; 3],
3471        down: [f32; 3],
3472        forward: [f32; 3],
3473        world: [f32; 3],
3474    ) -> Option<(f32, f32)> {
3475        let dda = self.scene_dda.as_ref()?;
3476        let (w, h) = dda.storage_size;
3477        if w == 0 || h == 0 || self.last_fov_y_rad <= 0.0 {
3478            return None;
3479        }
3480        let d = [
3481            world[0] - cam_pos[0],
3482            world[1] - cam_pos[1],
3483            world[2] - cam_pos[2],
3484        ];
3485        let cz = forward[0] * d[0] + forward[1] * d[1] + forward[2] * d[2];
3486        if cz < LINE_NEAR_Z {
3487            return None;
3488        }
3489        let cx = right[0] * d[0] + right[1] * d[1] + right[2] * d[2];
3490        let cy = down[0] * d[0] + down[1] * d[1] + down[2] * d[2];
3491        let half_h = (self.last_fov_y_rad * 0.5).tan();
3492        let half_w = half_h * (w as f32 / h as f32);
3493        let ndc_x = (cx / cz) / half_w;
3494        let ndc_y = -(cy / cz) / half_h;
3495        let sx = (ndc_x * 0.5 + 0.5) * w as f32;
3496        let sy = (0.5 - ndc_y * 0.5) * h as f32;
3497        Some((sx, sy))
3498    }
3499
3500    /// Overlay an `egui` UI on the pending frame, then present it
3501    /// (`hud` feature). `jobs` are the host's tessellated primitives
3502    /// (`egui::Context::tessellate`), `textures` the per-frame texture
3503    /// delta from `egui::FullOutput`, `pixels_per_point` the UI scale.
3504    ///
3505    /// Draws with `LoadOp::Load` over the marcher's frame (a separate
3506    /// encoder submitted after the scene's), so the UI composites on top
3507    /// of the world. No-op if no frame is pending.
3508    #[cfg(feature = "hud")]
3509    pub fn paint_egui(
3510        &mut self,
3511        jobs: &[egui::ClippedPrimitive],
3512        textures: &egui::TexturesDelta,
3513        pixels_per_point: f32,
3514    ) {
3515        let Some((surf_tex, surf_view)) = self.pending_frame.take() else {
3516            return;
3517        };
3518        let format = self.surface_config.format;
3519        let egui_rend = self.egui_renderer.get_or_insert_with(|| {
3520            egui_wgpu::Renderer::new(
3521                &self.device,
3522                format,
3523                egui_wgpu::RendererOptions {
3524                    msaa_samples: 1,
3525                    depth_stencil_format: None,
3526                    dithering: false,
3527                    ..Default::default()
3528                },
3529            )
3530        });
3531
3532        let screen = egui_wgpu::ScreenDescriptor {
3533            size_in_pixels: [self.surface_config.width, self.surface_config.height],
3534            pixels_per_point,
3535        };
3536        for (id, delta) in &textures.set {
3537            egui_rend.update_texture(&self.device, &self.queue, *id, delta);
3538        }
3539        let mut encoder = self
3540            .device
3541            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
3542                label: Some("roxlap-gpu egui"),
3543            });
3544        let user_bufs =
3545            egui_rend.update_buffers(&self.device, &self.queue, &mut encoder, jobs, &screen);
3546        {
3547            // `LoadOp::Load` keeps the marcher's frame; egui draws over it.
3548            let mut pass = encoder
3549                .begin_render_pass(&wgpu::RenderPassDescriptor {
3550                    label: Some("roxlap-gpu egui paint"),
3551                    color_attachments: &[Some(wgpu::RenderPassColorAttachment {
3552                        view: &surf_view,
3553                        depth_slice: None,
3554                        resolve_target: None,
3555                        ops: wgpu::Operations {
3556                            load: wgpu::LoadOp::Load,
3557                            store: wgpu::StoreOp::Store,
3558                        },
3559                    })],
3560                    depth_stencil_attachment: None,
3561                    timestamp_writes: None,
3562                    occlusion_query_set: None,
3563                    multiview_mask: None,
3564                })
3565                // egui-wgpu 0.29 requires a `'static` pass (see its docs).
3566                .forget_lifetime();
3567            egui_rend.render(&mut pass, jobs, &screen);
3568        }
3569        for id in &textures.free {
3570            egui_rend.free_texture(id);
3571        }
3572        self.queue.submit(
3573            user_bufs
3574                .into_iter()
3575                .chain(std::iter::once(encoder.finish())),
3576        );
3577        surf_tex.present();
3578    }
3579
3580    fn build_scene_dda(
3581        &self,
3582        width: u32,
3583        height: u32,
3584        surface_format: wgpu::TextureFormat,
3585    ) -> SceneDdaResources {
3586        // Framebuffer as a packed-`rgba8unorm` storage buffer (1 u32 per
3587        // pixel, row stride = `width`). See the struct-field note.
3588        let framebuffer = self.device.create_buffer(&wgpu::BufferDescriptor {
3589            label: Some("roxlap-gpu scene_dda.framebuffer"),
3590            size: u64::from(width) * u64::from(height) * 4,
3591            usage: wgpu::BufferUsages::STORAGE,
3592            mapped_at_creation: false,
3593        });
3594        // Screen size + flip flag for the blit's pixel→index math
3595        // (`vec2<u32>` size, then `flip_x` + pad). Re-written per frame in
3596        // `render_scene` so a flip toggle takes effect without a resize.
3597        let blit_dims = self.device.create_buffer(&wgpu::BufferDescriptor {
3598            label: Some("roxlap-gpu scene_dda.blit_dims"),
3599            size: 16,
3600            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
3601            mapped_at_creation: false,
3602        });
3603        self.queue.write_buffer(
3604            &blit_dims,
3605            0,
3606            bytemuck::bytes_of(&[width, height, u32::from(self.flip_x), 0u32]),
3607        );
3608
3609        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
3610            label: Some("roxlap-gpu scene_dda.uniform"),
3611            size: std::mem::size_of::<SceneDdaUniform>() as u64,
3612            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
3613            mapped_at_creation: false,
3614        });
3615
3616        // GPU.9 — per-pixel world-t depth (f32 bits as u32). Sized to
3617        // the storage texture; written by the scene pass when sprites
3618        // are active, read+tested by the sprite splatter.
3619        let depth_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
3620            label: Some("roxlap-gpu scene_dda.depth"),
3621            size: u64::from(width) * u64::from(height) * 4,
3622            // COPY_SRC so `read_depth_pixel` can stage it for picking.
3623            usage: wgpu::BufferUsages::STORAGE
3624                | wgpu::BufferUsages::COPY_DST
3625                | wgpu::BufferUsages::COPY_SRC,
3626            mapped_at_creation: false,
3627        });
3628        let depth_readback = self.device.create_buffer(&wgpu::BufferDescriptor {
3629            label: Some("roxlap-gpu scene_dda.depth_readback"),
3630            size: u64::from(width) * u64::from(height) * 4,
3631            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
3632            mapped_at_creation: false,
3633        });
3634        // XS.4.3 — on sprite-shadow-capable devices, splice the sprite-cast
3635        // snippet over the `sprites_occlude` stub (binds the sprite registry at
3636        // 19..21 so terrain shadow rays test sprite volumes).
3637        let capable = self.sprite_shadows_capable;
3638        let dda_shader = self
3639            .device
3640            .create_shader_module(wgpu::ShaderModuleDescriptor {
3641                label: Some("scene_dda.wgsl"),
3642                source: wgpu::ShaderSource::Wgsl(scene_shader_source(capable).into()),
3643            });
3644        let mut dda_entries = vec![
3645            bgl_uniform_entry(0),
3646            bgl_storage_entry(1, true),
3647            bgl_storage_entry(2, true),
3648            bgl_storage_entry(3, true),
3649            bgl_storage_entry(4, true),
3650            bgl_storage_entry(5, true),
3651            bgl_storage_entry(6, true),
3652            bgl_storage_entry(7, true),
3653            // Framebuffer storage buffer (read-write; the scene +
3654            // sprite passes write packed pixels into it).
3655            bgl_storage_entry(8, false),
3656            // GPU.8 sky panorama + sampler.
3657            wgpu::BindGroupLayoutEntry {
3658                binding: 9,
3659                visibility: wgpu::ShaderStages::COMPUTE,
3660                ty: wgpu::BindingType::Texture {
3661                    sample_type: wgpu::TextureSampleType::Float { filterable: true },
3662                    view_dimension: wgpu::TextureViewDimension::D2,
3663                    multisampled: false,
3664                },
3665                count: None,
3666            },
3667            wgpu::BindGroupLayoutEntry {
3668                binding: 10,
3669                visibility: wgpu::ShaderStages::COMPUTE,
3670                ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
3671                count: None,
3672            },
3673            // GPU.9 — read-write per-pixel depth buffer.
3674            bgl_storage_entry(11, false),
3675            // Occupancy pages 1..MAX_OCC_PAGES (page 0 is
3676            // binding 1). Unused pages bind a dummy buffer.
3677            bgl_storage_entry(12, true),
3678            bgl_storage_entry(13, true),
3679            bgl_storage_entry(14, true),
3680            // Per-grid cameras (runtime-sized; one per grid).
3681            bgl_storage_entry(15, true),
3682            // TV.6 — material palette + terrain colour→material map.
3683            bgl_storage_entry(16, true),
3684            bgl_storage_entry(17, true),
3685            // DL — per-grid point lights (18). Sun dir rides in
3686            // PerGridCamera (binding 15) to stay within the 16
3687            // storage-buffer limit.
3688            bgl_storage_entry(18, true),
3689        ];
3690        if capable {
3691            // XS.4.3 — sprite registry for the sprite-cast shadow march.
3692            dda_entries.push(bgl_storage_entry(19, true)); // sprite_instances
3693            dda_entries.push(bgl_storage_entry(20, true)); // sprite_models
3694            dda_entries.push(bgl_storage_entry(21, true)); // sprite_occupancy
3695        }
3696        let bgl_dda = self
3697            .device
3698            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3699                label: Some("roxlap-gpu scene_dda.bgl"),
3700                entries: &dda_entries,
3701            });
3702        let dda_pl = self
3703            .device
3704            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3705                label: Some("roxlap-gpu scene_dda.layout"),
3706                bind_group_layouts: &[Some(&bgl_dda)],
3707                immediate_size: 0,
3708            });
3709        let pipeline_dda = self
3710            .device
3711            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
3712                label: Some("roxlap-gpu scene_dda.pipeline"),
3713                layout: Some(&dda_pl),
3714                module: &dda_shader,
3715                entry_point: Some("render_scene"),
3716                compilation_options: wgpu::PipelineCompilationOptions::default(),
3717                cache: None,
3718            });
3719
3720        let blit_shader = self
3721            .device
3722            .create_shader_module(wgpu::ShaderModuleDescriptor {
3723                label: Some("scene_blit.wgsl"),
3724                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_blit.wgsl").into()),
3725            });
3726        let bgl_blit = self
3727            .device
3728            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3729                label: Some("roxlap-gpu scene_dda.blit_bgl"),
3730                entries: &[
3731                    // Framebuffer storage buffer (read-only in the blit).
3732                    wgpu::BindGroupLayoutEntry {
3733                        binding: 0,
3734                        visibility: wgpu::ShaderStages::FRAGMENT,
3735                        ty: wgpu::BindingType::Buffer {
3736                            ty: wgpu::BufferBindingType::Storage { read_only: true },
3737                            has_dynamic_offset: false,
3738                            min_binding_size: None,
3739                        },
3740                        count: None,
3741                    },
3742                    // Screen-size uniform for the pixel→index math.
3743                    wgpu::BindGroupLayoutEntry {
3744                        binding: 1,
3745                        visibility: wgpu::ShaderStages::FRAGMENT,
3746                        ty: wgpu::BindingType::Buffer {
3747                            ty: wgpu::BufferBindingType::Uniform,
3748                            has_dynamic_offset: false,
3749                            min_binding_size: None,
3750                        },
3751                        count: None,
3752                    },
3753                ],
3754            });
3755        let blit_pl = self
3756            .device
3757            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3758                label: Some("roxlap-gpu scene_dda.blit_layout"),
3759                bind_group_layouts: &[Some(&bgl_blit)],
3760                immediate_size: 0,
3761            });
3762        let pipeline_blit = self
3763            .device
3764            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
3765                label: Some("roxlap-gpu scene_dda.blit_pipeline"),
3766                layout: Some(&blit_pl),
3767                vertex: wgpu::VertexState {
3768                    module: &blit_shader,
3769                    entry_point: Some("vs_main"),
3770                    compilation_options: wgpu::PipelineCompilationOptions::default(),
3771                    buffers: &[],
3772                },
3773                fragment: Some(wgpu::FragmentState {
3774                    module: &blit_shader,
3775                    entry_point: Some("fs_main"),
3776                    compilation_options: wgpu::PipelineCompilationOptions::default(),
3777                    targets: &[Some(wgpu::ColorTargetState {
3778                        format: surface_format,
3779                        blend: None,
3780                        write_mask: wgpu::ColorWrites::ALL,
3781                    })],
3782                }),
3783                primitive: wgpu::PrimitiveState::default(),
3784                depth_stencil: None,
3785                multisample: wgpu::MultisampleState::default(),
3786                multiview_mask: None,
3787                cache: None,
3788            });
3789        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
3790            label: Some("roxlap-gpu scene_dda.blit_bg"),
3791            layout: &bgl_blit,
3792            entries: &[
3793                wgpu::BindGroupEntry {
3794                    binding: 0,
3795                    resource: framebuffer.as_entire_binding(),
3796                },
3797                wgpu::BindGroupEntry {
3798                    binding: 1,
3799                    resource: blit_dims.as_entire_binding(),
3800                },
3801            ],
3802        });
3803
3804        // TV.6 — material palette + terrain map buffers, seeded from the
3805        // renderer's current scene-material state (so a map defined before the
3806        // scene pass was built still takes effect).
3807        let (materials_pal_buf, terrain_map_buf) = {
3808            use wgpu::util::DeviceExt;
3809            let pal = self
3810                .device
3811                .create_buffer_init(&wgpu::util::BufferInitDescriptor {
3812                    label: Some("roxlap-gpu scene_dda.materials_pal"),
3813                    contents: bytemuck::cast_slice(self.scene_materials.as_slice()),
3814                    usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
3815                });
3816            // Fixed 256-row map (≤256 materials anyway) → no re-alloc when the
3817            // host changes the map after the scene pass is built.
3818            let mut rows = [[0u32; 2]; 256];
3819            for (slot, &row) in rows.iter_mut().zip(self.scene_terrain_map.iter()) {
3820                *slot = row;
3821            }
3822            let map = self
3823                .device
3824                .create_buffer_init(&wgpu::util::BufferInitDescriptor {
3825                    label: Some("roxlap-gpu scene_dda.terrain_map"),
3826                    contents: bytemuck::cast_slice(&rows),
3827                    usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
3828                });
3829            (pal, map)
3830        };
3831
3832        SceneDdaResources {
3833            storage_size: (width, height),
3834            framebuffer,
3835            uniform_buf,
3836            bgl_dda,
3837            pipeline_dda,
3838            blit_bg,
3839            pipeline_blit,
3840            blit_dims,
3841            depth_buffer,
3842            depth_readback,
3843            materials_pal_buf,
3844            terrain_map_buf,
3845            // XS.4.3 — 80-byte dummy (≥ one Instance) for the sprite-cast
3846            // bindings when capable but no sprite registry is bound this frame.
3847            sprite_cast_dummy: capable.then(|| {
3848                self.device.create_buffer(&wgpu::BufferDescriptor {
3849                    label: Some("roxlap-gpu scene_dda.sprite_cast_dummy"),
3850                    size: 80,
3851                    usage: wgpu::BufferUsages::STORAGE,
3852                    mapped_at_creation: false,
3853                })
3854            }),
3855        }
3856    }
3857
3858    /// Read back the per-pixel world-t depth at window pixel `(x, y)`
3859    /// from the last rendered frame, for screen→world picking. Returns
3860    /// the distance `t` along the (normalised) view ray to the nearest
3861    /// scene-grid surface, so the host reconstructs the world hit as
3862    /// `cam.pos + t * normalize(ray_dir)`. `None` for out-of-bounds
3863    /// pixels, sky / no-hit (the `T_INF` sentinel), or when no scene
3864    /// frame has been rendered.
3865    ///
3866    /// The depth buffer is the SCENE pass's output (terrain + grids),
3867    /// untouched by the sprite pass (which reads it read-only), so a
3868    /// cursor sprite under the pointer does not occlude the pick.
3869    ///
3870    /// Synchronous: copies the depth buffer to a mapped staging buffer
3871    /// and blocks on `device.poll(Wait)`. Cheap enough for click-time
3872    /// picks; do not call it every frame.
3873    ///
3874    /// Requires the last frame to have written depth, which happens
3875    /// when sprites are present (`write_depth`). The pick demo always
3876    /// has a cursor sprite, so this holds.
3877    ///
3878    /// Compiles on wasm, but the wasm facade never calls it: WebGPU's
3879    /// `device.poll` doesn't block for the GPU, so the blocking
3880    /// `recv()` here would hang the single browser thread. Picking is
3881    /// deferred on the wasm GPU path (the facade returns `None`).
3882    #[must_use]
3883    pub fn read_depth_pixel(&self, x: u32, y: u32) -> Option<f32> {
3884        let dda = self.scene_dda.as_ref()?;
3885        let (w, h) = dda.storage_size;
3886        if x >= w || y >= h {
3887            return None;
3888        }
3889        let mut enc = self
3890            .device
3891            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
3892                label: Some("roxlap-gpu depth readback"),
3893            });
3894        let size = u64::from(w) * u64::from(h) * 4;
3895        enc.copy_buffer_to_buffer(&dda.depth_buffer, 0, &dda.depth_readback, 0, size);
3896        self.queue.submit(std::iter::once(enc.finish()));
3897
3898        let slice = dda.depth_readback.slice(..);
3899        let (tx, rx) = std::sync::mpsc::channel();
3900        slice.map_async(wgpu::MapMode::Read, move |r| {
3901            let _ = tx.send(r);
3902        });
3903        self.device.poll(wgpu::PollType::wait_indefinitely()).ok();
3904        rx.recv().ok()?.ok()?;
3905
3906        let t = {
3907            let data = slice.get_mapped_range();
3908            let idx = ((y * w + x) * 4) as usize;
3909            let bytes: [u8; 4] = data[idx..idx + 4].try_into().ok()?;
3910            f32::from_le_bytes(bytes)
3911        };
3912        dda.depth_readback.unmap();
3913
3914        // Reject sky / no-hit (T_INF == 1e30 in the shader) + non-finite.
3915        if !t.is_finite() || t >= 1.0e29 {
3916            return None;
3917        }
3918        Some(t)
3919    }
3920
3921    /// World-space view-ray direction (un-normalised) for window pixel
3922    /// `(x, y)`, under the GPU marcher's projection — the canonical GPU
3923    /// unproject, mirroring `scene_dda.wgsl`'s `render_scene`
3924    /// (vertical-FOV pinhole). Uses the last-rendered frame's target
3925    /// size + FOV; `None` before the first scene render. Pair with
3926    /// [`Self::read_depth_pixel`] for screen→world picking.
3927    #[must_use]
3928    pub fn pixel_ray(
3929        &self,
3930        right: [f64; 3],
3931        down: [f64; 3],
3932        forward: [f64; 3],
3933        x: f64,
3934        y: f64,
3935    ) -> Option<[f64; 3]> {
3936        let dda = self.scene_dda.as_ref()?;
3937        let (w, h) = dda.storage_size;
3938        if w == 0 || h == 0 || self.last_fov_y_rad <= 0.0 {
3939            return None;
3940        }
3941        Some(pinhole_pixel_ray(
3942            right,
3943            down,
3944            forward,
3945            x,
3946            y,
3947            f64::from(w),
3948            f64::from(h),
3949            f64::from(self.last_fov_y_rad),
3950        ))
3951    }
3952
3953    /// GPU.10.1 — upload a sprite model registry + its instances for
3954    /// the DDA path. An empty instance slice clears all sprites.
3955    pub fn set_sprite_instances(
3956        &mut self,
3957        registry: &sprite_model::SpriteModelRegistry,
3958        instances: &[sprite_model::SpriteInstance],
3959    ) {
3960        if instances.is_empty() {
3961            self.sprite_registry = None;
3962            return;
3963        }
3964        self.sprite_registry = Some(sprite_model::SpriteRegistryResident::upload(
3965            &self.device,
3966            registry,
3967            instances,
3968        ));
3969    }
3970
3971    /// Incrementally append sprite instances **without** rebuilding the
3972    /// registry — the cheap streaming-spawn path (asteroids, projectiles).
3973    /// Returns the index of the first appended instance (`[base, base+N)`).
3974    ///
3975    /// Every appended instance must reference a model already registered
3976    /// by the [`Self::set_sprite_instances`] that established residency
3977    /// (model volumes are not re-uploaded here — build the full
3978    /// `SpriteModelRegistry` up front and seed it once, then stream
3979    /// instances). If no registry is resident yet, this performs the
3980    /// initial full upload and returns `0`.
3981    ///
3982    /// Cost is amortised O(1) per instance (the GPU instance buffer grows
3983    /// by powers of two), versus the full volume + buffer rebuild of
3984    /// [`Self::set_sprite_instances`].
3985    pub fn append_sprite_instances(
3986        &mut self,
3987        registry: &sprite_model::SpriteModelRegistry,
3988        instances: &[sprite_model::SpriteInstance],
3989    ) -> u32 {
3990        match self.sprite_registry.as_mut() {
3991            Some(reg) => reg.append_instances(&self.device, registry, instances),
3992            None => {
3993                self.set_sprite_instances(registry, instances);
3994                0
3995            }
3996        }
3997    }
3998
3999    /// Remove the sprite instance at `index` (swap-remove, O(1), no model
4000    /// re-upload). Returns `Some(old_last)` if a different instance was
4001    /// moved into `index` to fill the hole — its index changed from
4002    /// `old_last` to `index`, so a caller tracking instance handles must
4003    /// update that one. Returns `None` if `index` was the last element /
4004    /// out of range, or no registry is resident.
4005    pub fn remove_sprite_instance(&mut self, index: usize) -> Option<usize> {
4006        self.sprite_registry
4007            .as_mut()
4008            .and_then(|reg| reg.remove_instance(index))
4009    }
4010
4011    /// Incrementally add a new model (its full LOD chain) to the resident
4012    /// sprite registry **without** re-uploading the existing models — the
4013    /// counterpart to [`Self::append_sprite_instances`] for streaming in
4014    /// new geometry (unique asteroids, generated meshes).
4015    ///
4016    /// Usage mirrors `update_sprite_model`: you own the
4017    /// [`SpriteModelRegistry`](sprite_model::SpriteModelRegistry), append
4018    /// the model with [`add_lod`](sprite_model::SpriteModelRegistry::add_lod)
4019    /// (or `add`), then pass the returned `chain_id` here to sync that one
4020    /// chain to the GPU. Afterwards [`Self::append_sprite_instances`] may
4021    /// reference it.
4022    ///
4023    /// If no registry is resident yet, this performs the initial full
4024    /// upload of `registry` (all its current models, zero instances) to
4025    /// establish residency — so call it for your *first* model; only
4026    /// chains appended *after* residency exists are added incrementally.
4027    ///
4028    /// Cost is amortised O(new model voxels): the shared volume buffers
4029    /// carry slack and bump-append, growing (and rebuilding once from the
4030    /// registry) only on overflow.
4031    /// Flush queued `write_buffer` uploads by submitting an empty command
4032    /// stream. wgpu stages `write_buffer` data and flushes it on the next
4033    /// `Queue::submit`; calling this between batches of uploads (e.g. a
4034    /// flipbook's frames in [`Self::add_sprite_model`]) recycles the device
4035    /// staging pool so a big one-shot batch can't exhaust it (which would
4036    /// then crash egui-wgpu's own `write_buffer`).
4037    pub fn flush_writes(&self) {
4038        self.queue.submit(std::iter::empty::<wgpu::CommandBuffer>());
4039    }
4040
4041    pub fn add_sprite_model(
4042        &mut self,
4043        registry: &sprite_model::SpriteModelRegistry,
4044        chain_id: u32,
4045    ) {
4046        match self.sprite_registry.as_mut() {
4047            Some(reg) => reg.add_model(&self.device, &self.queue, registry, chain_id),
4048            None => {
4049                self.sprite_registry = Some(sprite_model::SpriteRegistryResident::upload(
4050                    &self.device,
4051                    registry,
4052                    &[],
4053                ));
4054            }
4055        }
4056    }
4057
4058    /// Remove a model (tombstone its LOD chain) from the resident sprite
4059    /// registry — the counterpart to [`Self::add_sprite_model`]. Frees its
4060    /// `colors`/`dirs` space for reuse by a later add; the smaller
4061    /// `occupancy`/`color_offsets` holes are reclaimed by
4062    /// [`Self::compact_sprite_models`]. Entry / chain ids stay stable, so
4063    /// other models' `chain_id`s remain valid.
4064    ///
4065    /// Instances of the removed model keep their slots but draw as nothing
4066    /// until the caller drops them via [`Self::remove_sprite_instance`].
4067    /// No-op if `chain_id` is unknown / already removed / no registry.
4068    pub fn remove_sprite_model(&mut self, chain_id: u32) {
4069        if let Some(reg) = self.sprite_registry.as_mut() {
4070            reg.remove_model(chain_id);
4071        }
4072    }
4073
4074    /// Reclaim the holes left by [`Self::remove_sprite_model`] by rebuilding
4075    /// the shared volume buffers from the live models only. `registry` must
4076    /// be the resident one. Cost is O(live volume) — call it when
4077    /// [`Self::dead_sprite_model_count`] is high (e.g. exceeds the live
4078    /// count), not every frame. No-op if no registry is resident.
4079    pub fn compact_sprite_models(&mut self, registry: &sprite_model::SpriteModelRegistry) {
4080        if let Some(reg) = self.sprite_registry.as_mut() {
4081            reg.compact(&self.device, &self.queue, registry);
4082        }
4083    }
4084
4085    /// Number of live (non-removed) sprite models (0 if none uploaded).
4086    #[must_use]
4087    pub fn sprite_model_count(&self) -> usize {
4088        self.sprite_registry
4089            .as_ref()
4090            .map_or(0, sprite_model::SpriteRegistryResident::live_model_count)
4091    }
4092
4093    /// Number of removed-but-not-yet-compacted sprite models — the
4094    /// fragmentation signal for deciding when to call
4095    /// [`Self::compact_sprite_models`].
4096    #[must_use]
4097    pub fn dead_sprite_model_count(&self) -> usize {
4098        self.sprite_registry
4099            .as_ref()
4100            .map_or(0, sprite_model::SpriteRegistryResident::dead_model_count)
4101    }
4102
4103    /// Number of resident sprite instances (0 if none uploaded).
4104    #[must_use]
4105    pub fn sprite_instance_count(&self) -> usize {
4106        self.sprite_registry
4107            .as_ref()
4108            .map_or(0, sprite_model::SpriteRegistryResident::instance_count)
4109    }
4110
4111    /// Re-pose the already-resident sprite instances in place (no model
4112    /// volume re-upload) — the cheap per-frame path for animated KFA
4113    /// limbs. `instances` must match the last [`Self::set_sprite_instances`]
4114    /// in length + order. No-op if no sprite registry is resident.
4115    pub fn update_sprite_instance_transforms(
4116        &mut self,
4117        instances: &[sprite_model::SpriteInstance],
4118    ) {
4119        if let Some(reg) = self.sprite_registry.as_mut() {
4120            reg.update_transforms(instances);
4121        }
4122    }
4123
4124    /// GPU.12 incremental — re-upload only LOD chain `chain_id`'s entries
4125    /// after an in-place edit of `registry` (carve / recolour), without
4126    /// rebuilding the whole sprite registry. `registry` must be the one
4127    /// last passed to [`Self::set_sprite_instances`] with chain
4128    /// `chain_id` already edited. No-op if no registry is resident.
4129    pub fn update_sprite_model(
4130        &mut self,
4131        registry: &sprite_model::SpriteModelRegistry,
4132        chain_id: u32,
4133    ) {
4134        if let Some(reg) = self.sprite_registry.as_mut() {
4135            reg.update_model(&self.device, &self.queue, registry, chain_id);
4136        }
4137    }
4138
4139    /// VCL.2 — repoint sprite instance `index` at LOD chain `chain_id`
4140    /// (the per-frame flipbook step for animated voxel clips). `registry`
4141    /// is the resident one; `chain_id`'s volume must already be uploaded
4142    /// (e.g. a clip's frames registered via [`Self::add_sprite_model`]).
4143    /// CPU-side rewrite picked up by the next frame's cull — no volume
4144    /// re-upload. No-op if no registry is resident.
4145    pub fn set_sprite_instance_model(
4146        &mut self,
4147        registry: &sprite_model::SpriteModelRegistry,
4148        index: usize,
4149        chain_id: u32,
4150    ) {
4151        if let Some(reg) = self.sprite_registry.as_mut() {
4152            reg.set_instance_model(registry, index, chain_id);
4153        }
4154    }
4155
4156    /// Set the per-instance `kv6colmul[256]` lighting tables (voxlap's
4157    /// `update_reflects` output, e.g. via `roxlap_core::sprite::
4158    /// sprite_colmul`), in the same order/length as the last
4159    /// [`Self::set_sprite_instances`]. The GPU sprite pass modulates each
4160    /// voxel by its surface normal's entry — matching the CPU rasteriser.
4161    /// No-op if no sprite registry is resident.
4162    pub fn set_sprite_instance_colmul(&mut self, tables: &[[u64; 256]]) {
4163        if let Some(reg) = self.sprite_registry.as_mut() {
4164            reg.set_instance_colmul(tables);
4165        }
4166    }
4167
4168    /// GPU.10.4 — set the LOD pixel threshold: a sprite steps to the
4169    /// next mip once a mip-0 voxel would project below `px` screen
4170    /// pixels. `1.0` is the natural "no sub-pixel voxels" default;
4171    /// larger values force LOD in closer (useful for inspection).
4172    /// Clamped to ≥ 0.25.
4173    pub fn set_sprite_lod_px(&mut self, px: f32) {
4174        self.sprite_lod_px = px.max(0.25);
4175    }
4176
4177    /// GPU.11.1 — set the scene-grid LOD scan distance (world units).
4178    /// A chunk entered at world-t `t` is marched at mip
4179    /// `floor(log2(max(t, msd) / msd))`, clamped to its grid's mip
4180    /// ladder. `0` disables LOD (always mip-0). Larger values push
4181    /// the coarser mips farther out — the axis-aligned-mip-beams
4182    /// mitigation lever (GPU.11.2). Default 64 (matches CPU
4183    /// `mip_scan_dist`).
4184    pub fn set_scene_mip_scan_dist(&mut self, dist: f32) {
4185        self.scene_mip_scan_dist = dist.max(0.0);
4186    }
4187
4188    /// Set per-face grid side-shading — voxlap's
4189    /// `setsideshades(top, bot, left, right, up, down)`. Each value is
4190    /// subtracted (as a u8, matching the CPU `gcsub` high byte) from a
4191    /// hit voxel's brightness byte before shading, so the scene-DDA pass
4192    /// darkens grid faces the same way the CPU rasteriser does. `[0; 6]`
4193    /// disables it (the default). The hit face is taken from the DDA's
4194    /// last-stepped axis + ray direction.
4195    pub fn set_scene_side_shades(&mut self, s: [i8; 6]) {
4196        // Reinterpret each i8 as u8 (voxlap stamps `sxx` into gcsub's
4197        // high byte verbatim), then pack (top, bot, left, right) /
4198        // (up, down, 0, 0) for the two uniform vec4s.
4199        let v = |i: usize| i32::from(s[i] as u8);
4200        self.scene_side_shades = [[v(0), v(1), v(2), v(3)], [v(4), v(5), 0, 0]];
4201    }
4202
    /// DL — set the per-frame dynamic lights (sun + point lights), already
    /// transformed into each grid's local frame. Call once per frame before
    /// [`Self::render_scene`] (the facade does this from
    /// `FrameParams::lights`). [`SceneLights::default`] clears all lights —
    /// the pre-DL render. GPU-only; the CPU backend has no analogue.
    ///
    /// The value is only stored here, replacing the previously set lights;
    /// no GPU work is issued by this call.
    pub fn set_scene_lights(&mut self, lights: SceneLights) {
        self.scene_lights = lights;
    }
4211
4212    /// GPU.10.1 — build the instanced model-DDA pipeline (one thread
4213    /// per pixel). Lazily invoked the first frame a registry is present.
4214    fn build_sprite_model_dda(&self) -> SpriteModelDdaResources {
4215        // XS.4.2 — on sprite-shadow-capable devices, splice the terrain shadow
4216        // snippet over the stub (`shadow_occluded_world` becomes a real terrain
4217        // march; binds occupancy 16..23). Otherwise the stub keeps sprites
4218        // unshadowed and the BGL stays at the base 14 storage buffers.
4219        let capable = self.sprite_shadows_capable;
4220        let src = sprite_shader_source(capable);
4221        let shader = self
4222            .device
4223            .create_shader_module(wgpu::ShaderModuleDescriptor {
4224                label: Some("sprite_model_dda.wgsl"),
4225                source: wgpu::ShaderSource::Wgsl(src.into()),
4226            });
4227        let mut entries = vec![
4228            bgl_uniform_entry(0),
4229            bgl_storage_entry(1, true),  // occupancy
4230            bgl_storage_entry(2, true),  // colors
4231            bgl_storage_entry(3, true),  // color_offsets
4232            bgl_storage_entry(4, true),  // model_meta
4233            bgl_storage_entry(5, true),  // instances
4234            bgl_storage_entry(6, true),  // scene depth
4235            bgl_storage_entry(7, false), // framebuffer (read-write buffer)
4236            bgl_storage_entry(8, true),  // tile_ranges
4237            bgl_storage_entry(9, true),  // tile_instances
4238            bgl_storage_entry(10, true), // per-voxel dir
4239            bgl_storage_entry(11, true), // per-instance kv6colmul
4240            bgl_storage_entry(12, true), // TV — material palette
4241            bgl_storage_entry(13, true), // TV.3 — per-voxel material id
4242            bgl_storage_entry(15, true), // DL.7 — world point lights
4243        ];
4244        if capable {
4245            // XS.4.2 — terrain occupancy set for sprite RECEIVE shadows.
4246            entries.push(bgl_storage_entry(16, true)); // occ_page0
4247            entries.push(bgl_storage_entry(17, true)); // occ_page1
4248            entries.push(bgl_storage_entry(18, true)); // occ_page2
4249            entries.push(bgl_storage_entry(19, true)); // occ_page3
4250            entries.push(bgl_storage_entry(20, true)); // all_chunk_occupancy
4251            entries.push(bgl_storage_entry(21, true)); // all_slot_chunk_idx
4252            entries.push(bgl_storage_entry(22, true)); // grid_static_meta
4253            entries.push(bgl_storage_entry(23, true)); // grid_cameras
4254        }
4255        let bgl = self
4256            .device
4257            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
4258                label: Some("roxlap-gpu sprite_model_dda.bgl"),
4259                entries: &entries,
4260            });
4261        let pl = self
4262            .device
4263            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
4264                label: Some("roxlap-gpu sprite_model_dda.layout"),
4265                bind_group_layouts: &[Some(&bgl)],
4266                immediate_size: 0,
4267            });
4268        let pipeline = self
4269            .device
4270            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
4271                label: Some("roxlap-gpu sprite_model_dda.pipeline"),
4272                layout: Some(&pl),
4273                module: &shader,
4274                entry_point: Some("march"),
4275                compilation_options: wgpu::PipelineCompilationOptions::default(),
4276                cache: None,
4277            });
4278        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
4279            label: Some("roxlap-gpu sprite_model_dda.uniform"),
4280            size: std::mem::size_of::<SpriteModelUniform>() as u64,
4281            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
4282            mapped_at_creation: false,
4283        });
4284        // TV — material palette, seeded from the current renderer state so a
4285        // table defined before the sprite pass was built still takes effect.
4286        let materials_buf = {
4287            use wgpu::util::DeviceExt;
4288            self.device
4289                .create_buffer_init(&wgpu::util::BufferInitDescriptor {
4290                    label: Some("roxlap-gpu sprite_model_dda.materials"),
4291                    contents: bytemuck::cast_slice(self.sprite_materials.as_slice()),
4292                    usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
4293                })
4294        };
4295        SpriteModelDdaResources {
4296            bgl,
4297            pipeline,
4298            uniform_buf,
4299            materials_buf,
4300        }
4301    }
4302
4303    /// TV — set the global voxel-material palette for the GPU sprite pass.
4304    /// Mirrors the renderer's [`MaterialTable`](roxlap_formats::material::MaterialTable):
4305    /// every sprite/clip instance's `material` id indexes it for opacity +
4306    /// blend mode. Cheap (2 KB); call it whenever the palette changes (or
4307    /// each frame). While every material is opaque the shader stays on the
4308    /// unchanged first-hit path.
4309    pub fn set_sprite_materials(&mut self, table: &roxlap_formats::material::MaterialTable) {
4310        let (palette, any_translucent) = material_palette(table);
4311        self.sprite_materials = palette;
4312        self.sprite_has_translucent = any_translucent;
4313        if let Some(smd) = &self.sprite_model_dda {
4314            self.queue.write_buffer(
4315                &smd.materials_buf,
4316                0,
4317                bytemuck::cast_slice(self.sprite_materials.as_slice()),
4318            );
4319        }
4320    }
4321
4322    /// TV.6 — set the scene (terrain) material palette + colour→material map
4323    /// for the multi-grid scene pass. Matching-colour terrain voxels render
4324    /// translucent; an empty map / all-opaque palette renders unchanged. The
4325    /// map is capped at 256 rows (the fixed buffer size).
4326    pub fn set_scene_terrain_materials(
4327        &mut self,
4328        table: &roxlap_formats::material::MaterialTable,
4329        map: &[(u32, u8)],
4330    ) {
4331        let (palette, _) = material_palette(table);
4332        self.scene_materials = palette;
4333        self.scene_terrain_map = map
4334            .iter()
4335            .take(256)
4336            .map(|&(c, m)| [c & 0x00ff_ffff, u32::from(m)])
4337            .collect();
4338        self.scene_terrain_translucent = map.iter().any(|&(_, m)| !table.get(m).is_opaque());
4339        if let Some(dda) = &self.scene_dda {
4340            self.queue.write_buffer(
4341                &dda.materials_pal_buf,
4342                0,
4343                bytemuck::cast_slice(self.scene_materials.as_slice()),
4344            );
4345            if !self.scene_terrain_map.is_empty() {
4346                self.queue.write_buffer(
4347                    &dda.terrain_map_buf,
4348                    0,
4349                    bytemuck::cast_slice(&self.scene_terrain_map),
4350                );
4351            }
4352        }
4353    }
4354}
4355
/// GPU.11 — headless scene-DDA renderer for tests + offline visual
/// gates. Owns the `scene_dda.wgsl` compute pipeline with no surface
/// and no blit pass; renders a [`GpuSceneResident`] to an in-memory
/// RGBA framebuffer via buffer readback. The per-substage visual
/// gate (render reference scenes, diff PPMs) and the GPU.11.1 mip
/// render-diff both ride on this.
pub struct HeadlessSceneRenderer {
    /// Framebuffer width in pixels, as passed to [`Self::new`].
    width: u32,
    /// Framebuffer height in pixels, as passed to [`Self::new`].
    height: u32,
    /// Framebuffer storage buffer (packed `rgba8unorm`, tight rows) —
    /// matches the buffer-output `scene_dda.wgsl` (see its note).
    framebuffer: wgpu::Buffer,
    /// Per-pixel depth storage buffer (`width * height * 4` bytes), bound
    /// read-write at binding 11. The headless pass renders with
    /// `write_depth: 0`.
    depth_buffer: wgpu::Buffer,
    /// Uniform buffer sized for one `SceneDdaUniform` (binding 0); rewritten
    /// on every render call.
    uniform_buf: wgpu::Buffer,
    /// The 1×1 default sky texture. Never read directly in this impl —
    /// presumably held so it is owned alongside `sky_view`.
    _sky_texture: wgpu::Texture,
    /// View over the default sky texture (binding 9).
    sky_view: wgpu::TextureView,
    /// Linear-filtering, repeat-addressing sampler for the sky (binding 10).
    sky_sampler: wgpu::Sampler,
    /// Bind-group layout; a fresh bind group is created from it per render.
    bgl: wgpu::BindGroupLayout,
    /// Compute pipeline for the `render_scene` entry point.
    pipeline: wgpu::ComputePipeline,
    /// `COPY_DST | MAP_READ` buffer the framebuffer is copied into and then
    /// mapped for CPU readback.
    readback: wgpu::Buffer,
    /// Per-face side-shades for the gate render (default none). Packed
    /// `[(top,bot,left,right), (up,down,_,_)]`; set via
    /// [`Self::set_side_shades`].
    side_shades: [[i32; 4]; 2],
    /// DL — dynamic lights for the render (already grid-local, like the
    /// surface path). Default = none (baked-only). Set via
    /// [`Self::set_scene_lights`]; lets tests exercise the lit path.
    lights: SceneLights,
}
4385
4386impl HeadlessSceneRenderer {
4387    /// Build the compute pipeline + output/readback resources for a
4388    /// `width × height` framebuffer. Validates `scene_dda.wgsl` and
4389    /// the [`scene::GridStaticMeta`] std430 layout at pipeline /
4390    /// bind-group time.
4391    #[must_use]
4392    pub fn new(device: &wgpu::Device, queue: &wgpu::Queue, width: u32, height: u32) -> Self {
4393        let framebuffer = device.create_buffer(&wgpu::BufferDescriptor {
4394            label: Some("roxlap-gpu headless.framebuffer"),
4395            size: u64::from(width) * u64::from(height) * 4,
4396            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
4397            mapped_at_creation: false,
4398        });
4399
4400        let uniform_buf = device.create_buffer(&wgpu::BufferDescriptor {
4401            label: Some("roxlap-gpu headless.uniform"),
4402            size: std::mem::size_of::<SceneDdaUniform>() as u64,
4403            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
4404            mapped_at_creation: false,
4405        });
4406        let depth_buffer = device.create_buffer(&wgpu::BufferDescriptor {
4407            label: Some("roxlap-gpu headless.depth"),
4408            size: u64::from(width) * u64::from(height) * 4,
4409            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
4410            mapped_at_creation: false,
4411        });
4412
4413        let default_sky_pixel = [120u8, 150, 220, 255];
4414        let (sky_texture, sky_view) = create_sky_texture(device, 1, 1, &default_sky_pixel);
4415        // Upload the default sky texel (create_sky_texture only allocates
4416        // — the texel must be written or the shader samples black, which
4417        // is why a grid-less headless render came back black).
4418        queue.write_texture(
4419            wgpu::TexelCopyTextureInfo {
4420                texture: &sky_texture,
4421                mip_level: 0,
4422                origin: wgpu::Origin3d::ZERO,
4423                aspect: wgpu::TextureAspect::All,
4424            },
4425            &default_sky_pixel,
4426            wgpu::TexelCopyBufferLayout {
4427                offset: 0,
4428                bytes_per_row: Some(4),
4429                rows_per_image: Some(1),
4430            },
4431            wgpu::Extent3d {
4432                width: 1,
4433                height: 1,
4434                depth_or_array_layers: 1,
4435            },
4436        );
4437        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
4438            label: Some("roxlap-gpu headless.sky_sampler"),
4439            address_mode_u: wgpu::AddressMode::Repeat,
4440            address_mode_v: wgpu::AddressMode::Repeat,
4441            mag_filter: wgpu::FilterMode::Linear,
4442            min_filter: wgpu::FilterMode::Linear,
4443            ..Default::default()
4444        });
4445
4446        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
4447            label: Some("scene_dda.wgsl (headless)"),
4448            source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
4449        });
4450        let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
4451            label: Some("roxlap-gpu headless.bgl"),
4452            entries: &[
4453                bgl_uniform_entry(0),
4454                bgl_storage_entry(1, true),
4455                bgl_storage_entry(2, true),
4456                bgl_storage_entry(3, true),
4457                bgl_storage_entry(4, true),
4458                bgl_storage_entry(5, true),
4459                bgl_storage_entry(6, true),
4460                bgl_storage_entry(7, true),
4461                // Framebuffer storage buffer (read-write).
4462                bgl_storage_entry(8, false),
4463                wgpu::BindGroupLayoutEntry {
4464                    binding: 9,
4465                    visibility: wgpu::ShaderStages::COMPUTE,
4466                    ty: wgpu::BindingType::Texture {
4467                        sample_type: wgpu::TextureSampleType::Float { filterable: true },
4468                        view_dimension: wgpu::TextureViewDimension::D2,
4469                        multisampled: false,
4470                    },
4471                    count: None,
4472                },
4473                wgpu::BindGroupLayoutEntry {
4474                    binding: 10,
4475                    visibility: wgpu::ShaderStages::COMPUTE,
4476                    ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
4477                    count: None,
4478                },
4479                bgl_storage_entry(11, false),
4480                bgl_storage_entry(12, true),
4481                bgl_storage_entry(13, true),
4482                bgl_storage_entry(14, true),
4483                // Per-grid cameras (runtime-sized; one per grid).
4484                bgl_storage_entry(15, true),
4485                // TV.6 — material palette + terrain map (opaque dummies here).
4486                bgl_storage_entry(16, true),
4487                bgl_storage_entry(17, true),
4488                // DL — per-grid point lights (18). Sun dir rides in
4489                // PerGridCamera (binding 15).
4490                bgl_storage_entry(18, true),
4491            ],
4492        });
4493        let pl = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
4494            label: Some("roxlap-gpu headless.layout"),
4495            bind_group_layouts: &[Some(&bgl)],
4496            immediate_size: 0,
4497        });
4498        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
4499            label: Some("roxlap-gpu headless.pipeline"),
4500            layout: Some(&pl),
4501            module: &shader,
4502            entry_point: Some("render_scene"),
4503            compilation_options: wgpu::PipelineCompilationOptions::default(),
4504            cache: None,
4505        });
4506
4507        // Readback is a tight buffer-to-buffer copy (no 256-byte row
4508        // padding, unlike the old texture-to-buffer path).
4509        let readback = device.create_buffer(&wgpu::BufferDescriptor {
4510            label: Some("roxlap-gpu headless.readback"),
4511            size: u64::from(width) * u64::from(height) * 4,
4512            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
4513            mapped_at_creation: false,
4514        });
4515
4516        Self {
4517            width,
4518            height,
4519            framebuffer,
4520            depth_buffer,
4521            uniform_buf,
4522            _sky_texture: sky_texture,
4523            sky_view,
4524            sky_sampler,
4525            bgl,
4526            pipeline,
4527            readback,
4528            side_shades: [[0; 4]; 2],
4529            lights: SceneLights::default(),
4530        }
4531    }
4532
    /// DL — set dynamic lights for subsequent [`Self::render`] calls
    /// (already in grid-local space).
    ///
    /// Lets tests exercise the lit path (sun N·L, point lights). The value is
    /// stored as-is, replacing any previously set lights, and is read on each
    /// render. Default = none (baked-only).
    pub fn set_scene_lights(&mut self, lights: SceneLights) {
        self.lights = lights;
    }
4539
4540    /// Set per-face side-shades for subsequent [`Self::render`] calls —
4541    /// voxlap `setsideshades(top, bot, left, right, up, down)`, each an
4542    /// i8 stamped as u8 (matching the engine path). Lets the gate test
4543    /// the GPU side-shade darkening.
4544    pub fn set_side_shades(&mut self, s: [i8; 6]) {
4545        let v = |i: usize| i32::from(s[i] as u8);
4546        self.side_shades = [[v(0), v(1), v(2), v(3)], [v(4), v(5), 0, 0]];
4547    }
4548
    /// Render `scene` from `cameras` (one per grid) and read the
    /// framebuffer back as `width*height` packed `0xAABBGGRR` pixels
    /// (R in the low byte).
    ///
    /// This is the headless render with identity per-grid world transforms
    /// (shadows stay intra-grid): it forwards to
    /// [`Self::render_with_transforms`] with an empty transform slice. See
    /// that method for the cross-grid (XS.3) variant.
    ///
    /// Fog is disabled. `mip_scan_dist` drives the GPU.11.1 scene-grid LOD
    /// (`0` = always mip-0). Blocks on readback.
    ///
    /// # Panics
    /// If `cameras.len() != scene.grid_count`.
    #[must_use]
    #[allow(clippy::too_many_arguments)]
    pub fn render(
        &self,
        device: &wgpu::Device,
        queue: &wgpu::Queue,
        scene: &GpuSceneResident,
        cameras: &[Camera],
        fov_y_rad: f32,
        max_outer_steps: u32,
        mip_scan_dist: f32,
    ) -> Vec<u32> {
        self.render_with_transforms(
            device,
            queue,
            scene,
            cameras,
            &[],
            fov_y_rad,
            max_outer_steps,
            mip_scan_dist,
        )
    }
4583
4584    /// XS.3 — headless render with explicit per-grid world transforms, so the
4585    /// scene shader can lift a shadow ray to world space and test it against
4586    /// every grid (cross-grid shadows). Empty `grid_world` ⇒ identity.
4587    #[must_use]
4588    #[allow(clippy::too_many_arguments)]
4589    pub fn render_with_transforms(
4590        &self,
4591        device: &wgpu::Device,
4592        queue: &wgpu::Queue,
4593        scene: &GpuSceneResident,
4594        cameras: &[Camera],
4595        grid_world: &[GridWorldTransform],
4596        fov_y_rad: f32,
4597        max_outer_steps: u32,
4598        mip_scan_dist: f32,
4599    ) -> Vec<u32> {
4600        assert_eq!(
4601            cameras.len(),
4602            scene.grid_count as usize,
4603            "headless render: {} cameras for {} grids",
4604            cameras.len(),
4605            scene.grid_count,
4606        );
4607
4608        let mut cam_vec: Vec<SceneDdaPerGridCamera> = cameras
4609            .iter()
4610            .map(SceneDdaPerGridCamera::from_camera)
4611            .collect();
4612        // XS.3 — stamp world transforms for cross-grid shadows (identity if absent).
4613        for (c, t) in cam_vec.iter_mut().zip(grid_world.iter()) {
4614            c.set_world_transform(t);
4615        }
4616        // TV.6 — opaque dummies for the material palette + terrain map
4617        // bindings (headless renders opaque-only: terrain_has_translucent=0).
4618        let (dummy_pal, dummy_map) = {
4619            use wgpu::util::DeviceExt;
4620            let pal: Vec<MaterialGpu> = vec![
4621                MaterialGpu {
4622                    alpha: 1.0,
4623                    mode: 0
4624                };
4625                256
4626            ];
4627            let p = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
4628                label: Some("roxlap-gpu headless.materials_pal"),
4629                contents: bytemuck::cast_slice(&pal),
4630                usage: wgpu::BufferUsages::STORAGE,
4631            });
4632            let m = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
4633                label: Some("roxlap-gpu headless.terrain_map"),
4634                contents: bytemuck::cast_slice(&[[0u32; 2]]),
4635                usage: wgpu::BufferUsages::STORAGE,
4636            });
4637            (p, m)
4638        };
4639        // DL — pack any dynamic lights (default none ⇒ the baked-only path,
4640        // matching the oracle goldens). Injects sun dir into cam_vec.sun_dir
4641        // and builds the point-light buffer (binding 18). Shared with the
4642        // surface path.
4643        let dl = self.lights.clone();
4644        let (dummy_point_lights, sun_flags, point_count) =
4645            pack_scene_lights(device, &dl, scene.grid_count as usize, &mut cam_vec);
4646        let grid_cameras = upload_grid_cameras(device, &cam_vec);
4647        let uniform = SceneDdaUniform {
4648            fov_y_rad,
4649            grid_count: scene.grid_count,
4650            max_outer_steps,
4651            _pad0: 0,
4652            screen_size: [self.width, self.height],
4653            _pad1: [0; 2],
4654            // Fog off: near/far past any reachable t → factor 0.
4655            fog_color: [0.0, 0.0, 0.0, 1.0e29],
4656            fog_far: 1.0e30,
4657            write_depth: 0,
4658            occ_page_words: scene.occupancy_page_words,
4659            occ_num_pages: scene.occupancy_num_pages,
4660            mip_scan_dist,
4661            terrain_has_translucent: 0, // headless gate: opaque only
4662            terrain_map_count: 0,
4663            _pad4: 0,
4664            // Sky direction from the first grid camera (the world frame
4665            // in these tests); a default forward camera when there are
4666            // none (grid_count == 0) so the sky lookup stays valid.
4667            sky_cam: SceneDdaPerGridCamera::from_camera(&cameras.first().copied().unwrap_or(
4668                Camera {
4669                    position: [0.0; 3],
4670                    right: [1.0, 0.0, 0.0],
4671                    down: [0.0, 0.0, 1.0],
4672                    forward: [0.0, 1.0, 0.0],
4673                    fov_y_rad,
4674                },
4675            )),
4676            side_shades0: self.side_shades[0],
4677            side_shades1: self.side_shades[1],
4678            // DL — light parameters (default = no lights ⇒ sun_flags 0).
4679            sun_color: [
4680                dl.sun_color[0],
4681                dl.sun_color[1],
4682                dl.sun_color[2],
4683                dl.sun_intensity,
4684            ],
4685            ambient_color: [
4686                dl.ambient[0],
4687                dl.ambient[1],
4688                dl.ambient[2],
4689                dl.shadow_strength,
4690            ],
4691            sun_flags,
4692            point_light_count: point_count,
4693            shadow_max_steps: dl.shadow_max_steps,
4694            _pad5: 0,
4695            shadow_bias: dl.shadow_bias,
4696            shadow_max_dist: dl.shadow_max_dist,
4697            _pad6: [0.0; 2],
4698            shadow_tint: [dl.shadow_tint[0], dl.shadow_tint[1], dl.shadow_tint[2], 0.0],
4699            style_bands: dl.style_bands,
4700            sprite_cast_count: 0, // headless renderer has no sprite pass
4701            _pad7: [0; 2],
4702        };
4703        queue.write_buffer(&self.uniform_buf, 0, bytemuck::bytes_of(&uniform));
4704
4705        let bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
4706            label: Some("roxlap-gpu headless.bg"),
4707            layout: &self.bgl,
4708            entries: &[
4709                wgpu::BindGroupEntry {
4710                    binding: 0,
4711                    resource: self.uniform_buf.as_entire_binding(),
4712                },
4713                wgpu::BindGroupEntry {
4714                    binding: 1,
4715                    resource: scene.occupancy_pages[0].as_entire_binding(),
4716                },
4717                wgpu::BindGroupEntry {
4718                    binding: 2,
4719                    resource: scene.all_color_offsets.as_entire_binding(),
4720                },
4721                wgpu::BindGroupEntry {
4722                    binding: 3,
4723                    resource: scene.all_colors.as_entire_binding(),
4724                },
4725                wgpu::BindGroupEntry {
4726                    binding: 4,
4727                    resource: scene.all_chunk_colors_base.as_entire_binding(),
4728                },
4729                wgpu::BindGroupEntry {
4730                    binding: 5,
4731                    resource: scene.all_chunk_occupancy.as_entire_binding(),
4732                },
4733                wgpu::BindGroupEntry {
4734                    binding: 6,
4735                    resource: scene.grid_static_meta.as_entire_binding(),
4736                },
4737                wgpu::BindGroupEntry {
4738                    binding: 7,
4739                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
4740                },
4741                wgpu::BindGroupEntry {
4742                    binding: 8,
4743                    resource: self.framebuffer.as_entire_binding(),
4744                },
4745                wgpu::BindGroupEntry {
4746                    binding: 9,
4747                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
4748                },
4749                wgpu::BindGroupEntry {
4750                    binding: 10,
4751                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
4752                },
4753                wgpu::BindGroupEntry {
4754                    binding: 11,
4755                    resource: self.depth_buffer.as_entire_binding(),
4756                },
4757                wgpu::BindGroupEntry {
4758                    binding: 12,
4759                    resource: scene.occupancy_pages[1].as_entire_binding(),
4760                },
4761                wgpu::BindGroupEntry {
4762                    binding: 13,
4763                    resource: scene.occupancy_pages[2].as_entire_binding(),
4764                },
4765                wgpu::BindGroupEntry {
4766                    binding: 14,
4767                    resource: scene.occupancy_pages[3].as_entire_binding(),
4768                },
4769                wgpu::BindGroupEntry {
4770                    binding: 15,
4771                    resource: grid_cameras.as_entire_binding(),
4772                },
4773                wgpu::BindGroupEntry {
4774                    binding: 16,
4775                    resource: dummy_pal.as_entire_binding(),
4776                },
4777                wgpu::BindGroupEntry {
4778                    binding: 17,
4779                    resource: dummy_map.as_entire_binding(),
4780                },
4781                // DL — dummy per-grid point lights (18). Sun dir rides in
4782                // PerGridCamera (binding 15).
4783                wgpu::BindGroupEntry {
4784                    binding: 18,
4785                    resource: dummy_point_lights.as_entire_binding(),
4786                },
4787            ],
4788        });
4789
4790        let mut enc =
4791            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
4792        {
4793            let mut pass = enc.begin_compute_pass(&wgpu::ComputePassDescriptor {
4794                label: Some("roxlap-gpu headless.pass"),
4795                timestamp_writes: None,
4796            });
4797            pass.set_pipeline(&self.pipeline);
4798            pass.set_bind_group(0, &bg, &[]);
4799            pass.dispatch_workgroups(self.width.div_ceil(8), self.height.div_ceil(8), 1);
4800        }
4801        enc.copy_buffer_to_buffer(
4802            &self.framebuffer,
4803            0,
4804            &self.readback,
4805            0,
4806            u64::from(self.width) * u64::from(self.height) * 4,
4807        );
4808        queue.submit(Some(enc.finish()));
4809
4810        let slice = self.readback.slice(..);
4811        let (tx, rx) = std::sync::mpsc::channel();
4812        slice.map_async(wgpu::MapMode::Read, move |r| {
4813            let _ = tx.send(r);
4814        });
4815        device.poll(wgpu::PollType::wait_indefinitely()).ok();
4816        rx.recv().expect("map_async channel").expect("map_async");
4817
4818        let data = slice.get_mapped_range();
4819        // Tight `width*height` packed pixels — the shader's
4820        // `pack4x8unorm(vec4(r,g,b,a))` already yields `0xAABBGGRR`
4821        // little-endian, so a straight u32 read reconstructs each pixel.
4822        let out: Vec<u32> = data
4823            .chunks_exact(4)
4824            .map(|px| u32::from_le_bytes([px[0], px[1], px[2], px[3]]))
4825            .collect();
4826        drop(data);
4827        self.readback.unmap();
4828        out
4829    }
4830}
4831
4832fn bgl_uniform_entry(binding: u32) -> wgpu::BindGroupLayoutEntry {
4833    wgpu::BindGroupLayoutEntry {
4834        binding,
4835        visibility: wgpu::ShaderStages::COMPUTE,
4836        ty: wgpu::BindingType::Buffer {
4837            ty: wgpu::BufferBindingType::Uniform,
4838            has_dynamic_offset: false,
4839            min_binding_size: None,
4840        },
4841        count: None,
4842    }
4843}
4844
4845fn bgl_storage_entry(binding: u32, read_only: bool) -> wgpu::BindGroupLayoutEntry {
4846    wgpu::BindGroupLayoutEntry {
4847        binding,
4848        visibility: wgpu::ShaderStages::COMPUTE,
4849        ty: wgpu::BindingType::Buffer {
4850            ty: wgpu::BufferBindingType::Storage { read_only },
4851            has_dynamic_offset: false,
4852            min_binding_size: None,
4853        },
4854        count: None,
4855    }
4856}
4857
/// Allocate a fresh sky panorama texture sized `width × height`
/// (`Rgba8Unorm`, single mip, `TEXTURE_BINDING | COPY_DST`) and return it
/// together with a default view.
///
/// NOTE: this function only allocates. It takes no queue and
/// `_initial_pixels` is currently unused, so no pixel data is uploaded here —
/// the caller must write the texels itself via `Queue::write_texture`, as
/// `HeadlessSceneRenderer::new` does for its 1×1 default sky. An unwritten
/// texture samples as black.
///
/// Per the original note, also used by `GpuRenderer::new` (1×1 default) and
/// `set_sky_panorama` (host-supplied panorama).
fn create_sky_texture(
    device: &wgpu::Device,
    width: u32,
    height: u32,
    _initial_pixels: &[u8],
) -> (wgpu::Texture, wgpu::TextureView) {
    let tex = device.create_texture(&wgpu::TextureDescriptor {
        label: Some("roxlap-gpu sky_texture"),
        size: wgpu::Extent3d {
            width,
            height,
            depth_or_array_layers: 1,
        },
        mip_level_count: 1,
        sample_count: 1,
        dimension: wgpu::TextureDimension::D2,
        format: wgpu::TextureFormat::Rgba8Unorm,
        // COPY_DST so callers can upload texels with `write_texture`.
        usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
        view_formats: &[],
    });
    let view = tex.create_view(&wgpu::TextureViewDescriptor::default());
    (tex, view)
}
4885
4886/// GPU.4 needs to upload a whole grid (~hundreds of MiB) as a few
4887/// storage buffers. wgpu's default `max_storage_buffer_binding_size`
4888/// is 128 MiB, which is just enough for the demo's 32×32 ground
4889/// occupancy (~128 MiB) but not the colour array. We request as
4890/// much as the adapter is willing to give — most desktop GPUs cap
4891/// individual storage buffers at 2-4 GiB; iGPUs often offer the
4892/// full system memory.
4893pub(crate) fn pick_required_limits(adapter_limits: &wgpu::Limits) -> wgpu::Limits {
4894    wgpu::Limits {
4895        max_storage_buffer_binding_size: adapter_limits.max_storage_buffer_binding_size,
4896        max_buffer_size: adapter_limits.max_buffer_size,
4897        // Occupancy paging adds up to MAX_OCC_PAGES-1 extra storage
4898        // bindings; with the scene's other buffers + the GPU.9 depth
4899        // buffer the scene_dda stage needs 16. XS.4 GPU sprite shadows
4900        // need more (the sprite pass binds the terrain occupancy set on
4901        // top of its own — up to `SPRITE_SHADOW_MIN_STORAGE_BUFFERS`), so
4902        // request that many when the adapter offers them; capable devices
4903        // light up sprite shadows, others fall back (still ≥16 for the
4904        // base renderer). Both NVK and lavapipe advertise ≫16.
4905        max_storage_buffers_per_shader_stage: adapter_limits
4906            .max_storage_buffers_per_shader_stage
4907            .min(SPRITE_SHADOW_MIN_STORAGE_BUFFERS),
4908        ..wgpu::Limits::default()
4909    }
4910}
4911
4912/// XS.4.2 — build the sprite-pass shader source. On a sprite-shadow-capable
4913/// device, splice `sprite_terrain_shadow.wgsl` over the `//XS4_STUB_BEGIN`..
4914/// `//XS4_STUB_END` block so `shadow_occluded_world` becomes the real terrain
4915/// march (+ the occupancy bindings 16..23); otherwise the stub keeps GPU
4916/// sprites unshadowed. The base file is always valid WGSL (the stub variant),
4917/// so `wgsl_shaders_validate` covers the fallback path.
4918fn sprite_shader_source(capable: bool) -> String {
4919    let base = include_str!("../shaders/sprite_model_dda.wgsl");
4920    if !capable {
4921        return base.to_string();
4922    }
4923    let snippet = include_str!("../shaders/sprite_terrain_shadow.wgsl");
4924    const BEGIN: &str = "//XS4_STUB_BEGIN";
4925    const END: &str = "//XS4_STUB_END";
4926    let (Some(b), Some(e)) = (base.find(BEGIN), base.find(END)) else {
4927        // Markers missing — fail loud rather than silently shipping the stub.
4928        panic!("sprite_model_dda.wgsl: XS4 stub markers not found");
4929    };
4930    let e_end = e + END.len();
4931    let mut out = String::with_capacity(base.len() + snippet.len());
4932    out.push_str(&base[..b]);
4933    out.push_str(snippet);
4934    out.push_str(&base[e_end..]);
4935    out
4936}
4937
4938/// XS.4.3 — build the scene-pass shader source. On a sprite-shadow-capable
4939/// device, splice `scene_sprite_shadow.wgsl` over the `//XS4C_STUB_BEGIN`..
4940/// `//XS4C_STUB_END` block so `sprites_occlude` marches the sprite registry
4941/// (+ bindings 19..21) and terrain receives sprite-cast shadows; otherwise the
4942/// stub returns false. The base file is always valid WGSL (the stub variant).
4943fn scene_shader_source(capable: bool) -> String {
4944    let base = include_str!("../shaders/scene_dda.wgsl");
4945    if !capable {
4946        return base.to_string();
4947    }
4948    let snippet = include_str!("../shaders/scene_sprite_shadow.wgsl");
4949    const BEGIN: &str = "//XS4C_STUB_BEGIN";
4950    const END: &str = "//XS4C_STUB_END";
4951    let (Some(b), Some(e)) = (base.find(BEGIN), base.find(END)) else {
4952        panic!("scene_dda.wgsl: XS4C stub markers not found");
4953    };
4954    let e_end = e + END.len();
4955    let mut out = String::with_capacity(base.len() + snippet.len());
4956    out.push_str(&base[..b]);
4957    out.push_str(snippet);
4958    out.push_str(&base[e_end..]);
4959    out
4960}
4961
/// XS.4 — storage buffers per shader stage needed for GPU sprite shadows. The
/// sprite pass binds its own 14 + the terrain occupancy set (occupancy pages
/// 0..3, chunk occupancy, slot index, grid meta, per-grid cameras) to march
/// terrain shadows. Devices granting fewer fall back to unshadowed GPU sprites.
///
/// NOTE(review): 22 presumably breaks down as 14 + 8 (four occupancy pages
/// plus the four other terrain buffers listed above) — confirm against the
/// shader bindings when changing either side.
///
/// `pick_required_limits` uses this value as the upper bound on the
/// `max_storage_buffers_per_shader_stage` it requests from the adapter.
pub(crate) const SPRITE_SHADOW_MIN_STORAGE_BUFFERS: u32 = 22;
4967
4968fn pick_present_mode(modes: &[wgpu::PresentMode]) -> wgpu::PresentMode {
4969    // Prefer Mailbox > Immediate > Fifo. Fifo is the universal
4970    // fallback and the only one Wayland-on-Mesa always offers.
4971    for &m in &[wgpu::PresentMode::Mailbox, wgpu::PresentMode::Immediate] {
4972        if modes.contains(&m) {
4973            return m;
4974        }
4975    }
4976    wgpu::PresentMode::Fifo
4977}
4978
/// Un-normalised world-space view ray through window pixel `(x, y)` for a
/// vertical-FOV pinhole camera — the same projection `scene_dda.wgsl`'s
/// `render_scene` uses. Shared by [`GpuRenderer::pixel_ray`]; kept as a free
/// function so it can be unit-tested without a device.
///
/// * `right` / `down` / `forward` — the camera basis vectors.
/// * `x`, `y` — pixel coordinates; the ray goes through the pixel centre
///   (`+0.5` is applied internally).
/// * `w`, `h` — viewport size in pixels.
/// * `fov_y_rad` — full vertical field of view, in radians.
///
/// Returns `forward + kx·right − ky·down`, where `kx`/`ky` are the pixel's
/// NDC coordinates (y pointing up) scaled by the half-extents of the image
/// plane at unit forward distance.
#[must_use]
#[allow(clippy::too_many_arguments)]
pub fn pinhole_pixel_ray(
    right: [f64; 3],
    down: [f64; 3],
    forward: [f64; 3],
    x: f64,
    y: f64,
    w: f64,
    h: f64,
    fov_y_rad: f64,
) -> [f64; 3] {
    // Half-extents of the image plane at unit distance along `forward`.
    let tan_half_fov = (fov_y_rad * 0.5).tan();
    let half_extent_x = tan_half_fov * (w / h);

    // Pixel centre in NDC: x grows rightwards, y grows upwards (+1 at the top row).
    let ndc_right = (x + 0.5) / w * 2.0 - 1.0;
    let ndc_up = 1.0 - (y + 0.5) / h * 2.0;

    let kx = ndc_right * half_extent_x;
    let ky = ndc_up * tan_half_fov;

    // `ky` is measured upwards while the basis vector points down, hence the minus.
    std::array::from_fn(|axis| forward[axis] + kx * right[axis] - ky * down[axis])
}
5007
#[cfg(test)]
mod pixel_ray_tests {
    use super::pinhole_pixel_ray;

    // Axis-aligned camera basis shared by the ray tests.
    const RIGHT: [f64; 3] = [1.0, 0.0, 0.0];
    const DOWN: [f64; 3] = [0.0, 1.0, 0.0];
    const FWD: [f64; 3] = [0.0, 0.0, 1.0]; // voxlap z-down "look down"

    /// The frame centre (NDC 0,0) must point straight along `forward`.
    #[test]
    fn centre_pixel_is_forward() {
        let fov = 60_f64.to_radians();
        let d = pinhole_pixel_ray(RIGHT, DOWN, FWD, 639.5, 359.5, 1280.0, 720.0, fov);
        // No lateral or vertical tilt at the centre…
        assert!(
            d[0].abs() < 1e-9 && d[1].abs() < 1e-9,
            "centre ≈ forward, got {d:?}"
        );
        // …and the forward component is left untouched.
        assert!((d[2] - 1.0).abs() < 1e-9);
    }

    /// The right-edge pixel tilts +right by tan(hfov/2): its lateral
    /// component equals half_w = tan(fov_y/2)*aspect at the very edge.
    #[test]
    fn right_edge_tilts_by_half_w() {
        let fov = 60_f64.to_radians();
        let half_w = (fov * 0.5).tan() * (1280.0 / 720.0);
        let d = pinhole_pixel_ray(RIGHT, DOWN, FWD, 1279.5, 359.5, 1280.0, 720.0, fov);
        assert!((d[0] - half_w).abs() < 1e-6, "x={}, half_w={half_w}", d[0]);
        assert!(d[0] > 0.0, "right edge tilts +right");
    }

    /// Statically validate every WGSL shader with naga (the same
    /// front-end + validator wgpu runs at pipeline creation), so shader
    /// edits — e.g. the GPU.10 sprite lighting bindings — are caught in
    /// CI without needing a GPU device.
    #[test]
    fn wgsl_shaders_validate() {
        // (display name, source) for each shader file that is valid on its own.
        let shaders: &[(&str, &str)] = &[
            (
                "sprite_model_dda.wgsl",
                include_str!("../shaders/sprite_model_dda.wgsl"),
            ),
            ("scene_dda.wgsl", include_str!("../shaders/scene_dda.wgsl")),
            ("blit.wgsl", include_str!("../shaders/blit.wgsl")),
            ("chunk_dda.wgsl", include_str!("../shaders/chunk_dda.wgsl")),
            ("grid_dda.wgsl", include_str!("../shaders/grid_dda.wgsl")),
            (
                "scene_blit.wgsl",
                include_str!("../shaders/scene_blit.wgsl"),
            ),
            ("line.wgsl", include_str!("../shaders/line.wgsl")),
            ("image.wgsl", include_str!("../shaders/image.wgsl")),
        ];
        let mut validator = naga::valid::Validator::new(
            naga::valid::ValidationFlags::all(),
            naga::valid::Capabilities::all(),
        );

        for &(name, src) in shaders {
            let module = match naga::front::wgsl::parse_str(src) {
                Ok(parsed) => parsed,
                Err(e) => panic!("{name}: WGSL parse failed:\n{}", e.emit_to_string(src)),
            };
            if let Err(e) = validator.validate(&module) {
                panic!("{name}: WGSL validation failed: {e:?}");
            }
        }

        // XS.4.2 — the raw `sprite_model_dda.wgsl` above is the unshadowed STUB
        // variant; also validate the sprite-shadow-CAPABLE spliced variant (the
        // terrain-shadow snippet injected) that capable devices build.
        let sprite_capable = super::sprite_shader_source(true);
        let module = match naga::front::wgsl::parse_str(&sprite_capable) {
            Ok(parsed) => parsed,
            Err(e) => panic!(
                "sprite_model_dda.wgsl (capable): parse failed:\n{}",
                e.emit_to_string(&sprite_capable)
            ),
        };
        if let Err(e) = validator.validate(&module) {
            panic!("sprite_model_dda.wgsl (capable): validation failed: {e:?}");
        }

        // XS.4.3 — the capable scene variant (sprite-cast snippet spliced in).
        let scene_capable = super::scene_shader_source(true);
        let module = match naga::front::wgsl::parse_str(&scene_capable) {
            Ok(parsed) => parsed,
            Err(e) => panic!(
                "scene_dda.wgsl (capable): parse failed:\n{}",
                e.emit_to_string(&scene_capable)
            ),
        };
        if let Err(e) = validator.validate(&module) {
            panic!("scene_dda.wgsl (capable): validation failed: {e:?}");
        }
    }

    /// A 2×2 world quad centred straight ahead projects to vertices whose
    /// homogeneous `w` equals the camera-forward distance (so the shader's
    /// `clip = ndc·w` recovers perspective-correct UVs) and whose `depth`
    /// is the euclidean range. Verifies geometry without a GPU device.
    #[test]
    fn image_vertices_carry_forward_w_and_euclidean_depth() {
        // Camera at the origin looking down +Z with an axis-aligned basis.
        let camera = crate::GpuLineCamera {
            pos: [0.0, 0.0, 0.0],
            right: [1.0, 0.0, 0.0],
            down: [0.0, 1.0, 0.0],
            forward: [0.0, 0.0, 1.0],
        };
        // Quad 10 units ahead (forward = +Z), spanning x∈[-1,1], y∈[-1,1].
        let top_left = [-1.0, -1.0, 10.0];
        let top_right = [1.0, -1.0, 10.0];
        let bottom_left = [-1.0, 1.0, 10.0];
        let bottom_right = [1.0, 1.0, 10.0];
        let image_quad = crate::GpuImageQuad {
            corners: [top_left, top_right, bottom_left, bottom_right],
            image: 0,
            tint: [1.0, 1.0, 1.0, 1.0],
            depth_test: true,
            alpha_cutoff: 0.0,
        };
        let fov_y = 60_f32.to_radians();
        let vertices = crate::build_image_vertices(&camera, &image_quad, 800, 600, fov_y, false);

        assert_eq!(vertices.len(), 6, "two triangles, no near-clip");
        for vertex in &vertices {
            assert!((vertex.w - 10.0).abs() < 1e-4, "w == forward distance");
            assert!(vertex.depth >= 10.0, "euclidean depth >= forward distance");
            assert_eq!(vertex.depth_test, 1.0);
        }
    }
}