Skip to main content

roxlap_gpu/
lib.rs

1//! WGPU-backed compute-shader renderer scaffold for the roxlap
2//! voxel engine. GPU.1 in `PORTING-GPU.md`.
3//!
4//! GPU.1's job: stand up the device + surface + swapchain on a
5//! host window (any [`raw-window-handle`](raw_window_handle)
6//! provider), present a clear-to-colour frame each render call,
7//! and give the host a one-call opt-in. No voxel marching yet — the
8//! [`examples/probe.rs`](../examples/probe.rs) standalone holds
9//! the empirical FPS baseline from GPU.0.
10//!
11//! Later sub-substages flesh `GpuRenderer::render` out: GPU.2
12//! uploads voxel data, GPU.3 dispatches the inner-DDA compute
13//! shader, GPU.4 layers in chunk skipping, GPU.5 plugs the renderer
14//! into `roxlap-scene::Scene`, …
15//!
16//! ## Host integration shape (GPU.1)
17//!
18//! ```no_run
19//! use std::sync::Arc;
20//! use roxlap_gpu::{GpuRenderer, GpuRendererSettings};
21//! # use winit::window::Window;
22//! # fn pick(w: Arc<Window>, size: (u32, u32)) -> Option<GpuRenderer> {
23//! match GpuRenderer::new_blocking(w, size, GpuRendererSettings::default()) {
24//!     Ok(r) => Some(r),
25//!     Err(e) => {
26//!         eprintln!("GPU init failed: {e}; falling back to CPU");
27//!         None
28//!     }
29//! }
30//! # }
31//! ```
32
33#![allow(clippy::must_use_candidate, clippy::too_many_lines)]
34
35pub mod camera;
36pub mod decompress;
37pub mod grid;
38// Headless rendering is a native-only test/bench aid: it blocks on
39// `pollster` + `device.poll(Wait)`, neither of which exists on wasm.
40#[cfg(not(target_arch = "wasm32"))]
41pub mod headless;
42pub mod resident;
43pub mod scene;
44pub mod sprite_model;
45
46pub use camera::Camera;
47pub use decompress::{decompress_chunk, ChunkUpload, BEDROCK_RGB, CHUNK_Z};
48pub use grid::{bounding_box_of, GpuGridResident, GridUpload};
49#[cfg(not(target_arch = "wasm32"))]
50pub use headless::HeadlessGpu;
51pub use resident::GpuChunkResident;
52pub use scene::{
53    GpuSceneResident, GridRuntimeTransform, GridStaticMeta, RefreshOutcome, SceneUpload,
54};
55pub use sprite_model::{
56    build_sprite_model, build_sprite_model_with_materials, sprite_model_from_clip_frame,
57    sprite_model_from_clip_frame_with_materials, sprite_model_from_voxel_frame,
58    sprite_model_from_voxel_frame_with_materials, SpriteInstance, SpriteInstanceTransform,
59    SpriteModel, SpriteModelRegistry, SpriteRegistryResident,
60};
61
62use std::sync::Arc;
63
64use bytemuck::{Pod, Zeroable};
65use raw_window_handle::{HasDisplayHandle, HasWindowHandle};
66
/// Caller-controllable knobs for [`GpuRenderer::new`]. Defaults
/// target "highest-performance GPU, prefer Mailbox/Immediate over
/// vsync" — i.e. the same configuration the GPU.0 probe used to
/// measure the FPS ceiling.
///
/// The type is `Copy`, so hosts can tweak a `default()` value and pass
/// it by value.
#[derive(Debug, Clone, Copy)]
pub struct GpuRendererSettings {
    /// Adapter-selection hint. The default is [`PowerPreference::High`].
    pub power_preference: PowerPreference,
    /// Initial clear colour cycled by GPU.1's empty render path.
    /// The voxel-rendering substages overwrite this entirely.
    /// Three `f64` channels (presumably R, G, B — confirm against the
    /// render path); the default is `[0.06, 0.08, 0.12]`.
    pub clear_colour: [f64; 3],
    /// Prefer mailbox/immediate when offered; falls back to FIFO if
    /// the surface only supports it (Wayland under Mesa often does).
    /// Defaults to `true`.
    pub uncapped_present: bool,
}
81
/// Adapter-selection hint carried by `GpuRendererSettings::power_preference`.
///
/// The settings' `Default` impl picks [`PowerPreference::High`], matching its
/// documented "highest-performance GPU" target.
///
/// Derives `PartialEq`/`Eq`/`Hash` so hosts can compare a configured value
/// against a variant or use it as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PowerPreference {
    /// Favour a low-power adapter (presumably forwarded as wgpu's low-power
    /// preference — confirm in the renderer constructor).
    Low,
    /// Favour the highest-performance adapter.
    High,
}
87
88impl Default for GpuRendererSettings {
89    fn default() -> Self {
90        Self {
91            power_preference: PowerPreference::High,
92            clear_colour: [0.06, 0.08, 0.12],
93            uncapped_present: true,
94        }
95    }
96}
97
/// Errors `GpuRenderer::new` surfaces to the host. The host's
/// expected flow is "try this, fall back to the CPU path on Err".
#[derive(Debug)]
pub enum GpuInitError {
    /// Surface creation failed; wraps the underlying wgpu error.
    CreateSurface(wgpu::CreateSurfaceError),
    /// No compatible adapter was found (the `Display` text hints at a
    /// missing Vulkan/Metal/DX12 driver).
    NoAdapter,
    /// The device request failed; wraps the underlying wgpu error.
    RequestDevice(wgpu::RequestDeviceError),
}
106
107impl std::fmt::Display for GpuInitError {
108    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
109        match self {
110            Self::CreateSurface(e) => write!(f, "create_surface failed: {e}"),
111            Self::NoAdapter => write!(
112                f,
113                "no compatible adapter — does this system have a Vulkan/Metal/DX12 driver?"
114            ),
115            Self::RequestDevice(e) => write!(f, "request_device failed: {e}"),
116        }
117    }
118}
119
120impl std::error::Error for GpuInitError {
121    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
122        match self {
123            Self::CreateSurface(e) => Some(e),
124            Self::RequestDevice(e) => Some(e),
125            Self::NoAdapter => None,
126        }
127    }
128}
129
130impl From<wgpu::CreateSurfaceError> for GpuInitError {
131    fn from(value: wgpu::CreateSurfaceError) -> Self {
132        Self::CreateSurface(value)
133    }
134}
135
136impl From<wgpu::RequestDeviceError> for GpuInitError {
137    fn from(value: wgpu::RequestDeviceError) -> Self {
138        Self::RequestDevice(value)
139    }
140}
141
// NOTE: the paragraph below describes `GpuRenderer` (declared further down
// in this file), not `GpuLine`. It is kept as a plain comment so rustdoc
// does not render it as part of `GpuLine`'s documentation.
//
// WGPU-backed renderer. Owns the device, queue, and surface
// bound to the host's window. `GpuRenderer::render` is the GPU.1
// clear-to-colour path; `GpuRenderer::render_chunk` is GPU.3's
// single-chunk DDA marcher.
//
// The window is consumed only at construction — `wgpu`'s
// `Surface<'static>` keeps its own `Arc` clone of the handle, so
// the renderer holds no window field of its own.

/// A world-space line segment for [`GpuRenderer::draw_lines_deferred`].
/// `color` is straight RGBA in `0..=1` (the alpha drives the over-blend);
/// `width_px` is the screen-space thickness; `depth_test` occludes the
/// segment behind nearer marched geometry.
#[derive(Clone, Copy, Debug)]
pub struct GpuLine {
    /// Segment start, in world coordinates.
    pub a: [f32; 3],
    /// Segment end, in world coordinates.
    pub b: [f32; 3],
    /// Straight (non-premultiplied) RGBA, each channel in `0..=1`.
    pub color: [f32; 4],
    /// Screen-space thickness in pixels; `build_line_vertices` clamps it
    /// to at least one pixel.
    pub width_px: f32,
    /// When `true`, the segment is hidden behind nearer marched geometry.
    pub depth_test: bool,
}
162
/// World camera basis for projecting [`GpuLine`] endpoints — the same
/// pinhole the scene-DDA pass marches with (`right`/`down`/`forward`
/// orthonormal, `pos` in world voxel units).
///
/// The projection helpers dot a world-space offset from `pos` with each
/// axis to obtain camera-space `(right, down, forward)` components.
#[derive(Clone, Copy, Debug)]
pub struct GpuLineCamera {
    /// Eye position in world voxel units.
    pub pos: [f32; 3],
    /// Camera-space +x axis, expressed in world space.
    pub right: [f32; 3],
    /// Camera-space +y axis (pointing down the screen), in world space.
    pub down: [f32; 3],
    /// View direction, in world space.
    pub forward: [f32; 3],
}
173
/// Near plane (camera-forward distance) below which a [`GpuLine`] endpoint
/// is clipped, so the pinhole divide stays finite. `clip_near_image` uses
/// the same plane for image quads.
const LINE_NEAR_Z: f32 = 0.0625;
/// Depth-test slack (euclidean world distance) so a line resting on the
/// surface it traces doesn't z-fight the marched geometry.
const LINE_DEPTH_BIAS: f32 = 0.5;
180
/// One expanded-quad vertex (`build_line_vertices` output). `pos` is NDC;
/// `depth` is the euclidean world distance of the source endpoint (the
/// marcher's `best_t` metric); `depth_test` is `1.0`/`0.0`.
///
/// `#[repr(C)]` fixes the field order; presumably it must match the vertex
/// attribute layout declared where the line pipeline is built — confirm
/// there before reordering fields.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct LineVertex {
    /// NDC xy (x is already mirrored when the viewport flip is on).
    pos: [f32; 2],
    /// Euclidean camera-to-endpoint distance, used as the depth-test key.
    depth: f32,
    /// `1.0` when the source line requested depth testing, else `0.0`.
    depth_test: f32,
    /// The source line's straight RGBA colour, copied unchanged.
    color: [f32; 4],
}
192
/// `line.wgsl` / `image.wgsl` fragment uniform (std140; padded to 32 bytes
/// so the uniform's struct stride is a 16-byte multiple).
///
/// Five 4-byte scalars plus three padding words give the 32-byte total.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct LineParams {
    /// Target width in pixels (presumably the swapchain width — confirm at
    /// the write site).
    screen_w: u32,
    /// Target height in pixels (same caveat as `screen_w`).
    screen_h: u32,
    /// Depth-test slack in world distance (presumably `LINE_DEPTH_BIAS` —
    /// confirm at the write site).
    depth_bias: f32,
    /// `1` when no scene depth buffer exists and a dummy is bound instead;
    /// keeps the shader from indexing it.
    no_depth: u32,
    /// 1 when the viewport flip is on. The depth buffer is written
    /// unflipped (the blit mirrors at read time), but these passes flip the
    /// vertex NDC X, so the fragment must mirror its depth lookup to match.
    flip_x: u32,
    /// Explicit padding up to 32 bytes; never read.
    _pad: [u32; 3],
}
208
/// Lazy-built debug-line pipeline (L3.2). The bind group is rebuilt each
/// draw (it references the current `scene_dda.depth_buffer`, which the
/// swapchain resize recreates); the pipeline / layout / uniform persist.
struct LineResources {
    /// Render pipeline for the line pass.
    pipeline: wgpu::RenderPipeline,
    /// Bind group layout, kept so the per-draw bind group can be rebuilt.
    bgl: wgpu::BindGroupLayout,
    /// Persistent uniform buffer (presumably holds a `LineParams` — confirm
    /// at the write site).
    uniform_buf: wgpu::Buffer,
    /// 1-word stand-in bound when no scene depth exists (sprite-only /
    /// empty scene); `no_depth = 1` keeps the shader from indexing it.
    dummy_depth: wgpu::Buffer,
}
220
221/// Project + expand world-space [`GpuLine`]s into screen-space quad
222/// vertices (6 per visible segment) for `line.wgsl`. Mirrors the
223/// scene-DDA pinhole (`forward + ndc_x·half_w·right − ndc_y·half_h·down`)
224/// so lines land on the marched geometry, carrying each endpoint's
225/// euclidean world distance as the depth-test key (= the marcher's
226/// `best_t`). Segments fully behind the near plane are dropped; the rest
227/// are clipped to it.
228fn build_line_vertices(
229    cam: &GpuLineCamera,
230    lines: &[GpuLine],
231    w: u32,
232    h: u32,
233    fov_y: f32,
234    flip_x: bool,
235) -> Vec<LineVertex> {
236    let aspect = w as f32 / h as f32;
237    let half_h = (fov_y * 0.5).tan();
238    let half_w = half_h * aspect;
239    let (wf, hf) = (w as f32, h as f32);
240
241    let cam_coords = |p: [f32; 3]| -> [f32; 3] {
242        let d = [p[0] - cam.pos[0], p[1] - cam.pos[1], p[2] - cam.pos[2]];
243        [
244            cam.right[0] * d[0] + cam.right[1] * d[1] + cam.right[2] * d[2],
245            cam.down[0] * d[0] + cam.down[1] * d[1] + cam.down[2] * d[2],
246            cam.forward[0] * d[0] + cam.forward[1] * d[1] + cam.forward[2] * d[2],
247        ]
248    };
249    // Camera-space point → (NDC xy, euclidean depth). NDC y is up (+1 top),
250    // matching WebGPU clip space; depth is the marcher's world-t metric.
251    let project = |q: [f32; 3]| -> ([f32; 2], f32) {
252        let inv = 1.0 / q[2];
253        let nx = q[0] * inv / half_w;
254        let ny = -q[1] * inv / half_h;
255        let depth = (q[0] * q[0] + q[1] * q[1] + q[2] * q[2]).sqrt();
256        ([nx, ny], depth)
257    };
258
259    let mut out = Vec::with_capacity(lines.len() * 6);
260    for line in lines {
261        let ca = cam_coords(line.a);
262        let cb = cam_coords(line.b);
263        let (cfa, cfb) = (ca[2], cb[2]);
264        if cfa < LINE_NEAR_Z && cfb < LINE_NEAR_Z {
265            continue;
266        }
267        // Near-clip in segment-parameter space on the forward component.
268        let (mut t0, mut t1) = (0.0f32, 1.0f32);
269        let dz = cfb - cfa;
270        if dz.abs() > f32::EPSILON {
271            let tn = (LINE_NEAR_Z - cfa) / dz;
272            if dz > 0.0 {
273                t0 = t0.max(tn);
274            } else {
275                t1 = t1.min(tn);
276            }
277        }
278        if t0 > t1 {
279            continue;
280        }
281        let lerp3 = |t: f32| {
282            [
283                ca[0] + (cb[0] - ca[0]) * t,
284                ca[1] + (cb[1] - ca[1]) * t,
285                ca[2] + (cb[2] - ca[2]) * t,
286            ]
287        };
288        let (n0, d0) = project(lerp3(t0));
289        let (n1, d1) = project(lerp3(t1));
290
291        // Expand in pixel space for a uniform screen-space thickness.
292        let to_px = |n: [f32; 2]| [(n[0] * 0.5 + 0.5) * wf, (0.5 - n[1] * 0.5) * hf];
293        let to_ndc = |p: [f32; 2]| [p[0] / wf * 2.0 - 1.0, 1.0 - p[1] / hf * 2.0];
294        let p0 = to_px(n0);
295        let p1 = to_px(n1);
296        let (dx, dy) = (p1[0] - p0[0], p1[1] - p0[1]);
297        let len = (dx * dx + dy * dy).sqrt().max(1e-6);
298        let half = line.width_px.max(1.0) * 0.5;
299        let (ex, ey) = (-dy / len * half, dx / len * half);
300
301        let c0a = to_ndc([p0[0] + ex, p0[1] + ey]);
302        let c0b = to_ndc([p0[0] - ex, p0[1] - ey]);
303        let c1a = to_ndc([p1[0] + ex, p1[1] + ey]);
304        let c1b = to_ndc([p1[0] - ex, p1[1] - ey]);
305        let dt = if line.depth_test { 1.0 } else { 0.0 };
306        // Mirror the overlay's NDC x to match the flipped scene blit.
307        let vert = |pos: [f32; 2], depth: f32| LineVertex {
308            pos: [if flip_x { -pos[0] } else { pos[0] }, pos[1]],
309            depth,
310            depth_test: dt,
311            color: line.color,
312        };
313        // Two triangles, cull disabled so winding is irrelevant.
314        out.push(vert(c0a, d0));
315        out.push(vert(c0b, d0));
316        out.push(vert(c1a, d1));
317        out.push(vert(c1a, d1));
318        out.push(vert(c0b, d0));
319        out.push(vert(c1b, d1));
320    }
321    out
322}
323
/// A world-space 2D image-sprite quad for [`GpuRenderer::draw_images_deferred`].
/// `corners` are the four world points `TL, TR, BL, BR` (UVs `(0,0) (1,0)
/// (0,1) (1,1)`); `image` indexes a texture uploaded via
/// [`GpuRenderer::upload_image`]; `tint` is straight RGBA in `0..=1`
/// (multiplied into every texel); `depth_test` occludes the quad behind
/// nearer marched geometry. The facade resolves orientation + back-face
/// culling, so this is pure geometry.
#[derive(Clone, Copy, Debug)]
pub struct GpuImageQuad {
    /// World-space corners in `TL, TR, BL, BR` order.
    pub corners: [[f32; 3]; 4],
    /// Id of a previously uploaded image texture.
    pub image: usize,
    /// Straight RGBA multiplier applied to every texel.
    pub tint: [f32; 4],
    /// When `true`, the quad is hidden behind nearer marched geometry.
    pub depth_test: bool,
    /// Texels with alpha below this (`0..=1`) are discarded in the FS.
    /// `0.0` keeps the plain over-blend.
    pub alpha_cutoff: f32,
}
341
/// One expanded textured-quad vertex (`build_image_vertices` output).
/// `ndc` is the projected NDC xy; `w` is the source `forward` depth, fed
/// back into a homogeneous clip position so the rasterizer interpolates
/// `uv` perspective-correctly; `depth` is the euclidean world distance
/// (the marcher's `best_t`) for the manual depth test.
///
/// `#[repr(C)]` fixes the field order; presumably it must match the vertex
/// attribute layout declared where the image pipeline is built — confirm
/// there before reordering fields.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct ImageVertex {
    /// Projected NDC xy (x is already mirrored when the viewport flip is on).
    ndc: [f32; 2],
    /// Camera-forward distance of the vertex.
    w: f32,
    /// Euclidean camera-to-vertex distance.
    depth: f32,
    /// `1.0` when the source quad requested depth testing, else `0.0`.
    depth_test: f32,
    /// The source quad's `alpha_cutoff`, copied unchanged.
    cutoff: f32,
    /// Texture coordinates (interpolated at near-plane crossings).
    uv: [f32; 2],
    /// The source quad's straight RGBA tint, copied unchanged.
    tint: [f32; 4],
}
358
/// Lazy-built image-sprite pipeline (mirrors [`LineResources`]). The
/// per-draw bind group adds the quad's texture + a sampler to the line
/// pass's uniform + scene-depth bindings.
struct ImageResources {
    /// Render pipeline for the image-sprite pass.
    pipeline: wgpu::RenderPipeline,
    /// Bind group layout, kept so the per-draw bind group can be built.
    bgl: wgpu::BindGroupLayout,
    /// Persistent uniform buffer for the pass.
    uniform_buf: wgpu::Buffer,
    /// Stand-in depth buffer (presumably bound when no scene depth exists,
    /// as in `LineResources` — confirm at the bind site).
    dummy_depth: wgpu::Buffer,
    /// Sampler bound next to each quad's texture.
    sampler: wgpu::Sampler,
}
369
/// A retained image-sprite texture (uploaded via
/// [`GpuRenderer::upload_image`], referenced by [`GpuImageQuad::image`]).
struct ImageResident {
    /// View bound into the per-draw image bind group.
    view: wgpu::TextureView,
    // Held so the view stays valid + the texture shows in profiler dumps.
    _texture: wgpu::Texture,
}
377
/// Camera-space textured-quad vertex (near-clip working set): the
/// `(right, down, forward)` components + the texture `uv`.
#[derive(Clone, Copy)]
struct ImgClipV {
    /// Camera-space position; index 2 is the forward distance that the
    /// near-plane test compares against `LINE_NEAR_Z`.
    cam: [f32; 3],
    /// Texture coordinates carried (and interpolated) through clipping.
    uv: [f32; 2],
}
385
386/// Clip a convex camera-space polygon against the near plane
387/// (`forward >= LINE_NEAR_Z`), interpolating UVs at each crossing.
388fn clip_near_image(poly: &[ImgClipV]) -> Vec<ImgClipV> {
389    let n = poly.len();
390    let mut out: Vec<ImgClipV> = Vec::with_capacity(n + 1);
391    for i in 0..n {
392        let cur = poly[i];
393        let prev = poly[(i + n - 1) % n];
394        let cur_in = cur.cam[2] >= LINE_NEAR_Z;
395        let prev_in = prev.cam[2] >= LINE_NEAR_Z;
396        if cur_in != prev_in {
397            let t = (LINE_NEAR_Z - prev.cam[2]) / (cur.cam[2] - prev.cam[2]);
398            out.push(ImgClipV {
399                cam: [
400                    prev.cam[0] + (cur.cam[0] - prev.cam[0]) * t,
401                    prev.cam[1] + (cur.cam[1] - prev.cam[1]) * t,
402                    LINE_NEAR_Z,
403                ],
404                uv: [
405                    prev.uv[0] + (cur.uv[0] - prev.uv[0]) * t,
406                    prev.uv[1] + (cur.uv[1] - prev.uv[1]) * t,
407                ],
408            });
409        }
410        if cur_in {
411            out.push(cur);
412        }
413    }
414    out
415}
416
417/// Project + near-clip a world-space [`GpuImageQuad`] into perspective-correct
418/// textured-quad vertices for `image.wgsl`. Mirrors the scene-DDA pinhole
419/// (the same one [`build_line_vertices`] uses), carrying each vertex's
420/// euclidean world distance as the depth-test key. Quads fully behind the
421/// near plane produce no vertices.
422fn build_image_vertices(
423    cam: &GpuLineCamera,
424    quad: &GpuImageQuad,
425    w: u32,
426    h: u32,
427    fov_y: f32,
428    flip_x: bool,
429) -> Vec<ImageVertex> {
430    let aspect = w as f32 / h as f32;
431    let half_h = (fov_y * 0.5).tan();
432    let half_w = half_h * aspect;
433    let dt = if quad.depth_test { 1.0 } else { 0.0 };
434
435    let cam_coords = |p: [f32; 3]| -> [f32; 3] {
436        let d = [p[0] - cam.pos[0], p[1] - cam.pos[1], p[2] - cam.pos[2]];
437        [
438            cam.right[0] * d[0] + cam.right[1] * d[1] + cam.right[2] * d[2],
439            cam.down[0] * d[0] + cam.down[1] * d[1] + cam.down[2] * d[2],
440            cam.forward[0] * d[0] + cam.forward[1] * d[1] + cam.forward[2] * d[2],
441        ]
442    };
443    let project = |v: ImgClipV| -> ImageVertex {
444        let (cx, cy, cz) = (v.cam[0], v.cam[1], v.cam[2]);
445        let nx = cx / (cz * half_w);
446        ImageVertex {
447            // Mirror NDC x to match the flipped scene blit.
448            ndc: [if flip_x { -nx } else { nx }, -cy / (cz * half_h)],
449            w: cz,
450            depth: (cx * cx + cy * cy + cz * cz).sqrt(),
451            depth_test: dt,
452            cutoff: quad.alpha_cutoff,
453            uv: v.uv,
454            tint: quad.tint,
455        }
456    };
457
458    // Per-corner UV: TL(0,0) TR(1,0) BL(0,1) BR(1,1).
459    let uvs = [[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
460    let verts: Vec<ImgClipV> = quad
461        .corners
462        .iter()
463        .zip(uvs)
464        .map(|(c, uv)| ImgClipV {
465            cam: cam_coords(*c),
466            uv,
467        })
468        .collect();
469
470    let mut out = Vec::with_capacity(12);
471    for tri in [[0usize, 1, 2], [1, 3, 2]] {
472        let poly = [verts[tri[0]], verts[tri[1]], verts[tri[2]]];
473        let clipped = clip_near_image(&poly);
474        if clipped.len() < 3 {
475            continue;
476        }
477        for i in 1..clipped.len() - 1 {
478            out.push(project(clipped[0]));
479            out.push(project(clipped[i]));
480            out.push(project(clipped[i + 1]));
481        }
482    }
483    out
484}
485
/// WGPU-backed renderer. Owns the device, queue, and surface
/// bound to the host's window. [`Self::render`] is the GPU.1
/// clear-to-colour path; [`Self::render_chunk`] is GPU.3's
/// single-chunk DDA marcher.
///
/// The window is consumed only at construction — `wgpu`'s
/// `Surface<'static>` keeps its own `Arc` clone of the handle, so
/// the renderer holds no window field of its own.
#[allow(clippy::struct_excessive_bools)] // independent per-frame flags, not a state enum
pub struct GpuRenderer {
    surface: wgpu::Surface<'static>,
    surface_config: wgpu::SurfaceConfiguration,
    device: wgpu::Device,
    queue: wgpu::Queue,
    adapter_info: String,
    clear_colour: [f64; 3],
    frame_count: u32,
    /// Mirror the marched scene horizontally on present (the scene blit
    /// samples `width-1-x`, and line/image overlays mirror their NDC x).
    /// The egui pass is unaffected. See [`Self::set_flip_x`].
    flip_x: bool,
    /// Lazy-built on first [`Self::render_chunk`] call; rebuilt when
    /// the swapchain resizes (storage texture must match).
    chunk_dda: Option<ChunkDdaResources>,
    /// Lazy-built on first [`Self::render_grid`] call; same resize
    /// trigger as `chunk_dda`. The two paths share the same blit
    /// pipeline structure but bind different storage layouts.
    grid_dda: Option<GridDdaResources>,
    /// Lazy-built on first [`Self::render_scene`] call. Holds the
    /// multi-grid pipeline + per-grid camera uniforms.
    scene_dda: Option<SceneDdaResources>,
    /// TV.6 — global voxel-material palette mirrored to the scene pass (256
    /// entries, default all-opaque), set via [`Self::set_scene_materials`].
    scene_materials: Box<[MaterialGpu; 256]>,
    /// TV.6 — terrain colour→material map (`[rgb, material_id]` rows) +
    /// whether any mapped material is translucent (the shader gate).
    scene_terrain_map: Vec<[u32; 2]>,
    scene_terrain_translucent: bool,
    /// Whether the *current* deferred frame ran a scene pass that wrote
    /// `scene_dda.depth_buffer`. [`Self::render_scene`] sets it; the
    /// color-only [`Self::render_clear_deferred`] clears it. Without this,
    /// depth-tested overlays (`draw_lines_deferred` / `draw_image`) drawn
    /// over an empty/cleared scene would test against the *previous*
    /// scene's stale depth and clip incorrectly.
    scene_depth_valid: bool,
    /// GPU.8 — panoramic sky texture + sampler. Created at
    /// `new` as a 1×1 mid-grey default; [`Self::set_sky_panorama`]
    /// replaces it. The scene-DDA bind group references this each
    /// frame.
    sky_texture: wgpu::Texture,
    sky_view: wgpu::TextureView,
    sky_sampler: wgpu::Sampler,
    /// GPU.8 fog state. `color` is BGRA-style premultiplied (each
    /// channel in [0, 1]); `near` is the world-t distance at which
    /// fog starts kicking in; `far` is the distance at which it's
    /// fully opaque. The shader does
    /// `mix(hit, fog, smoothstep(near, far, t))`.
    fog_color: [f32; 3],
    fog_near: f32,
    fog_far: f32,
    /// GPU.10 — sprites rendered as DDA-marched voxel models (the
    /// precise path; the GPU.9 compute splatter it replaced was
    /// retired in 10.5). Holds the concatenated model registry + the
    /// per-frame instance array; set via [`Self::set_sprite_instances`].
    sprite_registry: Option<sprite_model::SpriteRegistryResident>,
    /// Lazy-built pipeline + uniform for the model-DDA pass.
    sprite_model_dda: Option<SpriteModelDdaResources>,
    /// TV — global voxel-material palette mirrored to the sprite pass (256
    /// entries, default all-opaque), set via [`Self::set_sprite_materials`].
    /// `sprite_has_translucent` gates the shader's accumulate path.
    sprite_materials: Box<[MaterialGpu; 256]>,
    sprite_has_translucent: bool,
    /// GPU.10.4 — LOD aggressiveness: step a sprite to the next mip
    /// once a mip-0 voxel projects below this many screen pixels.
    /// Defaults to 4.0 (the empirical sweet spot); the host can tune
    /// via [`Self::set_sprite_lod_px`].
    sprite_lod_px: f32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` is marched at the mip level
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// ladder. `0` disables LOD (always mip-0). Tunable via
    /// [`Self::set_scene_mip_scan_dist`] — the axis-aligned-mip-beams
    /// mitigation (GPU.11.2) pushes it outward if banding appears.
    scene_mip_scan_dist: f32,
    /// Per-face grid side-shades (voxlap setsideshades), packed for the
    /// scene-DDA uniform: `[0]=(top,bot,left,right)`, `[1]=(up,down,_,_)`.
    /// Each is the u8 shade intensity. `[[0;4];2]` = no shading. Set via
    /// [`Self::set_scene_side_shades`].
    scene_side_shades: [[i32; 4]; 2],
    /// Vertical FOV (radians) the last `render_scene` marched with —
    /// cached so [`Self::pixel_ray`] reconstructs the matching view ray
    /// for picking. `0` until the first scene render.
    last_fov_y_rad: f32,
    /// The acquired-but-not-yet-presented swapchain frame from the most
    /// recent deferred render ([`Self::render_scene`] /
    /// [`Self::render_clear_deferred`]). [`Self::present`] shows it as
    /// is; [`Self::paint_egui`] overlays egui first. Lets a host slot a
    /// UI pass between the marcher and present. `None` between present
    /// and the next render.
    pending_frame: Option<(wgpu::SurfaceTexture, wgpu::TextureView)>,
    /// Lazy-built debug-line pipeline (L3.2) — built on the first
    /// [`Self::draw_lines_deferred`] call.
    line_resources: Option<LineResources>,
    /// Persistent debug-line vertex buffer (L3.3) — grown on demand and
    /// reused across frames so a per-frame overlay (hundreds of segments)
    /// costs one `write_buffer`, not a fresh allocation. `line_vbuf_cap`
    /// is its capacity in bytes.
    line_vbuf: Option<wgpu::Buffer>,
    line_vbuf_cap: u64,
    /// Lazy-built image-sprite pipeline — built on the first
    /// [`Self::draw_images_deferred`] call.
    image_resources: Option<ImageResources>,
    /// Persistent image-sprite vertex buffer, grown on demand and reused
    /// across frames (like [`Self::line_vbuf`]).
    image_vbuf: Option<wgpu::Buffer>,
    image_vbuf_cap: u64,
    /// Retained image-sprite textures, indexed by the id
    /// [`Self::upload_image`] returns. A dropped slot is `None` and is
    /// re-used by a later upload.
    images: Vec<Option<ImageResident>>,
    /// Lazy-built `egui-wgpu` paint pipeline; created on the first
    /// [`Self::paint_egui`] call (`hud` feature).
    #[cfg(feature = "hud")]
    egui_renderer: Option<egui_wgpu::Renderer>,
}
603
/// Per-renderer chunk-DDA pipeline state. The compute shader writes
/// into the storage texture; a fullscreen-triangle render pass
/// nearest-neighbour blits it to the swapchain.
struct ChunkDdaResources {
    /// Size the storage texture was created with (presumably
    /// `(width, height)` in pixels, compared against the swapchain to
    /// detect a resize — confirm at the rebuild site).
    storage_size: (u32, u32),
    /// View of the storage texture the compute pass writes.
    storage_view: wgpu::TextureView,
    /// Uniform buffer for the DDA compute pass.
    uniform_buf: wgpu::Buffer,
    /// Bind group layout of the DDA compute pass.
    bgl_dda: wgpu::BindGroupLayout,
    /// The DDA compute pipeline.
    pipeline_dda: wgpu::ComputePipeline,
    /// Bind group for the blit pass.
    blit_bg: wgpu::BindGroup,
    /// The fullscreen-triangle blit pipeline.
    pipeline_blit: wgpu::RenderPipeline,
    // wgpu BindGroups internally Arc their resources, but we keep
    // the handle so the sampler shows up in profiler dumps.
    _sampler: wgpu::Sampler,
}
619
/// Per-renderer grid-DDA pipeline state. Carries the same field set as
/// `ChunkDdaResources`: a compute pipeline writing a storage texture plus
/// a blit pipeline and its bind group.
struct GridDdaResources {
    /// Size the storage texture was created with (presumably
    /// `(width, height)` in pixels — confirm at the rebuild site).
    storage_size: (u32, u32),
    /// View of the storage texture.
    storage_view: wgpu::TextureView,
    /// Uniform buffer for the DDA compute pass.
    uniform_buf: wgpu::Buffer,
    /// Bind group layout of the DDA compute pass.
    bgl_dda: wgpu::BindGroupLayout,
    /// The DDA compute pipeline.
    pipeline_dda: wgpu::ComputePipeline,
    /// Bind group for the blit pass.
    blit_bg: wgpu::BindGroup,
    /// The blit render pipeline.
    pipeline_blit: wgpu::RenderPipeline,
    // Held but never read (presumably for the same profiler-visibility
    // reason as `ChunkDdaResources::_sampler` — confirm).
    _sampler: wgpu::Sampler,
}
630
/// Per-renderer scene-DDA pipeline state: the compute + blit pipelines and
/// the buffers they share (framebuffer, depth, depth readback, material
/// palette, terrain map).
struct SceneDdaResources {
    /// Size the per-pixel buffers were created for (presumably
    /// `(width, height)` in pixels — confirm at the rebuild site).
    storage_size: (u32, u32),
    /// Framebuffer as a packed-`rgba8unorm` storage **buffer** (row
    /// stride = width), written by the scene + sprite compute passes
    /// and read by the blit. A buffer (not a storage texture) dodges
    /// Chrome-Dawn's tiled write-texture layout (which produced a
    /// 128×256-tiled image); linear + explicit stride is portable.
    framebuffer: wgpu::Buffer,
    /// Uniform buffer for the scene-DDA compute pass.
    uniform_buf: wgpu::Buffer,
    /// Bind group layout of the scene-DDA compute pass.
    bgl_dda: wgpu::BindGroupLayout,
    /// The scene-DDA compute pipeline.
    pipeline_dda: wgpu::ComputePipeline,
    /// Bind group for the blit pass.
    blit_bg: wgpu::BindGroup,
    /// The blit render pipeline.
    pipeline_blit: wgpu::RenderPipeline,
    /// Blit uniform: `[width, height, flip_x, _pad]`. Retained so the flip
    /// flag (offset 8) can be re-written per frame.
    blit_dims: wgpu::Buffer,
    /// GPU.9 — per-pixel world-t depth (f32 bits as u32), sized
    /// `width * height * 4`. The scene pass writes it when sprites
    /// are present; the sprite model-DDA pass reads + composites
    /// against it.
    depth_buffer: wgpu::Buffer,
    /// Picking — a `COPY_DST | MAP_READ` staging copy of `depth_buffer`
    /// so the host can read back the per-pixel world-t after a frame
    /// (e.g. click → which voxel). Same size as `depth_buffer`.
    depth_readback: wgpu::Buffer,
    /// TV.6 — global voxel-material palette (256 `MaterialGpu`, binding 16),
    /// seeded from `scene_materials`, rewritten by [`GpuRenderer::set_scene_materials`].
    materials_pal_buf: wgpu::Buffer,
    /// TV.6 — terrain colour→material map (`[rgb, material_id]` rows, binding
    /// 17); ≥1 element (wgpu rejects a zero-sized storage binding).
    terrain_map_buf: wgpu::Buffer,
}
663
/// GPU.10.0 — single-sprite model-DDA pipeline: one thread per pixel
/// marches the model voxel volume and composites against the scene
/// depth buffer.
struct SpriteModelDdaResources {
    /// Bind group layout of the model-DDA compute pass.
    bgl: wgpu::BindGroupLayout,
    /// The model-DDA compute pipeline.
    pipeline: wgpu::ComputePipeline,
    /// Per-frame uniform buffer (presumably holds a `SpriteModelUniform` —
    /// confirm at the write site).
    uniform_buf: wgpu::Buffer,
    /// TV — global voxel-material palette (256 `MaterialGpu`, binding 12),
    /// seeded from the renderer's `sprite_materials` and rewritten by
    /// [`GpuRenderer::set_sprite_materials`].
    materials_buf: wgpu::Buffer,
}
676
/// Per-frame uniform for the model-DDA pass. Mirrors `Uniform` in
/// `sprite_model_dda.wgsl` (std140). Per-model + per-instance data
/// now live in storage buffers; this holds only the camera, fog, and
/// instance count.
///
/// Each `[f32; 3]` is followed by an explicit `f32` pad so every camera
/// vector occupies 16 bytes; field order must stay in step with the WGSL
/// struct.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SpriteModelUniform {
    /// Camera position.
    cam_pos: [f32; 3],
    _p0: f32,
    /// Camera right axis.
    cam_right: [f32; 3],
    _p1: f32,
    /// Camera down axis.
    cam_down: [f32; 3],
    _p2: f32,
    /// Camera forward axis.
    cam_forward: [f32; 3],
    _p3: f32,
    /// Fog colour in the first three lanes (the fourth presumably carries
    /// `fog_near`, as in `SceneDdaUniform` — confirm at the write site).
    fog_color: [f32; 4],
    /// Target size in pixels (presumably `[width, height]` — confirm).
    screen_size: [u32; 2],
    /// Number of sprite instances this frame.
    instance_count: u32,
    /// Fog far distance.
    fog_far: f32,
    /// Vertical field of view in radians.
    fov_y_rad: f32,
    /// Number of screen tiles per row used for instance binning.
    tiles_x: u32,
    /// Screen-tile edge in pixels (presumably `SPRITE_TILE_SIZE` — confirm).
    tile_size: u32,
    /// TV — 1 if any palette material is translucent: gates the shader's
    /// accumulate path. 0 ⇒ the unchanged nearest-hit opaque path.
    has_translucent: u32,
}
703
/// GPU.10.3 — sprite screen-tile edge in pixels for instance binning
/// (tiles are square: 16×16 pixels).
const SPRITE_TILE_SIZE: u32 = 16;
706
/// One material in the GPU sprite material palette (binding 12). Mirrors
/// `Mat` in `sprite_model_dda.wgsl` (std430, 8 bytes). TV stage.
///
/// The same element type also backs the renderer's scene palette
/// (`scene_materials`, 256 entries), so a layout change affects both the
/// sprite and the scene passes.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct MaterialGpu {
    /// Opacity / additive intensity, normalised to `0..=1`.
    alpha: f32,
    /// [`roxlap_formats::material::BlendMode`] discriminant.
    mode: u32,
}
717
718/// Convert the global [`MaterialTable`](roxlap_formats::material::MaterialTable)
719/// into the GPU palette + a flag of whether any material is non-opaque (the
720/// shader gate — an all-opaque palette runs the unchanged first-hit path).
721fn material_palette(
722    table: &roxlap_formats::material::MaterialTable,
723) -> (Box<[MaterialGpu; 256]>, bool) {
724    let mut out = Box::new(
725        [MaterialGpu {
726            alpha: 1.0,
727            mode: 0,
728        }; 256],
729    );
730    let mut any_translucent = false;
731    for (id, slot) in out.iter_mut().enumerate() {
732        let m = table.get(id as u8);
733        slot.alpha = f32::from(m.alpha) / 255.0;
734        slot.mode = u32::from(m.mode.as_u8());
735        if !m.is_opaque() {
736            any_translucent = true;
737        }
738    }
739    (out, any_translucent)
740}
741
742/// Build the per-grid camera storage buffer bound at `scene_dda.wgsl`
743/// binding 15 (read-only). One [`SceneDdaPerGridCamera`] per grid; the
744/// shader only indexes `0..grid_count`. An empty scene pads to one
745/// zeroed element (wgpu rejects a zero-sized storage binding). This
746/// replaces the old fixed `[…; 16]` uniform array, so a scene can hold
747/// any number of grids — the only ceiling is the device's storage size.
748fn upload_grid_cameras(device: &wgpu::Device, cams: &[SceneDdaPerGridCamera]) -> wgpu::Buffer {
749    use wgpu::util::DeviceExt;
750    let one = [SceneDdaPerGridCamera::zeroed()];
751    let src: &[SceneDdaPerGridCamera] = if cams.is_empty() { &one } else { cams };
752    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
753        label: Some("roxlap-gpu scene_dda.grid_cameras"),
754        contents: bytemuck::cast_slice(src),
755        usage: wgpu::BufferUsages::STORAGE,
756    })
757}
758
// The scene_dda bind group + layout wire occupancy pages 1..=3 at
// bindings 12..=14 explicitly; keep that in lockstep with the page
// count. Bump the bindings (here, in the WGSL, and in the bind
// group) if MAX_OCC_PAGES changes.
//
// This is a compile-time assertion: the build fails as soon as
// `scene::MAX_OCC_PAGES` stops being 4.
const _: () = assert!(scene::MAX_OCC_PAGES == 4);
764
/// One grid's camera as stored in the per-grid camera storage buffer
/// (`scene_dda.wgsl` binding 15; see `upload_grid_cameras`).
///
/// Each `[f32; 3]` is followed by an explicit `f32` pad, so every vector
/// occupies 16 bytes and the whole element is 64 bytes. The layout
/// presumably mirrors a WGSL struct of four `vec3<f32>` — confirm against
/// the shader before changing it.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaPerGridCamera {
    /// Camera position.
    pos: [f32; 3],
    _pad0: f32,
    /// Camera right axis.
    right: [f32; 3],
    _pad1: f32,
    /// Camera down axis.
    down: [f32; 3],
    _pad2: f32,
    /// Camera forward axis.
    forward: [f32; 3],
    _pad3: f32,
}
777
778impl SceneDdaPerGridCamera {
779    fn from_camera(c: &Camera) -> Self {
780        Self {
781            pos: c.position,
782            _pad0: 0.0,
783            right: c.right,
784            _pad1: 0.0,
785            down: c.down,
786            _pad2: 0.0,
787            forward: c.forward,
788            _pad3: 0.0,
789        }
790    }
791}
792
/// Uniform block for the scene DDA shader, in the byte layout the GPU
/// reads (`repr(C)` + `Pod`). Field order and the explicit `_pad*`
/// members are part of that layout — keep them in step with the WGSL
/// declaration when changing anything here.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaUniform {
    /// Vertical field of view in radians (going by the name).
    fov_y_rad: f32,
    /// Number of grids; the shader indexes the per-grid camera buffer
    /// over `0..grid_count`.
    grid_count: u32,
    /// Presumably the cap on outer (chunk-level) DDA steps per ray —
    /// TODO confirm against the shader.
    max_outer_steps: u32,
    /// Layout padding.
    _pad0: u32,
    /// Render target size in pixels, `[width, height]`.
    screen_size: [u32; 2],
    /// Layout padding.
    _pad1: [u32; 2],
    /// GPU.8 — `[r, g, b, fog_near]`. The `near` distance is packed
    /// into the colour's alpha channel to keep std140 alignment
    /// tidy (a bare `f32` after the `vec4` would force extra pads).
    fog_color: [f32; 4],
    /// GPU.8 — fog far distance (the near distance travels in
    /// `fog_color[3]`).
    fog_far: f32,
    /// GPU.9 — `1` when the sprite pass is active (scene pass then
    /// records `best_t` into the depth buffer), `0` otherwise.
    write_depth: u32,
    /// Occupancy paging: words per storage page (see
    /// `scene::split_occupancy_pages`). Only consulted by the shader
    /// when `occ_num_pages > 1`.
    occ_page_words: u32,
    /// Number of real occupancy pages (1 on multi-GiB GPUs → the
    /// shader takes a branch-free single-page read).
    occ_num_pages: u32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` marches at mip
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// count. `0` disables LOD (always mip-0).
    mip_scan_dist: f32,
    /// TV.6 — `1` if any terrain material is translucent (gates the
    /// accumulate path; `0` ⇒ unchanged opaque first-hit march).
    terrain_has_translucent: u32,
    /// TV.6 — number of `(rgb, material_id)` entries in the terrain map.
    terrain_map_count: u32,
    /// Layout padding.
    _pad4: u32,
    /// World camera used only to derive the per-pixel sky direction —
    /// always valid, so a `grid_count == 0` (sprite-only / empty) scene
    /// still paints a proper sky instead of a degenerate `(0,0,1)`
    /// (whose `atan2(0,0)` sky lookup samples black).
    sky_cam: SceneDdaPerGridCamera,
    /// Per-face side-shade intensities (voxlap setsideshades), each the
    /// u8 shade subtracted from a voxel's brightness byte at a hit.
    /// `side_shades0 = (top, bot, left, right)`,
    /// `side_shades1 = (up, down, _, _)`. All-zero = no shading.
    side_shades0: [i32; 4],
    /// Second half of the side shades — see `side_shades0`.
    side_shades1: [i32; 4],
}
840
/// Uniform block in GPU byte layout (`repr(C)` + `Pod`); going by the
/// name it feeds the grid DDA shader — its use is outside this excerpt.
/// The fields pack into 16-byte rows (112 bytes total); field order and
/// the `_pad*` members are part of the layout.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct GridDdaUniform {
    /// Camera position.
    camera_pos: [f32; 3],
    /// Layout padding.
    _pad0: f32,
    /// Camera right vector.
    camera_right: [f32; 3],
    /// Layout padding.
    _pad1: f32,
    /// Camera down vector.
    camera_down: [f32; 3],
    /// Layout padding.
    _pad2: f32,
    /// Camera forward vector; `fov_y_rad` fills the fourth lane of
    /// this row in place of padding.
    camera_forward: [f32; 3],
    /// Vertical field of view in radians (going by the name).
    fov_y_rad: f32,
    /// Render target size in pixels, `[width, height]`.
    screen_size: [u32; 2],
    /// NOTE(review): presumably the per-chunk voxel side length, as in
    /// `ChunkDdaUniform::vsid` — confirm against the caller.
    vsid: u32,
    /// Presumably the cap on outer (chunk-level) DDA steps per ray —
    /// TODO confirm against the shader.
    max_outer_steps: u32,
    /// Grid extent in chunks per axis (going by the name).
    chunks_dims: [u32; 3],
    /// Layout padding.
    _pad3: u32,
    /// Chunk coordinate of the grid origin (going by the name); signed.
    origin_chunk: [i32; 3],
    /// Layout padding.
    _pad4: u32,
}
860
/// Uniform block for the single-chunk DDA pass, in GPU byte layout
/// (`repr(C)` + `Pod`). `GpuRenderer::render_chunk` fills one of these
/// per frame and writes it to the chunk-DDA uniform buffer. The fields
/// pack into five 16-byte rows (80 bytes); field order and the `_pad*`
/// members are part of the layout.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct ChunkDdaUniform {
    /// Camera position (`Camera::position`), chunk-local voxel units.
    camera_pos: [f32; 3],
    /// Layout padding; written as `0.0`.
    _pad0: f32,
    /// Camera right vector (`Camera::right`).
    camera_right: [f32; 3],
    /// Layout padding; written as `0.0`.
    _pad1: f32,
    /// Camera down vector (`Camera::down`).
    camera_down: [f32; 3],
    /// Layout padding; written as `0.0`.
    _pad2: f32,
    /// Camera forward vector (`Camera::forward`); `fov_y_rad` fills the
    /// fourth lane of this row in place of padding.
    camera_forward: [f32; 3],
    /// Field of view copied from `Camera::fov_y_rad`.
    fov_y_rad: f32,
    /// Swapchain size in pixels, `[width, height]`.
    screen_size: [u32; 2],
    /// Copied from the resident chunk's `vsid`.
    vsid: u32,
    /// Caller-supplied cap on the per-pixel DDA loop.
    max_scan_dist: u32,
}
876
877impl GpuRenderer {
878    /// Stand up the device + surface + swapchain on `window`. Async
879    /// because `wgpu::Adapter`/`Device` requests are.
880    ///
881    /// `window` is any [`raw-window-handle`] provider (winit, SDL,
882    /// GLFW, …) wrapped in an `Arc`; `size` is its initial physical
883    /// framebuffer size in pixels — passed explicitly so the renderer
884    /// stays decoupled from any one windowing library's size API.
885    ///
886    /// [`raw-window-handle`]: raw_window_handle
887    ///
888    /// # Errors
889    /// Returns [`GpuInitError`] if surface creation, adapter
890    /// selection, or device request fails. Hosts treat any error as
891    /// "fall back to the CPU path".
892    pub async fn new<W>(
893        window: Arc<W>,
894        size: (u32, u32),
895        settings: GpuRendererSettings,
896    ) -> Result<Self, GpuInitError>
897    where
898        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
899    {
900        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle());
901        let surface = instance.create_surface(window.clone())?;
902        let adapter = Self::request_adapter(&instance, Some(&surface), settings).await?;
903        let (device, queue) = Self::request_device(&adapter).await?;
904        Ok(Self::finish_init(
905            &adapter, device, queue, surface, size, settings,
906        ))
907    }
908
    /// wasm/WebGPU: build the renderer against an HTML `canvas`. No
    /// `Send + Sync` bound — wgpu's surface/device/queue are `!Send` on
    /// the `+atomics` shared-memory wasm build, and the browser host is
    /// single-threaded (`Rc<RefCell<…>>`). The native generic-`W` entry
    /// (which carries the bound) isn't reachable on wasm.
    ///
    /// Requests the adapter **and** the device **before**
    /// `create_surface`: on wasm, creating the surface calls
    /// `canvas.getContext("webgpu")`, which permanently locks the
    /// canvas's context type. If we bound it and then GPU init failed,
    /// a CPU/WebGL2 fallback on the *same* canvas (the facade clones the
    /// handle, but it's the same DOM element) would fail with
    /// "no webgl2 context". Creating the surface last leaves the canvas
    /// pristine when WebGPU is unavailable.
    ///
    /// `size` is the initial framebuffer size in pixels, as for
    /// [`Self::new`].
    ///
    /// # Errors
    /// See [`Self::new`].
    #[cfg(target_arch = "wasm32")]
    pub async fn new_from_canvas(
        canvas: web_sys::HtmlCanvasElement,
        size: (u32, u32),
        settings: GpuRendererSettings,
    ) -> Result<Self, GpuInitError> {
        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle());
        // Order matters: both `requestAdapter` and `requestDevice` can
        // fail on wasm, so both run before `create_surface` binds the
        // canvas to a WebGPU context. Do not hoist the surface creation.
        let adapter = Self::request_adapter(&instance, None, settings).await?;
        let (device, queue) = Self::request_device(&adapter).await?;
        let surface = instance.create_surface(wgpu::SurfaceTarget::Canvas(canvas))?;
        Ok(Self::finish_init(
            &adapter, device, queue, surface, size, settings,
        ))
    }
944
945    /// Pick a GPU adapter at the settings' power preference. `None`
946    /// `compatible_surface` is used on the wasm canvas path so the probe
947    /// doesn't bind the canvas's context (see [`Self::new_from_canvas`]);
948    /// WebGPU exposes a single surface-independent adapter, so this is
949    /// safe there.
950    async fn request_adapter(
951        instance: &wgpu::Instance,
952        compatible_surface: Option<&wgpu::Surface<'static>>,
953        settings: GpuRendererSettings,
954    ) -> Result<wgpu::Adapter, GpuInitError> {
955        let power_preference = match settings.power_preference {
956            PowerPreference::Low => wgpu::PowerPreference::LowPower,
957            PowerPreference::High => wgpu::PowerPreference::HighPerformance,
958        };
959        instance
960            .request_adapter(&wgpu::RequestAdapterOptions {
961                power_preference,
962                compatible_surface,
963                force_fallback_adapter: false,
964            })
965            .await
966            .map_err(|_| GpuInitError::NoAdapter)
967    }
968
969    /// Request the device + queue from `adapter`. Pulled out of
970    /// [`Self::finish_init`] so the wasm canvas path can validate the
971    /// device **before** `create_surface` binds the canvas's WebGPU
972    /// context — if the device request fails (e.g. a browser that
973    /// rejects a wgpu-sent limit), the canvas stays pristine for the
974    /// CPU/WebGL2 fallback instead of being poisoned.
975    async fn request_device(
976        adapter: &wgpu::Adapter,
977    ) -> Result<(wgpu::Device, wgpu::Queue), GpuInitError> {
978        Ok(adapter
979            .request_device(&wgpu::DeviceDescriptor {
980                label: Some("roxlap-gpu device"),
981                required_features: wgpu::Features::empty(),
982                required_limits: pick_required_limits(&adapter.limits()),
983                experimental_features: wgpu::ExperimentalFeatures::disabled(),
984                memory_hints: wgpu::MemoryHints::default(),
985                trace: wgpu::Trace::Off,
986            })
987            .await?)
988    }
989
    /// Shared swapchain → sky/sampler setup, run after the adapter +
    /// device + surface exist (the surface comes from a window handle on
    /// native, or an HTML canvas on wasm — created last on wasm so a
    /// failed device request never touches the canvas).
    ///
    /// In order: formats the adapter description, picks the swapchain
    /// format and present mode, configures `surface` at `size` (each
    /// dimension raised to at least 1), uploads the 1×1 default sky,
    /// creates the sky sampler, and returns the renderer with every
    /// lazily built resource unset.
    ///
    /// `settings` supplies `uncapped_present` (whether to look beyond
    /// `Fifo`) and `clear_colour`.
    fn finish_init(
        adapter: &wgpu::Adapter,
        device: wgpu::Device,
        queue: wgpu::Queue,
        surface: wgpu::Surface<'static>,
        size: (u32, u32),
        settings: GpuRendererSettings,
    ) -> Self {
        let info = adapter.get_info();
        let adapter_info = format!(
            "{name} ({backend:?}, {device_type:?})",
            name = info.name,
            backend = info.backend,
            device_type = info.device_type,
        );

        let caps = surface.get_capabilities(adapter);
        // Pick a NON-sRGB, 8-bit swapchain format. Voxlap colours are
        // already sRGB-encoded (the slab bytes are display-ready,
        // matching what the CPU softbuffer path writes straight to the
        // framebuffer with no conversion); an sRGB swapchain would
        // re-apply the gamma curve, washing the look out. We also
        // *prefer 8-bit BGRA/RGBA* over any other non-sRGB format: some
        // adapters (e.g. NVK) advertise a 16-bit-unorm format first,
        // and wgpu 29 gates `create_view` on 16-bit-norm formats behind
        // the `TEXTURE_FORMAT_16BIT_NORM` device feature (which we don't
        // enable, to stay WebGPU-portable). Falls back to the first
        // non-sRGB format, then `caps.formats[0]`.
        //
        // NOTE(review): `caps.formats[0]` is evaluated eagerly and
        // indexes the list, so an empty format list panics here.
        let surface_format = caps
            .formats
            .iter()
            .copied()
            .find(|f| {
                matches!(
                    f,
                    wgpu::TextureFormat::Bgra8Unorm | wgpu::TextureFormat::Rgba8Unorm
                )
            })
            .or_else(|| caps.formats.iter().copied().find(|f| !f.is_srgb()))
            .unwrap_or(caps.formats[0]);
        // `Fifo` unless the host opted into uncapped presentation, in
        // which case `pick_present_mode` chooses from what is offered.
        let present_mode = if settings.uncapped_present {
            pick_present_mode(&caps.present_modes)
        } else {
            wgpu::PresentMode::Fifo
        };
        // GPU.11.2 — surface the present mode: `Fifo` is vsync-capped
        // (FPS pinned to refresh rate → compute optimisations like the
        // mip LOD won't show up in the FPS counter). Mailbox/Immediate
        // are uncapped. Wayland under Mesa frequently offers only Fifo.
        eprintln!(
            "roxlap-gpu: present mode = {present_mode:?} (available: {:?})",
            caps.present_modes,
        );
        let (init_w, init_h) = size;
        let surface_config = wgpu::SurfaceConfiguration {
            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
            format: surface_format,
            // A zero dimension is raised to 1 before configuring.
            width: init_w.max(1),
            height: init_h.max(1),
            present_mode,
            // NOTE(review): like `caps.formats[0]`, this index panics
            // if the surface reports no alpha modes.
            alpha_mode: caps.alpha_modes[0],
            view_formats: vec![],
            desired_maximum_frame_latency: 2,
        };
        surface.configure(&device, &surface_config);

        // GPU.8 default sky: a 1×1 mid-grey texture. Hosts replace
        // it via `set_sky_panorama` with a real equirectangular
        // panorama; the default stops the shader sampling
        // uninitialised memory before that happens.
        let default_sky_pixel = [0x80u8, 0x80, 0x80, 0xff];
        let (sky_texture, sky_view) = create_sky_texture(&device, 1, 1, &default_sky_pixel);
        queue.write_texture(
            wgpu::TexelCopyTextureInfo {
                texture: &sky_texture,
                mip_level: 0,
                origin: wgpu::Origin3d::ZERO,
                aspect: wgpu::TextureAspect::All,
            },
            &default_sky_pixel,
            wgpu::TexelCopyBufferLayout {
                offset: 0,
                bytes_per_row: Some(4),
                rows_per_image: Some(1),
            },
            wgpu::Extent3d {
                width: 1,
                height: 1,
                depth_or_array_layers: 1,
            },
        );
        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
            label: Some("roxlap-gpu sky_sampler"),
            // Voxlap-convention panorama: u = elevation [0, 1]
            // (Repeat is a no-op since values don't go outside),
            // v = azimuth (wraps 360° — Repeat is required).
            address_mode_u: wgpu::AddressMode::Repeat,
            address_mode_v: wgpu::AddressMode::Repeat,
            address_mode_w: wgpu::AddressMode::ClampToEdge,
            mag_filter: wgpu::FilterMode::Linear,
            min_filter: wgpu::FilterMode::Linear,
            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
            ..Default::default()
        });

        Self {
            surface,
            surface_config,
            device,
            queue,
            adapter_info,
            clear_colour: settings.clear_colour,
            frame_count: 0,
            flip_x: false,
            // DDA resource sets start unset (`render_chunk` builds the
            // chunk one on first use).
            chunk_dda: None,
            grid_dda: None,
            scene_dda: None,
            // Same all-opaque default (`alpha` 1.0, `mode` 0) that
            // `material_palette` starts from.
            scene_materials: Box::new(
                [MaterialGpu {
                    alpha: 1.0,
                    mode: 0,
                }; 256],
            ),
            scene_terrain_map: Vec::new(),
            scene_terrain_translucent: false,
            scene_depth_valid: false,
            sky_texture,
            sky_view,
            sky_sampler,
            // Fog disabled by default — voxlap's CPU rasterizer
            // also runs without fog in the scene-demo, so matching
            // it means no GPU fog out of the box. Hosts can opt in
            // via `set_fog` (e.g. for atmospheric far-LOD masking).
            fog_color: [0.66, 0.74, 0.88],
            fog_near: 0.0,
            fog_far: 1.0e30,
            sprite_registry: None,
            sprite_model_dda: None,
            sprite_materials: Box::new(
                [MaterialGpu {
                    alpha: 1.0,
                    mode: 0,
                }; 256],
            ),
            sprite_has_translucent: false,
            // GPU.10.4 — default LOD threshold: step to a coarser mip
            // once a voxel projects below 4 px. Empirically the best
            // quality/cost tradeoff; the host can override.
            sprite_lod_px: 4.0,
            // GPU.11.1 — matches the CPU demo's mip_scan_dist=64.
            scene_mip_scan_dist: 64.0,
            // All-zero side shades = no shading.
            scene_side_shades: [[0; 4]; 2],
            last_fov_y_rad: 0.0,
            pending_frame: None,
            line_resources: None,
            line_vbuf: None,
            line_vbuf_cap: 0,
            image_resources: None,
            image_vbuf: None,
            image_vbuf_cap: 0,
            images: Vec::new(),
            #[cfg(feature = "hud")]
            egui_renderer: None,
        }
    }
1159
1160    /// Synchronous wrapper for hosts that don't have an async
1161    /// runtime. Internally `pollster::block_on`s [`Self::new`].
1162    ///
1163    /// # Errors
1164    /// See [`Self::new`].
1165    #[cfg(not(target_arch = "wasm32"))]
1166    pub fn new_blocking<W>(
1167        window: Arc<W>,
1168        size: (u32, u32),
1169        settings: GpuRendererSettings,
1170    ) -> Result<Self, GpuInitError>
1171    where
1172        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
1173    {
1174        pollster::block_on(Self::new(window, size, settings))
1175    }
1176
1177    /// Human-readable adapter description — name + backend +
1178    /// device type. The demo host prints this in the title bar.
1179    pub fn adapter_info(&self) -> &str {
1180        &self.adapter_info
1181    }
1182
    /// Borrow the underlying wgpu device this renderer was created
    /// with — hosts use this to build chunk uploads
    /// (`GpuChunkResident::upload(gpu.device(), …)`).
    pub fn device(&self) -> &wgpu::Device {
        &self.device
    }
1188
    /// Borrow the wgpu queue paired with [`Self::device`] — hosts use
    /// this for read-back paths
    /// (`GpuChunkResident::read_voxel_blocking(gpu.device(), gpu.queue(), …)`).
    pub fn queue(&self) -> &wgpu::Queue {
        &self.queue
    }
1194
    /// Mirror the marched scene (and its line/image overlays) horizontally
    /// on present, leaving the egui overlay upright. See [`Self::flip_x`].
    pub fn set_flip_x(&mut self, flip: bool) {
        self.flip_x = flip;
    }

    /// GPU.8 — upload an equirectangular panorama as the scene's
    /// sky texture. `rgba` is row-major, `width × height` pixels,
    /// 4 bytes per pixel (R, G, B, A). The shader samples it with
    /// `u = atan2(dir.x, dir.y) / (2π) + 0.5` (azimuth) and
    /// `v = acos(-dir.z) / π` (elevation), matching standard
    /// equirectangular layout (top of image = zenith for voxlap's
    /// `+z = down` basis).
    ///
    /// The new texture and view replace the current sky (initially the
    /// 1×1 default).
    ///
    /// # Panics
    /// If `rgba.len() != (width * height * 4) as usize`.
    pub fn set_sky_panorama(&mut self, rgba: &[u8], width: u32, height: u32) {
        // NOTE(review): the sampler comment in `finish_init` describes
        // u as elevation and v as azimuth, the opposite of the mapping
        // documented above — confirm against the shader which is right.
        assert_eq!(
            rgba.len(),
            (width as usize) * (height as usize) * 4,
            "set_sky_panorama: expected w*h*4 bytes, got {}",
            rgba.len(),
        );
        let (tex, view) = create_sky_texture(&self.device, width, height, rgba);
        // Upload pixel data via `queue.write_texture` so we don't
        // have to map the buffer manually.
        self.queue.write_texture(
            wgpu::TexelCopyTextureInfo {
                texture: &tex,
                mip_level: 0,
                origin: wgpu::Origin3d::ZERO,
                aspect: wgpu::TextureAspect::All,
            },
            rgba,
            wgpu::TexelCopyBufferLayout {
                offset: 0,
                // Tightly packed rows: 4 bytes per pixel.
                bytes_per_row: Some(width * 4),
                rows_per_image: Some(height),
            },
            wgpu::Extent3d {
                width,
                height,
                depth_or_array_layers: 1,
            },
        );
        self.sky_texture = tex;
        self.sky_view = view;
    }
1243
1244    /// GPU.8 — set the fog blend. `color` is per-channel [0, 1];
1245    /// `near`/`far` are world-space ray distances in voxel units.
1246    /// Hits with `t < near` show their full colour; hits with
1247    /// `t > far` show `color` exclusively; in between is a
1248    /// smoothstep blend.
1249    pub fn set_fog(&mut self, color: [f32; 3], near: f32, far: f32) {
1250        self.fog_color = color;
1251        self.fog_near = near;
1252        self.fog_far = far.max(near + 1.0);
1253    }
1254
1255    /// Re-configure the swapchain to a new physical size. Call from
1256    /// `WindowEvent::Resized`. Drops the chunk-DDA storage texture
1257    /// so [`Self::render_chunk`] rebuilds it at the new size.
1258    pub fn resize(&mut self, width: u32, height: u32) {
1259        if width == 0 || height == 0 {
1260            return;
1261        }
1262        self.surface_config.width = width;
1263        self.surface_config.height = height;
1264        self.surface.configure(&self.device, &self.surface_config);
1265        self.chunk_dda = None;
1266        self.grid_dda = None;
1267        self.scene_dda = None;
1268    }
1269
1270    /// Acquire the next swapchain frame, or `None` to skip this frame.
1271    /// wgpu 29's `get_current_texture` returns a
1272    /// [`wgpu::CurrentSurfaceTexture`] status enum (was
1273    /// `Result<_, SurfaceError>`): an outdated/lost surface reconfigures
1274    /// and skips, transient statuses just skip.
1275    fn acquire_frame(&self) -> Option<wgpu::SurfaceTexture> {
1276        use wgpu::CurrentSurfaceTexture as C;
1277        match self.surface.get_current_texture() {
1278            C::Success(t) | C::Suboptimal(t) => Some(t),
1279            C::Outdated | C::Lost => {
1280                self.surface.configure(&self.device, &self.surface_config);
1281                None
1282            }
1283            C::Timeout | C::Occluded | C::Validation => None,
1284        }
1285    }
1286
1287    /// GPU.1 render: single render pass clearing the swapchain to a
1288    /// slowly drifting colour, then presenting. Voxels arrive in
1289    /// GPU.3+.
1290    pub fn render(&mut self) {
1291        let Some(surf_tex) = self.acquire_frame() else {
1292            return;
1293        };
1294        let view = surf_tex
1295            .texture
1296            .create_view(&wgpu::TextureViewDescriptor::default());
1297
1298        // Slow colour drift so the user can tell the GPU path is
1299        // actually presenting frames vs. e.g. a frozen window.
1300        // Wrap at 2π/0.005 frames (~1257) so the cast stays exact.
1301        let phase = f64::from(self.frame_count % 1257) * 0.005;
1302        let [r, g, b] = self.clear_colour;
1303        let drift = (phase.sin() * 0.04 + 0.04).clamp(0.0, 0.1);
1304        let clear = wgpu::Color {
1305            r: (r + drift).clamp(0.0, 1.0),
1306            g: (g + drift * 0.5).clamp(0.0, 1.0),
1307            b: (b + drift * 0.25).clamp(0.0, 1.0),
1308            a: 1.0,
1309        };
1310
1311        let mut encoder = self
1312            .device
1313            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1314                label: Some("roxlap-gpu encoder"),
1315            });
1316        {
1317            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1318                label: Some("roxlap-gpu clear"),
1319                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1320                    view: &view,
1321                    depth_slice: None,
1322                    resolve_target: None,
1323                    ops: wgpu::Operations {
1324                        load: wgpu::LoadOp::Clear(clear),
1325                        store: wgpu::StoreOp::Store,
1326                    },
1327                })],
1328                depth_stencil_attachment: None,
1329                timestamp_writes: None,
1330                occlusion_query_set: None,
1331                multiview_mask: None,
1332            });
1333        }
1334        self.queue.submit(std::iter::once(encoder.finish()));
1335        surf_tex.present();
1336        self.frame_count = self.frame_count.wrapping_add(1);
1337    }
1338
    /// GPU.3 single-chunk render. Dispatches `chunk_dda.wgsl`
    /// against `resident`'s storage buffers, then blits the
    /// low-res storage texture to the swapchain. `camera.position`
    /// is in **chunk-local** voxel units (host translates from
    /// world coords). `max_scan_dist` caps the per-pixel DDA loop —
    /// scene-demo wires `+` / `-` through this each frame.
    ///
    /// Returns without rendering when no swapchain frame could be
    /// acquired; otherwise presents the frame and bumps the frame
    /// counter.
    ///
    /// # Panics
    /// Internally `expect`s the chunk-DDA resources to be built —
    /// they are constructed at the top of this function if missing.
    /// Cannot fire in normal control flow.
    pub fn render_chunk(
        &mut self,
        resident: &GpuChunkResident,
        camera: &Camera,
        max_scan_dist: u32,
    ) {
        let Some(surf_tex) = self.acquire_frame() else {
            return;
        };
        let surf_view = surf_tex
            .texture
            .create_view(&wgpu::TextureViewDescriptor::default());

        let surface_w = self.surface_config.width;
        let surface_h = self.surface_config.height;
        let surface_format = self.surface_config.format;

        // Lazy-build chunk-DDA resources; rebuild when the swapchain
        // grew or shrank.
        let needs_build = match &self.chunk_dda {
            Some(r) => r.storage_size != (surface_w, surface_h),
            None => true,
        };
        if needs_build {
            self.chunk_dda = Some(self.build_chunk_dda(surface_w, surface_h, surface_format));
        }
        let dda = self.chunk_dda.as_ref().expect("just built");

        // Update uniforms.
        let uniform = ChunkDdaUniform {
            camera_pos: camera.position,
            _pad0: 0.0,
            camera_right: camera.right,
            _pad1: 0.0,
            camera_down: camera.down,
            _pad2: 0.0,
            camera_forward: camera.forward,
            fov_y_rad: camera.fov_y_rad,
            screen_size: [surface_w, surface_h],
            vsid: resident.vsid,
            max_scan_dist,
        };
        self.queue
            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));

        // Per-frame DDA bind group — references the chunk's buffers
        // so we rebuild every frame (the resident can change between
        // calls).
        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("roxlap-gpu chunk_dda.bg"),
            layout: &dda.bgl_dda,
            entries: &[
                // 0: uniforms, 1..=3: the resident's occupancy, colour
                // offset and colour buffers, 4: the output texture.
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: dda.uniform_buf.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: resident.occupancy.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 2,
                    resource: resident.color_offsets.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 3,
                    resource: resident.colors.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 4,
                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
                },
            ],
        });

        let mut encoder = self
            .device
            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
                label: Some("roxlap-gpu chunk encoder"),
            });
        {
            // Compute pass: the dispatch is sized by `div_ceil(8)` in
            // each dimension, so it covers the whole surface in groups
            // of eight pixels (presumably the shader's workgroup size —
            // confirm against `chunk_dda.wgsl`).
            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
                label: Some("roxlap-gpu chunk_dda compute"),
                timestamp_writes: None,
            });
            cpass.set_pipeline(&dda.pipeline_dda);
            cpass.set_bind_group(0, &dda_bg, &[]);
            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
        }
        {
            // Blit pass: one three-vertex draw copies the storage
            // texture onto the swapchain view.
            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
                label: Some("roxlap-gpu chunk_dda blit"),
                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
                    view: &surf_view,
                    depth_slice: None,
                    resolve_target: None,
                    ops: wgpu::Operations {
                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
                        store: wgpu::StoreOp::Store,
                    },
                })],
                depth_stencil_attachment: None,
                timestamp_writes: None,
                occlusion_query_set: None,
                multiview_mask: None,
            });
            rpass.set_pipeline(&dda.pipeline_blit);
            rpass.set_bind_group(0, &dda.blit_bg, &[]);
            rpass.draw(0..3, 0..1);
        }
        self.queue.submit(std::iter::once(encoder.finish()));
        surf_tex.present();
        self.frame_count = self.frame_count.wrapping_add(1);
    }
1464
1465    fn build_chunk_dda(
1466        &self,
1467        width: u32,
1468        height: u32,
1469        surface_format: wgpu::TextureFormat,
1470    ) -> ChunkDdaResources {
1471        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1472            label: Some("roxlap-gpu chunk_dda.storage"),
1473            size: wgpu::Extent3d {
1474                width,
1475                height,
1476                depth_or_array_layers: 1,
1477            },
1478            mip_level_count: 1,
1479            sample_count: 1,
1480            dimension: wgpu::TextureDimension::D2,
1481            format: wgpu::TextureFormat::Rgba8Unorm,
1482            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1483            view_formats: &[],
1484        });
1485        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1486
1487        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1488            label: Some("roxlap-gpu chunk_dda.uniform"),
1489            size: std::mem::size_of::<ChunkDdaUniform>() as u64,
1490            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1491            mapped_at_creation: false,
1492        });
1493
1494        let dda_shader = self
1495            .device
1496            .create_shader_module(wgpu::ShaderModuleDescriptor {
1497                label: Some("chunk_dda.wgsl"),
1498                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/chunk_dda.wgsl").into()),
1499            });
1500        let bgl_dda = self
1501            .device
1502            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1503                label: Some("roxlap-gpu chunk_dda.bgl"),
1504                entries: &[
1505                    bgl_uniform_entry(0),
1506                    bgl_storage_entry(1, true),
1507                    bgl_storage_entry(2, true),
1508                    bgl_storage_entry(3, true),
1509                    wgpu::BindGroupLayoutEntry {
1510                        binding: 4,
1511                        visibility: wgpu::ShaderStages::COMPUTE,
1512                        ty: wgpu::BindingType::StorageTexture {
1513                            access: wgpu::StorageTextureAccess::WriteOnly,
1514                            format: wgpu::TextureFormat::Rgba8Unorm,
1515                            view_dimension: wgpu::TextureViewDimension::D2,
1516                        },
1517                        count: None,
1518                    },
1519                ],
1520            });
1521        let dda_pl = self
1522            .device
1523            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1524                label: Some("roxlap-gpu chunk_dda.layout"),
1525                bind_group_layouts: &[Some(&bgl_dda)],
1526                immediate_size: 0,
1527            });
1528        let pipeline_dda = self
1529            .device
1530            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1531                label: Some("roxlap-gpu chunk_dda.pipeline"),
1532                layout: Some(&dda_pl),
1533                module: &dda_shader,
1534                entry_point: Some("render_chunk"),
1535                compilation_options: wgpu::PipelineCompilationOptions::default(),
1536                cache: None,
1537            });
1538
1539        // Fullscreen-triangle blit upscales the storage texture into
1540        // the swapchain. Nearest filter keeps the retro pixel look.
1541        let blit_shader = self
1542            .device
1543            .create_shader_module(wgpu::ShaderModuleDescriptor {
1544                label: Some("blit.wgsl"),
1545                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1546            });
1547        let bgl_blit = self
1548            .device
1549            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1550                label: Some("roxlap-gpu chunk_dda.blit_bgl"),
1551                entries: &[
1552                    wgpu::BindGroupLayoutEntry {
1553                        binding: 0,
1554                        visibility: wgpu::ShaderStages::FRAGMENT,
1555                        ty: wgpu::BindingType::Texture {
1556                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1557                            view_dimension: wgpu::TextureViewDimension::D2,
1558                            multisampled: false,
1559                        },
1560                        count: None,
1561                    },
1562                    wgpu::BindGroupLayoutEntry {
1563                        binding: 1,
1564                        visibility: wgpu::ShaderStages::FRAGMENT,
1565                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1566                        count: None,
1567                    },
1568                ],
1569            });
1570        let blit_pl = self
1571            .device
1572            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1573                label: Some("roxlap-gpu chunk_dda.blit_layout"),
1574                bind_group_layouts: &[Some(&bgl_blit)],
1575                immediate_size: 0,
1576            });
1577        let pipeline_blit = self
1578            .device
1579            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1580                label: Some("roxlap-gpu chunk_dda.blit_pipeline"),
1581                layout: Some(&blit_pl),
1582                vertex: wgpu::VertexState {
1583                    module: &blit_shader,
1584                    entry_point: Some("vs_main"),
1585                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1586                    buffers: &[],
1587                },
1588                fragment: Some(wgpu::FragmentState {
1589                    module: &blit_shader,
1590                    entry_point: Some("fs_main"),
1591                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1592                    targets: &[Some(wgpu::ColorTargetState {
1593                        format: surface_format,
1594                        blend: None,
1595                        write_mask: wgpu::ColorWrites::ALL,
1596                    })],
1597                }),
1598                primitive: wgpu::PrimitiveState::default(),
1599                depth_stencil: None,
1600                multisample: wgpu::MultisampleState::default(),
1601                multiview_mask: None,
1602                cache: None,
1603            });
1604        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1605            label: Some("roxlap-gpu chunk_dda.blit_sampler"),
1606            address_mode_u: wgpu::AddressMode::ClampToEdge,
1607            address_mode_v: wgpu::AddressMode::ClampToEdge,
1608            address_mode_w: wgpu::AddressMode::ClampToEdge,
1609            mag_filter: wgpu::FilterMode::Nearest,
1610            min_filter: wgpu::FilterMode::Nearest,
1611            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1612            ..Default::default()
1613        });
1614        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1615            label: Some("roxlap-gpu chunk_dda.blit_bg"),
1616            layout: &bgl_blit,
1617            entries: &[
1618                wgpu::BindGroupEntry {
1619                    binding: 0,
1620                    resource: wgpu::BindingResource::TextureView(&storage_view),
1621                },
1622                wgpu::BindGroupEntry {
1623                    binding: 1,
1624                    resource: wgpu::BindingResource::Sampler(&sampler),
1625                },
1626            ],
1627        });
1628
1629        ChunkDdaResources {
1630            storage_size: (width, height),
1631            storage_view,
1632            uniform_buf,
1633            bgl_dda,
1634            pipeline_dda,
1635            blit_bg,
1636            pipeline_blit,
1637            _sampler: sampler,
1638        }
1639    }
1640
1641    /// GPU.4 render — outer DDA over chunk indices + inner DDA into
1642    /// non-empty chunks. `camera.position` is in **grid-local**
1643    /// voxel units. `max_outer_steps` caps how many chunks the
1644    /// outer DDA may traverse per ray (scene-demo wires `+ / -`
1645    /// through this).
1646    ///
1647    /// # Panics
1648    /// Internally `expect`s the grid-DDA resources to be built;
1649    /// they are constructed at the top of this function if missing.
1650    pub fn render_grid(&mut self, grid: &GpuGridResident, camera: &Camera, max_outer_steps: u32) {
1651        let Some(surf_tex) = self.acquire_frame() else {
1652            return;
1653        };
1654        let surf_view = surf_tex
1655            .texture
1656            .create_view(&wgpu::TextureViewDescriptor::default());
1657
1658        let surface_w = self.surface_config.width;
1659        let surface_h = self.surface_config.height;
1660        let surface_format = self.surface_config.format;
1661
1662        let needs_build = match &self.grid_dda {
1663            Some(r) => r.storage_size != (surface_w, surface_h),
1664            None => true,
1665        };
1666        if needs_build {
1667            self.grid_dda = Some(self.build_grid_dda(surface_w, surface_h, surface_format));
1668        }
1669        let dda = self.grid_dda.as_ref().expect("just built");
1670
1671        let uniform = GridDdaUniform {
1672            camera_pos: camera.position,
1673            _pad0: 0.0,
1674            camera_right: camera.right,
1675            _pad1: 0.0,
1676            camera_down: camera.down,
1677            _pad2: 0.0,
1678            camera_forward: camera.forward,
1679            fov_y_rad: camera.fov_y_rad,
1680            screen_size: [surface_w, surface_h],
1681            vsid: grid.vsid,
1682            max_outer_steps,
1683            chunks_dims: grid.chunks_dims,
1684            _pad3: 0,
1685            origin_chunk: grid.origin_chunk,
1686            _pad4: 0,
1687        };
1688        self.queue
1689            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1690
1691        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1692            label: Some("roxlap-gpu grid_dda.bg"),
1693            layout: &dda.bgl_dda,
1694            entries: &[
1695                wgpu::BindGroupEntry {
1696                    binding: 0,
1697                    resource: dda.uniform_buf.as_entire_binding(),
1698                },
1699                wgpu::BindGroupEntry {
1700                    binding: 1,
1701                    resource: grid.occupancy.as_entire_binding(),
1702                },
1703                wgpu::BindGroupEntry {
1704                    binding: 2,
1705                    resource: grid.color_offsets.as_entire_binding(),
1706                },
1707                wgpu::BindGroupEntry {
1708                    binding: 3,
1709                    resource: grid.colors.as_entire_binding(),
1710                },
1711                wgpu::BindGroupEntry {
1712                    binding: 4,
1713                    resource: grid.chunk_colors_base.as_entire_binding(),
1714                },
1715                wgpu::BindGroupEntry {
1716                    binding: 5,
1717                    resource: grid.chunk_occupancy.as_entire_binding(),
1718                },
1719                wgpu::BindGroupEntry {
1720                    binding: 6,
1721                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1722                },
1723            ],
1724        });
1725
1726        let mut encoder = self
1727            .device
1728            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1729                label: Some("roxlap-gpu grid encoder"),
1730            });
1731        {
1732            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1733                label: Some("roxlap-gpu grid_dda compute"),
1734                timestamp_writes: None,
1735            });
1736            cpass.set_pipeline(&dda.pipeline_dda);
1737            cpass.set_bind_group(0, &dda_bg, &[]);
1738            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1739        }
1740        {
1741            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1742                label: Some("roxlap-gpu grid_dda blit"),
1743                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1744                    view: &surf_view,
1745                    depth_slice: None,
1746                    resolve_target: None,
1747                    ops: wgpu::Operations {
1748                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1749                        store: wgpu::StoreOp::Store,
1750                    },
1751                })],
1752                depth_stencil_attachment: None,
1753                timestamp_writes: None,
1754                occlusion_query_set: None,
1755                multiview_mask: None,
1756            });
1757            rpass.set_pipeline(&dda.pipeline_blit);
1758            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1759            rpass.draw(0..3, 0..1);
1760        }
1761        self.queue.submit(std::iter::once(encoder.finish()));
1762        surf_tex.present();
1763        self.frame_count = self.frame_count.wrapping_add(1);
1764    }
1765
1766    fn build_grid_dda(
1767        &self,
1768        width: u32,
1769        height: u32,
1770        surface_format: wgpu::TextureFormat,
1771    ) -> GridDdaResources {
1772        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1773            label: Some("roxlap-gpu grid_dda.storage"),
1774            size: wgpu::Extent3d {
1775                width,
1776                height,
1777                depth_or_array_layers: 1,
1778            },
1779            mip_level_count: 1,
1780            sample_count: 1,
1781            dimension: wgpu::TextureDimension::D2,
1782            format: wgpu::TextureFormat::Rgba8Unorm,
1783            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1784            view_formats: &[],
1785        });
1786        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1787
1788        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1789            label: Some("roxlap-gpu grid_dda.uniform"),
1790            size: std::mem::size_of::<GridDdaUniform>() as u64,
1791            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1792            mapped_at_creation: false,
1793        });
1794
1795        let dda_shader = self
1796            .device
1797            .create_shader_module(wgpu::ShaderModuleDescriptor {
1798                label: Some("grid_dda.wgsl"),
1799                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/grid_dda.wgsl").into()),
1800            });
1801        let bgl_dda = self
1802            .device
1803            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1804                label: Some("roxlap-gpu grid_dda.bgl"),
1805                entries: &[
1806                    bgl_uniform_entry(0),
1807                    bgl_storage_entry(1, true),
1808                    bgl_storage_entry(2, true),
1809                    bgl_storage_entry(3, true),
1810                    bgl_storage_entry(4, true),
1811                    bgl_storage_entry(5, true),
1812                    wgpu::BindGroupLayoutEntry {
1813                        binding: 6,
1814                        visibility: wgpu::ShaderStages::COMPUTE,
1815                        ty: wgpu::BindingType::StorageTexture {
1816                            access: wgpu::StorageTextureAccess::WriteOnly,
1817                            format: wgpu::TextureFormat::Rgba8Unorm,
1818                            view_dimension: wgpu::TextureViewDimension::D2,
1819                        },
1820                        count: None,
1821                    },
1822                ],
1823            });
1824        let dda_pl = self
1825            .device
1826            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1827                label: Some("roxlap-gpu grid_dda.layout"),
1828                bind_group_layouts: &[Some(&bgl_dda)],
1829                immediate_size: 0,
1830            });
1831        let pipeline_dda = self
1832            .device
1833            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1834                label: Some("roxlap-gpu grid_dda.pipeline"),
1835                layout: Some(&dda_pl),
1836                module: &dda_shader,
1837                entry_point: Some("render_grid"),
1838                compilation_options: wgpu::PipelineCompilationOptions::default(),
1839                cache: None,
1840            });
1841
1842        let blit_shader = self
1843            .device
1844            .create_shader_module(wgpu::ShaderModuleDescriptor {
1845                label: Some("blit.wgsl"),
1846                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1847            });
1848        let bgl_blit = self
1849            .device
1850            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1851                label: Some("roxlap-gpu grid_dda.blit_bgl"),
1852                entries: &[
1853                    wgpu::BindGroupLayoutEntry {
1854                        binding: 0,
1855                        visibility: wgpu::ShaderStages::FRAGMENT,
1856                        ty: wgpu::BindingType::Texture {
1857                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1858                            view_dimension: wgpu::TextureViewDimension::D2,
1859                            multisampled: false,
1860                        },
1861                        count: None,
1862                    },
1863                    wgpu::BindGroupLayoutEntry {
1864                        binding: 1,
1865                        visibility: wgpu::ShaderStages::FRAGMENT,
1866                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1867                        count: None,
1868                    },
1869                ],
1870            });
1871        let blit_pl = self
1872            .device
1873            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1874                label: Some("roxlap-gpu grid_dda.blit_layout"),
1875                bind_group_layouts: &[Some(&bgl_blit)],
1876                immediate_size: 0,
1877            });
1878        let pipeline_blit = self
1879            .device
1880            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1881                label: Some("roxlap-gpu grid_dda.blit_pipeline"),
1882                layout: Some(&blit_pl),
1883                vertex: wgpu::VertexState {
1884                    module: &blit_shader,
1885                    entry_point: Some("vs_main"),
1886                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1887                    buffers: &[],
1888                },
1889                fragment: Some(wgpu::FragmentState {
1890                    module: &blit_shader,
1891                    entry_point: Some("fs_main"),
1892                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1893                    targets: &[Some(wgpu::ColorTargetState {
1894                        format: surface_format,
1895                        blend: None,
1896                        write_mask: wgpu::ColorWrites::ALL,
1897                    })],
1898                }),
1899                primitive: wgpu::PrimitiveState::default(),
1900                depth_stencil: None,
1901                multisample: wgpu::MultisampleState::default(),
1902                multiview_mask: None,
1903                cache: None,
1904            });
1905        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1906            label: Some("roxlap-gpu grid_dda.blit_sampler"),
1907            address_mode_u: wgpu::AddressMode::ClampToEdge,
1908            address_mode_v: wgpu::AddressMode::ClampToEdge,
1909            address_mode_w: wgpu::AddressMode::ClampToEdge,
1910            mag_filter: wgpu::FilterMode::Nearest,
1911            min_filter: wgpu::FilterMode::Nearest,
1912            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1913            ..Default::default()
1914        });
1915        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1916            label: Some("roxlap-gpu grid_dda.blit_bg"),
1917            layout: &bgl_blit,
1918            entries: &[
1919                wgpu::BindGroupEntry {
1920                    binding: 0,
1921                    resource: wgpu::BindingResource::TextureView(&storage_view),
1922                },
1923                wgpu::BindGroupEntry {
1924                    binding: 1,
1925                    resource: wgpu::BindingResource::Sampler(&sampler),
1926                },
1927            ],
1928        });
1929
1930        GridDdaResources {
1931            storage_size: (width, height),
1932            storage_view,
1933            uniform_buf,
1934            bgl_dda,
1935            pipeline_dda,
1936            blit_bg,
1937            pipeline_blit,
1938            _sampler: sampler,
1939        }
1940    }
1941
1942    /// GPU.5 render — multi-grid scene marcher. `cameras[i]` is the
1943    /// world camera transformed into grid `i`'s local frame
1944    /// (caller-supplied; see scene-demo's `redraw_gpu` for the
1945    /// glam-based transform). `fov_y_rad` is the shared vertical
1946    /// FOV; `max_outer_steps` caps per-ray chunk-DDA work for each
1947    /// grid.
1948    ///
1949    /// # Panics
1950    /// If `cameras.len() != scene.grid_count`.
1951    /// `cameras[i]` is grid `i`'s world camera transformed into that
1952    /// grid's local frame (the grid marcher works in grid-local space).
1953    /// `sprite_camera` is the **world** camera: instanced sprites carry
1954    /// world-space positions/transforms, so they must project through
1955    /// the untransformed world camera — not `cameras[0]`, which is only
1956    /// the world camera when grid 0 is at identity.
1957    pub fn render_scene(
1958        &mut self,
1959        scene: &GpuSceneResident,
1960        cameras: &[Camera],
1961        sprite_camera: &Camera,
1962        fov_y_rad: f32,
1963        max_outer_steps: u32,
1964    ) {
1965        assert_eq!(
1966            cameras.len(),
1967            scene.grid_count as usize,
1968            "render_scene: {} cameras supplied, scene has {} grids",
1969            cameras.len(),
1970            scene.grid_count,
1971        );
1972        self.last_fov_y_rad = fov_y_rad; // cached for pixel_ray (picking)
1973
1974        // Deferred present: drop any frame a prior render left
1975        // un-presented (a host that skipped present/paint_egui) so we
1976        // never hold two outstanding swapchain textures.
1977        self.pending_frame = None;
1978        let Some(surf_tex) = self.acquire_frame() else {
1979            return;
1980        };
1981        let surf_view = surf_tex
1982            .texture
1983            .create_view(&wgpu::TextureViewDescriptor::default());
1984
1985        let surface_w = self.surface_config.width;
1986        let surface_h = self.surface_config.height;
1987        let surface_format = self.surface_config.format;
1988
1989        let needs_build = match &self.scene_dda {
1990            Some(r) => r.storage_size != (surface_w, surface_h),
1991            None => true,
1992        };
1993        if needs_build {
1994            self.scene_dda = Some(self.build_scene_dda(surface_w, surface_h, surface_format));
1995        }
1996        // GPU.9 — materialise the sprite pipeline the first frame
1997        // sprites are present (before the immutable `dda` borrow).
1998        // GPU.10.0 — build the model-DDA pipeline the first frame a
1999        // sprite registry is present.
2000        if self.sprite_registry.is_some() && self.sprite_model_dda.is_none() {
2001            self.sprite_model_dda = Some(self.build_sprite_model_dda());
2002        }
2003        // GPU.10.3 — frustum-cull + screen-tile-bin the sprite instances
2004        // (needs &mut self for buffer growth, so before the immutable
2005        // scene_dda borrow). Captures (visible_count, tiles_x); None when
2006        // nothing is in view.
2007        let sprite_pass: Option<(u32, u32)> = if let Some(reg) = self.sprite_registry.as_mut() {
2008            if reg.instance_capacity > 0 {
2009                // World camera — sprite positions/transforms are world-
2010                // space (independent of any grid's transform).
2011                let cam = sprite_camera;
2012                #[allow(clippy::cast_precision_loss)]
2013                let aspect = surface_w as f32 / surface_h as f32;
2014                let half_h = (fov_y_rad * 0.5).tan();
2015                let frustum = sprite_model::ViewFrustum {
2016                    pos: cam.position,
2017                    right: cam.right,
2018                    down: cam.down,
2019                    forward: cam.forward,
2020                    half_w: half_h * aspect,
2021                    half_h,
2022                    far: 1.0e9,
2023                };
2024                let (visible, tiles_x, _tiles_y) = reg.cull_bin_upload(
2025                    &self.device,
2026                    &self.queue,
2027                    &frustum,
2028                    surface_w,
2029                    surface_h,
2030                    SPRITE_TILE_SIZE,
2031                    self.sprite_lod_px,
2032                );
2033                (visible > 0).then_some((visible, tiles_x))
2034            } else {
2035                None
2036            }
2037        } else {
2038            None
2039        };
2040        let dda = self.scene_dda.as_ref().expect("just built");
2041
2042        // Refresh the blit's flip flag each frame (offset 8, after the
2043        // width/height), so toggling the flip applies without a resize.
2044        self.queue.write_buffer(
2045            &dda.blit_dims,
2046            8,
2047            bytemuck::bytes_of(&[u32::from(self.flip_x), 0u32]),
2048        );
2049
2050        // Pack per-grid cameras into a runtime-sized storage buffer
2051        // (binding 15) — no fixed cap on grid count.
2052        let cam_vec: Vec<SceneDdaPerGridCamera> = cameras
2053            .iter()
2054            .map(SceneDdaPerGridCamera::from_camera)
2055            .collect();
2056        let grid_cameras = upload_grid_cameras(&self.device, &cam_vec);
2057        let uniform = SceneDdaUniform {
2058            fov_y_rad,
2059            grid_count: scene.grid_count,
2060            max_outer_steps,
2061            _pad0: 0,
2062            screen_size: [surface_w, surface_h],
2063            _pad1: [0; 2],
2064            fog_color: [
2065                self.fog_color[0],
2066                self.fog_color[1],
2067                self.fog_color[2],
2068                self.fog_near,
2069            ],
2070            fog_far: self.fog_far,
2071            // L3.1: always write scene depth. Costs one storage store per
2072            // pixel, and the depth is needed for sprite z-test, sprite-less
2073            // `pick_depth`, and `draw_lines` occlusion alike.
2074            write_depth: 1,
2075            occ_page_words: scene.occupancy_page_words,
2076            occ_num_pages: scene.occupancy_num_pages,
2077            mip_scan_dist: self.scene_mip_scan_dist,
2078            terrain_has_translucent: u32::from(self.scene_terrain_translucent),
2079            terrain_map_count: self.scene_terrain_map.len() as u32,
2080            _pad4: 0,
2081            // Sky direction comes from the world (sprite) camera, so a
2082            // grid-less sprite-only scene still paints a real sky.
2083            sky_cam: SceneDdaPerGridCamera::from_camera(sprite_camera),
2084            side_shades0: self.scene_side_shades[0],
2085            side_shades1: self.scene_side_shades[1],
2086        };
2087        self.queue
2088            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
2089
2090        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2091            label: Some("roxlap-gpu scene_dda.bg"),
2092            layout: &dda.bgl_dda,
2093            entries: &[
2094                wgpu::BindGroupEntry {
2095                    binding: 0,
2096                    resource: dda.uniform_buf.as_entire_binding(),
2097                },
2098                // Occupancy page 0 at binding 1; pages 1..MAX_OCC_PAGES
2099                // at bindings 12.. (see GPU.X occupancy paging).
2100                wgpu::BindGroupEntry {
2101                    binding: 1,
2102                    resource: scene.occupancy_pages[0].as_entire_binding(),
2103                },
2104                wgpu::BindGroupEntry {
2105                    binding: 2,
2106                    resource: scene.all_color_offsets.as_entire_binding(),
2107                },
2108                wgpu::BindGroupEntry {
2109                    binding: 3,
2110                    resource: scene.all_colors.as_entire_binding(),
2111                },
2112                wgpu::BindGroupEntry {
2113                    binding: 4,
2114                    resource: scene.all_chunk_colors_base.as_entire_binding(),
2115                },
2116                wgpu::BindGroupEntry {
2117                    binding: 5,
2118                    resource: scene.all_chunk_occupancy.as_entire_binding(),
2119                },
2120                wgpu::BindGroupEntry {
2121                    binding: 6,
2122                    resource: scene.grid_static_meta.as_entire_binding(),
2123                },
2124                wgpu::BindGroupEntry {
2125                    binding: 7,
2126                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
2127                },
2128                wgpu::BindGroupEntry {
2129                    binding: 8,
2130                    resource: dda.framebuffer.as_entire_binding(),
2131                },
2132                wgpu::BindGroupEntry {
2133                    binding: 9,
2134                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
2135                },
2136                wgpu::BindGroupEntry {
2137                    binding: 10,
2138                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
2139                },
2140                wgpu::BindGroupEntry {
2141                    binding: 11,
2142                    resource: dda.depth_buffer.as_entire_binding(),
2143                },
2144                wgpu::BindGroupEntry {
2145                    binding: 12,
2146                    resource: scene.occupancy_pages[1].as_entire_binding(),
2147                },
2148                wgpu::BindGroupEntry {
2149                    binding: 13,
2150                    resource: scene.occupancy_pages[2].as_entire_binding(),
2151                },
2152                wgpu::BindGroupEntry {
2153                    binding: 14,
2154                    resource: scene.occupancy_pages[3].as_entire_binding(),
2155                },
2156                wgpu::BindGroupEntry {
2157                    binding: 15,
2158                    resource: grid_cameras.as_entire_binding(),
2159                },
2160                wgpu::BindGroupEntry {
2161                    binding: 16,
2162                    resource: dda.materials_pal_buf.as_entire_binding(),
2163                },
2164                wgpu::BindGroupEntry {
2165                    binding: 17,
2166                    resource: dda.terrain_map_buf.as_entire_binding(),
2167                },
2168            ],
2169        });
2170
2171        // GPU.9 — when sprites are present, build both splatter bind
2172        // groups up front (the splat pass writes the key buffer; the
2173        // resolve pass reads keys + scene depth and writes colour).
2174        // GPU.10.3 — model-DDA bind group + per-frame uniform, using the
2175        // cull/bin results captured above. Per-model + per-instance data
2176        // + the tile lists live in the registry buffers.
2177        let sprite_model_bg = match (&self.sprite_model_dda, &self.sprite_registry, sprite_pass) {
2178            (Some(smd), Some(reg), Some((visible, tiles_x))) => {
2179                // World camera (see the cull pass above) — sprites
2180                // project through it regardless of grid 0's transform.
2181                let cam = sprite_camera;
2182                let uni = SpriteModelUniform {
2183                    cam_pos: cam.position,
2184                    _p0: 0.0,
2185                    cam_right: cam.right,
2186                    _p1: 0.0,
2187                    cam_down: cam.down,
2188                    _p2: 0.0,
2189                    cam_forward: cam.forward,
2190                    _p3: 0.0,
2191                    fog_color: [
2192                        self.fog_color[0],
2193                        self.fog_color[1],
2194                        self.fog_color[2],
2195                        self.fog_near,
2196                    ],
2197                    screen_size: [surface_w, surface_h],
2198                    instance_count: visible,
2199                    fog_far: self.fog_far,
2200                    fov_y_rad,
2201                    tiles_x,
2202                    tile_size: SPRITE_TILE_SIZE,
2203                    has_translucent: u32::from(self.sprite_has_translucent),
2204                };
2205                self.queue
2206                    .write_buffer(&smd.uniform_buf, 0, bytemuck::bytes_of(&uni));
2207                Some(self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2208                    label: Some("roxlap-gpu sprite_model_dda.bg"),
2209                    layout: &smd.bgl,
2210                    entries: &[
2211                        wgpu::BindGroupEntry {
2212                            binding: 0,
2213                            resource: smd.uniform_buf.as_entire_binding(),
2214                        },
2215                        wgpu::BindGroupEntry {
2216                            binding: 1,
2217                            resource: reg.occupancy.as_entire_binding(),
2218                        },
2219                        wgpu::BindGroupEntry {
2220                            binding: 2,
2221                            resource: reg.colors.as_entire_binding(),
2222                        },
2223                        wgpu::BindGroupEntry {
2224                            binding: 3,
2225                            resource: reg.color_offsets.as_entire_binding(),
2226                        },
2227                        wgpu::BindGroupEntry {
2228                            binding: 4,
2229                            resource: reg.model_meta.as_entire_binding(),
2230                        },
2231                        wgpu::BindGroupEntry {
2232                            binding: 5,
2233                            resource: reg.instances.as_entire_binding(),
2234                        },
2235                        wgpu::BindGroupEntry {
2236                            binding: 6,
2237                            resource: dda.depth_buffer.as_entire_binding(),
2238                        },
2239                        wgpu::BindGroupEntry {
2240                            binding: 7,
2241                            resource: dda.framebuffer.as_entire_binding(),
2242                        },
2243                        wgpu::BindGroupEntry {
2244                            binding: 8,
2245                            resource: reg.tile_ranges.as_entire_binding(),
2246                        },
2247                        wgpu::BindGroupEntry {
2248                            binding: 9,
2249                            resource: reg.tile_instances.as_entire_binding(),
2250                        },
2251                        wgpu::BindGroupEntry {
2252                            binding: 10,
2253                            resource: reg.dirs.as_entire_binding(),
2254                        },
2255                        wgpu::BindGroupEntry {
2256                            binding: 11,
2257                            resource: reg.colmul.as_entire_binding(),
2258                        },
2259                        wgpu::BindGroupEntry {
2260                            binding: 12,
2261                            resource: smd.materials_buf.as_entire_binding(),
2262                        },
2263                        wgpu::BindGroupEntry {
2264                            binding: 13,
2265                            resource: reg.materials_vox.as_entire_binding(),
2266                        },
2267                    ],
2268                }))
2269            }
2270            _ => None,
2271        };
2272
2273        let mut encoder = self
2274            .device
2275            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2276                label: Some("roxlap-gpu scene encoder"),
2277            });
2278        {
2279            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
2280                label: Some("roxlap-gpu scene_dda compute"),
2281                timestamp_writes: None,
2282            });
2283            cpass.set_pipeline(&dda.pipeline_dda);
2284            cpass.set_bind_group(0, &dda_bg, &[]);
2285            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
2286        }
2287        // GPU.10 — sprite model-DDA pass: one thread per pixel marches
2288        // the tile's instances + composites against scene depth, after
2289        // the scene pass wrote the depth buffer and before the blit.
2290        if let (Some(smd), Some(bg)) = (&self.sprite_model_dda, &sprite_model_bg) {
2291            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
2292                label: Some("roxlap-gpu sprite_model_dda"),
2293                timestamp_writes: None,
2294            });
2295            cpass.set_pipeline(&smd.pipeline);
2296            cpass.set_bind_group(0, bg, &[]);
2297            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
2298        }
2299        {
2300            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2301                label: Some("roxlap-gpu scene_dda blit"),
2302                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2303                    view: &surf_view,
2304                    depth_slice: None,
2305                    resolve_target: None,
2306                    ops: wgpu::Operations {
2307                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
2308                        store: wgpu::StoreOp::Store,
2309                    },
2310                })],
2311                depth_stencil_attachment: None,
2312                timestamp_writes: None,
2313                occlusion_query_set: None,
2314                multiview_mask: None,
2315            });
2316            rpass.set_pipeline(&dda.pipeline_blit);
2317            rpass.set_bind_group(0, &dda.blit_bg, &[]);
2318            rpass.draw(0..3, 0..1);
2319        }
2320        self.queue.submit(std::iter::once(encoder.finish()));
2321        // This frame wrote `scene_dda.depth_buffer`, so depth-tested
2322        // overlays may test against it.
2323        self.scene_depth_valid = true;
2324        // Deferred present — the host calls `present` or `paint_egui`.
2325        self.pending_frame = Some((surf_tex, surf_view));
2326        self.frame_count = self.frame_count.wrapping_add(1);
2327    }
2328
2329    /// Like [`Self::render`] (clear to colour) but **deferred**: stashes
2330    /// the frame for [`Self::present`] / [`Self::paint_egui`] instead of
2331    /// presenting. The facade uses this before any grid is resident so a
2332    /// HUD can still be painted over an empty scene.
2333    pub fn render_clear_deferred(&mut self) {
2334        // No scene pass this frame ⇒ `scene_dda.depth_buffer` (if it
2335        // exists from an earlier scene) is stale; depth-tested overlays
2336        // must not test against it.
2337        self.scene_depth_valid = false;
2338        self.pending_frame = None;
2339        let Some(surf_tex) = self.acquire_frame() else {
2340            return;
2341        };
2342        let view = surf_tex
2343            .texture
2344            .create_view(&wgpu::TextureViewDescriptor::default());
2345        let [r, g, b] = self.clear_colour;
2346        let mut encoder = self
2347            .device
2348            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2349                label: Some("roxlap-gpu clear (deferred)"),
2350            });
2351        {
2352            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2353                label: Some("roxlap-gpu clear (deferred)"),
2354                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2355                    view: &view,
2356                    depth_slice: None,
2357                    resolve_target: None,
2358                    ops: wgpu::Operations {
2359                        load: wgpu::LoadOp::Clear(wgpu::Color { r, g, b, a: 1.0 }),
2360                        store: wgpu::StoreOp::Store,
2361                    },
2362                })],
2363                depth_stencil_attachment: None,
2364                timestamp_writes: None,
2365                occlusion_query_set: None,
2366                multiview_mask: None,
2367            });
2368        }
2369        self.queue.submit(std::iter::once(encoder.finish()));
2370        self.pending_frame = Some((surf_tex, view));
2371    }
2372
2373    /// Present the frame stashed by the last deferred render
2374    /// ([`Self::render_scene`] / [`Self::render_clear_deferred`]). No-op
2375    /// if nothing is pending (e.g. the surface was lost mid-render).
2376    pub fn present(&mut self) {
2377        if let Some((surf_tex, _view)) = self.pending_frame.take() {
2378            surf_tex.present();
2379        }
2380    }
2381
2382    /// Block until the GPU has drained every submitted command (queue
2383    /// idle), dropping any not-yet-presented swapchain frame first. Call at
2384    /// shutdown — before the [`GpuRenderer`] (and its window) drop — so the
2385    /// device is torn down with no work in flight and no half-presented
2386    /// frame, instead of yanking the swapchain mid-submission (which leaves
2387    /// the driver/compositor compositing stale buffers — the "leftover
2388    /// triangles / flicker after an unclean exit" symptom). No-op on wasm
2389    /// (`poll(Wait)` is unavailable there; the browser reclaims the device).
2390    pub fn wait_idle(&mut self) {
2391        // Release the acquired-but-unpresented frame so its swapchain image
2392        // isn't held across teardown.
2393        self.pending_frame = None;
2394        #[cfg(not(target_arch = "wasm32"))]
2395        {
2396            self.device.poll(wgpu::PollType::wait_indefinitely()).ok();
2397        }
2398    }
2399
2400    /// Draw depth-tested world-space [`GpuLine`]s over the pending frame
2401    /// (L3.2). Projects each endpoint with `cam` (the marcher's pinhole) +
2402    /// the last frame's FOV / surface size, expands to screen-space quads,
2403    /// and runs a `LoadOp::Load` pass into the pending swapchain view — so
2404    /// the lines land on the marched frame and a later `present` /
2405    /// `paint_egui` still finishes it (the pending frame is left intact).
2406    /// Depth-tested lines are occluded by nearer marched geometry (compared
2407    /// against the scene-DDA depth buffer's `best_t`); call after `render`,
2408    /// before `present` / `paint_egui`. No-op if no frame is pending.
2409    pub fn draw_lines_deferred(&mut self, cam: &GpuLineCamera, lines: &[GpuLine]) {
2410        if self.pending_frame.is_none() || lines.is_empty() {
2411            return;
2412        }
2413        let (w, h) = (self.surface_config.width, self.surface_config.height);
2414        let fov = self.last_fov_y_rad;
2415        if w == 0 || h == 0 || fov <= 0.0 {
2416            return; // no frame marched yet — no projection to reuse
2417        }
2418        let verts = build_line_vertices(cam, lines, w, h, fov, self.flip_x);
2419        if verts.is_empty() {
2420            return;
2421        }
2422        self.ensure_line_resources();
2423        let res = self.line_resources.as_ref().expect("just built");
2424
2425        // Skip the depth test when there's no current scene depth to read —
2426        // either no buffer at all (sprite-only / never-rendered) or this
2427        // frame was a color-only clear so the buffer is stale (an empty
2428        // scene drawn after a grid scene). The 1-word dummy / stale buffer
2429        // is still bound to satisfy the layout; `no_depth = 1` keeps the
2430        // shader from indexing it.
2431        let no_depth = u32::from(self.scene_dda.is_none() || !self.scene_depth_valid);
2432        let params = LineParams {
2433            screen_w: w,
2434            screen_h: h,
2435            depth_bias: LINE_DEPTH_BIAS,
2436            no_depth,
2437            flip_x: u32::from(self.flip_x),
2438            _pad: [0; 3],
2439        };
2440        self.queue
2441            .write_buffer(&res.uniform_buf, 0, bytemuck::bytes_of(&params));
2442
2443        let depth_resource = match &self.scene_dda {
2444            Some(dda) => dda.depth_buffer.as_entire_binding(),
2445            None => res.dummy_depth.as_entire_binding(),
2446        };
2447        let bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2448            label: Some("roxlap-gpu line.bg"),
2449            layout: &res.bgl,
2450            entries: &[
2451                wgpu::BindGroupEntry {
2452                    binding: 0,
2453                    resource: res.uniform_buf.as_entire_binding(),
2454                },
2455                wgpu::BindGroupEntry {
2456                    binding: 1,
2457                    resource: depth_resource,
2458                },
2459            ],
2460        });
2461
2462        // Grow-only persistent vertex buffer (L3.3): one `write_buffer`
2463        // per overlay, reused across frames. Power-of-two capacity keeps
2464        // re-allocation rare as the segment count drifts.
2465        let needed = std::mem::size_of_val(verts.as_slice()) as u64;
2466        if self.line_vbuf_cap < needed {
2467            let cap = needed.next_power_of_two().max(4096);
2468            self.line_vbuf = Some(self.device.create_buffer(&wgpu::BufferDescriptor {
2469                label: Some("roxlap-gpu line.vbuf"),
2470                size: cap,
2471                usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
2472                mapped_at_creation: false,
2473            }));
2474            self.line_vbuf_cap = cap;
2475        }
2476        let vbuf = self.line_vbuf.as_ref().expect("ensured above");
2477        self.queue
2478            .write_buffer(vbuf, 0, bytemuck::cast_slice(&verts));
2479
2480        let view = &self.pending_frame.as_ref().expect("checked above").1;
2481        let mut encoder = self
2482            .device
2483            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2484                label: Some("roxlap-gpu lines"),
2485            });
2486        {
2487            // `LoadOp::Load` keeps the marcher's frame; the lines draw over
2488            // it. Manual depth test in the FS (no depth-stencil attachment).
2489            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2490                label: Some("roxlap-gpu line paint"),
2491                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2492                    view,
2493                    depth_slice: None,
2494                    resolve_target: None,
2495                    ops: wgpu::Operations {
2496                        load: wgpu::LoadOp::Load,
2497                        store: wgpu::StoreOp::Store,
2498                    },
2499                })],
2500                depth_stencil_attachment: None,
2501                timestamp_writes: None,
2502                occlusion_query_set: None,
2503                multiview_mask: None,
2504            });
2505            pass.set_pipeline(&res.pipeline);
2506            pass.set_bind_group(0, &bg, &[]);
2507            pass.set_vertex_buffer(0, vbuf.slice(..));
2508            pass.draw(0..verts.len() as u32, 0..1);
2509        }
2510        self.queue.submit(std::iter::once(encoder.finish()));
2511        // pending_frame left intact — present/paint_egui finishes the frame.
2512    }
2513
2514    /// Lazy-build the [`LineResources`] (`line.wgsl` pipeline + uniform +
2515    /// dummy depth buffer). The colour target uses the surface format with
2516    /// straight-alpha over-blending; no depth-stencil attachment (the depth
2517    /// test is manual in the fragment shader against the scene depth buffer).
2518    fn ensure_line_resources(&mut self) {
2519        if self.line_resources.is_some() {
2520            return;
2521        }
2522        let shader = self
2523            .device
2524            .create_shader_module(wgpu::ShaderModuleDescriptor {
2525                label: Some("line.wgsl"),
2526                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/line.wgsl").into()),
2527            });
2528        let bgl = self
2529            .device
2530            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2531                label: Some("roxlap-gpu line.bgl"),
2532                entries: &[
2533                    wgpu::BindGroupLayoutEntry {
2534                        binding: 0,
2535                        visibility: wgpu::ShaderStages::FRAGMENT,
2536                        ty: wgpu::BindingType::Buffer {
2537                            ty: wgpu::BufferBindingType::Uniform,
2538                            has_dynamic_offset: false,
2539                            min_binding_size: None,
2540                        },
2541                        count: None,
2542                    },
2543                    wgpu::BindGroupLayoutEntry {
2544                        binding: 1,
2545                        visibility: wgpu::ShaderStages::FRAGMENT,
2546                        ty: wgpu::BindingType::Buffer {
2547                            ty: wgpu::BufferBindingType::Storage { read_only: true },
2548                            has_dynamic_offset: false,
2549                            min_binding_size: None,
2550                        },
2551                        count: None,
2552                    },
2553                ],
2554            });
2555        let layout = self
2556            .device
2557            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2558                label: Some("roxlap-gpu line.layout"),
2559                bind_group_layouts: &[Some(&bgl)],
2560                immediate_size: 0,
2561            });
2562        let pipeline = self
2563            .device
2564            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
2565                label: Some("roxlap-gpu line.pipeline"),
2566                layout: Some(&layout),
2567                vertex: wgpu::VertexState {
2568                    module: &shader,
2569                    entry_point: Some("vs_main"),
2570                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2571                    buffers: &[wgpu::VertexBufferLayout {
2572                        array_stride: std::mem::size_of::<LineVertex>() as u64,
2573                        step_mode: wgpu::VertexStepMode::Vertex,
2574                        attributes: &wgpu::vertex_attr_array![
2575                            0 => Float32x2, // pos (NDC)
2576                            1 => Float32,   // depth
2577                            2 => Float32,   // depth_test
2578                            3 => Float32x4, // color
2579                        ],
2580                    }],
2581                },
2582                fragment: Some(wgpu::FragmentState {
2583                    module: &shader,
2584                    entry_point: Some("fs_main"),
2585                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2586                    targets: &[Some(wgpu::ColorTargetState {
2587                        format: self.surface_config.format,
2588                        blend: Some(wgpu::BlendState::ALPHA_BLENDING),
2589                        write_mask: wgpu::ColorWrites::ALL,
2590                    })],
2591                }),
2592                primitive: wgpu::PrimitiveState {
2593                    cull_mode: None,
2594                    ..Default::default()
2595                },
2596                depth_stencil: None,
2597                multisample: wgpu::MultisampleState::default(),
2598                multiview_mask: None,
2599                cache: None,
2600            });
2601        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2602            label: Some("roxlap-gpu line.uniform"),
2603            size: std::mem::size_of::<LineParams>() as u64,
2604            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2605            mapped_at_creation: false,
2606        });
2607        let dummy_depth = self.device.create_buffer(&wgpu::BufferDescriptor {
2608            label: Some("roxlap-gpu line.dummy_depth"),
2609            size: 4,
2610            usage: wgpu::BufferUsages::STORAGE,
2611            mapped_at_creation: false,
2612        });
2613        self.line_resources = Some(LineResources {
2614            pipeline,
2615            bgl,
2616            uniform_buf,
2617            dummy_depth,
2618        });
2619    }
2620
2621    /// Upload (or replace) an RGBA8 image as a sampled texture, returning
2622    /// a stable id for [`GpuImageQuad::image`]. `rgba` is row-major,
2623    /// `width * height * 4` bytes, straight (un-premultiplied) alpha.
2624    /// Reuses a dropped slot when one exists. Returns `0` for malformed
2625    /// input (an id that draws nothing).
2626    pub fn upload_image(&mut self, rgba: &[u8], width: u32, height: u32) -> usize {
2627        if width == 0 || height == 0 || rgba.len() != (width as usize) * (height as usize) * 4 {
2628            return 0;
2629        }
2630        let texture = self.device.create_texture(&wgpu::TextureDescriptor {
2631            label: Some("roxlap-gpu image_sprite"),
2632            size: wgpu::Extent3d {
2633                width,
2634                height,
2635                depth_or_array_layers: 1,
2636            },
2637            mip_level_count: 1,
2638            sample_count: 1,
2639            dimension: wgpu::TextureDimension::D2,
2640            format: wgpu::TextureFormat::Rgba8Unorm,
2641            usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
2642            view_formats: &[],
2643        });
2644        self.queue.write_texture(
2645            wgpu::TexelCopyTextureInfo {
2646                texture: &texture,
2647                mip_level: 0,
2648                origin: wgpu::Origin3d::ZERO,
2649                aspect: wgpu::TextureAspect::All,
2650            },
2651            rgba,
2652            wgpu::TexelCopyBufferLayout {
2653                offset: 0,
2654                bytes_per_row: Some(width * 4),
2655                rows_per_image: Some(height),
2656            },
2657            wgpu::Extent3d {
2658                width,
2659                height,
2660                depth_or_array_layers: 1,
2661            },
2662        );
2663        let view = texture.create_view(&wgpu::TextureViewDescriptor::default());
2664        let resident = ImageResident {
2665            view,
2666            _texture: texture,
2667        };
2668        if let Some(slot) = self.images.iter().position(Option::is_none) {
2669            self.images[slot] = Some(resident);
2670            slot
2671        } else {
2672            self.images.push(Some(resident));
2673            self.images.len() - 1
2674        }
2675    }
2676
2677    /// Release an image uploaded with [`Self::upload_image`] (the slot
2678    /// becomes reusable).
2679    pub fn drop_image(&mut self, id: usize) {
2680        if let Some(slot) = self.images.get_mut(id) {
2681            *slot = None;
2682        }
2683    }
2684
2685    /// Draw world-space 2D image sprites ([`GpuImageQuad`]) over the
2686    /// pending frame — the textured-quad sibling of
2687    /// [`Self::draw_lines_deferred`]. Projects each quad with `cam` (the
2688    /// marcher's pinhole) + the last frame's FOV / surface size, expands +
2689    /// near-clips to triangles, and runs one `LoadOp::Load` pass with a
2690    /// draw per quad (each binds its own texture). UVs are perspective-correct;
2691    /// depth-tested quads are occluded by nearer marched geometry. Call
2692    /// after `render`, before `present` / `paint_egui`. No-op if no frame
2693    /// is pending.
2694    pub fn draw_images_deferred(&mut self, cam: &GpuLineCamera, quads: &[GpuImageQuad]) {
2695        if self.pending_frame.is_none() || quads.is_empty() {
2696            return;
2697        }
2698        let (w, h) = (self.surface_config.width, self.surface_config.height);
2699        let fov = self.last_fov_y_rad;
2700        if w == 0 || h == 0 || fov <= 0.0 {
2701            return;
2702        }
2703
2704        // Concatenate every quad's verts into one buffer, recording each
2705        // quad's (range, texture) so they share a single render pass.
2706        let mut verts: Vec<ImageVertex> = Vec::new();
2707        let mut draws: Vec<(u32, u32, usize)> = Vec::new();
2708        for quad in quads {
2709            if !matches!(self.images.get(quad.image), Some(Some(_))) {
2710                continue; // dropped / never-uploaded id
2711            }
2712            let v = build_image_vertices(cam, quad, w, h, fov, self.flip_x);
2713            if v.is_empty() {
2714                continue;
2715            }
2716            let start = verts.len() as u32;
2717            verts.extend_from_slice(&v);
2718            draws.push((start, verts.len() as u32, quad.image));
2719        }
2720        if draws.is_empty() {
2721            return;
2722        }
2723
2724        self.ensure_image_resources();
2725        // See `draw_lines_deferred`: skip depth when there's no valid
2726        // current-frame scene depth (none built, or a color-only clear).
2727        let no_depth = u32::from(self.scene_dda.is_none() || !self.scene_depth_valid);
2728        let params = LineParams {
2729            screen_w: w,
2730            screen_h: h,
2731            depth_bias: LINE_DEPTH_BIAS,
2732            no_depth,
2733            flip_x: u32::from(self.flip_x),
2734            _pad: [0; 3],
2735        };
2736        {
2737            let res = self.image_resources.as_ref().expect("just built");
2738            self.queue
2739                .write_buffer(&res.uniform_buf, 0, bytemuck::bytes_of(&params));
2740        }
2741
2742        // Grow-only persistent vertex buffer (mirrors the line vbuf).
2743        let needed = std::mem::size_of_val(verts.as_slice()) as u64;
2744        if self.image_vbuf_cap < needed {
2745            let cap = needed.next_power_of_two().max(4096);
2746            self.image_vbuf = Some(self.device.create_buffer(&wgpu::BufferDescriptor {
2747                label: Some("roxlap-gpu image.vbuf"),
2748                size: cap,
2749                usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
2750                mapped_at_creation: false,
2751            }));
2752            self.image_vbuf_cap = cap;
2753        }
2754        let vbuf = self.image_vbuf.as_ref().expect("ensured above");
2755        self.queue
2756            .write_buffer(vbuf, 0, bytemuck::cast_slice(&verts));
2757
2758        // One bind group per draw (the texture view differs per quad).
2759        let res = self.image_resources.as_ref().expect("just built");
2760        let depth_resource = match &self.scene_dda {
2761            Some(dda) => dda.depth_buffer.as_entire_binding(),
2762            None => res.dummy_depth.as_entire_binding(),
2763        };
2764        let bind_groups: Vec<wgpu::BindGroup> = draws
2765            .iter()
2766            .map(|&(_, _, image_id)| {
2767                let resident = self.images[image_id].as_ref().expect("checked present");
2768                self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2769                    label: Some("roxlap-gpu image.bg"),
2770                    layout: &res.bgl,
2771                    entries: &[
2772                        wgpu::BindGroupEntry {
2773                            binding: 0,
2774                            resource: res.uniform_buf.as_entire_binding(),
2775                        },
2776                        wgpu::BindGroupEntry {
2777                            binding: 1,
2778                            resource: depth_resource.clone(),
2779                        },
2780                        wgpu::BindGroupEntry {
2781                            binding: 2,
2782                            resource: wgpu::BindingResource::TextureView(&resident.view),
2783                        },
2784                        wgpu::BindGroupEntry {
2785                            binding: 3,
2786                            resource: wgpu::BindingResource::Sampler(&res.sampler),
2787                        },
2788                    ],
2789                })
2790            })
2791            .collect();
2792
2793        let view = &self.pending_frame.as_ref().expect("checked above").1;
2794        let mut encoder = self
2795            .device
2796            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2797                label: Some("roxlap-gpu images"),
2798            });
2799        {
2800            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2801                label: Some("roxlap-gpu image paint"),
2802                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2803                    view,
2804                    depth_slice: None,
2805                    resolve_target: None,
2806                    ops: wgpu::Operations {
2807                        load: wgpu::LoadOp::Load,
2808                        store: wgpu::StoreOp::Store,
2809                    },
2810                })],
2811                depth_stencil_attachment: None,
2812                timestamp_writes: None,
2813                occlusion_query_set: None,
2814                multiview_mask: None,
2815            });
2816            pass.set_pipeline(&res.pipeline);
2817            pass.set_vertex_buffer(0, vbuf.slice(..));
2818            for (&(start, end, _), bg) in draws.iter().zip(&bind_groups) {
2819                pass.set_bind_group(0, bg, &[]);
2820                pass.draw(start..end, 0..1);
2821            }
2822        }
2823        self.queue.submit(std::iter::once(encoder.finish()));
2824        // pending_frame left intact — present/paint_egui finishes it.
2825    }
2826
2827    /// Lazy-build the [`ImageResources`] (`image.wgsl` pipeline + uniform +
2828    /// nearest sampler + dummy depth). Straight-alpha over-blend, no
2829    /// depth-stencil attachment (the depth test is manual in the FS).
2830    fn ensure_image_resources(&mut self) {
2831        if self.image_resources.is_some() {
2832            return;
2833        }
2834        let shader = self
2835            .device
2836            .create_shader_module(wgpu::ShaderModuleDescriptor {
2837                label: Some("image.wgsl"),
2838                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/image.wgsl").into()),
2839            });
2840        let bgl = self
2841            .device
2842            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2843                label: Some("roxlap-gpu image.bgl"),
2844                entries: &[
2845                    wgpu::BindGroupLayoutEntry {
2846                        binding: 0,
2847                        visibility: wgpu::ShaderStages::FRAGMENT,
2848                        ty: wgpu::BindingType::Buffer {
2849                            ty: wgpu::BufferBindingType::Uniform,
2850                            has_dynamic_offset: false,
2851                            min_binding_size: None,
2852                        },
2853                        count: None,
2854                    },
2855                    wgpu::BindGroupLayoutEntry {
2856                        binding: 1,
2857                        visibility: wgpu::ShaderStages::FRAGMENT,
2858                        ty: wgpu::BindingType::Buffer {
2859                            ty: wgpu::BufferBindingType::Storage { read_only: true },
2860                            has_dynamic_offset: false,
2861                            min_binding_size: None,
2862                        },
2863                        count: None,
2864                    },
2865                    wgpu::BindGroupLayoutEntry {
2866                        binding: 2,
2867                        visibility: wgpu::ShaderStages::FRAGMENT,
2868                        ty: wgpu::BindingType::Texture {
2869                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
2870                            view_dimension: wgpu::TextureViewDimension::D2,
2871                            multisampled: false,
2872                        },
2873                        count: None,
2874                    },
2875                    wgpu::BindGroupLayoutEntry {
2876                        binding: 3,
2877                        visibility: wgpu::ShaderStages::FRAGMENT,
2878                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
2879                        count: None,
2880                    },
2881                ],
2882            });
2883        let layout = self
2884            .device
2885            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2886                label: Some("roxlap-gpu image.layout"),
2887                bind_group_layouts: &[Some(&bgl)],
2888                immediate_size: 0,
2889            });
2890        let pipeline = self
2891            .device
2892            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
2893                label: Some("roxlap-gpu image.pipeline"),
2894                layout: Some(&layout),
2895                vertex: wgpu::VertexState {
2896                    module: &shader,
2897                    entry_point: Some("vs_main"),
2898                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2899                    buffers: &[wgpu::VertexBufferLayout {
2900                        array_stride: std::mem::size_of::<ImageVertex>() as u64,
2901                        step_mode: wgpu::VertexStepMode::Vertex,
2902                        attributes: &wgpu::vertex_attr_array![
2903                            0 => Float32x2, // ndc
2904                            1 => Float32,   // w
2905                            2 => Float32,   // depth
2906                            3 => Float32,   // depth_test
2907                            4 => Float32,   // cutoff
2908                            5 => Float32x2, // uv
2909                            6 => Float32x4, // tint
2910                        ],
2911                    }],
2912                },
2913                fragment: Some(wgpu::FragmentState {
2914                    module: &shader,
2915                    entry_point: Some("fs_main"),
2916                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2917                    targets: &[Some(wgpu::ColorTargetState {
2918                        format: self.surface_config.format,
2919                        blend: Some(wgpu::BlendState::ALPHA_BLENDING),
2920                        write_mask: wgpu::ColorWrites::ALL,
2921                    })],
2922                }),
2923                primitive: wgpu::PrimitiveState {
2924                    cull_mode: None,
2925                    ..Default::default()
2926                },
2927                depth_stencil: None,
2928                multisample: wgpu::MultisampleState::default(),
2929                multiview_mask: None,
2930                cache: None,
2931            });
2932        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2933            label: Some("roxlap-gpu image.uniform"),
2934            size: std::mem::size_of::<LineParams>() as u64,
2935            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2936            mapped_at_creation: false,
2937        });
2938        let dummy_depth = self.device.create_buffer(&wgpu::BufferDescriptor {
2939            label: Some("roxlap-gpu image.dummy_depth"),
2940            size: 4,
2941            usage: wgpu::BufferUsages::STORAGE,
2942            mapped_at_creation: false,
2943        });
2944        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
2945            label: Some("roxlap-gpu image.sampler"),
2946            // Nearest + clamp: pixel-art references want crisp texels and
2947            // no wrap bleed at the quad edges.
2948            address_mode_u: wgpu::AddressMode::ClampToEdge,
2949            address_mode_v: wgpu::AddressMode::ClampToEdge,
2950            address_mode_w: wgpu::AddressMode::ClampToEdge,
2951            mag_filter: wgpu::FilterMode::Nearest,
2952            min_filter: wgpu::FilterMode::Nearest,
2953            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
2954            ..Default::default()
2955        });
2956        self.image_resources = Some(ImageResources {
2957            pipeline,
2958            bgl,
2959            uniform_buf,
2960            dummy_depth,
2961            sampler,
2962        });
2963    }
2964
2965    /// Project a world point to window pixels under the marcher's
2966    /// vertical-FOV pinhole (the inverse of [`Self::pixel_ray`]), using
2967    /// the last-rendered frame's size + FOV. `None` before the first
2968    /// scene render or for a point at/behind the near plane.
2969    #[must_use]
2970    pub fn project_point(
2971        &self,
2972        cam_pos: [f32; 3],
2973        right: [f32; 3],
2974        down: [f32; 3],
2975        forward: [f32; 3],
2976        world: [f32; 3],
2977    ) -> Option<(f32, f32)> {
2978        let dda = self.scene_dda.as_ref()?;
2979        let (w, h) = dda.storage_size;
2980        if w == 0 || h == 0 || self.last_fov_y_rad <= 0.0 {
2981            return None;
2982        }
2983        let d = [
2984            world[0] - cam_pos[0],
2985            world[1] - cam_pos[1],
2986            world[2] - cam_pos[2],
2987        ];
2988        let cz = forward[0] * d[0] + forward[1] * d[1] + forward[2] * d[2];
2989        if cz < LINE_NEAR_Z {
2990            return None;
2991        }
2992        let cx = right[0] * d[0] + right[1] * d[1] + right[2] * d[2];
2993        let cy = down[0] * d[0] + down[1] * d[1] + down[2] * d[2];
2994        let half_h = (self.last_fov_y_rad * 0.5).tan();
2995        let half_w = half_h * (w as f32 / h as f32);
2996        let ndc_x = (cx / cz) / half_w;
2997        let ndc_y = -(cy / cz) / half_h;
2998        let sx = (ndc_x * 0.5 + 0.5) * w as f32;
2999        let sy = (0.5 - ndc_y * 0.5) * h as f32;
3000        Some((sx, sy))
3001    }
3002
3003    /// Overlay an `egui` UI on the pending frame, then present it
3004    /// (`hud` feature). `jobs` are the host's tessellated primitives
3005    /// (`egui::Context::tessellate`), `textures` the per-frame texture
3006    /// delta from `egui::FullOutput`, `pixels_per_point` the UI scale.
3007    ///
3008    /// Draws with `LoadOp::Load` over the marcher's frame (a separate
3009    /// encoder submitted after the scene's), so the UI composites on top
3010    /// of the world. No-op if no frame is pending.
3011    #[cfg(feature = "hud")]
3012    pub fn paint_egui(
3013        &mut self,
3014        jobs: &[egui::ClippedPrimitive],
3015        textures: &egui::TexturesDelta,
3016        pixels_per_point: f32,
3017    ) {
3018        let Some((surf_tex, surf_view)) = self.pending_frame.take() else {
3019            return;
3020        };
3021        let format = self.surface_config.format;
3022        let egui_rend = self.egui_renderer.get_or_insert_with(|| {
3023            egui_wgpu::Renderer::new(
3024                &self.device,
3025                format,
3026                egui_wgpu::RendererOptions {
3027                    msaa_samples: 1,
3028                    depth_stencil_format: None,
3029                    dithering: false,
3030                    ..Default::default()
3031                },
3032            )
3033        });
3034
3035        let screen = egui_wgpu::ScreenDescriptor {
3036            size_in_pixels: [self.surface_config.width, self.surface_config.height],
3037            pixels_per_point,
3038        };
3039        for (id, delta) in &textures.set {
3040            egui_rend.update_texture(&self.device, &self.queue, *id, delta);
3041        }
3042        let mut encoder = self
3043            .device
3044            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
3045                label: Some("roxlap-gpu egui"),
3046            });
3047        let user_bufs =
3048            egui_rend.update_buffers(&self.device, &self.queue, &mut encoder, jobs, &screen);
3049        {
3050            // `LoadOp::Load` keeps the marcher's frame; egui draws over it.
3051            let mut pass = encoder
3052                .begin_render_pass(&wgpu::RenderPassDescriptor {
3053                    label: Some("roxlap-gpu egui paint"),
3054                    color_attachments: &[Some(wgpu::RenderPassColorAttachment {
3055                        view: &surf_view,
3056                        depth_slice: None,
3057                        resolve_target: None,
3058                        ops: wgpu::Operations {
3059                            load: wgpu::LoadOp::Load,
3060                            store: wgpu::StoreOp::Store,
3061                        },
3062                    })],
3063                    depth_stencil_attachment: None,
3064                    timestamp_writes: None,
3065                    occlusion_query_set: None,
3066                    multiview_mask: None,
3067                })
3068                // egui-wgpu 0.29 requires a `'static` pass (see its docs).
3069                .forget_lifetime();
3070            egui_rend.render(&mut pass, jobs, &screen);
3071        }
3072        for id in &textures.free {
3073            egui_rend.free_texture(id);
3074        }
3075        self.queue.submit(
3076            user_bufs
3077                .into_iter()
3078                .chain(std::iter::once(encoder.finish())),
3079        );
3080        surf_tex.present();
3081    }
3082
3083    fn build_scene_dda(
3084        &self,
3085        width: u32,
3086        height: u32,
3087        surface_format: wgpu::TextureFormat,
3088    ) -> SceneDdaResources {
3089        // Framebuffer as a packed-`rgba8unorm` storage buffer (1 u32 per
3090        // pixel, row stride = `width`). See the struct-field note.
3091        let framebuffer = self.device.create_buffer(&wgpu::BufferDescriptor {
3092            label: Some("roxlap-gpu scene_dda.framebuffer"),
3093            size: u64::from(width) * u64::from(height) * 4,
3094            usage: wgpu::BufferUsages::STORAGE,
3095            mapped_at_creation: false,
3096        });
3097        // Screen size + flip flag for the blit's pixel→index math
3098        // (`vec2<u32>` size, then `flip_x` + pad). Re-written per frame in
3099        // `render_scene` so a flip toggle takes effect without a resize.
3100        let blit_dims = self.device.create_buffer(&wgpu::BufferDescriptor {
3101            label: Some("roxlap-gpu scene_dda.blit_dims"),
3102            size: 16,
3103            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
3104            mapped_at_creation: false,
3105        });
3106        self.queue.write_buffer(
3107            &blit_dims,
3108            0,
3109            bytemuck::bytes_of(&[width, height, u32::from(self.flip_x), 0u32]),
3110        );
3111
3112        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
3113            label: Some("roxlap-gpu scene_dda.uniform"),
3114            size: std::mem::size_of::<SceneDdaUniform>() as u64,
3115            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
3116            mapped_at_creation: false,
3117        });
3118
3119        // GPU.9 — per-pixel world-t depth (f32 bits as u32). Sized to
3120        // the storage texture; written by the scene pass when sprites
3121        // are active, read+tested by the sprite splatter.
3122        let depth_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
3123            label: Some("roxlap-gpu scene_dda.depth"),
3124            size: u64::from(width) * u64::from(height) * 4,
3125            // COPY_SRC so `read_depth_pixel` can stage it for picking.
3126            usage: wgpu::BufferUsages::STORAGE
3127                | wgpu::BufferUsages::COPY_DST
3128                | wgpu::BufferUsages::COPY_SRC,
3129            mapped_at_creation: false,
3130        });
3131        let depth_readback = self.device.create_buffer(&wgpu::BufferDescriptor {
3132            label: Some("roxlap-gpu scene_dda.depth_readback"),
3133            size: u64::from(width) * u64::from(height) * 4,
3134            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
3135            mapped_at_creation: false,
3136        });
3137        let dda_shader = self
3138            .device
3139            .create_shader_module(wgpu::ShaderModuleDescriptor {
3140                label: Some("scene_dda.wgsl"),
3141                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
3142            });
3143        let bgl_dda = self
3144            .device
3145            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3146                label: Some("roxlap-gpu scene_dda.bgl"),
3147                entries: &[
3148                    bgl_uniform_entry(0),
3149                    bgl_storage_entry(1, true),
3150                    bgl_storage_entry(2, true),
3151                    bgl_storage_entry(3, true),
3152                    bgl_storage_entry(4, true),
3153                    bgl_storage_entry(5, true),
3154                    bgl_storage_entry(6, true),
3155                    bgl_storage_entry(7, true),
3156                    // Framebuffer storage buffer (read-write; the scene +
3157                    // sprite passes write packed pixels into it).
3158                    bgl_storage_entry(8, false),
3159                    // GPU.8 sky panorama + sampler.
3160                    wgpu::BindGroupLayoutEntry {
3161                        binding: 9,
3162                        visibility: wgpu::ShaderStages::COMPUTE,
3163                        ty: wgpu::BindingType::Texture {
3164                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
3165                            view_dimension: wgpu::TextureViewDimension::D2,
3166                            multisampled: false,
3167                        },
3168                        count: None,
3169                    },
3170                    wgpu::BindGroupLayoutEntry {
3171                        binding: 10,
3172                        visibility: wgpu::ShaderStages::COMPUTE,
3173                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
3174                        count: None,
3175                    },
3176                    // GPU.9 — read-write per-pixel depth buffer.
3177                    bgl_storage_entry(11, false),
3178                    // Occupancy pages 1..MAX_OCC_PAGES (page 0 is
3179                    // binding 1). Unused pages bind a dummy buffer.
3180                    bgl_storage_entry(12, true),
3181                    bgl_storage_entry(13, true),
3182                    bgl_storage_entry(14, true),
3183                    // Per-grid cameras (runtime-sized; one per grid).
3184                    bgl_storage_entry(15, true),
3185                    // TV.6 — material palette + terrain colour→material map.
3186                    bgl_storage_entry(16, true),
3187                    bgl_storage_entry(17, true),
3188                ],
3189            });
3190        let dda_pl = self
3191            .device
3192            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3193                label: Some("roxlap-gpu scene_dda.layout"),
3194                bind_group_layouts: &[Some(&bgl_dda)],
3195                immediate_size: 0,
3196            });
3197        let pipeline_dda = self
3198            .device
3199            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
3200                label: Some("roxlap-gpu scene_dda.pipeline"),
3201                layout: Some(&dda_pl),
3202                module: &dda_shader,
3203                entry_point: Some("render_scene"),
3204                compilation_options: wgpu::PipelineCompilationOptions::default(),
3205                cache: None,
3206            });
3207
3208        let blit_shader = self
3209            .device
3210            .create_shader_module(wgpu::ShaderModuleDescriptor {
3211                label: Some("scene_blit.wgsl"),
3212                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_blit.wgsl").into()),
3213            });
3214        let bgl_blit = self
3215            .device
3216            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3217                label: Some("roxlap-gpu scene_dda.blit_bgl"),
3218                entries: &[
3219                    // Framebuffer storage buffer (read-only in the blit).
3220                    wgpu::BindGroupLayoutEntry {
3221                        binding: 0,
3222                        visibility: wgpu::ShaderStages::FRAGMENT,
3223                        ty: wgpu::BindingType::Buffer {
3224                            ty: wgpu::BufferBindingType::Storage { read_only: true },
3225                            has_dynamic_offset: false,
3226                            min_binding_size: None,
3227                        },
3228                        count: None,
3229                    },
3230                    // Screen-size uniform for the pixel→index math.
3231                    wgpu::BindGroupLayoutEntry {
3232                        binding: 1,
3233                        visibility: wgpu::ShaderStages::FRAGMENT,
3234                        ty: wgpu::BindingType::Buffer {
3235                            ty: wgpu::BufferBindingType::Uniform,
3236                            has_dynamic_offset: false,
3237                            min_binding_size: None,
3238                        },
3239                        count: None,
3240                    },
3241                ],
3242            });
3243        let blit_pl = self
3244            .device
3245            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3246                label: Some("roxlap-gpu scene_dda.blit_layout"),
3247                bind_group_layouts: &[Some(&bgl_blit)],
3248                immediate_size: 0,
3249            });
3250        let pipeline_blit = self
3251            .device
3252            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
3253                label: Some("roxlap-gpu scene_dda.blit_pipeline"),
3254                layout: Some(&blit_pl),
3255                vertex: wgpu::VertexState {
3256                    module: &blit_shader,
3257                    entry_point: Some("vs_main"),
3258                    compilation_options: wgpu::PipelineCompilationOptions::default(),
3259                    buffers: &[],
3260                },
3261                fragment: Some(wgpu::FragmentState {
3262                    module: &blit_shader,
3263                    entry_point: Some("fs_main"),
3264                    compilation_options: wgpu::PipelineCompilationOptions::default(),
3265                    targets: &[Some(wgpu::ColorTargetState {
3266                        format: surface_format,
3267                        blend: None,
3268                        write_mask: wgpu::ColorWrites::ALL,
3269                    })],
3270                }),
3271                primitive: wgpu::PrimitiveState::default(),
3272                depth_stencil: None,
3273                multisample: wgpu::MultisampleState::default(),
3274                multiview_mask: None,
3275                cache: None,
3276            });
3277        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
3278            label: Some("roxlap-gpu scene_dda.blit_bg"),
3279            layout: &bgl_blit,
3280            entries: &[
3281                wgpu::BindGroupEntry {
3282                    binding: 0,
3283                    resource: framebuffer.as_entire_binding(),
3284                },
3285                wgpu::BindGroupEntry {
3286                    binding: 1,
3287                    resource: blit_dims.as_entire_binding(),
3288                },
3289            ],
3290        });
3291
3292        // TV.6 — material palette + terrain map buffers, seeded from the
3293        // renderer's current scene-material state (so a map defined before the
3294        // scene pass was built still takes effect).
3295        let (materials_pal_buf, terrain_map_buf) = {
3296            use wgpu::util::DeviceExt;
3297            let pal = self
3298                .device
3299                .create_buffer_init(&wgpu::util::BufferInitDescriptor {
3300                    label: Some("roxlap-gpu scene_dda.materials_pal"),
3301                    contents: bytemuck::cast_slice(self.scene_materials.as_slice()),
3302                    usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
3303                });
3304            // Fixed 256-row map (≤256 materials anyway) → no re-alloc when the
3305            // host changes the map after the scene pass is built.
3306            let mut rows = [[0u32; 2]; 256];
3307            for (slot, &row) in rows.iter_mut().zip(self.scene_terrain_map.iter()) {
3308                *slot = row;
3309            }
3310            let map = self
3311                .device
3312                .create_buffer_init(&wgpu::util::BufferInitDescriptor {
3313                    label: Some("roxlap-gpu scene_dda.terrain_map"),
3314                    contents: bytemuck::cast_slice(&rows),
3315                    usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
3316                });
3317            (pal, map)
3318        };
3319
3320        SceneDdaResources {
3321            storage_size: (width, height),
3322            framebuffer,
3323            uniform_buf,
3324            bgl_dda,
3325            pipeline_dda,
3326            blit_bg,
3327            pipeline_blit,
3328            blit_dims,
3329            depth_buffer,
3330            depth_readback,
3331            materials_pal_buf,
3332            terrain_map_buf,
3333        }
3334    }
3335
3336    /// Read back the per-pixel world-t depth at window pixel `(x, y)`
3337    /// from the last rendered frame, for screen→world picking. Returns
3338    /// the distance `t` along the (normalised) view ray to the nearest
3339    /// scene-grid surface, so the host reconstructs the world hit as
3340    /// `cam.pos + t * normalize(ray_dir)`. `None` for out-of-bounds
3341    /// pixels, sky / no-hit (the `T_INF` sentinel), or when no scene
3342    /// frame has been rendered.
3343    ///
3344    /// The depth buffer is the SCENE pass's output (terrain + grids),
3345    /// untouched by the sprite pass (which reads it read-only), so a
3346    /// cursor sprite under the pointer does not occlude the pick.
3347    ///
3348    /// Synchronous: copies the depth buffer to a mapped staging buffer
3349    /// and blocks on `device.poll(Wait)`. Cheap enough for click-time
3350    /// picks; do not call it every frame.
3351    ///
3352    /// Requires the last frame to have written depth, which happens
3353    /// when sprites are present (`write_depth`). The pick demo always
3354    /// has a cursor sprite, so this holds.
3355    ///
3356    /// Compiles on wasm, but the wasm facade never calls it: WebGPU's
3357    /// `device.poll` doesn't block for the GPU, so the blocking
3358    /// `recv()` here would hang the single browser thread. Picking is
3359    /// deferred on the wasm GPU path (the facade returns `None`).
3360    #[must_use]
3361    pub fn read_depth_pixel(&self, x: u32, y: u32) -> Option<f32> {
3362        let dda = self.scene_dda.as_ref()?;
3363        let (w, h) = dda.storage_size;
3364        if x >= w || y >= h {
3365            return None;
3366        }
3367        let mut enc = self
3368            .device
3369            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
3370                label: Some("roxlap-gpu depth readback"),
3371            });
3372        let size = u64::from(w) * u64::from(h) * 4;
3373        enc.copy_buffer_to_buffer(&dda.depth_buffer, 0, &dda.depth_readback, 0, size);
3374        self.queue.submit(std::iter::once(enc.finish()));
3375
3376        let slice = dda.depth_readback.slice(..);
3377        let (tx, rx) = std::sync::mpsc::channel();
3378        slice.map_async(wgpu::MapMode::Read, move |r| {
3379            let _ = tx.send(r);
3380        });
3381        self.device.poll(wgpu::PollType::wait_indefinitely()).ok();
3382        rx.recv().ok()?.ok()?;
3383
3384        let t = {
3385            let data = slice.get_mapped_range();
3386            let idx = ((y * w + x) * 4) as usize;
3387            let bytes: [u8; 4] = data[idx..idx + 4].try_into().ok()?;
3388            f32::from_le_bytes(bytes)
3389        };
3390        dda.depth_readback.unmap();
3391
3392        // Reject sky / no-hit (T_INF == 1e30 in the shader) + non-finite.
3393        if !t.is_finite() || t >= 1.0e29 {
3394            return None;
3395        }
3396        Some(t)
3397    }
3398
3399    /// World-space view-ray direction (un-normalised) for window pixel
3400    /// `(x, y)`, under the GPU marcher's projection — the canonical GPU
3401    /// unproject, mirroring `scene_dda.wgsl`'s `render_scene`
3402    /// (vertical-FOV pinhole). Uses the last-rendered frame's target
3403    /// size + FOV; `None` before the first scene render. Pair with
3404    /// [`Self::read_depth_pixel`] for screen→world picking.
3405    #[must_use]
3406    pub fn pixel_ray(
3407        &self,
3408        right: [f64; 3],
3409        down: [f64; 3],
3410        forward: [f64; 3],
3411        x: f64,
3412        y: f64,
3413    ) -> Option<[f64; 3]> {
3414        let dda = self.scene_dda.as_ref()?;
3415        let (w, h) = dda.storage_size;
3416        if w == 0 || h == 0 || self.last_fov_y_rad <= 0.0 {
3417            return None;
3418        }
3419        Some(pinhole_pixel_ray(
3420            right,
3421            down,
3422            forward,
3423            x,
3424            y,
3425            f64::from(w),
3426            f64::from(h),
3427            f64::from(self.last_fov_y_rad),
3428        ))
3429    }
3430
3431    /// GPU.10.1 — upload a sprite model registry + its instances for
3432    /// the DDA path. An empty instance slice clears all sprites.
3433    pub fn set_sprite_instances(
3434        &mut self,
3435        registry: &sprite_model::SpriteModelRegistry,
3436        instances: &[sprite_model::SpriteInstance],
3437    ) {
3438        if instances.is_empty() {
3439            self.sprite_registry = None;
3440            return;
3441        }
3442        self.sprite_registry = Some(sprite_model::SpriteRegistryResident::upload(
3443            &self.device,
3444            registry,
3445            instances,
3446        ));
3447    }
3448
3449    /// Incrementally append sprite instances **without** rebuilding the
3450    /// registry — the cheap streaming-spawn path (asteroids, projectiles).
3451    /// Returns the index of the first appended instance (`[base, base+N)`).
3452    ///
3453    /// Every appended instance must reference a model already registered
3454    /// by the [`Self::set_sprite_instances`] that established residency
3455    /// (model volumes are not re-uploaded here — build the full
3456    /// `SpriteModelRegistry` up front and seed it once, then stream
3457    /// instances). If no registry is resident yet, this performs the
3458    /// initial full upload and returns `0`.
3459    ///
3460    /// Cost is amortised O(1) per instance (the GPU instance buffer grows
3461    /// by powers of two), versus the full volume + buffer rebuild of
3462    /// [`Self::set_sprite_instances`].
3463    pub fn append_sprite_instances(
3464        &mut self,
3465        registry: &sprite_model::SpriteModelRegistry,
3466        instances: &[sprite_model::SpriteInstance],
3467    ) -> u32 {
3468        match self.sprite_registry.as_mut() {
3469            Some(reg) => reg.append_instances(&self.device, registry, instances),
3470            None => {
3471                self.set_sprite_instances(registry, instances);
3472                0
3473            }
3474        }
3475    }
3476
3477    /// Remove the sprite instance at `index` (swap-remove, O(1), no model
3478    /// re-upload). Returns `Some(old_last)` if a different instance was
3479    /// moved into `index` to fill the hole — its index changed from
3480    /// `old_last` to `index`, so a caller tracking instance handles must
3481    /// update that one. Returns `None` if `index` was the last element /
3482    /// out of range, or no registry is resident.
3483    pub fn remove_sprite_instance(&mut self, index: usize) -> Option<usize> {
3484        self.sprite_registry
3485            .as_mut()
3486            .and_then(|reg| reg.remove_instance(index))
3487    }
3488
3489    /// Incrementally add a new model (its full LOD chain) to the resident
3490    /// sprite registry **without** re-uploading the existing models — the
3491    /// counterpart to [`Self::append_sprite_instances`] for streaming in
3492    /// new geometry (unique asteroids, generated meshes).
3493    ///
3494    /// Usage mirrors `update_sprite_model`: you own the
3495    /// [`SpriteModelRegistry`](sprite_model::SpriteModelRegistry), append
3496    /// the model with [`add_lod`](sprite_model::SpriteModelRegistry::add_lod)
3497    /// (or `add`), then pass the returned `chain_id` here to sync that one
3498    /// chain to the GPU. Afterwards [`Self::append_sprite_instances`] may
3499    /// reference it.
3500    ///
3501    /// If no registry is resident yet, this performs the initial full
3502    /// upload of `registry` (all its current models, zero instances) to
3503    /// establish residency — so call it for your *first* model; only
3504    /// chains appended *after* residency exists are added incrementally.
3505    ///
3506    /// Cost is amortised O(new model voxels): the shared volume buffers
3507    /// carry slack and bump-append, growing (and rebuilding once from the
3508    /// registry) only on overflow.
3509    /// Flush queued `write_buffer` uploads by submitting an empty command
3510    /// stream. wgpu stages `write_buffer` data and flushes it on the next
3511    /// `Queue::submit`; calling this between batches of uploads (e.g. a
3512    /// flipbook's frames in [`Self::add_sprite_model`]) recycles the device
3513    /// staging pool so a big one-shot batch can't exhaust it (which would
3514    /// then crash egui-wgpu's own `write_buffer`).
3515    pub fn flush_writes(&self) {
3516        self.queue.submit(std::iter::empty::<wgpu::CommandBuffer>());
3517    }
3518
3519    pub fn add_sprite_model(
3520        &mut self,
3521        registry: &sprite_model::SpriteModelRegistry,
3522        chain_id: u32,
3523    ) {
3524        match self.sprite_registry.as_mut() {
3525            Some(reg) => reg.add_model(&self.device, &self.queue, registry, chain_id),
3526            None => {
3527                self.sprite_registry = Some(sprite_model::SpriteRegistryResident::upload(
3528                    &self.device,
3529                    registry,
3530                    &[],
3531                ));
3532            }
3533        }
3534    }
3535
3536    /// Remove a model (tombstone its LOD chain) from the resident sprite
3537    /// registry — the counterpart to [`Self::add_sprite_model`]. Frees its
3538    /// `colors`/`dirs` space for reuse by a later add; the smaller
3539    /// `occupancy`/`color_offsets` holes are reclaimed by
3540    /// [`Self::compact_sprite_models`]. Entry / chain ids stay stable, so
3541    /// other models' `chain_id`s remain valid.
3542    ///
3543    /// Instances of the removed model keep their slots but draw as nothing
3544    /// until the caller drops them via [`Self::remove_sprite_instance`].
3545    /// No-op if `chain_id` is unknown / already removed / no registry.
3546    pub fn remove_sprite_model(&mut self, chain_id: u32) {
3547        if let Some(reg) = self.sprite_registry.as_mut() {
3548            reg.remove_model(chain_id);
3549        }
3550    }
3551
3552    /// Reclaim the holes left by [`Self::remove_sprite_model`] by rebuilding
3553    /// the shared volume buffers from the live models only. `registry` must
3554    /// be the resident one. Cost is O(live volume) — call it when
3555    /// [`Self::dead_sprite_model_count`] is high (e.g. exceeds the live
3556    /// count), not every frame. No-op if no registry is resident.
3557    pub fn compact_sprite_models(&mut self, registry: &sprite_model::SpriteModelRegistry) {
3558        if let Some(reg) = self.sprite_registry.as_mut() {
3559            reg.compact(&self.device, &self.queue, registry);
3560        }
3561    }
3562
3563    /// Number of live (non-removed) sprite models (0 if none uploaded).
3564    #[must_use]
3565    pub fn sprite_model_count(&self) -> usize {
3566        self.sprite_registry
3567            .as_ref()
3568            .map_or(0, sprite_model::SpriteRegistryResident::live_model_count)
3569    }
3570
3571    /// Number of removed-but-not-yet-compacted sprite models — the
3572    /// fragmentation signal for deciding when to call
3573    /// [`Self::compact_sprite_models`].
3574    #[must_use]
3575    pub fn dead_sprite_model_count(&self) -> usize {
3576        self.sprite_registry
3577            .as_ref()
3578            .map_or(0, sprite_model::SpriteRegistryResident::dead_model_count)
3579    }
3580
3581    /// Number of resident sprite instances (0 if none uploaded).
3582    #[must_use]
3583    pub fn sprite_instance_count(&self) -> usize {
3584        self.sprite_registry
3585            .as_ref()
3586            .map_or(0, sprite_model::SpriteRegistryResident::instance_count)
3587    }
3588
3589    /// Re-pose the already-resident sprite instances in place (no model
3590    /// volume re-upload) — the cheap per-frame path for animated KFA
3591    /// limbs. `instances` must match the last [`Self::set_sprite_instances`]
3592    /// in length + order. No-op if no sprite registry is resident.
3593    pub fn update_sprite_instance_transforms(
3594        &mut self,
3595        instances: &[sprite_model::SpriteInstance],
3596    ) {
3597        if let Some(reg) = self.sprite_registry.as_mut() {
3598            reg.update_transforms(instances);
3599        }
3600    }
3601
3602    /// GPU.12 incremental — re-upload only LOD chain `chain_id`'s entries
3603    /// after an in-place edit of `registry` (carve / recolour), without
3604    /// rebuilding the whole sprite registry. `registry` must be the one
3605    /// last passed to [`Self::set_sprite_instances`] with chain
3606    /// `chain_id` already edited. No-op if no registry is resident.
3607    pub fn update_sprite_model(
3608        &mut self,
3609        registry: &sprite_model::SpriteModelRegistry,
3610        chain_id: u32,
3611    ) {
3612        if let Some(reg) = self.sprite_registry.as_mut() {
3613            reg.update_model(&self.device, &self.queue, registry, chain_id);
3614        }
3615    }
3616
3617    /// VCL.2 — repoint sprite instance `index` at LOD chain `chain_id`
3618    /// (the per-frame flipbook step for animated voxel clips). `registry`
3619    /// is the resident one; `chain_id`'s volume must already be uploaded
3620    /// (e.g. a clip's frames registered via [`Self::add_sprite_model`]).
3621    /// CPU-side rewrite picked up by the next frame's cull — no volume
3622    /// re-upload. No-op if no registry is resident.
3623    pub fn set_sprite_instance_model(
3624        &mut self,
3625        registry: &sprite_model::SpriteModelRegistry,
3626        index: usize,
3627        chain_id: u32,
3628    ) {
3629        if let Some(reg) = self.sprite_registry.as_mut() {
3630            reg.set_instance_model(registry, index, chain_id);
3631        }
3632    }
3633
3634    /// Set the per-instance `kv6colmul[256]` lighting tables (voxlap's
3635    /// `update_reflects` output, e.g. via `roxlap_core::sprite::
3636    /// sprite_colmul`), in the same order/length as the last
3637    /// [`Self::set_sprite_instances`]. The GPU sprite pass modulates each
3638    /// voxel by its surface normal's entry — matching the CPU rasteriser.
3639    /// No-op if no sprite registry is resident.
3640    pub fn set_sprite_instance_colmul(&mut self, tables: &[[u64; 256]]) {
3641        if let Some(reg) = self.sprite_registry.as_mut() {
3642            reg.set_instance_colmul(tables);
3643        }
3644    }
3645
3646    /// GPU.10.4 — set the LOD pixel threshold: a sprite steps to the
3647    /// next mip once a mip-0 voxel would project below `px` screen
3648    /// pixels. `1.0` is the natural "no sub-pixel voxels" default;
3649    /// larger values force LOD in closer (useful for inspection).
3650    /// Clamped to ≥ 0.25.
3651    pub fn set_sprite_lod_px(&mut self, px: f32) {
3652        self.sprite_lod_px = px.max(0.25);
3653    }
3654
3655    /// GPU.11.1 — set the scene-grid LOD scan distance (world units).
3656    /// A chunk entered at world-t `t` is marched at mip
3657    /// `floor(log2(max(t, msd) / msd))`, clamped to its grid's mip
3658    /// ladder. `0` disables LOD (always mip-0). Larger values push
3659    /// the coarser mips farther out — the axis-aligned-mip-beams
3660    /// mitigation lever (GPU.11.2). Default 64 (matches CPU
3661    /// `mip_scan_dist`).
3662    pub fn set_scene_mip_scan_dist(&mut self, dist: f32) {
3663        self.scene_mip_scan_dist = dist.max(0.0);
3664    }
3665
3666    /// Set per-face grid side-shading — voxlap's
3667    /// `setsideshades(top, bot, left, right, up, down)`. Each value is
3668    /// subtracted (as a u8, matching the CPU `gcsub` high byte) from a
3669    /// hit voxel's brightness byte before shading, so the scene-DDA pass
3670    /// darkens grid faces the same way the CPU rasteriser does. `[0; 6]`
3671    /// disables it (the default). The hit face is taken from the DDA's
3672    /// last-stepped axis + ray direction.
3673    pub fn set_scene_side_shades(&mut self, s: [i8; 6]) {
3674        // Reinterpret each i8 as u8 (voxlap stamps `sxx` into gcsub's
3675        // high byte verbatim), then pack (top, bot, left, right) /
3676        // (up, down, 0, 0) for the two uniform vec4s.
3677        let v = |i: usize| i32::from(s[i] as u8);
3678        self.scene_side_shades = [[v(0), v(1), v(2), v(3)], [v(4), v(5), 0, 0]];
3679    }
3680
3681    /// GPU.10.1 — build the instanced model-DDA pipeline (one thread
3682    /// per pixel). Lazily invoked the first frame a registry is present.
3683    fn build_sprite_model_dda(&self) -> SpriteModelDdaResources {
3684        let shader = self
3685            .device
3686            .create_shader_module(wgpu::ShaderModuleDescriptor {
3687                label: Some("sprite_model_dda.wgsl"),
3688                source: wgpu::ShaderSource::Wgsl(
3689                    include_str!("../shaders/sprite_model_dda.wgsl").into(),
3690                ),
3691            });
3692        let bgl = self
3693            .device
3694            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3695                label: Some("roxlap-gpu sprite_model_dda.bgl"),
3696                entries: &[
3697                    bgl_uniform_entry(0),
3698                    bgl_storage_entry(1, true),  // occupancy
3699                    bgl_storage_entry(2, true),  // colors
3700                    bgl_storage_entry(3, true),  // color_offsets
3701                    bgl_storage_entry(4, true),  // model_meta
3702                    bgl_storage_entry(5, true),  // instances
3703                    bgl_storage_entry(6, true),  // scene depth
3704                    bgl_storage_entry(7, false), // framebuffer (read-write buffer)
3705                    bgl_storage_entry(8, true),  // tile_ranges
3706                    bgl_storage_entry(9, true),  // tile_instances
3707                    bgl_storage_entry(10, true), // per-voxel dir
3708                    bgl_storage_entry(11, true), // per-instance kv6colmul
3709                    bgl_storage_entry(12, true), // TV — material palette
3710                    bgl_storage_entry(13, true), // TV.3 — per-voxel material id
3711                ],
3712            });
3713        let pl = self
3714            .device
3715            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3716                label: Some("roxlap-gpu sprite_model_dda.layout"),
3717                bind_group_layouts: &[Some(&bgl)],
3718                immediate_size: 0,
3719            });
3720        let pipeline = self
3721            .device
3722            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
3723                label: Some("roxlap-gpu sprite_model_dda.pipeline"),
3724                layout: Some(&pl),
3725                module: &shader,
3726                entry_point: Some("march"),
3727                compilation_options: wgpu::PipelineCompilationOptions::default(),
3728                cache: None,
3729            });
3730        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
3731            label: Some("roxlap-gpu sprite_model_dda.uniform"),
3732            size: std::mem::size_of::<SpriteModelUniform>() as u64,
3733            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
3734            mapped_at_creation: false,
3735        });
3736        // TV — material palette, seeded from the current renderer state so a
3737        // table defined before the sprite pass was built still takes effect.
3738        let materials_buf = {
3739            use wgpu::util::DeviceExt;
3740            self.device
3741                .create_buffer_init(&wgpu::util::BufferInitDescriptor {
3742                    label: Some("roxlap-gpu sprite_model_dda.materials"),
3743                    contents: bytemuck::cast_slice(self.sprite_materials.as_slice()),
3744                    usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
3745                })
3746        };
3747        SpriteModelDdaResources {
3748            bgl,
3749            pipeline,
3750            uniform_buf,
3751            materials_buf,
3752        }
3753    }
3754
3755    /// TV — set the global voxel-material palette for the GPU sprite pass.
3756    /// Mirrors the renderer's [`MaterialTable`](roxlap_formats::material::MaterialTable):
3757    /// every sprite/clip instance's `material` id indexes it for opacity +
3758    /// blend mode. Cheap (2 KB); call it whenever the palette changes (or
3759    /// each frame). While every material is opaque the shader stays on the
3760    /// unchanged first-hit path.
3761    pub fn set_sprite_materials(&mut self, table: &roxlap_formats::material::MaterialTable) {
3762        let (palette, any_translucent) = material_palette(table);
3763        self.sprite_materials = palette;
3764        self.sprite_has_translucent = any_translucent;
3765        if let Some(smd) = &self.sprite_model_dda {
3766            self.queue.write_buffer(
3767                &smd.materials_buf,
3768                0,
3769                bytemuck::cast_slice(self.sprite_materials.as_slice()),
3770            );
3771        }
3772    }
3773
3774    /// TV.6 — set the scene (terrain) material palette + colour→material map
3775    /// for the multi-grid scene pass. Matching-colour terrain voxels render
3776    /// translucent; an empty map / all-opaque palette renders unchanged. The
3777    /// map is capped at 256 rows (the fixed buffer size).
3778    pub fn set_scene_terrain_materials(
3779        &mut self,
3780        table: &roxlap_formats::material::MaterialTable,
3781        map: &[(u32, u8)],
3782    ) {
3783        let (palette, _) = material_palette(table);
3784        self.scene_materials = palette;
3785        self.scene_terrain_map = map
3786            .iter()
3787            .take(256)
3788            .map(|&(c, m)| [c & 0x00ff_ffff, u32::from(m)])
3789            .collect();
3790        self.scene_terrain_translucent = map.iter().any(|&(_, m)| !table.get(m).is_opaque());
3791        if let Some(dda) = &self.scene_dda {
3792            self.queue.write_buffer(
3793                &dda.materials_pal_buf,
3794                0,
3795                bytemuck::cast_slice(self.scene_materials.as_slice()),
3796            );
3797            if !self.scene_terrain_map.is_empty() {
3798                self.queue.write_buffer(
3799                    &dda.terrain_map_buf,
3800                    0,
3801                    bytemuck::cast_slice(&self.scene_terrain_map),
3802                );
3803            }
3804        }
3805    }
3806}
3807
3808/// GPU.11 — headless scene-DDA renderer for tests + offline visual
3809/// gates. Owns the `scene_dda.wgsl` compute pipeline with no surface
3810/// and no blit pass; renders a [`GpuSceneResident`] to an in-memory
3811/// RGBA framebuffer via texture readback. The per-substage visual
3812/// gate (render reference scenes, diff PPMs) and the GPU.11.1 mip
3813/// render-diff both ride on this.
3814pub struct HeadlessSceneRenderer {
3815    width: u32,
3816    height: u32,
3817    /// Framebuffer storage buffer (packed `rgba8unorm`, tight rows) —
3818    /// matches the buffer-output `scene_dda.wgsl` (see its note).
3819    framebuffer: wgpu::Buffer,
3820    depth_buffer: wgpu::Buffer,
3821    uniform_buf: wgpu::Buffer,
3822    _sky_texture: wgpu::Texture,
3823    sky_view: wgpu::TextureView,
3824    sky_sampler: wgpu::Sampler,
3825    bgl: wgpu::BindGroupLayout,
3826    pipeline: wgpu::ComputePipeline,
3827    readback: wgpu::Buffer,
3828    /// Per-face side-shades for the gate render (default none). Packed
3829    /// `[(top,bot,left,right), (up,down,_,_)]`; set via
3830    /// [`Self::set_side_shades`].
3831    side_shades: [[i32; 4]; 2],
3832}
3833
3834impl HeadlessSceneRenderer {
3835    /// Build the compute pipeline + output/readback resources for a
3836    /// `width × height` framebuffer. Validates `scene_dda.wgsl` and
3837    /// the [`scene::GridStaticMeta`] std430 layout at pipeline /
3838    /// bind-group time.
3839    #[must_use]
3840    pub fn new(device: &wgpu::Device, queue: &wgpu::Queue, width: u32, height: u32) -> Self {
3841        let framebuffer = device.create_buffer(&wgpu::BufferDescriptor {
3842            label: Some("roxlap-gpu headless.framebuffer"),
3843            size: u64::from(width) * u64::from(height) * 4,
3844            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
3845            mapped_at_creation: false,
3846        });
3847
3848        let uniform_buf = device.create_buffer(&wgpu::BufferDescriptor {
3849            label: Some("roxlap-gpu headless.uniform"),
3850            size: std::mem::size_of::<SceneDdaUniform>() as u64,
3851            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
3852            mapped_at_creation: false,
3853        });
3854        let depth_buffer = device.create_buffer(&wgpu::BufferDescriptor {
3855            label: Some("roxlap-gpu headless.depth"),
3856            size: u64::from(width) * u64::from(height) * 4,
3857            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
3858            mapped_at_creation: false,
3859        });
3860
3861        let default_sky_pixel = [120u8, 150, 220, 255];
3862        let (sky_texture, sky_view) = create_sky_texture(device, 1, 1, &default_sky_pixel);
3863        // Upload the default sky texel (create_sky_texture only allocates
3864        // — the texel must be written or the shader samples black, which
3865        // is why a grid-less headless render came back black).
3866        queue.write_texture(
3867            wgpu::TexelCopyTextureInfo {
3868                texture: &sky_texture,
3869                mip_level: 0,
3870                origin: wgpu::Origin3d::ZERO,
3871                aspect: wgpu::TextureAspect::All,
3872            },
3873            &default_sky_pixel,
3874            wgpu::TexelCopyBufferLayout {
3875                offset: 0,
3876                bytes_per_row: Some(4),
3877                rows_per_image: Some(1),
3878            },
3879            wgpu::Extent3d {
3880                width: 1,
3881                height: 1,
3882                depth_or_array_layers: 1,
3883            },
3884        );
3885        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
3886            label: Some("roxlap-gpu headless.sky_sampler"),
3887            address_mode_u: wgpu::AddressMode::Repeat,
3888            address_mode_v: wgpu::AddressMode::Repeat,
3889            mag_filter: wgpu::FilterMode::Linear,
3890            min_filter: wgpu::FilterMode::Linear,
3891            ..Default::default()
3892        });
3893
3894        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
3895            label: Some("scene_dda.wgsl (headless)"),
3896            source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
3897        });
3898        let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3899            label: Some("roxlap-gpu headless.bgl"),
3900            entries: &[
3901                bgl_uniform_entry(0),
3902                bgl_storage_entry(1, true),
3903                bgl_storage_entry(2, true),
3904                bgl_storage_entry(3, true),
3905                bgl_storage_entry(4, true),
3906                bgl_storage_entry(5, true),
3907                bgl_storage_entry(6, true),
3908                bgl_storage_entry(7, true),
3909                // Framebuffer storage buffer (read-write).
3910                bgl_storage_entry(8, false),
3911                wgpu::BindGroupLayoutEntry {
3912                    binding: 9,
3913                    visibility: wgpu::ShaderStages::COMPUTE,
3914                    ty: wgpu::BindingType::Texture {
3915                        sample_type: wgpu::TextureSampleType::Float { filterable: true },
3916                        view_dimension: wgpu::TextureViewDimension::D2,
3917                        multisampled: false,
3918                    },
3919                    count: None,
3920                },
3921                wgpu::BindGroupLayoutEntry {
3922                    binding: 10,
3923                    visibility: wgpu::ShaderStages::COMPUTE,
3924                    ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
3925                    count: None,
3926                },
3927                bgl_storage_entry(11, false),
3928                bgl_storage_entry(12, true),
3929                bgl_storage_entry(13, true),
3930                bgl_storage_entry(14, true),
3931                // Per-grid cameras (runtime-sized; one per grid).
3932                bgl_storage_entry(15, true),
3933                // TV.6 — material palette + terrain map (opaque dummies here).
3934                bgl_storage_entry(16, true),
3935                bgl_storage_entry(17, true),
3936            ],
3937        });
3938        let pl = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3939            label: Some("roxlap-gpu headless.layout"),
3940            bind_group_layouts: &[Some(&bgl)],
3941            immediate_size: 0,
3942        });
3943        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
3944            label: Some("roxlap-gpu headless.pipeline"),
3945            layout: Some(&pl),
3946            module: &shader,
3947            entry_point: Some("render_scene"),
3948            compilation_options: wgpu::PipelineCompilationOptions::default(),
3949            cache: None,
3950        });
3951
3952        // Readback is a tight buffer-to-buffer copy (no 256-byte row
3953        // padding, unlike the old texture-to-buffer path).
3954        let readback = device.create_buffer(&wgpu::BufferDescriptor {
3955            label: Some("roxlap-gpu headless.readback"),
3956            size: u64::from(width) * u64::from(height) * 4,
3957            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
3958            mapped_at_creation: false,
3959        });
3960
3961        Self {
3962            width,
3963            height,
3964            framebuffer,
3965            depth_buffer,
3966            uniform_buf,
3967            _sky_texture: sky_texture,
3968            sky_view,
3969            sky_sampler,
3970            bgl,
3971            pipeline,
3972            readback,
3973            side_shades: [[0; 4]; 2],
3974        }
3975    }
3976
3977    /// Set per-face side-shades for subsequent [`Self::render`] calls —
3978    /// voxlap `setsideshades(top, bot, left, right, up, down)`, each an
3979    /// i8 stamped as u8 (matching the engine path). Lets the gate test
3980    /// the GPU side-shade darkening.
3981    pub fn set_side_shades(&mut self, s: [i8; 6]) {
3982        let v = |i: usize| i32::from(s[i] as u8);
3983        self.side_shades = [[v(0), v(1), v(2), v(3)], [v(4), v(5), 0, 0]];
3984    }
3985
3986    /// Render `scene` from `cameras` (one per grid) and read the
3987    /// framebuffer back as `width*height` packed `0xAABBGGRR` pixels
3988    /// (R in the low byte). Fog is disabled. `mip_scan_dist` drives
3989    /// the GPU.11.1 scene-grid LOD (`0` = always mip-0). Blocks on
3990    /// readback.
3991    ///
3992    /// # Panics
3993    /// If `cameras.len() != scene.grid_count`.
3994    #[must_use]
3995    #[allow(clippy::too_many_arguments)]
3996    pub fn render(
3997        &self,
3998        device: &wgpu::Device,
3999        queue: &wgpu::Queue,
4000        scene: &GpuSceneResident,
4001        cameras: &[Camera],
4002        fov_y_rad: f32,
4003        max_outer_steps: u32,
4004        mip_scan_dist: f32,
4005    ) -> Vec<u32> {
4006        assert_eq!(
4007            cameras.len(),
4008            scene.grid_count as usize,
4009            "headless render: {} cameras for {} grids",
4010            cameras.len(),
4011            scene.grid_count,
4012        );
4013
4014        let cam_vec: Vec<SceneDdaPerGridCamera> = cameras
4015            .iter()
4016            .map(SceneDdaPerGridCamera::from_camera)
4017            .collect();
4018        let grid_cameras = upload_grid_cameras(device, &cam_vec);
4019        // TV.6 — opaque dummies for the material palette + terrain map
4020        // bindings (headless renders opaque-only: terrain_has_translucent=0).
4021        let (dummy_pal, dummy_map) = {
4022            use wgpu::util::DeviceExt;
4023            let pal: Vec<MaterialGpu> = vec![
4024                MaterialGpu {
4025                    alpha: 1.0,
4026                    mode: 0
4027                };
4028                256
4029            ];
4030            let p = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
4031                label: Some("roxlap-gpu headless.materials_pal"),
4032                contents: bytemuck::cast_slice(&pal),
4033                usage: wgpu::BufferUsages::STORAGE,
4034            });
4035            let m = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
4036                label: Some("roxlap-gpu headless.terrain_map"),
4037                contents: bytemuck::cast_slice(&[[0u32; 2]]),
4038                usage: wgpu::BufferUsages::STORAGE,
4039            });
4040            (p, m)
4041        };
4042        let uniform = SceneDdaUniform {
4043            fov_y_rad,
4044            grid_count: scene.grid_count,
4045            max_outer_steps,
4046            _pad0: 0,
4047            screen_size: [self.width, self.height],
4048            _pad1: [0; 2],
4049            // Fog off: near/far past any reachable t → factor 0.
4050            fog_color: [0.0, 0.0, 0.0, 1.0e29],
4051            fog_far: 1.0e30,
4052            write_depth: 0,
4053            occ_page_words: scene.occupancy_page_words,
4054            occ_num_pages: scene.occupancy_num_pages,
4055            mip_scan_dist,
4056            terrain_has_translucent: 0, // headless gate: opaque only
4057            terrain_map_count: 0,
4058            _pad4: 0,
4059            // Sky direction from the first grid camera (the world frame
4060            // in these tests); a default forward camera when there are
4061            // none (grid_count == 0) so the sky lookup stays valid.
4062            sky_cam: SceneDdaPerGridCamera::from_camera(&cameras.first().copied().unwrap_or(
4063                Camera {
4064                    position: [0.0; 3],
4065                    right: [1.0, 0.0, 0.0],
4066                    down: [0.0, 0.0, 1.0],
4067                    forward: [0.0, 1.0, 0.0],
4068                    fov_y_rad,
4069                },
4070            )),
4071            side_shades0: self.side_shades[0],
4072            side_shades1: self.side_shades[1],
4073        };
4074        queue.write_buffer(&self.uniform_buf, 0, bytemuck::bytes_of(&uniform));
4075
4076        let bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
4077            label: Some("roxlap-gpu headless.bg"),
4078            layout: &self.bgl,
4079            entries: &[
4080                wgpu::BindGroupEntry {
4081                    binding: 0,
4082                    resource: self.uniform_buf.as_entire_binding(),
4083                },
4084                wgpu::BindGroupEntry {
4085                    binding: 1,
4086                    resource: scene.occupancy_pages[0].as_entire_binding(),
4087                },
4088                wgpu::BindGroupEntry {
4089                    binding: 2,
4090                    resource: scene.all_color_offsets.as_entire_binding(),
4091                },
4092                wgpu::BindGroupEntry {
4093                    binding: 3,
4094                    resource: scene.all_colors.as_entire_binding(),
4095                },
4096                wgpu::BindGroupEntry {
4097                    binding: 4,
4098                    resource: scene.all_chunk_colors_base.as_entire_binding(),
4099                },
4100                wgpu::BindGroupEntry {
4101                    binding: 5,
4102                    resource: scene.all_chunk_occupancy.as_entire_binding(),
4103                },
4104                wgpu::BindGroupEntry {
4105                    binding: 6,
4106                    resource: scene.grid_static_meta.as_entire_binding(),
4107                },
4108                wgpu::BindGroupEntry {
4109                    binding: 7,
4110                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
4111                },
4112                wgpu::BindGroupEntry {
4113                    binding: 8,
4114                    resource: self.framebuffer.as_entire_binding(),
4115                },
4116                wgpu::BindGroupEntry {
4117                    binding: 9,
4118                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
4119                },
4120                wgpu::BindGroupEntry {
4121                    binding: 10,
4122                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
4123                },
4124                wgpu::BindGroupEntry {
4125                    binding: 11,
4126                    resource: self.depth_buffer.as_entire_binding(),
4127                },
4128                wgpu::BindGroupEntry {
4129                    binding: 12,
4130                    resource: scene.occupancy_pages[1].as_entire_binding(),
4131                },
4132                wgpu::BindGroupEntry {
4133                    binding: 13,
4134                    resource: scene.occupancy_pages[2].as_entire_binding(),
4135                },
4136                wgpu::BindGroupEntry {
4137                    binding: 14,
4138                    resource: scene.occupancy_pages[3].as_entire_binding(),
4139                },
4140                wgpu::BindGroupEntry {
4141                    binding: 15,
4142                    resource: grid_cameras.as_entire_binding(),
4143                },
4144                wgpu::BindGroupEntry {
4145                    binding: 16,
4146                    resource: dummy_pal.as_entire_binding(),
4147                },
4148                wgpu::BindGroupEntry {
4149                    binding: 17,
4150                    resource: dummy_map.as_entire_binding(),
4151                },
4152            ],
4153        });
4154
4155        let mut enc =
4156            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
4157        {
4158            let mut pass = enc.begin_compute_pass(&wgpu::ComputePassDescriptor {
4159                label: Some("roxlap-gpu headless.pass"),
4160                timestamp_writes: None,
4161            });
4162            pass.set_pipeline(&self.pipeline);
4163            pass.set_bind_group(0, &bg, &[]);
4164            pass.dispatch_workgroups(self.width.div_ceil(8), self.height.div_ceil(8), 1);
4165        }
4166        enc.copy_buffer_to_buffer(
4167            &self.framebuffer,
4168            0,
4169            &self.readback,
4170            0,
4171            u64::from(self.width) * u64::from(self.height) * 4,
4172        );
4173        queue.submit(Some(enc.finish()));
4174
4175        let slice = self.readback.slice(..);
4176        let (tx, rx) = std::sync::mpsc::channel();
4177        slice.map_async(wgpu::MapMode::Read, move |r| {
4178            let _ = tx.send(r);
4179        });
4180        device.poll(wgpu::PollType::wait_indefinitely()).ok();
4181        rx.recv().expect("map_async channel").expect("map_async");
4182
4183        let data = slice.get_mapped_range();
4184        // Tight `width*height` packed pixels — the shader's
4185        // `pack4x8unorm(vec4(r,g,b,a))` already yields `0xAABBGGRR`
4186        // little-endian, so a straight u32 read reconstructs each pixel.
4187        let out: Vec<u32> = data
4188            .chunks_exact(4)
4189            .map(|px| u32::from_le_bytes([px[0], px[1], px[2], px[3]]))
4190            .collect();
4191        drop(data);
4192        self.readback.unmap();
4193        out
4194    }
4195}
4196
4197fn bgl_uniform_entry(binding: u32) -> wgpu::BindGroupLayoutEntry {
4198    wgpu::BindGroupLayoutEntry {
4199        binding,
4200        visibility: wgpu::ShaderStages::COMPUTE,
4201        ty: wgpu::BindingType::Buffer {
4202            ty: wgpu::BufferBindingType::Uniform,
4203            has_dynamic_offset: false,
4204            min_binding_size: None,
4205        },
4206        count: None,
4207    }
4208}
4209
4210fn bgl_storage_entry(binding: u32, read_only: bool) -> wgpu::BindGroupLayoutEntry {
4211    wgpu::BindGroupLayoutEntry {
4212        binding,
4213        visibility: wgpu::ShaderStages::COMPUTE,
4214        ty: wgpu::BindingType::Buffer {
4215            ty: wgpu::BufferBindingType::Storage { read_only },
4216            has_dynamic_offset: false,
4217            min_binding_size: None,
4218        },
4219        count: None,
4220    }
4221}
4222
4223/// Create a fresh sky panorama texture sized `width × height` with
4224/// the initial pixel data uploaded via `write_texture`. Used by
4225/// `GpuRenderer::new` (1×1 default) and `set_sky_panorama` (host-
4226/// supplied panorama).
4227fn create_sky_texture(
4228    device: &wgpu::Device,
4229    width: u32,
4230    height: u32,
4231    _initial_pixels: &[u8],
4232) -> (wgpu::Texture, wgpu::TextureView) {
4233    let tex = device.create_texture(&wgpu::TextureDescriptor {
4234        label: Some("roxlap-gpu sky_texture"),
4235        size: wgpu::Extent3d {
4236            width,
4237            height,
4238            depth_or_array_layers: 1,
4239        },
4240        mip_level_count: 1,
4241        sample_count: 1,
4242        dimension: wgpu::TextureDimension::D2,
4243        format: wgpu::TextureFormat::Rgba8Unorm,
4244        usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
4245        view_formats: &[],
4246    });
4247    let view = tex.create_view(&wgpu::TextureViewDescriptor::default());
4248    (tex, view)
4249}
4250
4251/// GPU.4 needs to upload a whole grid (~hundreds of MiB) as a few
4252/// storage buffers. wgpu's default `max_storage_buffer_binding_size`
4253/// is 128 MiB, which is just enough for the demo's 32×32 ground
4254/// occupancy (~128 MiB) but not the colour array. We request as
4255/// much as the adapter is willing to give — most desktop GPUs cap
4256/// individual storage buffers at 2-4 GiB; iGPUs often offer the
4257/// full system memory.
4258pub(crate) fn pick_required_limits(adapter_limits: &wgpu::Limits) -> wgpu::Limits {
4259    wgpu::Limits {
4260        max_storage_buffer_binding_size: adapter_limits.max_storage_buffer_binding_size,
4261        max_buffer_size: adapter_limits.max_buffer_size,
4262        // Occupancy paging adds up to MAX_OCC_PAGES-1 extra storage
4263        // bindings; with the scene's other buffers + the GPU.9 depth
4264        // buffer the scene_dda stage needs ~11. The default cap is 8.
4265        // Both NVK and lavapipe advertise ≫16, so request 16.
4266        max_storage_buffers_per_shader_stage: adapter_limits
4267            .max_storage_buffers_per_shader_stage
4268            .min(16),
4269        ..wgpu::Limits::default()
4270    }
4271}
4272
4273fn pick_present_mode(modes: &[wgpu::PresentMode]) -> wgpu::PresentMode {
4274    // Prefer Mailbox > Immediate > Fifo. Fifo is the universal
4275    // fallback and the only one Wayland-on-Mesa always offers.
4276    for &m in &[wgpu::PresentMode::Mailbox, wgpu::PresentMode::Immediate] {
4277        if modes.contains(&m) {
4278            return m;
4279        }
4280    }
4281    wgpu::PresentMode::Fifo
4282}
4283
/// Un-normalised world-space view-ray direction through the centre of
/// window pixel `(x, y)` for a vertical-FOV pinhole camera — the same
/// projection `scene_dda.wgsl`'s `render_scene` uses. Shared by
/// [`GpuRenderer::pixel_ray`]; kept as a free function so it can be
/// unit-tested without a device.
///
/// * `right` / `down` / `forward` — the camera basis vectors.
/// * `x`, `y` — pixel coordinates; the ray passes through the pixel
///   centre `(x + 0.5, y + 0.5)`.
/// * `w`, `h` — viewport size in pixels.
/// * `fov_y_rad` — full vertical field of view, in radians.
///
/// The result is `forward` plus lateral offsets along `right` and
/// `down`, so its component along `forward` is 1 when the basis is
/// orthonormal.
#[must_use]
#[allow(clippy::too_many_arguments)]
pub fn pinhole_pixel_ray(
    right: [f64; 3],
    down: [f64; 3],
    forward: [f64; 3],
    x: f64,
    y: f64,
    w: f64,
    h: f64,
    fov_y_rad: f64,
) -> [f64; 3] {
    // Half-extents of the image plane at unit distance along `forward`.
    let tan_half_fov_y = (fov_y_rad * 0.5).tan();
    let tan_half_fov_x = tan_half_fov_y * (w / h);

    // Pixel centre in normalised device coordinates: x grows to the
    // right over [-1, 1]; y is +1 at the top row and -1 at the bottom.
    let ndc_x = (x + 0.5) / w * 2.0 - 1.0;
    let ndc_y_top = 1.0 - (y + 0.5) / h * 2.0;

    let kx = ndc_x * tan_half_fov_x;
    let ky = ndc_y_top * tan_half_fov_y;

    // `ky` is measured upwards, hence the subtraction along `down`.
    let component = |axis: usize| forward[axis] + kx * right[axis] - ky * down[axis];
    [component(0), component(1), component(2)]
}
4312
#[cfg(test)]
mod pixel_ray_tests {
    use super::pinhole_pixel_ray;

    // Axis-aligned camera basis shared by the pinhole-ray tests.
    const RIGHT: [f64; 3] = [1.0, 0.0, 0.0];
    const DOWN: [f64; 3] = [0.0, 1.0, 0.0];
    const FWD: [f64; 3] = [0.0, 0.0, 1.0]; // voxlap z-down "look down"

    /// Every WGSL source shipped with the crate, paired with the name
    /// reported when it fails to parse or validate.
    const SHADERS: [(&str, &str); 8] = [
        (
            "sprite_model_dda.wgsl",
            include_str!("../shaders/sprite_model_dda.wgsl"),
        ),
        ("scene_dda.wgsl", include_str!("../shaders/scene_dda.wgsl")),
        ("blit.wgsl", include_str!("../shaders/blit.wgsl")),
        ("chunk_dda.wgsl", include_str!("../shaders/chunk_dda.wgsl")),
        ("grid_dda.wgsl", include_str!("../shaders/grid_dda.wgsl")),
        (
            "scene_blit.wgsl",
            include_str!("../shaders/scene_blit.wgsl"),
        ),
        ("line.wgsl", include_str!("../shaders/line.wgsl")),
        ("image.wgsl", include_str!("../shaders/image.wgsl")),
    ];

    /// The frame centre (NDC 0,0) must look straight along `forward`.
    #[test]
    fn centre_pixel_is_forward() {
        let fov_y = 60_f64.to_radians();
        let d = pinhole_pixel_ray(RIGHT, DOWN, FWD, 639.5, 359.5, 1280.0, 720.0, fov_y);
        let lateral_is_zero = d[0].abs() < 1e-9 && d[1].abs() < 1e-9;
        assert!(lateral_is_zero, "centre ≈ forward, got {d:?}");
        assert!((d[2] - 1.0).abs() < 1e-9);
    }

    /// At the very right edge the ray tilts +right by tan(hfov/2): its
    /// lateral component equals half_w = tan(fov_y/2)*aspect.
    #[test]
    fn right_edge_tilts_by_half_w() {
        let fov = 60_f64.to_radians();
        let half_w = (fov * 0.5).tan() * (1280.0 / 720.0);
        let d = pinhole_pixel_ray(RIGHT, DOWN, FWD, 1279.5, 359.5, 1280.0, 720.0, fov);
        let error = (d[0] - half_w).abs();
        assert!(error < 1e-6, "x={}, half_w={half_w}", d[0]);
        assert!(d[0] > 0.0, "right edge tilts +right");
    }

    /// Statically validate every WGSL shader with naga (the same
    /// front-end + validator wgpu runs at pipeline creation), so shader
    /// edits — e.g. the GPU.10 sprite lighting bindings — are caught in
    /// CI without needing a GPU device.
    #[test]
    fn wgsl_shaders_validate() {
        let mut validator = naga::valid::Validator::new(
            naga::valid::ValidationFlags::all(),
            naga::valid::Capabilities::all(),
        );
        for &(name, src) in &SHADERS {
            let module = match naga::front::wgsl::parse_str(src) {
                Ok(parsed) => parsed,
                Err(e) => panic!("{name}: WGSL parse failed:\n{}", e.emit_to_string(src)),
            };
            if let Err(e) = validator.validate(&module) {
                panic!("{name}: WGSL validation failed: {e:?}");
            }
        }
    }

    /// A 2×2 world quad centred straight ahead projects to vertices whose
    /// homogeneous `w` equals the camera-forward distance (so the shader's
    /// `clip = ndc·w` recovers perspective-correct UVs) and whose `depth`
    /// is the euclidean range. Verifies geometry without a GPU device.
    #[test]
    fn image_vertices_carry_forward_w_and_euclidean_depth() {
        // Camera at the origin looking down +Z with an axis-aligned basis.
        let cam = crate::GpuLineCamera {
            pos: [0.0, 0.0, 0.0],
            right: [1.0, 0.0, 0.0],
            down: [0.0, 1.0, 0.0],
            forward: [0.0, 0.0, 1.0],
        };
        // Quad 10 units ahead (forward = +Z), spanning x∈[-1,1], y∈[-1,1].
        let top_left = [-1.0, -1.0, 10.0];
        let top_right = [1.0, -1.0, 10.0];
        let bottom_left = [-1.0, 1.0, 10.0];
        let bottom_right = [1.0, 1.0, 10.0];
        let quad = crate::GpuImageQuad {
            corners: [top_left, top_right, bottom_left, bottom_right],
            image: 0,
            tint: [1.0, 1.0, 1.0, 1.0],
            depth_test: true,
            alpha_cutoff: 0.0,
        };
        let fov_y = 60_f32.to_radians();
        let verts = crate::build_image_vertices(&cam, &quad, 800, 600, fov_y, false);
        assert_eq!(verts.len(), 6, "two triangles, no near-clip");
        for vertex in &verts {
            assert!((vertex.w - 10.0).abs() < 1e-4, "w == forward distance");
            assert!(vertex.depth >= 10.0, "euclidean depth >= forward distance");
            assert_eq!(vertex.depth_test, 1.0);
        }
    }
}