Skip to main content

roxlap_gpu/
lib.rs

1//! WGPU-backed compute-shader renderer scaffold for the roxlap
2//! voxel engine. GPU.1 in `PORTING-GPU.md`.
3//!
4//! GPU.1's job: stand up the device + surface + swapchain on a
5//! host window (any [`raw-window-handle`](raw_window_handle)
6//! provider), present a clear-to-colour frame each render call,
7//! and give the host a one-call opt-in. No voxel marching yet — the
8//! [`examples/probe.rs`](../examples/probe.rs) standalone holds
9//! the empirical FPS baseline from GPU.0.
10//!
11//! Later sub-substages flesh `GpuRenderer::render` out: GPU.2
12//! uploads voxel data, GPU.3 dispatches the inner-DDA compute
13//! shader, GPU.4 layers in chunk skipping, GPU.5 plugs the renderer
14//! into `roxlap-scene::Scene`, …
15//!
16//! ## Host integration shape (GPU.1)
17//!
18//! ```no_run
19//! use std::sync::Arc;
20//! use roxlap_gpu::{GpuRenderer, GpuRendererSettings};
21//! # use winit::window::Window;
22//! # fn pick(w: Arc<Window>, size: (u32, u32)) -> Option<GpuRenderer> {
23//! match GpuRenderer::new_blocking(w, size, GpuRendererSettings::default()) {
24//!     Ok(r) => Some(r),
25//!     Err(e) => {
26//!         eprintln!("GPU init failed: {e}; falling back to CPU");
27//!         None
28//!     }
29//! }
30//! # }
31//! ```
32
33#![allow(clippy::must_use_candidate, clippy::too_many_lines)]
34
35pub mod camera;
36pub mod decompress;
37pub mod grid;
38// Headless rendering is a native-only test/bench aid: it blocks on
39// `pollster` + `device.poll(Wait)`, neither of which exists on wasm.
40#[cfg(not(target_arch = "wasm32"))]
41pub mod headless;
42pub mod resident;
43pub mod scene;
44pub mod sprite_model;
45
46pub use camera::Camera;
47pub use decompress::{decompress_chunk, ChunkUpload, BEDROCK_RGB, CHUNK_Z};
48pub use grid::{bounding_box_of, GpuGridResident, GridUpload};
49#[cfg(not(target_arch = "wasm32"))]
50pub use headless::HeadlessGpu;
51pub use resident::GpuChunkResident;
52pub use scene::{
53    GpuSceneResident, GridRuntimeTransform, GridStaticMeta, RefreshOutcome, SceneUpload,
54};
55pub use sprite_model::{
56    build_sprite_model, build_sprite_model_with_materials, sprite_model_from_clip_frame,
57    sprite_model_from_voxel_frame, SpriteInstance, SpriteInstanceTransform, SpriteModel,
58    SpriteModelRegistry, SpriteRegistryResident,
59};
60
61use std::sync::Arc;
62
63use bytemuck::{Pod, Zeroable};
64use raw_window_handle::{HasDisplayHandle, HasWindowHandle};
65
/// Caller-controllable knobs for [`GpuRenderer::new`].
///
/// The [`Default`] configuration targets "highest-performance GPU, prefer
/// Mailbox/Immediate over vsync" — i.e. the same configuration the GPU.0
/// probe used to measure the FPS ceiling.
///
/// Comparable with `==` so a host can detect a settings change cheaply.
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct GpuRendererSettings {
    /// Which class of adapter to request; see [`PowerPreference`].
    pub power_preference: PowerPreference,
    /// Initial clear colour cycled by GPU.1's empty render path.
    /// The voxel-rendering substages overwrite this entirely.
    pub clear_colour: [f64; 3],
    /// Prefer mailbox/immediate when offered; falls back to FIFO if
    /// the surface only supports it (Wayland under Mesa often does).
    pub uncapped_present: bool,
}

/// Adapter power class requested through
/// [`GpuRendererSettings::power_preference`].
///
/// NOTE(review): the translation to the `wgpu` adapter request happens
/// outside this section — confirm the exact mapping there.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PowerPreference {
    /// The non-default choice; presumably a lower-power adapter.
    Low,
    /// The default: the "highest-performance GPU" configuration.
    High,
}

impl Default for GpuRendererSettings {
    /// High-performance adapter, dark blue-grey clear colour, uncapped
    /// presentation.
    fn default() -> Self {
        Self {
            power_preference: PowerPreference::High,
            clear_colour: [0.06, 0.08, 0.12],
            uncapped_present: true,
        }
    }
}
96
97/// Errors `GpuRenderer::new` surfaces to the host. The host's
98/// expected flow is "try this, fall back to the CPU path on Err".
99#[derive(Debug)]
100pub enum GpuInitError {
101    CreateSurface(wgpu::CreateSurfaceError),
102    NoAdapter,
103    RequestDevice(wgpu::RequestDeviceError),
104}
105
106impl std::fmt::Display for GpuInitError {
107    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
108        match self {
109            Self::CreateSurface(e) => write!(f, "create_surface failed: {e}"),
110            Self::NoAdapter => write!(
111                f,
112                "no compatible adapter — does this system have a Vulkan/Metal/DX12 driver?"
113            ),
114            Self::RequestDevice(e) => write!(f, "request_device failed: {e}"),
115        }
116    }
117}
118
119impl std::error::Error for GpuInitError {
120    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
121        match self {
122            Self::CreateSurface(e) => Some(e),
123            Self::RequestDevice(e) => Some(e),
124            Self::NoAdapter => None,
125        }
126    }
127}
128
129impl From<wgpu::CreateSurfaceError> for GpuInitError {
130    fn from(value: wgpu::CreateSurfaceError) -> Self {
131        Self::CreateSurface(value)
132    }
133}
134
135impl From<wgpu::RequestDeviceError> for GpuInitError {
136    fn from(value: wgpu::RequestDeviceError) -> Self {
137        Self::RequestDevice(value)
138    }
139}
140
// NOTE(review): the paragraph below describes `GpuRenderer` (declared
// further down in this file), not `GpuLine`. It is kept here as a plain
// `//` comment so it no longer merges into `GpuLine`'s rustdoc, where its
// `Self::render` / `Self::render_chunk` links could not resolve.
//
// WGPU-backed renderer. Owns the device, queue, and surface
// bound to the host's window. `GpuRenderer::render` is the GPU.1
// clear-to-colour path; `GpuRenderer::render_chunk` is GPU.3's
// single-chunk DDA marcher.
//
// The window is consumed only at construction — `wgpu`'s
// `Surface<'static>` keeps its own `Arc` clone of the handle, so
// the renderer holds no window field of its own.

/// A world-space line segment for [`GpuRenderer::draw_lines_deferred`].
#[derive(Clone, Copy, Debug)]
pub struct GpuLine {
    /// First endpoint, in world space.
    pub a: [f32; 3],
    /// Second endpoint, in world space.
    pub b: [f32; 3],
    /// Straight RGBA in `0..=1` (the alpha drives the over-blend).
    pub color: [f32; 4],
    /// Screen-space thickness in pixels; the vertex builder draws anything
    /// below `1.0` as one pixel wide.
    pub width_px: f32,
    /// When `true`, the segment is occluded behind nearer marched geometry.
    pub depth_test: bool,
}
161
/// Camera basis used to project [`GpuLine`] endpoints into view space. It
/// is the same pinhole the scene-DDA pass marches with.
#[derive(Clone, Copy, Debug)]
pub struct GpuLineCamera {
    /// Camera position, in world voxel units.
    pub pos: [f32; 3],
    /// Basis vector; `right` / `down` / `forward` are orthonormal.
    pub right: [f32; 3],
    /// Basis vector pointing down the screen.
    pub down: [f32; 3],
    /// Basis vector along the view direction.
    pub forward: [f32; 3],
}
172
/// Near-plane distance along the camera's forward axis. [`GpuLine`]
/// endpoints closer than this are clipped, which keeps the pinhole divide
/// finite.
const LINE_NEAR_Z: f32 = 1.0 / 16.0;
/// Slack applied to the depth test, in euclidean world distance, so a line
/// lying on the surface it traces does not z-fight the marched geometry.
const LINE_DEPTH_BIAS: f32 = 0.5;
179
180/// One expanded-quad vertex (`build_line_vertices` output). `pos` is NDC;
181/// `depth` is the euclidean world distance of the source endpoint (the
182/// marcher's `best_t` metric); `depth_test` is `1.0`/`0.0`.
183#[repr(C)]
184#[derive(Clone, Copy, Pod, Zeroable)]
185struct LineVertex {
186    pos: [f32; 2],
187    depth: f32,
188    depth_test: f32,
189    color: [f32; 4],
190}
191
192/// `line.wgsl` / `image.wgsl` fragment uniform (std140; padded to 32 bytes
193/// so the uniform's struct stride is a 16-byte multiple).
194#[repr(C)]
195#[derive(Clone, Copy, Pod, Zeroable)]
196struct LineParams {
197    screen_w: u32,
198    screen_h: u32,
199    depth_bias: f32,
200    no_depth: u32,
201    /// 1 when the viewport flip is on. The depth buffer is written
202    /// unflipped (the blit mirrors at read time), but these passes flip the
203    /// vertex NDC X, so the fragment must mirror its depth lookup to match.
204    flip_x: u32,
205    _pad: [u32; 3],
206}
207
208/// Lazy-built debug-line pipeline (L3.2). The bind group is rebuilt each
209/// draw (it references the current `scene_dda.depth_buffer`, which the
210/// swapchain resize recreates); the pipeline / layout / uniform persist.
211struct LineResources {
212    pipeline: wgpu::RenderPipeline,
213    bgl: wgpu::BindGroupLayout,
214    uniform_buf: wgpu::Buffer,
215    /// 1-word stand-in bound when no scene depth exists (sprite-only /
216    /// empty scene); `no_depth = 1` keeps the shader from indexing it.
217    dummy_depth: wgpu::Buffer,
218}
219
220/// Project + expand world-space [`GpuLine`]s into screen-space quad
221/// vertices (6 per visible segment) for `line.wgsl`. Mirrors the
222/// scene-DDA pinhole (`forward + ndc_x·half_w·right − ndc_y·half_h·down`)
223/// so lines land on the marched geometry, carrying each endpoint's
224/// euclidean world distance as the depth-test key (= the marcher's
225/// `best_t`). Segments fully behind the near plane are dropped; the rest
226/// are clipped to it.
227fn build_line_vertices(
228    cam: &GpuLineCamera,
229    lines: &[GpuLine],
230    w: u32,
231    h: u32,
232    fov_y: f32,
233    flip_x: bool,
234) -> Vec<LineVertex> {
235    let aspect = w as f32 / h as f32;
236    let half_h = (fov_y * 0.5).tan();
237    let half_w = half_h * aspect;
238    let (wf, hf) = (w as f32, h as f32);
239
240    let cam_coords = |p: [f32; 3]| -> [f32; 3] {
241        let d = [p[0] - cam.pos[0], p[1] - cam.pos[1], p[2] - cam.pos[2]];
242        [
243            cam.right[0] * d[0] + cam.right[1] * d[1] + cam.right[2] * d[2],
244            cam.down[0] * d[0] + cam.down[1] * d[1] + cam.down[2] * d[2],
245            cam.forward[0] * d[0] + cam.forward[1] * d[1] + cam.forward[2] * d[2],
246        ]
247    };
248    // Camera-space point → (NDC xy, euclidean depth). NDC y is up (+1 top),
249    // matching WebGPU clip space; depth is the marcher's world-t metric.
250    let project = |q: [f32; 3]| -> ([f32; 2], f32) {
251        let inv = 1.0 / q[2];
252        let nx = q[0] * inv / half_w;
253        let ny = -q[1] * inv / half_h;
254        let depth = (q[0] * q[0] + q[1] * q[1] + q[2] * q[2]).sqrt();
255        ([nx, ny], depth)
256    };
257
258    let mut out = Vec::with_capacity(lines.len() * 6);
259    for line in lines {
260        let ca = cam_coords(line.a);
261        let cb = cam_coords(line.b);
262        let (cfa, cfb) = (ca[2], cb[2]);
263        if cfa < LINE_NEAR_Z && cfb < LINE_NEAR_Z {
264            continue;
265        }
266        // Near-clip in segment-parameter space on the forward component.
267        let (mut t0, mut t1) = (0.0f32, 1.0f32);
268        let dz = cfb - cfa;
269        if dz.abs() > f32::EPSILON {
270            let tn = (LINE_NEAR_Z - cfa) / dz;
271            if dz > 0.0 {
272                t0 = t0.max(tn);
273            } else {
274                t1 = t1.min(tn);
275            }
276        }
277        if t0 > t1 {
278            continue;
279        }
280        let lerp3 = |t: f32| {
281            [
282                ca[0] + (cb[0] - ca[0]) * t,
283                ca[1] + (cb[1] - ca[1]) * t,
284                ca[2] + (cb[2] - ca[2]) * t,
285            ]
286        };
287        let (n0, d0) = project(lerp3(t0));
288        let (n1, d1) = project(lerp3(t1));
289
290        // Expand in pixel space for a uniform screen-space thickness.
291        let to_px = |n: [f32; 2]| [(n[0] * 0.5 + 0.5) * wf, (0.5 - n[1] * 0.5) * hf];
292        let to_ndc = |p: [f32; 2]| [p[0] / wf * 2.0 - 1.0, 1.0 - p[1] / hf * 2.0];
293        let p0 = to_px(n0);
294        let p1 = to_px(n1);
295        let (dx, dy) = (p1[0] - p0[0], p1[1] - p0[1]);
296        let len = (dx * dx + dy * dy).sqrt().max(1e-6);
297        let half = line.width_px.max(1.0) * 0.5;
298        let (ex, ey) = (-dy / len * half, dx / len * half);
299
300        let c0a = to_ndc([p0[0] + ex, p0[1] + ey]);
301        let c0b = to_ndc([p0[0] - ex, p0[1] - ey]);
302        let c1a = to_ndc([p1[0] + ex, p1[1] + ey]);
303        let c1b = to_ndc([p1[0] - ex, p1[1] - ey]);
304        let dt = if line.depth_test { 1.0 } else { 0.0 };
305        // Mirror the overlay's NDC x to match the flipped scene blit.
306        let vert = |pos: [f32; 2], depth: f32| LineVertex {
307            pos: [if flip_x { -pos[0] } else { pos[0] }, pos[1]],
308            depth,
309            depth_test: dt,
310            color: line.color,
311        };
312        // Two triangles, cull disabled so winding is irrelevant.
313        out.push(vert(c0a, d0));
314        out.push(vert(c0b, d0));
315        out.push(vert(c1a, d1));
316        out.push(vert(c1a, d1));
317        out.push(vert(c0b, d0));
318        out.push(vert(c1b, d1));
319    }
320    out
321}
322
/// A world-space 2D image-sprite quad for
/// [`GpuRenderer::draw_images_deferred`]. The facade resolves orientation +
/// back-face culling, so this is pure geometry.
#[derive(Clone, Copy, Debug)]
pub struct GpuImageQuad {
    /// The four world points in the order `TL, TR, BL, BR`, carrying UVs
    /// `(0,0) (1,0) (0,1) (1,1)` respectively.
    pub corners: [[f32; 3]; 4],
    /// Index of a texture uploaded via [`GpuRenderer::upload_image`].
    pub image: usize,
    /// Straight RGBA in `0..=1`, multiplied into every texel.
    pub tint: [f32; 4],
    /// When `true`, the quad is occluded behind nearer marched geometry.
    pub depth_test: bool,
    /// Texels with alpha below this (`0..=1`) are discarded in the FS.
    /// `0.0` keeps the plain over-blend.
    pub alpha_cutoff: f32,
}
340
341/// One expanded textured-quad vertex (`build_image_vertices` output).
342/// `ndc` is the projected NDC xy; `w` is the source `forward` depth, fed
343/// back into a homogeneous clip position so the rasterizer interpolates
344/// `uv` perspective-correctly; `depth` is the euclidean world distance
345/// (the marcher's `best_t`) for the manual depth test.
346#[repr(C)]
347#[derive(Clone, Copy, Pod, Zeroable)]
348struct ImageVertex {
349    ndc: [f32; 2],
350    w: f32,
351    depth: f32,
352    depth_test: f32,
353    cutoff: f32,
354    uv: [f32; 2],
355    tint: [f32; 4],
356}
357
358/// Lazy-built image-sprite pipeline (mirrors [`LineResources`]). The
359/// per-draw bind group adds the quad's texture + a sampler to the line
360/// pass's uniform + scene-depth bindings.
361struct ImageResources {
362    pipeline: wgpu::RenderPipeline,
363    bgl: wgpu::BindGroupLayout,
364    uniform_buf: wgpu::Buffer,
365    dummy_depth: wgpu::Buffer,
366    sampler: wgpu::Sampler,
367}
368
369/// A retained image-sprite texture (uploaded via
370/// [`GpuRenderer::upload_image`], referenced by [`GpuImageQuad::image`]).
371struct ImageResident {
372    view: wgpu::TextureView,
373    // Held so the view stays valid + the texture shows in profiler dumps.
374    _texture: wgpu::Texture,
375}
376
/// Working vertex for near-plane clipping of a textured quad, expressed in
/// camera space.
#[derive(Clone, Copy)]
struct ImgClipV {
    /// Camera-space `(right, down, forward)` components.
    cam: [f32; 3],
    /// Texture coordinate carried alongside the position.
    uv: [f32; 2],
}
384
385/// Clip a convex camera-space polygon against the near plane
386/// (`forward >= LINE_NEAR_Z`), interpolating UVs at each crossing.
387fn clip_near_image(poly: &[ImgClipV]) -> Vec<ImgClipV> {
388    let n = poly.len();
389    let mut out: Vec<ImgClipV> = Vec::with_capacity(n + 1);
390    for i in 0..n {
391        let cur = poly[i];
392        let prev = poly[(i + n - 1) % n];
393        let cur_in = cur.cam[2] >= LINE_NEAR_Z;
394        let prev_in = prev.cam[2] >= LINE_NEAR_Z;
395        if cur_in != prev_in {
396            let t = (LINE_NEAR_Z - prev.cam[2]) / (cur.cam[2] - prev.cam[2]);
397            out.push(ImgClipV {
398                cam: [
399                    prev.cam[0] + (cur.cam[0] - prev.cam[0]) * t,
400                    prev.cam[1] + (cur.cam[1] - prev.cam[1]) * t,
401                    LINE_NEAR_Z,
402                ],
403                uv: [
404                    prev.uv[0] + (cur.uv[0] - prev.uv[0]) * t,
405                    prev.uv[1] + (cur.uv[1] - prev.uv[1]) * t,
406                ],
407            });
408        }
409        if cur_in {
410            out.push(cur);
411        }
412    }
413    out
414}
415
416/// Project + near-clip a world-space [`GpuImageQuad`] into perspective-correct
417/// textured-quad vertices for `image.wgsl`. Mirrors the scene-DDA pinhole
418/// (the same one [`build_line_vertices`] uses), carrying each vertex's
419/// euclidean world distance as the depth-test key. Quads fully behind the
420/// near plane produce no vertices.
421fn build_image_vertices(
422    cam: &GpuLineCamera,
423    quad: &GpuImageQuad,
424    w: u32,
425    h: u32,
426    fov_y: f32,
427    flip_x: bool,
428) -> Vec<ImageVertex> {
429    let aspect = w as f32 / h as f32;
430    let half_h = (fov_y * 0.5).tan();
431    let half_w = half_h * aspect;
432    let dt = if quad.depth_test { 1.0 } else { 0.0 };
433
434    let cam_coords = |p: [f32; 3]| -> [f32; 3] {
435        let d = [p[0] - cam.pos[0], p[1] - cam.pos[1], p[2] - cam.pos[2]];
436        [
437            cam.right[0] * d[0] + cam.right[1] * d[1] + cam.right[2] * d[2],
438            cam.down[0] * d[0] + cam.down[1] * d[1] + cam.down[2] * d[2],
439            cam.forward[0] * d[0] + cam.forward[1] * d[1] + cam.forward[2] * d[2],
440        ]
441    };
442    let project = |v: ImgClipV| -> ImageVertex {
443        let (cx, cy, cz) = (v.cam[0], v.cam[1], v.cam[2]);
444        let nx = cx / (cz * half_w);
445        ImageVertex {
446            // Mirror NDC x to match the flipped scene blit.
447            ndc: [if flip_x { -nx } else { nx }, -cy / (cz * half_h)],
448            w: cz,
449            depth: (cx * cx + cy * cy + cz * cz).sqrt(),
450            depth_test: dt,
451            cutoff: quad.alpha_cutoff,
452            uv: v.uv,
453            tint: quad.tint,
454        }
455    };
456
457    // Per-corner UV: TL(0,0) TR(1,0) BL(0,1) BR(1,1).
458    let uvs = [[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
459    let verts: Vec<ImgClipV> = quad
460        .corners
461        .iter()
462        .zip(uvs)
463        .map(|(c, uv)| ImgClipV {
464            cam: cam_coords(*c),
465            uv,
466        })
467        .collect();
468
469    let mut out = Vec::with_capacity(12);
470    for tri in [[0usize, 1, 2], [1, 3, 2]] {
471        let poly = [verts[tri[0]], verts[tri[1]], verts[tri[2]]];
472        let clipped = clip_near_image(&poly);
473        if clipped.len() < 3 {
474            continue;
475        }
476        for i in 1..clipped.len() - 1 {
477            out.push(project(clipped[0]));
478            out.push(project(clipped[i]));
479            out.push(project(clipped[i + 1]));
480        }
481    }
482    out
483}
484
485#[allow(clippy::struct_excessive_bools)] // independent per-frame flags, not a state enum
486pub struct GpuRenderer {
487    surface: wgpu::Surface<'static>,
488    surface_config: wgpu::SurfaceConfiguration,
489    device: wgpu::Device,
490    queue: wgpu::Queue,
491    adapter_info: String,
492    clear_colour: [f64; 3],
493    frame_count: u32,
494    /// Mirror the marched scene horizontally on present (the scene blit
495    /// samples `width-1-x`, and line/image overlays mirror their NDC x).
496    /// The egui pass is unaffected. See [`Self::set_flip_x`].
497    flip_x: bool,
498    /// Lazy-built on first [`Self::render_chunk`] call; rebuilt when
499    /// the swapchain resizes (storage texture must match).
500    chunk_dda: Option<ChunkDdaResources>,
501    /// Lazy-built on first [`Self::render_grid`] call; same resize
502    /// trigger as `chunk_dda`. The two paths share the same blit
503    /// pipeline structure but bind different storage layouts.
504    grid_dda: Option<GridDdaResources>,
505    /// Lazy-built on first [`Self::render_scene`] call. Holds the
506    /// multi-grid pipeline + per-grid camera uniforms.
507    scene_dda: Option<SceneDdaResources>,
508    /// TV.6 — global voxel-material palette mirrored to the scene pass (256
509    /// entries, default all-opaque), set via [`Self::set_scene_materials`].
510    scene_materials: Box<[MaterialGpu; 256]>,
511    /// TV.6 — terrain colour→material map (`[rgb, material_id]` rows) +
512    /// whether any mapped material is translucent (the shader gate).
513    scene_terrain_map: Vec<[u32; 2]>,
514    scene_terrain_translucent: bool,
515    /// Whether the *current* deferred frame ran a scene pass that wrote
516    /// `scene_dda.depth_buffer`. [`Self::render_scene`] sets it; the
517    /// color-only [`Self::render_clear_deferred`] clears it. Without this,
518    /// depth-tested overlays (`draw_lines_deferred` / `draw_image`) drawn
519    /// over an empty/cleared scene would test against the *previous*
520    /// scene's stale depth and clip incorrectly.
521    scene_depth_valid: bool,
522    /// GPU.8 — panoramic sky texture + sampler. Created at
523    /// `new` as a 1×1 mid-grey default; [`Self::set_sky_panorama`]
524    /// replaces it. The scene-DDA bind group references this each
525    /// frame.
526    sky_texture: wgpu::Texture,
527    sky_view: wgpu::TextureView,
528    sky_sampler: wgpu::Sampler,
529    /// GPU.8 fog state. `color` is BGRA-style premultiplied (each
530    /// channel in [0, 1]); `near` is the world-t distance at which
531    /// fog starts kicking in; `far` is the distance at which it's
532    /// fully opaque. The shader does
533    /// `mix(hit, fog, smoothstep(near, far, t))`.
534    fog_color: [f32; 3],
535    fog_near: f32,
536    fog_far: f32,
537    /// GPU.10 — sprites rendered as DDA-marched voxel models (the
538    /// precise path; the GPU.9 compute splatter it replaced was
539    /// retired in 10.5). Holds the concatenated model registry + the
540    /// per-frame instance array; set via [`Self::set_sprite_instances`].
541    sprite_registry: Option<sprite_model::SpriteRegistryResident>,
542    /// Lazy-built pipeline + uniform for the model-DDA pass.
543    sprite_model_dda: Option<SpriteModelDdaResources>,
544    /// TV — global voxel-material palette mirrored to the sprite pass (256
545    /// entries, default all-opaque), set via [`Self::set_sprite_materials`].
546    /// `sprite_has_translucent` gates the shader's accumulate path.
547    sprite_materials: Box<[MaterialGpu; 256]>,
548    sprite_has_translucent: bool,
549    /// GPU.10.4 — LOD aggressiveness: step a sprite to the next mip
550    /// once a mip-0 voxel projects below this many screen pixels.
551    /// Defaults to 4.0 (the empirical sweet spot); the host can tune
552    /// via [`Self::set_sprite_lod_px`].
553    sprite_lod_px: f32,
554    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
555    /// entered at world-t `t` is marched at the mip level
556    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
557    /// ladder. `0` disables LOD (always mip-0). Tunable via
558    /// [`Self::set_scene_mip_scan_dist`] — the axis-aligned-mip-beams
559    /// mitigation (GPU.11.2) pushes it outward if banding appears.
560    scene_mip_scan_dist: f32,
561    /// Per-face grid side-shades (voxlap setsideshades), packed for the
562    /// scene-DDA uniform: `[0]=(top,bot,left,right)`, `[1]=(up,down,_,_)`.
563    /// Each is the u8 shade intensity. `[[0;4];2]` = no shading. Set via
564    /// [`Self::set_scene_side_shades`].
565    scene_side_shades: [[i32; 4]; 2],
566    /// Vertical FOV (radians) the last `render_scene` marched with —
567    /// cached so [`Self::pixel_ray`] reconstructs the matching view ray
568    /// for picking. `0` until the first scene render.
569    last_fov_y_rad: f32,
570    /// The acquired-but-not-yet-presented swapchain frame from the most
571    /// recent deferred render ([`Self::render_scene`] /
572    /// [`Self::render_clear_deferred`]). [`Self::present`] shows it as
573    /// is; [`Self::paint_egui`] overlays egui first. Lets a host slot a
574    /// UI pass between the marcher and present. `None` between present
575    /// and the next render.
576    pending_frame: Option<(wgpu::SurfaceTexture, wgpu::TextureView)>,
577    /// Lazy-built debug-line pipeline (L3.2) — built on the first
578    /// [`Self::draw_lines_deferred`] call.
579    line_resources: Option<LineResources>,
580    /// Persistent debug-line vertex buffer (L3.3) — grown on demand and
581    /// reused across frames so a per-frame overlay (hundreds of segments)
582    /// costs one `write_buffer`, not a fresh allocation. `line_vbuf_cap`
583    /// is its capacity in bytes.
584    line_vbuf: Option<wgpu::Buffer>,
585    line_vbuf_cap: u64,
586    /// Lazy-built image-sprite pipeline — built on the first
587    /// [`Self::draw_images_deferred`] call.
588    image_resources: Option<ImageResources>,
589    /// Persistent image-sprite vertex buffer, grown on demand and reused
590    /// across frames (like [`Self::line_vbuf`]).
591    image_vbuf: Option<wgpu::Buffer>,
592    image_vbuf_cap: u64,
593    /// Retained image-sprite textures, indexed by the id
594    /// [`Self::upload_image`] returns. A dropped slot is `None` and is
595    /// re-used by a later upload.
596    images: Vec<Option<ImageResident>>,
597    /// Lazy-built `egui-wgpu` paint pipeline; created on the first
598    /// [`Self::paint_egui`] call (`hud` feature).
599    #[cfg(feature = "hud")]
600    egui_renderer: Option<egui_wgpu::Renderer>,
601}
602
603/// Per-renderer chunk-DDA pipeline state. The compute shader writes
604/// into the storage texture; a fullscreen-triangle render pass
605/// nearest-neighbour blits it to the swapchain.
606struct ChunkDdaResources {
607    storage_size: (u32, u32),
608    storage_view: wgpu::TextureView,
609    uniform_buf: wgpu::Buffer,
610    bgl_dda: wgpu::BindGroupLayout,
611    pipeline_dda: wgpu::ComputePipeline,
612    blit_bg: wgpu::BindGroup,
613    pipeline_blit: wgpu::RenderPipeline,
614    // wgpu BindGroups internally Arc their resources, but we keep
615    // the handle so the sampler shows up in profiler dumps.
616    _sampler: wgpu::Sampler,
617}
618
619struct GridDdaResources {
620    storage_size: (u32, u32),
621    storage_view: wgpu::TextureView,
622    uniform_buf: wgpu::Buffer,
623    bgl_dda: wgpu::BindGroupLayout,
624    pipeline_dda: wgpu::ComputePipeline,
625    blit_bg: wgpu::BindGroup,
626    pipeline_blit: wgpu::RenderPipeline,
627    _sampler: wgpu::Sampler,
628}
629
630struct SceneDdaResources {
631    storage_size: (u32, u32),
632    /// Framebuffer as a packed-`rgba8unorm` storage **buffer** (row
633    /// stride = width), written by the scene + sprite compute passes
634    /// and read by the blit. A buffer (not a storage texture) dodges
635    /// Chrome-Dawn's tiled write-texture layout (which produced a
636    /// 128×256-tiled image); linear + explicit stride is portable.
637    framebuffer: wgpu::Buffer,
638    uniform_buf: wgpu::Buffer,
639    bgl_dda: wgpu::BindGroupLayout,
640    pipeline_dda: wgpu::ComputePipeline,
641    blit_bg: wgpu::BindGroup,
642    pipeline_blit: wgpu::RenderPipeline,
643    /// Blit uniform: `[width, height, flip_x, _pad]`. Retained so the flip
644    /// flag (offset 8) can be re-written per frame.
645    blit_dims: wgpu::Buffer,
646    /// GPU.9 — per-pixel world-t depth (f32 bits as u32), sized
647    /// `width * height * 4`. The scene pass writes it when sprites
648    /// are present; the sprite model-DDA pass reads + composites
649    /// against it.
650    depth_buffer: wgpu::Buffer,
651    /// Picking — a `COPY_DST | MAP_READ` staging copy of `depth_buffer`
652    /// so the host can read back the per-pixel world-t after a frame
653    /// (e.g. click → which voxel). Same size as `depth_buffer`.
654    depth_readback: wgpu::Buffer,
655    /// TV.6 — global voxel-material palette (256 `MaterialGpu`, binding 16),
656    /// seeded from `scene_materials`, rewritten by [`GpuRenderer::set_scene_materials`].
657    materials_pal_buf: wgpu::Buffer,
658    /// TV.6 — terrain colour→material map (`[rgb, material_id]` rows, binding
659    /// 17); ≥1 element (wgpu rejects a zero-sized storage binding).
660    terrain_map_buf: wgpu::Buffer,
661}
662
663/// GPU.10.0 — single-sprite model-DDA pipeline: one thread per pixel
664/// marches the model voxel volume and composites against the scene
665/// depth buffer.
666struct SpriteModelDdaResources {
667    bgl: wgpu::BindGroupLayout,
668    pipeline: wgpu::ComputePipeline,
669    uniform_buf: wgpu::Buffer,
670    /// TV — global voxel-material palette (256 `MaterialGpu`, binding 12),
671    /// seeded from the renderer's `sprite_materials` and rewritten by
672    /// [`GpuRenderer::set_sprite_materials`].
673    materials_buf: wgpu::Buffer,
674}
675
676/// Per-frame uniform for the model-DDA pass. Mirrors `Uniform` in
677/// `sprite_model_dda.wgsl` (std140). Per-model + per-instance data
678/// now live in storage buffers; this holds only the camera, fog, and
679/// instance count.
680#[repr(C)]
681#[derive(Clone, Copy, Pod, Zeroable)]
682struct SpriteModelUniform {
683    cam_pos: [f32; 3],
684    _p0: f32,
685    cam_right: [f32; 3],
686    _p1: f32,
687    cam_down: [f32; 3],
688    _p2: f32,
689    cam_forward: [f32; 3],
690    _p3: f32,
691    fog_color: [f32; 4],
692    screen_size: [u32; 2],
693    instance_count: u32,
694    fog_far: f32,
695    fov_y_rad: f32,
696    tiles_x: u32,
697    tile_size: u32,
698    /// TV — 1 if any palette material is translucent: gates the shader's
699    /// accumulate path. 0 ⇒ the unchanged nearest-hit opaque path.
700    has_translucent: u32,
701}
702
/// GPU.10.3 — edge length, in pixels, of the square screen tiles that
/// sprite instances are binned into.
const SPRITE_TILE_SIZE: u32 = 16;
705
706/// One material in the GPU sprite material palette (binding 12). Mirrors
707/// `Mat` in `sprite_model_dda.wgsl` (std430, 8 bytes). TV stage.
708#[repr(C)]
709#[derive(Clone, Copy, Pod, Zeroable)]
710struct MaterialGpu {
711    /// Opacity / additive intensity, normalised to `0..=1`.
712    alpha: f32,
713    /// [`roxlap_formats::material::BlendMode`] discriminant.
714    mode: u32,
715}
716
717/// Convert the global [`MaterialTable`](roxlap_formats::material::MaterialTable)
718/// into the GPU palette + a flag of whether any material is non-opaque (the
719/// shader gate — an all-opaque palette runs the unchanged first-hit path).
720fn material_palette(
721    table: &roxlap_formats::material::MaterialTable,
722) -> (Box<[MaterialGpu; 256]>, bool) {
723    let mut out = Box::new(
724        [MaterialGpu {
725            alpha: 1.0,
726            mode: 0,
727        }; 256],
728    );
729    let mut any_translucent = false;
730    for (id, slot) in out.iter_mut().enumerate() {
731        let m = table.get(id as u8);
732        slot.alpha = f32::from(m.alpha) / 255.0;
733        slot.mode = u32::from(m.mode.as_u8());
734        if !m.is_opaque() {
735            any_translucent = true;
736        }
737    }
738    (out, any_translucent)
739}
740
741/// Build the per-grid camera storage buffer bound at `scene_dda.wgsl`
742/// binding 15 (read-only). One [`SceneDdaPerGridCamera`] per grid; the
743/// shader only indexes `0..grid_count`. An empty scene pads to one
744/// zeroed element (wgpu rejects a zero-sized storage binding). This
745/// replaces the old fixed `[…; 16]` uniform array, so a scene can hold
746/// any number of grids — the only ceiling is the device's storage size.
747fn upload_grid_cameras(device: &wgpu::Device, cams: &[SceneDdaPerGridCamera]) -> wgpu::Buffer {
748    use wgpu::util::DeviceExt;
749    let one = [SceneDdaPerGridCamera::zeroed()];
750    let src: &[SceneDdaPerGridCamera] = if cams.is_empty() { &one } else { cams };
751    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
752        label: Some("roxlap-gpu scene_dda.grid_cameras"),
753        contents: bytemuck::cast_slice(src),
754        usage: wgpu::BufferUsages::STORAGE,
755    })
756}
757
758// The scene_dda bind group + layout wire occupancy pages 1..=3 at
759// bindings 12..=14 explicitly; keep that in lockstep with the page
760// count. Bump the bindings (here, in the WGSL, and in the bind
761// group) if MAX_OCC_PAGES changes.
762const _: () = assert!(scene::MAX_OCC_PAGES == 4);
763
/// One camera record: a position plus the `right` / `down` / `forward`
/// basis vectors, as uploaded by `upload_grid_cameras` (one per grid) and
/// embedded in `SceneDdaUniform::sky_cam`.
///
/// `#[repr(C)]` with an explicit `f32` pad after every `[f32; 3]`, so each
/// vector occupies 16 bytes and the whole record is 64 bytes with no
/// implicit padding (required for the `Pod` derive).
/// NOTE(review): presumably this mirrors the matching struct in
/// `scene_dda.wgsl` — confirm before reordering fields.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaPerGridCamera {
    /// Camera position (copied from `Camera::position`).
    pos: [f32; 3],
    _pad0: f32,
    /// Camera right basis vector.
    right: [f32; 3],
    _pad1: f32,
    /// Camera down basis vector.
    down: [f32; 3],
    _pad2: f32,
    /// Camera forward basis vector.
    forward: [f32; 3],
    _pad3: f32,
}
776
777impl SceneDdaPerGridCamera {
778    fn from_camera(c: &Camera) -> Self {
779        Self {
780            pos: c.position,
781            _pad0: 0.0,
782            right: c.right,
783            _pad1: 0.0,
784            down: c.down,
785            _pad2: 0.0,
786            forward: c.forward,
787            _pad3: 0.0,
788        }
789    }
790}
791
/// Frame-level parameters for the scene-DDA pass, laid out as plain bytes
/// (`#[repr(C)]` + `Pod`).
///
/// Field order and the explicit `_pad*` members define the byte layout;
/// `Pod` forbids implicit padding, so every gap is spelled out.
/// NOTE(review): presumably this mirrors the uniform struct declared in
/// `scene_dda.wgsl` — confirm against the shader before adding, removing
/// or reordering fields.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaUniform {
    /// Vertical field of view, in radians.
    fov_y_rad: f32,
    /// Number of grids in the scene (`0` for a sprite-only / empty scene).
    grid_count: u32,
    max_outer_steps: u32,
    // Explicit padding word.
    _pad0: u32,
    /// Output size in pixels.
    screen_size: [u32; 2],
    // Explicit padding so `fog_color` starts on a 16-byte boundary.
    _pad1: [u32; 2],
    /// GPU.8 — `[r, g, b, fog_near]`. The `near` distance is packed
    /// into the colour's alpha channel to keep std140 alignment
    /// tidy (a bare `f32` after the `vec4` would force extra pads).
    fog_color: [f32; 4],
    fog_far: f32,
    /// GPU.9 — `1` when the sprite pass is active (scene pass then
    /// records `best_t` into the depth buffer), `0` otherwise.
    write_depth: u32,
    /// Occupancy paging: words per storage page (see
    /// `scene::split_occupancy_pages`). Only consulted by the shader
    /// when `occ_num_pages > 1`.
    occ_page_words: u32,
    /// Number of real occupancy pages (1 on multi-GiB GPUs → the
    /// shader takes a branch-free single-page read).
    occ_num_pages: u32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` marches at mip
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// count. `0` disables LOD (always mip-0).
    mip_scan_dist: f32,
    /// TV.6 — `1` if any terrain material is translucent (gates the
    /// accumulate path; `0` ⇒ unchanged opaque first-hit march).
    terrain_has_translucent: u32,
    /// TV.6 — number of `(rgb, material_id)` entries in the terrain map.
    terrain_map_count: u32,
    // Explicit padding word (the name skips `_pad2` / `_pad3`).
    _pad4: u32,
    /// World camera used only to derive the per-pixel sky direction —
    /// always valid, so a `grid_count == 0` (sprite-only / empty) scene
    /// still paints a proper sky instead of a degenerate `(0,0,1)`
    /// (whose `atan2(0,0)` sky lookup samples black).
    sky_cam: SceneDdaPerGridCamera,
    /// Per-face side-shade intensities (voxlap setsideshades), each the
    /// u8 shade subtracted from a voxel's brightness byte at a hit.
    /// `side_shades0 = (top, bot, left, right)`,
    /// `side_shades1 = (up, down, _, _)`. All-zero = no shading.
    side_shades0: [i32; 4],
    side_shades1: [i32; 4],
}
839
/// Uniform parameters for the grid-DDA pass, laid out as plain bytes
/// (`#[repr(C)]` + `Pod`, so all padding is explicit).
///
/// NOTE(review): presumably mirrors the uniform struct of the grid DDA
/// shader, which is not visible from here — confirm before reordering.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct GridDdaUniform {
    /// Camera position.
    camera_pos: [f32; 3],
    _pad0: f32,
    /// Camera right basis vector.
    camera_right: [f32; 3],
    _pad1: f32,
    /// Camera down basis vector.
    camera_down: [f32; 3],
    _pad2: f32,
    /// Camera forward basis vector.
    camera_forward: [f32; 3],
    /// Vertical field of view in radians; it occupies the fourth lane
    /// after `camera_forward`, where the other vectors carry a pad.
    fov_y_rad: f32,
    /// Output size in pixels.
    screen_size: [u32; 2],
    // NOTE(review): looks like the per-chunk voxel side length — confirm.
    vsid: u32,
    max_outer_steps: u32,
    // NOTE(review): presumably the grid extent in chunks per axis — confirm.
    chunks_dims: [u32; 3],
    _pad3: u32,
    // NOTE(review): presumably the chunk coordinate of the grid origin — confirm.
    origin_chunk: [i32; 3],
    _pad4: u32,
}
859
/// Uniform parameters for the single-chunk DDA pass. `render_chunk`
/// fills one of these every frame and writes it into the chunk-DDA
/// uniform buffer, whose size is `size_of::<ChunkDdaUniform>()`.
///
/// `#[repr(C)]` + `Pod`: all padding is explicit.
/// NOTE(review): presumably mirrors the uniform struct in
/// `chunk_dda.wgsl` — confirm before reordering fields.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct ChunkDdaUniform {
    /// Camera position (chunk-local voxel units, per `render_chunk`).
    camera_pos: [f32; 3],
    _pad0: f32,
    /// Camera right basis vector.
    camera_right: [f32; 3],
    _pad1: f32,
    /// Camera down basis vector.
    camera_down: [f32; 3],
    _pad2: f32,
    /// Camera forward basis vector.
    camera_forward: [f32; 3],
    /// Vertical field of view in radians; it occupies the fourth lane
    /// after `camera_forward`, where the other vectors carry a pad.
    fov_y_rad: f32,
    /// Output size in pixels (the swapchain size in `render_chunk`).
    screen_size: [u32; 2],
    /// Copied from the resident chunk's `vsid`.
    vsid: u32,
    /// Cap on the per-pixel DDA loop.
    max_scan_dist: u32,
}
875
876impl GpuRenderer {
877    /// Stand up the device + surface + swapchain on `window`. Async
878    /// because `wgpu::Adapter`/`Device` requests are.
879    ///
880    /// `window` is any [`raw-window-handle`] provider (winit, SDL,
881    /// GLFW, …) wrapped in an `Arc`; `size` is its initial physical
882    /// framebuffer size in pixels — passed explicitly so the renderer
883    /// stays decoupled from any one windowing library's size API.
884    ///
885    /// [`raw-window-handle`]: raw_window_handle
886    ///
887    /// # Errors
888    /// Returns [`GpuInitError`] if surface creation, adapter
889    /// selection, or device request fails. Hosts treat any error as
890    /// "fall back to the CPU path".
891    pub async fn new<W>(
892        window: Arc<W>,
893        size: (u32, u32),
894        settings: GpuRendererSettings,
895    ) -> Result<Self, GpuInitError>
896    where
897        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
898    {
899        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle());
900        let surface = instance.create_surface(window.clone())?;
901        let adapter = Self::request_adapter(&instance, Some(&surface), settings).await?;
902        let (device, queue) = Self::request_device(&adapter).await?;
903        Ok(Self::finish_init(
904            &adapter, device, queue, surface, size, settings,
905        ))
906    }
907
    /// wasm/WebGPU: build the renderer against an HTML `canvas`. No
    /// `Send + Sync` bound — wgpu's surface/device/queue are `!Send` on
    /// the `+atomics` shared-memory wasm build, and the browser host is
    /// single-threaded (`Rc<RefCell<…>>`). The native generic-`W` entry
    /// (which carries the bound) isn't reachable on wasm.
    ///
    /// Requests the adapter **and** the device **before**
    /// `create_surface`: on wasm, creating the surface calls
    /// `canvas.getContext("webgpu")`, which permanently locks the
    /// canvas's context type. If we bound it and then failed to get an
    /// adapter or device, a CPU/WebGL2 fallback on the *same* canvas
    /// (the facade clones the handle, but it's the same DOM element)
    /// would fail with "no webgl2 context". Creating the surface last
    /// leaves the canvas pristine when WebGPU is unavailable.
    ///
    /// # Errors
    /// See [`Self::new`].
    #[cfg(target_arch = "wasm32")]
    pub async fn new_from_canvas(
        canvas: web_sys::HtmlCanvasElement,
        size: (u32, u32),
        settings: GpuRendererSettings,
    ) -> Result<Self, GpuInitError> {
        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle());
        // Probe adapter AND device before binding the canvas — both
        // `requestAdapter` and `requestDevice` can fail on wasm, and
        // `create_surface` permanently locks the canvas to a WebGPU
        // context. Creating the surface last keeps the canvas pristine
        // for the CPU/WebGL2 fallback on any GPU-init failure.
        // The statement order below is therefore load-bearing.
        let adapter = Self::request_adapter(&instance, None, settings).await?;
        let (device, queue) = Self::request_device(&adapter).await?;
        let surface = instance.create_surface(wgpu::SurfaceTarget::Canvas(canvas))?;
        Ok(Self::finish_init(
            &adapter, device, queue, surface, size, settings,
        ))
    }
943
944    /// Pick a GPU adapter at the settings' power preference. `None`
945    /// `compatible_surface` is used on the wasm canvas path so the probe
946    /// doesn't bind the canvas's context (see [`Self::new_from_canvas`]);
947    /// WebGPU exposes a single surface-independent adapter, so this is
948    /// safe there.
949    async fn request_adapter(
950        instance: &wgpu::Instance,
951        compatible_surface: Option<&wgpu::Surface<'static>>,
952        settings: GpuRendererSettings,
953    ) -> Result<wgpu::Adapter, GpuInitError> {
954        let power_preference = match settings.power_preference {
955            PowerPreference::Low => wgpu::PowerPreference::LowPower,
956            PowerPreference::High => wgpu::PowerPreference::HighPerformance,
957        };
958        instance
959            .request_adapter(&wgpu::RequestAdapterOptions {
960                power_preference,
961                compatible_surface,
962                force_fallback_adapter: false,
963            })
964            .await
965            .map_err(|_| GpuInitError::NoAdapter)
966    }
967
968    /// Request the device + queue from `adapter`. Pulled out of
969    /// [`Self::finish_init`] so the wasm canvas path can validate the
970    /// device **before** `create_surface` binds the canvas's WebGPU
971    /// context — if the device request fails (e.g. a browser that
972    /// rejects a wgpu-sent limit), the canvas stays pristine for the
973    /// CPU/WebGL2 fallback instead of being poisoned.
974    async fn request_device(
975        adapter: &wgpu::Adapter,
976    ) -> Result<(wgpu::Device, wgpu::Queue), GpuInitError> {
977        Ok(adapter
978            .request_device(&wgpu::DeviceDescriptor {
979                label: Some("roxlap-gpu device"),
980                required_features: wgpu::Features::empty(),
981                required_limits: pick_required_limits(&adapter.limits()),
982                experimental_features: wgpu::ExperimentalFeatures::disabled(),
983                memory_hints: wgpu::MemoryHints::default(),
984                trace: wgpu::Trace::Off,
985            })
986            .await?)
987    }
988
989    /// Shared swapchain → sky/sampler setup, run after the adapter +
990    /// device + surface exist (the surface comes from a window handle on
991    /// native, or an HTML canvas on wasm — created last on wasm so a
992    /// failed device request never touches the canvas).
993    fn finish_init(
994        adapter: &wgpu::Adapter,
995        device: wgpu::Device,
996        queue: wgpu::Queue,
997        surface: wgpu::Surface<'static>,
998        size: (u32, u32),
999        settings: GpuRendererSettings,
1000    ) -> Self {
1001        let info = adapter.get_info();
1002        let adapter_info = format!(
1003            "{name} ({backend:?}, {device_type:?})",
1004            name = info.name,
1005            backend = info.backend,
1006            device_type = info.device_type,
1007        );
1008
1009        let caps = surface.get_capabilities(adapter);
1010        // Pick a NON-sRGB, 8-bit swapchain format. Voxlap colours are
1011        // already sRGB-encoded (the slab bytes are display-ready,
1012        // matching what the CPU softbuffer path writes straight to the
1013        // framebuffer with no conversion); an sRGB swapchain would
1014        // re-apply the gamma curve, washing the look out. We also
1015        // *prefer 8-bit BGRA/RGBA* over any other non-sRGB format: some
1016        // adapters (e.g. NVK) advertise a 16-bit-unorm format first,
1017        // and wgpu 29 gates `create_view` on 16-bit-norm formats behind
1018        // the `TEXTURE_FORMAT_16BIT_NORM` device feature (which we don't
1019        // enable, to stay WebGPU-portable). Falls back to the first
1020        // non-sRGB format, then `caps.formats[0]`.
1021        let surface_format = caps
1022            .formats
1023            .iter()
1024            .copied()
1025            .find(|f| {
1026                matches!(
1027                    f,
1028                    wgpu::TextureFormat::Bgra8Unorm | wgpu::TextureFormat::Rgba8Unorm
1029                )
1030            })
1031            .or_else(|| caps.formats.iter().copied().find(|f| !f.is_srgb()))
1032            .unwrap_or(caps.formats[0]);
1033        let present_mode = if settings.uncapped_present {
1034            pick_present_mode(&caps.present_modes)
1035        } else {
1036            wgpu::PresentMode::Fifo
1037        };
1038        // GPU.11.2 — surface the present mode: `Fifo` is vsync-capped
1039        // (FPS pinned to refresh rate → compute optimisations like the
1040        // mip LOD won't show up in the FPS counter). Mailbox/Immediate
1041        // are uncapped. Wayland under Mesa frequently offers only Fifo.
1042        eprintln!(
1043            "roxlap-gpu: present mode = {present_mode:?} (available: {:?})",
1044            caps.present_modes,
1045        );
1046        let (init_w, init_h) = size;
1047        let surface_config = wgpu::SurfaceConfiguration {
1048            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
1049            format: surface_format,
1050            width: init_w.max(1),
1051            height: init_h.max(1),
1052            present_mode,
1053            alpha_mode: caps.alpha_modes[0],
1054            view_formats: vec![],
1055            desired_maximum_frame_latency: 2,
1056        };
1057        surface.configure(&device, &surface_config);
1058
1059        // GPU.8 default sky: a 1×1 mid-grey texture. Hosts replace
1060        // it via `set_sky_panorama` with a real equirectangular
1061        // panorama; the default stops the shader sampling
1062        // uninitialised memory before that happens.
1063        let default_sky_pixel = [0x80u8, 0x80, 0x80, 0xff];
1064        let (sky_texture, sky_view) = create_sky_texture(&device, 1, 1, &default_sky_pixel);
1065        queue.write_texture(
1066            wgpu::TexelCopyTextureInfo {
1067                texture: &sky_texture,
1068                mip_level: 0,
1069                origin: wgpu::Origin3d::ZERO,
1070                aspect: wgpu::TextureAspect::All,
1071            },
1072            &default_sky_pixel,
1073            wgpu::TexelCopyBufferLayout {
1074                offset: 0,
1075                bytes_per_row: Some(4),
1076                rows_per_image: Some(1),
1077            },
1078            wgpu::Extent3d {
1079                width: 1,
1080                height: 1,
1081                depth_or_array_layers: 1,
1082            },
1083        );
1084        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
1085            label: Some("roxlap-gpu sky_sampler"),
1086            // Voxlap-convention panorama: u = elevation [0, 1]
1087            // (Repeat is a no-op since values don't go outside),
1088            // v = azimuth (wraps 360° — Repeat is required).
1089            address_mode_u: wgpu::AddressMode::Repeat,
1090            address_mode_v: wgpu::AddressMode::Repeat,
1091            address_mode_w: wgpu::AddressMode::ClampToEdge,
1092            mag_filter: wgpu::FilterMode::Linear,
1093            min_filter: wgpu::FilterMode::Linear,
1094            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1095            ..Default::default()
1096        });
1097
1098        Self {
1099            surface,
1100            surface_config,
1101            device,
1102            queue,
1103            adapter_info,
1104            clear_colour: settings.clear_colour,
1105            frame_count: 0,
1106            flip_x: false,
1107            chunk_dda: None,
1108            grid_dda: None,
1109            scene_dda: None,
1110            scene_materials: Box::new(
1111                [MaterialGpu {
1112                    alpha: 1.0,
1113                    mode: 0,
1114                }; 256],
1115            ),
1116            scene_terrain_map: Vec::new(),
1117            scene_terrain_translucent: false,
1118            scene_depth_valid: false,
1119            sky_texture,
1120            sky_view,
1121            sky_sampler,
1122            // Fog disabled by default — voxlap's CPU rasterizer
1123            // also runs without fog in the scene-demo, so matching
1124            // it means no GPU fog out of the box. Hosts can opt in
1125            // via `set_fog` (e.g. for atmospheric far-LOD masking).
1126            fog_color: [0.66, 0.74, 0.88],
1127            fog_near: 0.0,
1128            fog_far: 1.0e30,
1129            sprite_registry: None,
1130            sprite_model_dda: None,
1131            sprite_materials: Box::new(
1132                [MaterialGpu {
1133                    alpha: 1.0,
1134                    mode: 0,
1135                }; 256],
1136            ),
1137            sprite_has_translucent: false,
1138            // GPU.10.4 — default LOD threshold: step to a coarser mip
1139            // once a voxel projects below 4 px. Empirically the best
1140            // quality/cost tradeoff; the host can override.
1141            sprite_lod_px: 4.0,
1142            // GPU.11.1 — matches the CPU demo's mip_scan_dist=64.
1143            scene_mip_scan_dist: 64.0,
1144            scene_side_shades: [[0; 4]; 2],
1145            last_fov_y_rad: 0.0,
1146            pending_frame: None,
1147            line_resources: None,
1148            line_vbuf: None,
1149            line_vbuf_cap: 0,
1150            image_resources: None,
1151            image_vbuf: None,
1152            image_vbuf_cap: 0,
1153            images: Vec::new(),
1154            #[cfg(feature = "hud")]
1155            egui_renderer: None,
1156        }
1157    }
1158
1159    /// Synchronous wrapper for hosts that don't have an async
1160    /// runtime. Internally `pollster::block_on`s [`Self::new`].
1161    ///
1162    /// # Errors
1163    /// See [`Self::new`].
1164    #[cfg(not(target_arch = "wasm32"))]
1165    pub fn new_blocking<W>(
1166        window: Arc<W>,
1167        size: (u32, u32),
1168        settings: GpuRendererSettings,
1169    ) -> Result<Self, GpuInitError>
1170    where
1171        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
1172    {
1173        pollster::block_on(Self::new(window, size, settings))
1174    }
1175
1176    /// Human-readable adapter description — name + backend +
1177    /// device type. The demo host prints this in the title bar.
1178    pub fn adapter_info(&self) -> &str {
1179        &self.adapter_info
1180    }
1181
    /// Borrow the underlying wgpu device — hosts use this to build
    /// chunk uploads (`GpuChunkResident::upload(gpu.device(), …)`).
    /// The reference is tied to `&self`; nothing is cloned.
    pub fn device(&self) -> &wgpu::Device {
        &self.device
    }
1187
    /// Borrow the wgpu queue — hosts use this for read-back paths
    /// (`GpuChunkResident::read_voxel_blocking(gpu.device(), gpu.queue(), …)`).
    /// The reference is tied to `&self`; nothing is cloned.
    pub fn queue(&self) -> &wgpu::Queue {
        &self.queue
    }
1193
    /// Mirror the marched scene (and its line/image overlays) horizontally
    /// on present, leaving the egui overlay upright. See [`Self::flip_x`].
    pub fn set_flip_x(&mut self, flip: bool) {
        self.flip_x = flip;
    }

    /// GPU.8 — upload an equirectangular panorama as the scene's
    /// sky texture. `rgba` is row-major, `width × height` pixels,
    /// 4 bytes per pixel (R, G, B, A). The shader samples it with
    /// `u = atan2(dir.x, dir.y) / (2π) + 0.5` (azimuth) and
    /// `v = acos(-dir.z) / π` (elevation), matching standard
    /// equirectangular layout (top of image = zenith for voxlap's
    /// `+z = down` basis).
    ///
    /// NOTE(review): the sampler comment in `finish_init` describes the
    /// opposite axis assignment (`u` = elevation, `v` = azimuth) — confirm
    /// which one matches the shader.
    ///
    /// A new texture and view are created on every call and replace the
    /// previous sky; the sampler is left untouched.
    ///
    /// # Panics
    /// If `rgba.len() != (width * height * 4) as usize`.
    pub fn set_sky_panorama(&mut self, rgba: &[u8], width: u32, height: u32) {
        assert_eq!(
            rgba.len(),
            (width as usize) * (height as usize) * 4,
            "set_sky_panorama: expected w*h*4 bytes, got {}",
            rgba.len(),
        );
        let (tex, view) = create_sky_texture(&self.device, width, height, rgba);
        // Upload pixel data via `queue.write_texture` so we don't
        // have to map the buffer manually.
        self.queue.write_texture(
            wgpu::TexelCopyTextureInfo {
                texture: &tex,
                mip_level: 0,
                origin: wgpu::Origin3d::ZERO,
                aspect: wgpu::TextureAspect::All,
            },
            rgba,
            wgpu::TexelCopyBufferLayout {
                offset: 0,
                // Tightly packed rows: 4 bytes per pixel, no row padding.
                bytes_per_row: Some(width * 4),
                rows_per_image: Some(height),
            },
            wgpu::Extent3d {
                width,
                height,
                depth_or_array_layers: 1,
            },
        );
        self.sky_texture = tex;
        self.sky_view = view;
    }
1242
1243    /// GPU.8 — set the fog blend. `color` is per-channel [0, 1];
1244    /// `near`/`far` are world-space ray distances in voxel units.
1245    /// Hits with `t < near` show their full colour; hits with
1246    /// `t > far` show `color` exclusively; in between is a
1247    /// smoothstep blend.
1248    pub fn set_fog(&mut self, color: [f32; 3], near: f32, far: f32) {
1249        self.fog_color = color;
1250        self.fog_near = near;
1251        self.fog_far = far.max(near + 1.0);
1252    }
1253
1254    /// Re-configure the swapchain to a new physical size. Call from
1255    /// `WindowEvent::Resized`. Drops the chunk-DDA storage texture
1256    /// so [`Self::render_chunk`] rebuilds it at the new size.
1257    pub fn resize(&mut self, width: u32, height: u32) {
1258        if width == 0 || height == 0 {
1259            return;
1260        }
1261        self.surface_config.width = width;
1262        self.surface_config.height = height;
1263        self.surface.configure(&self.device, &self.surface_config);
1264        self.chunk_dda = None;
1265        self.grid_dda = None;
1266        self.scene_dda = None;
1267    }
1268
1269    /// Acquire the next swapchain frame, or `None` to skip this frame.
1270    /// wgpu 29's `get_current_texture` returns a
1271    /// [`wgpu::CurrentSurfaceTexture`] status enum (was
1272    /// `Result<_, SurfaceError>`): an outdated/lost surface reconfigures
1273    /// and skips, transient statuses just skip.
1274    fn acquire_frame(&self) -> Option<wgpu::SurfaceTexture> {
1275        use wgpu::CurrentSurfaceTexture as C;
1276        match self.surface.get_current_texture() {
1277            C::Success(t) | C::Suboptimal(t) => Some(t),
1278            C::Outdated | C::Lost => {
1279                self.surface.configure(&self.device, &self.surface_config);
1280                None
1281            }
1282            C::Timeout | C::Occluded | C::Validation => None,
1283        }
1284    }
1285
1286    /// GPU.1 render: single render pass clearing the swapchain to a
1287    /// slowly drifting colour, then presenting. Voxels arrive in
1288    /// GPU.3+.
1289    pub fn render(&mut self) {
1290        let Some(surf_tex) = self.acquire_frame() else {
1291            return;
1292        };
1293        let view = surf_tex
1294            .texture
1295            .create_view(&wgpu::TextureViewDescriptor::default());
1296
1297        // Slow colour drift so the user can tell the GPU path is
1298        // actually presenting frames vs. e.g. a frozen window.
1299        // Wrap at 2π/0.005 frames (~1257) so the cast stays exact.
1300        let phase = f64::from(self.frame_count % 1257) * 0.005;
1301        let [r, g, b] = self.clear_colour;
1302        let drift = (phase.sin() * 0.04 + 0.04).clamp(0.0, 0.1);
1303        let clear = wgpu::Color {
1304            r: (r + drift).clamp(0.0, 1.0),
1305            g: (g + drift * 0.5).clamp(0.0, 1.0),
1306            b: (b + drift * 0.25).clamp(0.0, 1.0),
1307            a: 1.0,
1308        };
1309
1310        let mut encoder = self
1311            .device
1312            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1313                label: Some("roxlap-gpu encoder"),
1314            });
1315        {
1316            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1317                label: Some("roxlap-gpu clear"),
1318                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1319                    view: &view,
1320                    depth_slice: None,
1321                    resolve_target: None,
1322                    ops: wgpu::Operations {
1323                        load: wgpu::LoadOp::Clear(clear),
1324                        store: wgpu::StoreOp::Store,
1325                    },
1326                })],
1327                depth_stencil_attachment: None,
1328                timestamp_writes: None,
1329                occlusion_query_set: None,
1330                multiview_mask: None,
1331            });
1332        }
1333        self.queue.submit(std::iter::once(encoder.finish()));
1334        surf_tex.present();
1335        self.frame_count = self.frame_count.wrapping_add(1);
1336    }
1337
1338    /// GPU.3 single-chunk render. Dispatches `chunk_dda.wgsl`
1339    /// against `resident`'s storage buffers, then blits the
1340    /// low-res storage texture to the swapchain. `camera.position`
1341    /// is in **chunk-local** voxel units (host translates from
1342    /// world coords). `max_scan_dist` caps the per-pixel DDA loop —
1343    /// scene-demo wires `+` / `-` through this each frame.
1344    ///
1345    /// # Panics
1346    /// Internally `expect`s the chunk-DDA resources to be built —
1347    /// they are constructed at the top of this function if missing.
1348    /// Cannot fire in normal control flow.
1349    pub fn render_chunk(
1350        &mut self,
1351        resident: &GpuChunkResident,
1352        camera: &Camera,
1353        max_scan_dist: u32,
1354    ) {
1355        let Some(surf_tex) = self.acquire_frame() else {
1356            return;
1357        };
1358        let surf_view = surf_tex
1359            .texture
1360            .create_view(&wgpu::TextureViewDescriptor::default());
1361
1362        let surface_w = self.surface_config.width;
1363        let surface_h = self.surface_config.height;
1364        let surface_format = self.surface_config.format;
1365
1366        // Lazy-build chunk-DDA resources; rebuild when the swapchain
1367        // grew or shrank.
1368        let needs_build = match &self.chunk_dda {
1369            Some(r) => r.storage_size != (surface_w, surface_h),
1370            None => true,
1371        };
1372        if needs_build {
1373            self.chunk_dda = Some(self.build_chunk_dda(surface_w, surface_h, surface_format));
1374        }
1375        let dda = self.chunk_dda.as_ref().expect("just built");
1376
1377        // Update uniforms.
1378        let uniform = ChunkDdaUniform {
1379            camera_pos: camera.position,
1380            _pad0: 0.0,
1381            camera_right: camera.right,
1382            _pad1: 0.0,
1383            camera_down: camera.down,
1384            _pad2: 0.0,
1385            camera_forward: camera.forward,
1386            fov_y_rad: camera.fov_y_rad,
1387            screen_size: [surface_w, surface_h],
1388            vsid: resident.vsid,
1389            max_scan_dist,
1390        };
1391        self.queue
1392            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1393
1394        // Per-frame DDA bind group — references the chunk's buffers
1395        // so we rebuild every frame (the resident can change between
1396        // calls).
1397        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1398            label: Some("roxlap-gpu chunk_dda.bg"),
1399            layout: &dda.bgl_dda,
1400            entries: &[
1401                wgpu::BindGroupEntry {
1402                    binding: 0,
1403                    resource: dda.uniform_buf.as_entire_binding(),
1404                },
1405                wgpu::BindGroupEntry {
1406                    binding: 1,
1407                    resource: resident.occupancy.as_entire_binding(),
1408                },
1409                wgpu::BindGroupEntry {
1410                    binding: 2,
1411                    resource: resident.color_offsets.as_entire_binding(),
1412                },
1413                wgpu::BindGroupEntry {
1414                    binding: 3,
1415                    resource: resident.colors.as_entire_binding(),
1416                },
1417                wgpu::BindGroupEntry {
1418                    binding: 4,
1419                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1420                },
1421            ],
1422        });
1423
1424        let mut encoder = self
1425            .device
1426            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1427                label: Some("roxlap-gpu chunk encoder"),
1428            });
1429        {
1430            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1431                label: Some("roxlap-gpu chunk_dda compute"),
1432                timestamp_writes: None,
1433            });
1434            cpass.set_pipeline(&dda.pipeline_dda);
1435            cpass.set_bind_group(0, &dda_bg, &[]);
1436            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1437        }
1438        {
1439            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1440                label: Some("roxlap-gpu chunk_dda blit"),
1441                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1442                    view: &surf_view,
1443                    depth_slice: None,
1444                    resolve_target: None,
1445                    ops: wgpu::Operations {
1446                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1447                        store: wgpu::StoreOp::Store,
1448                    },
1449                })],
1450                depth_stencil_attachment: None,
1451                timestamp_writes: None,
1452                occlusion_query_set: None,
1453                multiview_mask: None,
1454            });
1455            rpass.set_pipeline(&dda.pipeline_blit);
1456            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1457            rpass.draw(0..3, 0..1);
1458        }
1459        self.queue.submit(std::iter::once(encoder.finish()));
1460        surf_tex.present();
1461        self.frame_count = self.frame_count.wrapping_add(1);
1462    }
1463
1464    fn build_chunk_dda(
1465        &self,
1466        width: u32,
1467        height: u32,
1468        surface_format: wgpu::TextureFormat,
1469    ) -> ChunkDdaResources {
1470        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1471            label: Some("roxlap-gpu chunk_dda.storage"),
1472            size: wgpu::Extent3d {
1473                width,
1474                height,
1475                depth_or_array_layers: 1,
1476            },
1477            mip_level_count: 1,
1478            sample_count: 1,
1479            dimension: wgpu::TextureDimension::D2,
1480            format: wgpu::TextureFormat::Rgba8Unorm,
1481            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1482            view_formats: &[],
1483        });
1484        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1485
1486        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1487            label: Some("roxlap-gpu chunk_dda.uniform"),
1488            size: std::mem::size_of::<ChunkDdaUniform>() as u64,
1489            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1490            mapped_at_creation: false,
1491        });
1492
1493        let dda_shader = self
1494            .device
1495            .create_shader_module(wgpu::ShaderModuleDescriptor {
1496                label: Some("chunk_dda.wgsl"),
1497                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/chunk_dda.wgsl").into()),
1498            });
1499        let bgl_dda = self
1500            .device
1501            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1502                label: Some("roxlap-gpu chunk_dda.bgl"),
1503                entries: &[
1504                    bgl_uniform_entry(0),
1505                    bgl_storage_entry(1, true),
1506                    bgl_storage_entry(2, true),
1507                    bgl_storage_entry(3, true),
1508                    wgpu::BindGroupLayoutEntry {
1509                        binding: 4,
1510                        visibility: wgpu::ShaderStages::COMPUTE,
1511                        ty: wgpu::BindingType::StorageTexture {
1512                            access: wgpu::StorageTextureAccess::WriteOnly,
1513                            format: wgpu::TextureFormat::Rgba8Unorm,
1514                            view_dimension: wgpu::TextureViewDimension::D2,
1515                        },
1516                        count: None,
1517                    },
1518                ],
1519            });
1520        let dda_pl = self
1521            .device
1522            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1523                label: Some("roxlap-gpu chunk_dda.layout"),
1524                bind_group_layouts: &[Some(&bgl_dda)],
1525                immediate_size: 0,
1526            });
1527        let pipeline_dda = self
1528            .device
1529            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1530                label: Some("roxlap-gpu chunk_dda.pipeline"),
1531                layout: Some(&dda_pl),
1532                module: &dda_shader,
1533                entry_point: Some("render_chunk"),
1534                compilation_options: wgpu::PipelineCompilationOptions::default(),
1535                cache: None,
1536            });
1537
1538        // Fullscreen-triangle blit upscales the storage texture into
1539        // the swapchain. Nearest filter keeps the retro pixel look.
1540        let blit_shader = self
1541            .device
1542            .create_shader_module(wgpu::ShaderModuleDescriptor {
1543                label: Some("blit.wgsl"),
1544                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1545            });
1546        let bgl_blit = self
1547            .device
1548            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1549                label: Some("roxlap-gpu chunk_dda.blit_bgl"),
1550                entries: &[
1551                    wgpu::BindGroupLayoutEntry {
1552                        binding: 0,
1553                        visibility: wgpu::ShaderStages::FRAGMENT,
1554                        ty: wgpu::BindingType::Texture {
1555                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1556                            view_dimension: wgpu::TextureViewDimension::D2,
1557                            multisampled: false,
1558                        },
1559                        count: None,
1560                    },
1561                    wgpu::BindGroupLayoutEntry {
1562                        binding: 1,
1563                        visibility: wgpu::ShaderStages::FRAGMENT,
1564                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1565                        count: None,
1566                    },
1567                ],
1568            });
1569        let blit_pl = self
1570            .device
1571            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1572                label: Some("roxlap-gpu chunk_dda.blit_layout"),
1573                bind_group_layouts: &[Some(&bgl_blit)],
1574                immediate_size: 0,
1575            });
1576        let pipeline_blit = self
1577            .device
1578            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1579                label: Some("roxlap-gpu chunk_dda.blit_pipeline"),
1580                layout: Some(&blit_pl),
1581                vertex: wgpu::VertexState {
1582                    module: &blit_shader,
1583                    entry_point: Some("vs_main"),
1584                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1585                    buffers: &[],
1586                },
1587                fragment: Some(wgpu::FragmentState {
1588                    module: &blit_shader,
1589                    entry_point: Some("fs_main"),
1590                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1591                    targets: &[Some(wgpu::ColorTargetState {
1592                        format: surface_format,
1593                        blend: None,
1594                        write_mask: wgpu::ColorWrites::ALL,
1595                    })],
1596                }),
1597                primitive: wgpu::PrimitiveState::default(),
1598                depth_stencil: None,
1599                multisample: wgpu::MultisampleState::default(),
1600                multiview_mask: None,
1601                cache: None,
1602            });
1603        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1604            label: Some("roxlap-gpu chunk_dda.blit_sampler"),
1605            address_mode_u: wgpu::AddressMode::ClampToEdge,
1606            address_mode_v: wgpu::AddressMode::ClampToEdge,
1607            address_mode_w: wgpu::AddressMode::ClampToEdge,
1608            mag_filter: wgpu::FilterMode::Nearest,
1609            min_filter: wgpu::FilterMode::Nearest,
1610            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1611            ..Default::default()
1612        });
1613        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1614            label: Some("roxlap-gpu chunk_dda.blit_bg"),
1615            layout: &bgl_blit,
1616            entries: &[
1617                wgpu::BindGroupEntry {
1618                    binding: 0,
1619                    resource: wgpu::BindingResource::TextureView(&storage_view),
1620                },
1621                wgpu::BindGroupEntry {
1622                    binding: 1,
1623                    resource: wgpu::BindingResource::Sampler(&sampler),
1624                },
1625            ],
1626        });
1627
1628        ChunkDdaResources {
1629            storage_size: (width, height),
1630            storage_view,
1631            uniform_buf,
1632            bgl_dda,
1633            pipeline_dda,
1634            blit_bg,
1635            pipeline_blit,
1636            _sampler: sampler,
1637        }
1638    }
1639
1640    /// GPU.4 render — outer DDA over chunk indices + inner DDA into
1641    /// non-empty chunks. `camera.position` is in **grid-local**
1642    /// voxel units. `max_outer_steps` caps how many chunks the
1643    /// outer DDA may traverse per ray (scene-demo wires `+ / -`
1644    /// through this).
1645    ///
1646    /// # Panics
1647    /// Internally `expect`s the grid-DDA resources to be built;
1648    /// they are constructed at the top of this function if missing.
1649    pub fn render_grid(&mut self, grid: &GpuGridResident, camera: &Camera, max_outer_steps: u32) {
1650        let Some(surf_tex) = self.acquire_frame() else {
1651            return;
1652        };
1653        let surf_view = surf_tex
1654            .texture
1655            .create_view(&wgpu::TextureViewDescriptor::default());
1656
1657        let surface_w = self.surface_config.width;
1658        let surface_h = self.surface_config.height;
1659        let surface_format = self.surface_config.format;
1660
1661        let needs_build = match &self.grid_dda {
1662            Some(r) => r.storage_size != (surface_w, surface_h),
1663            None => true,
1664        };
1665        if needs_build {
1666            self.grid_dda = Some(self.build_grid_dda(surface_w, surface_h, surface_format));
1667        }
1668        let dda = self.grid_dda.as_ref().expect("just built");
1669
1670        let uniform = GridDdaUniform {
1671            camera_pos: camera.position,
1672            _pad0: 0.0,
1673            camera_right: camera.right,
1674            _pad1: 0.0,
1675            camera_down: camera.down,
1676            _pad2: 0.0,
1677            camera_forward: camera.forward,
1678            fov_y_rad: camera.fov_y_rad,
1679            screen_size: [surface_w, surface_h],
1680            vsid: grid.vsid,
1681            max_outer_steps,
1682            chunks_dims: grid.chunks_dims,
1683            _pad3: 0,
1684            origin_chunk: grid.origin_chunk,
1685            _pad4: 0,
1686        };
1687        self.queue
1688            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1689
1690        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1691            label: Some("roxlap-gpu grid_dda.bg"),
1692            layout: &dda.bgl_dda,
1693            entries: &[
1694                wgpu::BindGroupEntry {
1695                    binding: 0,
1696                    resource: dda.uniform_buf.as_entire_binding(),
1697                },
1698                wgpu::BindGroupEntry {
1699                    binding: 1,
1700                    resource: grid.occupancy.as_entire_binding(),
1701                },
1702                wgpu::BindGroupEntry {
1703                    binding: 2,
1704                    resource: grid.color_offsets.as_entire_binding(),
1705                },
1706                wgpu::BindGroupEntry {
1707                    binding: 3,
1708                    resource: grid.colors.as_entire_binding(),
1709                },
1710                wgpu::BindGroupEntry {
1711                    binding: 4,
1712                    resource: grid.chunk_colors_base.as_entire_binding(),
1713                },
1714                wgpu::BindGroupEntry {
1715                    binding: 5,
1716                    resource: grid.chunk_occupancy.as_entire_binding(),
1717                },
1718                wgpu::BindGroupEntry {
1719                    binding: 6,
1720                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1721                },
1722            ],
1723        });
1724
1725        let mut encoder = self
1726            .device
1727            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1728                label: Some("roxlap-gpu grid encoder"),
1729            });
1730        {
1731            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1732                label: Some("roxlap-gpu grid_dda compute"),
1733                timestamp_writes: None,
1734            });
1735            cpass.set_pipeline(&dda.pipeline_dda);
1736            cpass.set_bind_group(0, &dda_bg, &[]);
1737            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1738        }
1739        {
1740            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1741                label: Some("roxlap-gpu grid_dda blit"),
1742                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1743                    view: &surf_view,
1744                    depth_slice: None,
1745                    resolve_target: None,
1746                    ops: wgpu::Operations {
1747                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1748                        store: wgpu::StoreOp::Store,
1749                    },
1750                })],
1751                depth_stencil_attachment: None,
1752                timestamp_writes: None,
1753                occlusion_query_set: None,
1754                multiview_mask: None,
1755            });
1756            rpass.set_pipeline(&dda.pipeline_blit);
1757            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1758            rpass.draw(0..3, 0..1);
1759        }
1760        self.queue.submit(std::iter::once(encoder.finish()));
1761        surf_tex.present();
1762        self.frame_count = self.frame_count.wrapping_add(1);
1763    }
1764
1765    fn build_grid_dda(
1766        &self,
1767        width: u32,
1768        height: u32,
1769        surface_format: wgpu::TextureFormat,
1770    ) -> GridDdaResources {
1771        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1772            label: Some("roxlap-gpu grid_dda.storage"),
1773            size: wgpu::Extent3d {
1774                width,
1775                height,
1776                depth_or_array_layers: 1,
1777            },
1778            mip_level_count: 1,
1779            sample_count: 1,
1780            dimension: wgpu::TextureDimension::D2,
1781            format: wgpu::TextureFormat::Rgba8Unorm,
1782            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1783            view_formats: &[],
1784        });
1785        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1786
1787        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1788            label: Some("roxlap-gpu grid_dda.uniform"),
1789            size: std::mem::size_of::<GridDdaUniform>() as u64,
1790            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1791            mapped_at_creation: false,
1792        });
1793
1794        let dda_shader = self
1795            .device
1796            .create_shader_module(wgpu::ShaderModuleDescriptor {
1797                label: Some("grid_dda.wgsl"),
1798                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/grid_dda.wgsl").into()),
1799            });
1800        let bgl_dda = self
1801            .device
1802            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1803                label: Some("roxlap-gpu grid_dda.bgl"),
1804                entries: &[
1805                    bgl_uniform_entry(0),
1806                    bgl_storage_entry(1, true),
1807                    bgl_storage_entry(2, true),
1808                    bgl_storage_entry(3, true),
1809                    bgl_storage_entry(4, true),
1810                    bgl_storage_entry(5, true),
1811                    wgpu::BindGroupLayoutEntry {
1812                        binding: 6,
1813                        visibility: wgpu::ShaderStages::COMPUTE,
1814                        ty: wgpu::BindingType::StorageTexture {
1815                            access: wgpu::StorageTextureAccess::WriteOnly,
1816                            format: wgpu::TextureFormat::Rgba8Unorm,
1817                            view_dimension: wgpu::TextureViewDimension::D2,
1818                        },
1819                        count: None,
1820                    },
1821                ],
1822            });
1823        let dda_pl = self
1824            .device
1825            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1826                label: Some("roxlap-gpu grid_dda.layout"),
1827                bind_group_layouts: &[Some(&bgl_dda)],
1828                immediate_size: 0,
1829            });
1830        let pipeline_dda = self
1831            .device
1832            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1833                label: Some("roxlap-gpu grid_dda.pipeline"),
1834                layout: Some(&dda_pl),
1835                module: &dda_shader,
1836                entry_point: Some("render_grid"),
1837                compilation_options: wgpu::PipelineCompilationOptions::default(),
1838                cache: None,
1839            });
1840
1841        let blit_shader = self
1842            .device
1843            .create_shader_module(wgpu::ShaderModuleDescriptor {
1844                label: Some("blit.wgsl"),
1845                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1846            });
1847        let bgl_blit = self
1848            .device
1849            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1850                label: Some("roxlap-gpu grid_dda.blit_bgl"),
1851                entries: &[
1852                    wgpu::BindGroupLayoutEntry {
1853                        binding: 0,
1854                        visibility: wgpu::ShaderStages::FRAGMENT,
1855                        ty: wgpu::BindingType::Texture {
1856                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1857                            view_dimension: wgpu::TextureViewDimension::D2,
1858                            multisampled: false,
1859                        },
1860                        count: None,
1861                    },
1862                    wgpu::BindGroupLayoutEntry {
1863                        binding: 1,
1864                        visibility: wgpu::ShaderStages::FRAGMENT,
1865                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1866                        count: None,
1867                    },
1868                ],
1869            });
1870        let blit_pl = self
1871            .device
1872            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1873                label: Some("roxlap-gpu grid_dda.blit_layout"),
1874                bind_group_layouts: &[Some(&bgl_blit)],
1875                immediate_size: 0,
1876            });
1877        let pipeline_blit = self
1878            .device
1879            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1880                label: Some("roxlap-gpu grid_dda.blit_pipeline"),
1881                layout: Some(&blit_pl),
1882                vertex: wgpu::VertexState {
1883                    module: &blit_shader,
1884                    entry_point: Some("vs_main"),
1885                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1886                    buffers: &[],
1887                },
1888                fragment: Some(wgpu::FragmentState {
1889                    module: &blit_shader,
1890                    entry_point: Some("fs_main"),
1891                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1892                    targets: &[Some(wgpu::ColorTargetState {
1893                        format: surface_format,
1894                        blend: None,
1895                        write_mask: wgpu::ColorWrites::ALL,
1896                    })],
1897                }),
1898                primitive: wgpu::PrimitiveState::default(),
1899                depth_stencil: None,
1900                multisample: wgpu::MultisampleState::default(),
1901                multiview_mask: None,
1902                cache: None,
1903            });
1904        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1905            label: Some("roxlap-gpu grid_dda.blit_sampler"),
1906            address_mode_u: wgpu::AddressMode::ClampToEdge,
1907            address_mode_v: wgpu::AddressMode::ClampToEdge,
1908            address_mode_w: wgpu::AddressMode::ClampToEdge,
1909            mag_filter: wgpu::FilterMode::Nearest,
1910            min_filter: wgpu::FilterMode::Nearest,
1911            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1912            ..Default::default()
1913        });
1914        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1915            label: Some("roxlap-gpu grid_dda.blit_bg"),
1916            layout: &bgl_blit,
1917            entries: &[
1918                wgpu::BindGroupEntry {
1919                    binding: 0,
1920                    resource: wgpu::BindingResource::TextureView(&storage_view),
1921                },
1922                wgpu::BindGroupEntry {
1923                    binding: 1,
1924                    resource: wgpu::BindingResource::Sampler(&sampler),
1925                },
1926            ],
1927        });
1928
1929        GridDdaResources {
1930            storage_size: (width, height),
1931            storage_view,
1932            uniform_buf,
1933            bgl_dda,
1934            pipeline_dda,
1935            blit_bg,
1936            pipeline_blit,
1937            _sampler: sampler,
1938        }
1939    }
1940
    /// GPU.5 render — multi-grid scene marcher. `cameras[i]` is the
    /// world camera transformed into grid `i`'s local frame, because
    /// the grid marcher works in grid-local space (caller-supplied;
    /// see scene-demo's `redraw_gpu` for the glam-based transform).
    /// `fov_y_rad` is the shared vertical FOV; `max_outer_steps` caps
    /// per-ray chunk-DDA work for each grid.
    ///
    /// `sprite_camera` is the **world** camera: instanced sprites carry
    /// world-space positions/transforms, so they must project through
    /// the untransformed world camera — not `cameras[0]`, which is only
    /// the world camera when grid 0 is at identity.
    ///
    /// # Panics
    /// If `cameras.len() != scene.grid_count`.
1956    pub fn render_scene(
1957        &mut self,
1958        scene: &GpuSceneResident,
1959        cameras: &[Camera],
1960        sprite_camera: &Camera,
1961        fov_y_rad: f32,
1962        max_outer_steps: u32,
1963    ) {
1964        assert_eq!(
1965            cameras.len(),
1966            scene.grid_count as usize,
1967            "render_scene: {} cameras supplied, scene has {} grids",
1968            cameras.len(),
1969            scene.grid_count,
1970        );
1971        self.last_fov_y_rad = fov_y_rad; // cached for pixel_ray (picking)
1972
1973        // Deferred present: drop any frame a prior render left
1974        // un-presented (a host that skipped present/paint_egui) so we
1975        // never hold two outstanding swapchain textures.
1976        self.pending_frame = None;
1977        let Some(surf_tex) = self.acquire_frame() else {
1978            return;
1979        };
1980        let surf_view = surf_tex
1981            .texture
1982            .create_view(&wgpu::TextureViewDescriptor::default());
1983
1984        let surface_w = self.surface_config.width;
1985        let surface_h = self.surface_config.height;
1986        let surface_format = self.surface_config.format;
1987
1988        let needs_build = match &self.scene_dda {
1989            Some(r) => r.storage_size != (surface_w, surface_h),
1990            None => true,
1991        };
1992        if needs_build {
1993            self.scene_dda = Some(self.build_scene_dda(surface_w, surface_h, surface_format));
1994        }
1995        // GPU.9 — materialise the sprite pipeline the first frame
1996        // sprites are present (before the immutable `dda` borrow).
1997        // GPU.10.0 — build the model-DDA pipeline the first frame a
1998        // sprite registry is present.
1999        if self.sprite_registry.is_some() && self.sprite_model_dda.is_none() {
2000            self.sprite_model_dda = Some(self.build_sprite_model_dda());
2001        }
2002        // GPU.10.3 — frustum-cull + screen-tile-bin the sprite instances
2003        // (needs &mut self for buffer growth, so before the immutable
2004        // scene_dda borrow). Captures (visible_count, tiles_x); None when
2005        // nothing is in view.
2006        let sprite_pass: Option<(u32, u32)> = if let Some(reg) = self.sprite_registry.as_mut() {
2007            if reg.instance_capacity > 0 {
2008                // World camera — sprite positions/transforms are world-
2009                // space (independent of any grid's transform).
2010                let cam = sprite_camera;
2011                #[allow(clippy::cast_precision_loss)]
2012                let aspect = surface_w as f32 / surface_h as f32;
2013                let half_h = (fov_y_rad * 0.5).tan();
2014                let frustum = sprite_model::ViewFrustum {
2015                    pos: cam.position,
2016                    right: cam.right,
2017                    down: cam.down,
2018                    forward: cam.forward,
2019                    half_w: half_h * aspect,
2020                    half_h,
2021                    far: 1.0e9,
2022                };
2023                let (visible, tiles_x, _tiles_y) = reg.cull_bin_upload(
2024                    &self.device,
2025                    &self.queue,
2026                    &frustum,
2027                    surface_w,
2028                    surface_h,
2029                    SPRITE_TILE_SIZE,
2030                    self.sprite_lod_px,
2031                );
2032                (visible > 0).then_some((visible, tiles_x))
2033            } else {
2034                None
2035            }
2036        } else {
2037            None
2038        };
2039        let dda = self.scene_dda.as_ref().expect("just built");
2040
2041        // Refresh the blit's flip flag each frame (offset 8, after the
2042        // width/height), so toggling the flip applies without a resize.
2043        self.queue.write_buffer(
2044            &dda.blit_dims,
2045            8,
2046            bytemuck::bytes_of(&[u32::from(self.flip_x), 0u32]),
2047        );
2048
2049        // Pack per-grid cameras into a runtime-sized storage buffer
2050        // (binding 15) — no fixed cap on grid count.
2051        let cam_vec: Vec<SceneDdaPerGridCamera> = cameras
2052            .iter()
2053            .map(SceneDdaPerGridCamera::from_camera)
2054            .collect();
2055        let grid_cameras = upload_grid_cameras(&self.device, &cam_vec);
2056        let uniform = SceneDdaUniform {
2057            fov_y_rad,
2058            grid_count: scene.grid_count,
2059            max_outer_steps,
2060            _pad0: 0,
2061            screen_size: [surface_w, surface_h],
2062            _pad1: [0; 2],
2063            fog_color: [
2064                self.fog_color[0],
2065                self.fog_color[1],
2066                self.fog_color[2],
2067                self.fog_near,
2068            ],
2069            fog_far: self.fog_far,
2070            // L3.1: always write scene depth. Costs one storage store per
2071            // pixel, and the depth is needed for sprite z-test, sprite-less
2072            // `pick_depth`, and `draw_lines` occlusion alike.
2073            write_depth: 1,
2074            occ_page_words: scene.occupancy_page_words,
2075            occ_num_pages: scene.occupancy_num_pages,
2076            mip_scan_dist: self.scene_mip_scan_dist,
2077            terrain_has_translucent: u32::from(self.scene_terrain_translucent),
2078            terrain_map_count: self.scene_terrain_map.len() as u32,
2079            _pad4: 0,
2080            // Sky direction comes from the world (sprite) camera, so a
2081            // grid-less sprite-only scene still paints a real sky.
2082            sky_cam: SceneDdaPerGridCamera::from_camera(sprite_camera),
2083            side_shades0: self.scene_side_shades[0],
2084            side_shades1: self.scene_side_shades[1],
2085        };
2086        self.queue
2087            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
2088
2089        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2090            label: Some("roxlap-gpu scene_dda.bg"),
2091            layout: &dda.bgl_dda,
2092            entries: &[
2093                wgpu::BindGroupEntry {
2094                    binding: 0,
2095                    resource: dda.uniform_buf.as_entire_binding(),
2096                },
2097                // Occupancy page 0 at binding 1; pages 1..MAX_OCC_PAGES
2098                // at bindings 12.. (see GPU.X occupancy paging).
2099                wgpu::BindGroupEntry {
2100                    binding: 1,
2101                    resource: scene.occupancy_pages[0].as_entire_binding(),
2102                },
2103                wgpu::BindGroupEntry {
2104                    binding: 2,
2105                    resource: scene.all_color_offsets.as_entire_binding(),
2106                },
2107                wgpu::BindGroupEntry {
2108                    binding: 3,
2109                    resource: scene.all_colors.as_entire_binding(),
2110                },
2111                wgpu::BindGroupEntry {
2112                    binding: 4,
2113                    resource: scene.all_chunk_colors_base.as_entire_binding(),
2114                },
2115                wgpu::BindGroupEntry {
2116                    binding: 5,
2117                    resource: scene.all_chunk_occupancy.as_entire_binding(),
2118                },
2119                wgpu::BindGroupEntry {
2120                    binding: 6,
2121                    resource: scene.grid_static_meta.as_entire_binding(),
2122                },
2123                wgpu::BindGroupEntry {
2124                    binding: 7,
2125                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
2126                },
2127                wgpu::BindGroupEntry {
2128                    binding: 8,
2129                    resource: dda.framebuffer.as_entire_binding(),
2130                },
2131                wgpu::BindGroupEntry {
2132                    binding: 9,
2133                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
2134                },
2135                wgpu::BindGroupEntry {
2136                    binding: 10,
2137                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
2138                },
2139                wgpu::BindGroupEntry {
2140                    binding: 11,
2141                    resource: dda.depth_buffer.as_entire_binding(),
2142                },
2143                wgpu::BindGroupEntry {
2144                    binding: 12,
2145                    resource: scene.occupancy_pages[1].as_entire_binding(),
2146                },
2147                wgpu::BindGroupEntry {
2148                    binding: 13,
2149                    resource: scene.occupancy_pages[2].as_entire_binding(),
2150                },
2151                wgpu::BindGroupEntry {
2152                    binding: 14,
2153                    resource: scene.occupancy_pages[3].as_entire_binding(),
2154                },
2155                wgpu::BindGroupEntry {
2156                    binding: 15,
2157                    resource: grid_cameras.as_entire_binding(),
2158                },
2159                wgpu::BindGroupEntry {
2160                    binding: 16,
2161                    resource: dda.materials_pal_buf.as_entire_binding(),
2162                },
2163                wgpu::BindGroupEntry {
2164                    binding: 17,
2165                    resource: dda.terrain_map_buf.as_entire_binding(),
2166                },
2167            ],
2168        });
2169
2170        // GPU.9 — when sprites are present, build both splatter bind
2171        // groups up front (the splat pass writes the key buffer; the
2172        // resolve pass reads keys + scene depth and writes colour).
2173        // GPU.10.3 — model-DDA bind group + per-frame uniform, using the
2174        // cull/bin results captured above. Per-model + per-instance data
2175        // + the tile lists live in the registry buffers.
2176        let sprite_model_bg = match (&self.sprite_model_dda, &self.sprite_registry, sprite_pass) {
2177            (Some(smd), Some(reg), Some((visible, tiles_x))) => {
2178                // World camera (see the cull pass above) — sprites
2179                // project through it regardless of grid 0's transform.
2180                let cam = sprite_camera;
2181                let uni = SpriteModelUniform {
2182                    cam_pos: cam.position,
2183                    _p0: 0.0,
2184                    cam_right: cam.right,
2185                    _p1: 0.0,
2186                    cam_down: cam.down,
2187                    _p2: 0.0,
2188                    cam_forward: cam.forward,
2189                    _p3: 0.0,
2190                    fog_color: [
2191                        self.fog_color[0],
2192                        self.fog_color[1],
2193                        self.fog_color[2],
2194                        self.fog_near,
2195                    ],
2196                    screen_size: [surface_w, surface_h],
2197                    instance_count: visible,
2198                    fog_far: self.fog_far,
2199                    fov_y_rad,
2200                    tiles_x,
2201                    tile_size: SPRITE_TILE_SIZE,
2202                    has_translucent: u32::from(self.sprite_has_translucent),
2203                };
2204                self.queue
2205                    .write_buffer(&smd.uniform_buf, 0, bytemuck::bytes_of(&uni));
2206                Some(self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2207                    label: Some("roxlap-gpu sprite_model_dda.bg"),
2208                    layout: &smd.bgl,
2209                    entries: &[
2210                        wgpu::BindGroupEntry {
2211                            binding: 0,
2212                            resource: smd.uniform_buf.as_entire_binding(),
2213                        },
2214                        wgpu::BindGroupEntry {
2215                            binding: 1,
2216                            resource: reg.occupancy.as_entire_binding(),
2217                        },
2218                        wgpu::BindGroupEntry {
2219                            binding: 2,
2220                            resource: reg.colors.as_entire_binding(),
2221                        },
2222                        wgpu::BindGroupEntry {
2223                            binding: 3,
2224                            resource: reg.color_offsets.as_entire_binding(),
2225                        },
2226                        wgpu::BindGroupEntry {
2227                            binding: 4,
2228                            resource: reg.model_meta.as_entire_binding(),
2229                        },
2230                        wgpu::BindGroupEntry {
2231                            binding: 5,
2232                            resource: reg.instances.as_entire_binding(),
2233                        },
2234                        wgpu::BindGroupEntry {
2235                            binding: 6,
2236                            resource: dda.depth_buffer.as_entire_binding(),
2237                        },
2238                        wgpu::BindGroupEntry {
2239                            binding: 7,
2240                            resource: dda.framebuffer.as_entire_binding(),
2241                        },
2242                        wgpu::BindGroupEntry {
2243                            binding: 8,
2244                            resource: reg.tile_ranges.as_entire_binding(),
2245                        },
2246                        wgpu::BindGroupEntry {
2247                            binding: 9,
2248                            resource: reg.tile_instances.as_entire_binding(),
2249                        },
2250                        wgpu::BindGroupEntry {
2251                            binding: 10,
2252                            resource: reg.dirs.as_entire_binding(),
2253                        },
2254                        wgpu::BindGroupEntry {
2255                            binding: 11,
2256                            resource: reg.colmul.as_entire_binding(),
2257                        },
2258                        wgpu::BindGroupEntry {
2259                            binding: 12,
2260                            resource: smd.materials_buf.as_entire_binding(),
2261                        },
2262                        wgpu::BindGroupEntry {
2263                            binding: 13,
2264                            resource: reg.materials_vox.as_entire_binding(),
2265                        },
2266                    ],
2267                }))
2268            }
2269            _ => None,
2270        };
2271
2272        let mut encoder = self
2273            .device
2274            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2275                label: Some("roxlap-gpu scene encoder"),
2276            });
2277        {
2278            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
2279                label: Some("roxlap-gpu scene_dda compute"),
2280                timestamp_writes: None,
2281            });
2282            cpass.set_pipeline(&dda.pipeline_dda);
2283            cpass.set_bind_group(0, &dda_bg, &[]);
2284            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
2285        }
2286        // GPU.10 — sprite model-DDA pass: one thread per pixel marches
2287        // the tile's instances + composites against scene depth, after
2288        // the scene pass wrote the depth buffer and before the blit.
2289        if let (Some(smd), Some(bg)) = (&self.sprite_model_dda, &sprite_model_bg) {
2290            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
2291                label: Some("roxlap-gpu sprite_model_dda"),
2292                timestamp_writes: None,
2293            });
2294            cpass.set_pipeline(&smd.pipeline);
2295            cpass.set_bind_group(0, bg, &[]);
2296            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
2297        }
2298        {
2299            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2300                label: Some("roxlap-gpu scene_dda blit"),
2301                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2302                    view: &surf_view,
2303                    depth_slice: None,
2304                    resolve_target: None,
2305                    ops: wgpu::Operations {
2306                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
2307                        store: wgpu::StoreOp::Store,
2308                    },
2309                })],
2310                depth_stencil_attachment: None,
2311                timestamp_writes: None,
2312                occlusion_query_set: None,
2313                multiview_mask: None,
2314            });
2315            rpass.set_pipeline(&dda.pipeline_blit);
2316            rpass.set_bind_group(0, &dda.blit_bg, &[]);
2317            rpass.draw(0..3, 0..1);
2318        }
2319        self.queue.submit(std::iter::once(encoder.finish()));
2320        // This frame wrote `scene_dda.depth_buffer`, so depth-tested
2321        // overlays may test against it.
2322        self.scene_depth_valid = true;
2323        // Deferred present — the host calls `present` or `paint_egui`.
2324        self.pending_frame = Some((surf_tex, surf_view));
2325        self.frame_count = self.frame_count.wrapping_add(1);
2326    }
2327
2328    /// Like [`Self::render`] (clear to colour) but **deferred**: stashes
2329    /// the frame for [`Self::present`] / [`Self::paint_egui`] instead of
2330    /// presenting. The facade uses this before any grid is resident so a
2331    /// HUD can still be painted over an empty scene.
2332    pub fn render_clear_deferred(&mut self) {
2333        // No scene pass this frame ⇒ `scene_dda.depth_buffer` (if it
2334        // exists from an earlier scene) is stale; depth-tested overlays
2335        // must not test against it.
2336        self.scene_depth_valid = false;
2337        self.pending_frame = None;
2338        let Some(surf_tex) = self.acquire_frame() else {
2339            return;
2340        };
2341        let view = surf_tex
2342            .texture
2343            .create_view(&wgpu::TextureViewDescriptor::default());
2344        let [r, g, b] = self.clear_colour;
2345        let mut encoder = self
2346            .device
2347            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2348                label: Some("roxlap-gpu clear (deferred)"),
2349            });
2350        {
2351            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2352                label: Some("roxlap-gpu clear (deferred)"),
2353                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2354                    view: &view,
2355                    depth_slice: None,
2356                    resolve_target: None,
2357                    ops: wgpu::Operations {
2358                        load: wgpu::LoadOp::Clear(wgpu::Color { r, g, b, a: 1.0 }),
2359                        store: wgpu::StoreOp::Store,
2360                    },
2361                })],
2362                depth_stencil_attachment: None,
2363                timestamp_writes: None,
2364                occlusion_query_set: None,
2365                multiview_mask: None,
2366            });
2367        }
2368        self.queue.submit(std::iter::once(encoder.finish()));
2369        self.pending_frame = Some((surf_tex, view));
2370    }
2371
2372    /// Present the frame stashed by the last deferred render
2373    /// ([`Self::render_scene`] / [`Self::render_clear_deferred`]). No-op
2374    /// if nothing is pending (e.g. the surface was lost mid-render).
2375    pub fn present(&mut self) {
2376        if let Some((surf_tex, _view)) = self.pending_frame.take() {
2377            surf_tex.present();
2378        }
2379    }
2380
2381    /// Draw depth-tested world-space [`GpuLine`]s over the pending frame
2382    /// (L3.2). Projects each endpoint with `cam` (the marcher's pinhole) +
2383    /// the last frame's FOV / surface size, expands to screen-space quads,
2384    /// and runs a `LoadOp::Load` pass into the pending swapchain view — so
2385    /// the lines land on the marched frame and a later `present` /
2386    /// `paint_egui` still finishes it (the pending frame is left intact).
2387    /// Depth-tested lines are occluded by nearer marched geometry (compared
2388    /// against the scene-DDA depth buffer's `best_t`); call after `render`,
2389    /// before `present` / `paint_egui`. No-op if no frame is pending.
2390    pub fn draw_lines_deferred(&mut self, cam: &GpuLineCamera, lines: &[GpuLine]) {
2391        if self.pending_frame.is_none() || lines.is_empty() {
2392            return;
2393        }
2394        let (w, h) = (self.surface_config.width, self.surface_config.height);
2395        let fov = self.last_fov_y_rad;
2396        if w == 0 || h == 0 || fov <= 0.0 {
2397            return; // no frame marched yet — no projection to reuse
2398        }
2399        let verts = build_line_vertices(cam, lines, w, h, fov, self.flip_x);
2400        if verts.is_empty() {
2401            return;
2402        }
2403        self.ensure_line_resources();
2404        let res = self.line_resources.as_ref().expect("just built");
2405
2406        // Skip the depth test when there's no current scene depth to read —
2407        // either no buffer at all (sprite-only / never-rendered) or this
2408        // frame was a color-only clear so the buffer is stale (an empty
2409        // scene drawn after a grid scene). The 1-word dummy / stale buffer
2410        // is still bound to satisfy the layout; `no_depth = 1` keeps the
2411        // shader from indexing it.
2412        let no_depth = u32::from(self.scene_dda.is_none() || !self.scene_depth_valid);
2413        let params = LineParams {
2414            screen_w: w,
2415            screen_h: h,
2416            depth_bias: LINE_DEPTH_BIAS,
2417            no_depth,
2418            flip_x: u32::from(self.flip_x),
2419            _pad: [0; 3],
2420        };
2421        self.queue
2422            .write_buffer(&res.uniform_buf, 0, bytemuck::bytes_of(&params));
2423
2424        let depth_resource = match &self.scene_dda {
2425            Some(dda) => dda.depth_buffer.as_entire_binding(),
2426            None => res.dummy_depth.as_entire_binding(),
2427        };
2428        let bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2429            label: Some("roxlap-gpu line.bg"),
2430            layout: &res.bgl,
2431            entries: &[
2432                wgpu::BindGroupEntry {
2433                    binding: 0,
2434                    resource: res.uniform_buf.as_entire_binding(),
2435                },
2436                wgpu::BindGroupEntry {
2437                    binding: 1,
2438                    resource: depth_resource,
2439                },
2440            ],
2441        });
2442
2443        // Grow-only persistent vertex buffer (L3.3): one `write_buffer`
2444        // per overlay, reused across frames. Power-of-two capacity keeps
2445        // re-allocation rare as the segment count drifts.
2446        let needed = std::mem::size_of_val(verts.as_slice()) as u64;
2447        if self.line_vbuf_cap < needed {
2448            let cap = needed.next_power_of_two().max(4096);
2449            self.line_vbuf = Some(self.device.create_buffer(&wgpu::BufferDescriptor {
2450                label: Some("roxlap-gpu line.vbuf"),
2451                size: cap,
2452                usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
2453                mapped_at_creation: false,
2454            }));
2455            self.line_vbuf_cap = cap;
2456        }
2457        let vbuf = self.line_vbuf.as_ref().expect("ensured above");
2458        self.queue
2459            .write_buffer(vbuf, 0, bytemuck::cast_slice(&verts));
2460
2461        let view = &self.pending_frame.as_ref().expect("checked above").1;
2462        let mut encoder = self
2463            .device
2464            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2465                label: Some("roxlap-gpu lines"),
2466            });
2467        {
2468            // `LoadOp::Load` keeps the marcher's frame; the lines draw over
2469            // it. Manual depth test in the FS (no depth-stencil attachment).
2470            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2471                label: Some("roxlap-gpu line paint"),
2472                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2473                    view,
2474                    depth_slice: None,
2475                    resolve_target: None,
2476                    ops: wgpu::Operations {
2477                        load: wgpu::LoadOp::Load,
2478                        store: wgpu::StoreOp::Store,
2479                    },
2480                })],
2481                depth_stencil_attachment: None,
2482                timestamp_writes: None,
2483                occlusion_query_set: None,
2484                multiview_mask: None,
2485            });
2486            pass.set_pipeline(&res.pipeline);
2487            pass.set_bind_group(0, &bg, &[]);
2488            pass.set_vertex_buffer(0, vbuf.slice(..));
2489            pass.draw(0..verts.len() as u32, 0..1);
2490        }
2491        self.queue.submit(std::iter::once(encoder.finish()));
2492        // pending_frame left intact — present/paint_egui finishes the frame.
2493    }
2494
2495    /// Lazy-build the [`LineResources`] (`line.wgsl` pipeline + uniform +
2496    /// dummy depth buffer). The colour target uses the surface format with
2497    /// straight-alpha over-blending; no depth-stencil attachment (the depth
2498    /// test is manual in the fragment shader against the scene depth buffer).
2499    fn ensure_line_resources(&mut self) {
2500        if self.line_resources.is_some() {
2501            return;
2502        }
2503        let shader = self
2504            .device
2505            .create_shader_module(wgpu::ShaderModuleDescriptor {
2506                label: Some("line.wgsl"),
2507                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/line.wgsl").into()),
2508            });
2509        let bgl = self
2510            .device
2511            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2512                label: Some("roxlap-gpu line.bgl"),
2513                entries: &[
2514                    wgpu::BindGroupLayoutEntry {
2515                        binding: 0,
2516                        visibility: wgpu::ShaderStages::FRAGMENT,
2517                        ty: wgpu::BindingType::Buffer {
2518                            ty: wgpu::BufferBindingType::Uniform,
2519                            has_dynamic_offset: false,
2520                            min_binding_size: None,
2521                        },
2522                        count: None,
2523                    },
2524                    wgpu::BindGroupLayoutEntry {
2525                        binding: 1,
2526                        visibility: wgpu::ShaderStages::FRAGMENT,
2527                        ty: wgpu::BindingType::Buffer {
2528                            ty: wgpu::BufferBindingType::Storage { read_only: true },
2529                            has_dynamic_offset: false,
2530                            min_binding_size: None,
2531                        },
2532                        count: None,
2533                    },
2534                ],
2535            });
2536        let layout = self
2537            .device
2538            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2539                label: Some("roxlap-gpu line.layout"),
2540                bind_group_layouts: &[Some(&bgl)],
2541                immediate_size: 0,
2542            });
2543        let pipeline = self
2544            .device
2545            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
2546                label: Some("roxlap-gpu line.pipeline"),
2547                layout: Some(&layout),
2548                vertex: wgpu::VertexState {
2549                    module: &shader,
2550                    entry_point: Some("vs_main"),
2551                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2552                    buffers: &[wgpu::VertexBufferLayout {
2553                        array_stride: std::mem::size_of::<LineVertex>() as u64,
2554                        step_mode: wgpu::VertexStepMode::Vertex,
2555                        attributes: &wgpu::vertex_attr_array![
2556                            0 => Float32x2, // pos (NDC)
2557                            1 => Float32,   // depth
2558                            2 => Float32,   // depth_test
2559                            3 => Float32x4, // color
2560                        ],
2561                    }],
2562                },
2563                fragment: Some(wgpu::FragmentState {
2564                    module: &shader,
2565                    entry_point: Some("fs_main"),
2566                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2567                    targets: &[Some(wgpu::ColorTargetState {
2568                        format: self.surface_config.format,
2569                        blend: Some(wgpu::BlendState::ALPHA_BLENDING),
2570                        write_mask: wgpu::ColorWrites::ALL,
2571                    })],
2572                }),
2573                primitive: wgpu::PrimitiveState {
2574                    cull_mode: None,
2575                    ..Default::default()
2576                },
2577                depth_stencil: None,
2578                multisample: wgpu::MultisampleState::default(),
2579                multiview_mask: None,
2580                cache: None,
2581            });
2582        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2583            label: Some("roxlap-gpu line.uniform"),
2584            size: std::mem::size_of::<LineParams>() as u64,
2585            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2586            mapped_at_creation: false,
2587        });
2588        let dummy_depth = self.device.create_buffer(&wgpu::BufferDescriptor {
2589            label: Some("roxlap-gpu line.dummy_depth"),
2590            size: 4,
2591            usage: wgpu::BufferUsages::STORAGE,
2592            mapped_at_creation: false,
2593        });
2594        self.line_resources = Some(LineResources {
2595            pipeline,
2596            bgl,
2597            uniform_buf,
2598            dummy_depth,
2599        });
2600    }
2601
2602    /// Upload (or replace) an RGBA8 image as a sampled texture, returning
2603    /// a stable id for [`GpuImageQuad::image`]. `rgba` is row-major,
2604    /// `width * height * 4` bytes, straight (un-premultiplied) alpha.
2605    /// Reuses a dropped slot when one exists. Returns `0` for malformed
2606    /// input (an id that draws nothing).
2607    pub fn upload_image(&mut self, rgba: &[u8], width: u32, height: u32) -> usize {
2608        if width == 0 || height == 0 || rgba.len() != (width as usize) * (height as usize) * 4 {
2609            return 0;
2610        }
2611        let texture = self.device.create_texture(&wgpu::TextureDescriptor {
2612            label: Some("roxlap-gpu image_sprite"),
2613            size: wgpu::Extent3d {
2614                width,
2615                height,
2616                depth_or_array_layers: 1,
2617            },
2618            mip_level_count: 1,
2619            sample_count: 1,
2620            dimension: wgpu::TextureDimension::D2,
2621            format: wgpu::TextureFormat::Rgba8Unorm,
2622            usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
2623            view_formats: &[],
2624        });
2625        self.queue.write_texture(
2626            wgpu::TexelCopyTextureInfo {
2627                texture: &texture,
2628                mip_level: 0,
2629                origin: wgpu::Origin3d::ZERO,
2630                aspect: wgpu::TextureAspect::All,
2631            },
2632            rgba,
2633            wgpu::TexelCopyBufferLayout {
2634                offset: 0,
2635                bytes_per_row: Some(width * 4),
2636                rows_per_image: Some(height),
2637            },
2638            wgpu::Extent3d {
2639                width,
2640                height,
2641                depth_or_array_layers: 1,
2642            },
2643        );
2644        let view = texture.create_view(&wgpu::TextureViewDescriptor::default());
2645        let resident = ImageResident {
2646            view,
2647            _texture: texture,
2648        };
2649        if let Some(slot) = self.images.iter().position(Option::is_none) {
2650            self.images[slot] = Some(resident);
2651            slot
2652        } else {
2653            self.images.push(Some(resident));
2654            self.images.len() - 1
2655        }
2656    }
2657
2658    /// Release an image uploaded with [`Self::upload_image`] (the slot
2659    /// becomes reusable).
2660    pub fn drop_image(&mut self, id: usize) {
2661        if let Some(slot) = self.images.get_mut(id) {
2662            *slot = None;
2663        }
2664    }
2665
2666    /// Draw world-space 2D image sprites ([`GpuImageQuad`]) over the
2667    /// pending frame — the textured-quad sibling of
2668    /// [`Self::draw_lines_deferred`]. Projects each quad with `cam` (the
2669    /// marcher's pinhole) + the last frame's FOV / surface size, expands +
2670    /// near-clips to triangles, and runs one `LoadOp::Load` pass with a
2671    /// draw per quad (each binds its own texture). UVs are perspective-correct;
2672    /// depth-tested quads are occluded by nearer marched geometry. Call
2673    /// after `render`, before `present` / `paint_egui`. No-op if no frame
2674    /// is pending.
2675    pub fn draw_images_deferred(&mut self, cam: &GpuLineCamera, quads: &[GpuImageQuad]) {
2676        if self.pending_frame.is_none() || quads.is_empty() {
2677            return;
2678        }
2679        let (w, h) = (self.surface_config.width, self.surface_config.height);
2680        let fov = self.last_fov_y_rad;
2681        if w == 0 || h == 0 || fov <= 0.0 {
2682            return;
2683        }
2684
2685        // Concatenate every quad's verts into one buffer, recording each
2686        // quad's (range, texture) so they share a single render pass.
2687        let mut verts: Vec<ImageVertex> = Vec::new();
2688        let mut draws: Vec<(u32, u32, usize)> = Vec::new();
2689        for quad in quads {
2690            if !matches!(self.images.get(quad.image), Some(Some(_))) {
2691                continue; // dropped / never-uploaded id
2692            }
2693            let v = build_image_vertices(cam, quad, w, h, fov, self.flip_x);
2694            if v.is_empty() {
2695                continue;
2696            }
2697            let start = verts.len() as u32;
2698            verts.extend_from_slice(&v);
2699            draws.push((start, verts.len() as u32, quad.image));
2700        }
2701        if draws.is_empty() {
2702            return;
2703        }
2704
2705        self.ensure_image_resources();
2706        // See `draw_lines_deferred`: skip depth when there's no valid
2707        // current-frame scene depth (none built, or a color-only clear).
2708        let no_depth = u32::from(self.scene_dda.is_none() || !self.scene_depth_valid);
2709        let params = LineParams {
2710            screen_w: w,
2711            screen_h: h,
2712            depth_bias: LINE_DEPTH_BIAS,
2713            no_depth,
2714            flip_x: u32::from(self.flip_x),
2715            _pad: [0; 3],
2716        };
2717        {
2718            let res = self.image_resources.as_ref().expect("just built");
2719            self.queue
2720                .write_buffer(&res.uniform_buf, 0, bytemuck::bytes_of(&params));
2721        }
2722
2723        // Grow-only persistent vertex buffer (mirrors the line vbuf).
2724        let needed = std::mem::size_of_val(verts.as_slice()) as u64;
2725        if self.image_vbuf_cap < needed {
2726            let cap = needed.next_power_of_two().max(4096);
2727            self.image_vbuf = Some(self.device.create_buffer(&wgpu::BufferDescriptor {
2728                label: Some("roxlap-gpu image.vbuf"),
2729                size: cap,
2730                usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
2731                mapped_at_creation: false,
2732            }));
2733            self.image_vbuf_cap = cap;
2734        }
2735        let vbuf = self.image_vbuf.as_ref().expect("ensured above");
2736        self.queue
2737            .write_buffer(vbuf, 0, bytemuck::cast_slice(&verts));
2738
2739        // One bind group per draw (the texture view differs per quad).
2740        let res = self.image_resources.as_ref().expect("just built");
2741        let depth_resource = match &self.scene_dda {
2742            Some(dda) => dda.depth_buffer.as_entire_binding(),
2743            None => res.dummy_depth.as_entire_binding(),
2744        };
2745        let bind_groups: Vec<wgpu::BindGroup> = draws
2746            .iter()
2747            .map(|&(_, _, image_id)| {
2748                let resident = self.images[image_id].as_ref().expect("checked present");
2749                self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2750                    label: Some("roxlap-gpu image.bg"),
2751                    layout: &res.bgl,
2752                    entries: &[
2753                        wgpu::BindGroupEntry {
2754                            binding: 0,
2755                            resource: res.uniform_buf.as_entire_binding(),
2756                        },
2757                        wgpu::BindGroupEntry {
2758                            binding: 1,
2759                            resource: depth_resource.clone(),
2760                        },
2761                        wgpu::BindGroupEntry {
2762                            binding: 2,
2763                            resource: wgpu::BindingResource::TextureView(&resident.view),
2764                        },
2765                        wgpu::BindGroupEntry {
2766                            binding: 3,
2767                            resource: wgpu::BindingResource::Sampler(&res.sampler),
2768                        },
2769                    ],
2770                })
2771            })
2772            .collect();
2773
2774        let view = &self.pending_frame.as_ref().expect("checked above").1;
2775        let mut encoder = self
2776            .device
2777            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2778                label: Some("roxlap-gpu images"),
2779            });
2780        {
2781            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2782                label: Some("roxlap-gpu image paint"),
2783                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2784                    view,
2785                    depth_slice: None,
2786                    resolve_target: None,
2787                    ops: wgpu::Operations {
2788                        load: wgpu::LoadOp::Load,
2789                        store: wgpu::StoreOp::Store,
2790                    },
2791                })],
2792                depth_stencil_attachment: None,
2793                timestamp_writes: None,
2794                occlusion_query_set: None,
2795                multiview_mask: None,
2796            });
2797            pass.set_pipeline(&res.pipeline);
2798            pass.set_vertex_buffer(0, vbuf.slice(..));
2799            for (&(start, end, _), bg) in draws.iter().zip(&bind_groups) {
2800                pass.set_bind_group(0, bg, &[]);
2801                pass.draw(start..end, 0..1);
2802            }
2803        }
2804        self.queue.submit(std::iter::once(encoder.finish()));
2805        // pending_frame left intact — present/paint_egui finishes it.
2806    }
2807
2808    /// Lazy-build the [`ImageResources`] (`image.wgsl` pipeline + uniform +
2809    /// nearest sampler + dummy depth). Straight-alpha over-blend, no
2810    /// depth-stencil attachment (the depth test is manual in the FS).
2811    fn ensure_image_resources(&mut self) {
2812        if self.image_resources.is_some() {
2813            return;
2814        }
2815        let shader = self
2816            .device
2817            .create_shader_module(wgpu::ShaderModuleDescriptor {
2818                label: Some("image.wgsl"),
2819                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/image.wgsl").into()),
2820            });
2821        let bgl = self
2822            .device
2823            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2824                label: Some("roxlap-gpu image.bgl"),
2825                entries: &[
2826                    wgpu::BindGroupLayoutEntry {
2827                        binding: 0,
2828                        visibility: wgpu::ShaderStages::FRAGMENT,
2829                        ty: wgpu::BindingType::Buffer {
2830                            ty: wgpu::BufferBindingType::Uniform,
2831                            has_dynamic_offset: false,
2832                            min_binding_size: None,
2833                        },
2834                        count: None,
2835                    },
2836                    wgpu::BindGroupLayoutEntry {
2837                        binding: 1,
2838                        visibility: wgpu::ShaderStages::FRAGMENT,
2839                        ty: wgpu::BindingType::Buffer {
2840                            ty: wgpu::BufferBindingType::Storage { read_only: true },
2841                            has_dynamic_offset: false,
2842                            min_binding_size: None,
2843                        },
2844                        count: None,
2845                    },
2846                    wgpu::BindGroupLayoutEntry {
2847                        binding: 2,
2848                        visibility: wgpu::ShaderStages::FRAGMENT,
2849                        ty: wgpu::BindingType::Texture {
2850                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
2851                            view_dimension: wgpu::TextureViewDimension::D2,
2852                            multisampled: false,
2853                        },
2854                        count: None,
2855                    },
2856                    wgpu::BindGroupLayoutEntry {
2857                        binding: 3,
2858                        visibility: wgpu::ShaderStages::FRAGMENT,
2859                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
2860                        count: None,
2861                    },
2862                ],
2863            });
2864        let layout = self
2865            .device
2866            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2867                label: Some("roxlap-gpu image.layout"),
2868                bind_group_layouts: &[Some(&bgl)],
2869                immediate_size: 0,
2870            });
2871        let pipeline = self
2872            .device
2873            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
2874                label: Some("roxlap-gpu image.pipeline"),
2875                layout: Some(&layout),
2876                vertex: wgpu::VertexState {
2877                    module: &shader,
2878                    entry_point: Some("vs_main"),
2879                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2880                    buffers: &[wgpu::VertexBufferLayout {
2881                        array_stride: std::mem::size_of::<ImageVertex>() as u64,
2882                        step_mode: wgpu::VertexStepMode::Vertex,
2883                        attributes: &wgpu::vertex_attr_array![
2884                            0 => Float32x2, // ndc
2885                            1 => Float32,   // w
2886                            2 => Float32,   // depth
2887                            3 => Float32,   // depth_test
2888                            4 => Float32,   // cutoff
2889                            5 => Float32x2, // uv
2890                            6 => Float32x4, // tint
2891                        ],
2892                    }],
2893                },
2894                fragment: Some(wgpu::FragmentState {
2895                    module: &shader,
2896                    entry_point: Some("fs_main"),
2897                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2898                    targets: &[Some(wgpu::ColorTargetState {
2899                        format: self.surface_config.format,
2900                        blend: Some(wgpu::BlendState::ALPHA_BLENDING),
2901                        write_mask: wgpu::ColorWrites::ALL,
2902                    })],
2903                }),
2904                primitive: wgpu::PrimitiveState {
2905                    cull_mode: None,
2906                    ..Default::default()
2907                },
2908                depth_stencil: None,
2909                multisample: wgpu::MultisampleState::default(),
2910                multiview_mask: None,
2911                cache: None,
2912            });
2913        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2914            label: Some("roxlap-gpu image.uniform"),
2915            size: std::mem::size_of::<LineParams>() as u64,
2916            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2917            mapped_at_creation: false,
2918        });
2919        let dummy_depth = self.device.create_buffer(&wgpu::BufferDescriptor {
2920            label: Some("roxlap-gpu image.dummy_depth"),
2921            size: 4,
2922            usage: wgpu::BufferUsages::STORAGE,
2923            mapped_at_creation: false,
2924        });
2925        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
2926            label: Some("roxlap-gpu image.sampler"),
2927            // Nearest + clamp: pixel-art references want crisp texels and
2928            // no wrap bleed at the quad edges.
2929            address_mode_u: wgpu::AddressMode::ClampToEdge,
2930            address_mode_v: wgpu::AddressMode::ClampToEdge,
2931            address_mode_w: wgpu::AddressMode::ClampToEdge,
2932            mag_filter: wgpu::FilterMode::Nearest,
2933            min_filter: wgpu::FilterMode::Nearest,
2934            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
2935            ..Default::default()
2936        });
2937        self.image_resources = Some(ImageResources {
2938            pipeline,
2939            bgl,
2940            uniform_buf,
2941            dummy_depth,
2942            sampler,
2943        });
2944    }
2945
2946    /// Project a world point to window pixels under the marcher's
2947    /// vertical-FOV pinhole (the inverse of [`Self::pixel_ray`]), using
2948    /// the last-rendered frame's size + FOV. `None` before the first
2949    /// scene render or for a point at/behind the near plane.
2950    #[must_use]
2951    pub fn project_point(
2952        &self,
2953        cam_pos: [f32; 3],
2954        right: [f32; 3],
2955        down: [f32; 3],
2956        forward: [f32; 3],
2957        world: [f32; 3],
2958    ) -> Option<(f32, f32)> {
2959        let dda = self.scene_dda.as_ref()?;
2960        let (w, h) = dda.storage_size;
2961        if w == 0 || h == 0 || self.last_fov_y_rad <= 0.0 {
2962            return None;
2963        }
2964        let d = [
2965            world[0] - cam_pos[0],
2966            world[1] - cam_pos[1],
2967            world[2] - cam_pos[2],
2968        ];
2969        let cz = forward[0] * d[0] + forward[1] * d[1] + forward[2] * d[2];
2970        if cz < LINE_NEAR_Z {
2971            return None;
2972        }
2973        let cx = right[0] * d[0] + right[1] * d[1] + right[2] * d[2];
2974        let cy = down[0] * d[0] + down[1] * d[1] + down[2] * d[2];
2975        let half_h = (self.last_fov_y_rad * 0.5).tan();
2976        let half_w = half_h * (w as f32 / h as f32);
2977        let ndc_x = (cx / cz) / half_w;
2978        let ndc_y = -(cy / cz) / half_h;
2979        let sx = (ndc_x * 0.5 + 0.5) * w as f32;
2980        let sy = (0.5 - ndc_y * 0.5) * h as f32;
2981        Some((sx, sy))
2982    }
2983
2984    /// Overlay an `egui` UI on the pending frame, then present it
2985    /// (`hud` feature). `jobs` are the host's tessellated primitives
2986    /// (`egui::Context::tessellate`), `textures` the per-frame texture
2987    /// delta from `egui::FullOutput`, `pixels_per_point` the UI scale.
2988    ///
2989    /// Draws with `LoadOp::Load` over the marcher's frame (a separate
2990    /// encoder submitted after the scene's), so the UI composites on top
2991    /// of the world. No-op if no frame is pending.
2992    #[cfg(feature = "hud")]
2993    pub fn paint_egui(
2994        &mut self,
2995        jobs: &[egui::ClippedPrimitive],
2996        textures: &egui::TexturesDelta,
2997        pixels_per_point: f32,
2998    ) {
2999        let Some((surf_tex, surf_view)) = self.pending_frame.take() else {
3000            return;
3001        };
3002        let format = self.surface_config.format;
3003        let egui_rend = self.egui_renderer.get_or_insert_with(|| {
3004            egui_wgpu::Renderer::new(
3005                &self.device,
3006                format,
3007                egui_wgpu::RendererOptions {
3008                    msaa_samples: 1,
3009                    depth_stencil_format: None,
3010                    dithering: false,
3011                    ..Default::default()
3012                },
3013            )
3014        });
3015
3016        let screen = egui_wgpu::ScreenDescriptor {
3017            size_in_pixels: [self.surface_config.width, self.surface_config.height],
3018            pixels_per_point,
3019        };
3020        for (id, delta) in &textures.set {
3021            egui_rend.update_texture(&self.device, &self.queue, *id, delta);
3022        }
3023        let mut encoder = self
3024            .device
3025            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
3026                label: Some("roxlap-gpu egui"),
3027            });
3028        let user_bufs =
3029            egui_rend.update_buffers(&self.device, &self.queue, &mut encoder, jobs, &screen);
3030        {
3031            // `LoadOp::Load` keeps the marcher's frame; egui draws over it.
3032            let mut pass = encoder
3033                .begin_render_pass(&wgpu::RenderPassDescriptor {
3034                    label: Some("roxlap-gpu egui paint"),
3035                    color_attachments: &[Some(wgpu::RenderPassColorAttachment {
3036                        view: &surf_view,
3037                        depth_slice: None,
3038                        resolve_target: None,
3039                        ops: wgpu::Operations {
3040                            load: wgpu::LoadOp::Load,
3041                            store: wgpu::StoreOp::Store,
3042                        },
3043                    })],
3044                    depth_stencil_attachment: None,
3045                    timestamp_writes: None,
3046                    occlusion_query_set: None,
3047                    multiview_mask: None,
3048                })
3049                // egui-wgpu 0.29 requires a `'static` pass (see its docs).
3050                .forget_lifetime();
3051            egui_rend.render(&mut pass, jobs, &screen);
3052        }
3053        for id in &textures.free {
3054            egui_rend.free_texture(id);
3055        }
3056        self.queue.submit(
3057            user_bufs
3058                .into_iter()
3059                .chain(std::iter::once(encoder.finish())),
3060        );
3061        surf_tex.present();
3062    }
3063
    /// Build every size-dependent GPU resource of the scene DDA path for a
    /// `width` × `height` target and return them as a [`SceneDdaResources`].
    ///
    /// Creates, in order: the packed framebuffer storage buffer, the blit's
    /// size/flip uniform (written immediately), the DDA uniform buffer, the
    /// depth buffer and its readback staging buffer, the `render_scene`
    /// compute pipeline with its bind-group layout, the blit render pipeline
    /// (targeting `surface_format`) with its bind group, and the material
    /// palette / terrain-map buffers seeded from the renderer's current
    /// state. The DDA bind group itself is not built here — only its
    /// layout (`bgl_dda`) is returned.
    fn build_scene_dda(
        &self,
        width: u32,
        height: u32,
        surface_format: wgpu::TextureFormat,
    ) -> SceneDdaResources {
        // Framebuffer as a packed-`rgba8unorm` storage buffer (1 u32 per
        // pixel, row stride = `width`). See the struct-field note.
        let framebuffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("roxlap-gpu scene_dda.framebuffer"),
            size: u64::from(width) * u64::from(height) * 4,
            usage: wgpu::BufferUsages::STORAGE,
            mapped_at_creation: false,
        });
        // Screen size + flip flag for the blit's pixel→index math
        // (`vec2<u32>` size, then `flip_x` + pad). Re-written per frame in
        // `render_scene` so a flip toggle takes effect without a resize.
        let blit_dims = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("roxlap-gpu scene_dda.blit_dims"),
            size: 16,
            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });
        // Four u32s = the 16 bytes allocated above.
        self.queue.write_buffer(
            &blit_dims,
            0,
            bytemuck::bytes_of(&[width, height, u32::from(self.flip_x), 0u32]),
        );

        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("roxlap-gpu scene_dda.uniform"),
            size: std::mem::size_of::<SceneDdaUniform>() as u64,
            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
            mapped_at_creation: false,
        });

        // GPU.9 — per-pixel world-t depth (f32 bits as u32). Sized to
        // the storage texture; written by the scene pass when sprites
        // are active, read+tested by the sprite splatter.
        let depth_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("roxlap-gpu scene_dda.depth"),
            size: u64::from(width) * u64::from(height) * 4,
            // COPY_SRC so `read_depth_pixel` can stage it for picking.
            usage: wgpu::BufferUsages::STORAGE
                | wgpu::BufferUsages::COPY_DST
                | wgpu::BufferUsages::COPY_SRC,
            mapped_at_creation: false,
        });
        // CPU-mappable staging copy of the depth buffer (same byte size);
        // the destination of `read_depth_pixel`'s buffer-to-buffer copy.
        let depth_readback = self.device.create_buffer(&wgpu::BufferDescriptor {
            label: Some("roxlap-gpu scene_dda.depth_readback"),
            size: u64::from(width) * u64::from(height) * 4,
            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
            mapped_at_creation: false,
        });
        let dda_shader = self
            .device
            .create_shader_module(wgpu::ShaderModuleDescriptor {
                label: Some("scene_dda.wgsl"),
                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
            });
        // Binding indices below must stay in step with `scene_dda.wgsl`.
        let bgl_dda = self
            .device
            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
                label: Some("roxlap-gpu scene_dda.bgl"),
                entries: &[
                    bgl_uniform_entry(0),
                    bgl_storage_entry(1, true),
                    bgl_storage_entry(2, true),
                    bgl_storage_entry(3, true),
                    bgl_storage_entry(4, true),
                    bgl_storage_entry(5, true),
                    bgl_storage_entry(6, true),
                    bgl_storage_entry(7, true),
                    // Framebuffer storage buffer (read-write; the scene +
                    // sprite passes write packed pixels into it).
                    bgl_storage_entry(8, false),
                    // GPU.8 sky panorama + sampler.
                    wgpu::BindGroupLayoutEntry {
                        binding: 9,
                        visibility: wgpu::ShaderStages::COMPUTE,
                        ty: wgpu::BindingType::Texture {
                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
                            view_dimension: wgpu::TextureViewDimension::D2,
                            multisampled: false,
                        },
                        count: None,
                    },
                    wgpu::BindGroupLayoutEntry {
                        binding: 10,
                        visibility: wgpu::ShaderStages::COMPUTE,
                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
                        count: None,
                    },
                    // GPU.9 — read-write per-pixel depth buffer.
                    bgl_storage_entry(11, false),
                    // Occupancy pages 1..MAX_OCC_PAGES (page 0 is
                    // binding 1). Unused pages bind a dummy buffer.
                    bgl_storage_entry(12, true),
                    bgl_storage_entry(13, true),
                    bgl_storage_entry(14, true),
                    // Per-grid cameras (runtime-sized; one per grid).
                    bgl_storage_entry(15, true),
                    // TV.6 — material palette + terrain colour→material map.
                    bgl_storage_entry(16, true),
                    bgl_storage_entry(17, true),
                ],
            });
        let dda_pl = self
            .device
            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
                label: Some("roxlap-gpu scene_dda.layout"),
                bind_group_layouts: &[Some(&bgl_dda)],
                immediate_size: 0,
            });
        let pipeline_dda = self
            .device
            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
                label: Some("roxlap-gpu scene_dda.pipeline"),
                layout: Some(&dda_pl),
                module: &dda_shader,
                entry_point: Some("render_scene"),
                compilation_options: wgpu::PipelineCompilationOptions::default(),
                cache: None,
            });

        let blit_shader = self
            .device
            .create_shader_module(wgpu::ShaderModuleDescriptor {
                label: Some("scene_blit.wgsl"),
                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_blit.wgsl").into()),
            });
        let bgl_blit = self
            .device
            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
                label: Some("roxlap-gpu scene_dda.blit_bgl"),
                entries: &[
                    // Framebuffer storage buffer (read-only in the blit).
                    wgpu::BindGroupLayoutEntry {
                        binding: 0,
                        visibility: wgpu::ShaderStages::FRAGMENT,
                        ty: wgpu::BindingType::Buffer {
                            ty: wgpu::BufferBindingType::Storage { read_only: true },
                            has_dynamic_offset: false,
                            min_binding_size: None,
                        },
                        count: None,
                    },
                    // Screen-size uniform for the pixel→index math.
                    wgpu::BindGroupLayoutEntry {
                        binding: 1,
                        visibility: wgpu::ShaderStages::FRAGMENT,
                        ty: wgpu::BindingType::Buffer {
                            ty: wgpu::BufferBindingType::Uniform,
                            has_dynamic_offset: false,
                            min_binding_size: None,
                        },
                        count: None,
                    },
                ],
            });
        let blit_pl = self
            .device
            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
                label: Some("roxlap-gpu scene_dda.blit_layout"),
                bind_group_layouts: &[Some(&bgl_blit)],
                immediate_size: 0,
            });
        // The blit takes no vertex buffers (`buffers: &[]`) and writes the
        // surface format without blending.
        let pipeline_blit = self
            .device
            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
                label: Some("roxlap-gpu scene_dda.blit_pipeline"),
                layout: Some(&blit_pl),
                vertex: wgpu::VertexState {
                    module: &blit_shader,
                    entry_point: Some("vs_main"),
                    compilation_options: wgpu::PipelineCompilationOptions::default(),
                    buffers: &[],
                },
                fragment: Some(wgpu::FragmentState {
                    module: &blit_shader,
                    entry_point: Some("fs_main"),
                    compilation_options: wgpu::PipelineCompilationOptions::default(),
                    targets: &[Some(wgpu::ColorTargetState {
                        format: surface_format,
                        blend: None,
                        write_mask: wgpu::ColorWrites::ALL,
                    })],
                }),
                primitive: wgpu::PrimitiveState::default(),
                depth_stencil: None,
                multisample: wgpu::MultisampleState::default(),
                multiview_mask: None,
                cache: None,
            });
        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
            label: Some("roxlap-gpu scene_dda.blit_bg"),
            layout: &bgl_blit,
            entries: &[
                wgpu::BindGroupEntry {
                    binding: 0,
                    resource: framebuffer.as_entire_binding(),
                },
                wgpu::BindGroupEntry {
                    binding: 1,
                    resource: blit_dims.as_entire_binding(),
                },
            ],
        });

        // TV.6 — material palette + terrain map buffers, seeded from the
        // renderer's current scene-material state (so a map defined before the
        // scene pass was built still takes effect).
        let (materials_pal_buf, terrain_map_buf) = {
            use wgpu::util::DeviceExt;
            let pal = self
                .device
                .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                    label: Some("roxlap-gpu scene_dda.materials_pal"),
                    contents: bytemuck::cast_slice(self.scene_materials.as_slice()),
                    usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
                });
            // Fixed 256-row map (≤256 materials anyway) → no re-alloc when the
            // host changes the map after the scene pass is built.
            let mut rows = [[0u32; 2]; 256];
            // `zip` stops at the shorter side: rows past the current map stay
            // zeroed, and map entries beyond 256 are not copied.
            for (slot, &row) in rows.iter_mut().zip(self.scene_terrain_map.iter()) {
                *slot = row;
            }
            let map = self
                .device
                .create_buffer_init(&wgpu::util::BufferInitDescriptor {
                    label: Some("roxlap-gpu scene_dda.terrain_map"),
                    contents: bytemuck::cast_slice(&rows),
                    usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
                });
            (pal, map)
        };

        SceneDdaResources {
            storage_size: (width, height),
            framebuffer,
            uniform_buf,
            bgl_dda,
            pipeline_dda,
            blit_bg,
            pipeline_blit,
            blit_dims,
            depth_buffer,
            depth_readback,
            materials_pal_buf,
            terrain_map_buf,
        }
    }
3316
3317    /// Read back the per-pixel world-t depth at window pixel `(x, y)`
3318    /// from the last rendered frame, for screen→world picking. Returns
3319    /// the distance `t` along the (normalised) view ray to the nearest
3320    /// scene-grid surface, so the host reconstructs the world hit as
3321    /// `cam.pos + t * normalize(ray_dir)`. `None` for out-of-bounds
3322    /// pixels, sky / no-hit (the `T_INF` sentinel), or when no scene
3323    /// frame has been rendered.
3324    ///
3325    /// The depth buffer is the SCENE pass's output (terrain + grids),
3326    /// untouched by the sprite pass (which reads it read-only), so a
3327    /// cursor sprite under the pointer does not occlude the pick.
3328    ///
3329    /// Synchronous: copies the depth buffer to a mapped staging buffer
3330    /// and blocks on `device.poll(Wait)`. Cheap enough for click-time
3331    /// picks; do not call it every frame.
3332    ///
3333    /// Requires the last frame to have written depth, which happens
3334    /// when sprites are present (`write_depth`). The pick demo always
3335    /// has a cursor sprite, so this holds.
3336    ///
3337    /// Compiles on wasm, but the wasm facade never calls it: WebGPU's
3338    /// `device.poll` doesn't block for the GPU, so the blocking
3339    /// `recv()` here would hang the single browser thread. Picking is
3340    /// deferred on the wasm GPU path (the facade returns `None`).
3341    #[must_use]
3342    pub fn read_depth_pixel(&self, x: u32, y: u32) -> Option<f32> {
3343        let dda = self.scene_dda.as_ref()?;
3344        let (w, h) = dda.storage_size;
3345        if x >= w || y >= h {
3346            return None;
3347        }
3348        let mut enc = self
3349            .device
3350            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
3351                label: Some("roxlap-gpu depth readback"),
3352            });
3353        let size = u64::from(w) * u64::from(h) * 4;
3354        enc.copy_buffer_to_buffer(&dda.depth_buffer, 0, &dda.depth_readback, 0, size);
3355        self.queue.submit(std::iter::once(enc.finish()));
3356
3357        let slice = dda.depth_readback.slice(..);
3358        let (tx, rx) = std::sync::mpsc::channel();
3359        slice.map_async(wgpu::MapMode::Read, move |r| {
3360            let _ = tx.send(r);
3361        });
3362        self.device.poll(wgpu::PollType::wait_indefinitely()).ok();
3363        rx.recv().ok()?.ok()?;
3364
3365        let t = {
3366            let data = slice.get_mapped_range();
3367            let idx = ((y * w + x) * 4) as usize;
3368            let bytes: [u8; 4] = data[idx..idx + 4].try_into().ok()?;
3369            f32::from_le_bytes(bytes)
3370        };
3371        dda.depth_readback.unmap();
3372
3373        // Reject sky / no-hit (T_INF == 1e30 in the shader) + non-finite.
3374        if !t.is_finite() || t >= 1.0e29 {
3375            return None;
3376        }
3377        Some(t)
3378    }
3379
3380    /// World-space view-ray direction (un-normalised) for window pixel
3381    /// `(x, y)`, under the GPU marcher's projection — the canonical GPU
3382    /// unproject, mirroring `scene_dda.wgsl`'s `render_scene`
3383    /// (vertical-FOV pinhole). Uses the last-rendered frame's target
3384    /// size + FOV; `None` before the first scene render. Pair with
3385    /// [`Self::read_depth_pixel`] for screen→world picking.
3386    #[must_use]
3387    pub fn pixel_ray(
3388        &self,
3389        right: [f64; 3],
3390        down: [f64; 3],
3391        forward: [f64; 3],
3392        x: f64,
3393        y: f64,
3394    ) -> Option<[f64; 3]> {
3395        let dda = self.scene_dda.as_ref()?;
3396        let (w, h) = dda.storage_size;
3397        if w == 0 || h == 0 || self.last_fov_y_rad <= 0.0 {
3398            return None;
3399        }
3400        Some(pinhole_pixel_ray(
3401            right,
3402            down,
3403            forward,
3404            x,
3405            y,
3406            f64::from(w),
3407            f64::from(h),
3408            f64::from(self.last_fov_y_rad),
3409        ))
3410    }
3411
3412    /// GPU.10.1 — upload a sprite model registry + its instances for
3413    /// the DDA path. An empty instance slice clears all sprites.
3414    pub fn set_sprite_instances(
3415        &mut self,
3416        registry: &sprite_model::SpriteModelRegistry,
3417        instances: &[sprite_model::SpriteInstance],
3418    ) {
3419        if instances.is_empty() {
3420            self.sprite_registry = None;
3421            return;
3422        }
3423        self.sprite_registry = Some(sprite_model::SpriteRegistryResident::upload(
3424            &self.device,
3425            registry,
3426            instances,
3427        ));
3428    }
3429
3430    /// Incrementally append sprite instances **without** rebuilding the
3431    /// registry — the cheap streaming-spawn path (asteroids, projectiles).
3432    /// Returns the index of the first appended instance (`[base, base+N)`).
3433    ///
3434    /// Every appended instance must reference a model already registered
3435    /// by the [`Self::set_sprite_instances`] that established residency
3436    /// (model volumes are not re-uploaded here — build the full
3437    /// `SpriteModelRegistry` up front and seed it once, then stream
3438    /// instances). If no registry is resident yet, this performs the
3439    /// initial full upload and returns `0`.
3440    ///
3441    /// Cost is amortised O(1) per instance (the GPU instance buffer grows
3442    /// by powers of two), versus the full volume + buffer rebuild of
3443    /// [`Self::set_sprite_instances`].
3444    pub fn append_sprite_instances(
3445        &mut self,
3446        registry: &sprite_model::SpriteModelRegistry,
3447        instances: &[sprite_model::SpriteInstance],
3448    ) -> u32 {
3449        match self.sprite_registry.as_mut() {
3450            Some(reg) => reg.append_instances(&self.device, registry, instances),
3451            None => {
3452                self.set_sprite_instances(registry, instances);
3453                0
3454            }
3455        }
3456    }
3457
3458    /// Remove the sprite instance at `index` (swap-remove, O(1), no model
3459    /// re-upload). Returns `Some(old_last)` if a different instance was
3460    /// moved into `index` to fill the hole — its index changed from
3461    /// `old_last` to `index`, so a caller tracking instance handles must
3462    /// update that one. Returns `None` if `index` was the last element /
3463    /// out of range, or no registry is resident.
3464    pub fn remove_sprite_instance(&mut self, index: usize) -> Option<usize> {
3465        self.sprite_registry
3466            .as_mut()
3467            .and_then(|reg| reg.remove_instance(index))
3468    }
3469
3470    /// Incrementally add a new model (its full LOD chain) to the resident
3471    /// sprite registry **without** re-uploading the existing models — the
3472    /// counterpart to [`Self::append_sprite_instances`] for streaming in
3473    /// new geometry (unique asteroids, generated meshes).
3474    ///
3475    /// Usage mirrors `update_sprite_model`: you own the
3476    /// [`SpriteModelRegistry`](sprite_model::SpriteModelRegistry), append
3477    /// the model with [`add_lod`](sprite_model::SpriteModelRegistry::add_lod)
3478    /// (or `add`), then pass the returned `chain_id` here to sync that one
3479    /// chain to the GPU. Afterwards [`Self::append_sprite_instances`] may
3480    /// reference it.
3481    ///
3482    /// If no registry is resident yet, this performs the initial full
3483    /// upload of `registry` (all its current models, zero instances) to
3484    /// establish residency — so call it for your *first* model; only
3485    /// chains appended *after* residency exists are added incrementally.
3486    ///
3487    /// Cost is amortised O(new model voxels): the shared volume buffers
3488    /// carry slack and bump-append, growing (and rebuilding once from the
3489    /// registry) only on overflow.
3490    /// Flush queued `write_buffer` uploads by submitting an empty command
3491    /// stream. wgpu stages `write_buffer` data and flushes it on the next
3492    /// `Queue::submit`; calling this between batches of uploads (e.g. a
3493    /// flipbook's frames in [`Self::add_sprite_model`]) recycles the device
3494    /// staging pool so a big one-shot batch can't exhaust it (which would
3495    /// then crash egui-wgpu's own `write_buffer`).
3496    pub fn flush_writes(&self) {
3497        self.queue.submit(std::iter::empty::<wgpu::CommandBuffer>());
3498    }
3499
3500    pub fn add_sprite_model(
3501        &mut self,
3502        registry: &sprite_model::SpriteModelRegistry,
3503        chain_id: u32,
3504    ) {
3505        match self.sprite_registry.as_mut() {
3506            Some(reg) => reg.add_model(&self.device, &self.queue, registry, chain_id),
3507            None => {
3508                self.sprite_registry = Some(sprite_model::SpriteRegistryResident::upload(
3509                    &self.device,
3510                    registry,
3511                    &[],
3512                ));
3513            }
3514        }
3515    }
3516
3517    /// Remove a model (tombstone its LOD chain) from the resident sprite
3518    /// registry — the counterpart to [`Self::add_sprite_model`]. Frees its
3519    /// `colors`/`dirs` space for reuse by a later add; the smaller
3520    /// `occupancy`/`color_offsets` holes are reclaimed by
3521    /// [`Self::compact_sprite_models`]. Entry / chain ids stay stable, so
3522    /// other models' `chain_id`s remain valid.
3523    ///
3524    /// Instances of the removed model keep their slots but draw as nothing
3525    /// until the caller drops them via [`Self::remove_sprite_instance`].
3526    /// No-op if `chain_id` is unknown / already removed / no registry.
3527    pub fn remove_sprite_model(&mut self, chain_id: u32) {
3528        if let Some(reg) = self.sprite_registry.as_mut() {
3529            reg.remove_model(chain_id);
3530        }
3531    }
3532
3533    /// Reclaim the holes left by [`Self::remove_sprite_model`] by rebuilding
3534    /// the shared volume buffers from the live models only. `registry` must
3535    /// be the resident one. Cost is O(live volume) — call it when
3536    /// [`Self::dead_sprite_model_count`] is high (e.g. exceeds the live
3537    /// count), not every frame. No-op if no registry is resident.
3538    pub fn compact_sprite_models(&mut self, registry: &sprite_model::SpriteModelRegistry) {
3539        if let Some(reg) = self.sprite_registry.as_mut() {
3540            reg.compact(&self.device, &self.queue, registry);
3541        }
3542    }
3543
3544    /// Number of live (non-removed) sprite models (0 if none uploaded).
3545    #[must_use]
3546    pub fn sprite_model_count(&self) -> usize {
3547        self.sprite_registry
3548            .as_ref()
3549            .map_or(0, sprite_model::SpriteRegistryResident::live_model_count)
3550    }
3551
3552    /// Number of removed-but-not-yet-compacted sprite models — the
3553    /// fragmentation signal for deciding when to call
3554    /// [`Self::compact_sprite_models`].
3555    #[must_use]
3556    pub fn dead_sprite_model_count(&self) -> usize {
3557        self.sprite_registry
3558            .as_ref()
3559            .map_or(0, sprite_model::SpriteRegistryResident::dead_model_count)
3560    }
3561
3562    /// Number of resident sprite instances (0 if none uploaded).
3563    #[must_use]
3564    pub fn sprite_instance_count(&self) -> usize {
3565        self.sprite_registry
3566            .as_ref()
3567            .map_or(0, sprite_model::SpriteRegistryResident::instance_count)
3568    }
3569
3570    /// Re-pose the already-resident sprite instances in place (no model
3571    /// volume re-upload) — the cheap per-frame path for animated KFA
3572    /// limbs. `instances` must match the last [`Self::set_sprite_instances`]
3573    /// in length + order. No-op if no sprite registry is resident.
3574    pub fn update_sprite_instance_transforms(
3575        &mut self,
3576        instances: &[sprite_model::SpriteInstance],
3577    ) {
3578        if let Some(reg) = self.sprite_registry.as_mut() {
3579            reg.update_transforms(instances);
3580        }
3581    }
3582
3583    /// GPU.12 incremental — re-upload only LOD chain `chain_id`'s entries
3584    /// after an in-place edit of `registry` (carve / recolour), without
3585    /// rebuilding the whole sprite registry. `registry` must be the one
3586    /// last passed to [`Self::set_sprite_instances`] with chain
3587    /// `chain_id` already edited. No-op if no registry is resident.
3588    pub fn update_sprite_model(
3589        &mut self,
3590        registry: &sprite_model::SpriteModelRegistry,
3591        chain_id: u32,
3592    ) {
3593        if let Some(reg) = self.sprite_registry.as_mut() {
3594            reg.update_model(&self.device, &self.queue, registry, chain_id);
3595        }
3596    }
3597
3598    /// VCL.2 — repoint sprite instance `index` at LOD chain `chain_id`
3599    /// (the per-frame flipbook step for animated voxel clips). `registry`
3600    /// is the resident one; `chain_id`'s volume must already be uploaded
3601    /// (e.g. a clip's frames registered via [`Self::add_sprite_model`]).
3602    /// CPU-side rewrite picked up by the next frame's cull — no volume
3603    /// re-upload. No-op if no registry is resident.
3604    pub fn set_sprite_instance_model(
3605        &mut self,
3606        registry: &sprite_model::SpriteModelRegistry,
3607        index: usize,
3608        chain_id: u32,
3609    ) {
3610        if let Some(reg) = self.sprite_registry.as_mut() {
3611            reg.set_instance_model(registry, index, chain_id);
3612        }
3613    }
3614
3615    /// Set the per-instance `kv6colmul[256]` lighting tables (voxlap's
3616    /// `update_reflects` output, e.g. via `roxlap_core::sprite::
3617    /// sprite_colmul`), in the same order/length as the last
3618    /// [`Self::set_sprite_instances`]. The GPU sprite pass modulates each
3619    /// voxel by its surface normal's entry — matching the CPU rasteriser.
3620    /// No-op if no sprite registry is resident.
3621    pub fn set_sprite_instance_colmul(&mut self, tables: &[[u64; 256]]) {
3622        if let Some(reg) = self.sprite_registry.as_mut() {
3623            reg.set_instance_colmul(tables);
3624        }
3625    }
3626
3627    /// GPU.10.4 — set the LOD pixel threshold: a sprite steps to the
3628    /// next mip once a mip-0 voxel would project below `px` screen
3629    /// pixels. `1.0` is the natural "no sub-pixel voxels" default;
3630    /// larger values force LOD in closer (useful for inspection).
3631    /// Clamped to ≥ 0.25.
3632    pub fn set_sprite_lod_px(&mut self, px: f32) {
3633        self.sprite_lod_px = px.max(0.25);
3634    }
3635
3636    /// GPU.11.1 — set the scene-grid LOD scan distance (world units).
3637    /// A chunk entered at world-t `t` is marched at mip
3638    /// `floor(log2(max(t, msd) / msd))`, clamped to its grid's mip
3639    /// ladder. `0` disables LOD (always mip-0). Larger values push
3640    /// the coarser mips farther out — the axis-aligned-mip-beams
3641    /// mitigation lever (GPU.11.2). Default 64 (matches CPU
3642    /// `mip_scan_dist`).
3643    pub fn set_scene_mip_scan_dist(&mut self, dist: f32) {
3644        self.scene_mip_scan_dist = dist.max(0.0);
3645    }
3646
3647    /// Set per-face grid side-shading — voxlap's
3648    /// `setsideshades(top, bot, left, right, up, down)`. Each value is
3649    /// subtracted (as a u8, matching the CPU `gcsub` high byte) from a
3650    /// hit voxel's brightness byte before shading, so the scene-DDA pass
3651    /// darkens grid faces the same way the CPU rasteriser does. `[0; 6]`
3652    /// disables it (the default). The hit face is taken from the DDA's
3653    /// last-stepped axis + ray direction.
3654    pub fn set_scene_side_shades(&mut self, s: [i8; 6]) {
3655        // Reinterpret each i8 as u8 (voxlap stamps `sxx` into gcsub's
3656        // high byte verbatim), then pack (top, bot, left, right) /
3657        // (up, down, 0, 0) for the two uniform vec4s.
3658        let v = |i: usize| i32::from(s[i] as u8);
3659        self.scene_side_shades = [[v(0), v(1), v(2), v(3)], [v(4), v(5), 0, 0]];
3660    }
3661
3662    /// GPU.10.1 — build the instanced model-DDA pipeline (one thread
3663    /// per pixel). Lazily invoked the first frame a registry is present.
3664    fn build_sprite_model_dda(&self) -> SpriteModelDdaResources {
3665        let shader = self
3666            .device
3667            .create_shader_module(wgpu::ShaderModuleDescriptor {
3668                label: Some("sprite_model_dda.wgsl"),
3669                source: wgpu::ShaderSource::Wgsl(
3670                    include_str!("../shaders/sprite_model_dda.wgsl").into(),
3671                ),
3672            });
3673        let bgl = self
3674            .device
3675            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3676                label: Some("roxlap-gpu sprite_model_dda.bgl"),
3677                entries: &[
3678                    bgl_uniform_entry(0),
3679                    bgl_storage_entry(1, true),  // occupancy
3680                    bgl_storage_entry(2, true),  // colors
3681                    bgl_storage_entry(3, true),  // color_offsets
3682                    bgl_storage_entry(4, true),  // model_meta
3683                    bgl_storage_entry(5, true),  // instances
3684                    bgl_storage_entry(6, true),  // scene depth
3685                    bgl_storage_entry(7, false), // framebuffer (read-write buffer)
3686                    bgl_storage_entry(8, true),  // tile_ranges
3687                    bgl_storage_entry(9, true),  // tile_instances
3688                    bgl_storage_entry(10, true), // per-voxel dir
3689                    bgl_storage_entry(11, true), // per-instance kv6colmul
3690                    bgl_storage_entry(12, true), // TV — material palette
3691                    bgl_storage_entry(13, true), // TV.3 — per-voxel material id
3692                ],
3693            });
3694        let pl = self
3695            .device
3696            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3697                label: Some("roxlap-gpu sprite_model_dda.layout"),
3698                bind_group_layouts: &[Some(&bgl)],
3699                immediate_size: 0,
3700            });
3701        let pipeline = self
3702            .device
3703            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
3704                label: Some("roxlap-gpu sprite_model_dda.pipeline"),
3705                layout: Some(&pl),
3706                module: &shader,
3707                entry_point: Some("march"),
3708                compilation_options: wgpu::PipelineCompilationOptions::default(),
3709                cache: None,
3710            });
3711        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
3712            label: Some("roxlap-gpu sprite_model_dda.uniform"),
3713            size: std::mem::size_of::<SpriteModelUniform>() as u64,
3714            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
3715            mapped_at_creation: false,
3716        });
3717        // TV — material palette, seeded from the current renderer state so a
3718        // table defined before the sprite pass was built still takes effect.
3719        let materials_buf = {
3720            use wgpu::util::DeviceExt;
3721            self.device
3722                .create_buffer_init(&wgpu::util::BufferInitDescriptor {
3723                    label: Some("roxlap-gpu sprite_model_dda.materials"),
3724                    contents: bytemuck::cast_slice(self.sprite_materials.as_slice()),
3725                    usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
3726                })
3727        };
3728        SpriteModelDdaResources {
3729            bgl,
3730            pipeline,
3731            uniform_buf,
3732            materials_buf,
3733        }
3734    }
3735
3736    /// TV — set the global voxel-material palette for the GPU sprite pass.
3737    /// Mirrors the renderer's [`MaterialTable`](roxlap_formats::material::MaterialTable):
3738    /// every sprite/clip instance's `material` id indexes it for opacity +
3739    /// blend mode. Cheap (2 KB); call it whenever the palette changes (or
3740    /// each frame). While every material is opaque the shader stays on the
3741    /// unchanged first-hit path.
3742    pub fn set_sprite_materials(&mut self, table: &roxlap_formats::material::MaterialTable) {
3743        let (palette, any_translucent) = material_palette(table);
3744        self.sprite_materials = palette;
3745        self.sprite_has_translucent = any_translucent;
3746        if let Some(smd) = &self.sprite_model_dda {
3747            self.queue.write_buffer(
3748                &smd.materials_buf,
3749                0,
3750                bytemuck::cast_slice(self.sprite_materials.as_slice()),
3751            );
3752        }
3753    }
3754
3755    /// TV.6 — set the scene (terrain) material palette + colour→material map
3756    /// for the multi-grid scene pass. Matching-colour terrain voxels render
3757    /// translucent; an empty map / all-opaque palette renders unchanged. The
3758    /// map is capped at 256 rows (the fixed buffer size).
3759    pub fn set_scene_terrain_materials(
3760        &mut self,
3761        table: &roxlap_formats::material::MaterialTable,
3762        map: &[(u32, u8)],
3763    ) {
3764        let (palette, _) = material_palette(table);
3765        self.scene_materials = palette;
3766        self.scene_terrain_map = map
3767            .iter()
3768            .take(256)
3769            .map(|&(c, m)| [c & 0x00ff_ffff, u32::from(m)])
3770            .collect();
3771        self.scene_terrain_translucent = map.iter().any(|&(_, m)| !table.get(m).is_opaque());
3772        if let Some(dda) = &self.scene_dda {
3773            self.queue.write_buffer(
3774                &dda.materials_pal_buf,
3775                0,
3776                bytemuck::cast_slice(self.scene_materials.as_slice()),
3777            );
3778            if !self.scene_terrain_map.is_empty() {
3779                self.queue.write_buffer(
3780                    &dda.terrain_map_buf,
3781                    0,
3782                    bytemuck::cast_slice(&self.scene_terrain_map),
3783                );
3784            }
3785        }
3786    }
3787}
3788
3789/// GPU.11 — headless scene-DDA renderer for tests + offline visual
3790/// gates. Owns the `scene_dda.wgsl` compute pipeline with no surface
3791/// and no blit pass; renders a [`GpuSceneResident`] to an in-memory
3792/// RGBA framebuffer via texture readback. The per-substage visual
3793/// gate (render reference scenes, diff PPMs) and the GPU.11.1 mip
3794/// render-diff both ride on this.
3795pub struct HeadlessSceneRenderer {
3796    width: u32,
3797    height: u32,
3798    /// Framebuffer storage buffer (packed `rgba8unorm`, tight rows) —
3799    /// matches the buffer-output `scene_dda.wgsl` (see its note).
3800    framebuffer: wgpu::Buffer,
3801    depth_buffer: wgpu::Buffer,
3802    uniform_buf: wgpu::Buffer,
3803    _sky_texture: wgpu::Texture,
3804    sky_view: wgpu::TextureView,
3805    sky_sampler: wgpu::Sampler,
3806    bgl: wgpu::BindGroupLayout,
3807    pipeline: wgpu::ComputePipeline,
3808    readback: wgpu::Buffer,
3809    /// Per-face side-shades for the gate render (default none). Packed
3810    /// `[(top,bot,left,right), (up,down,_,_)]`; set via
3811    /// [`Self::set_side_shades`].
3812    side_shades: [[i32; 4]; 2],
3813}
3814
3815impl HeadlessSceneRenderer {
3816    /// Build the compute pipeline + output/readback resources for a
3817    /// `width × height` framebuffer. Validates `scene_dda.wgsl` and
3818    /// the [`scene::GridStaticMeta`] std430 layout at pipeline /
3819    /// bind-group time.
3820    #[must_use]
3821    pub fn new(device: &wgpu::Device, queue: &wgpu::Queue, width: u32, height: u32) -> Self {
3822        let framebuffer = device.create_buffer(&wgpu::BufferDescriptor {
3823            label: Some("roxlap-gpu headless.framebuffer"),
3824            size: u64::from(width) * u64::from(height) * 4,
3825            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
3826            mapped_at_creation: false,
3827        });
3828
3829        let uniform_buf = device.create_buffer(&wgpu::BufferDescriptor {
3830            label: Some("roxlap-gpu headless.uniform"),
3831            size: std::mem::size_of::<SceneDdaUniform>() as u64,
3832            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
3833            mapped_at_creation: false,
3834        });
3835        let depth_buffer = device.create_buffer(&wgpu::BufferDescriptor {
3836            label: Some("roxlap-gpu headless.depth"),
3837            size: u64::from(width) * u64::from(height) * 4,
3838            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
3839            mapped_at_creation: false,
3840        });
3841
3842        let default_sky_pixel = [120u8, 150, 220, 255];
3843        let (sky_texture, sky_view) = create_sky_texture(device, 1, 1, &default_sky_pixel);
3844        // Upload the default sky texel (create_sky_texture only allocates
3845        // — the texel must be written or the shader samples black, which
3846        // is why a grid-less headless render came back black).
3847        queue.write_texture(
3848            wgpu::TexelCopyTextureInfo {
3849                texture: &sky_texture,
3850                mip_level: 0,
3851                origin: wgpu::Origin3d::ZERO,
3852                aspect: wgpu::TextureAspect::All,
3853            },
3854            &default_sky_pixel,
3855            wgpu::TexelCopyBufferLayout {
3856                offset: 0,
3857                bytes_per_row: Some(4),
3858                rows_per_image: Some(1),
3859            },
3860            wgpu::Extent3d {
3861                width: 1,
3862                height: 1,
3863                depth_or_array_layers: 1,
3864            },
3865        );
3866        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
3867            label: Some("roxlap-gpu headless.sky_sampler"),
3868            address_mode_u: wgpu::AddressMode::Repeat,
3869            address_mode_v: wgpu::AddressMode::Repeat,
3870            mag_filter: wgpu::FilterMode::Linear,
3871            min_filter: wgpu::FilterMode::Linear,
3872            ..Default::default()
3873        });
3874
3875        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
3876            label: Some("scene_dda.wgsl (headless)"),
3877            source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
3878        });
3879        let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3880            label: Some("roxlap-gpu headless.bgl"),
3881            entries: &[
3882                bgl_uniform_entry(0),
3883                bgl_storage_entry(1, true),
3884                bgl_storage_entry(2, true),
3885                bgl_storage_entry(3, true),
3886                bgl_storage_entry(4, true),
3887                bgl_storage_entry(5, true),
3888                bgl_storage_entry(6, true),
3889                bgl_storage_entry(7, true),
3890                // Framebuffer storage buffer (read-write).
3891                bgl_storage_entry(8, false),
3892                wgpu::BindGroupLayoutEntry {
3893                    binding: 9,
3894                    visibility: wgpu::ShaderStages::COMPUTE,
3895                    ty: wgpu::BindingType::Texture {
3896                        sample_type: wgpu::TextureSampleType::Float { filterable: true },
3897                        view_dimension: wgpu::TextureViewDimension::D2,
3898                        multisampled: false,
3899                    },
3900                    count: None,
3901                },
3902                wgpu::BindGroupLayoutEntry {
3903                    binding: 10,
3904                    visibility: wgpu::ShaderStages::COMPUTE,
3905                    ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
3906                    count: None,
3907                },
3908                bgl_storage_entry(11, false),
3909                bgl_storage_entry(12, true),
3910                bgl_storage_entry(13, true),
3911                bgl_storage_entry(14, true),
3912                // Per-grid cameras (runtime-sized; one per grid).
3913                bgl_storage_entry(15, true),
3914                // TV.6 — material palette + terrain map (opaque dummies here).
3915                bgl_storage_entry(16, true),
3916                bgl_storage_entry(17, true),
3917            ],
3918        });
3919        let pl = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3920            label: Some("roxlap-gpu headless.layout"),
3921            bind_group_layouts: &[Some(&bgl)],
3922            immediate_size: 0,
3923        });
3924        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
3925            label: Some("roxlap-gpu headless.pipeline"),
3926            layout: Some(&pl),
3927            module: &shader,
3928            entry_point: Some("render_scene"),
3929            compilation_options: wgpu::PipelineCompilationOptions::default(),
3930            cache: None,
3931        });
3932
3933        // Readback is a tight buffer-to-buffer copy (no 256-byte row
3934        // padding, unlike the old texture-to-buffer path).
3935        let readback = device.create_buffer(&wgpu::BufferDescriptor {
3936            label: Some("roxlap-gpu headless.readback"),
3937            size: u64::from(width) * u64::from(height) * 4,
3938            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
3939            mapped_at_creation: false,
3940        });
3941
3942        Self {
3943            width,
3944            height,
3945            framebuffer,
3946            depth_buffer,
3947            uniform_buf,
3948            _sky_texture: sky_texture,
3949            sky_view,
3950            sky_sampler,
3951            bgl,
3952            pipeline,
3953            readback,
3954            side_shades: [[0; 4]; 2],
3955        }
3956    }
3957
3958    /// Set per-face side-shades for subsequent [`Self::render`] calls —
3959    /// voxlap `setsideshades(top, bot, left, right, up, down)`, each an
3960    /// i8 stamped as u8 (matching the engine path). Lets the gate test
3961    /// the GPU side-shade darkening.
3962    pub fn set_side_shades(&mut self, s: [i8; 6]) {
3963        let v = |i: usize| i32::from(s[i] as u8);
3964        self.side_shades = [[v(0), v(1), v(2), v(3)], [v(4), v(5), 0, 0]];
3965    }
3966
3967    /// Render `scene` from `cameras` (one per grid) and read the
3968    /// framebuffer back as `width*height` packed `0xAABBGGRR` pixels
3969    /// (R in the low byte). Fog is disabled. `mip_scan_dist` drives
3970    /// the GPU.11.1 scene-grid LOD (`0` = always mip-0). Blocks on
3971    /// readback.
3972    ///
3973    /// # Panics
3974    /// If `cameras.len() != scene.grid_count`.
3975    #[must_use]
3976    #[allow(clippy::too_many_arguments)]
3977    pub fn render(
3978        &self,
3979        device: &wgpu::Device,
3980        queue: &wgpu::Queue,
3981        scene: &GpuSceneResident,
3982        cameras: &[Camera],
3983        fov_y_rad: f32,
3984        max_outer_steps: u32,
3985        mip_scan_dist: f32,
3986    ) -> Vec<u32> {
3987        assert_eq!(
3988            cameras.len(),
3989            scene.grid_count as usize,
3990            "headless render: {} cameras for {} grids",
3991            cameras.len(),
3992            scene.grid_count,
3993        );
3994
3995        let cam_vec: Vec<SceneDdaPerGridCamera> = cameras
3996            .iter()
3997            .map(SceneDdaPerGridCamera::from_camera)
3998            .collect();
3999        let grid_cameras = upload_grid_cameras(device, &cam_vec);
4000        // TV.6 — opaque dummies for the material palette + terrain map
4001        // bindings (headless renders opaque-only: terrain_has_translucent=0).
4002        let (dummy_pal, dummy_map) = {
4003            use wgpu::util::DeviceExt;
4004            let pal: Vec<MaterialGpu> = vec![
4005                MaterialGpu {
4006                    alpha: 1.0,
4007                    mode: 0
4008                };
4009                256
4010            ];
4011            let p = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
4012                label: Some("roxlap-gpu headless.materials_pal"),
4013                contents: bytemuck::cast_slice(&pal),
4014                usage: wgpu::BufferUsages::STORAGE,
4015            });
4016            let m = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
4017                label: Some("roxlap-gpu headless.terrain_map"),
4018                contents: bytemuck::cast_slice(&[[0u32; 2]]),
4019                usage: wgpu::BufferUsages::STORAGE,
4020            });
4021            (p, m)
4022        };
4023        let uniform = SceneDdaUniform {
4024            fov_y_rad,
4025            grid_count: scene.grid_count,
4026            max_outer_steps,
4027            _pad0: 0,
4028            screen_size: [self.width, self.height],
4029            _pad1: [0; 2],
4030            // Fog off: near/far past any reachable t → factor 0.
4031            fog_color: [0.0, 0.0, 0.0, 1.0e29],
4032            fog_far: 1.0e30,
4033            write_depth: 0,
4034            occ_page_words: scene.occupancy_page_words,
4035            occ_num_pages: scene.occupancy_num_pages,
4036            mip_scan_dist,
4037            terrain_has_translucent: 0, // headless gate: opaque only
4038            terrain_map_count: 0,
4039            _pad4: 0,
4040            // Sky direction from the first grid camera (the world frame
4041            // in these tests); a default forward camera when there are
4042            // none (grid_count == 0) so the sky lookup stays valid.
4043            sky_cam: SceneDdaPerGridCamera::from_camera(&cameras.first().copied().unwrap_or(
4044                Camera {
4045                    position: [0.0; 3],
4046                    right: [1.0, 0.0, 0.0],
4047                    down: [0.0, 0.0, 1.0],
4048                    forward: [0.0, 1.0, 0.0],
4049                    fov_y_rad,
4050                },
4051            )),
4052            side_shades0: self.side_shades[0],
4053            side_shades1: self.side_shades[1],
4054        };
4055        queue.write_buffer(&self.uniform_buf, 0, bytemuck::bytes_of(&uniform));
4056
4057        let bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
4058            label: Some("roxlap-gpu headless.bg"),
4059            layout: &self.bgl,
4060            entries: &[
4061                wgpu::BindGroupEntry {
4062                    binding: 0,
4063                    resource: self.uniform_buf.as_entire_binding(),
4064                },
4065                wgpu::BindGroupEntry {
4066                    binding: 1,
4067                    resource: scene.occupancy_pages[0].as_entire_binding(),
4068                },
4069                wgpu::BindGroupEntry {
4070                    binding: 2,
4071                    resource: scene.all_color_offsets.as_entire_binding(),
4072                },
4073                wgpu::BindGroupEntry {
4074                    binding: 3,
4075                    resource: scene.all_colors.as_entire_binding(),
4076                },
4077                wgpu::BindGroupEntry {
4078                    binding: 4,
4079                    resource: scene.all_chunk_colors_base.as_entire_binding(),
4080                },
4081                wgpu::BindGroupEntry {
4082                    binding: 5,
4083                    resource: scene.all_chunk_occupancy.as_entire_binding(),
4084                },
4085                wgpu::BindGroupEntry {
4086                    binding: 6,
4087                    resource: scene.grid_static_meta.as_entire_binding(),
4088                },
4089                wgpu::BindGroupEntry {
4090                    binding: 7,
4091                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
4092                },
4093                wgpu::BindGroupEntry {
4094                    binding: 8,
4095                    resource: self.framebuffer.as_entire_binding(),
4096                },
4097                wgpu::BindGroupEntry {
4098                    binding: 9,
4099                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
4100                },
4101                wgpu::BindGroupEntry {
4102                    binding: 10,
4103                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
4104                },
4105                wgpu::BindGroupEntry {
4106                    binding: 11,
4107                    resource: self.depth_buffer.as_entire_binding(),
4108                },
4109                wgpu::BindGroupEntry {
4110                    binding: 12,
4111                    resource: scene.occupancy_pages[1].as_entire_binding(),
4112                },
4113                wgpu::BindGroupEntry {
4114                    binding: 13,
4115                    resource: scene.occupancy_pages[2].as_entire_binding(),
4116                },
4117                wgpu::BindGroupEntry {
4118                    binding: 14,
4119                    resource: scene.occupancy_pages[3].as_entire_binding(),
4120                },
4121                wgpu::BindGroupEntry {
4122                    binding: 15,
4123                    resource: grid_cameras.as_entire_binding(),
4124                },
4125                wgpu::BindGroupEntry {
4126                    binding: 16,
4127                    resource: dummy_pal.as_entire_binding(),
4128                },
4129                wgpu::BindGroupEntry {
4130                    binding: 17,
4131                    resource: dummy_map.as_entire_binding(),
4132                },
4133            ],
4134        });
4135
4136        let mut enc =
4137            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
4138        {
4139            let mut pass = enc.begin_compute_pass(&wgpu::ComputePassDescriptor {
4140                label: Some("roxlap-gpu headless.pass"),
4141                timestamp_writes: None,
4142            });
4143            pass.set_pipeline(&self.pipeline);
4144            pass.set_bind_group(0, &bg, &[]);
4145            pass.dispatch_workgroups(self.width.div_ceil(8), self.height.div_ceil(8), 1);
4146        }
4147        enc.copy_buffer_to_buffer(
4148            &self.framebuffer,
4149            0,
4150            &self.readback,
4151            0,
4152            u64::from(self.width) * u64::from(self.height) * 4,
4153        );
4154        queue.submit(Some(enc.finish()));
4155
4156        let slice = self.readback.slice(..);
4157        let (tx, rx) = std::sync::mpsc::channel();
4158        slice.map_async(wgpu::MapMode::Read, move |r| {
4159            let _ = tx.send(r);
4160        });
4161        device.poll(wgpu::PollType::wait_indefinitely()).ok();
4162        rx.recv().expect("map_async channel").expect("map_async");
4163
4164        let data = slice.get_mapped_range();
4165        // Tight `width*height` packed pixels — the shader's
4166        // `pack4x8unorm(vec4(r,g,b,a))` already yields `0xAABBGGRR`
4167        // little-endian, so a straight u32 read reconstructs each pixel.
4168        let out: Vec<u32> = data
4169            .chunks_exact(4)
4170            .map(|px| u32::from_le_bytes([px[0], px[1], px[2], px[3]]))
4171            .collect();
4172        drop(data);
4173        self.readback.unmap();
4174        out
4175    }
4176}
4177
4178fn bgl_uniform_entry(binding: u32) -> wgpu::BindGroupLayoutEntry {
4179    wgpu::BindGroupLayoutEntry {
4180        binding,
4181        visibility: wgpu::ShaderStages::COMPUTE,
4182        ty: wgpu::BindingType::Buffer {
4183            ty: wgpu::BufferBindingType::Uniform,
4184            has_dynamic_offset: false,
4185            min_binding_size: None,
4186        },
4187        count: None,
4188    }
4189}
4190
4191fn bgl_storage_entry(binding: u32, read_only: bool) -> wgpu::BindGroupLayoutEntry {
4192    wgpu::BindGroupLayoutEntry {
4193        binding,
4194        visibility: wgpu::ShaderStages::COMPUTE,
4195        ty: wgpu::BindingType::Buffer {
4196            ty: wgpu::BufferBindingType::Storage { read_only },
4197            has_dynamic_offset: false,
4198            min_binding_size: None,
4199        },
4200        count: None,
4201    }
4202}
4203
4204/// Create a fresh sky panorama texture sized `width × height` with
4205/// the initial pixel data uploaded via `write_texture`. Used by
4206/// `GpuRenderer::new` (1×1 default) and `set_sky_panorama` (host-
4207/// supplied panorama).
4208fn create_sky_texture(
4209    device: &wgpu::Device,
4210    width: u32,
4211    height: u32,
4212    _initial_pixels: &[u8],
4213) -> (wgpu::Texture, wgpu::TextureView) {
4214    let tex = device.create_texture(&wgpu::TextureDescriptor {
4215        label: Some("roxlap-gpu sky_texture"),
4216        size: wgpu::Extent3d {
4217            width,
4218            height,
4219            depth_or_array_layers: 1,
4220        },
4221        mip_level_count: 1,
4222        sample_count: 1,
4223        dimension: wgpu::TextureDimension::D2,
4224        format: wgpu::TextureFormat::Rgba8Unorm,
4225        usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
4226        view_formats: &[],
4227    });
4228    let view = tex.create_view(&wgpu::TextureViewDescriptor::default());
4229    (tex, view)
4230}
4231
4232/// GPU.4 needs to upload a whole grid (~hundreds of MiB) as a few
4233/// storage buffers. wgpu's default `max_storage_buffer_binding_size`
4234/// is 128 MiB, which is just enough for the demo's 32×32 ground
4235/// occupancy (~128 MiB) but not the colour array. We request as
4236/// much as the adapter is willing to give — most desktop GPUs cap
4237/// individual storage buffers at 2-4 GiB; iGPUs often offer the
4238/// full system memory.
4239pub(crate) fn pick_required_limits(adapter_limits: &wgpu::Limits) -> wgpu::Limits {
4240    wgpu::Limits {
4241        max_storage_buffer_binding_size: adapter_limits.max_storage_buffer_binding_size,
4242        max_buffer_size: adapter_limits.max_buffer_size,
4243        // Occupancy paging adds up to MAX_OCC_PAGES-1 extra storage
4244        // bindings; with the scene's other buffers + the GPU.9 depth
4245        // buffer the scene_dda stage needs ~11. The default cap is 8.
4246        // Both NVK and lavapipe advertise ≫16, so request 16.
4247        max_storage_buffers_per_shader_stage: adapter_limits
4248            .max_storage_buffers_per_shader_stage
4249            .min(16),
4250        ..wgpu::Limits::default()
4251    }
4252}
4253
4254fn pick_present_mode(modes: &[wgpu::PresentMode]) -> wgpu::PresentMode {
4255    // Prefer Mailbox > Immediate > Fifo. Fifo is the universal
4256    // fallback and the only one Wayland-on-Mesa always offers.
4257    for &m in &[wgpu::PresentMode::Mailbox, wgpu::PresentMode::Immediate] {
4258        if modes.contains(&m) {
4259            return m;
4260        }
4261    }
4262    wgpu::PresentMode::Fifo
4263}
4264
/// Un-normalised world-space view-ray direction through the centre of
/// window pixel `(x, y)` for a vertical-FOV pinhole camera — the
/// projection `scene_dda.wgsl`'s `render_scene` uses. Shared by
/// [`GpuRenderer::pixel_ray`]; kept as a free function so it can be
/// unit-tested without a device.
///
/// * `right` / `down` / `forward` — the camera basis vectors.
/// * `x`, `y` — pixel coordinates; `0.5` is added to sample the pixel
///   centre.
/// * `w`, `h` — frame size in pixels; `w / h` is the aspect ratio.
/// * `fov_y_rad` — full vertical field of view, in radians.
///
/// The result is `forward` plus lateral/vertical offsets, so its
/// component along `forward` is whatever `forward` itself contributes.
#[must_use]
#[allow(clippy::too_many_arguments)]
pub fn pinhole_pixel_ray(
    right: [f64; 3],
    down: [f64; 3],
    forward: [f64; 3],
    x: f64,
    y: f64,
    w: f64,
    h: f64,
    fov_y_rad: f64,
) -> [f64; 3] {
    // Half extents of the image plane at unit distance along `forward`.
    let half_extent_y = (fov_y_rad * 0.5).tan();
    let half_extent_x = half_extent_y * (w / h);

    // Pixel centre in normalised device coordinates: x grows to the
    // right in [-1, 1]; y is +1 at the top row and -1 at the bottom.
    let ndc_x = (x + 0.5) / w * 2.0 - 1.0;
    let ndc_y_top = 1.0 - (y + 0.5) / h * 2.0;

    let kx = ndc_x * half_extent_x;
    let ky = ndc_y_top * half_extent_y;

    // `ky` is positive towards the top of the frame, hence the
    // subtraction along `down`.
    std::array::from_fn(|axis| forward[axis] + kx * right[axis] - ky * down[axis])
}
4293
#[cfg(test)]
mod pixel_ray_tests {
    use super::pinhole_pixel_ray;

    const RIGHT: [f64; 3] = [1.0, 0.0, 0.0];
    const DOWN: [f64; 3] = [0.0, 1.0, 0.0];
    const FWD: [f64; 3] = [0.0, 0.0, 1.0]; // voxlap z-down "look down"

    /// Frame size shared by the pixel-ray tests.
    const WIDTH: f64 = 1280.0;
    const HEIGHT: f64 = 720.0;

    /// Ray through `(x, y)` of the `WIDTH`×`HEIGHT` frame using the
    /// axis-aligned test basis above.
    fn ray_through(x: f64, y: f64, fov_y_rad: f64) -> [f64; 3] {
        pinhole_pixel_ray(RIGHT, DOWN, FWD, x, y, WIDTH, HEIGHT, fov_y_rad)
    }

    /// Frame centre (NDC 0,0) points straight along `forward`.
    #[test]
    fn centre_pixel_is_forward() {
        let d = ray_through(639.5, 359.5, 60_f64.to_radians());
        let [lateral, vertical, along] = d;
        assert!(
            lateral.abs() < 1e-9 && vertical.abs() < 1e-9,
            "centre ≈ forward, got {d:?}"
        );
        assert!((along - 1.0).abs() < 1e-9);
    }

    /// Right edge pixel tilts +right by tan(hfov/2); the lateral
    /// component equals half_w = tan(fov_y/2)*aspect at the very edge.
    #[test]
    fn right_edge_tilts_by_half_w() {
        let fov = 60_f64.to_radians();
        let half_w = (fov * 0.5).tan() * (WIDTH / HEIGHT);
        let d = ray_through(1279.5, 359.5, fov);
        assert!((d[0] - half_w).abs() < 1e-6, "x={}, half_w={half_w}", d[0]);
        assert!(d[0] > 0.0, "right edge tilts +right");
    }

    /// Statically validate every WGSL shader with naga (the same
    /// front-end + validator wgpu runs at pipeline creation), so shader
    /// edits — e.g. the GPU.10 sprite lighting bindings — are caught in
    /// CI without needing a GPU device.
    #[test]
    fn wgsl_shaders_validate() {
        // (file name used in failure messages, embedded shader source)
        let shaders: &[(&str, &str)] = &[
            (
                "sprite_model_dda.wgsl",
                include_str!("../shaders/sprite_model_dda.wgsl"),
            ),
            ("scene_dda.wgsl", include_str!("../shaders/scene_dda.wgsl")),
            ("blit.wgsl", include_str!("../shaders/blit.wgsl")),
            ("chunk_dda.wgsl", include_str!("../shaders/chunk_dda.wgsl")),
            ("grid_dda.wgsl", include_str!("../shaders/grid_dda.wgsl")),
            (
                "scene_blit.wgsl",
                include_str!("../shaders/scene_blit.wgsl"),
            ),
            ("line.wgsl", include_str!("../shaders/line.wgsl")),
            ("image.wgsl", include_str!("../shaders/image.wgsl")),
        ];

        let flags = naga::valid::ValidationFlags::all();
        let capabilities = naga::valid::Capabilities::all();
        let mut validator = naga::valid::Validator::new(flags, capabilities);

        for &(name, src) in shaders {
            // Parse first so syntax errors are reported with source context.
            let module = match naga::front::wgsl::parse_str(src) {
                Ok(module) => module,
                Err(e) => panic!("{name}: WGSL parse failed:\n{}", e.emit_to_string(src)),
            };
            if let Err(e) = validator.validate(&module) {
                panic!("{name}: WGSL validation failed: {e:?}");
            }
        }
    }

    /// A 2×2 world quad centred straight ahead projects to vertices whose
    /// homogeneous `w` equals the camera-forward distance (so the shader's
    /// `clip = ndc·w` recovers perspective-correct UVs) and whose `depth`
    /// is the euclidean range. Verifies geometry without a GPU device.
    #[test]
    fn image_vertices_carry_forward_w_and_euclidean_depth() {
        // Camera at the origin looking along +Z with an axis-aligned basis.
        let camera = crate::GpuLineCamera {
            pos: [0.0, 0.0, 0.0],
            right: [1.0, 0.0, 0.0],
            down: [0.0, 1.0, 0.0],
            forward: [0.0, 0.0, 1.0],
        };
        // Quad 10 units ahead (forward = +Z), spanning x∈[-1,1], y∈[-1,1].
        let quad = crate::GpuImageQuad {
            corners: [
                [-1.0, -1.0, 10.0], // TL
                [1.0, -1.0, 10.0],  // TR
                [-1.0, 1.0, 10.0],  // BL
                [1.0, 1.0, 10.0],   // BR
            ],
            image: 0,
            tint: [1.0, 1.0, 1.0, 1.0],
            depth_test: true,
            alpha_cutoff: 0.0,
        };

        let fov_y = 60_f32.to_radians();
        let vertices = crate::build_image_vertices(&camera, &quad, 800, 600, fov_y, false);

        assert_eq!(vertices.len(), 6, "two triangles, no near-clip");
        for vertex in &vertices {
            assert!((vertex.w - 10.0).abs() < 1e-4, "w == forward distance");
            assert!(
                vertex.depth >= 10.0,
                "euclidean depth >= forward distance"
            );
            assert_eq!(vertex.depth_test, 1.0);
        }
    }
}