Skip to main content

roxlap_gpu/
lib.rs

1//! WGPU-backed compute-shader renderer scaffold for the roxlap
2//! voxel engine. GPU.1 in `PORTING-GPU.md`.
3//!
4//! GPU.1's job: stand up the device + surface + swapchain on a
5//! host window (any [`raw-window-handle`](raw_window_handle)
6//! provider), present a clear-to-colour frame each render call,
7//! and give the host a one-call opt-in. No voxel marching yet — the
8//! [`examples/probe.rs`](../examples/probe.rs) standalone holds
9//! the empirical FPS baseline from GPU.0.
10//!
11//! Later sub-substages flesh `GpuRenderer::render` out: GPU.2
12//! uploads voxel data, GPU.3 dispatches the inner-DDA compute
13//! shader, GPU.4 layers in chunk skipping, GPU.5 plugs the renderer
14//! into `roxlap-scene::Scene`, …
15//!
16//! ## Host integration shape (GPU.1)
17//!
18//! ```no_run
19//! use std::sync::Arc;
20//! use roxlap_gpu::{GpuRenderer, GpuRendererSettings};
21//! # use winit::window::Window;
22//! # fn pick(w: Arc<Window>, size: (u32, u32)) -> Option<GpuRenderer> {
23//! match GpuRenderer::new_blocking(w, size, GpuRendererSettings::default()) {
24//!     Ok(r) => Some(r),
25//!     Err(e) => {
26//!         eprintln!("GPU init failed: {e}; falling back to CPU");
27//!         None
28//!     }
29//! }
30//! # }
31//! ```
32
33#![allow(clippy::must_use_candidate, clippy::too_many_lines)]
34
35pub mod camera;
36pub mod decompress;
37pub mod grid;
38// Headless rendering is a native-only test/bench aid: it blocks on
39// `pollster` + `device.poll(Wait)`, neither of which exists on wasm.
40#[cfg(not(target_arch = "wasm32"))]
41pub mod headless;
42pub mod resident;
43pub mod scene;
44pub mod sprite_model;
45
46pub use camera::Camera;
47pub use decompress::{decompress_chunk, ChunkUpload, BEDROCK_RGB, CHUNK_Z};
48pub use grid::{bounding_box_of, GpuGridResident, GridUpload};
49#[cfg(not(target_arch = "wasm32"))]
50pub use headless::HeadlessGpu;
51pub use resident::GpuChunkResident;
52pub use scene::{
53    GpuSceneResident, GridRuntimeTransform, GridStaticMeta, RefreshOutcome, SceneUpload,
54};
55pub use sprite_model::{
56    build_sprite_model, SpriteInstance, SpriteInstanceTransform, SpriteModel, SpriteModelRegistry,
57    SpriteRegistryResident,
58};
59
60use std::sync::Arc;
61
62use bytemuck::{Pod, Zeroable};
63use raw_window_handle::{HasDisplayHandle, HasWindowHandle};
64
/// Caller-controllable knobs for [`GpuRenderer::new`]. Defaults
/// target "highest-performance GPU, prefer Mailbox/Immediate over
/// vsync" — i.e. the same configuration the GPU.0 probe used to
/// measure the FPS ceiling.
///
/// The type is `Copy`, so it is passed to the constructor by value.
#[derive(Debug, Clone, Copy)]
pub struct GpuRendererSettings {
    /// Adapter power preference; see [`PowerPreference`]. The default
    /// is [`PowerPreference::High`].
    pub power_preference: PowerPreference,
    /// Initial clear colour cycled by GPU.1's empty render path.
    /// The voxel-rendering substages overwrite this entirely.
    /// Three `f64` channels; the default is `[0.06, 0.08, 0.12]`.
    pub clear_colour: [f64; 3],
    /// Prefer mailbox/immediate when offered; falls back to FIFO if
    /// the surface only supports it (Wayland under Mesa often does).
    /// The default is `true`.
    pub uncapped_present: bool,
}
79
/// Adapter power preference carried in
/// [`GpuRendererSettings::power_preference`].
///
/// NOTE(review): presumably translated to `wgpu::PowerPreference` during
/// adapter selection — that mapping is not visible in this part of the
/// file; confirm there.
#[derive(Debug, Clone, Copy)]
pub enum PowerPreference {
    /// The low-power choice.
    Low,
    /// The high-performance choice; what `GpuRendererSettings::default()`
    /// selects.
    High,
}
85
86impl Default for GpuRendererSettings {
87    fn default() -> Self {
88        Self {
89            power_preference: PowerPreference::High,
90            clear_colour: [0.06, 0.08, 0.12],
91            uncapped_present: true,
92        }
93    }
94}
95
/// Errors `GpuRenderer::new` surfaces to the host. The host's
/// expected flow is "try this, fall back to the CPU path on Err".
#[derive(Debug)]
pub enum GpuInitError {
    /// Creating the surface for the host window failed; wraps the
    /// underlying `wgpu` error.
    CreateSurface(wgpu::CreateSurfaceError),
    /// No compatible adapter was found. Carries no payload.
    NoAdapter,
    /// The device request failed; wraps the underlying `wgpu` error.
    RequestDevice(wgpu::RequestDeviceError),
}
104
105impl std::fmt::Display for GpuInitError {
106    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
107        match self {
108            Self::CreateSurface(e) => write!(f, "create_surface failed: {e}"),
109            Self::NoAdapter => write!(
110                f,
111                "no compatible adapter — does this system have a Vulkan/Metal/DX12 driver?"
112            ),
113            Self::RequestDevice(e) => write!(f, "request_device failed: {e}"),
114        }
115    }
116}
117
118impl std::error::Error for GpuInitError {
119    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
120        match self {
121            Self::CreateSurface(e) => Some(e),
122            Self::RequestDevice(e) => Some(e),
123            Self::NoAdapter => None,
124        }
125    }
126}
127
128impl From<wgpu::CreateSurfaceError> for GpuInitError {
129    fn from(value: wgpu::CreateSurfaceError) -> Self {
130        Self::CreateSurface(value)
131    }
132}
133
134impl From<wgpu::RequestDeviceError> for GpuInitError {
135    fn from(value: wgpu::RequestDeviceError) -> Self {
136        Self::RequestDevice(value)
137    }
138}
139
// NOTE: the paragraph below describes `GpuRenderer` (declared further
// down in this file), not `GpuLine`. It is kept as a plain comment so
// rustdoc does not fold it into `GpuLine`'s documentation.
//
// WGPU-backed renderer. Owns the device, queue, and surface
// bound to the host's window. [`Self::render`] is the GPU.1
// clear-to-colour path; [`Self::render_chunk`] is GPU.3's
// single-chunk DDA marcher.
//
// The window is consumed only at construction — `wgpu`'s
// `Surface<'static>` keeps its own `Arc` clone of the handle, so
// the renderer holds no window field of its own.

/// A world-space line segment for [`GpuRenderer::draw_lines_deferred`].
/// `color` is straight RGBA in `0..=1` (the alpha drives the over-blend);
/// `width_px` is the screen-space thickness; `depth_test` occludes the
/// segment behind nearer marched geometry.
#[derive(Clone, Copy, Debug)]
pub struct GpuLine {
    /// First endpoint, in world space.
    pub a: [f32; 3],
    /// Second endpoint, in world space.
    pub b: [f32; 3],
    /// Straight (non-premultiplied) RGBA, each channel in `0..=1`.
    pub color: [f32; 4],
    /// Screen-space thickness in pixels. `build_line_vertices` treats
    /// anything below `1.0` as `1.0`.
    pub width_px: f32,
    /// When `true`, the segment is hidden behind nearer marched geometry.
    pub depth_test: bool,
}
160
/// World camera basis for projecting [`GpuLine`] endpoints — the same
/// pinhole the scene-DDA pass marches with (`right`/`down`/`forward`
/// orthonormal, `pos` in world voxel units).
///
/// The overlay builders dot `point - pos` with each axis to obtain the
/// point's camera-space `(right, down, forward)` coordinates.
#[derive(Clone, Copy, Debug)]
pub struct GpuLineCamera {
    /// Camera position, in world voxel units.
    pub pos: [f32; 3],
    /// Camera-space +x axis, expressed in world space.
    pub right: [f32; 3],
    /// Camera-space +y axis (pointing down the screen), in world space.
    pub down: [f32; 3],
    /// Viewing direction; the component along it is the pinhole divisor.
    pub forward: [f32; 3],
}
171
/// Near plane (camera-forward distance) below which a [`GpuLine`] endpoint
/// is clipped, so the pinhole divide stays finite. The value is `1/16` of
/// a world unit; the image-quad clipper uses the same plane.
const LINE_NEAR_Z: f32 = 0.0625;
/// Depth-test slack (euclidean world distance) so a line resting on the
/// surface it traces doesn't z-fight the marched geometry.
const LINE_DEPTH_BIAS: f32 = 0.5;
178
/// One expanded-quad vertex (`build_line_vertices` output). `pos` is NDC;
/// `depth` is the euclidean world distance of the source endpoint (the
/// marcher's `best_t` metric); `depth_test` is `1.0`/`0.0`.
///
/// `#[repr(C)]` with eight `f32`s in total, so one vertex is 32 bytes
/// with no implicit padding (which is what lets it derive `Pod`).
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct LineVertex {
    /// NDC xy of this quad corner (x already mirrored when the flip is on).
    pos: [f32; 2],
    /// Euclidean camera distance of the (near-clipped) source endpoint.
    depth: f32,
    /// `1.0` when the owning line asked for depth testing, else `0.0`.
    depth_test: f32,
    /// The owning line's colour, copied unchanged.
    color: [f32; 4],
}
190
/// `line.wgsl` / `image.wgsl` fragment uniform (std140; padded to 32 bytes
/// so the uniform's struct stride is a 16-byte multiple).
///
/// Five 4-byte scalars (20 bytes) plus the 12-byte `_pad` make up the
/// 32 bytes.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct LineParams {
    // NOTE(review): presumably the render-target width/height in pixels —
    // confirm against the shader and the code that fills this struct.
    screen_w: u32,
    screen_h: u32,
    // NOTE(review): presumably fed from `LINE_DEPTH_BIAS` — confirm at the
    // write site, which is outside this part of the file.
    depth_bias: f32,
    // `1` keeps the shader from indexing the depth binding (see
    // `LineResources::dummy_depth`).
    no_depth: u32,
    /// 1 when the viewport flip is on. The depth buffer is written
    /// unflipped (the blit mirrors at read time), but these passes flip the
    /// vertex NDC X, so the fragment must mirror its depth lookup to match.
    flip_x: u32,
    // Explicit tail padding up to 32 bytes.
    _pad: [u32; 3],
}
206
/// Lazy-built debug-line pipeline (L3.2). The bind group is rebuilt each
/// draw (it references the current `scene_dda.depth_buffer`, which the
/// swapchain resize recreates); the pipeline / layout / uniform persist.
struct LineResources {
    /// Render pipeline for the line pass.
    pipeline: wgpu::RenderPipeline,
    /// Layout from which the per-draw bind group is rebuilt.
    bgl: wgpu::BindGroupLayout,
    /// Persistent uniform buffer for the pass.
    /// NOTE(review): presumably holds a `LineParams` — confirm at the
    /// write site.
    uniform_buf: wgpu::Buffer,
    /// 1-word stand-in bound when no scene depth exists (sprite-only /
    /// empty scene); `no_depth = 1` keeps the shader from indexing it.
    dummy_depth: wgpu::Buffer,
}
218
219/// Project + expand world-space [`GpuLine`]s into screen-space quad
220/// vertices (6 per visible segment) for `line.wgsl`. Mirrors the
221/// scene-DDA pinhole (`forward + ndc_x·half_w·right − ndc_y·half_h·down`)
222/// so lines land on the marched geometry, carrying each endpoint's
223/// euclidean world distance as the depth-test key (= the marcher's
224/// `best_t`). Segments fully behind the near plane are dropped; the rest
225/// are clipped to it.
226fn build_line_vertices(
227    cam: &GpuLineCamera,
228    lines: &[GpuLine],
229    w: u32,
230    h: u32,
231    fov_y: f32,
232    flip_x: bool,
233) -> Vec<LineVertex> {
234    let aspect = w as f32 / h as f32;
235    let half_h = (fov_y * 0.5).tan();
236    let half_w = half_h * aspect;
237    let (wf, hf) = (w as f32, h as f32);
238
239    let cam_coords = |p: [f32; 3]| -> [f32; 3] {
240        let d = [p[0] - cam.pos[0], p[1] - cam.pos[1], p[2] - cam.pos[2]];
241        [
242            cam.right[0] * d[0] + cam.right[1] * d[1] + cam.right[2] * d[2],
243            cam.down[0] * d[0] + cam.down[1] * d[1] + cam.down[2] * d[2],
244            cam.forward[0] * d[0] + cam.forward[1] * d[1] + cam.forward[2] * d[2],
245        ]
246    };
247    // Camera-space point → (NDC xy, euclidean depth). NDC y is up (+1 top),
248    // matching WebGPU clip space; depth is the marcher's world-t metric.
249    let project = |q: [f32; 3]| -> ([f32; 2], f32) {
250        let inv = 1.0 / q[2];
251        let nx = q[0] * inv / half_w;
252        let ny = -q[1] * inv / half_h;
253        let depth = (q[0] * q[0] + q[1] * q[1] + q[2] * q[2]).sqrt();
254        ([nx, ny], depth)
255    };
256
257    let mut out = Vec::with_capacity(lines.len() * 6);
258    for line in lines {
259        let ca = cam_coords(line.a);
260        let cb = cam_coords(line.b);
261        let (cfa, cfb) = (ca[2], cb[2]);
262        if cfa < LINE_NEAR_Z && cfb < LINE_NEAR_Z {
263            continue;
264        }
265        // Near-clip in segment-parameter space on the forward component.
266        let (mut t0, mut t1) = (0.0f32, 1.0f32);
267        let dz = cfb - cfa;
268        if dz.abs() > f32::EPSILON {
269            let tn = (LINE_NEAR_Z - cfa) / dz;
270            if dz > 0.0 {
271                t0 = t0.max(tn);
272            } else {
273                t1 = t1.min(tn);
274            }
275        }
276        if t0 > t1 {
277            continue;
278        }
279        let lerp3 = |t: f32| {
280            [
281                ca[0] + (cb[0] - ca[0]) * t,
282                ca[1] + (cb[1] - ca[1]) * t,
283                ca[2] + (cb[2] - ca[2]) * t,
284            ]
285        };
286        let (n0, d0) = project(lerp3(t0));
287        let (n1, d1) = project(lerp3(t1));
288
289        // Expand in pixel space for a uniform screen-space thickness.
290        let to_px = |n: [f32; 2]| [(n[0] * 0.5 + 0.5) * wf, (0.5 - n[1] * 0.5) * hf];
291        let to_ndc = |p: [f32; 2]| [p[0] / wf * 2.0 - 1.0, 1.0 - p[1] / hf * 2.0];
292        let p0 = to_px(n0);
293        let p1 = to_px(n1);
294        let (dx, dy) = (p1[0] - p0[0], p1[1] - p0[1]);
295        let len = (dx * dx + dy * dy).sqrt().max(1e-6);
296        let half = line.width_px.max(1.0) * 0.5;
297        let (ex, ey) = (-dy / len * half, dx / len * half);
298
299        let c0a = to_ndc([p0[0] + ex, p0[1] + ey]);
300        let c0b = to_ndc([p0[0] - ex, p0[1] - ey]);
301        let c1a = to_ndc([p1[0] + ex, p1[1] + ey]);
302        let c1b = to_ndc([p1[0] - ex, p1[1] - ey]);
303        let dt = if line.depth_test { 1.0 } else { 0.0 };
304        // Mirror the overlay's NDC x to match the flipped scene blit.
305        let vert = |pos: [f32; 2], depth: f32| LineVertex {
306            pos: [if flip_x { -pos[0] } else { pos[0] }, pos[1]],
307            depth,
308            depth_test: dt,
309            color: line.color,
310        };
311        // Two triangles, cull disabled so winding is irrelevant.
312        out.push(vert(c0a, d0));
313        out.push(vert(c0b, d0));
314        out.push(vert(c1a, d1));
315        out.push(vert(c1a, d1));
316        out.push(vert(c0b, d0));
317        out.push(vert(c1b, d1));
318    }
319    out
320}
321
/// A world-space 2D image-sprite quad for [`GpuRenderer::draw_images_deferred`].
/// `corners` are the four world points `TL, TR, BL, BR` (UVs `(0,0) (1,0)
/// (0,1) (1,1)`); `image` indexes a texture uploaded via
/// [`GpuRenderer::upload_image`]; `tint` is straight RGBA in `0..=1`
/// (multiplied into every texel); `depth_test` occludes the quad behind
/// nearer marched geometry. The facade resolves orientation + back-face
/// culling, so this is pure geometry.
#[derive(Clone, Copy, Debug)]
pub struct GpuImageQuad {
    /// World-space corners in `TL, TR, BL, BR` order.
    pub corners: [[f32; 3]; 4],
    /// Id of the texture to sample, as returned by
    /// [`GpuRenderer::upload_image`].
    pub image: usize,
    /// Straight RGBA multiplier in `0..=1`.
    pub tint: [f32; 4],
    /// When `true`, the quad is hidden behind nearer marched geometry.
    pub depth_test: bool,
    /// Texels with alpha below this (`0..=1`) are discarded in the FS.
    /// `0.0` keeps the plain over-blend.
    pub alpha_cutoff: f32,
}
339
/// One expanded textured-quad vertex (`build_image_vertices` output).
/// `ndc` is the projected NDC xy; `w` is the source `forward` depth, fed
/// back into a homogeneous clip position so the rasterizer interpolates
/// `uv` perspective-correctly; `depth` is the euclidean world distance
/// (the marcher's `best_t`) for the manual depth test.
///
/// `#[repr(C)]` with twelve `f32`s in total, so one vertex is 48 bytes
/// with no implicit padding.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct ImageVertex {
    /// Projected NDC xy (x already mirrored when the flip is on).
    ndc: [f32; 2],
    /// Camera-forward distance of the vertex.
    w: f32,
    /// Euclidean camera distance of the vertex.
    depth: f32,
    /// `1.0` when the owning quad asked for depth testing, else `0.0`.
    depth_test: f32,
    /// The owning quad's `alpha_cutoff`, copied unchanged.
    cutoff: f32,
    /// Texture coordinate (interpolated at near-plane crossings).
    uv: [f32; 2],
    /// The owning quad's `tint`, copied unchanged.
    tint: [f32; 4],
}
356
/// Lazy-built image-sprite pipeline (mirrors [`LineResources`]). The
/// per-draw bind group adds the quad's texture + a sampler to the line
/// pass's uniform + scene-depth bindings.
struct ImageResources {
    /// Render pipeline for the image-sprite pass.
    pipeline: wgpu::RenderPipeline,
    /// Layout from which the per-draw bind group is built.
    bgl: wgpu::BindGroupLayout,
    /// Persistent uniform buffer for the pass.
    uniform_buf: wgpu::Buffer,
    /// Stand-in depth binding, the counterpart of
    /// `LineResources::dummy_depth`.
    dummy_depth: wgpu::Buffer,
    /// Sampler bound alongside each quad's texture.
    sampler: wgpu::Sampler,
}
367
/// A retained image-sprite texture (uploaded via
/// [`GpuRenderer::upload_image`], referenced by [`GpuImageQuad::image`]).
struct ImageResident {
    /// View of the texture, kept so it can be bound when drawing.
    view: wgpu::TextureView,
    // Held so the view stays valid + the texture shows in profiler dumps.
    // The leading underscore marks it as never read directly.
    _texture: wgpu::Texture,
}
375
/// Camera-space textured-quad vertex (near-clip working set): the
/// `(right, down, forward)` components + the texture `uv`.
#[derive(Clone, Copy)]
struct ImgClipV {
    /// `[right, down, forward]` camera-space components; index 2 is the
    /// one tested against the near plane.
    cam: [f32; 3],
    /// Texture coordinate carried through clipping.
    uv: [f32; 2],
}
383
384/// Clip a convex camera-space polygon against the near plane
385/// (`forward >= LINE_NEAR_Z`), interpolating UVs at each crossing.
386fn clip_near_image(poly: &[ImgClipV]) -> Vec<ImgClipV> {
387    let n = poly.len();
388    let mut out: Vec<ImgClipV> = Vec::with_capacity(n + 1);
389    for i in 0..n {
390        let cur = poly[i];
391        let prev = poly[(i + n - 1) % n];
392        let cur_in = cur.cam[2] >= LINE_NEAR_Z;
393        let prev_in = prev.cam[2] >= LINE_NEAR_Z;
394        if cur_in != prev_in {
395            let t = (LINE_NEAR_Z - prev.cam[2]) / (cur.cam[2] - prev.cam[2]);
396            out.push(ImgClipV {
397                cam: [
398                    prev.cam[0] + (cur.cam[0] - prev.cam[0]) * t,
399                    prev.cam[1] + (cur.cam[1] - prev.cam[1]) * t,
400                    LINE_NEAR_Z,
401                ],
402                uv: [
403                    prev.uv[0] + (cur.uv[0] - prev.uv[0]) * t,
404                    prev.uv[1] + (cur.uv[1] - prev.uv[1]) * t,
405                ],
406            });
407        }
408        if cur_in {
409            out.push(cur);
410        }
411    }
412    out
413}
414
415/// Project + near-clip a world-space [`GpuImageQuad`] into perspective-correct
416/// textured-quad vertices for `image.wgsl`. Mirrors the scene-DDA pinhole
417/// (the same one [`build_line_vertices`] uses), carrying each vertex's
418/// euclidean world distance as the depth-test key. Quads fully behind the
419/// near plane produce no vertices.
420fn build_image_vertices(
421    cam: &GpuLineCamera,
422    quad: &GpuImageQuad,
423    w: u32,
424    h: u32,
425    fov_y: f32,
426    flip_x: bool,
427) -> Vec<ImageVertex> {
428    let aspect = w as f32 / h as f32;
429    let half_h = (fov_y * 0.5).tan();
430    let half_w = half_h * aspect;
431    let dt = if quad.depth_test { 1.0 } else { 0.0 };
432
433    let cam_coords = |p: [f32; 3]| -> [f32; 3] {
434        let d = [p[0] - cam.pos[0], p[1] - cam.pos[1], p[2] - cam.pos[2]];
435        [
436            cam.right[0] * d[0] + cam.right[1] * d[1] + cam.right[2] * d[2],
437            cam.down[0] * d[0] + cam.down[1] * d[1] + cam.down[2] * d[2],
438            cam.forward[0] * d[0] + cam.forward[1] * d[1] + cam.forward[2] * d[2],
439        ]
440    };
441    let project = |v: ImgClipV| -> ImageVertex {
442        let (cx, cy, cz) = (v.cam[0], v.cam[1], v.cam[2]);
443        let nx = cx / (cz * half_w);
444        ImageVertex {
445            // Mirror NDC x to match the flipped scene blit.
446            ndc: [if flip_x { -nx } else { nx }, -cy / (cz * half_h)],
447            w: cz,
448            depth: (cx * cx + cy * cy + cz * cz).sqrt(),
449            depth_test: dt,
450            cutoff: quad.alpha_cutoff,
451            uv: v.uv,
452            tint: quad.tint,
453        }
454    };
455
456    // Per-corner UV: TL(0,0) TR(1,0) BL(0,1) BR(1,1).
457    let uvs = [[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
458    let verts: Vec<ImgClipV> = quad
459        .corners
460        .iter()
461        .zip(uvs)
462        .map(|(c, uv)| ImgClipV {
463            cam: cam_coords(*c),
464            uv,
465        })
466        .collect();
467
468    let mut out = Vec::with_capacity(12);
469    for tri in [[0usize, 1, 2], [1, 3, 2]] {
470        let poly = [verts[tri[0]], verts[tri[1]], verts[tri[2]]];
471        let clipped = clip_near_image(&poly);
472        if clipped.len() < 3 {
473            continue;
474        }
475        for i in 1..clipped.len() - 1 {
476            out.push(project(clipped[0]));
477            out.push(project(clipped[i]));
478            out.push(project(clipped[i + 1]));
479        }
480    }
481    out
482}
483
/// WGPU-backed renderer. Owns the device, queue, and surface
/// bound to the host's window. [`Self::render`] is the GPU.1
/// clear-to-colour path; [`Self::render_chunk`] is GPU.3's
/// single-chunk DDA marcher.
///
/// The window is consumed only at construction — `wgpu`'s
/// `Surface<'static>` keeps its own `Arc` clone of the handle, so
/// the renderer holds no window field of its own.
///
/// Most pipeline state below is `Option`al and built lazily by the
/// render/draw call that first needs it.
pub struct GpuRenderer {
    // Core wgpu objects: presentation surface, its current configuration,
    // and the logical device + submission queue.
    surface: wgpu::Surface<'static>,
    surface_config: wgpu::SurfaceConfiguration,
    device: wgpu::Device,
    queue: wgpu::Queue,
    // NOTE(review): presumably a human-readable description of the chosen
    // adapter — confirm where it is filled in.
    adapter_info: String,
    // Clear colour for the GPU.1 clear path (see
    // `GpuRendererSettings::clear_colour`).
    clear_colour: [f64; 3],
    // NOTE(review): presumably counts rendered frames — the increment is
    // outside this part of the file.
    frame_count: u32,
    /// Mirror the marched scene horizontally on present (the scene blit
    /// samples `width-1-x`, and line/image overlays mirror their NDC x).
    /// The egui pass is unaffected. See [`Self::set_flip_x`].
    flip_x: bool,
    /// Lazy-built on first [`Self::render_chunk`] call; rebuilt when
    /// the swapchain resizes (storage texture must match).
    chunk_dda: Option<ChunkDdaResources>,
    /// Lazy-built on first [`Self::render_grid`] call; same resize
    /// trigger as `chunk_dda`. The two paths share the same blit
    /// pipeline structure but bind different storage layouts.
    grid_dda: Option<GridDdaResources>,
    /// Lazy-built on first [`Self::render_scene`] call. Holds the
    /// multi-grid pipeline + per-grid camera uniforms.
    scene_dda: Option<SceneDdaResources>,
    /// GPU.8 — panoramic sky texture + sampler. Created at
    /// `new` as a 1×1 mid-grey default; [`Self::set_sky_panorama`]
    /// replaces it. The scene-DDA bind group references this each
    /// frame.
    sky_texture: wgpu::Texture,
    sky_view: wgpu::TextureView,
    sky_sampler: wgpu::Sampler,
    /// GPU.8 fog state. `color` is BGRA-style premultiplied (each
    /// channel in [0, 1]); `near` is the world-t distance at which
    /// fog starts kicking in; `far` is the distance at which it's
    /// fully opaque. The shader does
    /// `mix(hit, fog, smoothstep(near, far, t))`.
    fog_color: [f32; 3],
    fog_near: f32,
    fog_far: f32,
    /// GPU.10 — sprites rendered as DDA-marched voxel models (the
    /// precise path; the GPU.9 compute splatter it replaced was
    /// retired in 10.5). Holds the concatenated model registry + the
    /// per-frame instance array; set via [`Self::set_sprite_instances`].
    sprite_registry: Option<sprite_model::SpriteRegistryResident>,
    /// Lazy-built pipeline + uniform for the model-DDA pass.
    sprite_model_dda: Option<SpriteModelDdaResources>,
    /// GPU.10.4 — LOD aggressiveness: step a sprite to the next mip
    /// once a mip-0 voxel projects below this many screen pixels.
    /// Defaults to 4.0 (the empirical sweet spot); the host can tune
    /// via [`Self::set_sprite_lod_px`].
    sprite_lod_px: f32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` is marched at the mip level
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// ladder. `0` disables LOD (always mip-0). Tunable via
    /// [`Self::set_scene_mip_scan_dist`] — the axis-aligned-mip-beams
    /// mitigation (GPU.11.2) pushes it outward if banding appears.
    scene_mip_scan_dist: f32,
    /// Per-face grid side-shades (voxlap setsideshades), packed for the
    /// scene-DDA uniform: `[0]=(top,bot,left,right)`, `[1]=(up,down,_,_)`.
    /// Each is the u8 shade intensity. `[[0;4];2]` = no shading. Set via
    /// [`Self::set_scene_side_shades`].
    scene_side_shades: [[i32; 4]; 2],
    /// Vertical FOV (radians) the last `render_scene` marched with —
    /// cached so [`Self::pixel_ray`] reconstructs the matching view ray
    /// for picking. `0` until the first scene render.
    last_fov_y_rad: f32,
    /// The acquired-but-not-yet-presented swapchain frame from the most
    /// recent deferred render ([`Self::render_scene`] /
    /// [`Self::render_clear_deferred`]). [`Self::present`] shows it as
    /// is; [`Self::paint_egui`] overlays egui first. Lets a host slot a
    /// UI pass between the marcher and present. `None` between present
    /// and the next render.
    pending_frame: Option<(wgpu::SurfaceTexture, wgpu::TextureView)>,
    /// Lazy-built debug-line pipeline (L3.2) — built on the first
    /// [`Self::draw_lines_deferred`] call.
    line_resources: Option<LineResources>,
    /// Persistent debug-line vertex buffer (L3.3) — grown on demand and
    /// reused across frames so a per-frame overlay (hundreds of segments)
    /// costs one `write_buffer`, not a fresh allocation. `line_vbuf_cap`
    /// is its capacity in bytes.
    line_vbuf: Option<wgpu::Buffer>,
    line_vbuf_cap: u64,
    /// Lazy-built image-sprite pipeline — built on the first
    /// [`Self::draw_images_deferred`] call.
    image_resources: Option<ImageResources>,
    /// Persistent image-sprite vertex buffer, grown on demand and reused
    /// across frames (like [`Self::line_vbuf`]).
    image_vbuf: Option<wgpu::Buffer>,
    // NOTE(review): presumably `image_vbuf`'s capacity in bytes, by
    // analogy with `line_vbuf_cap` — confirm at the grow site.
    image_vbuf_cap: u64,
    /// Retained image-sprite textures, indexed by the id
    /// [`Self::upload_image`] returns. A dropped slot is `None` and is
    /// re-used by a later upload.
    images: Vec<Option<ImageResident>>,
    /// Lazy-built `egui-wgpu` paint pipeline; created on the first
    /// [`Self::paint_egui`] call (`hud` feature).
    #[cfg(feature = "hud")]
    egui_renderer: Option<egui_wgpu::Renderer>,
}
581
/// Per-renderer chunk-DDA pipeline state. The compute shader writes
/// into the storage texture; a fullscreen-triangle render pass
/// nearest-neighbour blits it to the swapchain.
struct ChunkDdaResources {
    // Size the storage texture was created with.
    // NOTE(review): presumably compared against the swapchain size to
    // decide when to rebuild — confirm at the use site.
    storage_size: (u32, u32),
    // View of the storage texture the compute pass writes.
    storage_view: wgpu::TextureView,
    // Uniform buffer for the pass.
    // NOTE(review): presumably holds a `ChunkDdaUniform` — confirm.
    uniform_buf: wgpu::Buffer,
    // Bind-group layout + compute pipeline for the DDA pass.
    bgl_dda: wgpu::BindGroupLayout,
    pipeline_dda: wgpu::ComputePipeline,
    // Bind group + render pipeline for the blit to the swapchain.
    blit_bg: wgpu::BindGroup,
    pipeline_blit: wgpu::RenderPipeline,
    // wgpu BindGroups internally Arc their resources, but we keep
    // the handle so the sampler shows up in profiler dumps.
    _sampler: wgpu::Sampler,
}
597
/// Per-renderer grid-DDA pipeline state, stored in
/// `GpuRenderer::grid_dda`. Field-for-field the same shape as
/// `ChunkDdaResources`: a compute pass writing a storage texture plus a
/// blit pass to the swapchain.
struct GridDdaResources {
    // Size the storage texture was created with.
    storage_size: (u32, u32),
    // View of the storage texture the compute pass writes.
    storage_view: wgpu::TextureView,
    // Uniform buffer for the pass.
    // NOTE(review): presumably holds a `GridDdaUniform` — confirm.
    uniform_buf: wgpu::Buffer,
    // Bind-group layout + compute pipeline for the DDA pass.
    bgl_dda: wgpu::BindGroupLayout,
    pipeline_dda: wgpu::ComputePipeline,
    // Bind group + render pipeline for the blit to the swapchain.
    blit_bg: wgpu::BindGroup,
    pipeline_blit: wgpu::RenderPipeline,
    // Never read directly (leading underscore); held alongside the
    // bind group that uses it.
    _sampler: wgpu::Sampler,
}
608
/// Per-renderer scene-DDA (multi-grid) pipeline state, stored in
/// `GpuRenderer::scene_dda`. Unlike the chunk/grid paths, the frame is
/// accumulated in a storage buffer rather than a storage texture, and a
/// per-pixel depth buffer is kept for the overlay and sprite passes.
struct SceneDdaResources {
    // Pixel dimensions the buffers below were sized for.
    storage_size: (u32, u32),
    /// Framebuffer as a packed-`rgba8unorm` storage **buffer** (row
    /// stride = width), written by the scene + sprite compute passes
    /// and read by the blit. A buffer (not a storage texture) dodges
    /// Chrome-Dawn's tiled write-texture layout (which produced a
    /// 128×256-tiled image); linear + explicit stride is portable.
    framebuffer: wgpu::Buffer,
    // Uniform buffer for the scene pass.
    // NOTE(review): presumably holds a `SceneDdaUniform` — confirm.
    uniform_buf: wgpu::Buffer,
    // Bind-group layout + compute pipeline for the scene-DDA pass.
    bgl_dda: wgpu::BindGroupLayout,
    pipeline_dda: wgpu::ComputePipeline,
    // Bind group + render pipeline for the blit to the swapchain.
    blit_bg: wgpu::BindGroup,
    pipeline_blit: wgpu::RenderPipeline,
    /// Blit uniform: `[width, height, flip_x, _pad]`. Retained so the flip
    /// flag (offset 8) can be re-written per frame.
    blit_dims: wgpu::Buffer,
    /// GPU.9 — per-pixel world-t depth (f32 bits as u32), sized
    /// `width * height * 4`. The scene pass writes it when sprites
    /// are present; the sprite model-DDA pass reads + composites
    /// against it.
    depth_buffer: wgpu::Buffer,
    /// Picking — a `COPY_DST | MAP_READ` staging copy of `depth_buffer`
    /// so the host can read back the per-pixel world-t after a frame
    /// (e.g. click → which voxel). Same size as `depth_buffer`.
    depth_readback: wgpu::Buffer,
}
635
/// GPU.10.0 — single-sprite model-DDA pipeline: one thread per pixel
/// marches the model voxel volume and composites against the scene
/// depth buffer.
struct SpriteModelDdaResources {
    // Bind-group layout for the model-DDA pass.
    bgl: wgpu::BindGroupLayout,
    // Compute pipeline for the model-DDA pass.
    pipeline: wgpu::ComputePipeline,
    // Uniform buffer for the pass.
    // NOTE(review): presumably holds a `SpriteModelUniform` — confirm.
    uniform_buf: wgpu::Buffer,
}
644
/// Per-frame uniform for the model-DDA pass. Mirrors `Uniform` in
/// `sprite_model_dda.wgsl` (std140). Per-model + per-instance data
/// now live in storage buffers; this holds only the camera, fog, and
/// instance count.
///
/// Layout: every `[f32; 3]` is followed by an explicit `f32` pad so each
/// camera vector fills a 16-byte slot; the struct totals 112 bytes
/// (seven 16-byte rows). Field order must stay in step with the WGSL
/// struct.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SpriteModelUniform {
    // Camera position and basis vectors, one per 16-byte row.
    cam_pos: [f32; 3],
    _p0: f32,
    cam_right: [f32; 3],
    _p1: f32,
    cam_down: [f32; 3],
    _p2: f32,
    cam_forward: [f32; 3],
    _p3: f32,
    // NOTE(review): the scene uniform packs `fog_near` into the fourth
    // channel of its `fog_color`; presumably the same here — confirm
    // against the WGSL.
    fog_color: [f32; 4],
    // Row: screen size (two `u32`s), instance count, fog far distance.
    screen_size: [u32; 2],
    instance_count: u32,
    fog_far: f32,
    // Row: vertical FOV in radians, tile-grid width, tile edge, pad.
    fov_y_rad: f32,
    // NOTE(review): presumably the number of screen tiles per row —
    // confirm where it is computed.
    tiles_x: u32,
    // NOTE(review): presumably `SPRITE_TILE_SIZE` — confirm at the
    // write site.
    tile_size: u32,
    _p6: f32,
}
669
/// GPU.10.3 — sprite screen-tile edge in pixels for instance binning.
/// With a 16-pixel edge, a full tile covers 16 × 16 = 256 pixels.
const SPRITE_TILE_SIZE: u32 = 16;
672
673/// Build the per-grid camera storage buffer bound at `scene_dda.wgsl`
674/// binding 15 (read-only). One [`SceneDdaPerGridCamera`] per grid; the
675/// shader only indexes `0..grid_count`. An empty scene pads to one
676/// zeroed element (wgpu rejects a zero-sized storage binding). This
677/// replaces the old fixed `[…; 16]` uniform array, so a scene can hold
678/// any number of grids — the only ceiling is the device's storage size.
679fn upload_grid_cameras(device: &wgpu::Device, cams: &[SceneDdaPerGridCamera]) -> wgpu::Buffer {
680    use wgpu::util::DeviceExt;
681    let one = [SceneDdaPerGridCamera::zeroed()];
682    let src: &[SceneDdaPerGridCamera] = if cams.is_empty() { &one } else { cams };
683    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
684        label: Some("roxlap-gpu scene_dda.grid_cameras"),
685        contents: bytemuck::cast_slice(src),
686        usage: wgpu::BufferUsages::STORAGE,
687    })
688}
689
690// The scene_dda bind group + layout wire occupancy pages 1..=3 at
691// bindings 12..=14 explicitly; keep that in lockstep with the page
692// count. Bump the bindings (here, in the WGSL, and in the bind
693// group) if MAX_OCC_PAGES changes.
694const _: () = assert!(scene::MAX_OCC_PAGES == 4);
695
/// One camera (position + basis) in GPU layout: the element type of the
/// per-grid camera storage buffer built by `upload_grid_cameras`, and the
/// type of `SceneDdaUniform::sky_cam`.
///
/// Each `[f32; 3]` is followed by an explicit `f32` pad so every vector
/// fills a 16-byte slot; the struct is 64 bytes.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaPerGridCamera {
    // Camera position.
    pos: [f32; 3],
    _pad0: f32,
    // Camera basis vectors.
    right: [f32; 3],
    _pad1: f32,
    down: [f32; 3],
    _pad2: f32,
    forward: [f32; 3],
    _pad3: f32,
}
708
709impl SceneDdaPerGridCamera {
710    fn from_camera(c: &Camera) -> Self {
711        Self {
712            pos: c.position,
713            _pad0: 0.0,
714            right: c.right,
715            _pad1: 0.0,
716            down: c.down,
717            _pad2: 0.0,
718            forward: c.forward,
719            _pad3: 0.0,
720        }
721    }
722}
723
/// Uniform block for the scene-DDA (multi-grid) compute pass.
///
/// `#[repr(C)]` with explicit pads so every group of fields fills whole
/// 16-byte rows; the struct totals 176 bytes (five 16-byte scalar rows,
/// the 64-byte `sky_cam`, and two 16-byte shade vectors).
/// NOTE(review): the field order presumably mirrors the uniform struct
/// in `scene_dda.wgsl` — keep the two in step.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaUniform {
    // Row 0: vertical FOV (radians), number of grids, outer-step cap, pad.
    fov_y_rad: f32,
    grid_count: u32,
    max_outer_steps: u32,
    _pad0: u32,
    // Row 1: screen size as two `u32`s, padded out to 16 bytes.
    screen_size: [u32; 2],
    _pad1: [u32; 2],
    /// GPU.8 — `[r, g, b, fog_near]`. The `near` distance is packed
    /// into the colour's alpha channel to keep std140 alignment
    /// tidy (a bare `f32` after the `vec4` would force extra pads).
    fog_color: [f32; 4],
    // Distance at which fog is fully opaque (see `GpuRenderer::fog_far`).
    fog_far: f32,
    /// GPU.9 — `1` when the sprite pass is active (scene pass then
    /// records `best_t` into the depth buffer), `0` otherwise.
    write_depth: u32,
    /// Occupancy paging: words per storage page (see
    /// `scene::split_occupancy_pages`). Only consulted by the shader
    /// when `occ_num_pages > 1`.
    occ_page_words: u32,
    /// Number of real occupancy pages (1 on multi-GiB GPUs → the
    /// shader takes a branch-free single-page read).
    occ_num_pages: u32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` marches at mip
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// count. `0` disables LOD (always mip-0).
    mip_scan_dist: f32,
    // Pads completing the `mip_scan_dist` row.
    _pad2: u32,
    _pad3: u32,
    _pad4: u32,
    /// World camera used only to derive the per-pixel sky direction —
    /// always valid, so a `grid_count == 0` (sprite-only / empty) scene
    /// still paints a proper sky instead of a degenerate `(0,0,1)`
    /// (whose `atan2(0,0)` sky lookup samples black).
    sky_cam: SceneDdaPerGridCamera,
    /// Per-face side-shade intensities (voxlap setsideshades), each the
    /// u8 shade subtracted from a voxel's brightness byte at a hit.
    /// `side_shades0 = (top, bot, left, right)`,
    /// `side_shades1 = (up, down, _, _)`. All-zero = no shading.
    side_shades0: [i32; 4],
    side_shades1: [i32; 4],
}
768
/// Uniform block for the grid-DDA compute pass.
///
/// `#[repr(C)]`, 112 bytes in seven 16-byte rows. The first three camera
/// vectors are each followed by an explicit pad; `camera_forward` shares
/// its row with `fov_y_rad` instead.
/// NOTE(review): the field order presumably mirrors the uniform struct
/// in the grid-DDA WGSL — keep the two in step.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct GridDdaUniform {
    // Camera position and basis, one vector per 16-byte row.
    camera_pos: [f32; 3],
    _pad0: f32,
    camera_right: [f32; 3],
    _pad1: f32,
    camera_down: [f32; 3],
    _pad2: f32,
    camera_forward: [f32; 3],
    // Vertical FOV in radians; fills the fourth slot of the forward row.
    fov_y_rad: f32,
    // Row: screen size, `vsid`, outer-step cap.
    screen_size: [u32; 2],
    // NOTE(review): presumably the voxlap per-chunk side length — confirm.
    vsid: u32,
    max_outer_steps: u32,
    // Row: grid extent in chunks per axis, padded.
    chunks_dims: [u32; 3],
    _pad3: u32,
    // Row: signed chunk coordinate of the grid origin, padded.
    origin_chunk: [i32; 3],
    _pad4: u32,
}
788
/// Uniform block for the single-chunk DDA compute pass.
///
/// `#[repr(C)]`, 80 bytes in five 16-byte rows. Same camera prefix as
/// `GridDdaUniform`: three padded vectors, then `camera_forward` sharing
/// its row with `fov_y_rad`.
/// NOTE(review): the field order presumably mirrors the uniform struct
/// in the chunk-DDA WGSL — keep the two in step.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct ChunkDdaUniform {
    // Camera position and basis, one vector per 16-byte row.
    camera_pos: [f32; 3],
    _pad0: f32,
    camera_right: [f32; 3],
    _pad1: f32,
    camera_down: [f32; 3],
    _pad2: f32,
    camera_forward: [f32; 3],
    // Vertical FOV in radians; fills the fourth slot of the forward row.
    fov_y_rad: f32,
    // Row: screen size, `vsid`, scan-distance cap.
    screen_size: [u32; 2],
    // NOTE(review): presumably the voxlap chunk side length — confirm.
    vsid: u32,
    max_scan_dist: u32,
}
804
805impl GpuRenderer {
806    /// Stand up the device + surface + swapchain on `window`. Async
807    /// because `wgpu::Adapter`/`Device` requests are.
808    ///
809    /// `window` is any [`raw-window-handle`] provider (winit, SDL,
810    /// GLFW, …) wrapped in an `Arc`; `size` is its initial physical
811    /// framebuffer size in pixels — passed explicitly so the renderer
812    /// stays decoupled from any one windowing library's size API.
813    ///
814    /// [`raw-window-handle`]: raw_window_handle
815    ///
816    /// # Errors
817    /// Returns [`GpuInitError`] if surface creation, adapter
818    /// selection, or device request fails. Hosts treat any error as
819    /// "fall back to the CPU path".
820    pub async fn new<W>(
821        window: Arc<W>,
822        size: (u32, u32),
823        settings: GpuRendererSettings,
824    ) -> Result<Self, GpuInitError>
825    where
826        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
827    {
828        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle());
829        let surface = instance.create_surface(window.clone())?;
830        let adapter = Self::request_adapter(&instance, Some(&surface), settings).await?;
831        let (device, queue) = Self::request_device(&adapter).await?;
832        Ok(Self::finish_init(
833            &adapter, device, queue, surface, size, settings,
834        ))
835    }
836
837    /// wasm/WebGPU: build the renderer against an HTML `canvas`. No
838    /// `Send + Sync` bound — wgpu's surface/device/queue are `!Send` on
839    /// the `+atomics` shared-memory wasm build, and the browser host is
840    /// single-threaded (`Rc<RefCell<…>>`). The native generic-`W` entry
841    /// (which carries the bound) isn't reachable on wasm.
842    ///
843    /// Probes for an adapter **before** `create_surface`: on wasm,
844    /// creating the surface calls `canvas.getContext("webgpu")`, which
845    /// permanently locks the canvas's context type. If we bound it and
846    /// then found no adapter, a CPU/WebGL2 fallback on the *same* canvas
847    /// (the facade clones the handle, but it's the same DOM element)
848    /// would fail with "no webgl2 context". Probing first leaves the
849    /// canvas pristine when WebGPU is unavailable.
850    ///
851    /// # Errors
852    /// See [`Self::new`].
853    #[cfg(target_arch = "wasm32")]
854    pub async fn new_from_canvas(
855        canvas: web_sys::HtmlCanvasElement,
856        size: (u32, u32),
857        settings: GpuRendererSettings,
858    ) -> Result<Self, GpuInitError> {
859        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle());
860        // Probe adapter AND device before binding the canvas — both
861        // `requestAdapter` and `requestDevice` can fail on wasm, and
862        // `create_surface` permanently locks the canvas to a WebGPU
863        // context. Creating the surface last keeps the canvas pristine
864        // for the CPU/WebGL2 fallback on any GPU-init failure.
865        let adapter = Self::request_adapter(&instance, None, settings).await?;
866        let (device, queue) = Self::request_device(&adapter).await?;
867        let surface = instance.create_surface(wgpu::SurfaceTarget::Canvas(canvas))?;
868        Ok(Self::finish_init(
869            &adapter, device, queue, surface, size, settings,
870        ))
871    }
872
873    /// Pick a GPU adapter at the settings' power preference. `None`
874    /// `compatible_surface` is used on the wasm canvas path so the probe
875    /// doesn't bind the canvas's context (see [`Self::new_from_canvas`]);
876    /// WebGPU exposes a single surface-independent adapter, so this is
877    /// safe there.
878    async fn request_adapter(
879        instance: &wgpu::Instance,
880        compatible_surface: Option<&wgpu::Surface<'static>>,
881        settings: GpuRendererSettings,
882    ) -> Result<wgpu::Adapter, GpuInitError> {
883        let power_preference = match settings.power_preference {
884            PowerPreference::Low => wgpu::PowerPreference::LowPower,
885            PowerPreference::High => wgpu::PowerPreference::HighPerformance,
886        };
887        instance
888            .request_adapter(&wgpu::RequestAdapterOptions {
889                power_preference,
890                compatible_surface,
891                force_fallback_adapter: false,
892            })
893            .await
894            .map_err(|_| GpuInitError::NoAdapter)
895    }
896
897    /// Request the device + queue from `adapter`. Pulled out of
898    /// [`Self::finish_init`] so the wasm canvas path can validate the
899    /// device **before** `create_surface` binds the canvas's WebGPU
900    /// context — if the device request fails (e.g. a browser that
901    /// rejects a wgpu-sent limit), the canvas stays pristine for the
902    /// CPU/WebGL2 fallback instead of being poisoned.
903    async fn request_device(
904        adapter: &wgpu::Adapter,
905    ) -> Result<(wgpu::Device, wgpu::Queue), GpuInitError> {
906        Ok(adapter
907            .request_device(&wgpu::DeviceDescriptor {
908                label: Some("roxlap-gpu device"),
909                required_features: wgpu::Features::empty(),
910                required_limits: pick_required_limits(&adapter.limits()),
911                experimental_features: wgpu::ExperimentalFeatures::disabled(),
912                memory_hints: wgpu::MemoryHints::default(),
913                trace: wgpu::Trace::Off,
914            })
915            .await?)
916    }
917
918    /// Shared swapchain → sky/sampler setup, run after the adapter +
919    /// device + surface exist (the surface comes from a window handle on
920    /// native, or an HTML canvas on wasm — created last on wasm so a
921    /// failed device request never touches the canvas).
922    fn finish_init(
923        adapter: &wgpu::Adapter,
924        device: wgpu::Device,
925        queue: wgpu::Queue,
926        surface: wgpu::Surface<'static>,
927        size: (u32, u32),
928        settings: GpuRendererSettings,
929    ) -> Self {
930        let info = adapter.get_info();
931        let adapter_info = format!(
932            "{name} ({backend:?}, {device_type:?})",
933            name = info.name,
934            backend = info.backend,
935            device_type = info.device_type,
936        );
937
938        let caps = surface.get_capabilities(adapter);
939        // Pick a NON-sRGB, 8-bit swapchain format. Voxlap colours are
940        // already sRGB-encoded (the slab bytes are display-ready,
941        // matching what the CPU softbuffer path writes straight to the
942        // framebuffer with no conversion); an sRGB swapchain would
943        // re-apply the gamma curve, washing the look out. We also
944        // *prefer 8-bit BGRA/RGBA* over any other non-sRGB format: some
945        // adapters (e.g. NVK) advertise a 16-bit-unorm format first,
946        // and wgpu 29 gates `create_view` on 16-bit-norm formats behind
947        // the `TEXTURE_FORMAT_16BIT_NORM` device feature (which we don't
948        // enable, to stay WebGPU-portable). Falls back to the first
949        // non-sRGB format, then `caps.formats[0]`.
950        let surface_format = caps
951            .formats
952            .iter()
953            .copied()
954            .find(|f| {
955                matches!(
956                    f,
957                    wgpu::TextureFormat::Bgra8Unorm | wgpu::TextureFormat::Rgba8Unorm
958                )
959            })
960            .or_else(|| caps.formats.iter().copied().find(|f| !f.is_srgb()))
961            .unwrap_or(caps.formats[0]);
962        let present_mode = if settings.uncapped_present {
963            pick_present_mode(&caps.present_modes)
964        } else {
965            wgpu::PresentMode::Fifo
966        };
967        // GPU.11.2 — surface the present mode: `Fifo` is vsync-capped
968        // (FPS pinned to refresh rate → compute optimisations like the
969        // mip LOD won't show up in the FPS counter). Mailbox/Immediate
970        // are uncapped. Wayland under Mesa frequently offers only Fifo.
971        eprintln!(
972            "roxlap-gpu: present mode = {present_mode:?} (available: {:?})",
973            caps.present_modes,
974        );
975        let (init_w, init_h) = size;
976        let surface_config = wgpu::SurfaceConfiguration {
977            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
978            format: surface_format,
979            width: init_w.max(1),
980            height: init_h.max(1),
981            present_mode,
982            alpha_mode: caps.alpha_modes[0],
983            view_formats: vec![],
984            desired_maximum_frame_latency: 2,
985        };
986        surface.configure(&device, &surface_config);
987
988        // GPU.8 default sky: a 1×1 mid-grey texture. Hosts replace
989        // it via `set_sky_panorama` with a real equirectangular
990        // panorama; the default stops the shader sampling
991        // uninitialised memory before that happens.
992        let default_sky_pixel = [0x80u8, 0x80, 0x80, 0xff];
993        let (sky_texture, sky_view) = create_sky_texture(&device, 1, 1, &default_sky_pixel);
994        queue.write_texture(
995            wgpu::TexelCopyTextureInfo {
996                texture: &sky_texture,
997                mip_level: 0,
998                origin: wgpu::Origin3d::ZERO,
999                aspect: wgpu::TextureAspect::All,
1000            },
1001            &default_sky_pixel,
1002            wgpu::TexelCopyBufferLayout {
1003                offset: 0,
1004                bytes_per_row: Some(4),
1005                rows_per_image: Some(1),
1006            },
1007            wgpu::Extent3d {
1008                width: 1,
1009                height: 1,
1010                depth_or_array_layers: 1,
1011            },
1012        );
1013        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
1014            label: Some("roxlap-gpu sky_sampler"),
1015            // Voxlap-convention panorama: u = elevation [0, 1]
1016            // (Repeat is a no-op since values don't go outside),
1017            // v = azimuth (wraps 360° — Repeat is required).
1018            address_mode_u: wgpu::AddressMode::Repeat,
1019            address_mode_v: wgpu::AddressMode::Repeat,
1020            address_mode_w: wgpu::AddressMode::ClampToEdge,
1021            mag_filter: wgpu::FilterMode::Linear,
1022            min_filter: wgpu::FilterMode::Linear,
1023            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1024            ..Default::default()
1025        });
1026
1027        Self {
1028            surface,
1029            surface_config,
1030            device,
1031            queue,
1032            adapter_info,
1033            clear_colour: settings.clear_colour,
1034            frame_count: 0,
1035            flip_x: false,
1036            chunk_dda: None,
1037            grid_dda: None,
1038            scene_dda: None,
1039            sky_texture,
1040            sky_view,
1041            sky_sampler,
1042            // Fog disabled by default — voxlap's CPU rasterizer
1043            // also runs without fog in the scene-demo, so matching
1044            // it means no GPU fog out of the box. Hosts can opt in
1045            // via `set_fog` (e.g. for atmospheric far-LOD masking).
1046            fog_color: [0.66, 0.74, 0.88],
1047            fog_near: 0.0,
1048            fog_far: 1.0e30,
1049            sprite_registry: None,
1050            sprite_model_dda: None,
1051            // GPU.10.4 — default LOD threshold: step to a coarser mip
1052            // once a voxel projects below 4 px. Empirically the best
1053            // quality/cost tradeoff; the host can override.
1054            sprite_lod_px: 4.0,
1055            // GPU.11.1 — matches the CPU demo's mip_scan_dist=64.
1056            scene_mip_scan_dist: 64.0,
1057            scene_side_shades: [[0; 4]; 2],
1058            last_fov_y_rad: 0.0,
1059            pending_frame: None,
1060            line_resources: None,
1061            line_vbuf: None,
1062            line_vbuf_cap: 0,
1063            image_resources: None,
1064            image_vbuf: None,
1065            image_vbuf_cap: 0,
1066            images: Vec::new(),
1067            #[cfg(feature = "hud")]
1068            egui_renderer: None,
1069        }
1070    }
1071
1072    /// Synchronous wrapper for hosts that don't have an async
1073    /// runtime. Internally `pollster::block_on`s [`Self::new`].
1074    ///
1075    /// # Errors
1076    /// See [`Self::new`].
1077    #[cfg(not(target_arch = "wasm32"))]
1078    pub fn new_blocking<W>(
1079        window: Arc<W>,
1080        size: (u32, u32),
1081        settings: GpuRendererSettings,
1082    ) -> Result<Self, GpuInitError>
1083    where
1084        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
1085    {
1086        pollster::block_on(Self::new(window, size, settings))
1087    }
1088
1089    /// Human-readable adapter description — name + backend +
1090    /// device type. The demo host prints this in the title bar.
1091    pub fn adapter_info(&self) -> &str {
1092        &self.adapter_info
1093    }
1094
1095    /// Borrow the underlying wgpu device — hosts use this to build
1096    /// chunk uploads (`GpuChunkResident::upload(gpu.device(), …)`).
1097    pub fn device(&self) -> &wgpu::Device {
1098        &self.device
1099    }
1100
1101    /// Borrow the wgpu queue — hosts use this for read-back paths
1102    /// (`GpuChunkResident::read_voxel_blocking(gpu.device(), gpu.queue(), …)`).
1103    pub fn queue(&self) -> &wgpu::Queue {
1104        &self.queue
1105    }
1106
1107    /// GPU.8 — upload an equirectangular panorama as the scene's
1108    /// sky texture. `rgba` is row-major, `width × height` pixels,
1109    /// 4 bytes per pixel (R, G, B, A). The shader samples it with
1110    /// `u = atan2(dir.x, dir.y) / (2π) + 0.5` (azimuth) and
1111    /// `v = acos(-dir.z) / π` (elevation), matching standard
1112    /// equirectangular layout (top of image = zenith for voxlap's
1113    /// `+z = down` basis).
1114    /// Mirror the marched scene (and its line/image overlays) horizontally
1115    /// on present, leaving the egui overlay upright. See [`Self::flip_x`].
1116    pub fn set_flip_x(&mut self, flip: bool) {
1117        self.flip_x = flip;
1118    }
1119
1120    ///
1121    /// # Panics
1122    /// If `rgba.len() != (width * height * 4) as usize`.
1123    pub fn set_sky_panorama(&mut self, rgba: &[u8], width: u32, height: u32) {
1124        assert_eq!(
1125            rgba.len(),
1126            (width as usize) * (height as usize) * 4,
1127            "set_sky_panorama: expected w*h*4 bytes, got {}",
1128            rgba.len(),
1129        );
1130        let (tex, view) = create_sky_texture(&self.device, width, height, rgba);
1131        // Upload pixel data via `queue.write_texture` so we don't
1132        // have to map the buffer manually.
1133        self.queue.write_texture(
1134            wgpu::TexelCopyTextureInfo {
1135                texture: &tex,
1136                mip_level: 0,
1137                origin: wgpu::Origin3d::ZERO,
1138                aspect: wgpu::TextureAspect::All,
1139            },
1140            rgba,
1141            wgpu::TexelCopyBufferLayout {
1142                offset: 0,
1143                bytes_per_row: Some(width * 4),
1144                rows_per_image: Some(height),
1145            },
1146            wgpu::Extent3d {
1147                width,
1148                height,
1149                depth_or_array_layers: 1,
1150            },
1151        );
1152        self.sky_texture = tex;
1153        self.sky_view = view;
1154    }
1155
1156    /// GPU.8 — set the fog blend. `color` is per-channel [0, 1];
1157    /// `near`/`far` are world-space ray distances in voxel units.
1158    /// Hits with `t < near` show their full colour; hits with
1159    /// `t > far` show `color` exclusively; in between is a
1160    /// smoothstep blend.
1161    pub fn set_fog(&mut self, color: [f32; 3], near: f32, far: f32) {
1162        self.fog_color = color;
1163        self.fog_near = near;
1164        self.fog_far = far.max(near + 1.0);
1165    }
1166
1167    /// Re-configure the swapchain to a new physical size. Call from
1168    /// `WindowEvent::Resized`. Drops the chunk-DDA storage texture
1169    /// so [`Self::render_chunk`] rebuilds it at the new size.
1170    pub fn resize(&mut self, width: u32, height: u32) {
1171        if width == 0 || height == 0 {
1172            return;
1173        }
1174        self.surface_config.width = width;
1175        self.surface_config.height = height;
1176        self.surface.configure(&self.device, &self.surface_config);
1177        self.chunk_dda = None;
1178        self.grid_dda = None;
1179        self.scene_dda = None;
1180    }
1181
1182    /// Acquire the next swapchain frame, or `None` to skip this frame.
1183    /// wgpu 29's `get_current_texture` returns a
1184    /// [`wgpu::CurrentSurfaceTexture`] status enum (was
1185    /// `Result<_, SurfaceError>`): an outdated/lost surface reconfigures
1186    /// and skips, transient statuses just skip.
1187    fn acquire_frame(&self) -> Option<wgpu::SurfaceTexture> {
1188        use wgpu::CurrentSurfaceTexture as C;
1189        match self.surface.get_current_texture() {
1190            C::Success(t) | C::Suboptimal(t) => Some(t),
1191            C::Outdated | C::Lost => {
1192                self.surface.configure(&self.device, &self.surface_config);
1193                None
1194            }
1195            C::Timeout | C::Occluded | C::Validation => None,
1196        }
1197    }
1198
1199    /// GPU.1 render: single render pass clearing the swapchain to a
1200    /// slowly drifting colour, then presenting. Voxels arrive in
1201    /// GPU.3+.
1202    pub fn render(&mut self) {
1203        let Some(surf_tex) = self.acquire_frame() else {
1204            return;
1205        };
1206        let view = surf_tex
1207            .texture
1208            .create_view(&wgpu::TextureViewDescriptor::default());
1209
1210        // Slow colour drift so the user can tell the GPU path is
1211        // actually presenting frames vs. e.g. a frozen window.
1212        // Wrap at 2π/0.005 frames (~1257) so the cast stays exact.
1213        let phase = f64::from(self.frame_count % 1257) * 0.005;
1214        let [r, g, b] = self.clear_colour;
1215        let drift = (phase.sin() * 0.04 + 0.04).clamp(0.0, 0.1);
1216        let clear = wgpu::Color {
1217            r: (r + drift).clamp(0.0, 1.0),
1218            g: (g + drift * 0.5).clamp(0.0, 1.0),
1219            b: (b + drift * 0.25).clamp(0.0, 1.0),
1220            a: 1.0,
1221        };
1222
1223        let mut encoder = self
1224            .device
1225            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1226                label: Some("roxlap-gpu encoder"),
1227            });
1228        {
1229            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1230                label: Some("roxlap-gpu clear"),
1231                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1232                    view: &view,
1233                    depth_slice: None,
1234                    resolve_target: None,
1235                    ops: wgpu::Operations {
1236                        load: wgpu::LoadOp::Clear(clear),
1237                        store: wgpu::StoreOp::Store,
1238                    },
1239                })],
1240                depth_stencil_attachment: None,
1241                timestamp_writes: None,
1242                occlusion_query_set: None,
1243                multiview_mask: None,
1244            });
1245        }
1246        self.queue.submit(std::iter::once(encoder.finish()));
1247        surf_tex.present();
1248        self.frame_count = self.frame_count.wrapping_add(1);
1249    }
1250
1251    /// GPU.3 single-chunk render. Dispatches `chunk_dda.wgsl`
1252    /// against `resident`'s storage buffers, then blits the
1253    /// low-res storage texture to the swapchain. `camera.position`
1254    /// is in **chunk-local** voxel units (host translates from
1255    /// world coords). `max_scan_dist` caps the per-pixel DDA loop —
1256    /// scene-demo wires `+` / `-` through this each frame.
1257    ///
1258    /// # Panics
1259    /// Internally `expect`s the chunk-DDA resources to be built —
1260    /// they are constructed at the top of this function if missing.
1261    /// Cannot fire in normal control flow.
1262    pub fn render_chunk(
1263        &mut self,
1264        resident: &GpuChunkResident,
1265        camera: &Camera,
1266        max_scan_dist: u32,
1267    ) {
1268        let Some(surf_tex) = self.acquire_frame() else {
1269            return;
1270        };
1271        let surf_view = surf_tex
1272            .texture
1273            .create_view(&wgpu::TextureViewDescriptor::default());
1274
1275        let surface_w = self.surface_config.width;
1276        let surface_h = self.surface_config.height;
1277        let surface_format = self.surface_config.format;
1278
1279        // Lazy-build chunk-DDA resources; rebuild when the swapchain
1280        // grew or shrank.
1281        let needs_build = match &self.chunk_dda {
1282            Some(r) => r.storage_size != (surface_w, surface_h),
1283            None => true,
1284        };
1285        if needs_build {
1286            self.chunk_dda = Some(self.build_chunk_dda(surface_w, surface_h, surface_format));
1287        }
1288        let dda = self.chunk_dda.as_ref().expect("just built");
1289
1290        // Update uniforms.
1291        let uniform = ChunkDdaUniform {
1292            camera_pos: camera.position,
1293            _pad0: 0.0,
1294            camera_right: camera.right,
1295            _pad1: 0.0,
1296            camera_down: camera.down,
1297            _pad2: 0.0,
1298            camera_forward: camera.forward,
1299            fov_y_rad: camera.fov_y_rad,
1300            screen_size: [surface_w, surface_h],
1301            vsid: resident.vsid,
1302            max_scan_dist,
1303        };
1304        self.queue
1305            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1306
1307        // Per-frame DDA bind group — references the chunk's buffers
1308        // so we rebuild every frame (the resident can change between
1309        // calls).
1310        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1311            label: Some("roxlap-gpu chunk_dda.bg"),
1312            layout: &dda.bgl_dda,
1313            entries: &[
1314                wgpu::BindGroupEntry {
1315                    binding: 0,
1316                    resource: dda.uniform_buf.as_entire_binding(),
1317                },
1318                wgpu::BindGroupEntry {
1319                    binding: 1,
1320                    resource: resident.occupancy.as_entire_binding(),
1321                },
1322                wgpu::BindGroupEntry {
1323                    binding: 2,
1324                    resource: resident.color_offsets.as_entire_binding(),
1325                },
1326                wgpu::BindGroupEntry {
1327                    binding: 3,
1328                    resource: resident.colors.as_entire_binding(),
1329                },
1330                wgpu::BindGroupEntry {
1331                    binding: 4,
1332                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1333                },
1334            ],
1335        });
1336
1337        let mut encoder = self
1338            .device
1339            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1340                label: Some("roxlap-gpu chunk encoder"),
1341            });
1342        {
1343            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1344                label: Some("roxlap-gpu chunk_dda compute"),
1345                timestamp_writes: None,
1346            });
1347            cpass.set_pipeline(&dda.pipeline_dda);
1348            cpass.set_bind_group(0, &dda_bg, &[]);
1349            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1350        }
1351        {
1352            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1353                label: Some("roxlap-gpu chunk_dda blit"),
1354                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1355                    view: &surf_view,
1356                    depth_slice: None,
1357                    resolve_target: None,
1358                    ops: wgpu::Operations {
1359                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1360                        store: wgpu::StoreOp::Store,
1361                    },
1362                })],
1363                depth_stencil_attachment: None,
1364                timestamp_writes: None,
1365                occlusion_query_set: None,
1366                multiview_mask: None,
1367            });
1368            rpass.set_pipeline(&dda.pipeline_blit);
1369            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1370            rpass.draw(0..3, 0..1);
1371        }
1372        self.queue.submit(std::iter::once(encoder.finish()));
1373        surf_tex.present();
1374        self.frame_count = self.frame_count.wrapping_add(1);
1375    }
1376
1377    fn build_chunk_dda(
1378        &self,
1379        width: u32,
1380        height: u32,
1381        surface_format: wgpu::TextureFormat,
1382    ) -> ChunkDdaResources {
1383        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1384            label: Some("roxlap-gpu chunk_dda.storage"),
1385            size: wgpu::Extent3d {
1386                width,
1387                height,
1388                depth_or_array_layers: 1,
1389            },
1390            mip_level_count: 1,
1391            sample_count: 1,
1392            dimension: wgpu::TextureDimension::D2,
1393            format: wgpu::TextureFormat::Rgba8Unorm,
1394            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1395            view_formats: &[],
1396        });
1397        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1398
1399        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1400            label: Some("roxlap-gpu chunk_dda.uniform"),
1401            size: std::mem::size_of::<ChunkDdaUniform>() as u64,
1402            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1403            mapped_at_creation: false,
1404        });
1405
1406        let dda_shader = self
1407            .device
1408            .create_shader_module(wgpu::ShaderModuleDescriptor {
1409                label: Some("chunk_dda.wgsl"),
1410                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/chunk_dda.wgsl").into()),
1411            });
1412        let bgl_dda = self
1413            .device
1414            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1415                label: Some("roxlap-gpu chunk_dda.bgl"),
1416                entries: &[
1417                    bgl_uniform_entry(0),
1418                    bgl_storage_entry(1, true),
1419                    bgl_storage_entry(2, true),
1420                    bgl_storage_entry(3, true),
1421                    wgpu::BindGroupLayoutEntry {
1422                        binding: 4,
1423                        visibility: wgpu::ShaderStages::COMPUTE,
1424                        ty: wgpu::BindingType::StorageTexture {
1425                            access: wgpu::StorageTextureAccess::WriteOnly,
1426                            format: wgpu::TextureFormat::Rgba8Unorm,
1427                            view_dimension: wgpu::TextureViewDimension::D2,
1428                        },
1429                        count: None,
1430                    },
1431                ],
1432            });
1433        let dda_pl = self
1434            .device
1435            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1436                label: Some("roxlap-gpu chunk_dda.layout"),
1437                bind_group_layouts: &[Some(&bgl_dda)],
1438                immediate_size: 0,
1439            });
1440        let pipeline_dda = self
1441            .device
1442            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1443                label: Some("roxlap-gpu chunk_dda.pipeline"),
1444                layout: Some(&dda_pl),
1445                module: &dda_shader,
1446                entry_point: Some("render_chunk"),
1447                compilation_options: wgpu::PipelineCompilationOptions::default(),
1448                cache: None,
1449            });
1450
1451        // Fullscreen-triangle blit upscales the storage texture into
1452        // the swapchain. Nearest filter keeps the retro pixel look.
1453        let blit_shader = self
1454            .device
1455            .create_shader_module(wgpu::ShaderModuleDescriptor {
1456                label: Some("blit.wgsl"),
1457                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1458            });
1459        let bgl_blit = self
1460            .device
1461            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1462                label: Some("roxlap-gpu chunk_dda.blit_bgl"),
1463                entries: &[
1464                    wgpu::BindGroupLayoutEntry {
1465                        binding: 0,
1466                        visibility: wgpu::ShaderStages::FRAGMENT,
1467                        ty: wgpu::BindingType::Texture {
1468                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1469                            view_dimension: wgpu::TextureViewDimension::D2,
1470                            multisampled: false,
1471                        },
1472                        count: None,
1473                    },
1474                    wgpu::BindGroupLayoutEntry {
1475                        binding: 1,
1476                        visibility: wgpu::ShaderStages::FRAGMENT,
1477                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1478                        count: None,
1479                    },
1480                ],
1481            });
1482        let blit_pl = self
1483            .device
1484            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1485                label: Some("roxlap-gpu chunk_dda.blit_layout"),
1486                bind_group_layouts: &[Some(&bgl_blit)],
1487                immediate_size: 0,
1488            });
1489        let pipeline_blit = self
1490            .device
1491            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1492                label: Some("roxlap-gpu chunk_dda.blit_pipeline"),
1493                layout: Some(&blit_pl),
1494                vertex: wgpu::VertexState {
1495                    module: &blit_shader,
1496                    entry_point: Some("vs_main"),
1497                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1498                    buffers: &[],
1499                },
1500                fragment: Some(wgpu::FragmentState {
1501                    module: &blit_shader,
1502                    entry_point: Some("fs_main"),
1503                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1504                    targets: &[Some(wgpu::ColorTargetState {
1505                        format: surface_format,
1506                        blend: None,
1507                        write_mask: wgpu::ColorWrites::ALL,
1508                    })],
1509                }),
1510                primitive: wgpu::PrimitiveState::default(),
1511                depth_stencil: None,
1512                multisample: wgpu::MultisampleState::default(),
1513                multiview_mask: None,
1514                cache: None,
1515            });
1516        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1517            label: Some("roxlap-gpu chunk_dda.blit_sampler"),
1518            address_mode_u: wgpu::AddressMode::ClampToEdge,
1519            address_mode_v: wgpu::AddressMode::ClampToEdge,
1520            address_mode_w: wgpu::AddressMode::ClampToEdge,
1521            mag_filter: wgpu::FilterMode::Nearest,
1522            min_filter: wgpu::FilterMode::Nearest,
1523            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1524            ..Default::default()
1525        });
1526        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1527            label: Some("roxlap-gpu chunk_dda.blit_bg"),
1528            layout: &bgl_blit,
1529            entries: &[
1530                wgpu::BindGroupEntry {
1531                    binding: 0,
1532                    resource: wgpu::BindingResource::TextureView(&storage_view),
1533                },
1534                wgpu::BindGroupEntry {
1535                    binding: 1,
1536                    resource: wgpu::BindingResource::Sampler(&sampler),
1537                },
1538            ],
1539        });
1540
1541        ChunkDdaResources {
1542            storage_size: (width, height),
1543            storage_view,
1544            uniform_buf,
1545            bgl_dda,
1546            pipeline_dda,
1547            blit_bg,
1548            pipeline_blit,
1549            _sampler: sampler,
1550        }
1551    }
1552
1553    /// GPU.4 render — outer DDA over chunk indices + inner DDA into
1554    /// non-empty chunks. `camera.position` is in **grid-local**
1555    /// voxel units. `max_outer_steps` caps how many chunks the
1556    /// outer DDA may traverse per ray (scene-demo wires `+ / -`
1557    /// through this).
1558    ///
1559    /// # Panics
1560    /// Internally `expect`s the grid-DDA resources to be built;
1561    /// they are constructed at the top of this function if missing.
1562    pub fn render_grid(&mut self, grid: &GpuGridResident, camera: &Camera, max_outer_steps: u32) {
1563        let Some(surf_tex) = self.acquire_frame() else {
1564            return;
1565        };
1566        let surf_view = surf_tex
1567            .texture
1568            .create_view(&wgpu::TextureViewDescriptor::default());
1569
1570        let surface_w = self.surface_config.width;
1571        let surface_h = self.surface_config.height;
1572        let surface_format = self.surface_config.format;
1573
1574        let needs_build = match &self.grid_dda {
1575            Some(r) => r.storage_size != (surface_w, surface_h),
1576            None => true,
1577        };
1578        if needs_build {
1579            self.grid_dda = Some(self.build_grid_dda(surface_w, surface_h, surface_format));
1580        }
1581        let dda = self.grid_dda.as_ref().expect("just built");
1582
1583        let uniform = GridDdaUniform {
1584            camera_pos: camera.position,
1585            _pad0: 0.0,
1586            camera_right: camera.right,
1587            _pad1: 0.0,
1588            camera_down: camera.down,
1589            _pad2: 0.0,
1590            camera_forward: camera.forward,
1591            fov_y_rad: camera.fov_y_rad,
1592            screen_size: [surface_w, surface_h],
1593            vsid: grid.vsid,
1594            max_outer_steps,
1595            chunks_dims: grid.chunks_dims,
1596            _pad3: 0,
1597            origin_chunk: grid.origin_chunk,
1598            _pad4: 0,
1599        };
1600        self.queue
1601            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1602
1603        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1604            label: Some("roxlap-gpu grid_dda.bg"),
1605            layout: &dda.bgl_dda,
1606            entries: &[
1607                wgpu::BindGroupEntry {
1608                    binding: 0,
1609                    resource: dda.uniform_buf.as_entire_binding(),
1610                },
1611                wgpu::BindGroupEntry {
1612                    binding: 1,
1613                    resource: grid.occupancy.as_entire_binding(),
1614                },
1615                wgpu::BindGroupEntry {
1616                    binding: 2,
1617                    resource: grid.color_offsets.as_entire_binding(),
1618                },
1619                wgpu::BindGroupEntry {
1620                    binding: 3,
1621                    resource: grid.colors.as_entire_binding(),
1622                },
1623                wgpu::BindGroupEntry {
1624                    binding: 4,
1625                    resource: grid.chunk_colors_base.as_entire_binding(),
1626                },
1627                wgpu::BindGroupEntry {
1628                    binding: 5,
1629                    resource: grid.chunk_occupancy.as_entire_binding(),
1630                },
1631                wgpu::BindGroupEntry {
1632                    binding: 6,
1633                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1634                },
1635            ],
1636        });
1637
1638        let mut encoder = self
1639            .device
1640            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1641                label: Some("roxlap-gpu grid encoder"),
1642            });
1643        {
1644            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1645                label: Some("roxlap-gpu grid_dda compute"),
1646                timestamp_writes: None,
1647            });
1648            cpass.set_pipeline(&dda.pipeline_dda);
1649            cpass.set_bind_group(0, &dda_bg, &[]);
1650            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1651        }
1652        {
1653            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1654                label: Some("roxlap-gpu grid_dda blit"),
1655                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1656                    view: &surf_view,
1657                    depth_slice: None,
1658                    resolve_target: None,
1659                    ops: wgpu::Operations {
1660                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1661                        store: wgpu::StoreOp::Store,
1662                    },
1663                })],
1664                depth_stencil_attachment: None,
1665                timestamp_writes: None,
1666                occlusion_query_set: None,
1667                multiview_mask: None,
1668            });
1669            rpass.set_pipeline(&dda.pipeline_blit);
1670            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1671            rpass.draw(0..3, 0..1);
1672        }
1673        self.queue.submit(std::iter::once(encoder.finish()));
1674        surf_tex.present();
1675        self.frame_count = self.frame_count.wrapping_add(1);
1676    }
1677
1678    fn build_grid_dda(
1679        &self,
1680        width: u32,
1681        height: u32,
1682        surface_format: wgpu::TextureFormat,
1683    ) -> GridDdaResources {
1684        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1685            label: Some("roxlap-gpu grid_dda.storage"),
1686            size: wgpu::Extent3d {
1687                width,
1688                height,
1689                depth_or_array_layers: 1,
1690            },
1691            mip_level_count: 1,
1692            sample_count: 1,
1693            dimension: wgpu::TextureDimension::D2,
1694            format: wgpu::TextureFormat::Rgba8Unorm,
1695            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1696            view_formats: &[],
1697        });
1698        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1699
1700        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1701            label: Some("roxlap-gpu grid_dda.uniform"),
1702            size: std::mem::size_of::<GridDdaUniform>() as u64,
1703            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1704            mapped_at_creation: false,
1705        });
1706
1707        let dda_shader = self
1708            .device
1709            .create_shader_module(wgpu::ShaderModuleDescriptor {
1710                label: Some("grid_dda.wgsl"),
1711                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/grid_dda.wgsl").into()),
1712            });
1713        let bgl_dda = self
1714            .device
1715            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1716                label: Some("roxlap-gpu grid_dda.bgl"),
1717                entries: &[
1718                    bgl_uniform_entry(0),
1719                    bgl_storage_entry(1, true),
1720                    bgl_storage_entry(2, true),
1721                    bgl_storage_entry(3, true),
1722                    bgl_storage_entry(4, true),
1723                    bgl_storage_entry(5, true),
1724                    wgpu::BindGroupLayoutEntry {
1725                        binding: 6,
1726                        visibility: wgpu::ShaderStages::COMPUTE,
1727                        ty: wgpu::BindingType::StorageTexture {
1728                            access: wgpu::StorageTextureAccess::WriteOnly,
1729                            format: wgpu::TextureFormat::Rgba8Unorm,
1730                            view_dimension: wgpu::TextureViewDimension::D2,
1731                        },
1732                        count: None,
1733                    },
1734                ],
1735            });
1736        let dda_pl = self
1737            .device
1738            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1739                label: Some("roxlap-gpu grid_dda.layout"),
1740                bind_group_layouts: &[Some(&bgl_dda)],
1741                immediate_size: 0,
1742            });
1743        let pipeline_dda = self
1744            .device
1745            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1746                label: Some("roxlap-gpu grid_dda.pipeline"),
1747                layout: Some(&dda_pl),
1748                module: &dda_shader,
1749                entry_point: Some("render_grid"),
1750                compilation_options: wgpu::PipelineCompilationOptions::default(),
1751                cache: None,
1752            });
1753
1754        let blit_shader = self
1755            .device
1756            .create_shader_module(wgpu::ShaderModuleDescriptor {
1757                label: Some("blit.wgsl"),
1758                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1759            });
1760        let bgl_blit = self
1761            .device
1762            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1763                label: Some("roxlap-gpu grid_dda.blit_bgl"),
1764                entries: &[
1765                    wgpu::BindGroupLayoutEntry {
1766                        binding: 0,
1767                        visibility: wgpu::ShaderStages::FRAGMENT,
1768                        ty: wgpu::BindingType::Texture {
1769                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1770                            view_dimension: wgpu::TextureViewDimension::D2,
1771                            multisampled: false,
1772                        },
1773                        count: None,
1774                    },
1775                    wgpu::BindGroupLayoutEntry {
1776                        binding: 1,
1777                        visibility: wgpu::ShaderStages::FRAGMENT,
1778                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1779                        count: None,
1780                    },
1781                ],
1782            });
1783        let blit_pl = self
1784            .device
1785            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1786                label: Some("roxlap-gpu grid_dda.blit_layout"),
1787                bind_group_layouts: &[Some(&bgl_blit)],
1788                immediate_size: 0,
1789            });
1790        let pipeline_blit = self
1791            .device
1792            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1793                label: Some("roxlap-gpu grid_dda.blit_pipeline"),
1794                layout: Some(&blit_pl),
1795                vertex: wgpu::VertexState {
1796                    module: &blit_shader,
1797                    entry_point: Some("vs_main"),
1798                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1799                    buffers: &[],
1800                },
1801                fragment: Some(wgpu::FragmentState {
1802                    module: &blit_shader,
1803                    entry_point: Some("fs_main"),
1804                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1805                    targets: &[Some(wgpu::ColorTargetState {
1806                        format: surface_format,
1807                        blend: None,
1808                        write_mask: wgpu::ColorWrites::ALL,
1809                    })],
1810                }),
1811                primitive: wgpu::PrimitiveState::default(),
1812                depth_stencil: None,
1813                multisample: wgpu::MultisampleState::default(),
1814                multiview_mask: None,
1815                cache: None,
1816            });
1817        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1818            label: Some("roxlap-gpu grid_dda.blit_sampler"),
1819            address_mode_u: wgpu::AddressMode::ClampToEdge,
1820            address_mode_v: wgpu::AddressMode::ClampToEdge,
1821            address_mode_w: wgpu::AddressMode::ClampToEdge,
1822            mag_filter: wgpu::FilterMode::Nearest,
1823            min_filter: wgpu::FilterMode::Nearest,
1824            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1825            ..Default::default()
1826        });
1827        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1828            label: Some("roxlap-gpu grid_dda.blit_bg"),
1829            layout: &bgl_blit,
1830            entries: &[
1831                wgpu::BindGroupEntry {
1832                    binding: 0,
1833                    resource: wgpu::BindingResource::TextureView(&storage_view),
1834                },
1835                wgpu::BindGroupEntry {
1836                    binding: 1,
1837                    resource: wgpu::BindingResource::Sampler(&sampler),
1838                },
1839            ],
1840        });
1841
1842        GridDdaResources {
1843            storage_size: (width, height),
1844            storage_view,
1845            uniform_buf,
1846            bgl_dda,
1847            pipeline_dda,
1848            blit_bg,
1849            pipeline_blit,
1850            _sampler: sampler,
1851        }
1852    }
1853
1854    /// GPU.5 render — multi-grid scene marcher. `cameras[i]` is the
1855    /// world camera transformed into grid `i`'s local frame
1856    /// (caller-supplied; see scene-demo's `redraw_gpu` for the
1857    /// glam-based transform). `fov_y_rad` is the shared vertical
1858    /// FOV; `max_outer_steps` caps per-ray chunk-DDA work for each
1859    /// grid.
1860    ///
1861    /// # Panics
1862    /// If `cameras.len() != scene.grid_count`.
1863    /// `cameras[i]` is grid `i`'s world camera transformed into that
1864    /// grid's local frame (the grid marcher works in grid-local space).
1865    /// `sprite_camera` is the **world** camera: instanced sprites carry
1866    /// world-space positions/transforms, so they must project through
1867    /// the untransformed world camera — not `cameras[0]`, which is only
1868    /// the world camera when grid 0 is at identity.
1869    pub fn render_scene(
1870        &mut self,
1871        scene: &GpuSceneResident,
1872        cameras: &[Camera],
1873        sprite_camera: &Camera,
1874        fov_y_rad: f32,
1875        max_outer_steps: u32,
1876    ) {
1877        assert_eq!(
1878            cameras.len(),
1879            scene.grid_count as usize,
1880            "render_scene: {} cameras supplied, scene has {} grids",
1881            cameras.len(),
1882            scene.grid_count,
1883        );
1884        self.last_fov_y_rad = fov_y_rad; // cached for pixel_ray (picking)
1885
1886        // Deferred present: drop any frame a prior render left
1887        // un-presented (a host that skipped present/paint_egui) so we
1888        // never hold two outstanding swapchain textures.
1889        self.pending_frame = None;
1890        let Some(surf_tex) = self.acquire_frame() else {
1891            return;
1892        };
1893        let surf_view = surf_tex
1894            .texture
1895            .create_view(&wgpu::TextureViewDescriptor::default());
1896
1897        let surface_w = self.surface_config.width;
1898        let surface_h = self.surface_config.height;
1899        let surface_format = self.surface_config.format;
1900
1901        let needs_build = match &self.scene_dda {
1902            Some(r) => r.storage_size != (surface_w, surface_h),
1903            None => true,
1904        };
1905        if needs_build {
1906            self.scene_dda = Some(self.build_scene_dda(surface_w, surface_h, surface_format));
1907        }
1908        // GPU.9 — materialise the sprite pipeline the first frame
1909        // sprites are present (before the immutable `dda` borrow).
1910        // GPU.10.0 — build the model-DDA pipeline the first frame a
1911        // sprite registry is present.
1912        if self.sprite_registry.is_some() && self.sprite_model_dda.is_none() {
1913            self.sprite_model_dda = Some(self.build_sprite_model_dda());
1914        }
1915        // GPU.10.3 — frustum-cull + screen-tile-bin the sprite instances
1916        // (needs &mut self for buffer growth, so before the immutable
1917        // scene_dda borrow). Captures (visible_count, tiles_x); None when
1918        // nothing is in view.
1919        let sprite_pass: Option<(u32, u32)> = if let Some(reg) = self.sprite_registry.as_mut() {
1920            if reg.instance_capacity > 0 {
1921                // World camera — sprite positions/transforms are world-
1922                // space (independent of any grid's transform).
1923                let cam = sprite_camera;
1924                #[allow(clippy::cast_precision_loss)]
1925                let aspect = surface_w as f32 / surface_h as f32;
1926                let half_h = (fov_y_rad * 0.5).tan();
1927                let frustum = sprite_model::ViewFrustum {
1928                    pos: cam.position,
1929                    right: cam.right,
1930                    down: cam.down,
1931                    forward: cam.forward,
1932                    half_w: half_h * aspect,
1933                    half_h,
1934                    far: 1.0e9,
1935                };
1936                let (visible, tiles_x, _tiles_y) = reg.cull_bin_upload(
1937                    &self.device,
1938                    &self.queue,
1939                    &frustum,
1940                    surface_w,
1941                    surface_h,
1942                    SPRITE_TILE_SIZE,
1943                    self.sprite_lod_px,
1944                );
1945                (visible > 0).then_some((visible, tiles_x))
1946            } else {
1947                None
1948            }
1949        } else {
1950            None
1951        };
1952        let dda = self.scene_dda.as_ref().expect("just built");
1953
1954        // Refresh the blit's flip flag each frame (offset 8, after the
1955        // width/height), so toggling the flip applies without a resize.
1956        self.queue.write_buffer(
1957            &dda.blit_dims,
1958            8,
1959            bytemuck::bytes_of(&[u32::from(self.flip_x), 0u32]),
1960        );
1961
1962        // Pack per-grid cameras into a runtime-sized storage buffer
1963        // (binding 15) — no fixed cap on grid count.
1964        let cam_vec: Vec<SceneDdaPerGridCamera> = cameras
1965            .iter()
1966            .map(SceneDdaPerGridCamera::from_camera)
1967            .collect();
1968        let grid_cameras = upload_grid_cameras(&self.device, &cam_vec);
1969        let uniform = SceneDdaUniform {
1970            fov_y_rad,
1971            grid_count: scene.grid_count,
1972            max_outer_steps,
1973            _pad0: 0,
1974            screen_size: [surface_w, surface_h],
1975            _pad1: [0; 2],
1976            fog_color: [
1977                self.fog_color[0],
1978                self.fog_color[1],
1979                self.fog_color[2],
1980                self.fog_near,
1981            ],
1982            fog_far: self.fog_far,
1983            // L3.1: always write scene depth. Costs one storage store per
1984            // pixel, and the depth is needed for sprite z-test, sprite-less
1985            // `pick_depth`, and `draw_lines` occlusion alike.
1986            write_depth: 1,
1987            occ_page_words: scene.occupancy_page_words,
1988            occ_num_pages: scene.occupancy_num_pages,
1989            mip_scan_dist: self.scene_mip_scan_dist,
1990            _pad2: 0,
1991            _pad3: 0,
1992            _pad4: 0,
1993            // Sky direction comes from the world (sprite) camera, so a
1994            // grid-less sprite-only scene still paints a real sky.
1995            sky_cam: SceneDdaPerGridCamera::from_camera(sprite_camera),
1996            side_shades0: self.scene_side_shades[0],
1997            side_shades1: self.scene_side_shades[1],
1998        };
1999        self.queue
2000            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
2001
2002        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2003            label: Some("roxlap-gpu scene_dda.bg"),
2004            layout: &dda.bgl_dda,
2005            entries: &[
2006                wgpu::BindGroupEntry {
2007                    binding: 0,
2008                    resource: dda.uniform_buf.as_entire_binding(),
2009                },
2010                // Occupancy page 0 at binding 1; pages 1..MAX_OCC_PAGES
2011                // at bindings 12.. (see GPU.X occupancy paging).
2012                wgpu::BindGroupEntry {
2013                    binding: 1,
2014                    resource: scene.occupancy_pages[0].as_entire_binding(),
2015                },
2016                wgpu::BindGroupEntry {
2017                    binding: 2,
2018                    resource: scene.all_color_offsets.as_entire_binding(),
2019                },
2020                wgpu::BindGroupEntry {
2021                    binding: 3,
2022                    resource: scene.all_colors.as_entire_binding(),
2023                },
2024                wgpu::BindGroupEntry {
2025                    binding: 4,
2026                    resource: scene.all_chunk_colors_base.as_entire_binding(),
2027                },
2028                wgpu::BindGroupEntry {
2029                    binding: 5,
2030                    resource: scene.all_chunk_occupancy.as_entire_binding(),
2031                },
2032                wgpu::BindGroupEntry {
2033                    binding: 6,
2034                    resource: scene.grid_static_meta.as_entire_binding(),
2035                },
2036                wgpu::BindGroupEntry {
2037                    binding: 7,
2038                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
2039                },
2040                wgpu::BindGroupEntry {
2041                    binding: 8,
2042                    resource: dda.framebuffer.as_entire_binding(),
2043                },
2044                wgpu::BindGroupEntry {
2045                    binding: 9,
2046                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
2047                },
2048                wgpu::BindGroupEntry {
2049                    binding: 10,
2050                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
2051                },
2052                wgpu::BindGroupEntry {
2053                    binding: 11,
2054                    resource: dda.depth_buffer.as_entire_binding(),
2055                },
2056                wgpu::BindGroupEntry {
2057                    binding: 12,
2058                    resource: scene.occupancy_pages[1].as_entire_binding(),
2059                },
2060                wgpu::BindGroupEntry {
2061                    binding: 13,
2062                    resource: scene.occupancy_pages[2].as_entire_binding(),
2063                },
2064                wgpu::BindGroupEntry {
2065                    binding: 14,
2066                    resource: scene.occupancy_pages[3].as_entire_binding(),
2067                },
2068                wgpu::BindGroupEntry {
2069                    binding: 15,
2070                    resource: grid_cameras.as_entire_binding(),
2071                },
2072            ],
2073        });
2074
2075        // GPU.9 — when sprites are present, build both splatter bind
2076        // groups up front (the splat pass writes the key buffer; the
2077        // resolve pass reads keys + scene depth and writes colour).
2078        // GPU.10.3 — model-DDA bind group + per-frame uniform, using the
2079        // cull/bin results captured above. Per-model + per-instance data
2080        // + the tile lists live in the registry buffers.
2081        let sprite_model_bg = match (&self.sprite_model_dda, &self.sprite_registry, sprite_pass) {
2082            (Some(smd), Some(reg), Some((visible, tiles_x))) => {
2083                // World camera (see the cull pass above) — sprites
2084                // project through it regardless of grid 0's transform.
2085                let cam = sprite_camera;
2086                let uni = SpriteModelUniform {
2087                    cam_pos: cam.position,
2088                    _p0: 0.0,
2089                    cam_right: cam.right,
2090                    _p1: 0.0,
2091                    cam_down: cam.down,
2092                    _p2: 0.0,
2093                    cam_forward: cam.forward,
2094                    _p3: 0.0,
2095                    fog_color: [
2096                        self.fog_color[0],
2097                        self.fog_color[1],
2098                        self.fog_color[2],
2099                        self.fog_near,
2100                    ],
2101                    screen_size: [surface_w, surface_h],
2102                    instance_count: visible,
2103                    fog_far: self.fog_far,
2104                    fov_y_rad,
2105                    tiles_x,
2106                    tile_size: SPRITE_TILE_SIZE,
2107                    _p6: 0.0,
2108                };
2109                self.queue
2110                    .write_buffer(&smd.uniform_buf, 0, bytemuck::bytes_of(&uni));
2111                Some(self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2112                    label: Some("roxlap-gpu sprite_model_dda.bg"),
2113                    layout: &smd.bgl,
2114                    entries: &[
2115                        wgpu::BindGroupEntry {
2116                            binding: 0,
2117                            resource: smd.uniform_buf.as_entire_binding(),
2118                        },
2119                        wgpu::BindGroupEntry {
2120                            binding: 1,
2121                            resource: reg.occupancy.as_entire_binding(),
2122                        },
2123                        wgpu::BindGroupEntry {
2124                            binding: 2,
2125                            resource: reg.colors.as_entire_binding(),
2126                        },
2127                        wgpu::BindGroupEntry {
2128                            binding: 3,
2129                            resource: reg.color_offsets.as_entire_binding(),
2130                        },
2131                        wgpu::BindGroupEntry {
2132                            binding: 4,
2133                            resource: reg.model_meta.as_entire_binding(),
2134                        },
2135                        wgpu::BindGroupEntry {
2136                            binding: 5,
2137                            resource: reg.instances.as_entire_binding(),
2138                        },
2139                        wgpu::BindGroupEntry {
2140                            binding: 6,
2141                            resource: dda.depth_buffer.as_entire_binding(),
2142                        },
2143                        wgpu::BindGroupEntry {
2144                            binding: 7,
2145                            resource: dda.framebuffer.as_entire_binding(),
2146                        },
2147                        wgpu::BindGroupEntry {
2148                            binding: 8,
2149                            resource: reg.tile_ranges.as_entire_binding(),
2150                        },
2151                        wgpu::BindGroupEntry {
2152                            binding: 9,
2153                            resource: reg.tile_instances.as_entire_binding(),
2154                        },
2155                        wgpu::BindGroupEntry {
2156                            binding: 10,
2157                            resource: reg.dirs.as_entire_binding(),
2158                        },
2159                        wgpu::BindGroupEntry {
2160                            binding: 11,
2161                            resource: reg.colmul.as_entire_binding(),
2162                        },
2163                    ],
2164                }))
2165            }
2166            _ => None,
2167        };
2168
2169        let mut encoder = self
2170            .device
2171            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2172                label: Some("roxlap-gpu scene encoder"),
2173            });
2174        {
2175            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
2176                label: Some("roxlap-gpu scene_dda compute"),
2177                timestamp_writes: None,
2178            });
2179            cpass.set_pipeline(&dda.pipeline_dda);
2180            cpass.set_bind_group(0, &dda_bg, &[]);
2181            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
2182        }
2183        // GPU.10 — sprite model-DDA pass: one thread per pixel marches
2184        // the tile's instances + composites against scene depth, after
2185        // the scene pass wrote the depth buffer and before the blit.
2186        if let (Some(smd), Some(bg)) = (&self.sprite_model_dda, &sprite_model_bg) {
2187            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
2188                label: Some("roxlap-gpu sprite_model_dda"),
2189                timestamp_writes: None,
2190            });
2191            cpass.set_pipeline(&smd.pipeline);
2192            cpass.set_bind_group(0, bg, &[]);
2193            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
2194        }
2195        {
2196            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2197                label: Some("roxlap-gpu scene_dda blit"),
2198                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2199                    view: &surf_view,
2200                    depth_slice: None,
2201                    resolve_target: None,
2202                    ops: wgpu::Operations {
2203                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
2204                        store: wgpu::StoreOp::Store,
2205                    },
2206                })],
2207                depth_stencil_attachment: None,
2208                timestamp_writes: None,
2209                occlusion_query_set: None,
2210                multiview_mask: None,
2211            });
2212            rpass.set_pipeline(&dda.pipeline_blit);
2213            rpass.set_bind_group(0, &dda.blit_bg, &[]);
2214            rpass.draw(0..3, 0..1);
2215        }
2216        self.queue.submit(std::iter::once(encoder.finish()));
2217        // Deferred present — the host calls `present` or `paint_egui`.
2218        self.pending_frame = Some((surf_tex, surf_view));
2219        self.frame_count = self.frame_count.wrapping_add(1);
2220    }
2221
2222    /// Like [`Self::render`] (clear to colour) but **deferred**: stashes
2223    /// the frame for [`Self::present`] / [`Self::paint_egui`] instead of
2224    /// presenting. The facade uses this before any grid is resident so a
2225    /// HUD can still be painted over an empty scene.
2226    pub fn render_clear_deferred(&mut self) {
2227        self.pending_frame = None;
2228        let Some(surf_tex) = self.acquire_frame() else {
2229            return;
2230        };
2231        let view = surf_tex
2232            .texture
2233            .create_view(&wgpu::TextureViewDescriptor::default());
2234        let [r, g, b] = self.clear_colour;
2235        let mut encoder = self
2236            .device
2237            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2238                label: Some("roxlap-gpu clear (deferred)"),
2239            });
2240        {
2241            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2242                label: Some("roxlap-gpu clear (deferred)"),
2243                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2244                    view: &view,
2245                    depth_slice: None,
2246                    resolve_target: None,
2247                    ops: wgpu::Operations {
2248                        load: wgpu::LoadOp::Clear(wgpu::Color { r, g, b, a: 1.0 }),
2249                        store: wgpu::StoreOp::Store,
2250                    },
2251                })],
2252                depth_stencil_attachment: None,
2253                timestamp_writes: None,
2254                occlusion_query_set: None,
2255                multiview_mask: None,
2256            });
2257        }
2258        self.queue.submit(std::iter::once(encoder.finish()));
2259        self.pending_frame = Some((surf_tex, view));
2260    }
2261
2262    /// Present the frame stashed by the last deferred render
2263    /// ([`Self::render_scene`] / [`Self::render_clear_deferred`]). No-op
2264    /// if nothing is pending (e.g. the surface was lost mid-render).
2265    pub fn present(&mut self) {
2266        if let Some((surf_tex, _view)) = self.pending_frame.take() {
2267            surf_tex.present();
2268        }
2269    }
2270
2271    /// Draw depth-tested world-space [`GpuLine`]s over the pending frame
2272    /// (L3.2). Projects each endpoint with `cam` (the marcher's pinhole) +
2273    /// the last frame's FOV / surface size, expands to screen-space quads,
2274    /// and runs a `LoadOp::Load` pass into the pending swapchain view — so
2275    /// the lines land on the marched frame and a later `present` /
2276    /// `paint_egui` still finishes it (the pending frame is left intact).
2277    /// Depth-tested lines are occluded by nearer marched geometry (compared
2278    /// against the scene-DDA depth buffer's `best_t`); call after `render`,
2279    /// before `present` / `paint_egui`. No-op if no frame is pending.
2280    pub fn draw_lines_deferred(&mut self, cam: &GpuLineCamera, lines: &[GpuLine]) {
2281        if self.pending_frame.is_none() || lines.is_empty() {
2282            return;
2283        }
2284        let (w, h) = (self.surface_config.width, self.surface_config.height);
2285        let fov = self.last_fov_y_rad;
2286        if w == 0 || h == 0 || fov <= 0.0 {
2287            return; // no frame marched yet — no projection to reuse
2288        }
2289        let verts = build_line_vertices(cam, lines, w, h, fov, self.flip_x);
2290        if verts.is_empty() {
2291            return;
2292        }
2293        self.ensure_line_resources();
2294        let res = self.line_resources.as_ref().expect("just built");
2295
2296        // Skip the depth test when there's no scene depth buffer to read
2297        // (sprite-only / empty scene) — bind the 1-word dummy so the layout
2298        // is satisfied; `no_depth = 1` keeps the shader from indexing it.
2299        let no_depth = u32::from(self.scene_dda.is_none());
2300        let params = LineParams {
2301            screen_w: w,
2302            screen_h: h,
2303            depth_bias: LINE_DEPTH_BIAS,
2304            no_depth,
2305            flip_x: u32::from(self.flip_x),
2306            _pad: [0; 3],
2307        };
2308        self.queue
2309            .write_buffer(&res.uniform_buf, 0, bytemuck::bytes_of(&params));
2310
2311        let depth_resource = match &self.scene_dda {
2312            Some(dda) => dda.depth_buffer.as_entire_binding(),
2313            None => res.dummy_depth.as_entire_binding(),
2314        };
2315        let bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2316            label: Some("roxlap-gpu line.bg"),
2317            layout: &res.bgl,
2318            entries: &[
2319                wgpu::BindGroupEntry {
2320                    binding: 0,
2321                    resource: res.uniform_buf.as_entire_binding(),
2322                },
2323                wgpu::BindGroupEntry {
2324                    binding: 1,
2325                    resource: depth_resource,
2326                },
2327            ],
2328        });
2329
2330        // Grow-only persistent vertex buffer (L3.3): one `write_buffer`
2331        // per overlay, reused across frames. Power-of-two capacity keeps
2332        // re-allocation rare as the segment count drifts.
2333        let needed = std::mem::size_of_val(verts.as_slice()) as u64;
2334        if self.line_vbuf_cap < needed {
2335            let cap = needed.next_power_of_two().max(4096);
2336            self.line_vbuf = Some(self.device.create_buffer(&wgpu::BufferDescriptor {
2337                label: Some("roxlap-gpu line.vbuf"),
2338                size: cap,
2339                usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
2340                mapped_at_creation: false,
2341            }));
2342            self.line_vbuf_cap = cap;
2343        }
2344        let vbuf = self.line_vbuf.as_ref().expect("ensured above");
2345        self.queue
2346            .write_buffer(vbuf, 0, bytemuck::cast_slice(&verts));
2347
2348        let view = &self.pending_frame.as_ref().expect("checked above").1;
2349        let mut encoder = self
2350            .device
2351            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2352                label: Some("roxlap-gpu lines"),
2353            });
2354        {
2355            // `LoadOp::Load` keeps the marcher's frame; the lines draw over
2356            // it. Manual depth test in the FS (no depth-stencil attachment).
2357            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2358                label: Some("roxlap-gpu line paint"),
2359                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2360                    view,
2361                    depth_slice: None,
2362                    resolve_target: None,
2363                    ops: wgpu::Operations {
2364                        load: wgpu::LoadOp::Load,
2365                        store: wgpu::StoreOp::Store,
2366                    },
2367                })],
2368                depth_stencil_attachment: None,
2369                timestamp_writes: None,
2370                occlusion_query_set: None,
2371                multiview_mask: None,
2372            });
2373            pass.set_pipeline(&res.pipeline);
2374            pass.set_bind_group(0, &bg, &[]);
2375            pass.set_vertex_buffer(0, vbuf.slice(..));
2376            pass.draw(0..verts.len() as u32, 0..1);
2377        }
2378        self.queue.submit(std::iter::once(encoder.finish()));
2379        // pending_frame left intact — present/paint_egui finishes the frame.
2380    }
2381
2382    /// Lazy-build the [`LineResources`] (`line.wgsl` pipeline + uniform +
2383    /// dummy depth buffer). The colour target uses the surface format with
2384    /// straight-alpha over-blending; no depth-stencil attachment (the depth
2385    /// test is manual in the fragment shader against the scene depth buffer).
2386    fn ensure_line_resources(&mut self) {
2387        if self.line_resources.is_some() {
2388            return;
2389        }
2390        let shader = self
2391            .device
2392            .create_shader_module(wgpu::ShaderModuleDescriptor {
2393                label: Some("line.wgsl"),
2394                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/line.wgsl").into()),
2395            });
2396        let bgl = self
2397            .device
2398            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2399                label: Some("roxlap-gpu line.bgl"),
2400                entries: &[
2401                    wgpu::BindGroupLayoutEntry {
2402                        binding: 0,
2403                        visibility: wgpu::ShaderStages::FRAGMENT,
2404                        ty: wgpu::BindingType::Buffer {
2405                            ty: wgpu::BufferBindingType::Uniform,
2406                            has_dynamic_offset: false,
2407                            min_binding_size: None,
2408                        },
2409                        count: None,
2410                    },
2411                    wgpu::BindGroupLayoutEntry {
2412                        binding: 1,
2413                        visibility: wgpu::ShaderStages::FRAGMENT,
2414                        ty: wgpu::BindingType::Buffer {
2415                            ty: wgpu::BufferBindingType::Storage { read_only: true },
2416                            has_dynamic_offset: false,
2417                            min_binding_size: None,
2418                        },
2419                        count: None,
2420                    },
2421                ],
2422            });
2423        let layout = self
2424            .device
2425            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2426                label: Some("roxlap-gpu line.layout"),
2427                bind_group_layouts: &[Some(&bgl)],
2428                immediate_size: 0,
2429            });
2430        let pipeline = self
2431            .device
2432            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
2433                label: Some("roxlap-gpu line.pipeline"),
2434                layout: Some(&layout),
2435                vertex: wgpu::VertexState {
2436                    module: &shader,
2437                    entry_point: Some("vs_main"),
2438                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2439                    buffers: &[wgpu::VertexBufferLayout {
2440                        array_stride: std::mem::size_of::<LineVertex>() as u64,
2441                        step_mode: wgpu::VertexStepMode::Vertex,
2442                        attributes: &wgpu::vertex_attr_array![
2443                            0 => Float32x2, // pos (NDC)
2444                            1 => Float32,   // depth
2445                            2 => Float32,   // depth_test
2446                            3 => Float32x4, // color
2447                        ],
2448                    }],
2449                },
2450                fragment: Some(wgpu::FragmentState {
2451                    module: &shader,
2452                    entry_point: Some("fs_main"),
2453                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2454                    targets: &[Some(wgpu::ColorTargetState {
2455                        format: self.surface_config.format,
2456                        blend: Some(wgpu::BlendState::ALPHA_BLENDING),
2457                        write_mask: wgpu::ColorWrites::ALL,
2458                    })],
2459                }),
2460                primitive: wgpu::PrimitiveState {
2461                    cull_mode: None,
2462                    ..Default::default()
2463                },
2464                depth_stencil: None,
2465                multisample: wgpu::MultisampleState::default(),
2466                multiview_mask: None,
2467                cache: None,
2468            });
2469        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2470            label: Some("roxlap-gpu line.uniform"),
2471            size: std::mem::size_of::<LineParams>() as u64,
2472            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2473            mapped_at_creation: false,
2474        });
2475        let dummy_depth = self.device.create_buffer(&wgpu::BufferDescriptor {
2476            label: Some("roxlap-gpu line.dummy_depth"),
2477            size: 4,
2478            usage: wgpu::BufferUsages::STORAGE,
2479            mapped_at_creation: false,
2480        });
2481        self.line_resources = Some(LineResources {
2482            pipeline,
2483            bgl,
2484            uniform_buf,
2485            dummy_depth,
2486        });
2487    }
2488
2489    /// Upload (or replace) an RGBA8 image as a sampled texture, returning
2490    /// a stable id for [`GpuImageQuad::image`]. `rgba` is row-major,
2491    /// `width * height * 4` bytes, straight (un-premultiplied) alpha.
2492    /// Reuses a dropped slot when one exists. Returns `0` for malformed
2493    /// input (an id that draws nothing).
2494    pub fn upload_image(&mut self, rgba: &[u8], width: u32, height: u32) -> usize {
2495        if width == 0 || height == 0 || rgba.len() != (width as usize) * (height as usize) * 4 {
2496            return 0;
2497        }
2498        let texture = self.device.create_texture(&wgpu::TextureDescriptor {
2499            label: Some("roxlap-gpu image_sprite"),
2500            size: wgpu::Extent3d {
2501                width,
2502                height,
2503                depth_or_array_layers: 1,
2504            },
2505            mip_level_count: 1,
2506            sample_count: 1,
2507            dimension: wgpu::TextureDimension::D2,
2508            format: wgpu::TextureFormat::Rgba8Unorm,
2509            usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
2510            view_formats: &[],
2511        });
2512        self.queue.write_texture(
2513            wgpu::TexelCopyTextureInfo {
2514                texture: &texture,
2515                mip_level: 0,
2516                origin: wgpu::Origin3d::ZERO,
2517                aspect: wgpu::TextureAspect::All,
2518            },
2519            rgba,
2520            wgpu::TexelCopyBufferLayout {
2521                offset: 0,
2522                bytes_per_row: Some(width * 4),
2523                rows_per_image: Some(height),
2524            },
2525            wgpu::Extent3d {
2526                width,
2527                height,
2528                depth_or_array_layers: 1,
2529            },
2530        );
2531        let view = texture.create_view(&wgpu::TextureViewDescriptor::default());
2532        let resident = ImageResident {
2533            view,
2534            _texture: texture,
2535        };
2536        if let Some(slot) = self.images.iter().position(Option::is_none) {
2537            self.images[slot] = Some(resident);
2538            slot
2539        } else {
2540            self.images.push(Some(resident));
2541            self.images.len() - 1
2542        }
2543    }
2544
2545    /// Release an image uploaded with [`Self::upload_image`] (the slot
2546    /// becomes reusable).
2547    pub fn drop_image(&mut self, id: usize) {
2548        if let Some(slot) = self.images.get_mut(id) {
2549            *slot = None;
2550        }
2551    }
2552
2553    /// Draw world-space 2D image sprites ([`GpuImageQuad`]) over the
2554    /// pending frame — the textured-quad sibling of
2555    /// [`Self::draw_lines_deferred`]. Projects each quad with `cam` (the
2556    /// marcher's pinhole) + the last frame's FOV / surface size, expands +
2557    /// near-clips to triangles, and runs one `LoadOp::Load` pass with a
2558    /// draw per quad (each binds its own texture). UVs are perspective-correct;
2559    /// depth-tested quads are occluded by nearer marched geometry. Call
2560    /// after `render`, before `present` / `paint_egui`. No-op if no frame
2561    /// is pending.
2562    pub fn draw_images_deferred(&mut self, cam: &GpuLineCamera, quads: &[GpuImageQuad]) {
2563        if self.pending_frame.is_none() || quads.is_empty() {
2564            return;
2565        }
2566        let (w, h) = (self.surface_config.width, self.surface_config.height);
2567        let fov = self.last_fov_y_rad;
2568        if w == 0 || h == 0 || fov <= 0.0 {
2569            return;
2570        }
2571
2572        // Concatenate every quad's verts into one buffer, recording each
2573        // quad's (range, texture) so they share a single render pass.
2574        let mut verts: Vec<ImageVertex> = Vec::new();
2575        let mut draws: Vec<(u32, u32, usize)> = Vec::new();
2576        for quad in quads {
2577            if !matches!(self.images.get(quad.image), Some(Some(_))) {
2578                continue; // dropped / never-uploaded id
2579            }
2580            let v = build_image_vertices(cam, quad, w, h, fov, self.flip_x);
2581            if v.is_empty() {
2582                continue;
2583            }
2584            let start = verts.len() as u32;
2585            verts.extend_from_slice(&v);
2586            draws.push((start, verts.len() as u32, quad.image));
2587        }
2588        if draws.is_empty() {
2589            return;
2590        }
2591
2592        self.ensure_image_resources();
2593        let no_depth = u32::from(self.scene_dda.is_none());
2594        let params = LineParams {
2595            screen_w: w,
2596            screen_h: h,
2597            depth_bias: LINE_DEPTH_BIAS,
2598            no_depth,
2599            flip_x: u32::from(self.flip_x),
2600            _pad: [0; 3],
2601        };
2602        {
2603            let res = self.image_resources.as_ref().expect("just built");
2604            self.queue
2605                .write_buffer(&res.uniform_buf, 0, bytemuck::bytes_of(&params));
2606        }
2607
2608        // Grow-only persistent vertex buffer (mirrors the line vbuf).
2609        let needed = std::mem::size_of_val(verts.as_slice()) as u64;
2610        if self.image_vbuf_cap < needed {
2611            let cap = needed.next_power_of_two().max(4096);
2612            self.image_vbuf = Some(self.device.create_buffer(&wgpu::BufferDescriptor {
2613                label: Some("roxlap-gpu image.vbuf"),
2614                size: cap,
2615                usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
2616                mapped_at_creation: false,
2617            }));
2618            self.image_vbuf_cap = cap;
2619        }
2620        let vbuf = self.image_vbuf.as_ref().expect("ensured above");
2621        self.queue
2622            .write_buffer(vbuf, 0, bytemuck::cast_slice(&verts));
2623
2624        // One bind group per draw (the texture view differs per quad).
2625        let res = self.image_resources.as_ref().expect("just built");
2626        let depth_resource = match &self.scene_dda {
2627            Some(dda) => dda.depth_buffer.as_entire_binding(),
2628            None => res.dummy_depth.as_entire_binding(),
2629        };
2630        let bind_groups: Vec<wgpu::BindGroup> = draws
2631            .iter()
2632            .map(|&(_, _, image_id)| {
2633                let resident = self.images[image_id].as_ref().expect("checked present");
2634                self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2635                    label: Some("roxlap-gpu image.bg"),
2636                    layout: &res.bgl,
2637                    entries: &[
2638                        wgpu::BindGroupEntry {
2639                            binding: 0,
2640                            resource: res.uniform_buf.as_entire_binding(),
2641                        },
2642                        wgpu::BindGroupEntry {
2643                            binding: 1,
2644                            resource: depth_resource.clone(),
2645                        },
2646                        wgpu::BindGroupEntry {
2647                            binding: 2,
2648                            resource: wgpu::BindingResource::TextureView(&resident.view),
2649                        },
2650                        wgpu::BindGroupEntry {
2651                            binding: 3,
2652                            resource: wgpu::BindingResource::Sampler(&res.sampler),
2653                        },
2654                    ],
2655                })
2656            })
2657            .collect();
2658
2659        let view = &self.pending_frame.as_ref().expect("checked above").1;
2660        let mut encoder = self
2661            .device
2662            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2663                label: Some("roxlap-gpu images"),
2664            });
2665        {
2666            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2667                label: Some("roxlap-gpu image paint"),
2668                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2669                    view,
2670                    depth_slice: None,
2671                    resolve_target: None,
2672                    ops: wgpu::Operations {
2673                        load: wgpu::LoadOp::Load,
2674                        store: wgpu::StoreOp::Store,
2675                    },
2676                })],
2677                depth_stencil_attachment: None,
2678                timestamp_writes: None,
2679                occlusion_query_set: None,
2680                multiview_mask: None,
2681            });
2682            pass.set_pipeline(&res.pipeline);
2683            pass.set_vertex_buffer(0, vbuf.slice(..));
2684            for (&(start, end, _), bg) in draws.iter().zip(&bind_groups) {
2685                pass.set_bind_group(0, bg, &[]);
2686                pass.draw(start..end, 0..1);
2687            }
2688        }
2689        self.queue.submit(std::iter::once(encoder.finish()));
2690        // pending_frame left intact — present/paint_egui finishes it.
2691    }
2692
2693    /// Lazy-build the [`ImageResources`] (`image.wgsl` pipeline + uniform +
2694    /// nearest sampler + dummy depth). Straight-alpha over-blend, no
2695    /// depth-stencil attachment (the depth test is manual in the FS).
2696    fn ensure_image_resources(&mut self) {
2697        if self.image_resources.is_some() {
2698            return;
2699        }
2700        let shader = self
2701            .device
2702            .create_shader_module(wgpu::ShaderModuleDescriptor {
2703                label: Some("image.wgsl"),
2704                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/image.wgsl").into()),
2705            });
2706        let bgl = self
2707            .device
2708            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2709                label: Some("roxlap-gpu image.bgl"),
2710                entries: &[
2711                    wgpu::BindGroupLayoutEntry {
2712                        binding: 0,
2713                        visibility: wgpu::ShaderStages::FRAGMENT,
2714                        ty: wgpu::BindingType::Buffer {
2715                            ty: wgpu::BufferBindingType::Uniform,
2716                            has_dynamic_offset: false,
2717                            min_binding_size: None,
2718                        },
2719                        count: None,
2720                    },
2721                    wgpu::BindGroupLayoutEntry {
2722                        binding: 1,
2723                        visibility: wgpu::ShaderStages::FRAGMENT,
2724                        ty: wgpu::BindingType::Buffer {
2725                            ty: wgpu::BufferBindingType::Storage { read_only: true },
2726                            has_dynamic_offset: false,
2727                            min_binding_size: None,
2728                        },
2729                        count: None,
2730                    },
2731                    wgpu::BindGroupLayoutEntry {
2732                        binding: 2,
2733                        visibility: wgpu::ShaderStages::FRAGMENT,
2734                        ty: wgpu::BindingType::Texture {
2735                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
2736                            view_dimension: wgpu::TextureViewDimension::D2,
2737                            multisampled: false,
2738                        },
2739                        count: None,
2740                    },
2741                    wgpu::BindGroupLayoutEntry {
2742                        binding: 3,
2743                        visibility: wgpu::ShaderStages::FRAGMENT,
2744                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
2745                        count: None,
2746                    },
2747                ],
2748            });
2749        let layout = self
2750            .device
2751            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2752                label: Some("roxlap-gpu image.layout"),
2753                bind_group_layouts: &[Some(&bgl)],
2754                immediate_size: 0,
2755            });
2756        let pipeline = self
2757            .device
2758            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
2759                label: Some("roxlap-gpu image.pipeline"),
2760                layout: Some(&layout),
2761                vertex: wgpu::VertexState {
2762                    module: &shader,
2763                    entry_point: Some("vs_main"),
2764                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2765                    buffers: &[wgpu::VertexBufferLayout {
2766                        array_stride: std::mem::size_of::<ImageVertex>() as u64,
2767                        step_mode: wgpu::VertexStepMode::Vertex,
2768                        attributes: &wgpu::vertex_attr_array![
2769                            0 => Float32x2, // ndc
2770                            1 => Float32,   // w
2771                            2 => Float32,   // depth
2772                            3 => Float32,   // depth_test
2773                            4 => Float32,   // cutoff
2774                            5 => Float32x2, // uv
2775                            6 => Float32x4, // tint
2776                        ],
2777                    }],
2778                },
2779                fragment: Some(wgpu::FragmentState {
2780                    module: &shader,
2781                    entry_point: Some("fs_main"),
2782                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2783                    targets: &[Some(wgpu::ColorTargetState {
2784                        format: self.surface_config.format,
2785                        blend: Some(wgpu::BlendState::ALPHA_BLENDING),
2786                        write_mask: wgpu::ColorWrites::ALL,
2787                    })],
2788                }),
2789                primitive: wgpu::PrimitiveState {
2790                    cull_mode: None,
2791                    ..Default::default()
2792                },
2793                depth_stencil: None,
2794                multisample: wgpu::MultisampleState::default(),
2795                multiview_mask: None,
2796                cache: None,
2797            });
2798        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2799            label: Some("roxlap-gpu image.uniform"),
2800            size: std::mem::size_of::<LineParams>() as u64,
2801            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2802            mapped_at_creation: false,
2803        });
2804        let dummy_depth = self.device.create_buffer(&wgpu::BufferDescriptor {
2805            label: Some("roxlap-gpu image.dummy_depth"),
2806            size: 4,
2807            usage: wgpu::BufferUsages::STORAGE,
2808            mapped_at_creation: false,
2809        });
2810        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
2811            label: Some("roxlap-gpu image.sampler"),
2812            // Nearest + clamp: pixel-art references want crisp texels and
2813            // no wrap bleed at the quad edges.
2814            address_mode_u: wgpu::AddressMode::ClampToEdge,
2815            address_mode_v: wgpu::AddressMode::ClampToEdge,
2816            address_mode_w: wgpu::AddressMode::ClampToEdge,
2817            mag_filter: wgpu::FilterMode::Nearest,
2818            min_filter: wgpu::FilterMode::Nearest,
2819            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
2820            ..Default::default()
2821        });
2822        self.image_resources = Some(ImageResources {
2823            pipeline,
2824            bgl,
2825            uniform_buf,
2826            dummy_depth,
2827            sampler,
2828        });
2829    }
2830
2831    /// Project a world point to window pixels under the marcher's
2832    /// vertical-FOV pinhole (the inverse of [`Self::pixel_ray`]), using
2833    /// the last-rendered frame's size + FOV. `None` before the first
2834    /// scene render or for a point at/behind the near plane.
2835    #[must_use]
2836    pub fn project_point(
2837        &self,
2838        cam_pos: [f32; 3],
2839        right: [f32; 3],
2840        down: [f32; 3],
2841        forward: [f32; 3],
2842        world: [f32; 3],
2843    ) -> Option<(f32, f32)> {
2844        let dda = self.scene_dda.as_ref()?;
2845        let (w, h) = dda.storage_size;
2846        if w == 0 || h == 0 || self.last_fov_y_rad <= 0.0 {
2847            return None;
2848        }
2849        let d = [
2850            world[0] - cam_pos[0],
2851            world[1] - cam_pos[1],
2852            world[2] - cam_pos[2],
2853        ];
2854        let cz = forward[0] * d[0] + forward[1] * d[1] + forward[2] * d[2];
2855        if cz < LINE_NEAR_Z {
2856            return None;
2857        }
2858        let cx = right[0] * d[0] + right[1] * d[1] + right[2] * d[2];
2859        let cy = down[0] * d[0] + down[1] * d[1] + down[2] * d[2];
2860        let half_h = (self.last_fov_y_rad * 0.5).tan();
2861        let half_w = half_h * (w as f32 / h as f32);
2862        let ndc_x = (cx / cz) / half_w;
2863        let ndc_y = -(cy / cz) / half_h;
2864        let sx = (ndc_x * 0.5 + 0.5) * w as f32;
2865        let sy = (0.5 - ndc_y * 0.5) * h as f32;
2866        Some((sx, sy))
2867    }
2868
2869    /// Overlay an `egui` UI on the pending frame, then present it
2870    /// (`hud` feature). `jobs` are the host's tessellated primitives
2871    /// (`egui::Context::tessellate`), `textures` the per-frame texture
2872    /// delta from `egui::FullOutput`, `pixels_per_point` the UI scale.
2873    ///
2874    /// Draws with `LoadOp::Load` over the marcher's frame (a separate
2875    /// encoder submitted after the scene's), so the UI composites on top
2876    /// of the world. No-op if no frame is pending.
2877    #[cfg(feature = "hud")]
2878    pub fn paint_egui(
2879        &mut self,
2880        jobs: &[egui::ClippedPrimitive],
2881        textures: &egui::TexturesDelta,
2882        pixels_per_point: f32,
2883    ) {
2884        let Some((surf_tex, surf_view)) = self.pending_frame.take() else {
2885            return;
2886        };
2887        let format = self.surface_config.format;
2888        let egui_rend = self.egui_renderer.get_or_insert_with(|| {
2889            egui_wgpu::Renderer::new(
2890                &self.device,
2891                format,
2892                egui_wgpu::RendererOptions {
2893                    msaa_samples: 1,
2894                    depth_stencil_format: None,
2895                    dithering: false,
2896                    ..Default::default()
2897                },
2898            )
2899        });
2900
2901        let screen = egui_wgpu::ScreenDescriptor {
2902            size_in_pixels: [self.surface_config.width, self.surface_config.height],
2903            pixels_per_point,
2904        };
2905        for (id, delta) in &textures.set {
2906            egui_rend.update_texture(&self.device, &self.queue, *id, delta);
2907        }
2908        let mut encoder = self
2909            .device
2910            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2911                label: Some("roxlap-gpu egui"),
2912            });
2913        let user_bufs =
2914            egui_rend.update_buffers(&self.device, &self.queue, &mut encoder, jobs, &screen);
2915        {
2916            // `LoadOp::Load` keeps the marcher's frame; egui draws over it.
2917            let mut pass = encoder
2918                .begin_render_pass(&wgpu::RenderPassDescriptor {
2919                    label: Some("roxlap-gpu egui paint"),
2920                    color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2921                        view: &surf_view,
2922                        depth_slice: None,
2923                        resolve_target: None,
2924                        ops: wgpu::Operations {
2925                            load: wgpu::LoadOp::Load,
2926                            store: wgpu::StoreOp::Store,
2927                        },
2928                    })],
2929                    depth_stencil_attachment: None,
2930                    timestamp_writes: None,
2931                    occlusion_query_set: None,
2932                    multiview_mask: None,
2933                })
2934                // egui-wgpu 0.29 requires a `'static` pass (see its docs).
2935                .forget_lifetime();
2936            egui_rend.render(&mut pass, jobs, &screen);
2937        }
2938        for id in &textures.free {
2939            egui_rend.free_texture(id);
2940        }
2941        self.queue.submit(
2942            user_bufs
2943                .into_iter()
2944                .chain(std::iter::once(encoder.finish())),
2945        );
2946        surf_tex.present();
2947    }
2948
2949    fn build_scene_dda(
2950        &self,
2951        width: u32,
2952        height: u32,
2953        surface_format: wgpu::TextureFormat,
2954    ) -> SceneDdaResources {
2955        // Framebuffer as a packed-`rgba8unorm` storage buffer (1 u32 per
2956        // pixel, row stride = `width`). See the struct-field note.
2957        let framebuffer = self.device.create_buffer(&wgpu::BufferDescriptor {
2958            label: Some("roxlap-gpu scene_dda.framebuffer"),
2959            size: u64::from(width) * u64::from(height) * 4,
2960            usage: wgpu::BufferUsages::STORAGE,
2961            mapped_at_creation: false,
2962        });
2963        // Screen size + flip flag for the blit's pixel→index math
2964        // (`vec2<u32>` size, then `flip_x` + pad). Re-written per frame in
2965        // `render_scene` so a flip toggle takes effect without a resize.
2966        let blit_dims = self.device.create_buffer(&wgpu::BufferDescriptor {
2967            label: Some("roxlap-gpu scene_dda.blit_dims"),
2968            size: 16,
2969            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2970            mapped_at_creation: false,
2971        });
2972        self.queue.write_buffer(
2973            &blit_dims,
2974            0,
2975            bytemuck::bytes_of(&[width, height, u32::from(self.flip_x), 0u32]),
2976        );
2977
2978        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2979            label: Some("roxlap-gpu scene_dda.uniform"),
2980            size: std::mem::size_of::<SceneDdaUniform>() as u64,
2981            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2982            mapped_at_creation: false,
2983        });
2984
2985        // GPU.9 — per-pixel world-t depth (f32 bits as u32). Sized to
2986        // the storage texture; written by the scene pass when sprites
2987        // are active, read+tested by the sprite splatter.
2988        let depth_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
2989            label: Some("roxlap-gpu scene_dda.depth"),
2990            size: u64::from(width) * u64::from(height) * 4,
2991            // COPY_SRC so `read_depth_pixel` can stage it for picking.
2992            usage: wgpu::BufferUsages::STORAGE
2993                | wgpu::BufferUsages::COPY_DST
2994                | wgpu::BufferUsages::COPY_SRC,
2995            mapped_at_creation: false,
2996        });
2997        let depth_readback = self.device.create_buffer(&wgpu::BufferDescriptor {
2998            label: Some("roxlap-gpu scene_dda.depth_readback"),
2999            size: u64::from(width) * u64::from(height) * 4,
3000            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
3001            mapped_at_creation: false,
3002        });
3003        let dda_shader = self
3004            .device
3005            .create_shader_module(wgpu::ShaderModuleDescriptor {
3006                label: Some("scene_dda.wgsl"),
3007                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
3008            });
3009        let bgl_dda = self
3010            .device
3011            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3012                label: Some("roxlap-gpu scene_dda.bgl"),
3013                entries: &[
3014                    bgl_uniform_entry(0),
3015                    bgl_storage_entry(1, true),
3016                    bgl_storage_entry(2, true),
3017                    bgl_storage_entry(3, true),
3018                    bgl_storage_entry(4, true),
3019                    bgl_storage_entry(5, true),
3020                    bgl_storage_entry(6, true),
3021                    bgl_storage_entry(7, true),
3022                    // Framebuffer storage buffer (read-write; the scene +
3023                    // sprite passes write packed pixels into it).
3024                    bgl_storage_entry(8, false),
3025                    // GPU.8 sky panorama + sampler.
3026                    wgpu::BindGroupLayoutEntry {
3027                        binding: 9,
3028                        visibility: wgpu::ShaderStages::COMPUTE,
3029                        ty: wgpu::BindingType::Texture {
3030                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
3031                            view_dimension: wgpu::TextureViewDimension::D2,
3032                            multisampled: false,
3033                        },
3034                        count: None,
3035                    },
3036                    wgpu::BindGroupLayoutEntry {
3037                        binding: 10,
3038                        visibility: wgpu::ShaderStages::COMPUTE,
3039                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
3040                        count: None,
3041                    },
3042                    // GPU.9 — read-write per-pixel depth buffer.
3043                    bgl_storage_entry(11, false),
3044                    // Occupancy pages 1..MAX_OCC_PAGES (page 0 is
3045                    // binding 1). Unused pages bind a dummy buffer.
3046                    bgl_storage_entry(12, true),
3047                    bgl_storage_entry(13, true),
3048                    bgl_storage_entry(14, true),
3049                    // Per-grid cameras (runtime-sized; one per grid).
3050                    bgl_storage_entry(15, true),
3051                ],
3052            });
3053        let dda_pl = self
3054            .device
3055            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3056                label: Some("roxlap-gpu scene_dda.layout"),
3057                bind_group_layouts: &[Some(&bgl_dda)],
3058                immediate_size: 0,
3059            });
3060        let pipeline_dda = self
3061            .device
3062            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
3063                label: Some("roxlap-gpu scene_dda.pipeline"),
3064                layout: Some(&dda_pl),
3065                module: &dda_shader,
3066                entry_point: Some("render_scene"),
3067                compilation_options: wgpu::PipelineCompilationOptions::default(),
3068                cache: None,
3069            });
3070
3071        let blit_shader = self
3072            .device
3073            .create_shader_module(wgpu::ShaderModuleDescriptor {
3074                label: Some("scene_blit.wgsl"),
3075                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_blit.wgsl").into()),
3076            });
3077        let bgl_blit = self
3078            .device
3079            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3080                label: Some("roxlap-gpu scene_dda.blit_bgl"),
3081                entries: &[
3082                    // Framebuffer storage buffer (read-only in the blit).
3083                    wgpu::BindGroupLayoutEntry {
3084                        binding: 0,
3085                        visibility: wgpu::ShaderStages::FRAGMENT,
3086                        ty: wgpu::BindingType::Buffer {
3087                            ty: wgpu::BufferBindingType::Storage { read_only: true },
3088                            has_dynamic_offset: false,
3089                            min_binding_size: None,
3090                        },
3091                        count: None,
3092                    },
3093                    // Screen-size uniform for the pixel→index math.
3094                    wgpu::BindGroupLayoutEntry {
3095                        binding: 1,
3096                        visibility: wgpu::ShaderStages::FRAGMENT,
3097                        ty: wgpu::BindingType::Buffer {
3098                            ty: wgpu::BufferBindingType::Uniform,
3099                            has_dynamic_offset: false,
3100                            min_binding_size: None,
3101                        },
3102                        count: None,
3103                    },
3104                ],
3105            });
3106        let blit_pl = self
3107            .device
3108            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3109                label: Some("roxlap-gpu scene_dda.blit_layout"),
3110                bind_group_layouts: &[Some(&bgl_blit)],
3111                immediate_size: 0,
3112            });
3113        let pipeline_blit = self
3114            .device
3115            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
3116                label: Some("roxlap-gpu scene_dda.blit_pipeline"),
3117                layout: Some(&blit_pl),
3118                vertex: wgpu::VertexState {
3119                    module: &blit_shader,
3120                    entry_point: Some("vs_main"),
3121                    compilation_options: wgpu::PipelineCompilationOptions::default(),
3122                    buffers: &[],
3123                },
3124                fragment: Some(wgpu::FragmentState {
3125                    module: &blit_shader,
3126                    entry_point: Some("fs_main"),
3127                    compilation_options: wgpu::PipelineCompilationOptions::default(),
3128                    targets: &[Some(wgpu::ColorTargetState {
3129                        format: surface_format,
3130                        blend: None,
3131                        write_mask: wgpu::ColorWrites::ALL,
3132                    })],
3133                }),
3134                primitive: wgpu::PrimitiveState::default(),
3135                depth_stencil: None,
3136                multisample: wgpu::MultisampleState::default(),
3137                multiview_mask: None,
3138                cache: None,
3139            });
3140        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
3141            label: Some("roxlap-gpu scene_dda.blit_bg"),
3142            layout: &bgl_blit,
3143            entries: &[
3144                wgpu::BindGroupEntry {
3145                    binding: 0,
3146                    resource: framebuffer.as_entire_binding(),
3147                },
3148                wgpu::BindGroupEntry {
3149                    binding: 1,
3150                    resource: blit_dims.as_entire_binding(),
3151                },
3152            ],
3153        });
3154
3155        SceneDdaResources {
3156            storage_size: (width, height),
3157            framebuffer,
3158            uniform_buf,
3159            bgl_dda,
3160            pipeline_dda,
3161            blit_bg,
3162            pipeline_blit,
3163            blit_dims,
3164            depth_buffer,
3165            depth_readback,
3166        }
3167    }
3168
3169    /// Read back the per-pixel world-t depth at window pixel `(x, y)`
3170    /// from the last rendered frame, for screen→world picking. Returns
3171    /// the distance `t` along the (normalised) view ray to the nearest
3172    /// scene-grid surface, so the host reconstructs the world hit as
3173    /// `cam.pos + t * normalize(ray_dir)`. `None` for out-of-bounds
3174    /// pixels, sky / no-hit (the `T_INF` sentinel), or when no scene
3175    /// frame has been rendered.
3176    ///
3177    /// The depth buffer is the SCENE pass's output (terrain + grids),
3178    /// untouched by the sprite pass (which reads it read-only), so a
3179    /// cursor sprite under the pointer does not occlude the pick.
3180    ///
3181    /// Synchronous: copies the depth buffer to a mapped staging buffer
3182    /// and blocks on `device.poll(Wait)`. Cheap enough for click-time
3183    /// picks; do not call it every frame.
3184    ///
3185    /// Requires the last frame to have written depth, which happens
3186    /// when sprites are present (`write_depth`). The pick demo always
3187    /// has a cursor sprite, so this holds.
3188    ///
3189    /// Compiles on wasm, but the wasm facade never calls it: WebGPU's
3190    /// `device.poll` doesn't block for the GPU, so the blocking
3191    /// `recv()` here would hang the single browser thread. Picking is
3192    /// deferred on the wasm GPU path (the facade returns `None`).
3193    #[must_use]
3194    pub fn read_depth_pixel(&self, x: u32, y: u32) -> Option<f32> {
3195        let dda = self.scene_dda.as_ref()?;
3196        let (w, h) = dda.storage_size;
3197        if x >= w || y >= h {
3198            return None;
3199        }
3200        let mut enc = self
3201            .device
3202            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
3203                label: Some("roxlap-gpu depth readback"),
3204            });
3205        let size = u64::from(w) * u64::from(h) * 4;
3206        enc.copy_buffer_to_buffer(&dda.depth_buffer, 0, &dda.depth_readback, 0, size);
3207        self.queue.submit(std::iter::once(enc.finish()));
3208
3209        let slice = dda.depth_readback.slice(..);
3210        let (tx, rx) = std::sync::mpsc::channel();
3211        slice.map_async(wgpu::MapMode::Read, move |r| {
3212            let _ = tx.send(r);
3213        });
3214        self.device.poll(wgpu::PollType::wait_indefinitely()).ok();
3215        rx.recv().ok()?.ok()?;
3216
3217        let t = {
3218            let data = slice.get_mapped_range();
3219            let idx = ((y * w + x) * 4) as usize;
3220            let bytes: [u8; 4] = data[idx..idx + 4].try_into().ok()?;
3221            f32::from_le_bytes(bytes)
3222        };
3223        dda.depth_readback.unmap();
3224
3225        // Reject sky / no-hit (T_INF == 1e30 in the shader) + non-finite.
3226        if !t.is_finite() || t >= 1.0e29 {
3227            return None;
3228        }
3229        Some(t)
3230    }
3231
3232    /// World-space view-ray direction (un-normalised) for window pixel
3233    /// `(x, y)`, under the GPU marcher's projection — the canonical GPU
3234    /// unproject, mirroring `scene_dda.wgsl`'s `render_scene`
3235    /// (vertical-FOV pinhole). Uses the last-rendered frame's target
3236    /// size + FOV; `None` before the first scene render. Pair with
3237    /// [`Self::read_depth_pixel`] for screen→world picking.
3238    #[must_use]
3239    pub fn pixel_ray(
3240        &self,
3241        right: [f64; 3],
3242        down: [f64; 3],
3243        forward: [f64; 3],
3244        x: f64,
3245        y: f64,
3246    ) -> Option<[f64; 3]> {
3247        let dda = self.scene_dda.as_ref()?;
3248        let (w, h) = dda.storage_size;
3249        if w == 0 || h == 0 || self.last_fov_y_rad <= 0.0 {
3250            return None;
3251        }
3252        Some(pinhole_pixel_ray(
3253            right,
3254            down,
3255            forward,
3256            x,
3257            y,
3258            f64::from(w),
3259            f64::from(h),
3260            f64::from(self.last_fov_y_rad),
3261        ))
3262    }
3263
3264    /// GPU.10.1 — upload a sprite model registry + its instances for
3265    /// the DDA path. An empty instance slice clears all sprites.
3266    pub fn set_sprite_instances(
3267        &mut self,
3268        registry: &sprite_model::SpriteModelRegistry,
3269        instances: &[sprite_model::SpriteInstance],
3270    ) {
3271        if instances.is_empty() {
3272            self.sprite_registry = None;
3273            return;
3274        }
3275        self.sprite_registry = Some(sprite_model::SpriteRegistryResident::upload(
3276            &self.device,
3277            registry,
3278            instances,
3279        ));
3280    }
3281
3282    /// Incrementally append sprite instances **without** rebuilding the
3283    /// registry — the cheap streaming-spawn path (asteroids, projectiles).
3284    /// Returns the index of the first appended instance (`[base, base+N)`).
3285    ///
3286    /// Every appended instance must reference a model already registered
3287    /// by the [`Self::set_sprite_instances`] that established residency
3288    /// (model volumes are not re-uploaded here — build the full
3289    /// `SpriteModelRegistry` up front and seed it once, then stream
3290    /// instances). If no registry is resident yet, this performs the
3291    /// initial full upload and returns `0`.
3292    ///
3293    /// Cost is amortised O(1) per instance (the GPU instance buffer grows
3294    /// by powers of two), versus the full volume + buffer rebuild of
3295    /// [`Self::set_sprite_instances`].
3296    pub fn append_sprite_instances(
3297        &mut self,
3298        registry: &sprite_model::SpriteModelRegistry,
3299        instances: &[sprite_model::SpriteInstance],
3300    ) -> u32 {
3301        match self.sprite_registry.as_mut() {
3302            Some(reg) => reg.append_instances(&self.device, registry, instances),
3303            None => {
3304                self.set_sprite_instances(registry, instances);
3305                0
3306            }
3307        }
3308    }
3309
3310    /// Remove the sprite instance at `index` (swap-remove, O(1), no model
3311    /// re-upload). Returns `Some(old_last)` if a different instance was
3312    /// moved into `index` to fill the hole — its index changed from
3313    /// `old_last` to `index`, so a caller tracking instance handles must
3314    /// update that one. Returns `None` if `index` was the last element /
3315    /// out of range, or no registry is resident.
3316    pub fn remove_sprite_instance(&mut self, index: usize) -> Option<usize> {
3317        self.sprite_registry
3318            .as_mut()
3319            .and_then(|reg| reg.remove_instance(index))
3320    }
3321
3322    /// Incrementally add a new model (its full LOD chain) to the resident
3323    /// sprite registry **without** re-uploading the existing models — the
3324    /// counterpart to [`Self::append_sprite_instances`] for streaming in
3325    /// new geometry (unique asteroids, generated meshes).
3326    ///
3327    /// Usage mirrors `update_sprite_model`: you own the
3328    /// [`SpriteModelRegistry`](sprite_model::SpriteModelRegistry), append
3329    /// the model with [`add_lod`](sprite_model::SpriteModelRegistry::add_lod)
3330    /// (or `add`), then pass the returned `chain_id` here to sync that one
3331    /// chain to the GPU. Afterwards [`Self::append_sprite_instances`] may
3332    /// reference it.
3333    ///
3334    /// If no registry is resident yet, this performs the initial full
3335    /// upload of `registry` (all its current models, zero instances) to
3336    /// establish residency — so call it for your *first* model; only
3337    /// chains appended *after* residency exists are added incrementally.
3338    ///
3339    /// Cost is amortised O(new model voxels): the shared volume buffers
3340    /// carry slack and bump-append, growing (and rebuilding once from the
3341    /// registry) only on overflow.
3342    pub fn add_sprite_model(
3343        &mut self,
3344        registry: &sprite_model::SpriteModelRegistry,
3345        chain_id: u32,
3346    ) {
3347        match self.sprite_registry.as_mut() {
3348            Some(reg) => reg.add_model(&self.device, &self.queue, registry, chain_id),
3349            None => {
3350                self.sprite_registry = Some(sprite_model::SpriteRegistryResident::upload(
3351                    &self.device,
3352                    registry,
3353                    &[],
3354                ));
3355            }
3356        }
3357    }
3358
3359    /// Remove a model (tombstone its LOD chain) from the resident sprite
3360    /// registry — the counterpart to [`Self::add_sprite_model`]. Frees its
3361    /// `colors`/`dirs` space for reuse by a later add; the smaller
3362    /// `occupancy`/`color_offsets` holes are reclaimed by
3363    /// [`Self::compact_sprite_models`]. Entry / chain ids stay stable, so
3364    /// other models' `chain_id`s remain valid.
3365    ///
3366    /// Instances of the removed model keep their slots but draw as nothing
3367    /// until the caller drops them via [`Self::remove_sprite_instance`].
3368    /// No-op if `chain_id` is unknown / already removed / no registry.
3369    pub fn remove_sprite_model(&mut self, chain_id: u32) {
3370        if let Some(reg) = self.sprite_registry.as_mut() {
3371            reg.remove_model(chain_id);
3372        }
3373    }
3374
3375    /// Reclaim the holes left by [`Self::remove_sprite_model`] by rebuilding
3376    /// the shared volume buffers from the live models only. `registry` must
3377    /// be the resident one. Cost is O(live volume) — call it when
3378    /// [`Self::dead_sprite_model_count`] is high (e.g. exceeds the live
3379    /// count), not every frame. No-op if no registry is resident.
3380    pub fn compact_sprite_models(&mut self, registry: &sprite_model::SpriteModelRegistry) {
3381        if let Some(reg) = self.sprite_registry.as_mut() {
3382            reg.compact(&self.device, &self.queue, registry);
3383        }
3384    }
3385
3386    /// Number of live (non-removed) sprite models (0 if none uploaded).
3387    #[must_use]
3388    pub fn sprite_model_count(&self) -> usize {
3389        self.sprite_registry
3390            .as_ref()
3391            .map_or(0, sprite_model::SpriteRegistryResident::live_model_count)
3392    }
3393
3394    /// Number of removed-but-not-yet-compacted sprite models — the
3395    /// fragmentation signal for deciding when to call
3396    /// [`Self::compact_sprite_models`].
3397    #[must_use]
3398    pub fn dead_sprite_model_count(&self) -> usize {
3399        self.sprite_registry
3400            .as_ref()
3401            .map_or(0, sprite_model::SpriteRegistryResident::dead_model_count)
3402    }
3403
3404    /// Number of resident sprite instances (0 if none uploaded).
3405    #[must_use]
3406    pub fn sprite_instance_count(&self) -> usize {
3407        self.sprite_registry
3408            .as_ref()
3409            .map_or(0, sprite_model::SpriteRegistryResident::instance_count)
3410    }
3411
3412    /// Re-pose the already-resident sprite instances in place (no model
3413    /// volume re-upload) — the cheap per-frame path for animated KFA
3414    /// limbs. `instances` must match the last [`Self::set_sprite_instances`]
3415    /// in length + order. No-op if no sprite registry is resident.
3416    pub fn update_sprite_instance_transforms(
3417        &mut self,
3418        instances: &[sprite_model::SpriteInstance],
3419    ) {
3420        if let Some(reg) = self.sprite_registry.as_mut() {
3421            reg.update_transforms(instances);
3422        }
3423    }
3424
3425    /// GPU.12 incremental — re-upload only LOD chain `chain_id`'s entries
3426    /// after an in-place edit of `registry` (carve / recolour), without
3427    /// rebuilding the whole sprite registry. `registry` must be the one
3428    /// last passed to [`Self::set_sprite_instances`] with chain
3429    /// `chain_id` already edited. No-op if no registry is resident.
3430    pub fn update_sprite_model(
3431        &mut self,
3432        registry: &sprite_model::SpriteModelRegistry,
3433        chain_id: u32,
3434    ) {
3435        if let Some(reg) = self.sprite_registry.as_mut() {
3436            reg.update_model(&self.device, &self.queue, registry, chain_id);
3437        }
3438    }
3439
3440    /// Set the per-instance `kv6colmul[256]` lighting tables (voxlap's
3441    /// `update_reflects` output, e.g. via `roxlap_core::sprite::
3442    /// sprite_colmul`), in the same order/length as the last
3443    /// [`Self::set_sprite_instances`]. The GPU sprite pass modulates each
3444    /// voxel by its surface normal's entry — matching the CPU rasteriser.
3445    /// No-op if no sprite registry is resident.
3446    pub fn set_sprite_instance_colmul(&mut self, tables: &[[u64; 256]]) {
3447        if let Some(reg) = self.sprite_registry.as_mut() {
3448            reg.set_instance_colmul(tables);
3449        }
3450    }
3451
3452    /// GPU.10.4 — set the LOD pixel threshold: a sprite steps to the
3453    /// next mip once a mip-0 voxel would project below `px` screen
3454    /// pixels. `1.0` is the natural "no sub-pixel voxels" default;
3455    /// larger values force LOD in closer (useful for inspection).
3456    /// Clamped to ≥ 0.25.
3457    pub fn set_sprite_lod_px(&mut self, px: f32) {
3458        self.sprite_lod_px = px.max(0.25);
3459    }
3460
3461    /// GPU.11.1 — set the scene-grid LOD scan distance (world units).
3462    /// A chunk entered at world-t `t` is marched at mip
3463    /// `floor(log2(max(t, msd) / msd))`, clamped to its grid's mip
3464    /// ladder. `0` disables LOD (always mip-0). Larger values push
3465    /// the coarser mips farther out — the axis-aligned-mip-beams
3466    /// mitigation lever (GPU.11.2). Default 64 (matches CPU
3467    /// `mip_scan_dist`).
3468    pub fn set_scene_mip_scan_dist(&mut self, dist: f32) {
3469        self.scene_mip_scan_dist = dist.max(0.0);
3470    }
3471
3472    /// Set per-face grid side-shading — voxlap's
3473    /// `setsideshades(top, bot, left, right, up, down)`. Each value is
3474    /// subtracted (as a u8, matching the CPU `gcsub` high byte) from a
3475    /// hit voxel's brightness byte before shading, so the scene-DDA pass
3476    /// darkens grid faces the same way the CPU rasteriser does. `[0; 6]`
3477    /// disables it (the default). The hit face is taken from the DDA's
3478    /// last-stepped axis + ray direction.
3479    pub fn set_scene_side_shades(&mut self, s: [i8; 6]) {
3480        // Reinterpret each i8 as u8 (voxlap stamps `sxx` into gcsub's
3481        // high byte verbatim), then pack (top, bot, left, right) /
3482        // (up, down, 0, 0) for the two uniform vec4s.
3483        let v = |i: usize| i32::from(s[i] as u8);
3484        self.scene_side_shades = [[v(0), v(1), v(2), v(3)], [v(4), v(5), 0, 0]];
3485    }
3486
3487    /// GPU.10.1 — build the instanced model-DDA pipeline (one thread
3488    /// per pixel). Lazily invoked the first frame a registry is present.
3489    fn build_sprite_model_dda(&self) -> SpriteModelDdaResources {
3490        let shader = self
3491            .device
3492            .create_shader_module(wgpu::ShaderModuleDescriptor {
3493                label: Some("sprite_model_dda.wgsl"),
3494                source: wgpu::ShaderSource::Wgsl(
3495                    include_str!("../shaders/sprite_model_dda.wgsl").into(),
3496                ),
3497            });
3498        let bgl = self
3499            .device
3500            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3501                label: Some("roxlap-gpu sprite_model_dda.bgl"),
3502                entries: &[
3503                    bgl_uniform_entry(0),
3504                    bgl_storage_entry(1, true),  // occupancy
3505                    bgl_storage_entry(2, true),  // colors
3506                    bgl_storage_entry(3, true),  // color_offsets
3507                    bgl_storage_entry(4, true),  // model_meta
3508                    bgl_storage_entry(5, true),  // instances
3509                    bgl_storage_entry(6, true),  // scene depth
3510                    bgl_storage_entry(7, false), // framebuffer (read-write buffer)
3511                    bgl_storage_entry(8, true),  // tile_ranges
3512                    bgl_storage_entry(9, true),  // tile_instances
3513                    bgl_storage_entry(10, true), // per-voxel dir
3514                    bgl_storage_entry(11, true), // per-instance kv6colmul
3515                ],
3516            });
3517        let pl = self
3518            .device
3519            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3520                label: Some("roxlap-gpu sprite_model_dda.layout"),
3521                bind_group_layouts: &[Some(&bgl)],
3522                immediate_size: 0,
3523            });
3524        let pipeline = self
3525            .device
3526            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
3527                label: Some("roxlap-gpu sprite_model_dda.pipeline"),
3528                layout: Some(&pl),
3529                module: &shader,
3530                entry_point: Some("march"),
3531                compilation_options: wgpu::PipelineCompilationOptions::default(),
3532                cache: None,
3533            });
3534        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
3535            label: Some("roxlap-gpu sprite_model_dda.uniform"),
3536            size: std::mem::size_of::<SpriteModelUniform>() as u64,
3537            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
3538            mapped_at_creation: false,
3539        });
3540        SpriteModelDdaResources {
3541            bgl,
3542            pipeline,
3543            uniform_buf,
3544        }
3545    }
3546}
3547
/// GPU.11 — headless scene-DDA renderer for tests + offline visual
/// gates. Owns the `scene_dda.wgsl` compute pipeline with no surface
/// and no blit pass; renders a [`GpuSceneResident`] to an in-memory
/// RGBA framebuffer via texture readback. The per-substage visual
/// gate (render reference scenes, diff PPMs) and the GPU.11.1 mip
/// render-diff both ride on this.
// NOTE(review): the framebuffer below is a storage *buffer* paired with a
// `readback` buffer, so "texture readback" above may be stale wording —
// confirm against the render path.
pub struct HeadlessSceneRenderer {
    /// Framebuffer width in pixels.
    width: u32,
    /// Framebuffer height in pixels.
    height: u32,
    /// Framebuffer storage buffer (packed `rgba8unorm`, tight rows) —
    /// matches the buffer-output `scene_dda.wgsl` (see its note).
    framebuffer: wgpu::Buffer,
    /// Per-pixel depth storage buffer (4 bytes per pixel), allocated
    /// next to the framebuffer.
    depth_buffer: wgpu::Buffer,
    /// Uniform buffer sized for one `SceneDdaUniform`.
    uniform_buf: wgpu::Buffer,
    /// Sky texture; never read by name (hence the underscore) —
    /// presumably held so the resource behind `sky_view` stays alive;
    /// confirm.
    _sky_texture: wgpu::Texture,
    /// View of the sky texture.
    sky_view: wgpu::TextureView,
    /// Sampler used with `sky_view`.
    sky_sampler: wgpu::Sampler,
    /// Bind-group layout of the compute pipeline.
    bgl: wgpu::BindGroupLayout,
    /// The `scene_dda.wgsl` compute pipeline.
    pipeline: wgpu::ComputePipeline,
    /// Buffer used to bring the rendered frame back to the CPU.
    // NOTE(review): its size / usage flags are set outside this view.
    readback: wgpu::Buffer,
    /// Per-face side-shades for the gate render (default none). Packed
    /// `[(top,bot,left,right), (up,down,_,_)]`; set via
    /// [`Self::set_side_shades`].
    side_shades: [[i32; 4]; 2],
}
3573
3574impl HeadlessSceneRenderer {
3575    /// Build the compute pipeline + output/readback resources for a
3576    /// `width × height` framebuffer. Validates `scene_dda.wgsl` and
3577    /// the [`scene::GridStaticMeta`] std430 layout at pipeline /
3578    /// bind-group time.
3579    #[must_use]
3580    pub fn new(device: &wgpu::Device, queue: &wgpu::Queue, width: u32, height: u32) -> Self {
3581        let framebuffer = device.create_buffer(&wgpu::BufferDescriptor {
3582            label: Some("roxlap-gpu headless.framebuffer"),
3583            size: u64::from(width) * u64::from(height) * 4,
3584            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
3585            mapped_at_creation: false,
3586        });
3587
3588        let uniform_buf = device.create_buffer(&wgpu::BufferDescriptor {
3589            label: Some("roxlap-gpu headless.uniform"),
3590            size: std::mem::size_of::<SceneDdaUniform>() as u64,
3591            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
3592            mapped_at_creation: false,
3593        });
3594        let depth_buffer = device.create_buffer(&wgpu::BufferDescriptor {
3595            label: Some("roxlap-gpu headless.depth"),
3596            size: u64::from(width) * u64::from(height) * 4,
3597            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
3598            mapped_at_creation: false,
3599        });
3600
3601        let default_sky_pixel = [120u8, 150, 220, 255];
3602        let (sky_texture, sky_view) = create_sky_texture(device, 1, 1, &default_sky_pixel);
3603        // Upload the default sky texel (create_sky_texture only allocates
3604        // — the texel must be written or the shader samples black, which
3605        // is why a grid-less headless render came back black).
3606        queue.write_texture(
3607            wgpu::TexelCopyTextureInfo {
3608                texture: &sky_texture,
3609                mip_level: 0,
3610                origin: wgpu::Origin3d::ZERO,
3611                aspect: wgpu::TextureAspect::All,
3612            },
3613            &default_sky_pixel,
3614            wgpu::TexelCopyBufferLayout {
3615                offset: 0,
3616                bytes_per_row: Some(4),
3617                rows_per_image: Some(1),
3618            },
3619            wgpu::Extent3d {
3620                width: 1,
3621                height: 1,
3622                depth_or_array_layers: 1,
3623            },
3624        );
3625        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
3626            label: Some("roxlap-gpu headless.sky_sampler"),
3627            address_mode_u: wgpu::AddressMode::Repeat,
3628            address_mode_v: wgpu::AddressMode::Repeat,
3629            mag_filter: wgpu::FilterMode::Linear,
3630            min_filter: wgpu::FilterMode::Linear,
3631            ..Default::default()
3632        });
3633
3634        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
3635            label: Some("scene_dda.wgsl (headless)"),
3636            source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
3637        });
3638        let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3639            label: Some("roxlap-gpu headless.bgl"),
3640            entries: &[
3641                bgl_uniform_entry(0),
3642                bgl_storage_entry(1, true),
3643                bgl_storage_entry(2, true),
3644                bgl_storage_entry(3, true),
3645                bgl_storage_entry(4, true),
3646                bgl_storage_entry(5, true),
3647                bgl_storage_entry(6, true),
3648                bgl_storage_entry(7, true),
3649                // Framebuffer storage buffer (read-write).
3650                bgl_storage_entry(8, false),
3651                wgpu::BindGroupLayoutEntry {
3652                    binding: 9,
3653                    visibility: wgpu::ShaderStages::COMPUTE,
3654                    ty: wgpu::BindingType::Texture {
3655                        sample_type: wgpu::TextureSampleType::Float { filterable: true },
3656                        view_dimension: wgpu::TextureViewDimension::D2,
3657                        multisampled: false,
3658                    },
3659                    count: None,
3660                },
3661                wgpu::BindGroupLayoutEntry {
3662                    binding: 10,
3663                    visibility: wgpu::ShaderStages::COMPUTE,
3664                    ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
3665                    count: None,
3666                },
3667                bgl_storage_entry(11, false),
3668                bgl_storage_entry(12, true),
3669                bgl_storage_entry(13, true),
3670                bgl_storage_entry(14, true),
3671                // Per-grid cameras (runtime-sized; one per grid).
3672                bgl_storage_entry(15, true),
3673            ],
3674        });
3675        let pl = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3676            label: Some("roxlap-gpu headless.layout"),
3677            bind_group_layouts: &[Some(&bgl)],
3678            immediate_size: 0,
3679        });
3680        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
3681            label: Some("roxlap-gpu headless.pipeline"),
3682            layout: Some(&pl),
3683            module: &shader,
3684            entry_point: Some("render_scene"),
3685            compilation_options: wgpu::PipelineCompilationOptions::default(),
3686            cache: None,
3687        });
3688
3689        // Readback is a tight buffer-to-buffer copy (no 256-byte row
3690        // padding, unlike the old texture-to-buffer path).
3691        let readback = device.create_buffer(&wgpu::BufferDescriptor {
3692            label: Some("roxlap-gpu headless.readback"),
3693            size: u64::from(width) * u64::from(height) * 4,
3694            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
3695            mapped_at_creation: false,
3696        });
3697
3698        Self {
3699            width,
3700            height,
3701            framebuffer,
3702            depth_buffer,
3703            uniform_buf,
3704            _sky_texture: sky_texture,
3705            sky_view,
3706            sky_sampler,
3707            bgl,
3708            pipeline,
3709            readback,
3710            side_shades: [[0; 4]; 2],
3711        }
3712    }
3713
3714    /// Set per-face side-shades for subsequent [`Self::render`] calls —
3715    /// voxlap `setsideshades(top, bot, left, right, up, down)`, each an
3716    /// i8 stamped as u8 (matching the engine path). Lets the gate test
3717    /// the GPU side-shade darkening.
3718    pub fn set_side_shades(&mut self, s: [i8; 6]) {
3719        let v = |i: usize| i32::from(s[i] as u8);
3720        self.side_shades = [[v(0), v(1), v(2), v(3)], [v(4), v(5), 0, 0]];
3721    }
3722
3723    /// Render `scene` from `cameras` (one per grid) and read the
3724    /// framebuffer back as `width*height` packed `0xAABBGGRR` pixels
3725    /// (R in the low byte). Fog is disabled. `mip_scan_dist` drives
3726    /// the GPU.11.1 scene-grid LOD (`0` = always mip-0). Blocks on
3727    /// readback.
3728    ///
3729    /// # Panics
3730    /// If `cameras.len() != scene.grid_count`.
3731    #[must_use]
3732    #[allow(clippy::too_many_arguments)]
3733    pub fn render(
3734        &self,
3735        device: &wgpu::Device,
3736        queue: &wgpu::Queue,
3737        scene: &GpuSceneResident,
3738        cameras: &[Camera],
3739        fov_y_rad: f32,
3740        max_outer_steps: u32,
3741        mip_scan_dist: f32,
3742    ) -> Vec<u32> {
3743        assert_eq!(
3744            cameras.len(),
3745            scene.grid_count as usize,
3746            "headless render: {} cameras for {} grids",
3747            cameras.len(),
3748            scene.grid_count,
3749        );
3750
3751        let cam_vec: Vec<SceneDdaPerGridCamera> = cameras
3752            .iter()
3753            .map(SceneDdaPerGridCamera::from_camera)
3754            .collect();
3755        let grid_cameras = upload_grid_cameras(device, &cam_vec);
3756        let uniform = SceneDdaUniform {
3757            fov_y_rad,
3758            grid_count: scene.grid_count,
3759            max_outer_steps,
3760            _pad0: 0,
3761            screen_size: [self.width, self.height],
3762            _pad1: [0; 2],
3763            // Fog off: near/far past any reachable t → factor 0.
3764            fog_color: [0.0, 0.0, 0.0, 1.0e29],
3765            fog_far: 1.0e30,
3766            write_depth: 0,
3767            occ_page_words: scene.occupancy_page_words,
3768            occ_num_pages: scene.occupancy_num_pages,
3769            mip_scan_dist,
3770            _pad2: 0,
3771            _pad3: 0,
3772            _pad4: 0,
3773            // Sky direction from the first grid camera (the world frame
3774            // in these tests); a default forward camera when there are
3775            // none (grid_count == 0) so the sky lookup stays valid.
3776            sky_cam: SceneDdaPerGridCamera::from_camera(&cameras.first().copied().unwrap_or(
3777                Camera {
3778                    position: [0.0; 3],
3779                    right: [1.0, 0.0, 0.0],
3780                    down: [0.0, 0.0, 1.0],
3781                    forward: [0.0, 1.0, 0.0],
3782                    fov_y_rad,
3783                },
3784            )),
3785            side_shades0: self.side_shades[0],
3786            side_shades1: self.side_shades[1],
3787        };
3788        queue.write_buffer(&self.uniform_buf, 0, bytemuck::bytes_of(&uniform));
3789
3790        let bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
3791            label: Some("roxlap-gpu headless.bg"),
3792            layout: &self.bgl,
3793            entries: &[
3794                wgpu::BindGroupEntry {
3795                    binding: 0,
3796                    resource: self.uniform_buf.as_entire_binding(),
3797                },
3798                wgpu::BindGroupEntry {
3799                    binding: 1,
3800                    resource: scene.occupancy_pages[0].as_entire_binding(),
3801                },
3802                wgpu::BindGroupEntry {
3803                    binding: 2,
3804                    resource: scene.all_color_offsets.as_entire_binding(),
3805                },
3806                wgpu::BindGroupEntry {
3807                    binding: 3,
3808                    resource: scene.all_colors.as_entire_binding(),
3809                },
3810                wgpu::BindGroupEntry {
3811                    binding: 4,
3812                    resource: scene.all_chunk_colors_base.as_entire_binding(),
3813                },
3814                wgpu::BindGroupEntry {
3815                    binding: 5,
3816                    resource: scene.all_chunk_occupancy.as_entire_binding(),
3817                },
3818                wgpu::BindGroupEntry {
3819                    binding: 6,
3820                    resource: scene.grid_static_meta.as_entire_binding(),
3821                },
3822                wgpu::BindGroupEntry {
3823                    binding: 7,
3824                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
3825                },
3826                wgpu::BindGroupEntry {
3827                    binding: 8,
3828                    resource: self.framebuffer.as_entire_binding(),
3829                },
3830                wgpu::BindGroupEntry {
3831                    binding: 9,
3832                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
3833                },
3834                wgpu::BindGroupEntry {
3835                    binding: 10,
3836                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
3837                },
3838                wgpu::BindGroupEntry {
3839                    binding: 11,
3840                    resource: self.depth_buffer.as_entire_binding(),
3841                },
3842                wgpu::BindGroupEntry {
3843                    binding: 12,
3844                    resource: scene.occupancy_pages[1].as_entire_binding(),
3845                },
3846                wgpu::BindGroupEntry {
3847                    binding: 13,
3848                    resource: scene.occupancy_pages[2].as_entire_binding(),
3849                },
3850                wgpu::BindGroupEntry {
3851                    binding: 14,
3852                    resource: scene.occupancy_pages[3].as_entire_binding(),
3853                },
3854                wgpu::BindGroupEntry {
3855                    binding: 15,
3856                    resource: grid_cameras.as_entire_binding(),
3857                },
3858            ],
3859        });
3860
3861        let mut enc =
3862            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
3863        {
3864            let mut pass = enc.begin_compute_pass(&wgpu::ComputePassDescriptor {
3865                label: Some("roxlap-gpu headless.pass"),
3866                timestamp_writes: None,
3867            });
3868            pass.set_pipeline(&self.pipeline);
3869            pass.set_bind_group(0, &bg, &[]);
3870            pass.dispatch_workgroups(self.width.div_ceil(8), self.height.div_ceil(8), 1);
3871        }
3872        enc.copy_buffer_to_buffer(
3873            &self.framebuffer,
3874            0,
3875            &self.readback,
3876            0,
3877            u64::from(self.width) * u64::from(self.height) * 4,
3878        );
3879        queue.submit(Some(enc.finish()));
3880
3881        let slice = self.readback.slice(..);
3882        let (tx, rx) = std::sync::mpsc::channel();
3883        slice.map_async(wgpu::MapMode::Read, move |r| {
3884            let _ = tx.send(r);
3885        });
3886        device.poll(wgpu::PollType::wait_indefinitely()).ok();
3887        rx.recv().expect("map_async channel").expect("map_async");
3888
3889        let data = slice.get_mapped_range();
3890        // Tight `width*height` packed pixels — the shader's
3891        // `pack4x8unorm(vec4(r,g,b,a))` already yields `0xAABBGGRR`
3892        // little-endian, so a straight u32 read reconstructs each pixel.
3893        let out: Vec<u32> = data
3894            .chunks_exact(4)
3895            .map(|px| u32::from_le_bytes([px[0], px[1], px[2], px[3]]))
3896            .collect();
3897        drop(data);
3898        self.readback.unmap();
3899        out
3900    }
3901}
3902
3903fn bgl_uniform_entry(binding: u32) -> wgpu::BindGroupLayoutEntry {
3904    wgpu::BindGroupLayoutEntry {
3905        binding,
3906        visibility: wgpu::ShaderStages::COMPUTE,
3907        ty: wgpu::BindingType::Buffer {
3908            ty: wgpu::BufferBindingType::Uniform,
3909            has_dynamic_offset: false,
3910            min_binding_size: None,
3911        },
3912        count: None,
3913    }
3914}
3915
3916fn bgl_storage_entry(binding: u32, read_only: bool) -> wgpu::BindGroupLayoutEntry {
3917    wgpu::BindGroupLayoutEntry {
3918        binding,
3919        visibility: wgpu::ShaderStages::COMPUTE,
3920        ty: wgpu::BindingType::Buffer {
3921            ty: wgpu::BufferBindingType::Storage { read_only },
3922            has_dynamic_offset: false,
3923            min_binding_size: None,
3924        },
3925        count: None,
3926    }
3927}
3928
3929/// Create a fresh sky panorama texture sized `width × height` with
3930/// the initial pixel data uploaded via `write_texture`. Used by
3931/// `GpuRenderer::new` (1×1 default) and `set_sky_panorama` (host-
3932/// supplied panorama).
3933fn create_sky_texture(
3934    device: &wgpu::Device,
3935    width: u32,
3936    height: u32,
3937    _initial_pixels: &[u8],
3938) -> (wgpu::Texture, wgpu::TextureView) {
3939    let tex = device.create_texture(&wgpu::TextureDescriptor {
3940        label: Some("roxlap-gpu sky_texture"),
3941        size: wgpu::Extent3d {
3942            width,
3943            height,
3944            depth_or_array_layers: 1,
3945        },
3946        mip_level_count: 1,
3947        sample_count: 1,
3948        dimension: wgpu::TextureDimension::D2,
3949        format: wgpu::TextureFormat::Rgba8Unorm,
3950        usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
3951        view_formats: &[],
3952    });
3953    let view = tex.create_view(&wgpu::TextureViewDescriptor::default());
3954    (tex, view)
3955}
3956
3957/// GPU.4 needs to upload a whole grid (~hundreds of MiB) as a few
3958/// storage buffers. wgpu's default `max_storage_buffer_binding_size`
3959/// is 128 MiB, which is just enough for the demo's 32×32 ground
3960/// occupancy (~128 MiB) but not the colour array. We request as
3961/// much as the adapter is willing to give — most desktop GPUs cap
3962/// individual storage buffers at 2-4 GiB; iGPUs often offer the
3963/// full system memory.
3964pub(crate) fn pick_required_limits(adapter_limits: &wgpu::Limits) -> wgpu::Limits {
3965    wgpu::Limits {
3966        max_storage_buffer_binding_size: adapter_limits.max_storage_buffer_binding_size,
3967        max_buffer_size: adapter_limits.max_buffer_size,
3968        // Occupancy paging adds up to MAX_OCC_PAGES-1 extra storage
3969        // bindings; with the scene's other buffers + the GPU.9 depth
3970        // buffer the scene_dda stage needs ~11. The default cap is 8.
3971        // Both NVK and lavapipe advertise ≫16, so request 16.
3972        max_storage_buffers_per_shader_stage: adapter_limits
3973            .max_storage_buffers_per_shader_stage
3974            .min(16),
3975        ..wgpu::Limits::default()
3976    }
3977}
3978
3979fn pick_present_mode(modes: &[wgpu::PresentMode]) -> wgpu::PresentMode {
3980    // Prefer Mailbox > Immediate > Fifo. Fifo is the universal
3981    // fallback and the only one Wayland-on-Mesa always offers.
3982    for &m in &[wgpu::PresentMode::Mailbox, wgpu::PresentMode::Immediate] {
3983        if modes.contains(&m) {
3984            return m;
3985        }
3986    }
3987    wgpu::PresentMode::Fifo
3988}
3989
/// Un-normalised world-space view ray through the centre of window
/// pixel `(x, y)` for a vertical-FOV pinhole camera — the projection
/// `scene_dda.wgsl`'s `render_scene` uses. Shared by
/// [`GpuRenderer::pixel_ray`]; kept as a free function so it can be
/// unit-tested without a device.
///
/// `right`/`down`/`forward` are the camera basis, `w`/`h` the window
/// size in pixels and `fov_y_rad` the full vertical field of view.
/// The result is `forward + kx·right − ky·down`, where `kx`/`ky` are
/// the pixel's offsets on the image plane at unit forward distance
/// (`ky` is positive towards the top of the frame).
#[must_use]
#[allow(clippy::too_many_arguments)]
pub fn pinhole_pixel_ray(
    right: [f64; 3],
    down: [f64; 3],
    forward: [f64; 3],
    x: f64,
    y: f64,
    w: f64,
    h: f64,
    fov_y_rad: f64,
) -> [f64; 3] {
    // Half-extents of the image plane at unit distance along `forward`.
    let tan_half_v = (fov_y_rad * 0.5).tan();
    let tan_half_h = tan_half_v * (w / h);
    // Pixel centre → NDC in [-1, 1]; the vertical axis is +1 at the top row.
    let ndc_x = (x + 0.5) / w * 2.0 - 1.0;
    let ndc_y_top = 1.0 - (y + 0.5) / h * 2.0;
    let kx = ndc_x * tan_half_h;
    let ky = ndc_y_top * tan_half_v;
    std::array::from_fn(|axis| forward[axis] + kx * right[axis] - ky * down[axis])
}
4018
#[cfg(test)]
mod pixel_ray_tests {
    use super::pinhole_pixel_ray;

    // Axis-aligned camera basis shared by the ray tests.
    const RIGHT: [f64; 3] = [1.0, 0.0, 0.0];
    const DOWN: [f64; 3] = [0.0, 1.0, 0.0];
    const FWD: [f64; 3] = [0.0, 0.0, 1.0]; // voxlap z-down "look down"

    // Window size used by the ray tests.
    const FRAME_W: f64 = 1280.0;
    const FRAME_H: f64 = 720.0;

    /// Ray through pixel `(x, y)` of the test window with the shared basis.
    fn ray_through(x: f64, y: f64, fov_y_rad: f64) -> [f64; 3] {
        pinhole_pixel_ray(RIGHT, DOWN, FWD, x, y, FRAME_W, FRAME_H, fov_y_rad)
    }

    // The frame centre (NDC 0,0) must look straight along `forward`.
    #[test]
    fn centre_pixel_is_forward() {
        let d = ray_through(639.5, 359.5, 60_f64.to_radians());
        assert!(
            d[0].abs() < 1e-9 && d[1].abs() < 1e-9,
            "centre ≈ forward, got {d:?}"
        );
        assert!((d[2] - 1.0).abs() < 1e-9);
    }

    // At the very right edge the lateral component reaches
    // half_w = tan(fov_y/2)*aspect, i.e. the ray tilts +right by
    // tan(hfov/2).
    #[test]
    fn right_edge_tilts_by_half_w() {
        let fov = 60_f64.to_radians();
        let d = ray_through(1279.5, 359.5, fov);
        let half_w = (fov * 0.5).tan() * (FRAME_W / FRAME_H);
        assert!((d[0] - half_w).abs() < 1e-6, "x={}, half_w={half_w}", d[0]);
        assert!(d[0] > 0.0, "right edge tilts +right");
    }

    /// Run every WGSL shader through naga's parser and validator (the
    /// same front-end + validator wgpu runs at pipeline creation), so
    /// shader edits — e.g. the GPU.10 sprite lighting bindings — fail
    /// in CI without needing a GPU device.
    #[test]
    fn wgsl_shaders_validate() {
        const SHADERS: [(&str, &str); 8] = [
            (
                "sprite_model_dda.wgsl",
                include_str!("../shaders/sprite_model_dda.wgsl"),
            ),
            ("scene_dda.wgsl", include_str!("../shaders/scene_dda.wgsl")),
            ("blit.wgsl", include_str!("../shaders/blit.wgsl")),
            ("chunk_dda.wgsl", include_str!("../shaders/chunk_dda.wgsl")),
            ("grid_dda.wgsl", include_str!("../shaders/grid_dda.wgsl")),
            (
                "scene_blit.wgsl",
                include_str!("../shaders/scene_blit.wgsl"),
            ),
            ("line.wgsl", include_str!("../shaders/line.wgsl")),
            ("image.wgsl", include_str!("../shaders/image.wgsl")),
        ];
        let mut validator = naga::valid::Validator::new(
            naga::valid::ValidationFlags::all(),
            naga::valid::Capabilities::all(),
        );
        for (name, src) in SHADERS {
            let module = match naga::front::wgsl::parse_str(src) {
                Ok(module) => module,
                Err(e) => panic!("{name}: WGSL parse failed:\n{}", e.emit_to_string(src)),
            };
            if let Err(e) = validator.validate(&module) {
                panic!("{name}: WGSL validation failed: {e:?}");
            }
        }
    }

    /// A 2×2 world quad centred straight ahead must produce vertices
    /// whose homogeneous `w` is the camera-forward distance (so the
    /// shader's `clip = ndc·w` recovers perspective-correct UVs) and
    /// whose `depth` is the euclidean range. Checks geometry only — no
    /// GPU device involved.
    #[test]
    fn image_vertices_carry_forward_w_and_euclidean_depth() {
        let cam = crate::GpuLineCamera {
            pos: [0.0, 0.0, 0.0],
            right: [1.0, 0.0, 0.0],
            down: [0.0, 1.0, 0.0],
            forward: [0.0, 0.0, 1.0],
        };
        // Corner order TL, TR, BL, BR; every corner sits 10 units ahead
        // (forward = +Z), spanning x∈[-1,1], y∈[-1,1].
        let corners = [[-1.0, -1.0], [1.0, -1.0], [-1.0, 1.0], [1.0, 1.0]].map(|[x, y]| [x, y, 10.0]);
        let quad = crate::GpuImageQuad {
            corners,
            image: 0,
            tint: [1.0, 1.0, 1.0, 1.0],
            depth_test: true,
            alpha_cutoff: 0.0,
        };
        let verts = crate::build_image_vertices(&cam, &quad, 800, 600, 60_f32.to_radians(), false);
        assert_eq!(verts.len(), 6, "two triangles, no near-clip");
        for vertex in &verts {
            assert!((vertex.w - 10.0).abs() < 1e-4, "w == forward distance");
            assert!(vertex.depth >= 10.0, "euclidean depth >= forward distance");
            assert_eq!(vertex.depth_test, 1.0);
        }
    }
}