Skip to main content

roxlap_gpu/
lib.rs

1//! WGPU-backed compute-shader renderer scaffold for the roxlap
2//! voxel engine. GPU.1 in `PORTING-GPU.md`.
3//!
4//! GPU.1's job: stand up the device + surface + swapchain on a
5//! host window (any [`raw-window-handle`](raw_window_handle)
6//! provider), present a clear-to-colour frame each render call,
7//! and give the host a one-call opt-in. No voxel marching yet — the
8//! [`examples/probe.rs`](../examples/probe.rs) standalone holds
9//! the empirical FPS baseline from GPU.0.
10//!
11//! Later sub-substages flesh `GpuRenderer::render` out: GPU.2
12//! uploads voxel data, GPU.3 dispatches the inner-DDA compute
13//! shader, GPU.4 layers in chunk skipping, GPU.5 plugs the renderer
14//! into `roxlap-scene::Scene`, …
15//!
16//! ## Host integration shape (GPU.1)
17//!
18//! ```no_run
19//! use std::sync::Arc;
20//! use roxlap_gpu::{GpuRenderer, GpuRendererSettings};
21//! # use winit::window::Window;
22//! # fn pick(w: Arc<Window>, size: (u32, u32)) -> Option<GpuRenderer> {
23//! match GpuRenderer::new_blocking(w, size, GpuRendererSettings::default()) {
24//!     Ok(r) => Some(r),
25//!     Err(e) => {
26//!         eprintln!("GPU init failed: {e}; falling back to CPU");
27//!         None
28//!     }
29//! }
30//! # }
31//! ```
32
33#![allow(clippy::must_use_candidate, clippy::too_many_lines)]
34
35pub mod camera;
36pub mod decompress;
37pub mod grid;
38// Headless rendering is a native-only test/bench aid: it blocks on
39// `pollster` + `device.poll(Wait)`, neither of which exists on wasm.
40#[cfg(not(target_arch = "wasm32"))]
41pub mod headless;
42pub mod resident;
43pub mod scene;
44pub mod sprite_model;
45
46pub use camera::Camera;
47pub use decompress::{decompress_chunk, ChunkUpload, BEDROCK_RGB, CHUNK_Z};
48pub use grid::{bounding_box_of, GpuGridResident, GridUpload};
49#[cfg(not(target_arch = "wasm32"))]
50pub use headless::HeadlessGpu;
51pub use resident::GpuChunkResident;
52pub use scene::{
53    GpuSceneResident, GridRuntimeTransform, GridStaticMeta, RefreshOutcome, SceneUpload,
54};
55pub use sprite_model::{
56    build_sprite_model, sprite_model_from_clip_frame, sprite_model_from_voxel_frame,
57    SpriteInstance, SpriteInstanceTransform, SpriteModel, SpriteModelRegistry,
58    SpriteRegistryResident,
59};
60
61use std::sync::Arc;
62
63use bytemuck::{Pod, Zeroable};
64use raw_window_handle::{HasDisplayHandle, HasWindowHandle};
65
/// Caller-controllable knobs for [`GpuRenderer::new`]. Defaults
/// target "highest-performance GPU, prefer Mailbox/Immediate over
/// vsync" — i.e. the same configuration the GPU.0 probe used to
/// measure the FPS ceiling.
///
/// The struct is `Copy`; [`GpuRenderer::new`] takes it by value.
#[derive(Debug, Clone, Copy)]
pub struct GpuRendererSettings {
    /// Adapter power class to ask for. [`Default`] picks
    /// [`PowerPreference::High`].
    pub power_preference: PowerPreference,
    /// Initial clear colour cycled by GPU.1's empty render path.
    /// The voxel-rendering substages overwrite this entirely.
    /// [`Default`] uses `[0.06, 0.08, 0.12]` (presumably RGB in
    /// `0..=1` — confirm against the render path).
    pub clear_colour: [f64; 3],
    /// Prefer mailbox/immediate when offered; falls back to FIFO if
    /// the surface only supports it (Wayland under Mesa often does).
    /// [`Default`] enables this.
    pub uncapped_present: bool,
}
80
/// Adapter power class requested through
/// [`GpuRendererSettings::power_preference`].
///
/// NOTE(review): presumably translated to `wgpu::PowerPreference`
/// during adapter selection — the mapping is outside this excerpt.
#[derive(Debug, Clone, Copy)]
pub enum PowerPreference {
    /// Lower-power adapter (presumably an integrated GPU — confirm).
    Low,
    /// Higher-performance adapter; the [`GpuRendererSettings`] default.
    High,
}
86
87impl Default for GpuRendererSettings {
88    fn default() -> Self {
89        Self {
90            power_preference: PowerPreference::High,
91            clear_colour: [0.06, 0.08, 0.12],
92            uncapped_present: true,
93        }
94    }
95}
96
/// Errors `GpuRenderer::new` surfaces to the host. The host's
/// expected flow is "try this, fall back to the CPU path on Err".
#[derive(Debug)]
pub enum GpuInitError {
    /// `wgpu` could not create a surface for the host window.
    CreateSurface(wgpu::CreateSurfaceError),
    /// No compatible adapter was found; carries no underlying error.
    NoAdapter,
    /// An adapter was selected but the device request failed.
    RequestDevice(wgpu::RequestDeviceError),
}
105
106impl std::fmt::Display for GpuInitError {
107    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
108        match self {
109            Self::CreateSurface(e) => write!(f, "create_surface failed: {e}"),
110            Self::NoAdapter => write!(
111                f,
112                "no compatible adapter — does this system have a Vulkan/Metal/DX12 driver?"
113            ),
114            Self::RequestDevice(e) => write!(f, "request_device failed: {e}"),
115        }
116    }
117}
118
119impl std::error::Error for GpuInitError {
120    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
121        match self {
122            Self::CreateSurface(e) => Some(e),
123            Self::RequestDevice(e) => Some(e),
124            Self::NoAdapter => None,
125        }
126    }
127}
128
129impl From<wgpu::CreateSurfaceError> for GpuInitError {
130    fn from(value: wgpu::CreateSurfaceError) -> Self {
131        Self::CreateSurface(value)
132    }
133}
134
135impl From<wgpu::RequestDeviceError> for GpuInitError {
136    fn from(value: wgpu::RequestDeviceError) -> Self {
137        Self::RequestDevice(value)
138    }
139}
140
// NOTE(review): the paragraph below describes `GpuRenderer` (declared
// further down in this file), not `GpuLine`. It is kept as a plain
// comment so it does not leak into `GpuLine`'s rustdoc.
//
// WGPU-backed renderer. Owns the device, queue, and surface
// bound to the host's window. `Self::render` is the GPU.1
// clear-to-colour path; `Self::render_chunk` is GPU.3's
// single-chunk DDA marcher.
//
// The window is consumed only at construction — `wgpu`'s
// `Surface<'static>` keeps its own `Arc` clone of the handle, so
// the renderer holds no window field of its own.

/// A world-space line segment for [`GpuRenderer::draw_lines_deferred`].
/// `color` is straight RGBA in `0..=1` (the alpha drives the over-blend);
/// `width_px` is the screen-space thickness; `depth_test` occludes the
/// segment behind nearer marched geometry.
#[derive(Clone, Copy, Debug)]
pub struct GpuLine {
    /// First endpoint, in world space.
    pub a: [f32; 3],
    /// Second endpoint, in world space.
    pub b: [f32; 3],
    /// Straight (non-premultiplied) RGBA, each channel in `0..=1`.
    pub color: [f32; 4],
    /// Screen-space thickness in pixels; `build_line_vertices` clamps it
    /// to at least one pixel.
    pub width_px: f32,
    /// When `true` the segment is depth-tested against the marched scene.
    pub depth_test: bool,
}
161
/// World camera basis for projecting [`GpuLine`] endpoints — the same
/// pinhole the scene-DDA pass marches with (`right`/`down`/`forward`
/// orthonormal, `pos` in world voxel units).
#[derive(Clone, Copy, Debug)]
pub struct GpuLineCamera {
    /// Camera position in world voxel units.
    pub pos: [f32; 3],
    /// Camera-space +x axis, expressed in world space.
    pub right: [f32; 3],
    /// Camera-space +y axis (screen-down), expressed in world space.
    pub down: [f32; 3],
    /// View direction; the dot product with it is the near-clip key.
    pub forward: [f32; 3],
}
172
/// Near plane (camera-forward distance) below which a [`GpuLine`] endpoint
/// is clipped, so the pinhole divide stays finite. `clip_near_image` clips
/// image quads against the same plane.
const LINE_NEAR_Z: f32 = 0.0625;
/// Depth-test slack (euclidean world distance) so a line resting on the
/// surface it traces doesn't z-fight the marched geometry.
/// NOTE(review): presumably uploaded as `LineParams::depth_bias` — the
/// upload site is outside this excerpt.
const LINE_DEPTH_BIAS: f32 = 0.5;
179
/// One expanded-quad vertex (`build_line_vertices` output). `pos` is NDC;
/// `depth` is the euclidean world distance of the source endpoint (the
/// marcher's `best_t` metric); `depth_test` is `1.0`/`0.0`.
///
/// `#[repr(C)]` + `Pod`: field order and sizes are part of the vertex
/// layout, so do not reorder fields.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct LineVertex {
    /// NDC xy of the quad corner (x is already mirrored when flipping).
    pos: [f32; 2],
    /// Euclidean camera distance of the endpoint this corner came from.
    depth: f32,
    /// `1.0` when the owning line requested depth testing, else `0.0`.
    depth_test: f32,
    /// The owning line's straight RGBA colour.
    color: [f32; 4],
}
191
/// `line.wgsl` / `image.wgsl` fragment uniform (std140; padded to 32 bytes
/// so the uniform's struct stride is a 16-byte multiple).
///
/// Five 4-byte scalars (20 bytes) plus 12 bytes of padding = 32 bytes.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct LineParams {
    /// Target width in pixels.
    screen_w: u32,
    /// Target height in pixels.
    screen_h: u32,
    /// Depth-test slack (presumably `LINE_DEPTH_BIAS` — confirm at the
    /// upload site).
    depth_bias: f32,
    /// Non-zero when no scene depth is bound, so the shader must not
    /// index the depth binding.
    no_depth: u32,
    /// 1 when the viewport flip is on. The depth buffer is written
    /// unflipped (the blit mirrors at read time), but these passes flip the
    /// vertex NDC X, so the fragment must mirror its depth lookup to match.
    flip_x: u32,
    /// Explicit tail padding up to 32 bytes.
    _pad: [u32; 3],
}
207
/// Lazy-built debug-line pipeline (L3.2). The bind group is rebuilt each
/// draw (it references the current `scene_dda.depth_buffer`, which the
/// swapchain resize recreates); the pipeline / layout / uniform persist.
struct LineResources {
    /// Render pipeline for the line pass.
    pipeline: wgpu::RenderPipeline,
    /// Layout the per-draw bind group is created against.
    bgl: wgpu::BindGroupLayout,
    /// Uniform buffer (presumably holding a `LineParams` — confirm at the
    /// write site).
    uniform_buf: wgpu::Buffer,
    /// 1-word stand-in bound when no scene depth exists (sprite-only /
    /// empty scene); `no_depth = 1` keeps the shader from indexing it.
    dummy_depth: wgpu::Buffer,
}
219
220/// Project + expand world-space [`GpuLine`]s into screen-space quad
221/// vertices (6 per visible segment) for `line.wgsl`. Mirrors the
222/// scene-DDA pinhole (`forward + ndc_x·half_w·right − ndc_y·half_h·down`)
223/// so lines land on the marched geometry, carrying each endpoint's
224/// euclidean world distance as the depth-test key (= the marcher's
225/// `best_t`). Segments fully behind the near plane are dropped; the rest
226/// are clipped to it.
227fn build_line_vertices(
228    cam: &GpuLineCamera,
229    lines: &[GpuLine],
230    w: u32,
231    h: u32,
232    fov_y: f32,
233    flip_x: bool,
234) -> Vec<LineVertex> {
235    let aspect = w as f32 / h as f32;
236    let half_h = (fov_y * 0.5).tan();
237    let half_w = half_h * aspect;
238    let (wf, hf) = (w as f32, h as f32);
239
240    let cam_coords = |p: [f32; 3]| -> [f32; 3] {
241        let d = [p[0] - cam.pos[0], p[1] - cam.pos[1], p[2] - cam.pos[2]];
242        [
243            cam.right[0] * d[0] + cam.right[1] * d[1] + cam.right[2] * d[2],
244            cam.down[0] * d[0] + cam.down[1] * d[1] + cam.down[2] * d[2],
245            cam.forward[0] * d[0] + cam.forward[1] * d[1] + cam.forward[2] * d[2],
246        ]
247    };
248    // Camera-space point → (NDC xy, euclidean depth). NDC y is up (+1 top),
249    // matching WebGPU clip space; depth is the marcher's world-t metric.
250    let project = |q: [f32; 3]| -> ([f32; 2], f32) {
251        let inv = 1.0 / q[2];
252        let nx = q[0] * inv / half_w;
253        let ny = -q[1] * inv / half_h;
254        let depth = (q[0] * q[0] + q[1] * q[1] + q[2] * q[2]).sqrt();
255        ([nx, ny], depth)
256    };
257
258    let mut out = Vec::with_capacity(lines.len() * 6);
259    for line in lines {
260        let ca = cam_coords(line.a);
261        let cb = cam_coords(line.b);
262        let (cfa, cfb) = (ca[2], cb[2]);
263        if cfa < LINE_NEAR_Z && cfb < LINE_NEAR_Z {
264            continue;
265        }
266        // Near-clip in segment-parameter space on the forward component.
267        let (mut t0, mut t1) = (0.0f32, 1.0f32);
268        let dz = cfb - cfa;
269        if dz.abs() > f32::EPSILON {
270            let tn = (LINE_NEAR_Z - cfa) / dz;
271            if dz > 0.0 {
272                t0 = t0.max(tn);
273            } else {
274                t1 = t1.min(tn);
275            }
276        }
277        if t0 > t1 {
278            continue;
279        }
280        let lerp3 = |t: f32| {
281            [
282                ca[0] + (cb[0] - ca[0]) * t,
283                ca[1] + (cb[1] - ca[1]) * t,
284                ca[2] + (cb[2] - ca[2]) * t,
285            ]
286        };
287        let (n0, d0) = project(lerp3(t0));
288        let (n1, d1) = project(lerp3(t1));
289
290        // Expand in pixel space for a uniform screen-space thickness.
291        let to_px = |n: [f32; 2]| [(n[0] * 0.5 + 0.5) * wf, (0.5 - n[1] * 0.5) * hf];
292        let to_ndc = |p: [f32; 2]| [p[0] / wf * 2.0 - 1.0, 1.0 - p[1] / hf * 2.0];
293        let p0 = to_px(n0);
294        let p1 = to_px(n1);
295        let (dx, dy) = (p1[0] - p0[0], p1[1] - p0[1]);
296        let len = (dx * dx + dy * dy).sqrt().max(1e-6);
297        let half = line.width_px.max(1.0) * 0.5;
298        let (ex, ey) = (-dy / len * half, dx / len * half);
299
300        let c0a = to_ndc([p0[0] + ex, p0[1] + ey]);
301        let c0b = to_ndc([p0[0] - ex, p0[1] - ey]);
302        let c1a = to_ndc([p1[0] + ex, p1[1] + ey]);
303        let c1b = to_ndc([p1[0] - ex, p1[1] - ey]);
304        let dt = if line.depth_test { 1.0 } else { 0.0 };
305        // Mirror the overlay's NDC x to match the flipped scene blit.
306        let vert = |pos: [f32; 2], depth: f32| LineVertex {
307            pos: [if flip_x { -pos[0] } else { pos[0] }, pos[1]],
308            depth,
309            depth_test: dt,
310            color: line.color,
311        };
312        // Two triangles, cull disabled so winding is irrelevant.
313        out.push(vert(c0a, d0));
314        out.push(vert(c0b, d0));
315        out.push(vert(c1a, d1));
316        out.push(vert(c1a, d1));
317        out.push(vert(c0b, d0));
318        out.push(vert(c1b, d1));
319    }
320    out
321}
322
/// A world-space 2D image-sprite quad for [`GpuRenderer::draw_images_deferred`].
/// `corners` are the four world points `TL, TR, BL, BR` (UVs `(0,0) (1,0)
/// (0,1) (1,1)`); `image` indexes a texture uploaded via
/// [`GpuRenderer::upload_image`]; `tint` is straight RGBA in `0..=1`
/// (multiplied into every texel); `depth_test` occludes the quad behind
/// nearer marched geometry. The facade resolves orientation + back-face
/// culling, so this is pure geometry.
#[derive(Clone, Copy, Debug)]
pub struct GpuImageQuad {
    /// World-space corners in `TL, TR, BL, BR` order.
    pub corners: [[f32; 3]; 4],
    /// Id of the texture to sample, as returned by the image upload.
    pub image: usize,
    /// Straight RGBA multiplier applied to every texel.
    pub tint: [f32; 4],
    /// When `true` the quad is depth-tested against the marched scene.
    pub depth_test: bool,
    /// Texels with alpha below this (`0..=1`) are discarded in the FS.
    /// `0.0` keeps the plain over-blend.
    pub alpha_cutoff: f32,
}
340
/// One expanded textured-quad vertex (`build_image_vertices` output).
/// `ndc` is the projected NDC xy; `w` is the source `forward` depth, fed
/// back into a homogeneous clip position so the rasterizer interpolates
/// `uv` perspective-correctly; `depth` is the euclidean world distance
/// (the marcher's `best_t`) for the manual depth test.
///
/// `#[repr(C)]` + `Pod`: field order and sizes are part of the vertex
/// layout, so do not reorder fields.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct ImageVertex {
    /// Projected NDC xy (x is already mirrored when flipping).
    ndc: [f32; 2],
    /// Camera-forward distance of the vertex.
    w: f32,
    /// Euclidean camera distance of the vertex.
    depth: f32,
    /// `1.0` when the owning quad requested depth testing, else `0.0`.
    depth_test: f32,
    /// Copy of the owning quad's `alpha_cutoff`.
    cutoff: f32,
    /// Texture coordinate (interpolated at near-plane crossings).
    uv: [f32; 2],
    /// Copy of the owning quad's `tint`.
    tint: [f32; 4],
}
357
/// Lazy-built image-sprite pipeline (mirrors [`LineResources`]). The
/// per-draw bind group adds the quad's texture + a sampler to the line
/// pass's uniform + scene-depth bindings.
struct ImageResources {
    /// Render pipeline for the image-sprite pass.
    pipeline: wgpu::RenderPipeline,
    /// Layout the per-draw bind group is created against.
    bgl: wgpu::BindGroupLayout,
    /// Uniform buffer (presumably a `LineParams`, which is documented as
    /// the `image.wgsl` fragment uniform — confirm at the write site).
    uniform_buf: wgpu::Buffer,
    /// Stand-in depth buffer (presumably bound when no scene depth exists,
    /// as in `LineResources` — confirm).
    dummy_depth: wgpu::Buffer,
    /// Sampler bound alongside each quad's texture.
    sampler: wgpu::Sampler,
}
368
/// A retained image-sprite texture (uploaded via
/// [`GpuRenderer::upload_image`], referenced by [`GpuImageQuad::image`]).
struct ImageResident {
    /// View used when binding the texture for a draw.
    view: wgpu::TextureView,
    // Held so the view stays valid + the texture shows in profiler dumps.
    _texture: wgpu::Texture,
}
376
/// Camera-space textured-quad vertex (near-clip working set): the
/// `(right, down, forward)` components + the texture `uv`.
#[derive(Clone, Copy)]
struct ImgClipV {
    /// Camera-space position: `[right, down, forward]`.
    cam: [f32; 3],
    /// Texture coordinate carried through clipping.
    uv: [f32; 2],
}
384
385/// Clip a convex camera-space polygon against the near plane
386/// (`forward >= LINE_NEAR_Z`), interpolating UVs at each crossing.
387fn clip_near_image(poly: &[ImgClipV]) -> Vec<ImgClipV> {
388    let n = poly.len();
389    let mut out: Vec<ImgClipV> = Vec::with_capacity(n + 1);
390    for i in 0..n {
391        let cur = poly[i];
392        let prev = poly[(i + n - 1) % n];
393        let cur_in = cur.cam[2] >= LINE_NEAR_Z;
394        let prev_in = prev.cam[2] >= LINE_NEAR_Z;
395        if cur_in != prev_in {
396            let t = (LINE_NEAR_Z - prev.cam[2]) / (cur.cam[2] - prev.cam[2]);
397            out.push(ImgClipV {
398                cam: [
399                    prev.cam[0] + (cur.cam[0] - prev.cam[0]) * t,
400                    prev.cam[1] + (cur.cam[1] - prev.cam[1]) * t,
401                    LINE_NEAR_Z,
402                ],
403                uv: [
404                    prev.uv[0] + (cur.uv[0] - prev.uv[0]) * t,
405                    prev.uv[1] + (cur.uv[1] - prev.uv[1]) * t,
406                ],
407            });
408        }
409        if cur_in {
410            out.push(cur);
411        }
412    }
413    out
414}
415
416/// Project + near-clip a world-space [`GpuImageQuad`] into perspective-correct
417/// textured-quad vertices for `image.wgsl`. Mirrors the scene-DDA pinhole
418/// (the same one [`build_line_vertices`] uses), carrying each vertex's
419/// euclidean world distance as the depth-test key. Quads fully behind the
420/// near plane produce no vertices.
421fn build_image_vertices(
422    cam: &GpuLineCamera,
423    quad: &GpuImageQuad,
424    w: u32,
425    h: u32,
426    fov_y: f32,
427    flip_x: bool,
428) -> Vec<ImageVertex> {
429    let aspect = w as f32 / h as f32;
430    let half_h = (fov_y * 0.5).tan();
431    let half_w = half_h * aspect;
432    let dt = if quad.depth_test { 1.0 } else { 0.0 };
433
434    let cam_coords = |p: [f32; 3]| -> [f32; 3] {
435        let d = [p[0] - cam.pos[0], p[1] - cam.pos[1], p[2] - cam.pos[2]];
436        [
437            cam.right[0] * d[0] + cam.right[1] * d[1] + cam.right[2] * d[2],
438            cam.down[0] * d[0] + cam.down[1] * d[1] + cam.down[2] * d[2],
439            cam.forward[0] * d[0] + cam.forward[1] * d[1] + cam.forward[2] * d[2],
440        ]
441    };
442    let project = |v: ImgClipV| -> ImageVertex {
443        let (cx, cy, cz) = (v.cam[0], v.cam[1], v.cam[2]);
444        let nx = cx / (cz * half_w);
445        ImageVertex {
446            // Mirror NDC x to match the flipped scene blit.
447            ndc: [if flip_x { -nx } else { nx }, -cy / (cz * half_h)],
448            w: cz,
449            depth: (cx * cx + cy * cy + cz * cz).sqrt(),
450            depth_test: dt,
451            cutoff: quad.alpha_cutoff,
452            uv: v.uv,
453            tint: quad.tint,
454        }
455    };
456
457    // Per-corner UV: TL(0,0) TR(1,0) BL(0,1) BR(1,1).
458    let uvs = [[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
459    let verts: Vec<ImgClipV> = quad
460        .corners
461        .iter()
462        .zip(uvs)
463        .map(|(c, uv)| ImgClipV {
464            cam: cam_coords(*c),
465            uv,
466        })
467        .collect();
468
469    let mut out = Vec::with_capacity(12);
470    for tri in [[0usize, 1, 2], [1, 3, 2]] {
471        let poly = [verts[tri[0]], verts[tri[1]], verts[tri[2]]];
472        let clipped = clip_near_image(&poly);
473        if clipped.len() < 3 {
474            continue;
475        }
476        for i in 1..clipped.len() - 1 {
477            out.push(project(clipped[0]));
478            out.push(project(clipped[i]));
479            out.push(project(clipped[i + 1]));
480        }
481    }
482    out
483}
484
/// WGPU-backed renderer. Owns the surface (with its configuration), the
/// device and the queue, plus the lazily built per-pass resources below.
/// No window handle is stored — only the `wgpu::Surface<'static>`.
pub struct GpuRenderer {
    surface: wgpu::Surface<'static>,
    surface_config: wgpu::SurfaceConfiguration,
    device: wgpu::Device,
    queue: wgpu::Queue,
    adapter_info: String,
    clear_colour: [f64; 3],
    frame_count: u32,
    /// Mirror the marched scene horizontally on present (the scene blit
    /// samples `width-1-x`, and line/image overlays mirror their NDC x).
    /// The egui pass is unaffected. See [`Self::set_flip_x`].
    flip_x: bool,
    /// Lazy-built on first [`Self::render_chunk`] call; rebuilt when
    /// the swapchain resizes (storage texture must match).
    chunk_dda: Option<ChunkDdaResources>,
    /// Lazy-built on first [`Self::render_grid`] call; same resize
    /// trigger as `chunk_dda`. The two paths share the same blit
    /// pipeline structure but bind different storage layouts.
    grid_dda: Option<GridDdaResources>,
    /// Lazy-built on first [`Self::render_scene`] call. Holds the
    /// multi-grid pipeline + per-grid camera uniforms.
    scene_dda: Option<SceneDdaResources>,
    /// Whether the *current* deferred frame ran a scene pass that wrote
    /// `scene_dda.depth_buffer`. [`Self::render_scene`] sets it; the
    /// color-only [`Self::render_clear_deferred`] clears it. Without this,
    /// depth-tested overlays (`draw_lines_deferred` /
    /// `draw_images_deferred`) drawn over an empty/cleared scene would
    /// test against the *previous* scene's stale depth and clip
    /// incorrectly.
    scene_depth_valid: bool,
    /// GPU.8 — panoramic sky texture + sampler. Created at
    /// `new` as a 1×1 mid-grey default; [`Self::set_sky_panorama`]
    /// replaces it. The scene-DDA bind group references this each
    /// frame.
    sky_texture: wgpu::Texture,
    sky_view: wgpu::TextureView,
    sky_sampler: wgpu::Sampler,
    /// GPU.8 fog state. `color` is BGRA-style premultiplied (each
    /// channel in [0, 1]); `near` is the world-t distance at which
    /// fog starts kicking in; `far` is the distance at which it's
    /// fully opaque. The shader does
    /// `mix(hit, fog, smoothstep(near, far, t))`.
    fog_color: [f32; 3],
    fog_near: f32,
    fog_far: f32,
    /// GPU.10 — sprites rendered as DDA-marched voxel models (the
    /// precise path; the GPU.9 compute splatter it replaced was
    /// retired in 10.5). Holds the concatenated model registry + the
    /// per-frame instance array; set via [`Self::set_sprite_instances`].
    sprite_registry: Option<sprite_model::SpriteRegistryResident>,
    /// Lazy-built pipeline + uniform for the model-DDA pass.
    sprite_model_dda: Option<SpriteModelDdaResources>,
    /// GPU.10.4 — LOD aggressiveness: step a sprite to the next mip
    /// once a mip-0 voxel projects below this many screen pixels.
    /// Defaults to 4.0 (the empirical sweet spot); the host can tune
    /// via [`Self::set_sprite_lod_px`].
    sprite_lod_px: f32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` is marched at the mip level
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// ladder. `0` disables LOD (always mip-0). Tunable via
    /// [`Self::set_scene_mip_scan_dist`] — the axis-aligned-mip-beams
    /// mitigation (GPU.11.2) pushes it outward if banding appears.
    scene_mip_scan_dist: f32,
    /// Per-face grid side-shades (voxlap setsideshades), packed for the
    /// scene-DDA uniform: `[0]=(top,bot,left,right)`, `[1]=(up,down,_,_)`.
    /// Each is the u8 shade intensity. `[[0;4];2]` = no shading. Set via
    /// [`Self::set_scene_side_shades`].
    scene_side_shades: [[i32; 4]; 2],
    /// Vertical FOV (radians) the last `render_scene` marched with —
    /// cached so [`Self::pixel_ray`] reconstructs the matching view ray
    /// for picking. `0` until the first scene render.
    last_fov_y_rad: f32,
    /// The acquired-but-not-yet-presented swapchain frame from the most
    /// recent deferred render ([`Self::render_scene`] /
    /// [`Self::render_clear_deferred`]). [`Self::present`] shows it as
    /// is; [`Self::paint_egui`] overlays egui first. Lets a host slot a
    /// UI pass between the marcher and present. `None` between present
    /// and the next render.
    pending_frame: Option<(wgpu::SurfaceTexture, wgpu::TextureView)>,
    /// Lazy-built debug-line pipeline (L3.2) — built on the first
    /// [`Self::draw_lines_deferred`] call.
    line_resources: Option<LineResources>,
    /// Persistent debug-line vertex buffer (L3.3) — grown on demand and
    /// reused across frames so a per-frame overlay (hundreds of segments)
    /// costs one `write_buffer`, not a fresh allocation. `line_vbuf_cap`
    /// is its capacity in bytes.
    line_vbuf: Option<wgpu::Buffer>,
    line_vbuf_cap: u64,
    /// Lazy-built image-sprite pipeline — built on the first
    /// [`Self::draw_images_deferred`] call.
    image_resources: Option<ImageResources>,
    /// Persistent image-sprite vertex buffer, grown on demand and reused
    /// across frames (like [`Self::line_vbuf`]).
    image_vbuf: Option<wgpu::Buffer>,
    image_vbuf_cap: u64,
    /// Retained image-sprite textures, indexed by the id
    /// [`Self::upload_image`] returns. A dropped slot is `None` and is
    /// re-used by a later upload.
    images: Vec<Option<ImageResident>>,
    /// Lazy-built `egui-wgpu` paint pipeline; created on the first
    /// [`Self::paint_egui`] call (`hud` feature).
    #[cfg(feature = "hud")]
    egui_renderer: Option<egui_wgpu::Renderer>,
}
589
/// Per-renderer chunk-DDA pipeline state. The compute shader writes
/// into the storage texture; a fullscreen-triangle render pass
/// nearest-neighbour blits it to the swapchain.
struct ChunkDdaResources {
    /// Size the storage texture was created with (presumably
    /// `(width, height)` in pixels — confirm at the build site).
    storage_size: (u32, u32),
    /// View of the storage texture the compute shader writes into.
    storage_view: wgpu::TextureView,
    /// Uniform buffer (presumably a `ChunkDdaUniform` — confirm).
    uniform_buf: wgpu::Buffer,
    /// Bind-group layout of the DDA compute pass.
    bgl_dda: wgpu::BindGroupLayout,
    /// The DDA compute pipeline.
    pipeline_dda: wgpu::ComputePipeline,
    /// Bind group used by the blit pass.
    blit_bg: wgpu::BindGroup,
    /// Render pipeline for the blit to the swapchain.
    pipeline_blit: wgpu::RenderPipeline,
    // wgpu BindGroups internally Arc their resources, but we keep
    // the handle so the sampler shows up in profiler dumps.
    _sampler: wgpu::Sampler,
}
605
/// Per-renderer grid-DDA pipeline state. Field-for-field the same shape
/// as `ChunkDdaResources`; per the `GpuRenderer::grid_dda` field docs the
/// two paths share the blit pipeline structure but bind different
/// storage layouts.
struct GridDdaResources {
    /// Size the storage texture was created with (presumably
    /// `(width, height)` in pixels — confirm at the build site).
    storage_size: (u32, u32),
    /// View of the storage texture the compute pass writes into.
    storage_view: wgpu::TextureView,
    /// Uniform buffer (presumably a `GridDdaUniform` — confirm).
    uniform_buf: wgpu::Buffer,
    /// Bind-group layout of the DDA compute pass.
    bgl_dda: wgpu::BindGroupLayout,
    /// The DDA compute pipeline.
    pipeline_dda: wgpu::ComputePipeline,
    /// Bind group used by the blit pass.
    blit_bg: wgpu::BindGroup,
    /// Render pipeline for the blit to the swapchain.
    pipeline_blit: wgpu::RenderPipeline,
    // Kept alive alongside the bind group that references it.
    _sampler: wgpu::Sampler,
}
616
/// Per-renderer scene-DDA (multi-grid) pipeline state: the framebuffer
/// and depth storage buffers, the compute + blit pipelines, and their
/// uniforms.
struct SceneDdaResources {
    /// Size the per-pixel buffers were created for (presumably
    /// `(width, height)` in pixels — confirm at the build site).
    storage_size: (u32, u32),
    /// Framebuffer as a packed-`rgba8unorm` storage **buffer** (row
    /// stride = width), written by the scene + sprite compute passes
    /// and read by the blit. A buffer (not a storage texture) dodges
    /// Chrome-Dawn's tiled write-texture layout (which produced a
    /// 128×256-tiled image); linear + explicit stride is portable.
    framebuffer: wgpu::Buffer,
    /// Uniform buffer (presumably a `SceneDdaUniform` — confirm).
    uniform_buf: wgpu::Buffer,
    /// Bind-group layout of the scene-DDA compute pass.
    bgl_dda: wgpu::BindGroupLayout,
    /// The scene-DDA compute pipeline.
    pipeline_dda: wgpu::ComputePipeline,
    /// Bind group used by the blit pass.
    blit_bg: wgpu::BindGroup,
    /// Render pipeline for the blit to the swapchain.
    pipeline_blit: wgpu::RenderPipeline,
    /// Blit uniform: `[width, height, flip_x, _pad]`. Retained so the flip
    /// flag (offset 8) can be re-written per frame.
    blit_dims: wgpu::Buffer,
    /// GPU.9 — per-pixel world-t depth (f32 bits as u32), sized
    /// `width * height * 4`. The scene pass writes it when sprites
    /// are present; the sprite model-DDA pass reads + composites
    /// against it.
    depth_buffer: wgpu::Buffer,
    /// Picking — a `COPY_DST | MAP_READ` staging copy of `depth_buffer`
    /// so the host can read back the per-pixel world-t after a frame
    /// (e.g. click → which voxel). Same size as `depth_buffer`.
    depth_readback: wgpu::Buffer,
}
643
/// GPU.10.0 — single-sprite model-DDA pipeline: one thread per pixel
/// marches the model voxel volume and composites against the scene
/// depth buffer.
struct SpriteModelDdaResources {
    /// Bind-group layout of the model-DDA compute pass.
    bgl: wgpu::BindGroupLayout,
    /// The model-DDA compute pipeline.
    pipeline: wgpu::ComputePipeline,
    /// Per-frame uniform buffer (presumably a `SpriteModelUniform` —
    /// confirm at the write site).
    uniform_buf: wgpu::Buffer,
}
652
/// Per-frame uniform for the model-DDA pass. Mirrors `Uniform` in
/// `sprite_model_dda.wgsl` (std140). Per-model + per-instance data
/// now live in storage buffers; this holds only the camera, fog, and
/// instance count.
///
/// Each `[f32; 3]` is followed by an explicit `f32` pad so it fills a
/// 16-byte slot; the fields total 112 bytes. Field order must stay in
/// lockstep with the WGSL struct.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SpriteModelUniform {
    /// Camera position.
    cam_pos: [f32; 3],
    _p0: f32,
    /// Camera right axis.
    cam_right: [f32; 3],
    _p1: f32,
    /// Camera down axis.
    cam_down: [f32; 3],
    _p2: f32,
    /// Camera forward axis.
    cam_forward: [f32; 3],
    _p3: f32,
    /// Fog colour. NOTE(review): the 4th component presumably carries
    /// `fog_near`, as in `SceneDdaUniform::fog_color` — confirm.
    fog_color: [f32; 4],
    /// Target size in pixels.
    screen_size: [u32; 2],
    /// Number of sprite instances for this frame.
    instance_count: u32,
    /// Fog far distance.
    fog_far: f32,
    /// Vertical field of view in radians.
    fov_y_rad: f32,
    /// Number of screen tiles per row (presumably `ceil(width / tile_size)`
    /// — confirm at the fill site).
    tiles_x: u32,
    /// Screen-tile edge in pixels (presumably `SPRITE_TILE_SIZE`).
    tile_size: u32,
    _p6: f32,
}
677
/// GPU.10.3 — sprite screen-tile edge in pixels for instance binning.
/// NOTE(review): presumably the value written to
/// `SpriteModelUniform::tile_size` — confirm at the upload site.
const SPRITE_TILE_SIZE: u32 = 16;
680
681/// Build the per-grid camera storage buffer bound at `scene_dda.wgsl`
682/// binding 15 (read-only). One [`SceneDdaPerGridCamera`] per grid; the
683/// shader only indexes `0..grid_count`. An empty scene pads to one
684/// zeroed element (wgpu rejects a zero-sized storage binding). This
685/// replaces the old fixed `[…; 16]` uniform array, so a scene can hold
686/// any number of grids — the only ceiling is the device's storage size.
687fn upload_grid_cameras(device: &wgpu::Device, cams: &[SceneDdaPerGridCamera]) -> wgpu::Buffer {
688    use wgpu::util::DeviceExt;
689    let one = [SceneDdaPerGridCamera::zeroed()];
690    let src: &[SceneDdaPerGridCamera] = if cams.is_empty() { &one } else { cams };
691    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
692        label: Some("roxlap-gpu scene_dda.grid_cameras"),
693        contents: bytemuck::cast_slice(src),
694        usage: wgpu::BufferUsages::STORAGE,
695    })
696}
697
// The scene_dda bind group + layout wire occupancy pages 1..=3 at
// bindings 12..=14 explicitly; keep that in lockstep with the page
// count. Bump the bindings (here, in the WGSL, and in the bind
// group) if MAX_OCC_PAGES changes.
//
// Compile-time guard: the build fails as soon as the page count is no
// longer 4, forcing the bindings to be revisited.
const _: () = assert!(scene::MAX_OCC_PAGES == 4);
703
/// One camera basis as laid out for the GPU: four `[f32; 3]` vectors,
/// each followed by an explicit `f32` pad so every vector occupies a
/// 16-byte slot (64 bytes total). Used as the element type of the
/// per-grid camera storage buffer and as `SceneDdaUniform::sky_cam`.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaPerGridCamera {
    /// Camera position.
    pos: [f32; 3],
    _pad0: f32,
    /// Camera right axis.
    right: [f32; 3],
    _pad1: f32,
    /// Camera down axis.
    down: [f32; 3],
    _pad2: f32,
    /// Camera forward axis.
    forward: [f32; 3],
    _pad3: f32,
}
716
717impl SceneDdaPerGridCamera {
718    fn from_camera(c: &Camera) -> Self {
719        Self {
720            pos: c.position,
721            _pad0: 0.0,
722            right: c.right,
723            _pad1: 0.0,
724            down: c.down,
725            _pad2: 0.0,
726            forward: c.forward,
727            _pad3: 0.0,
728        }
729    }
730}
731
/// Per-frame uniform for the scene-DDA compute pass (presumably mirrors
/// the uniform struct in `scene_dda.wgsl` — confirm against the shader).
/// Explicit `_pad*` fields keep every group of fields on a 16-byte
/// boundary; field order must stay in lockstep with the shader.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaUniform {
    /// Vertical field of view in radians.
    fov_y_rad: f32,
    /// Number of grids; the shader indexes the per-grid cameras
    /// `0..grid_count`.
    grid_count: u32,
    /// Step budget of the outer march (presumably per ray — confirm).
    max_outer_steps: u32,
    _pad0: u32,
    /// Target size in pixels.
    screen_size: [u32; 2],
    _pad1: [u32; 2],
    /// GPU.8 — `[r, g, b, fog_near]`. The `near` distance is packed
    /// into the colour's alpha channel to keep std140 alignment
    /// tidy (a bare `f32` after the `vec4` would force extra pads).
    fog_color: [f32; 4],
    /// Distance at which fog is fully opaque.
    fog_far: f32,
    /// GPU.9 — `1` when the sprite pass is active (scene pass then
    /// records `best_t` into the depth buffer), `0` otherwise.
    write_depth: u32,
    /// Occupancy paging: words per storage page (see
    /// `scene::split_occupancy_pages`). Only consulted by the shader
    /// when `occ_num_pages > 1`.
    occ_page_words: u32,
    /// Number of real occupancy pages (1 on multi-GiB GPUs → the
    /// shader takes a branch-free single-page read).
    occ_num_pages: u32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` marches at mip
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// count. `0` disables LOD (always mip-0).
    mip_scan_dist: f32,
    _pad2: u32,
    _pad3: u32,
    _pad4: u32,
    /// World camera used only to derive the per-pixel sky direction —
    /// always valid, so a `grid_count == 0` (sprite-only / empty) scene
    /// still paints a proper sky instead of a degenerate `(0,0,1)`
    /// (whose `atan2(0,0)` sky lookup samples black).
    sky_cam: SceneDdaPerGridCamera,
    /// Per-face side-shade intensities (voxlap setsideshades), each the
    /// u8 shade subtracted from a voxel's brightness byte at a hit.
    /// `side_shades0 = (top, bot, left, right)`,
    /// `side_shades1 = (up, down, _, _)`. All-zero = no shading.
    side_shades0: [i32; 4],
    side_shades1: [i32; 4],
}
776
/// Uniform for the grid-DDA compute pass (presumably mirrors the uniform
/// struct of the grid-DDA shader — confirm). `[f32; 3]` / `[u32; 3]` /
/// `[i32; 3]` fields are each followed by one 4-byte field so every
/// group fills a 16-byte slot; the fields total 112 bytes.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct GridDdaUniform {
    /// Camera position.
    camera_pos: [f32; 3],
    _pad0: f32,
    /// Camera right axis.
    camera_right: [f32; 3],
    _pad1: f32,
    /// Camera down axis.
    camera_down: [f32; 3],
    _pad2: f32,
    /// Camera forward axis; `fov_y_rad` occupies its padding lane.
    camera_forward: [f32; 3],
    /// Vertical field of view in radians.
    fov_y_rad: f32,
    /// Target size in pixels.
    screen_size: [u32; 2],
    /// NOTE(review): presumably the chunk side length in voxels (voxlap's
    /// `vsid`) — confirm.
    vsid: u32,
    /// Step budget of the outer march (presumably per ray — confirm).
    max_outer_steps: u32,
    /// Grid extent in chunks along each axis.
    chunks_dims: [u32; 3],
    _pad3: u32,
    /// Chunk coordinate of the grid origin.
    origin_chunk: [i32; 3],
    _pad4: u32,
}
796
/// CPU-side mirror of the uniform block consumed by the single-chunk
/// DDA pass. `GpuRenderer::render_chunk` fills one of these per frame
/// and uploads it with `bytemuck::bytes_of`, which is why the struct is
/// `#[repr(C)]` + `Pod`.
///
/// Every `[f32; 3]` is followed by one 4-byte scalar so each row is 16
/// bytes wide; the whole struct is 80 bytes. NOTE(review): presumably
/// this mirrors the WGSL struct's `vec3` alignment — confirm against
/// `chunk_dda.wgsl` before reordering fields.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct ChunkDdaUniform {
    /// Camera position; `render_chunk` copies `camera.position`, which
    /// it documents as chunk-local voxel units.
    camera_pos: [f32; 3],
    /// Padding, written as `0.0`.
    _pad0: f32,
    /// Camera basis vector pointing right (`camera.right`).
    camera_right: [f32; 3],
    /// Padding, written as `0.0`.
    _pad1: f32,
    /// Camera basis vector pointing down (`camera.down`).
    camera_down: [f32; 3],
    /// Padding, written as `0.0`.
    _pad2: f32,
    /// Camera basis vector pointing forward (`camera.forward`).
    camera_forward: [f32; 3],
    /// Vertical field of view in radians; fills the slot after
    /// `camera_forward` instead of a padding word.
    fov_y_rad: f32,
    /// Swapchain size in pixels, `[width, height]`.
    screen_size: [u32; 2],
    /// The resident chunk's `vsid`.
    vsid: u32,
    /// Caller-supplied cap on the per-pixel DDA loop.
    max_scan_dist: u32,
}
812
813impl GpuRenderer {
814    /// Stand up the device + surface + swapchain on `window`. Async
815    /// because `wgpu::Adapter`/`Device` requests are.
816    ///
817    /// `window` is any [`raw-window-handle`] provider (winit, SDL,
818    /// GLFW, …) wrapped in an `Arc`; `size` is its initial physical
819    /// framebuffer size in pixels — passed explicitly so the renderer
820    /// stays decoupled from any one windowing library's size API.
821    ///
822    /// [`raw-window-handle`]: raw_window_handle
823    ///
824    /// # Errors
825    /// Returns [`GpuInitError`] if surface creation, adapter
826    /// selection, or device request fails. Hosts treat any error as
827    /// "fall back to the CPU path".
828    pub async fn new<W>(
829        window: Arc<W>,
830        size: (u32, u32),
831        settings: GpuRendererSettings,
832    ) -> Result<Self, GpuInitError>
833    where
834        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
835    {
836        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle());
837        let surface = instance.create_surface(window.clone())?;
838        let adapter = Self::request_adapter(&instance, Some(&surface), settings).await?;
839        let (device, queue) = Self::request_device(&adapter).await?;
840        Ok(Self::finish_init(
841            &adapter, device, queue, surface, size, settings,
842        ))
843    }
844
845    /// wasm/WebGPU: build the renderer against an HTML `canvas`. No
846    /// `Send + Sync` bound — wgpu's surface/device/queue are `!Send` on
847    /// the `+atomics` shared-memory wasm build, and the browser host is
848    /// single-threaded (`Rc<RefCell<…>>`). The native generic-`W` entry
849    /// (which carries the bound) isn't reachable on wasm.
850    ///
851    /// Probes for an adapter **before** `create_surface`: on wasm,
852    /// creating the surface calls `canvas.getContext("webgpu")`, which
853    /// permanently locks the canvas's context type. If we bound it and
854    /// then found no adapter, a CPU/WebGL2 fallback on the *same* canvas
855    /// (the facade clones the handle, but it's the same DOM element)
856    /// would fail with "no webgl2 context". Probing first leaves the
857    /// canvas pristine when WebGPU is unavailable.
858    ///
859    /// # Errors
860    /// See [`Self::new`].
861    #[cfg(target_arch = "wasm32")]
862    pub async fn new_from_canvas(
863        canvas: web_sys::HtmlCanvasElement,
864        size: (u32, u32),
865        settings: GpuRendererSettings,
866    ) -> Result<Self, GpuInitError> {
867        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle());
868        // Probe adapter AND device before binding the canvas — both
869        // `requestAdapter` and `requestDevice` can fail on wasm, and
870        // `create_surface` permanently locks the canvas to a WebGPU
871        // context. Creating the surface last keeps the canvas pristine
872        // for the CPU/WebGL2 fallback on any GPU-init failure.
873        let adapter = Self::request_adapter(&instance, None, settings).await?;
874        let (device, queue) = Self::request_device(&adapter).await?;
875        let surface = instance.create_surface(wgpu::SurfaceTarget::Canvas(canvas))?;
876        Ok(Self::finish_init(
877            &adapter, device, queue, surface, size, settings,
878        ))
879    }
880
881    /// Pick a GPU adapter at the settings' power preference. `None`
882    /// `compatible_surface` is used on the wasm canvas path so the probe
883    /// doesn't bind the canvas's context (see [`Self::new_from_canvas`]);
884    /// WebGPU exposes a single surface-independent adapter, so this is
885    /// safe there.
886    async fn request_adapter(
887        instance: &wgpu::Instance,
888        compatible_surface: Option<&wgpu::Surface<'static>>,
889        settings: GpuRendererSettings,
890    ) -> Result<wgpu::Adapter, GpuInitError> {
891        let power_preference = match settings.power_preference {
892            PowerPreference::Low => wgpu::PowerPreference::LowPower,
893            PowerPreference::High => wgpu::PowerPreference::HighPerformance,
894        };
895        instance
896            .request_adapter(&wgpu::RequestAdapterOptions {
897                power_preference,
898                compatible_surface,
899                force_fallback_adapter: false,
900            })
901            .await
902            .map_err(|_| GpuInitError::NoAdapter)
903    }
904
905    /// Request the device + queue from `adapter`. Pulled out of
906    /// [`Self::finish_init`] so the wasm canvas path can validate the
907    /// device **before** `create_surface` binds the canvas's WebGPU
908    /// context — if the device request fails (e.g. a browser that
909    /// rejects a wgpu-sent limit), the canvas stays pristine for the
910    /// CPU/WebGL2 fallback instead of being poisoned.
911    async fn request_device(
912        adapter: &wgpu::Adapter,
913    ) -> Result<(wgpu::Device, wgpu::Queue), GpuInitError> {
914        Ok(adapter
915            .request_device(&wgpu::DeviceDescriptor {
916                label: Some("roxlap-gpu device"),
917                required_features: wgpu::Features::empty(),
918                required_limits: pick_required_limits(&adapter.limits()),
919                experimental_features: wgpu::ExperimentalFeatures::disabled(),
920                memory_hints: wgpu::MemoryHints::default(),
921                trace: wgpu::Trace::Off,
922            })
923            .await?)
924    }
925
926    /// Shared swapchain → sky/sampler setup, run after the adapter +
927    /// device + surface exist (the surface comes from a window handle on
928    /// native, or an HTML canvas on wasm — created last on wasm so a
929    /// failed device request never touches the canvas).
930    fn finish_init(
931        adapter: &wgpu::Adapter,
932        device: wgpu::Device,
933        queue: wgpu::Queue,
934        surface: wgpu::Surface<'static>,
935        size: (u32, u32),
936        settings: GpuRendererSettings,
937    ) -> Self {
938        let info = adapter.get_info();
939        let adapter_info = format!(
940            "{name} ({backend:?}, {device_type:?})",
941            name = info.name,
942            backend = info.backend,
943            device_type = info.device_type,
944        );
945
946        let caps = surface.get_capabilities(adapter);
947        // Pick a NON-sRGB, 8-bit swapchain format. Voxlap colours are
948        // already sRGB-encoded (the slab bytes are display-ready,
949        // matching what the CPU softbuffer path writes straight to the
950        // framebuffer with no conversion); an sRGB swapchain would
951        // re-apply the gamma curve, washing the look out. We also
952        // *prefer 8-bit BGRA/RGBA* over any other non-sRGB format: some
953        // adapters (e.g. NVK) advertise a 16-bit-unorm format first,
954        // and wgpu 29 gates `create_view` on 16-bit-norm formats behind
955        // the `TEXTURE_FORMAT_16BIT_NORM` device feature (which we don't
956        // enable, to stay WebGPU-portable). Falls back to the first
957        // non-sRGB format, then `caps.formats[0]`.
958        let surface_format = caps
959            .formats
960            .iter()
961            .copied()
962            .find(|f| {
963                matches!(
964                    f,
965                    wgpu::TextureFormat::Bgra8Unorm | wgpu::TextureFormat::Rgba8Unorm
966                )
967            })
968            .or_else(|| caps.formats.iter().copied().find(|f| !f.is_srgb()))
969            .unwrap_or(caps.formats[0]);
970        let present_mode = if settings.uncapped_present {
971            pick_present_mode(&caps.present_modes)
972        } else {
973            wgpu::PresentMode::Fifo
974        };
975        // GPU.11.2 — surface the present mode: `Fifo` is vsync-capped
976        // (FPS pinned to refresh rate → compute optimisations like the
977        // mip LOD won't show up in the FPS counter). Mailbox/Immediate
978        // are uncapped. Wayland under Mesa frequently offers only Fifo.
979        eprintln!(
980            "roxlap-gpu: present mode = {present_mode:?} (available: {:?})",
981            caps.present_modes,
982        );
983        let (init_w, init_h) = size;
984        let surface_config = wgpu::SurfaceConfiguration {
985            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
986            format: surface_format,
987            width: init_w.max(1),
988            height: init_h.max(1),
989            present_mode,
990            alpha_mode: caps.alpha_modes[0],
991            view_formats: vec![],
992            desired_maximum_frame_latency: 2,
993        };
994        surface.configure(&device, &surface_config);
995
996        // GPU.8 default sky: a 1×1 mid-grey texture. Hosts replace
997        // it via `set_sky_panorama` with a real equirectangular
998        // panorama; the default stops the shader sampling
999        // uninitialised memory before that happens.
1000        let default_sky_pixel = [0x80u8, 0x80, 0x80, 0xff];
1001        let (sky_texture, sky_view) = create_sky_texture(&device, 1, 1, &default_sky_pixel);
1002        queue.write_texture(
1003            wgpu::TexelCopyTextureInfo {
1004                texture: &sky_texture,
1005                mip_level: 0,
1006                origin: wgpu::Origin3d::ZERO,
1007                aspect: wgpu::TextureAspect::All,
1008            },
1009            &default_sky_pixel,
1010            wgpu::TexelCopyBufferLayout {
1011                offset: 0,
1012                bytes_per_row: Some(4),
1013                rows_per_image: Some(1),
1014            },
1015            wgpu::Extent3d {
1016                width: 1,
1017                height: 1,
1018                depth_or_array_layers: 1,
1019            },
1020        );
1021        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
1022            label: Some("roxlap-gpu sky_sampler"),
1023            // Voxlap-convention panorama: u = elevation [0, 1]
1024            // (Repeat is a no-op since values don't go outside),
1025            // v = azimuth (wraps 360° — Repeat is required).
1026            address_mode_u: wgpu::AddressMode::Repeat,
1027            address_mode_v: wgpu::AddressMode::Repeat,
1028            address_mode_w: wgpu::AddressMode::ClampToEdge,
1029            mag_filter: wgpu::FilterMode::Linear,
1030            min_filter: wgpu::FilterMode::Linear,
1031            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1032            ..Default::default()
1033        });
1034
1035        Self {
1036            surface,
1037            surface_config,
1038            device,
1039            queue,
1040            adapter_info,
1041            clear_colour: settings.clear_colour,
1042            frame_count: 0,
1043            flip_x: false,
1044            chunk_dda: None,
1045            grid_dda: None,
1046            scene_dda: None,
1047            scene_depth_valid: false,
1048            sky_texture,
1049            sky_view,
1050            sky_sampler,
1051            // Fog disabled by default — voxlap's CPU rasterizer
1052            // also runs without fog in the scene-demo, so matching
1053            // it means no GPU fog out of the box. Hosts can opt in
1054            // via `set_fog` (e.g. for atmospheric far-LOD masking).
1055            fog_color: [0.66, 0.74, 0.88],
1056            fog_near: 0.0,
1057            fog_far: 1.0e30,
1058            sprite_registry: None,
1059            sprite_model_dda: None,
1060            // GPU.10.4 — default LOD threshold: step to a coarser mip
1061            // once a voxel projects below 4 px. Empirically the best
1062            // quality/cost tradeoff; the host can override.
1063            sprite_lod_px: 4.0,
1064            // GPU.11.1 — matches the CPU demo's mip_scan_dist=64.
1065            scene_mip_scan_dist: 64.0,
1066            scene_side_shades: [[0; 4]; 2],
1067            last_fov_y_rad: 0.0,
1068            pending_frame: None,
1069            line_resources: None,
1070            line_vbuf: None,
1071            line_vbuf_cap: 0,
1072            image_resources: None,
1073            image_vbuf: None,
1074            image_vbuf_cap: 0,
1075            images: Vec::new(),
1076            #[cfg(feature = "hud")]
1077            egui_renderer: None,
1078        }
1079    }
1080
1081    /// Synchronous wrapper for hosts that don't have an async
1082    /// runtime. Internally `pollster::block_on`s [`Self::new`].
1083    ///
1084    /// # Errors
1085    /// See [`Self::new`].
1086    #[cfg(not(target_arch = "wasm32"))]
1087    pub fn new_blocking<W>(
1088        window: Arc<W>,
1089        size: (u32, u32),
1090        settings: GpuRendererSettings,
1091    ) -> Result<Self, GpuInitError>
1092    where
1093        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
1094    {
1095        pollster::block_on(Self::new(window, size, settings))
1096    }
1097
1098    /// Human-readable adapter description — name + backend +
1099    /// device type. The demo host prints this in the title bar.
1100    pub fn adapter_info(&self) -> &str {
1101        &self.adapter_info
1102    }
1103
1104    /// Borrow the underlying wgpu device — hosts use this to build
1105    /// chunk uploads (`GpuChunkResident::upload(gpu.device(), …)`).
1106    pub fn device(&self) -> &wgpu::Device {
1107        &self.device
1108    }
1109
1110    /// Borrow the wgpu queue — hosts use this for read-back paths
1111    /// (`GpuChunkResident::read_voxel_blocking(gpu.device(), gpu.queue(), …)`).
1112    pub fn queue(&self) -> &wgpu::Queue {
1113        &self.queue
1114    }
1115
1116    /// GPU.8 — upload an equirectangular panorama as the scene's
1117    /// sky texture. `rgba` is row-major, `width × height` pixels,
1118    /// 4 bytes per pixel (R, G, B, A). The shader samples it with
1119    /// `u = atan2(dir.x, dir.y) / (2π) + 0.5` (azimuth) and
1120    /// `v = acos(-dir.z) / π` (elevation), matching standard
1121    /// equirectangular layout (top of image = zenith for voxlap's
1122    /// `+z = down` basis).
1123    /// Mirror the marched scene (and its line/image overlays) horizontally
1124    /// on present, leaving the egui overlay upright. See [`Self::flip_x`].
1125    pub fn set_flip_x(&mut self, flip: bool) {
1126        self.flip_x = flip;
1127    }
1128
1129    ///
1130    /// # Panics
1131    /// If `rgba.len() != (width * height * 4) as usize`.
1132    pub fn set_sky_panorama(&mut self, rgba: &[u8], width: u32, height: u32) {
1133        assert_eq!(
1134            rgba.len(),
1135            (width as usize) * (height as usize) * 4,
1136            "set_sky_panorama: expected w*h*4 bytes, got {}",
1137            rgba.len(),
1138        );
1139        let (tex, view) = create_sky_texture(&self.device, width, height, rgba);
1140        // Upload pixel data via `queue.write_texture` so we don't
1141        // have to map the buffer manually.
1142        self.queue.write_texture(
1143            wgpu::TexelCopyTextureInfo {
1144                texture: &tex,
1145                mip_level: 0,
1146                origin: wgpu::Origin3d::ZERO,
1147                aspect: wgpu::TextureAspect::All,
1148            },
1149            rgba,
1150            wgpu::TexelCopyBufferLayout {
1151                offset: 0,
1152                bytes_per_row: Some(width * 4),
1153                rows_per_image: Some(height),
1154            },
1155            wgpu::Extent3d {
1156                width,
1157                height,
1158                depth_or_array_layers: 1,
1159            },
1160        );
1161        self.sky_texture = tex;
1162        self.sky_view = view;
1163    }
1164
1165    /// GPU.8 — set the fog blend. `color` is per-channel [0, 1];
1166    /// `near`/`far` are world-space ray distances in voxel units.
1167    /// Hits with `t < near` show their full colour; hits with
1168    /// `t > far` show `color` exclusively; in between is a
1169    /// smoothstep blend.
1170    pub fn set_fog(&mut self, color: [f32; 3], near: f32, far: f32) {
1171        self.fog_color = color;
1172        self.fog_near = near;
1173        self.fog_far = far.max(near + 1.0);
1174    }
1175
1176    /// Re-configure the swapchain to a new physical size. Call from
1177    /// `WindowEvent::Resized`. Drops the chunk-DDA storage texture
1178    /// so [`Self::render_chunk`] rebuilds it at the new size.
1179    pub fn resize(&mut self, width: u32, height: u32) {
1180        if width == 0 || height == 0 {
1181            return;
1182        }
1183        self.surface_config.width = width;
1184        self.surface_config.height = height;
1185        self.surface.configure(&self.device, &self.surface_config);
1186        self.chunk_dda = None;
1187        self.grid_dda = None;
1188        self.scene_dda = None;
1189    }
1190
1191    /// Acquire the next swapchain frame, or `None` to skip this frame.
1192    /// wgpu 29's `get_current_texture` returns a
1193    /// [`wgpu::CurrentSurfaceTexture`] status enum (was
1194    /// `Result<_, SurfaceError>`): an outdated/lost surface reconfigures
1195    /// and skips, transient statuses just skip.
1196    fn acquire_frame(&self) -> Option<wgpu::SurfaceTexture> {
1197        use wgpu::CurrentSurfaceTexture as C;
1198        match self.surface.get_current_texture() {
1199            C::Success(t) | C::Suboptimal(t) => Some(t),
1200            C::Outdated | C::Lost => {
1201                self.surface.configure(&self.device, &self.surface_config);
1202                None
1203            }
1204            C::Timeout | C::Occluded | C::Validation => None,
1205        }
1206    }
1207
1208    /// GPU.1 render: single render pass clearing the swapchain to a
1209    /// slowly drifting colour, then presenting. Voxels arrive in
1210    /// GPU.3+.
1211    pub fn render(&mut self) {
1212        let Some(surf_tex) = self.acquire_frame() else {
1213            return;
1214        };
1215        let view = surf_tex
1216            .texture
1217            .create_view(&wgpu::TextureViewDescriptor::default());
1218
1219        // Slow colour drift so the user can tell the GPU path is
1220        // actually presenting frames vs. e.g. a frozen window.
1221        // Wrap at 2π/0.005 frames (~1257) so the cast stays exact.
1222        let phase = f64::from(self.frame_count % 1257) * 0.005;
1223        let [r, g, b] = self.clear_colour;
1224        let drift = (phase.sin() * 0.04 + 0.04).clamp(0.0, 0.1);
1225        let clear = wgpu::Color {
1226            r: (r + drift).clamp(0.0, 1.0),
1227            g: (g + drift * 0.5).clamp(0.0, 1.0),
1228            b: (b + drift * 0.25).clamp(0.0, 1.0),
1229            a: 1.0,
1230        };
1231
1232        let mut encoder = self
1233            .device
1234            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1235                label: Some("roxlap-gpu encoder"),
1236            });
1237        {
1238            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1239                label: Some("roxlap-gpu clear"),
1240                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1241                    view: &view,
1242                    depth_slice: None,
1243                    resolve_target: None,
1244                    ops: wgpu::Operations {
1245                        load: wgpu::LoadOp::Clear(clear),
1246                        store: wgpu::StoreOp::Store,
1247                    },
1248                })],
1249                depth_stencil_attachment: None,
1250                timestamp_writes: None,
1251                occlusion_query_set: None,
1252                multiview_mask: None,
1253            });
1254        }
1255        self.queue.submit(std::iter::once(encoder.finish()));
1256        surf_tex.present();
1257        self.frame_count = self.frame_count.wrapping_add(1);
1258    }
1259
1260    /// GPU.3 single-chunk render. Dispatches `chunk_dda.wgsl`
1261    /// against `resident`'s storage buffers, then blits the
1262    /// low-res storage texture to the swapchain. `camera.position`
1263    /// is in **chunk-local** voxel units (host translates from
1264    /// world coords). `max_scan_dist` caps the per-pixel DDA loop —
1265    /// scene-demo wires `+` / `-` through this each frame.
1266    ///
1267    /// # Panics
1268    /// Internally `expect`s the chunk-DDA resources to be built —
1269    /// they are constructed at the top of this function if missing.
1270    /// Cannot fire in normal control flow.
1271    pub fn render_chunk(
1272        &mut self,
1273        resident: &GpuChunkResident,
1274        camera: &Camera,
1275        max_scan_dist: u32,
1276    ) {
1277        let Some(surf_tex) = self.acquire_frame() else {
1278            return;
1279        };
1280        let surf_view = surf_tex
1281            .texture
1282            .create_view(&wgpu::TextureViewDescriptor::default());
1283
1284        let surface_w = self.surface_config.width;
1285        let surface_h = self.surface_config.height;
1286        let surface_format = self.surface_config.format;
1287
1288        // Lazy-build chunk-DDA resources; rebuild when the swapchain
1289        // grew or shrank.
1290        let needs_build = match &self.chunk_dda {
1291            Some(r) => r.storage_size != (surface_w, surface_h),
1292            None => true,
1293        };
1294        if needs_build {
1295            self.chunk_dda = Some(self.build_chunk_dda(surface_w, surface_h, surface_format));
1296        }
1297        let dda = self.chunk_dda.as_ref().expect("just built");
1298
1299        // Update uniforms.
1300        let uniform = ChunkDdaUniform {
1301            camera_pos: camera.position,
1302            _pad0: 0.0,
1303            camera_right: camera.right,
1304            _pad1: 0.0,
1305            camera_down: camera.down,
1306            _pad2: 0.0,
1307            camera_forward: camera.forward,
1308            fov_y_rad: camera.fov_y_rad,
1309            screen_size: [surface_w, surface_h],
1310            vsid: resident.vsid,
1311            max_scan_dist,
1312        };
1313        self.queue
1314            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1315
1316        // Per-frame DDA bind group — references the chunk's buffers
1317        // so we rebuild every frame (the resident can change between
1318        // calls).
1319        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1320            label: Some("roxlap-gpu chunk_dda.bg"),
1321            layout: &dda.bgl_dda,
1322            entries: &[
1323                wgpu::BindGroupEntry {
1324                    binding: 0,
1325                    resource: dda.uniform_buf.as_entire_binding(),
1326                },
1327                wgpu::BindGroupEntry {
1328                    binding: 1,
1329                    resource: resident.occupancy.as_entire_binding(),
1330                },
1331                wgpu::BindGroupEntry {
1332                    binding: 2,
1333                    resource: resident.color_offsets.as_entire_binding(),
1334                },
1335                wgpu::BindGroupEntry {
1336                    binding: 3,
1337                    resource: resident.colors.as_entire_binding(),
1338                },
1339                wgpu::BindGroupEntry {
1340                    binding: 4,
1341                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1342                },
1343            ],
1344        });
1345
1346        let mut encoder = self
1347            .device
1348            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1349                label: Some("roxlap-gpu chunk encoder"),
1350            });
1351        {
1352            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1353                label: Some("roxlap-gpu chunk_dda compute"),
1354                timestamp_writes: None,
1355            });
1356            cpass.set_pipeline(&dda.pipeline_dda);
1357            cpass.set_bind_group(0, &dda_bg, &[]);
1358            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1359        }
1360        {
1361            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1362                label: Some("roxlap-gpu chunk_dda blit"),
1363                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1364                    view: &surf_view,
1365                    depth_slice: None,
1366                    resolve_target: None,
1367                    ops: wgpu::Operations {
1368                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1369                        store: wgpu::StoreOp::Store,
1370                    },
1371                })],
1372                depth_stencil_attachment: None,
1373                timestamp_writes: None,
1374                occlusion_query_set: None,
1375                multiview_mask: None,
1376            });
1377            rpass.set_pipeline(&dda.pipeline_blit);
1378            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1379            rpass.draw(0..3, 0..1);
1380        }
1381        self.queue.submit(std::iter::once(encoder.finish()));
1382        surf_tex.present();
1383        self.frame_count = self.frame_count.wrapping_add(1);
1384    }
1385
1386    fn build_chunk_dda(
1387        &self,
1388        width: u32,
1389        height: u32,
1390        surface_format: wgpu::TextureFormat,
1391    ) -> ChunkDdaResources {
1392        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1393            label: Some("roxlap-gpu chunk_dda.storage"),
1394            size: wgpu::Extent3d {
1395                width,
1396                height,
1397                depth_or_array_layers: 1,
1398            },
1399            mip_level_count: 1,
1400            sample_count: 1,
1401            dimension: wgpu::TextureDimension::D2,
1402            format: wgpu::TextureFormat::Rgba8Unorm,
1403            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1404            view_formats: &[],
1405        });
1406        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1407
1408        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1409            label: Some("roxlap-gpu chunk_dda.uniform"),
1410            size: std::mem::size_of::<ChunkDdaUniform>() as u64,
1411            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1412            mapped_at_creation: false,
1413        });
1414
1415        let dda_shader = self
1416            .device
1417            .create_shader_module(wgpu::ShaderModuleDescriptor {
1418                label: Some("chunk_dda.wgsl"),
1419                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/chunk_dda.wgsl").into()),
1420            });
1421        let bgl_dda = self
1422            .device
1423            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1424                label: Some("roxlap-gpu chunk_dda.bgl"),
1425                entries: &[
1426                    bgl_uniform_entry(0),
1427                    bgl_storage_entry(1, true),
1428                    bgl_storage_entry(2, true),
1429                    bgl_storage_entry(3, true),
1430                    wgpu::BindGroupLayoutEntry {
1431                        binding: 4,
1432                        visibility: wgpu::ShaderStages::COMPUTE,
1433                        ty: wgpu::BindingType::StorageTexture {
1434                            access: wgpu::StorageTextureAccess::WriteOnly,
1435                            format: wgpu::TextureFormat::Rgba8Unorm,
1436                            view_dimension: wgpu::TextureViewDimension::D2,
1437                        },
1438                        count: None,
1439                    },
1440                ],
1441            });
1442        let dda_pl = self
1443            .device
1444            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1445                label: Some("roxlap-gpu chunk_dda.layout"),
1446                bind_group_layouts: &[Some(&bgl_dda)],
1447                immediate_size: 0,
1448            });
1449        let pipeline_dda = self
1450            .device
1451            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1452                label: Some("roxlap-gpu chunk_dda.pipeline"),
1453                layout: Some(&dda_pl),
1454                module: &dda_shader,
1455                entry_point: Some("render_chunk"),
1456                compilation_options: wgpu::PipelineCompilationOptions::default(),
1457                cache: None,
1458            });
1459
1460        // Fullscreen-triangle blit upscales the storage texture into
1461        // the swapchain. Nearest filter keeps the retro pixel look.
1462        let blit_shader = self
1463            .device
1464            .create_shader_module(wgpu::ShaderModuleDescriptor {
1465                label: Some("blit.wgsl"),
1466                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1467            });
1468        let bgl_blit = self
1469            .device
1470            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1471                label: Some("roxlap-gpu chunk_dda.blit_bgl"),
1472                entries: &[
1473                    wgpu::BindGroupLayoutEntry {
1474                        binding: 0,
1475                        visibility: wgpu::ShaderStages::FRAGMENT,
1476                        ty: wgpu::BindingType::Texture {
1477                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1478                            view_dimension: wgpu::TextureViewDimension::D2,
1479                            multisampled: false,
1480                        },
1481                        count: None,
1482                    },
1483                    wgpu::BindGroupLayoutEntry {
1484                        binding: 1,
1485                        visibility: wgpu::ShaderStages::FRAGMENT,
1486                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1487                        count: None,
1488                    },
1489                ],
1490            });
1491        let blit_pl = self
1492            .device
1493            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1494                label: Some("roxlap-gpu chunk_dda.blit_layout"),
1495                bind_group_layouts: &[Some(&bgl_blit)],
1496                immediate_size: 0,
1497            });
1498        let pipeline_blit = self
1499            .device
1500            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1501                label: Some("roxlap-gpu chunk_dda.blit_pipeline"),
1502                layout: Some(&blit_pl),
1503                vertex: wgpu::VertexState {
1504                    module: &blit_shader,
1505                    entry_point: Some("vs_main"),
1506                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1507                    buffers: &[],
1508                },
1509                fragment: Some(wgpu::FragmentState {
1510                    module: &blit_shader,
1511                    entry_point: Some("fs_main"),
1512                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1513                    targets: &[Some(wgpu::ColorTargetState {
1514                        format: surface_format,
1515                        blend: None,
1516                        write_mask: wgpu::ColorWrites::ALL,
1517                    })],
1518                }),
1519                primitive: wgpu::PrimitiveState::default(),
1520                depth_stencil: None,
1521                multisample: wgpu::MultisampleState::default(),
1522                multiview_mask: None,
1523                cache: None,
1524            });
1525        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1526            label: Some("roxlap-gpu chunk_dda.blit_sampler"),
1527            address_mode_u: wgpu::AddressMode::ClampToEdge,
1528            address_mode_v: wgpu::AddressMode::ClampToEdge,
1529            address_mode_w: wgpu::AddressMode::ClampToEdge,
1530            mag_filter: wgpu::FilterMode::Nearest,
1531            min_filter: wgpu::FilterMode::Nearest,
1532            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1533            ..Default::default()
1534        });
1535        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1536            label: Some("roxlap-gpu chunk_dda.blit_bg"),
1537            layout: &bgl_blit,
1538            entries: &[
1539                wgpu::BindGroupEntry {
1540                    binding: 0,
1541                    resource: wgpu::BindingResource::TextureView(&storage_view),
1542                },
1543                wgpu::BindGroupEntry {
1544                    binding: 1,
1545                    resource: wgpu::BindingResource::Sampler(&sampler),
1546                },
1547            ],
1548        });
1549
1550        ChunkDdaResources {
1551            storage_size: (width, height),
1552            storage_view,
1553            uniform_buf,
1554            bgl_dda,
1555            pipeline_dda,
1556            blit_bg,
1557            pipeline_blit,
1558            _sampler: sampler,
1559        }
1560    }
1561
1562    /// GPU.4 render — outer DDA over chunk indices + inner DDA into
1563    /// non-empty chunks. `camera.position` is in **grid-local**
1564    /// voxel units. `max_outer_steps` caps how many chunks the
1565    /// outer DDA may traverse per ray (scene-demo wires `+ / -`
1566    /// through this).
1567    ///
1568    /// # Panics
1569    /// Internally `expect`s the grid-DDA resources to be built;
1570    /// they are constructed at the top of this function if missing.
1571    pub fn render_grid(&mut self, grid: &GpuGridResident, camera: &Camera, max_outer_steps: u32) {
1572        let Some(surf_tex) = self.acquire_frame() else {
1573            return;
1574        };
1575        let surf_view = surf_tex
1576            .texture
1577            .create_view(&wgpu::TextureViewDescriptor::default());
1578
1579        let surface_w = self.surface_config.width;
1580        let surface_h = self.surface_config.height;
1581        let surface_format = self.surface_config.format;
1582
1583        let needs_build = match &self.grid_dda {
1584            Some(r) => r.storage_size != (surface_w, surface_h),
1585            None => true,
1586        };
1587        if needs_build {
1588            self.grid_dda = Some(self.build_grid_dda(surface_w, surface_h, surface_format));
1589        }
1590        let dda = self.grid_dda.as_ref().expect("just built");
1591
1592        let uniform = GridDdaUniform {
1593            camera_pos: camera.position,
1594            _pad0: 0.0,
1595            camera_right: camera.right,
1596            _pad1: 0.0,
1597            camera_down: camera.down,
1598            _pad2: 0.0,
1599            camera_forward: camera.forward,
1600            fov_y_rad: camera.fov_y_rad,
1601            screen_size: [surface_w, surface_h],
1602            vsid: grid.vsid,
1603            max_outer_steps,
1604            chunks_dims: grid.chunks_dims,
1605            _pad3: 0,
1606            origin_chunk: grid.origin_chunk,
1607            _pad4: 0,
1608        };
1609        self.queue
1610            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1611
1612        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1613            label: Some("roxlap-gpu grid_dda.bg"),
1614            layout: &dda.bgl_dda,
1615            entries: &[
1616                wgpu::BindGroupEntry {
1617                    binding: 0,
1618                    resource: dda.uniform_buf.as_entire_binding(),
1619                },
1620                wgpu::BindGroupEntry {
1621                    binding: 1,
1622                    resource: grid.occupancy.as_entire_binding(),
1623                },
1624                wgpu::BindGroupEntry {
1625                    binding: 2,
1626                    resource: grid.color_offsets.as_entire_binding(),
1627                },
1628                wgpu::BindGroupEntry {
1629                    binding: 3,
1630                    resource: grid.colors.as_entire_binding(),
1631                },
1632                wgpu::BindGroupEntry {
1633                    binding: 4,
1634                    resource: grid.chunk_colors_base.as_entire_binding(),
1635                },
1636                wgpu::BindGroupEntry {
1637                    binding: 5,
1638                    resource: grid.chunk_occupancy.as_entire_binding(),
1639                },
1640                wgpu::BindGroupEntry {
1641                    binding: 6,
1642                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1643                },
1644            ],
1645        });
1646
1647        let mut encoder = self
1648            .device
1649            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1650                label: Some("roxlap-gpu grid encoder"),
1651            });
1652        {
1653            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1654                label: Some("roxlap-gpu grid_dda compute"),
1655                timestamp_writes: None,
1656            });
1657            cpass.set_pipeline(&dda.pipeline_dda);
1658            cpass.set_bind_group(0, &dda_bg, &[]);
1659            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1660        }
1661        {
1662            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1663                label: Some("roxlap-gpu grid_dda blit"),
1664                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1665                    view: &surf_view,
1666                    depth_slice: None,
1667                    resolve_target: None,
1668                    ops: wgpu::Operations {
1669                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1670                        store: wgpu::StoreOp::Store,
1671                    },
1672                })],
1673                depth_stencil_attachment: None,
1674                timestamp_writes: None,
1675                occlusion_query_set: None,
1676                multiview_mask: None,
1677            });
1678            rpass.set_pipeline(&dda.pipeline_blit);
1679            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1680            rpass.draw(0..3, 0..1);
1681        }
1682        self.queue.submit(std::iter::once(encoder.finish()));
1683        surf_tex.present();
1684        self.frame_count = self.frame_count.wrapping_add(1);
1685    }
1686
1687    fn build_grid_dda(
1688        &self,
1689        width: u32,
1690        height: u32,
1691        surface_format: wgpu::TextureFormat,
1692    ) -> GridDdaResources {
1693        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1694            label: Some("roxlap-gpu grid_dda.storage"),
1695            size: wgpu::Extent3d {
1696                width,
1697                height,
1698                depth_or_array_layers: 1,
1699            },
1700            mip_level_count: 1,
1701            sample_count: 1,
1702            dimension: wgpu::TextureDimension::D2,
1703            format: wgpu::TextureFormat::Rgba8Unorm,
1704            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1705            view_formats: &[],
1706        });
1707        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1708
1709        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1710            label: Some("roxlap-gpu grid_dda.uniform"),
1711            size: std::mem::size_of::<GridDdaUniform>() as u64,
1712            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1713            mapped_at_creation: false,
1714        });
1715
1716        let dda_shader = self
1717            .device
1718            .create_shader_module(wgpu::ShaderModuleDescriptor {
1719                label: Some("grid_dda.wgsl"),
1720                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/grid_dda.wgsl").into()),
1721            });
1722        let bgl_dda = self
1723            .device
1724            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1725                label: Some("roxlap-gpu grid_dda.bgl"),
1726                entries: &[
1727                    bgl_uniform_entry(0),
1728                    bgl_storage_entry(1, true),
1729                    bgl_storage_entry(2, true),
1730                    bgl_storage_entry(3, true),
1731                    bgl_storage_entry(4, true),
1732                    bgl_storage_entry(5, true),
1733                    wgpu::BindGroupLayoutEntry {
1734                        binding: 6,
1735                        visibility: wgpu::ShaderStages::COMPUTE,
1736                        ty: wgpu::BindingType::StorageTexture {
1737                            access: wgpu::StorageTextureAccess::WriteOnly,
1738                            format: wgpu::TextureFormat::Rgba8Unorm,
1739                            view_dimension: wgpu::TextureViewDimension::D2,
1740                        },
1741                        count: None,
1742                    },
1743                ],
1744            });
1745        let dda_pl = self
1746            .device
1747            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1748                label: Some("roxlap-gpu grid_dda.layout"),
1749                bind_group_layouts: &[Some(&bgl_dda)],
1750                immediate_size: 0,
1751            });
1752        let pipeline_dda = self
1753            .device
1754            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1755                label: Some("roxlap-gpu grid_dda.pipeline"),
1756                layout: Some(&dda_pl),
1757                module: &dda_shader,
1758                entry_point: Some("render_grid"),
1759                compilation_options: wgpu::PipelineCompilationOptions::default(),
1760                cache: None,
1761            });
1762
1763        let blit_shader = self
1764            .device
1765            .create_shader_module(wgpu::ShaderModuleDescriptor {
1766                label: Some("blit.wgsl"),
1767                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1768            });
1769        let bgl_blit = self
1770            .device
1771            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1772                label: Some("roxlap-gpu grid_dda.blit_bgl"),
1773                entries: &[
1774                    wgpu::BindGroupLayoutEntry {
1775                        binding: 0,
1776                        visibility: wgpu::ShaderStages::FRAGMENT,
1777                        ty: wgpu::BindingType::Texture {
1778                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1779                            view_dimension: wgpu::TextureViewDimension::D2,
1780                            multisampled: false,
1781                        },
1782                        count: None,
1783                    },
1784                    wgpu::BindGroupLayoutEntry {
1785                        binding: 1,
1786                        visibility: wgpu::ShaderStages::FRAGMENT,
1787                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1788                        count: None,
1789                    },
1790                ],
1791            });
1792        let blit_pl = self
1793            .device
1794            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1795                label: Some("roxlap-gpu grid_dda.blit_layout"),
1796                bind_group_layouts: &[Some(&bgl_blit)],
1797                immediate_size: 0,
1798            });
1799        let pipeline_blit = self
1800            .device
1801            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1802                label: Some("roxlap-gpu grid_dda.blit_pipeline"),
1803                layout: Some(&blit_pl),
1804                vertex: wgpu::VertexState {
1805                    module: &blit_shader,
1806                    entry_point: Some("vs_main"),
1807                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1808                    buffers: &[],
1809                },
1810                fragment: Some(wgpu::FragmentState {
1811                    module: &blit_shader,
1812                    entry_point: Some("fs_main"),
1813                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1814                    targets: &[Some(wgpu::ColorTargetState {
1815                        format: surface_format,
1816                        blend: None,
1817                        write_mask: wgpu::ColorWrites::ALL,
1818                    })],
1819                }),
1820                primitive: wgpu::PrimitiveState::default(),
1821                depth_stencil: None,
1822                multisample: wgpu::MultisampleState::default(),
1823                multiview_mask: None,
1824                cache: None,
1825            });
1826        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1827            label: Some("roxlap-gpu grid_dda.blit_sampler"),
1828            address_mode_u: wgpu::AddressMode::ClampToEdge,
1829            address_mode_v: wgpu::AddressMode::ClampToEdge,
1830            address_mode_w: wgpu::AddressMode::ClampToEdge,
1831            mag_filter: wgpu::FilterMode::Nearest,
1832            min_filter: wgpu::FilterMode::Nearest,
1833            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1834            ..Default::default()
1835        });
1836        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1837            label: Some("roxlap-gpu grid_dda.blit_bg"),
1838            layout: &bgl_blit,
1839            entries: &[
1840                wgpu::BindGroupEntry {
1841                    binding: 0,
1842                    resource: wgpu::BindingResource::TextureView(&storage_view),
1843                },
1844                wgpu::BindGroupEntry {
1845                    binding: 1,
1846                    resource: wgpu::BindingResource::Sampler(&sampler),
1847                },
1848            ],
1849        });
1850
1851        GridDdaResources {
1852            storage_size: (width, height),
1853            storage_view,
1854            uniform_buf,
1855            bgl_dda,
1856            pipeline_dda,
1857            blit_bg,
1858            pipeline_blit,
1859            _sampler: sampler,
1860        }
1861    }
1862
1863    /// GPU.5 render — multi-grid scene marcher. `cameras[i]` is the
1864    /// world camera transformed into grid `i`'s local frame
1865    /// (caller-supplied; see scene-demo's `redraw_gpu` for the
1866    /// glam-based transform). `fov_y_rad` is the shared vertical
1867    /// FOV; `max_outer_steps` caps per-ray chunk-DDA work for each
1868    /// grid.
1869    ///
1870    /// # Panics
1871    /// If `cameras.len() != scene.grid_count`.
1872    /// `cameras[i]` is grid `i`'s world camera transformed into that
1873    /// grid's local frame (the grid marcher works in grid-local space).
1874    /// `sprite_camera` is the **world** camera: instanced sprites carry
1875    /// world-space positions/transforms, so they must project through
1876    /// the untransformed world camera — not `cameras[0]`, which is only
1877    /// the world camera when grid 0 is at identity.
1878    pub fn render_scene(
1879        &mut self,
1880        scene: &GpuSceneResident,
1881        cameras: &[Camera],
1882        sprite_camera: &Camera,
1883        fov_y_rad: f32,
1884        max_outer_steps: u32,
1885    ) {
1886        assert_eq!(
1887            cameras.len(),
1888            scene.grid_count as usize,
1889            "render_scene: {} cameras supplied, scene has {} grids",
1890            cameras.len(),
1891            scene.grid_count,
1892        );
1893        self.last_fov_y_rad = fov_y_rad; // cached for pixel_ray (picking)
1894
1895        // Deferred present: drop any frame a prior render left
1896        // un-presented (a host that skipped present/paint_egui) so we
1897        // never hold two outstanding swapchain textures.
1898        self.pending_frame = None;
1899        let Some(surf_tex) = self.acquire_frame() else {
1900            return;
1901        };
1902        let surf_view = surf_tex
1903            .texture
1904            .create_view(&wgpu::TextureViewDescriptor::default());
1905
1906        let surface_w = self.surface_config.width;
1907        let surface_h = self.surface_config.height;
1908        let surface_format = self.surface_config.format;
1909
1910        let needs_build = match &self.scene_dda {
1911            Some(r) => r.storage_size != (surface_w, surface_h),
1912            None => true,
1913        };
1914        if needs_build {
1915            self.scene_dda = Some(self.build_scene_dda(surface_w, surface_h, surface_format));
1916        }
1917        // GPU.9 — materialise the sprite pipeline the first frame
1918        // sprites are present (before the immutable `dda` borrow).
1919        // GPU.10.0 — build the model-DDA pipeline the first frame a
1920        // sprite registry is present.
1921        if self.sprite_registry.is_some() && self.sprite_model_dda.is_none() {
1922            self.sprite_model_dda = Some(self.build_sprite_model_dda());
1923        }
1924        // GPU.10.3 — frustum-cull + screen-tile-bin the sprite instances
1925        // (needs &mut self for buffer growth, so before the immutable
1926        // scene_dda borrow). Captures (visible_count, tiles_x); None when
1927        // nothing is in view.
1928        let sprite_pass: Option<(u32, u32)> = if let Some(reg) = self.sprite_registry.as_mut() {
1929            if reg.instance_capacity > 0 {
1930                // World camera — sprite positions/transforms are world-
1931                // space (independent of any grid's transform).
1932                let cam = sprite_camera;
1933                #[allow(clippy::cast_precision_loss)]
1934                let aspect = surface_w as f32 / surface_h as f32;
1935                let half_h = (fov_y_rad * 0.5).tan();
1936                let frustum = sprite_model::ViewFrustum {
1937                    pos: cam.position,
1938                    right: cam.right,
1939                    down: cam.down,
1940                    forward: cam.forward,
1941                    half_w: half_h * aspect,
1942                    half_h,
1943                    far: 1.0e9,
1944                };
1945                let (visible, tiles_x, _tiles_y) = reg.cull_bin_upload(
1946                    &self.device,
1947                    &self.queue,
1948                    &frustum,
1949                    surface_w,
1950                    surface_h,
1951                    SPRITE_TILE_SIZE,
1952                    self.sprite_lod_px,
1953                );
1954                (visible > 0).then_some((visible, tiles_x))
1955            } else {
1956                None
1957            }
1958        } else {
1959            None
1960        };
1961        let dda = self.scene_dda.as_ref().expect("just built");
1962
1963        // Refresh the blit's flip flag each frame (offset 8, after the
1964        // width/height), so toggling the flip applies without a resize.
1965        self.queue.write_buffer(
1966            &dda.blit_dims,
1967            8,
1968            bytemuck::bytes_of(&[u32::from(self.flip_x), 0u32]),
1969        );
1970
1971        // Pack per-grid cameras into a runtime-sized storage buffer
1972        // (binding 15) — no fixed cap on grid count.
1973        let cam_vec: Vec<SceneDdaPerGridCamera> = cameras
1974            .iter()
1975            .map(SceneDdaPerGridCamera::from_camera)
1976            .collect();
1977        let grid_cameras = upload_grid_cameras(&self.device, &cam_vec);
1978        let uniform = SceneDdaUniform {
1979            fov_y_rad,
1980            grid_count: scene.grid_count,
1981            max_outer_steps,
1982            _pad0: 0,
1983            screen_size: [surface_w, surface_h],
1984            _pad1: [0; 2],
1985            fog_color: [
1986                self.fog_color[0],
1987                self.fog_color[1],
1988                self.fog_color[2],
1989                self.fog_near,
1990            ],
1991            fog_far: self.fog_far,
1992            // L3.1: always write scene depth. Costs one storage store per
1993            // pixel, and the depth is needed for sprite z-test, sprite-less
1994            // `pick_depth`, and `draw_lines` occlusion alike.
1995            write_depth: 1,
1996            occ_page_words: scene.occupancy_page_words,
1997            occ_num_pages: scene.occupancy_num_pages,
1998            mip_scan_dist: self.scene_mip_scan_dist,
1999            _pad2: 0,
2000            _pad3: 0,
2001            _pad4: 0,
2002            // Sky direction comes from the world (sprite) camera, so a
2003            // grid-less sprite-only scene still paints a real sky.
2004            sky_cam: SceneDdaPerGridCamera::from_camera(sprite_camera),
2005            side_shades0: self.scene_side_shades[0],
2006            side_shades1: self.scene_side_shades[1],
2007        };
2008        self.queue
2009            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
2010
2011        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2012            label: Some("roxlap-gpu scene_dda.bg"),
2013            layout: &dda.bgl_dda,
2014            entries: &[
2015                wgpu::BindGroupEntry {
2016                    binding: 0,
2017                    resource: dda.uniform_buf.as_entire_binding(),
2018                },
2019                // Occupancy page 0 at binding 1; pages 1..MAX_OCC_PAGES
2020                // at bindings 12.. (see GPU.X occupancy paging).
2021                wgpu::BindGroupEntry {
2022                    binding: 1,
2023                    resource: scene.occupancy_pages[0].as_entire_binding(),
2024                },
2025                wgpu::BindGroupEntry {
2026                    binding: 2,
2027                    resource: scene.all_color_offsets.as_entire_binding(),
2028                },
2029                wgpu::BindGroupEntry {
2030                    binding: 3,
2031                    resource: scene.all_colors.as_entire_binding(),
2032                },
2033                wgpu::BindGroupEntry {
2034                    binding: 4,
2035                    resource: scene.all_chunk_colors_base.as_entire_binding(),
2036                },
2037                wgpu::BindGroupEntry {
2038                    binding: 5,
2039                    resource: scene.all_chunk_occupancy.as_entire_binding(),
2040                },
2041                wgpu::BindGroupEntry {
2042                    binding: 6,
2043                    resource: scene.grid_static_meta.as_entire_binding(),
2044                },
2045                wgpu::BindGroupEntry {
2046                    binding: 7,
2047                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
2048                },
2049                wgpu::BindGroupEntry {
2050                    binding: 8,
2051                    resource: dda.framebuffer.as_entire_binding(),
2052                },
2053                wgpu::BindGroupEntry {
2054                    binding: 9,
2055                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
2056                },
2057                wgpu::BindGroupEntry {
2058                    binding: 10,
2059                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
2060                },
2061                wgpu::BindGroupEntry {
2062                    binding: 11,
2063                    resource: dda.depth_buffer.as_entire_binding(),
2064                },
2065                wgpu::BindGroupEntry {
2066                    binding: 12,
2067                    resource: scene.occupancy_pages[1].as_entire_binding(),
2068                },
2069                wgpu::BindGroupEntry {
2070                    binding: 13,
2071                    resource: scene.occupancy_pages[2].as_entire_binding(),
2072                },
2073                wgpu::BindGroupEntry {
2074                    binding: 14,
2075                    resource: scene.occupancy_pages[3].as_entire_binding(),
2076                },
2077                wgpu::BindGroupEntry {
2078                    binding: 15,
2079                    resource: grid_cameras.as_entire_binding(),
2080                },
2081            ],
2082        });
2083
2084        // GPU.9 — when sprites are present, build both splatter bind
2085        // groups up front (the splat pass writes the key buffer; the
2086        // resolve pass reads keys + scene depth and writes colour).
2087        // GPU.10.3 — model-DDA bind group + per-frame uniform, using the
2088        // cull/bin results captured above. Per-model + per-instance data
2089        // + the tile lists live in the registry buffers.
2090        let sprite_model_bg = match (&self.sprite_model_dda, &self.sprite_registry, sprite_pass) {
2091            (Some(smd), Some(reg), Some((visible, tiles_x))) => {
2092                // World camera (see the cull pass above) — sprites
2093                // project through it regardless of grid 0's transform.
2094                let cam = sprite_camera;
2095                let uni = SpriteModelUniform {
2096                    cam_pos: cam.position,
2097                    _p0: 0.0,
2098                    cam_right: cam.right,
2099                    _p1: 0.0,
2100                    cam_down: cam.down,
2101                    _p2: 0.0,
2102                    cam_forward: cam.forward,
2103                    _p3: 0.0,
2104                    fog_color: [
2105                        self.fog_color[0],
2106                        self.fog_color[1],
2107                        self.fog_color[2],
2108                        self.fog_near,
2109                    ],
2110                    screen_size: [surface_w, surface_h],
2111                    instance_count: visible,
2112                    fog_far: self.fog_far,
2113                    fov_y_rad,
2114                    tiles_x,
2115                    tile_size: SPRITE_TILE_SIZE,
2116                    _p6: 0.0,
2117                };
2118                self.queue
2119                    .write_buffer(&smd.uniform_buf, 0, bytemuck::bytes_of(&uni));
2120                Some(self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2121                    label: Some("roxlap-gpu sprite_model_dda.bg"),
2122                    layout: &smd.bgl,
2123                    entries: &[
2124                        wgpu::BindGroupEntry {
2125                            binding: 0,
2126                            resource: smd.uniform_buf.as_entire_binding(),
2127                        },
2128                        wgpu::BindGroupEntry {
2129                            binding: 1,
2130                            resource: reg.occupancy.as_entire_binding(),
2131                        },
2132                        wgpu::BindGroupEntry {
2133                            binding: 2,
2134                            resource: reg.colors.as_entire_binding(),
2135                        },
2136                        wgpu::BindGroupEntry {
2137                            binding: 3,
2138                            resource: reg.color_offsets.as_entire_binding(),
2139                        },
2140                        wgpu::BindGroupEntry {
2141                            binding: 4,
2142                            resource: reg.model_meta.as_entire_binding(),
2143                        },
2144                        wgpu::BindGroupEntry {
2145                            binding: 5,
2146                            resource: reg.instances.as_entire_binding(),
2147                        },
2148                        wgpu::BindGroupEntry {
2149                            binding: 6,
2150                            resource: dda.depth_buffer.as_entire_binding(),
2151                        },
2152                        wgpu::BindGroupEntry {
2153                            binding: 7,
2154                            resource: dda.framebuffer.as_entire_binding(),
2155                        },
2156                        wgpu::BindGroupEntry {
2157                            binding: 8,
2158                            resource: reg.tile_ranges.as_entire_binding(),
2159                        },
2160                        wgpu::BindGroupEntry {
2161                            binding: 9,
2162                            resource: reg.tile_instances.as_entire_binding(),
2163                        },
2164                        wgpu::BindGroupEntry {
2165                            binding: 10,
2166                            resource: reg.dirs.as_entire_binding(),
2167                        },
2168                        wgpu::BindGroupEntry {
2169                            binding: 11,
2170                            resource: reg.colmul.as_entire_binding(),
2171                        },
2172                    ],
2173                }))
2174            }
2175            _ => None,
2176        };
2177
2178        let mut encoder = self
2179            .device
2180            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2181                label: Some("roxlap-gpu scene encoder"),
2182            });
2183        {
2184            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
2185                label: Some("roxlap-gpu scene_dda compute"),
2186                timestamp_writes: None,
2187            });
2188            cpass.set_pipeline(&dda.pipeline_dda);
2189            cpass.set_bind_group(0, &dda_bg, &[]);
2190            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
2191        }
2192        // GPU.10 — sprite model-DDA pass: one thread per pixel marches
2193        // the tile's instances + composites against scene depth, after
2194        // the scene pass wrote the depth buffer and before the blit.
2195        if let (Some(smd), Some(bg)) = (&self.sprite_model_dda, &sprite_model_bg) {
2196            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
2197                label: Some("roxlap-gpu sprite_model_dda"),
2198                timestamp_writes: None,
2199            });
2200            cpass.set_pipeline(&smd.pipeline);
2201            cpass.set_bind_group(0, bg, &[]);
2202            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
2203        }
2204        {
2205            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2206                label: Some("roxlap-gpu scene_dda blit"),
2207                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2208                    view: &surf_view,
2209                    depth_slice: None,
2210                    resolve_target: None,
2211                    ops: wgpu::Operations {
2212                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
2213                        store: wgpu::StoreOp::Store,
2214                    },
2215                })],
2216                depth_stencil_attachment: None,
2217                timestamp_writes: None,
2218                occlusion_query_set: None,
2219                multiview_mask: None,
2220            });
2221            rpass.set_pipeline(&dda.pipeline_blit);
2222            rpass.set_bind_group(0, &dda.blit_bg, &[]);
2223            rpass.draw(0..3, 0..1);
2224        }
2225        self.queue.submit(std::iter::once(encoder.finish()));
2226        // This frame wrote `scene_dda.depth_buffer`, so depth-tested
2227        // overlays may test against it.
2228        self.scene_depth_valid = true;
2229        // Deferred present — the host calls `present` or `paint_egui`.
2230        self.pending_frame = Some((surf_tex, surf_view));
2231        self.frame_count = self.frame_count.wrapping_add(1);
2232    }
2233
2234    /// Like [`Self::render`] (clear to colour) but **deferred**: stashes
2235    /// the frame for [`Self::present`] / [`Self::paint_egui`] instead of
2236    /// presenting. The facade uses this before any grid is resident so a
2237    /// HUD can still be painted over an empty scene.
2238    pub fn render_clear_deferred(&mut self) {
2239        // No scene pass this frame ⇒ `scene_dda.depth_buffer` (if it
2240        // exists from an earlier scene) is stale; depth-tested overlays
2241        // must not test against it.
2242        self.scene_depth_valid = false;
2243        self.pending_frame = None;
2244        let Some(surf_tex) = self.acquire_frame() else {
2245            return;
2246        };
2247        let view = surf_tex
2248            .texture
2249            .create_view(&wgpu::TextureViewDescriptor::default());
2250        let [r, g, b] = self.clear_colour;
2251        let mut encoder = self
2252            .device
2253            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2254                label: Some("roxlap-gpu clear (deferred)"),
2255            });
2256        {
2257            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2258                label: Some("roxlap-gpu clear (deferred)"),
2259                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2260                    view: &view,
2261                    depth_slice: None,
2262                    resolve_target: None,
2263                    ops: wgpu::Operations {
2264                        load: wgpu::LoadOp::Clear(wgpu::Color { r, g, b, a: 1.0 }),
2265                        store: wgpu::StoreOp::Store,
2266                    },
2267                })],
2268                depth_stencil_attachment: None,
2269                timestamp_writes: None,
2270                occlusion_query_set: None,
2271                multiview_mask: None,
2272            });
2273        }
2274        self.queue.submit(std::iter::once(encoder.finish()));
2275        self.pending_frame = Some((surf_tex, view));
2276    }
2277
2278    /// Present the frame stashed by the last deferred render
2279    /// ([`Self::render_scene`] / [`Self::render_clear_deferred`]). No-op
2280    /// if nothing is pending (e.g. the surface was lost mid-render).
2281    pub fn present(&mut self) {
2282        if let Some((surf_tex, _view)) = self.pending_frame.take() {
2283            surf_tex.present();
2284        }
2285    }
2286
2287    /// Draw depth-tested world-space [`GpuLine`]s over the pending frame
2288    /// (L3.2). Projects each endpoint with `cam` (the marcher's pinhole) +
2289    /// the last frame's FOV / surface size, expands to screen-space quads,
2290    /// and runs a `LoadOp::Load` pass into the pending swapchain view — so
2291    /// the lines land on the marched frame and a later `present` /
2292    /// `paint_egui` still finishes it (the pending frame is left intact).
2293    /// Depth-tested lines are occluded by nearer marched geometry (compared
2294    /// against the scene-DDA depth buffer's `best_t`); call after `render`,
2295    /// before `present` / `paint_egui`. No-op if no frame is pending.
2296    pub fn draw_lines_deferred(&mut self, cam: &GpuLineCamera, lines: &[GpuLine]) {
2297        if self.pending_frame.is_none() || lines.is_empty() {
2298            return;
2299        }
2300        let (w, h) = (self.surface_config.width, self.surface_config.height);
2301        let fov = self.last_fov_y_rad;
2302        if w == 0 || h == 0 || fov <= 0.0 {
2303            return; // no frame marched yet — no projection to reuse
2304        }
2305        let verts = build_line_vertices(cam, lines, w, h, fov, self.flip_x);
2306        if verts.is_empty() {
2307            return;
2308        }
2309        self.ensure_line_resources();
2310        let res = self.line_resources.as_ref().expect("just built");
2311
2312        // Skip the depth test when there's no current scene depth to read —
2313        // either no buffer at all (sprite-only / never-rendered) or this
2314        // frame was a color-only clear so the buffer is stale (an empty
2315        // scene drawn after a grid scene). The 1-word dummy / stale buffer
2316        // is still bound to satisfy the layout; `no_depth = 1` keeps the
2317        // shader from indexing it.
2318        let no_depth = u32::from(self.scene_dda.is_none() || !self.scene_depth_valid);
2319        let params = LineParams {
2320            screen_w: w,
2321            screen_h: h,
2322            depth_bias: LINE_DEPTH_BIAS,
2323            no_depth,
2324            flip_x: u32::from(self.flip_x),
2325            _pad: [0; 3],
2326        };
2327        self.queue
2328            .write_buffer(&res.uniform_buf, 0, bytemuck::bytes_of(&params));
2329
2330        let depth_resource = match &self.scene_dda {
2331            Some(dda) => dda.depth_buffer.as_entire_binding(),
2332            None => res.dummy_depth.as_entire_binding(),
2333        };
2334        let bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2335            label: Some("roxlap-gpu line.bg"),
2336            layout: &res.bgl,
2337            entries: &[
2338                wgpu::BindGroupEntry {
2339                    binding: 0,
2340                    resource: res.uniform_buf.as_entire_binding(),
2341                },
2342                wgpu::BindGroupEntry {
2343                    binding: 1,
2344                    resource: depth_resource,
2345                },
2346            ],
2347        });
2348
2349        // Grow-only persistent vertex buffer (L3.3): one `write_buffer`
2350        // per overlay, reused across frames. Power-of-two capacity keeps
2351        // re-allocation rare as the segment count drifts.
2352        let needed = std::mem::size_of_val(verts.as_slice()) as u64;
2353        if self.line_vbuf_cap < needed {
2354            let cap = needed.next_power_of_two().max(4096);
2355            self.line_vbuf = Some(self.device.create_buffer(&wgpu::BufferDescriptor {
2356                label: Some("roxlap-gpu line.vbuf"),
2357                size: cap,
2358                usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
2359                mapped_at_creation: false,
2360            }));
2361            self.line_vbuf_cap = cap;
2362        }
2363        let vbuf = self.line_vbuf.as_ref().expect("ensured above");
2364        self.queue
2365            .write_buffer(vbuf, 0, bytemuck::cast_slice(&verts));
2366
2367        let view = &self.pending_frame.as_ref().expect("checked above").1;
2368        let mut encoder = self
2369            .device
2370            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2371                label: Some("roxlap-gpu lines"),
2372            });
2373        {
2374            // `LoadOp::Load` keeps the marcher's frame; the lines draw over
2375            // it. Manual depth test in the FS (no depth-stencil attachment).
2376            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2377                label: Some("roxlap-gpu line paint"),
2378                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2379                    view,
2380                    depth_slice: None,
2381                    resolve_target: None,
2382                    ops: wgpu::Operations {
2383                        load: wgpu::LoadOp::Load,
2384                        store: wgpu::StoreOp::Store,
2385                    },
2386                })],
2387                depth_stencil_attachment: None,
2388                timestamp_writes: None,
2389                occlusion_query_set: None,
2390                multiview_mask: None,
2391            });
2392            pass.set_pipeline(&res.pipeline);
2393            pass.set_bind_group(0, &bg, &[]);
2394            pass.set_vertex_buffer(0, vbuf.slice(..));
2395            pass.draw(0..verts.len() as u32, 0..1);
2396        }
2397        self.queue.submit(std::iter::once(encoder.finish()));
2398        // pending_frame left intact — present/paint_egui finishes the frame.
2399    }
2400
2401    /// Lazy-build the [`LineResources`] (`line.wgsl` pipeline + uniform +
2402    /// dummy depth buffer). The colour target uses the surface format with
2403    /// straight-alpha over-blending; no depth-stencil attachment (the depth
2404    /// test is manual in the fragment shader against the scene depth buffer).
2405    fn ensure_line_resources(&mut self) {
2406        if self.line_resources.is_some() {
2407            return;
2408        }
2409        let shader = self
2410            .device
2411            .create_shader_module(wgpu::ShaderModuleDescriptor {
2412                label: Some("line.wgsl"),
2413                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/line.wgsl").into()),
2414            });
2415        let bgl = self
2416            .device
2417            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2418                label: Some("roxlap-gpu line.bgl"),
2419                entries: &[
2420                    wgpu::BindGroupLayoutEntry {
2421                        binding: 0,
2422                        visibility: wgpu::ShaderStages::FRAGMENT,
2423                        ty: wgpu::BindingType::Buffer {
2424                            ty: wgpu::BufferBindingType::Uniform,
2425                            has_dynamic_offset: false,
2426                            min_binding_size: None,
2427                        },
2428                        count: None,
2429                    },
2430                    wgpu::BindGroupLayoutEntry {
2431                        binding: 1,
2432                        visibility: wgpu::ShaderStages::FRAGMENT,
2433                        ty: wgpu::BindingType::Buffer {
2434                            ty: wgpu::BufferBindingType::Storage { read_only: true },
2435                            has_dynamic_offset: false,
2436                            min_binding_size: None,
2437                        },
2438                        count: None,
2439                    },
2440                ],
2441            });
2442        let layout = self
2443            .device
2444            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2445                label: Some("roxlap-gpu line.layout"),
2446                bind_group_layouts: &[Some(&bgl)],
2447                immediate_size: 0,
2448            });
2449        let pipeline = self
2450            .device
2451            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
2452                label: Some("roxlap-gpu line.pipeline"),
2453                layout: Some(&layout),
2454                vertex: wgpu::VertexState {
2455                    module: &shader,
2456                    entry_point: Some("vs_main"),
2457                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2458                    buffers: &[wgpu::VertexBufferLayout {
2459                        array_stride: std::mem::size_of::<LineVertex>() as u64,
2460                        step_mode: wgpu::VertexStepMode::Vertex,
2461                        attributes: &wgpu::vertex_attr_array![
2462                            0 => Float32x2, // pos (NDC)
2463                            1 => Float32,   // depth
2464                            2 => Float32,   // depth_test
2465                            3 => Float32x4, // color
2466                        ],
2467                    }],
2468                },
2469                fragment: Some(wgpu::FragmentState {
2470                    module: &shader,
2471                    entry_point: Some("fs_main"),
2472                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2473                    targets: &[Some(wgpu::ColorTargetState {
2474                        format: self.surface_config.format,
2475                        blend: Some(wgpu::BlendState::ALPHA_BLENDING),
2476                        write_mask: wgpu::ColorWrites::ALL,
2477                    })],
2478                }),
2479                primitive: wgpu::PrimitiveState {
2480                    cull_mode: None,
2481                    ..Default::default()
2482                },
2483                depth_stencil: None,
2484                multisample: wgpu::MultisampleState::default(),
2485                multiview_mask: None,
2486                cache: None,
2487            });
2488        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2489            label: Some("roxlap-gpu line.uniform"),
2490            size: std::mem::size_of::<LineParams>() as u64,
2491            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2492            mapped_at_creation: false,
2493        });
2494        let dummy_depth = self.device.create_buffer(&wgpu::BufferDescriptor {
2495            label: Some("roxlap-gpu line.dummy_depth"),
2496            size: 4,
2497            usage: wgpu::BufferUsages::STORAGE,
2498            mapped_at_creation: false,
2499        });
2500        self.line_resources = Some(LineResources {
2501            pipeline,
2502            bgl,
2503            uniform_buf,
2504            dummy_depth,
2505        });
2506    }
2507
2508    /// Upload (or replace) an RGBA8 image as a sampled texture, returning
2509    /// a stable id for [`GpuImageQuad::image`]. `rgba` is row-major,
2510    /// `width * height * 4` bytes, straight (un-premultiplied) alpha.
2511    /// Reuses a dropped slot when one exists. Returns `0` for malformed
2512    /// input (an id that draws nothing).
2513    pub fn upload_image(&mut self, rgba: &[u8], width: u32, height: u32) -> usize {
2514        if width == 0 || height == 0 || rgba.len() != (width as usize) * (height as usize) * 4 {
2515            return 0;
2516        }
2517        let texture = self.device.create_texture(&wgpu::TextureDescriptor {
2518            label: Some("roxlap-gpu image_sprite"),
2519            size: wgpu::Extent3d {
2520                width,
2521                height,
2522                depth_or_array_layers: 1,
2523            },
2524            mip_level_count: 1,
2525            sample_count: 1,
2526            dimension: wgpu::TextureDimension::D2,
2527            format: wgpu::TextureFormat::Rgba8Unorm,
2528            usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
2529            view_formats: &[],
2530        });
2531        self.queue.write_texture(
2532            wgpu::TexelCopyTextureInfo {
2533                texture: &texture,
2534                mip_level: 0,
2535                origin: wgpu::Origin3d::ZERO,
2536                aspect: wgpu::TextureAspect::All,
2537            },
2538            rgba,
2539            wgpu::TexelCopyBufferLayout {
2540                offset: 0,
2541                bytes_per_row: Some(width * 4),
2542                rows_per_image: Some(height),
2543            },
2544            wgpu::Extent3d {
2545                width,
2546                height,
2547                depth_or_array_layers: 1,
2548            },
2549        );
2550        let view = texture.create_view(&wgpu::TextureViewDescriptor::default());
2551        let resident = ImageResident {
2552            view,
2553            _texture: texture,
2554        };
2555        if let Some(slot) = self.images.iter().position(Option::is_none) {
2556            self.images[slot] = Some(resident);
2557            slot
2558        } else {
2559            self.images.push(Some(resident));
2560            self.images.len() - 1
2561        }
2562    }
2563
2564    /// Release an image uploaded with [`Self::upload_image`] (the slot
2565    /// becomes reusable).
2566    pub fn drop_image(&mut self, id: usize) {
2567        if let Some(slot) = self.images.get_mut(id) {
2568            *slot = None;
2569        }
2570    }
2571
2572    /// Draw world-space 2D image sprites ([`GpuImageQuad`]) over the
2573    /// pending frame — the textured-quad sibling of
2574    /// [`Self::draw_lines_deferred`]. Projects each quad with `cam` (the
2575    /// marcher's pinhole) + the last frame's FOV / surface size, expands +
2576    /// near-clips to triangles, and runs one `LoadOp::Load` pass with a
2577    /// draw per quad (each binds its own texture). UVs are perspective-correct;
2578    /// depth-tested quads are occluded by nearer marched geometry. Call
2579    /// after `render`, before `present` / `paint_egui`. No-op if no frame
2580    /// is pending.
2581    pub fn draw_images_deferred(&mut self, cam: &GpuLineCamera, quads: &[GpuImageQuad]) {
2582        if self.pending_frame.is_none() || quads.is_empty() {
2583            return;
2584        }
2585        let (w, h) = (self.surface_config.width, self.surface_config.height);
2586        let fov = self.last_fov_y_rad;
2587        if w == 0 || h == 0 || fov <= 0.0 {
2588            return;
2589        }
2590
2591        // Concatenate every quad's verts into one buffer, recording each
2592        // quad's (range, texture) so they share a single render pass.
2593        let mut verts: Vec<ImageVertex> = Vec::new();
2594        let mut draws: Vec<(u32, u32, usize)> = Vec::new();
2595        for quad in quads {
2596            if !matches!(self.images.get(quad.image), Some(Some(_))) {
2597                continue; // dropped / never-uploaded id
2598            }
2599            let v = build_image_vertices(cam, quad, w, h, fov, self.flip_x);
2600            if v.is_empty() {
2601                continue;
2602            }
2603            let start = verts.len() as u32;
2604            verts.extend_from_slice(&v);
2605            draws.push((start, verts.len() as u32, quad.image));
2606        }
2607        if draws.is_empty() {
2608            return;
2609        }
2610
2611        self.ensure_image_resources();
2612        // See `draw_lines_deferred`: skip depth when there's no valid
2613        // current-frame scene depth (none built, or a color-only clear).
2614        let no_depth = u32::from(self.scene_dda.is_none() || !self.scene_depth_valid);
2615        let params = LineParams {
2616            screen_w: w,
2617            screen_h: h,
2618            depth_bias: LINE_DEPTH_BIAS,
2619            no_depth,
2620            flip_x: u32::from(self.flip_x),
2621            _pad: [0; 3],
2622        };
2623        {
2624            let res = self.image_resources.as_ref().expect("just built");
2625            self.queue
2626                .write_buffer(&res.uniform_buf, 0, bytemuck::bytes_of(&params));
2627        }
2628
2629        // Grow-only persistent vertex buffer (mirrors the line vbuf).
2630        let needed = std::mem::size_of_val(verts.as_slice()) as u64;
2631        if self.image_vbuf_cap < needed {
2632            let cap = needed.next_power_of_two().max(4096);
2633            self.image_vbuf = Some(self.device.create_buffer(&wgpu::BufferDescriptor {
2634                label: Some("roxlap-gpu image.vbuf"),
2635                size: cap,
2636                usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
2637                mapped_at_creation: false,
2638            }));
2639            self.image_vbuf_cap = cap;
2640        }
2641        let vbuf = self.image_vbuf.as_ref().expect("ensured above");
2642        self.queue
2643            .write_buffer(vbuf, 0, bytemuck::cast_slice(&verts));
2644
2645        // One bind group per draw (the texture view differs per quad).
2646        let res = self.image_resources.as_ref().expect("just built");
2647        let depth_resource = match &self.scene_dda {
2648            Some(dda) => dda.depth_buffer.as_entire_binding(),
2649            None => res.dummy_depth.as_entire_binding(),
2650        };
2651        let bind_groups: Vec<wgpu::BindGroup> = draws
2652            .iter()
2653            .map(|&(_, _, image_id)| {
2654                let resident = self.images[image_id].as_ref().expect("checked present");
2655                self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2656                    label: Some("roxlap-gpu image.bg"),
2657                    layout: &res.bgl,
2658                    entries: &[
2659                        wgpu::BindGroupEntry {
2660                            binding: 0,
2661                            resource: res.uniform_buf.as_entire_binding(),
2662                        },
2663                        wgpu::BindGroupEntry {
2664                            binding: 1,
2665                            resource: depth_resource.clone(),
2666                        },
2667                        wgpu::BindGroupEntry {
2668                            binding: 2,
2669                            resource: wgpu::BindingResource::TextureView(&resident.view),
2670                        },
2671                        wgpu::BindGroupEntry {
2672                            binding: 3,
2673                            resource: wgpu::BindingResource::Sampler(&res.sampler),
2674                        },
2675                    ],
2676                })
2677            })
2678            .collect();
2679
2680        let view = &self.pending_frame.as_ref().expect("checked above").1;
2681        let mut encoder = self
2682            .device
2683            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2684                label: Some("roxlap-gpu images"),
2685            });
2686        {
2687            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2688                label: Some("roxlap-gpu image paint"),
2689                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2690                    view,
2691                    depth_slice: None,
2692                    resolve_target: None,
2693                    ops: wgpu::Operations {
2694                        load: wgpu::LoadOp::Load,
2695                        store: wgpu::StoreOp::Store,
2696                    },
2697                })],
2698                depth_stencil_attachment: None,
2699                timestamp_writes: None,
2700                occlusion_query_set: None,
2701                multiview_mask: None,
2702            });
2703            pass.set_pipeline(&res.pipeline);
2704            pass.set_vertex_buffer(0, vbuf.slice(..));
2705            for (&(start, end, _), bg) in draws.iter().zip(&bind_groups) {
2706                pass.set_bind_group(0, bg, &[]);
2707                pass.draw(start..end, 0..1);
2708            }
2709        }
2710        self.queue.submit(std::iter::once(encoder.finish()));
2711        // pending_frame left intact — present/paint_egui finishes it.
2712    }
2713
2714    /// Lazy-build the [`ImageResources`] (`image.wgsl` pipeline + uniform +
2715    /// nearest sampler + dummy depth). Straight-alpha over-blend, no
2716    /// depth-stencil attachment (the depth test is manual in the FS).
2717    fn ensure_image_resources(&mut self) {
2718        if self.image_resources.is_some() {
2719            return;
2720        }
2721        let shader = self
2722            .device
2723            .create_shader_module(wgpu::ShaderModuleDescriptor {
2724                label: Some("image.wgsl"),
2725                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/image.wgsl").into()),
2726            });
2727        let bgl = self
2728            .device
2729            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2730                label: Some("roxlap-gpu image.bgl"),
2731                entries: &[
2732                    wgpu::BindGroupLayoutEntry {
2733                        binding: 0,
2734                        visibility: wgpu::ShaderStages::FRAGMENT,
2735                        ty: wgpu::BindingType::Buffer {
2736                            ty: wgpu::BufferBindingType::Uniform,
2737                            has_dynamic_offset: false,
2738                            min_binding_size: None,
2739                        },
2740                        count: None,
2741                    },
2742                    wgpu::BindGroupLayoutEntry {
2743                        binding: 1,
2744                        visibility: wgpu::ShaderStages::FRAGMENT,
2745                        ty: wgpu::BindingType::Buffer {
2746                            ty: wgpu::BufferBindingType::Storage { read_only: true },
2747                            has_dynamic_offset: false,
2748                            min_binding_size: None,
2749                        },
2750                        count: None,
2751                    },
2752                    wgpu::BindGroupLayoutEntry {
2753                        binding: 2,
2754                        visibility: wgpu::ShaderStages::FRAGMENT,
2755                        ty: wgpu::BindingType::Texture {
2756                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
2757                            view_dimension: wgpu::TextureViewDimension::D2,
2758                            multisampled: false,
2759                        },
2760                        count: None,
2761                    },
2762                    wgpu::BindGroupLayoutEntry {
2763                        binding: 3,
2764                        visibility: wgpu::ShaderStages::FRAGMENT,
2765                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
2766                        count: None,
2767                    },
2768                ],
2769            });
2770        let layout = self
2771            .device
2772            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2773                label: Some("roxlap-gpu image.layout"),
2774                bind_group_layouts: &[Some(&bgl)],
2775                immediate_size: 0,
2776            });
2777        let pipeline = self
2778            .device
2779            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
2780                label: Some("roxlap-gpu image.pipeline"),
2781                layout: Some(&layout),
2782                vertex: wgpu::VertexState {
2783                    module: &shader,
2784                    entry_point: Some("vs_main"),
2785                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2786                    buffers: &[wgpu::VertexBufferLayout {
2787                        array_stride: std::mem::size_of::<ImageVertex>() as u64,
2788                        step_mode: wgpu::VertexStepMode::Vertex,
2789                        attributes: &wgpu::vertex_attr_array![
2790                            0 => Float32x2, // ndc
2791                            1 => Float32,   // w
2792                            2 => Float32,   // depth
2793                            3 => Float32,   // depth_test
2794                            4 => Float32,   // cutoff
2795                            5 => Float32x2, // uv
2796                            6 => Float32x4, // tint
2797                        ],
2798                    }],
2799                },
2800                fragment: Some(wgpu::FragmentState {
2801                    module: &shader,
2802                    entry_point: Some("fs_main"),
2803                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2804                    targets: &[Some(wgpu::ColorTargetState {
2805                        format: self.surface_config.format,
2806                        blend: Some(wgpu::BlendState::ALPHA_BLENDING),
2807                        write_mask: wgpu::ColorWrites::ALL,
2808                    })],
2809                }),
2810                primitive: wgpu::PrimitiveState {
2811                    cull_mode: None,
2812                    ..Default::default()
2813                },
2814                depth_stencil: None,
2815                multisample: wgpu::MultisampleState::default(),
2816                multiview_mask: None,
2817                cache: None,
2818            });
2819        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2820            label: Some("roxlap-gpu image.uniform"),
2821            size: std::mem::size_of::<LineParams>() as u64,
2822            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2823            mapped_at_creation: false,
2824        });
2825        let dummy_depth = self.device.create_buffer(&wgpu::BufferDescriptor {
2826            label: Some("roxlap-gpu image.dummy_depth"),
2827            size: 4,
2828            usage: wgpu::BufferUsages::STORAGE,
2829            mapped_at_creation: false,
2830        });
2831        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
2832            label: Some("roxlap-gpu image.sampler"),
2833            // Nearest + clamp: pixel-art references want crisp texels and
2834            // no wrap bleed at the quad edges.
2835            address_mode_u: wgpu::AddressMode::ClampToEdge,
2836            address_mode_v: wgpu::AddressMode::ClampToEdge,
2837            address_mode_w: wgpu::AddressMode::ClampToEdge,
2838            mag_filter: wgpu::FilterMode::Nearest,
2839            min_filter: wgpu::FilterMode::Nearest,
2840            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
2841            ..Default::default()
2842        });
2843        self.image_resources = Some(ImageResources {
2844            pipeline,
2845            bgl,
2846            uniform_buf,
2847            dummy_depth,
2848            sampler,
2849        });
2850    }
2851
2852    /// Project a world point to window pixels under the marcher's
2853    /// vertical-FOV pinhole (the inverse of [`Self::pixel_ray`]), using
2854    /// the last-rendered frame's size + FOV. `None` before the first
2855    /// scene render or for a point at/behind the near plane.
2856    #[must_use]
2857    pub fn project_point(
2858        &self,
2859        cam_pos: [f32; 3],
2860        right: [f32; 3],
2861        down: [f32; 3],
2862        forward: [f32; 3],
2863        world: [f32; 3],
2864    ) -> Option<(f32, f32)> {
2865        let dda = self.scene_dda.as_ref()?;
2866        let (w, h) = dda.storage_size;
2867        if w == 0 || h == 0 || self.last_fov_y_rad <= 0.0 {
2868            return None;
2869        }
2870        let d = [
2871            world[0] - cam_pos[0],
2872            world[1] - cam_pos[1],
2873            world[2] - cam_pos[2],
2874        ];
2875        let cz = forward[0] * d[0] + forward[1] * d[1] + forward[2] * d[2];
2876        if cz < LINE_NEAR_Z {
2877            return None;
2878        }
2879        let cx = right[0] * d[0] + right[1] * d[1] + right[2] * d[2];
2880        let cy = down[0] * d[0] + down[1] * d[1] + down[2] * d[2];
2881        let half_h = (self.last_fov_y_rad * 0.5).tan();
2882        let half_w = half_h * (w as f32 / h as f32);
2883        let ndc_x = (cx / cz) / half_w;
2884        let ndc_y = -(cy / cz) / half_h;
2885        let sx = (ndc_x * 0.5 + 0.5) * w as f32;
2886        let sy = (0.5 - ndc_y * 0.5) * h as f32;
2887        Some((sx, sy))
2888    }
2889
2890    /// Overlay an `egui` UI on the pending frame, then present it
2891    /// (`hud` feature). `jobs` are the host's tessellated primitives
2892    /// (`egui::Context::tessellate`), `textures` the per-frame texture
2893    /// delta from `egui::FullOutput`, `pixels_per_point` the UI scale.
2894    ///
2895    /// Draws with `LoadOp::Load` over the marcher's frame (a separate
2896    /// encoder submitted after the scene's), so the UI composites on top
2897    /// of the world. No-op if no frame is pending.
2898    #[cfg(feature = "hud")]
2899    pub fn paint_egui(
2900        &mut self,
2901        jobs: &[egui::ClippedPrimitive],
2902        textures: &egui::TexturesDelta,
2903        pixels_per_point: f32,
2904    ) {
2905        let Some((surf_tex, surf_view)) = self.pending_frame.take() else {
2906            return;
2907        };
2908        let format = self.surface_config.format;
2909        let egui_rend = self.egui_renderer.get_or_insert_with(|| {
2910            egui_wgpu::Renderer::new(
2911                &self.device,
2912                format,
2913                egui_wgpu::RendererOptions {
2914                    msaa_samples: 1,
2915                    depth_stencil_format: None,
2916                    dithering: false,
2917                    ..Default::default()
2918                },
2919            )
2920        });
2921
2922        let screen = egui_wgpu::ScreenDescriptor {
2923            size_in_pixels: [self.surface_config.width, self.surface_config.height],
2924            pixels_per_point,
2925        };
2926        for (id, delta) in &textures.set {
2927            egui_rend.update_texture(&self.device, &self.queue, *id, delta);
2928        }
2929        let mut encoder = self
2930            .device
2931            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2932                label: Some("roxlap-gpu egui"),
2933            });
2934        let user_bufs =
2935            egui_rend.update_buffers(&self.device, &self.queue, &mut encoder, jobs, &screen);
2936        {
2937            // `LoadOp::Load` keeps the marcher's frame; egui draws over it.
2938            let mut pass = encoder
2939                .begin_render_pass(&wgpu::RenderPassDescriptor {
2940                    label: Some("roxlap-gpu egui paint"),
2941                    color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2942                        view: &surf_view,
2943                        depth_slice: None,
2944                        resolve_target: None,
2945                        ops: wgpu::Operations {
2946                            load: wgpu::LoadOp::Load,
2947                            store: wgpu::StoreOp::Store,
2948                        },
2949                    })],
2950                    depth_stencil_attachment: None,
2951                    timestamp_writes: None,
2952                    occlusion_query_set: None,
2953                    multiview_mask: None,
2954                })
2955                // egui-wgpu 0.29 requires a `'static` pass (see its docs).
2956                .forget_lifetime();
2957            egui_rend.render(&mut pass, jobs, &screen);
2958        }
2959        for id in &textures.free {
2960            egui_rend.free_texture(id);
2961        }
2962        self.queue.submit(
2963            user_bufs
2964                .into_iter()
2965                .chain(std::iter::once(encoder.finish())),
2966        );
2967        surf_tex.present();
2968    }
2969
2970    fn build_scene_dda(
2971        &self,
2972        width: u32,
2973        height: u32,
2974        surface_format: wgpu::TextureFormat,
2975    ) -> SceneDdaResources {
2976        // Framebuffer as a packed-`rgba8unorm` storage buffer (1 u32 per
2977        // pixel, row stride = `width`). See the struct-field note.
2978        let framebuffer = self.device.create_buffer(&wgpu::BufferDescriptor {
2979            label: Some("roxlap-gpu scene_dda.framebuffer"),
2980            size: u64::from(width) * u64::from(height) * 4,
2981            usage: wgpu::BufferUsages::STORAGE,
2982            mapped_at_creation: false,
2983        });
2984        // Screen size + flip flag for the blit's pixel→index math
2985        // (`vec2<u32>` size, then `flip_x` + pad). Re-written per frame in
2986        // `render_scene` so a flip toggle takes effect without a resize.
2987        let blit_dims = self.device.create_buffer(&wgpu::BufferDescriptor {
2988            label: Some("roxlap-gpu scene_dda.blit_dims"),
2989            size: 16,
2990            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2991            mapped_at_creation: false,
2992        });
2993        self.queue.write_buffer(
2994            &blit_dims,
2995            0,
2996            bytemuck::bytes_of(&[width, height, u32::from(self.flip_x), 0u32]),
2997        );
2998
2999        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
3000            label: Some("roxlap-gpu scene_dda.uniform"),
3001            size: std::mem::size_of::<SceneDdaUniform>() as u64,
3002            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
3003            mapped_at_creation: false,
3004        });
3005
3006        // GPU.9 — per-pixel world-t depth (f32 bits as u32). Sized to
3007        // the storage texture; written by the scene pass when sprites
3008        // are active, read+tested by the sprite splatter.
3009        let depth_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
3010            label: Some("roxlap-gpu scene_dda.depth"),
3011            size: u64::from(width) * u64::from(height) * 4,
3012            // COPY_SRC so `read_depth_pixel` can stage it for picking.
3013            usage: wgpu::BufferUsages::STORAGE
3014                | wgpu::BufferUsages::COPY_DST
3015                | wgpu::BufferUsages::COPY_SRC,
3016            mapped_at_creation: false,
3017        });
3018        let depth_readback = self.device.create_buffer(&wgpu::BufferDescriptor {
3019            label: Some("roxlap-gpu scene_dda.depth_readback"),
3020            size: u64::from(width) * u64::from(height) * 4,
3021            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
3022            mapped_at_creation: false,
3023        });
3024        let dda_shader = self
3025            .device
3026            .create_shader_module(wgpu::ShaderModuleDescriptor {
3027                label: Some("scene_dda.wgsl"),
3028                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
3029            });
3030        let bgl_dda = self
3031            .device
3032            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3033                label: Some("roxlap-gpu scene_dda.bgl"),
3034                entries: &[
3035                    bgl_uniform_entry(0),
3036                    bgl_storage_entry(1, true),
3037                    bgl_storage_entry(2, true),
3038                    bgl_storage_entry(3, true),
3039                    bgl_storage_entry(4, true),
3040                    bgl_storage_entry(5, true),
3041                    bgl_storage_entry(6, true),
3042                    bgl_storage_entry(7, true),
3043                    // Framebuffer storage buffer (read-write; the scene +
3044                    // sprite passes write packed pixels into it).
3045                    bgl_storage_entry(8, false),
3046                    // GPU.8 sky panorama + sampler.
3047                    wgpu::BindGroupLayoutEntry {
3048                        binding: 9,
3049                        visibility: wgpu::ShaderStages::COMPUTE,
3050                        ty: wgpu::BindingType::Texture {
3051                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
3052                            view_dimension: wgpu::TextureViewDimension::D2,
3053                            multisampled: false,
3054                        },
3055                        count: None,
3056                    },
3057                    wgpu::BindGroupLayoutEntry {
3058                        binding: 10,
3059                        visibility: wgpu::ShaderStages::COMPUTE,
3060                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
3061                        count: None,
3062                    },
3063                    // GPU.9 — read-write per-pixel depth buffer.
3064                    bgl_storage_entry(11, false),
3065                    // Occupancy pages 1..MAX_OCC_PAGES (page 0 is
3066                    // binding 1). Unused pages bind a dummy buffer.
3067                    bgl_storage_entry(12, true),
3068                    bgl_storage_entry(13, true),
3069                    bgl_storage_entry(14, true),
3070                    // Per-grid cameras (runtime-sized; one per grid).
3071                    bgl_storage_entry(15, true),
3072                ],
3073            });
3074        let dda_pl = self
3075            .device
3076            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3077                label: Some("roxlap-gpu scene_dda.layout"),
3078                bind_group_layouts: &[Some(&bgl_dda)],
3079                immediate_size: 0,
3080            });
3081        let pipeline_dda = self
3082            .device
3083            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
3084                label: Some("roxlap-gpu scene_dda.pipeline"),
3085                layout: Some(&dda_pl),
3086                module: &dda_shader,
3087                entry_point: Some("render_scene"),
3088                compilation_options: wgpu::PipelineCompilationOptions::default(),
3089                cache: None,
3090            });
3091
3092        let blit_shader = self
3093            .device
3094            .create_shader_module(wgpu::ShaderModuleDescriptor {
3095                label: Some("scene_blit.wgsl"),
3096                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_blit.wgsl").into()),
3097            });
3098        let bgl_blit = self
3099            .device
3100            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3101                label: Some("roxlap-gpu scene_dda.blit_bgl"),
3102                entries: &[
3103                    // Framebuffer storage buffer (read-only in the blit).
3104                    wgpu::BindGroupLayoutEntry {
3105                        binding: 0,
3106                        visibility: wgpu::ShaderStages::FRAGMENT,
3107                        ty: wgpu::BindingType::Buffer {
3108                            ty: wgpu::BufferBindingType::Storage { read_only: true },
3109                            has_dynamic_offset: false,
3110                            min_binding_size: None,
3111                        },
3112                        count: None,
3113                    },
3114                    // Screen-size uniform for the pixel→index math.
3115                    wgpu::BindGroupLayoutEntry {
3116                        binding: 1,
3117                        visibility: wgpu::ShaderStages::FRAGMENT,
3118                        ty: wgpu::BindingType::Buffer {
3119                            ty: wgpu::BufferBindingType::Uniform,
3120                            has_dynamic_offset: false,
3121                            min_binding_size: None,
3122                        },
3123                        count: None,
3124                    },
3125                ],
3126            });
3127        let blit_pl = self
3128            .device
3129            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3130                label: Some("roxlap-gpu scene_dda.blit_layout"),
3131                bind_group_layouts: &[Some(&bgl_blit)],
3132                immediate_size: 0,
3133            });
3134        let pipeline_blit = self
3135            .device
3136            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
3137                label: Some("roxlap-gpu scene_dda.blit_pipeline"),
3138                layout: Some(&blit_pl),
3139                vertex: wgpu::VertexState {
3140                    module: &blit_shader,
3141                    entry_point: Some("vs_main"),
3142                    compilation_options: wgpu::PipelineCompilationOptions::default(),
3143                    buffers: &[],
3144                },
3145                fragment: Some(wgpu::FragmentState {
3146                    module: &blit_shader,
3147                    entry_point: Some("fs_main"),
3148                    compilation_options: wgpu::PipelineCompilationOptions::default(),
3149                    targets: &[Some(wgpu::ColorTargetState {
3150                        format: surface_format,
3151                        blend: None,
3152                        write_mask: wgpu::ColorWrites::ALL,
3153                    })],
3154                }),
3155                primitive: wgpu::PrimitiveState::default(),
3156                depth_stencil: None,
3157                multisample: wgpu::MultisampleState::default(),
3158                multiview_mask: None,
3159                cache: None,
3160            });
3161        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
3162            label: Some("roxlap-gpu scene_dda.blit_bg"),
3163            layout: &bgl_blit,
3164            entries: &[
3165                wgpu::BindGroupEntry {
3166                    binding: 0,
3167                    resource: framebuffer.as_entire_binding(),
3168                },
3169                wgpu::BindGroupEntry {
3170                    binding: 1,
3171                    resource: blit_dims.as_entire_binding(),
3172                },
3173            ],
3174        });
3175
3176        SceneDdaResources {
3177            storage_size: (width, height),
3178            framebuffer,
3179            uniform_buf,
3180            bgl_dda,
3181            pipeline_dda,
3182            blit_bg,
3183            pipeline_blit,
3184            blit_dims,
3185            depth_buffer,
3186            depth_readback,
3187        }
3188    }
3189
3190    /// Read back the per-pixel world-t depth at window pixel `(x, y)`
3191    /// from the last rendered frame, for screen→world picking. Returns
3192    /// the distance `t` along the (normalised) view ray to the nearest
3193    /// scene-grid surface, so the host reconstructs the world hit as
3194    /// `cam.pos + t * normalize(ray_dir)`. `None` for out-of-bounds
3195    /// pixels, sky / no-hit (the `T_INF` sentinel), or when no scene
3196    /// frame has been rendered.
3197    ///
3198    /// The depth buffer is the SCENE pass's output (terrain + grids),
3199    /// untouched by the sprite pass (which reads it read-only), so a
3200    /// cursor sprite under the pointer does not occlude the pick.
3201    ///
3202    /// Synchronous: copies the depth buffer to a mapped staging buffer
3203    /// and blocks on `device.poll(Wait)`. Cheap enough for click-time
3204    /// picks; do not call it every frame.
3205    ///
3206    /// Requires the last frame to have written depth, which happens
3207    /// when sprites are present (`write_depth`). The pick demo always
3208    /// has a cursor sprite, so this holds.
3209    ///
3210    /// Compiles on wasm, but the wasm facade never calls it: WebGPU's
3211    /// `device.poll` doesn't block for the GPU, so the blocking
3212    /// `recv()` here would hang the single browser thread. Picking is
3213    /// deferred on the wasm GPU path (the facade returns `None`).
3214    #[must_use]
3215    pub fn read_depth_pixel(&self, x: u32, y: u32) -> Option<f32> {
3216        let dda = self.scene_dda.as_ref()?;
3217        let (w, h) = dda.storage_size;
3218        if x >= w || y >= h {
3219            return None;
3220        }
3221        let mut enc = self
3222            .device
3223            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
3224                label: Some("roxlap-gpu depth readback"),
3225            });
3226        let size = u64::from(w) * u64::from(h) * 4;
3227        enc.copy_buffer_to_buffer(&dda.depth_buffer, 0, &dda.depth_readback, 0, size);
3228        self.queue.submit(std::iter::once(enc.finish()));
3229
3230        let slice = dda.depth_readback.slice(..);
3231        let (tx, rx) = std::sync::mpsc::channel();
3232        slice.map_async(wgpu::MapMode::Read, move |r| {
3233            let _ = tx.send(r);
3234        });
3235        self.device.poll(wgpu::PollType::wait_indefinitely()).ok();
3236        rx.recv().ok()?.ok()?;
3237
3238        let t = {
3239            let data = slice.get_mapped_range();
3240            let idx = ((y * w + x) * 4) as usize;
3241            let bytes: [u8; 4] = data[idx..idx + 4].try_into().ok()?;
3242            f32::from_le_bytes(bytes)
3243        };
3244        dda.depth_readback.unmap();
3245
3246        // Reject sky / no-hit (T_INF == 1e30 in the shader) + non-finite.
3247        if !t.is_finite() || t >= 1.0e29 {
3248            return None;
3249        }
3250        Some(t)
3251    }
3252
3253    /// World-space view-ray direction (un-normalised) for window pixel
3254    /// `(x, y)`, under the GPU marcher's projection — the canonical GPU
3255    /// unproject, mirroring `scene_dda.wgsl`'s `render_scene`
3256    /// (vertical-FOV pinhole). Uses the last-rendered frame's target
3257    /// size + FOV; `None` before the first scene render. Pair with
3258    /// [`Self::read_depth_pixel`] for screen→world picking.
3259    #[must_use]
3260    pub fn pixel_ray(
3261        &self,
3262        right: [f64; 3],
3263        down: [f64; 3],
3264        forward: [f64; 3],
3265        x: f64,
3266        y: f64,
3267    ) -> Option<[f64; 3]> {
3268        let dda = self.scene_dda.as_ref()?;
3269        let (w, h) = dda.storage_size;
3270        if w == 0 || h == 0 || self.last_fov_y_rad <= 0.0 {
3271            return None;
3272        }
3273        Some(pinhole_pixel_ray(
3274            right,
3275            down,
3276            forward,
3277            x,
3278            y,
3279            f64::from(w),
3280            f64::from(h),
3281            f64::from(self.last_fov_y_rad),
3282        ))
3283    }
3284
3285    /// GPU.10.1 — upload a sprite model registry + its instances for
3286    /// the DDA path. An empty instance slice clears all sprites.
3287    pub fn set_sprite_instances(
3288        &mut self,
3289        registry: &sprite_model::SpriteModelRegistry,
3290        instances: &[sprite_model::SpriteInstance],
3291    ) {
3292        if instances.is_empty() {
3293            self.sprite_registry = None;
3294            return;
3295        }
3296        self.sprite_registry = Some(sprite_model::SpriteRegistryResident::upload(
3297            &self.device,
3298            registry,
3299            instances,
3300        ));
3301    }
3302
3303    /// Incrementally append sprite instances **without** rebuilding the
3304    /// registry — the cheap streaming-spawn path (asteroids, projectiles).
3305    /// Returns the index of the first appended instance (`[base, base+N)`).
3306    ///
3307    /// Every appended instance must reference a model already registered
3308    /// by the [`Self::set_sprite_instances`] that established residency
3309    /// (model volumes are not re-uploaded here — build the full
3310    /// `SpriteModelRegistry` up front and seed it once, then stream
3311    /// instances). If no registry is resident yet, this performs the
3312    /// initial full upload and returns `0`.
3313    ///
3314    /// Cost is amortised O(1) per instance (the GPU instance buffer grows
3315    /// by powers of two), versus the full volume + buffer rebuild of
3316    /// [`Self::set_sprite_instances`].
3317    pub fn append_sprite_instances(
3318        &mut self,
3319        registry: &sprite_model::SpriteModelRegistry,
3320        instances: &[sprite_model::SpriteInstance],
3321    ) -> u32 {
3322        match self.sprite_registry.as_mut() {
3323            Some(reg) => reg.append_instances(&self.device, registry, instances),
3324            None => {
3325                self.set_sprite_instances(registry, instances);
3326                0
3327            }
3328        }
3329    }
3330
3331    /// Remove the sprite instance at `index` (swap-remove, O(1), no model
3332    /// re-upload). Returns `Some(old_last)` if a different instance was
3333    /// moved into `index` to fill the hole — its index changed from
3334    /// `old_last` to `index`, so a caller tracking instance handles must
3335    /// update that one. Returns `None` if `index` was the last element /
3336    /// out of range, or no registry is resident.
3337    pub fn remove_sprite_instance(&mut self, index: usize) -> Option<usize> {
3338        self.sprite_registry
3339            .as_mut()
3340            .and_then(|reg| reg.remove_instance(index))
3341    }
3342
3343    /// Incrementally add a new model (its full LOD chain) to the resident
3344    /// sprite registry **without** re-uploading the existing models — the
3345    /// counterpart to [`Self::append_sprite_instances`] for streaming in
3346    /// new geometry (unique asteroids, generated meshes).
3347    ///
3348    /// Usage mirrors `update_sprite_model`: you own the
3349    /// [`SpriteModelRegistry`](sprite_model::SpriteModelRegistry), append
3350    /// the model with [`add_lod`](sprite_model::SpriteModelRegistry::add_lod)
3351    /// (or `add`), then pass the returned `chain_id` here to sync that one
3352    /// chain to the GPU. Afterwards [`Self::append_sprite_instances`] may
3353    /// reference it.
3354    ///
3355    /// If no registry is resident yet, this performs the initial full
3356    /// upload of `registry` (all its current models, zero instances) to
3357    /// establish residency — so call it for your *first* model; only
3358    /// chains appended *after* residency exists are added incrementally.
3359    ///
3360    /// Cost is amortised O(new model voxels): the shared volume buffers
3361    /// carry slack and bump-append, growing (and rebuilding once from the
3362    /// registry) only on overflow.
3363    /// Flush queued `write_buffer` uploads by submitting an empty command
3364    /// stream. wgpu stages `write_buffer` data and flushes it on the next
3365    /// `Queue::submit`; calling this between batches of uploads (e.g. a
3366    /// flipbook's frames in [`Self::add_sprite_model`]) recycles the device
3367    /// staging pool so a big one-shot batch can't exhaust it (which would
3368    /// then crash egui-wgpu's own `write_buffer`).
3369    pub fn flush_writes(&self) {
3370        self.queue.submit(std::iter::empty::<wgpu::CommandBuffer>());
3371    }
3372
3373    pub fn add_sprite_model(
3374        &mut self,
3375        registry: &sprite_model::SpriteModelRegistry,
3376        chain_id: u32,
3377    ) {
3378        match self.sprite_registry.as_mut() {
3379            Some(reg) => reg.add_model(&self.device, &self.queue, registry, chain_id),
3380            None => {
3381                self.sprite_registry = Some(sprite_model::SpriteRegistryResident::upload(
3382                    &self.device,
3383                    registry,
3384                    &[],
3385                ));
3386            }
3387        }
3388    }
3389
3390    /// Remove a model (tombstone its LOD chain) from the resident sprite
3391    /// registry — the counterpart to [`Self::add_sprite_model`]. Frees its
3392    /// `colors`/`dirs` space for reuse by a later add; the smaller
3393    /// `occupancy`/`color_offsets` holes are reclaimed by
3394    /// [`Self::compact_sprite_models`]. Entry / chain ids stay stable, so
3395    /// other models' `chain_id`s remain valid.
3396    ///
3397    /// Instances of the removed model keep their slots but draw as nothing
3398    /// until the caller drops them via [`Self::remove_sprite_instance`].
3399    /// No-op if `chain_id` is unknown / already removed / no registry.
3400    pub fn remove_sprite_model(&mut self, chain_id: u32) {
3401        if let Some(reg) = self.sprite_registry.as_mut() {
3402            reg.remove_model(chain_id);
3403        }
3404    }
3405
3406    /// Reclaim the holes left by [`Self::remove_sprite_model`] by rebuilding
3407    /// the shared volume buffers from the live models only. `registry` must
3408    /// be the resident one. Cost is O(live volume) — call it when
3409    /// [`Self::dead_sprite_model_count`] is high (e.g. exceeds the live
3410    /// count), not every frame. No-op if no registry is resident.
3411    pub fn compact_sprite_models(&mut self, registry: &sprite_model::SpriteModelRegistry) {
3412        if let Some(reg) = self.sprite_registry.as_mut() {
3413            reg.compact(&self.device, &self.queue, registry);
3414        }
3415    }
3416
3417    /// Number of live (non-removed) sprite models (0 if none uploaded).
3418    #[must_use]
3419    pub fn sprite_model_count(&self) -> usize {
3420        self.sprite_registry
3421            .as_ref()
3422            .map_or(0, sprite_model::SpriteRegistryResident::live_model_count)
3423    }
3424
3425    /// Number of removed-but-not-yet-compacted sprite models — the
3426    /// fragmentation signal for deciding when to call
3427    /// [`Self::compact_sprite_models`].
3428    #[must_use]
3429    pub fn dead_sprite_model_count(&self) -> usize {
3430        self.sprite_registry
3431            .as_ref()
3432            .map_or(0, sprite_model::SpriteRegistryResident::dead_model_count)
3433    }
3434
3435    /// Number of resident sprite instances (0 if none uploaded).
3436    #[must_use]
3437    pub fn sprite_instance_count(&self) -> usize {
3438        self.sprite_registry
3439            .as_ref()
3440            .map_or(0, sprite_model::SpriteRegistryResident::instance_count)
3441    }
3442
3443    /// Re-pose the already-resident sprite instances in place (no model
3444    /// volume re-upload) — the cheap per-frame path for animated KFA
3445    /// limbs. `instances` must match the last [`Self::set_sprite_instances`]
3446    /// in length + order. No-op if no sprite registry is resident.
3447    pub fn update_sprite_instance_transforms(
3448        &mut self,
3449        instances: &[sprite_model::SpriteInstance],
3450    ) {
3451        if let Some(reg) = self.sprite_registry.as_mut() {
3452            reg.update_transforms(instances);
3453        }
3454    }
3455
3456    /// GPU.12 incremental — re-upload only LOD chain `chain_id`'s entries
3457    /// after an in-place edit of `registry` (carve / recolour), without
3458    /// rebuilding the whole sprite registry. `registry` must be the one
3459    /// last passed to [`Self::set_sprite_instances`] with chain
3460    /// `chain_id` already edited. No-op if no registry is resident.
3461    pub fn update_sprite_model(
3462        &mut self,
3463        registry: &sprite_model::SpriteModelRegistry,
3464        chain_id: u32,
3465    ) {
3466        if let Some(reg) = self.sprite_registry.as_mut() {
3467            reg.update_model(&self.device, &self.queue, registry, chain_id);
3468        }
3469    }
3470
3471    /// VCL.2 — repoint sprite instance `index` at LOD chain `chain_id`
3472    /// (the per-frame flipbook step for animated voxel clips). `registry`
3473    /// is the resident one; `chain_id`'s volume must already be uploaded
3474    /// (e.g. a clip's frames registered via [`Self::add_sprite_model`]).
3475    /// CPU-side rewrite picked up by the next frame's cull — no volume
3476    /// re-upload. No-op if no registry is resident.
3477    pub fn set_sprite_instance_model(
3478        &mut self,
3479        registry: &sprite_model::SpriteModelRegistry,
3480        index: usize,
3481        chain_id: u32,
3482    ) {
3483        if let Some(reg) = self.sprite_registry.as_mut() {
3484            reg.set_instance_model(registry, index, chain_id);
3485        }
3486    }
3487
3488    /// Set the per-instance `kv6colmul[256]` lighting tables (voxlap's
3489    /// `update_reflects` output, e.g. via `roxlap_core::sprite::
3490    /// sprite_colmul`), in the same order/length as the last
3491    /// [`Self::set_sprite_instances`]. The GPU sprite pass modulates each
3492    /// voxel by its surface normal's entry — matching the CPU rasteriser.
3493    /// No-op if no sprite registry is resident.
3494    pub fn set_sprite_instance_colmul(&mut self, tables: &[[u64; 256]]) {
3495        if let Some(reg) = self.sprite_registry.as_mut() {
3496            reg.set_instance_colmul(tables);
3497        }
3498    }
3499
3500    /// GPU.10.4 — set the LOD pixel threshold: a sprite steps to the
3501    /// next mip once a mip-0 voxel would project below `px` screen
3502    /// pixels. `1.0` is the natural "no sub-pixel voxels" default;
3503    /// larger values force LOD in closer (useful for inspection).
3504    /// Clamped to ≥ 0.25.
3505    pub fn set_sprite_lod_px(&mut self, px: f32) {
3506        self.sprite_lod_px = px.max(0.25);
3507    }
3508
3509    /// GPU.11.1 — set the scene-grid LOD scan distance (world units).
3510    /// A chunk entered at world-t `t` is marched at mip
3511    /// `floor(log2(max(t, msd) / msd))`, clamped to its grid's mip
3512    /// ladder. `0` disables LOD (always mip-0). Larger values push
3513    /// the coarser mips farther out — the axis-aligned-mip-beams
3514    /// mitigation lever (GPU.11.2). Default 64 (matches CPU
3515    /// `mip_scan_dist`).
3516    pub fn set_scene_mip_scan_dist(&mut self, dist: f32) {
3517        self.scene_mip_scan_dist = dist.max(0.0);
3518    }
3519
3520    /// Set per-face grid side-shading — voxlap's
3521    /// `setsideshades(top, bot, left, right, up, down)`. Each value is
3522    /// subtracted (as a u8, matching the CPU `gcsub` high byte) from a
3523    /// hit voxel's brightness byte before shading, so the scene-DDA pass
3524    /// darkens grid faces the same way the CPU rasteriser does. `[0; 6]`
3525    /// disables it (the default). The hit face is taken from the DDA's
3526    /// last-stepped axis + ray direction.
3527    pub fn set_scene_side_shades(&mut self, s: [i8; 6]) {
3528        // Reinterpret each i8 as u8 (voxlap stamps `sxx` into gcsub's
3529        // high byte verbatim), then pack (top, bot, left, right) /
3530        // (up, down, 0, 0) for the two uniform vec4s.
3531        let v = |i: usize| i32::from(s[i] as u8);
3532        self.scene_side_shades = [[v(0), v(1), v(2), v(3)], [v(4), v(5), 0, 0]];
3533    }
3534
3535    /// GPU.10.1 — build the instanced model-DDA pipeline (one thread
3536    /// per pixel). Lazily invoked the first frame a registry is present.
3537    fn build_sprite_model_dda(&self) -> SpriteModelDdaResources {
3538        let shader = self
3539            .device
3540            .create_shader_module(wgpu::ShaderModuleDescriptor {
3541                label: Some("sprite_model_dda.wgsl"),
3542                source: wgpu::ShaderSource::Wgsl(
3543                    include_str!("../shaders/sprite_model_dda.wgsl").into(),
3544                ),
3545            });
3546        let bgl = self
3547            .device
3548            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3549                label: Some("roxlap-gpu sprite_model_dda.bgl"),
3550                entries: &[
3551                    bgl_uniform_entry(0),
3552                    bgl_storage_entry(1, true),  // occupancy
3553                    bgl_storage_entry(2, true),  // colors
3554                    bgl_storage_entry(3, true),  // color_offsets
3555                    bgl_storage_entry(4, true),  // model_meta
3556                    bgl_storage_entry(5, true),  // instances
3557                    bgl_storage_entry(6, true),  // scene depth
3558                    bgl_storage_entry(7, false), // framebuffer (read-write buffer)
3559                    bgl_storage_entry(8, true),  // tile_ranges
3560                    bgl_storage_entry(9, true),  // tile_instances
3561                    bgl_storage_entry(10, true), // per-voxel dir
3562                    bgl_storage_entry(11, true), // per-instance kv6colmul
3563                ],
3564            });
3565        let pl = self
3566            .device
3567            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3568                label: Some("roxlap-gpu sprite_model_dda.layout"),
3569                bind_group_layouts: &[Some(&bgl)],
3570                immediate_size: 0,
3571            });
3572        let pipeline = self
3573            .device
3574            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
3575                label: Some("roxlap-gpu sprite_model_dda.pipeline"),
3576                layout: Some(&pl),
3577                module: &shader,
3578                entry_point: Some("march"),
3579                compilation_options: wgpu::PipelineCompilationOptions::default(),
3580                cache: None,
3581            });
3582        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
3583            label: Some("roxlap-gpu sprite_model_dda.uniform"),
3584            size: std::mem::size_of::<SpriteModelUniform>() as u64,
3585            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
3586            mapped_at_creation: false,
3587        });
3588        SpriteModelDdaResources {
3589            bgl,
3590            pipeline,
3591            uniform_buf,
3592        }
3593    }
3594}
3595
/// GPU.11 — headless scene-DDA renderer for tests + offline visual
/// gates. Owns the `scene_dda.wgsl` compute pipeline with no surface
/// and no blit pass; renders a [`GpuSceneResident`] into a packed
/// RGBA storage buffer and returns it through a buffer-to-buffer
/// readback. The per-substage visual gate (render reference scenes,
/// diff PPMs) and the GPU.11.1 mip render-diff both ride on this.
pub struct HeadlessSceneRenderer {
    /// Framebuffer width in pixels, fixed at construction.
    width: u32,
    /// Framebuffer height in pixels, fixed at construction.
    height: u32,
    /// Framebuffer storage buffer (packed `rgba8unorm`, tight rows) —
    /// matches the buffer-output `scene_dda.wgsl` (see its note).
    framebuffer: wgpu::Buffer,
    /// Depth storage buffer (`width * height * 4` bytes), bound
    /// read-write at binding 11. [`Self::render`] passes
    /// `write_depth: 0` in the uniform.
    depth_buffer: wgpu::Buffer,
    /// Uniform buffer sized for one `SceneDdaUniform`; rewritten at
    /// the start of every [`Self::render`] call.
    uniform_buf: wgpu::Buffer,
    /// Default 1×1 sky texture. Not read again after construction; it
    /// is stored alongside the view that was created from it.
    _sky_texture: wgpu::Texture,
    /// View of the sky texture, bound at binding 9.
    sky_view: wgpu::TextureView,
    /// Linear-filtering, repeat-addressed sampler bound at binding 10.
    sky_sampler: wgpu::Sampler,
    /// Bind-group layout; a fresh bind group is built from it on each
    /// render because the scene buffers are supplied per call.
    bgl: wgpu::BindGroupLayout,
    /// Compute pipeline for the `render_scene` entry point.
    pipeline: wgpu::ComputePipeline,
    /// CPU-mappable copy target (`COPY_DST | MAP_READ`) that receives
    /// the framebuffer contents after each dispatch.
    readback: wgpu::Buffer,
    /// Per-face side-shades for the gate render (default none). Packed
    /// `[(top,bot,left,right), (up,down,_,_)]`; set via
    /// [`Self::set_side_shades`].
    side_shades: [[i32; 4]; 2],
}
3621
3622impl HeadlessSceneRenderer {
3623    /// Build the compute pipeline + output/readback resources for a
3624    /// `width × height` framebuffer. Validates `scene_dda.wgsl` and
3625    /// the [`scene::GridStaticMeta`] std430 layout at pipeline /
3626    /// bind-group time.
3627    #[must_use]
3628    pub fn new(device: &wgpu::Device, queue: &wgpu::Queue, width: u32, height: u32) -> Self {
3629        let framebuffer = device.create_buffer(&wgpu::BufferDescriptor {
3630            label: Some("roxlap-gpu headless.framebuffer"),
3631            size: u64::from(width) * u64::from(height) * 4,
3632            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
3633            mapped_at_creation: false,
3634        });
3635
3636        let uniform_buf = device.create_buffer(&wgpu::BufferDescriptor {
3637            label: Some("roxlap-gpu headless.uniform"),
3638            size: std::mem::size_of::<SceneDdaUniform>() as u64,
3639            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
3640            mapped_at_creation: false,
3641        });
3642        let depth_buffer = device.create_buffer(&wgpu::BufferDescriptor {
3643            label: Some("roxlap-gpu headless.depth"),
3644            size: u64::from(width) * u64::from(height) * 4,
3645            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
3646            mapped_at_creation: false,
3647        });
3648
3649        let default_sky_pixel = [120u8, 150, 220, 255];
3650        let (sky_texture, sky_view) = create_sky_texture(device, 1, 1, &default_sky_pixel);
3651        // Upload the default sky texel (create_sky_texture only allocates
3652        // — the texel must be written or the shader samples black, which
3653        // is why a grid-less headless render came back black).
3654        queue.write_texture(
3655            wgpu::TexelCopyTextureInfo {
3656                texture: &sky_texture,
3657                mip_level: 0,
3658                origin: wgpu::Origin3d::ZERO,
3659                aspect: wgpu::TextureAspect::All,
3660            },
3661            &default_sky_pixel,
3662            wgpu::TexelCopyBufferLayout {
3663                offset: 0,
3664                bytes_per_row: Some(4),
3665                rows_per_image: Some(1),
3666            },
3667            wgpu::Extent3d {
3668                width: 1,
3669                height: 1,
3670                depth_or_array_layers: 1,
3671            },
3672        );
3673        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
3674            label: Some("roxlap-gpu headless.sky_sampler"),
3675            address_mode_u: wgpu::AddressMode::Repeat,
3676            address_mode_v: wgpu::AddressMode::Repeat,
3677            mag_filter: wgpu::FilterMode::Linear,
3678            min_filter: wgpu::FilterMode::Linear,
3679            ..Default::default()
3680        });
3681
3682        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
3683            label: Some("scene_dda.wgsl (headless)"),
3684            source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
3685        });
3686        let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3687            label: Some("roxlap-gpu headless.bgl"),
3688            entries: &[
3689                bgl_uniform_entry(0),
3690                bgl_storage_entry(1, true),
3691                bgl_storage_entry(2, true),
3692                bgl_storage_entry(3, true),
3693                bgl_storage_entry(4, true),
3694                bgl_storage_entry(5, true),
3695                bgl_storage_entry(6, true),
3696                bgl_storage_entry(7, true),
3697                // Framebuffer storage buffer (read-write).
3698                bgl_storage_entry(8, false),
3699                wgpu::BindGroupLayoutEntry {
3700                    binding: 9,
3701                    visibility: wgpu::ShaderStages::COMPUTE,
3702                    ty: wgpu::BindingType::Texture {
3703                        sample_type: wgpu::TextureSampleType::Float { filterable: true },
3704                        view_dimension: wgpu::TextureViewDimension::D2,
3705                        multisampled: false,
3706                    },
3707                    count: None,
3708                },
3709                wgpu::BindGroupLayoutEntry {
3710                    binding: 10,
3711                    visibility: wgpu::ShaderStages::COMPUTE,
3712                    ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
3713                    count: None,
3714                },
3715                bgl_storage_entry(11, false),
3716                bgl_storage_entry(12, true),
3717                bgl_storage_entry(13, true),
3718                bgl_storage_entry(14, true),
3719                // Per-grid cameras (runtime-sized; one per grid).
3720                bgl_storage_entry(15, true),
3721            ],
3722        });
3723        let pl = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3724            label: Some("roxlap-gpu headless.layout"),
3725            bind_group_layouts: &[Some(&bgl)],
3726            immediate_size: 0,
3727        });
3728        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
3729            label: Some("roxlap-gpu headless.pipeline"),
3730            layout: Some(&pl),
3731            module: &shader,
3732            entry_point: Some("render_scene"),
3733            compilation_options: wgpu::PipelineCompilationOptions::default(),
3734            cache: None,
3735        });
3736
3737        // Readback is a tight buffer-to-buffer copy (no 256-byte row
3738        // padding, unlike the old texture-to-buffer path).
3739        let readback = device.create_buffer(&wgpu::BufferDescriptor {
3740            label: Some("roxlap-gpu headless.readback"),
3741            size: u64::from(width) * u64::from(height) * 4,
3742            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
3743            mapped_at_creation: false,
3744        });
3745
3746        Self {
3747            width,
3748            height,
3749            framebuffer,
3750            depth_buffer,
3751            uniform_buf,
3752            _sky_texture: sky_texture,
3753            sky_view,
3754            sky_sampler,
3755            bgl,
3756            pipeline,
3757            readback,
3758            side_shades: [[0; 4]; 2],
3759        }
3760    }
3761
3762    /// Set per-face side-shades for subsequent [`Self::render`] calls —
3763    /// voxlap `setsideshades(top, bot, left, right, up, down)`, each an
3764    /// i8 stamped as u8 (matching the engine path). Lets the gate test
3765    /// the GPU side-shade darkening.
3766    pub fn set_side_shades(&mut self, s: [i8; 6]) {
3767        let v = |i: usize| i32::from(s[i] as u8);
3768        self.side_shades = [[v(0), v(1), v(2), v(3)], [v(4), v(5), 0, 0]];
3769    }
3770
3771    /// Render `scene` from `cameras` (one per grid) and read the
3772    /// framebuffer back as `width*height` packed `0xAABBGGRR` pixels
3773    /// (R in the low byte). Fog is disabled. `mip_scan_dist` drives
3774    /// the GPU.11.1 scene-grid LOD (`0` = always mip-0). Blocks on
3775    /// readback.
3776    ///
3777    /// # Panics
3778    /// If `cameras.len() != scene.grid_count`.
3779    #[must_use]
3780    #[allow(clippy::too_many_arguments)]
3781    pub fn render(
3782        &self,
3783        device: &wgpu::Device,
3784        queue: &wgpu::Queue,
3785        scene: &GpuSceneResident,
3786        cameras: &[Camera],
3787        fov_y_rad: f32,
3788        max_outer_steps: u32,
3789        mip_scan_dist: f32,
3790    ) -> Vec<u32> {
3791        assert_eq!(
3792            cameras.len(),
3793            scene.grid_count as usize,
3794            "headless render: {} cameras for {} grids",
3795            cameras.len(),
3796            scene.grid_count,
3797        );
3798
3799        let cam_vec: Vec<SceneDdaPerGridCamera> = cameras
3800            .iter()
3801            .map(SceneDdaPerGridCamera::from_camera)
3802            .collect();
3803        let grid_cameras = upload_grid_cameras(device, &cam_vec);
3804        let uniform = SceneDdaUniform {
3805            fov_y_rad,
3806            grid_count: scene.grid_count,
3807            max_outer_steps,
3808            _pad0: 0,
3809            screen_size: [self.width, self.height],
3810            _pad1: [0; 2],
3811            // Fog off: near/far past any reachable t → factor 0.
3812            fog_color: [0.0, 0.0, 0.0, 1.0e29],
3813            fog_far: 1.0e30,
3814            write_depth: 0,
3815            occ_page_words: scene.occupancy_page_words,
3816            occ_num_pages: scene.occupancy_num_pages,
3817            mip_scan_dist,
3818            _pad2: 0,
3819            _pad3: 0,
3820            _pad4: 0,
3821            // Sky direction from the first grid camera (the world frame
3822            // in these tests); a default forward camera when there are
3823            // none (grid_count == 0) so the sky lookup stays valid.
3824            sky_cam: SceneDdaPerGridCamera::from_camera(&cameras.first().copied().unwrap_or(
3825                Camera {
3826                    position: [0.0; 3],
3827                    right: [1.0, 0.0, 0.0],
3828                    down: [0.0, 0.0, 1.0],
3829                    forward: [0.0, 1.0, 0.0],
3830                    fov_y_rad,
3831                },
3832            )),
3833            side_shades0: self.side_shades[0],
3834            side_shades1: self.side_shades[1],
3835        };
3836        queue.write_buffer(&self.uniform_buf, 0, bytemuck::bytes_of(&uniform));
3837
3838        let bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
3839            label: Some("roxlap-gpu headless.bg"),
3840            layout: &self.bgl,
3841            entries: &[
3842                wgpu::BindGroupEntry {
3843                    binding: 0,
3844                    resource: self.uniform_buf.as_entire_binding(),
3845                },
3846                wgpu::BindGroupEntry {
3847                    binding: 1,
3848                    resource: scene.occupancy_pages[0].as_entire_binding(),
3849                },
3850                wgpu::BindGroupEntry {
3851                    binding: 2,
3852                    resource: scene.all_color_offsets.as_entire_binding(),
3853                },
3854                wgpu::BindGroupEntry {
3855                    binding: 3,
3856                    resource: scene.all_colors.as_entire_binding(),
3857                },
3858                wgpu::BindGroupEntry {
3859                    binding: 4,
3860                    resource: scene.all_chunk_colors_base.as_entire_binding(),
3861                },
3862                wgpu::BindGroupEntry {
3863                    binding: 5,
3864                    resource: scene.all_chunk_occupancy.as_entire_binding(),
3865                },
3866                wgpu::BindGroupEntry {
3867                    binding: 6,
3868                    resource: scene.grid_static_meta.as_entire_binding(),
3869                },
3870                wgpu::BindGroupEntry {
3871                    binding: 7,
3872                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
3873                },
3874                wgpu::BindGroupEntry {
3875                    binding: 8,
3876                    resource: self.framebuffer.as_entire_binding(),
3877                },
3878                wgpu::BindGroupEntry {
3879                    binding: 9,
3880                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
3881                },
3882                wgpu::BindGroupEntry {
3883                    binding: 10,
3884                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
3885                },
3886                wgpu::BindGroupEntry {
3887                    binding: 11,
3888                    resource: self.depth_buffer.as_entire_binding(),
3889                },
3890                wgpu::BindGroupEntry {
3891                    binding: 12,
3892                    resource: scene.occupancy_pages[1].as_entire_binding(),
3893                },
3894                wgpu::BindGroupEntry {
3895                    binding: 13,
3896                    resource: scene.occupancy_pages[2].as_entire_binding(),
3897                },
3898                wgpu::BindGroupEntry {
3899                    binding: 14,
3900                    resource: scene.occupancy_pages[3].as_entire_binding(),
3901                },
3902                wgpu::BindGroupEntry {
3903                    binding: 15,
3904                    resource: grid_cameras.as_entire_binding(),
3905                },
3906            ],
3907        });
3908
3909        let mut enc =
3910            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
3911        {
3912            let mut pass = enc.begin_compute_pass(&wgpu::ComputePassDescriptor {
3913                label: Some("roxlap-gpu headless.pass"),
3914                timestamp_writes: None,
3915            });
3916            pass.set_pipeline(&self.pipeline);
3917            pass.set_bind_group(0, &bg, &[]);
3918            pass.dispatch_workgroups(self.width.div_ceil(8), self.height.div_ceil(8), 1);
3919        }
3920        enc.copy_buffer_to_buffer(
3921            &self.framebuffer,
3922            0,
3923            &self.readback,
3924            0,
3925            u64::from(self.width) * u64::from(self.height) * 4,
3926        );
3927        queue.submit(Some(enc.finish()));
3928
3929        let slice = self.readback.slice(..);
3930        let (tx, rx) = std::sync::mpsc::channel();
3931        slice.map_async(wgpu::MapMode::Read, move |r| {
3932            let _ = tx.send(r);
3933        });
3934        device.poll(wgpu::PollType::wait_indefinitely()).ok();
3935        rx.recv().expect("map_async channel").expect("map_async");
3936
3937        let data = slice.get_mapped_range();
3938        // Tight `width*height` packed pixels — the shader's
3939        // `pack4x8unorm(vec4(r,g,b,a))` already yields `0xAABBGGRR`
3940        // little-endian, so a straight u32 read reconstructs each pixel.
3941        let out: Vec<u32> = data
3942            .chunks_exact(4)
3943            .map(|px| u32::from_le_bytes([px[0], px[1], px[2], px[3]]))
3944            .collect();
3945        drop(data);
3946        self.readback.unmap();
3947        out
3948    }
3949}
3950
3951fn bgl_uniform_entry(binding: u32) -> wgpu::BindGroupLayoutEntry {
3952    wgpu::BindGroupLayoutEntry {
3953        binding,
3954        visibility: wgpu::ShaderStages::COMPUTE,
3955        ty: wgpu::BindingType::Buffer {
3956            ty: wgpu::BufferBindingType::Uniform,
3957            has_dynamic_offset: false,
3958            min_binding_size: None,
3959        },
3960        count: None,
3961    }
3962}
3963
3964fn bgl_storage_entry(binding: u32, read_only: bool) -> wgpu::BindGroupLayoutEntry {
3965    wgpu::BindGroupLayoutEntry {
3966        binding,
3967        visibility: wgpu::ShaderStages::COMPUTE,
3968        ty: wgpu::BindingType::Buffer {
3969            ty: wgpu::BufferBindingType::Storage { read_only },
3970            has_dynamic_offset: false,
3971            min_binding_size: None,
3972        },
3973        count: None,
3974    }
3975}
3976
3977/// Create a fresh sky panorama texture sized `width × height` with
3978/// the initial pixel data uploaded via `write_texture`. Used by
3979/// `GpuRenderer::new` (1×1 default) and `set_sky_panorama` (host-
3980/// supplied panorama).
3981fn create_sky_texture(
3982    device: &wgpu::Device,
3983    width: u32,
3984    height: u32,
3985    _initial_pixels: &[u8],
3986) -> (wgpu::Texture, wgpu::TextureView) {
3987    let tex = device.create_texture(&wgpu::TextureDescriptor {
3988        label: Some("roxlap-gpu sky_texture"),
3989        size: wgpu::Extent3d {
3990            width,
3991            height,
3992            depth_or_array_layers: 1,
3993        },
3994        mip_level_count: 1,
3995        sample_count: 1,
3996        dimension: wgpu::TextureDimension::D2,
3997        format: wgpu::TextureFormat::Rgba8Unorm,
3998        usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
3999        view_formats: &[],
4000    });
4001    let view = tex.create_view(&wgpu::TextureViewDescriptor::default());
4002    (tex, view)
4003}
4004
4005/// GPU.4 needs to upload a whole grid (~hundreds of MiB) as a few
4006/// storage buffers. wgpu's default `max_storage_buffer_binding_size`
4007/// is 128 MiB, which is just enough for the demo's 32×32 ground
4008/// occupancy (~128 MiB) but not the colour array. We request as
4009/// much as the adapter is willing to give — most desktop GPUs cap
4010/// individual storage buffers at 2-4 GiB; iGPUs often offer the
4011/// full system memory.
4012pub(crate) fn pick_required_limits(adapter_limits: &wgpu::Limits) -> wgpu::Limits {
4013    wgpu::Limits {
4014        max_storage_buffer_binding_size: adapter_limits.max_storage_buffer_binding_size,
4015        max_buffer_size: adapter_limits.max_buffer_size,
4016        // Occupancy paging adds up to MAX_OCC_PAGES-1 extra storage
4017        // bindings; with the scene's other buffers + the GPU.9 depth
4018        // buffer the scene_dda stage needs ~11. The default cap is 8.
4019        // Both NVK and lavapipe advertise ≫16, so request 16.
4020        max_storage_buffers_per_shader_stage: adapter_limits
4021            .max_storage_buffers_per_shader_stage
4022            .min(16),
4023        ..wgpu::Limits::default()
4024    }
4025}
4026
4027fn pick_present_mode(modes: &[wgpu::PresentMode]) -> wgpu::PresentMode {
4028    // Prefer Mailbox > Immediate > Fifo. Fifo is the universal
4029    // fallback and the only one Wayland-on-Mesa always offers.
4030    for &m in &[wgpu::PresentMode::Mailbox, wgpu::PresentMode::Immediate] {
4031        if modes.contains(&m) {
4032            return m;
4033        }
4034    }
4035    wgpu::PresentMode::Fifo
4036}
4037
/// World-space view-ray direction (un-normalised) through the centre
/// of window pixel `(x, y)` for a vertical-FOV pinhole camera — the
/// projection `scene_dda.wgsl`'s `render_scene` uses. Shared by
/// [`GpuRenderer::pixel_ray`]; kept standalone so it can be unit-tested
/// without a device.
///
/// `right`/`down`/`forward` are the camera basis, `w`/`h` the window
/// size in pixels and `fov_y_rad` the vertical field of view in
/// radians. The result is `forward` plus the lateral offsets, so its
/// component along `forward` is 1 for an orthonormal basis.
#[must_use]
#[allow(clippy::too_many_arguments)]
pub fn pinhole_pixel_ray(
    right: [f64; 3],
    down: [f64; 3],
    forward: [f64; 3],
    x: f64,
    y: f64,
    w: f64,
    h: f64,
    fov_y_rad: f64,
) -> [f64; 3] {
    // Half-extents of the image plane at unit distance along `forward`.
    let tan_half_y = (fov_y_rad * 0.5).tan();
    let tan_half_x = tan_half_y * (w / h);

    // Pixel centre mapped to [-1, 1]; the vertical coordinate is
    // positive towards the top of the window.
    let ndc_x = (x + 0.5) / w * 2.0 - 1.0;
    let ndc_y_up = 1.0 - (y + 0.5) / h * 2.0;

    let kx = ndc_x * tan_half_x;
    let ky = ndc_y_up * tan_half_y;

    // "Up" on screen is the opposite of `down`, hence the subtraction.
    std::array::from_fn(|axis| forward[axis] + kx * right[axis] - ky * down[axis])
}
4066
// Device-free unit tests: the pinhole pixel-ray projection, static
// validation of the WGSL shaders, and image-quad vertex geometry.
#[cfg(test)]
mod pixel_ray_tests {
    use super::pinhole_pixel_ray;

    const RIGHT: [f64; 3] = [1.0, 0.0, 0.0];
    const DOWN: [f64; 3] = [0.0, 1.0, 0.0];
    const FWD: [f64; 3] = [0.0, 0.0, 1.0]; // voxlap z-down "look down"

    // Window size shared by the ray tests.
    const WIN_W: f64 = 1280.0;
    const WIN_H: f64 = 720.0;

    /// Ray through window position `(x, y)` of the shared test window.
    fn ray_at(x: f64, y: f64, fov_y_rad: f64) -> [f64; 3] {
        pinhole_pixel_ray(RIGHT, DOWN, FWD, x, y, WIN_W, WIN_H, fov_y_rad)
    }

    // Frame centre (NDC 0,0) points straight along `forward`.
    #[test]
    fn centre_pixel_is_forward() {
        let d = ray_at(639.5, 359.5, 60_f64.to_radians());
        assert!(
            d[0].abs() < 1e-9 && d[1].abs() < 1e-9,
            "centre ≈ forward, got {d:?}"
        );
        assert!((d[2] - 1.0).abs() < 1e-9);
    }

    // Right edge pixel tilts +right by tan(hfov/2); the lateral
    // component equals half_w = tan(fov_y/2)*aspect at the very edge.
    #[test]
    fn right_edge_tilts_by_half_w() {
        let fov = 60_f64.to_radians();
        let half_w = (fov * 0.5).tan() * (1280.0 / 720.0);
        let d = ray_at(1279.5, 359.5, fov);
        assert!((d[0] - half_w).abs() < 1e-6, "x={}, half_w={half_w}", d[0]);
        assert!(d[0] > 0.0, "right edge tilts +right");
    }

    /// Statically validate every WGSL shader with naga (the same
    /// front-end + validator wgpu runs at pipeline creation), so shader
    /// edits — e.g. the GPU.10 sprite lighting bindings — are caught in
    /// CI without needing a GPU device.
    #[test]
    fn wgsl_shaders_validate() {
        // (file name used in failure messages, shader source)
        const SHADERS: &[(&str, &str)] = &[
            (
                "sprite_model_dda.wgsl",
                include_str!("../shaders/sprite_model_dda.wgsl"),
            ),
            ("scene_dda.wgsl", include_str!("../shaders/scene_dda.wgsl")),
            ("blit.wgsl", include_str!("../shaders/blit.wgsl")),
            ("chunk_dda.wgsl", include_str!("../shaders/chunk_dda.wgsl")),
            ("grid_dda.wgsl", include_str!("../shaders/grid_dda.wgsl")),
            (
                "scene_blit.wgsl",
                include_str!("../shaders/scene_blit.wgsl"),
            ),
            ("line.wgsl", include_str!("../shaders/line.wgsl")),
            ("image.wgsl", include_str!("../shaders/image.wgsl")),
        ];

        // One validator instance is reused for all modules.
        let mut validator = naga::valid::Validator::new(
            naga::valid::ValidationFlags::all(),
            naga::valid::Capabilities::all(),
        );
        for &(name, src) in SHADERS {
            let module = match naga::front::wgsl::parse_str(src) {
                Ok(module) => module,
                Err(e) => panic!("{name}: WGSL parse failed:\n{}", e.emit_to_string(src)),
            };
            if let Err(e) = validator.validate(&module) {
                panic!("{name}: WGSL validation failed: {e:?}");
            }
        }
    }

    /// A 2×2 world quad centred straight ahead projects to vertices whose
    /// homogeneous `w` equals the camera-forward distance (so the shader's
    /// `clip = ndc·w` recovers perspective-correct UVs) and whose `depth`
    /// is the euclidean range. Verifies geometry without a GPU device.
    #[test]
    fn image_vertices_carry_forward_w_and_euclidean_depth() {
        let cam = crate::GpuLineCamera {
            pos: [0.0, 0.0, 0.0],
            right: [1.0, 0.0, 0.0],
            down: [0.0, 1.0, 0.0],
            forward: [0.0, 0.0, 1.0],
        };
        // Quad 10 units ahead (forward = +Z), spanning x∈[-1,1], y∈[-1,1].
        let corners = [
            [-1.0, -1.0, 10.0], // TL
            [1.0, -1.0, 10.0],  // TR
            [-1.0, 1.0, 10.0],  // BL
            [1.0, 1.0, 10.0],   // BR
        ];
        let quad = crate::GpuImageQuad {
            corners,
            image: 0,
            tint: [1.0, 1.0, 1.0, 1.0],
            depth_test: true,
            alpha_cutoff: 0.0,
        };
        let fov_y = 60_f32.to_radians();
        let verts = crate::build_image_vertices(&cam, &quad, 800, 600, fov_y, false);
        assert_eq!(verts.len(), 6, "two triangles, no near-clip");
        for vertex in &verts {
            assert!((vertex.w - 10.0).abs() < 1e-4, "w == forward distance");
            assert!(vertex.depth >= 10.0, "euclidean depth >= forward distance");
            assert_eq!(vertex.depth_test, 1.0);
        }
    }
}