Skip to main content

roxlap_gpu/
lib.rs

1//! WGPU-backed compute-shader renderer scaffold for the roxlap
2//! voxel engine. GPU.1 in `PORTING-GPU.md`.
3//!
4//! GPU.1's job: stand up the device + surface + swapchain on a
5//! host window (any [`raw-window-handle`](raw_window_handle)
6//! provider), present a clear-to-colour frame each render call,
7//! and give the host a one-call opt-in. No voxel marching yet — the
8//! [`examples/probe.rs`](../examples/probe.rs) standalone holds
9//! the empirical FPS baseline from GPU.0.
10//!
11//! Later sub-substages flesh `GpuRenderer::render` out: GPU.2
12//! uploads voxel data, GPU.3 dispatches the inner-DDA compute
13//! shader, GPU.4 layers in chunk skipping, GPU.5 plugs the renderer
14//! into `roxlap-scene::Scene`, …
15//!
16//! ## Host integration shape (GPU.1)
17//!
18//! ```no_run
19//! use std::sync::Arc;
20//! use roxlap_gpu::{GpuRenderer, GpuRendererSettings};
21//! # use winit::window::Window;
22//! # fn pick(w: Arc<Window>, size: (u32, u32)) -> Option<GpuRenderer> {
23//! match GpuRenderer::new_blocking(w, size, GpuRendererSettings::default()) {
24//!     Ok(r) => Some(r),
25//!     Err(e) => {
26//!         eprintln!("GPU init failed: {e}; falling back to CPU");
27//!         None
28//!     }
29//! }
30//! # }
31//! ```
32
33#![allow(clippy::must_use_candidate, clippy::too_many_lines)]
34
35pub mod camera;
36pub mod decompress;
37pub mod grid;
38// Headless rendering is a native-only test/bench aid: it blocks on
39// `pollster` + `device.poll(Wait)`, neither of which exists on wasm.
40#[cfg(not(target_arch = "wasm32"))]
41pub mod headless;
42pub mod resident;
43pub mod scene;
44pub mod sprite_model;
45
46pub use camera::Camera;
47pub use decompress::{decompress_chunk, ChunkUpload, BEDROCK_RGB, CHUNK_Z};
48pub use grid::{bounding_box_of, GpuGridResident, GridUpload};
49#[cfg(not(target_arch = "wasm32"))]
50pub use headless::HeadlessGpu;
51pub use resident::GpuChunkResident;
52pub use scene::{
53    GpuSceneResident, GridRuntimeTransform, GridStaticMeta, RefreshOutcome, SceneUpload,
54};
55pub use sprite_model::{
56    build_sprite_model, SpriteInstance, SpriteInstanceTransform, SpriteModel, SpriteModelRegistry,
57    SpriteRegistryResident,
58};
59
60use std::sync::Arc;
61
62use bytemuck::{Pod, Zeroable};
63use raw_window_handle::{HasDisplayHandle, HasWindowHandle};
64
/// Caller-controllable knobs for [`GpuRenderer::new`]. Defaults
/// target "highest-performance GPU, prefer Mailbox/Immediate over
/// vsync" — i.e. the same configuration the GPU.0 probe used to
/// measure the FPS ceiling.
///
/// The struct is `Copy`; [`GpuRenderer::new`] hands the same value to
/// both its adapter-selection and final-initialisation steps.
#[derive(Debug, Clone, Copy)]
pub struct GpuRendererSettings {
    /// Power/performance preference for adapter selection (see
    /// [`PowerPreference`]). Defaults to [`PowerPreference::High`].
    pub power_preference: PowerPreference,
    /// Initial clear colour cycled by GPU.1's empty render path.
    /// The voxel-rendering substages overwrite this entirely.
    pub clear_colour: [f64; 3],
    /// Prefer mailbox/immediate when offered; falls back to FIFO if
    /// the surface only supports it (Wayland under Mesa often does).
    pub uncapped_present: bool,
}
79
/// Adapter power/performance preference carried by
/// [`GpuRendererSettings::power_preference`]. The default settings use
/// [`PowerPreference::High`].
// NOTE(review): presumably mapped onto `wgpu::PowerPreference` inside
// `request_adapter` — that helper is outside this view, confirm there.
#[derive(Debug, Clone, Copy)]
pub enum PowerPreference {
    /// Prefer a low-power adapter.
    Low,
    /// Prefer a high-performance adapter.
    High,
}
85
86impl Default for GpuRendererSettings {
87    fn default() -> Self {
88        Self {
89            power_preference: PowerPreference::High,
90            clear_colour: [0.06, 0.08, 0.12],
91            uncapped_present: true,
92        }
93    }
94}
95
/// Errors `GpuRenderer::new` surfaces to the host. The host's
/// expected flow is "try this, fall back to the CPU path on Err".
///
/// The two wrapping variants expose the underlying `wgpu` error through
/// [`std::error::Error::source`]; `?` converts into them via the `From`
/// impls below.
#[derive(Debug)]
pub enum GpuInitError {
    /// Surface creation failed; carries the `wgpu` error.
    CreateSurface(wgpu::CreateSurfaceError),
    /// No compatible adapter was found (no underlying error to report).
    NoAdapter,
    /// The device request failed; carries the `wgpu` error.
    RequestDevice(wgpu::RequestDeviceError),
}
104
105impl std::fmt::Display for GpuInitError {
106    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
107        match self {
108            Self::CreateSurface(e) => write!(f, "create_surface failed: {e}"),
109            Self::NoAdapter => write!(
110                f,
111                "no compatible adapter — does this system have a Vulkan/Metal/DX12 driver?"
112            ),
113            Self::RequestDevice(e) => write!(f, "request_device failed: {e}"),
114        }
115    }
116}
117
118impl std::error::Error for GpuInitError {
119    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
120        match self {
121            Self::CreateSurface(e) => Some(e),
122            Self::RequestDevice(e) => Some(e),
123            Self::NoAdapter => None,
124        }
125    }
126}
127
128impl From<wgpu::CreateSurfaceError> for GpuInitError {
129    fn from(value: wgpu::CreateSurfaceError) -> Self {
130        Self::CreateSurface(value)
131    }
132}
133
134impl From<wgpu::RequestDeviceError> for GpuInitError {
135    fn from(value: wgpu::RequestDeviceError) -> Self {
136        Self::RequestDevice(value)
137    }
138}
139
// NOTE: the following paragraph describes `GpuRenderer` (the struct
// declared further down in this file), not `GpuLine`. It is a plain
// comment so that it stays out of `GpuLine`'s rustdoc.
//
// WGPU-backed renderer. Owns the device, queue, and surface
// bound to the host's window. `GpuRenderer::render` is the GPU.1
// clear-to-colour path; `GpuRenderer::render_chunk` is GPU.3's
// single-chunk DDA marcher.
//
// The window is consumed only at construction — `wgpu`'s
// `Surface<'static>` keeps its own `Arc` clone of the handle, so
// the renderer holds no window field of its own.

/// A world-space line segment for [`GpuRenderer::draw_lines_deferred`].
/// `color` is straight RGBA in `0..=1` (the alpha drives the over-blend);
/// `width_px` is the screen-space thickness; `depth_test` occludes the
/// segment behind nearer marched geometry.
#[derive(Clone, Copy, Debug)]
pub struct GpuLine {
    /// First endpoint, world space.
    pub a: [f32; 3],
    /// Second endpoint, world space.
    pub b: [f32; 3],
    /// Straight (non-premultiplied) RGBA, each channel in `0..=1`.
    pub color: [f32; 4],
    /// Screen-space thickness in pixels; the vertex builder treats
    /// anything below `1.0` as `1.0`.
    pub width_px: f32,
    /// When `true`, the segment is hidden behind nearer marched geometry.
    pub depth_test: bool,
}
160
/// World camera basis for projecting [`GpuLine`] endpoints — the same
/// pinhole the scene-DDA pass marches with (`right`/`down`/`forward`
/// orthonormal, `pos` in world voxel units).
///
/// The vertex builders dot `point - pos` with each basis vector to get
/// camera-space `(right, down, forward)` coordinates.
#[derive(Clone, Copy, Debug)]
pub struct GpuLineCamera {
    /// Camera position in world voxel units.
    pub pos: [f32; 3],
    /// Camera-right basis vector.
    pub right: [f32; 3],
    /// Camera-down basis vector (screen y grows downward along it).
    pub down: [f32; 3],
    /// Viewing direction; the component used for near-plane clipping.
    pub forward: [f32; 3],
}
171
/// Near plane (camera-forward distance) below which a [`GpuLine`] endpoint
/// is clipped, so the pinhole divide stays finite. `0.0625` is 1/16 of a
/// world unit; the image-quad clipper uses the same plane.
const LINE_NEAR_Z: f32 = 0.0625;
/// Depth-test slack (euclidean world distance) so a line resting on the
/// surface it traces doesn't z-fight the marched geometry.
const LINE_DEPTH_BIAS: f32 = 0.5;
178
/// One expanded-quad vertex (`build_line_vertices` output). `pos` is NDC;
/// `depth` is the euclidean world distance of the source endpoint (the
/// marcher's `best_t` metric); `depth_test` is `1.0`/`0.0`.
///
/// `repr(C)` + `Pod`: the field order is the vertex-buffer layout
/// (8 × `f32` = 32 bytes), so it must not be reordered.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct LineVertex {
    /// NDC xy of this quad corner.
    pos: [f32; 2],
    /// Euclidean world distance of the endpoint this corner came from.
    depth: f32,
    /// `1.0` when the owning line requested depth testing, else `0.0`.
    depth_test: f32,
    /// The owning line's RGBA colour, copied verbatim.
    color: [f32; 4],
}
190
/// `line.wgsl` fragment uniform (std140; 16 bytes).
///
/// Four 4-byte scalars with no padding; field order must match the
/// shader-side declaration.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct LineParams {
    // NOTE(review): presumably the render-target width/height in pixels
    // used to index the scene depth buffer — confirm in `line.wgsl`.
    screen_w: u32,
    screen_h: u32,
    // NOTE(review): presumably filled from `LINE_DEPTH_BIAS` — confirm at
    // the upload site.
    depth_bias: f32,
    /// `1` when only the dummy depth buffer is bound, which keeps the
    /// shader from indexing it (see `LineResources::dummy_depth`).
    no_depth: u32,
}
200
/// Lazy-built debug-line pipeline (L3.2). The bind group is rebuilt each
/// draw (it references the current `scene_dda.depth_buffer`, which the
/// swapchain resize recreates); the pipeline / layout / uniform persist.
struct LineResources {
    /// Render pipeline for the expanded line quads.
    pipeline: wgpu::RenderPipeline,
    /// Layout used to build the per-draw bind group.
    bgl: wgpu::BindGroupLayout,
    /// Persistent uniform buffer (see `LineParams`).
    uniform_buf: wgpu::Buffer,
    /// 1-word stand-in bound when no scene depth exists (sprite-only /
    /// empty scene); `no_depth = 1` keeps the shader from indexing it.
    dummy_depth: wgpu::Buffer,
}
212
213/// Project + expand world-space [`GpuLine`]s into screen-space quad
214/// vertices (6 per visible segment) for `line.wgsl`. Mirrors the
215/// scene-DDA pinhole (`forward + ndc_x·half_w·right − ndc_y·half_h·down`)
216/// so lines land on the marched geometry, carrying each endpoint's
217/// euclidean world distance as the depth-test key (= the marcher's
218/// `best_t`). Segments fully behind the near plane are dropped; the rest
219/// are clipped to it.
220fn build_line_vertices(
221    cam: &GpuLineCamera,
222    lines: &[GpuLine],
223    w: u32,
224    h: u32,
225    fov_y: f32,
226) -> Vec<LineVertex> {
227    let aspect = w as f32 / h as f32;
228    let half_h = (fov_y * 0.5).tan();
229    let half_w = half_h * aspect;
230    let (wf, hf) = (w as f32, h as f32);
231
232    let cam_coords = |p: [f32; 3]| -> [f32; 3] {
233        let d = [p[0] - cam.pos[0], p[1] - cam.pos[1], p[2] - cam.pos[2]];
234        [
235            cam.right[0] * d[0] + cam.right[1] * d[1] + cam.right[2] * d[2],
236            cam.down[0] * d[0] + cam.down[1] * d[1] + cam.down[2] * d[2],
237            cam.forward[0] * d[0] + cam.forward[1] * d[1] + cam.forward[2] * d[2],
238        ]
239    };
240    // Camera-space point → (NDC xy, euclidean depth). NDC y is up (+1 top),
241    // matching WebGPU clip space; depth is the marcher's world-t metric.
242    let project = |q: [f32; 3]| -> ([f32; 2], f32) {
243        let inv = 1.0 / q[2];
244        let nx = q[0] * inv / half_w;
245        let ny = -q[1] * inv / half_h;
246        let depth = (q[0] * q[0] + q[1] * q[1] + q[2] * q[2]).sqrt();
247        ([nx, ny], depth)
248    };
249
250    let mut out = Vec::with_capacity(lines.len() * 6);
251    for line in lines {
252        let ca = cam_coords(line.a);
253        let cb = cam_coords(line.b);
254        let (cfa, cfb) = (ca[2], cb[2]);
255        if cfa < LINE_NEAR_Z && cfb < LINE_NEAR_Z {
256            continue;
257        }
258        // Near-clip in segment-parameter space on the forward component.
259        let (mut t0, mut t1) = (0.0f32, 1.0f32);
260        let dz = cfb - cfa;
261        if dz.abs() > f32::EPSILON {
262            let tn = (LINE_NEAR_Z - cfa) / dz;
263            if dz > 0.0 {
264                t0 = t0.max(tn);
265            } else {
266                t1 = t1.min(tn);
267            }
268        }
269        if t0 > t1 {
270            continue;
271        }
272        let lerp3 = |t: f32| {
273            [
274                ca[0] + (cb[0] - ca[0]) * t,
275                ca[1] + (cb[1] - ca[1]) * t,
276                ca[2] + (cb[2] - ca[2]) * t,
277            ]
278        };
279        let (n0, d0) = project(lerp3(t0));
280        let (n1, d1) = project(lerp3(t1));
281
282        // Expand in pixel space for a uniform screen-space thickness.
283        let to_px = |n: [f32; 2]| [(n[0] * 0.5 + 0.5) * wf, (0.5 - n[1] * 0.5) * hf];
284        let to_ndc = |p: [f32; 2]| [p[0] / wf * 2.0 - 1.0, 1.0 - p[1] / hf * 2.0];
285        let p0 = to_px(n0);
286        let p1 = to_px(n1);
287        let (dx, dy) = (p1[0] - p0[0], p1[1] - p0[1]);
288        let len = (dx * dx + dy * dy).sqrt().max(1e-6);
289        let half = line.width_px.max(1.0) * 0.5;
290        let (ex, ey) = (-dy / len * half, dx / len * half);
291
292        let c0a = to_ndc([p0[0] + ex, p0[1] + ey]);
293        let c0b = to_ndc([p0[0] - ex, p0[1] - ey]);
294        let c1a = to_ndc([p1[0] + ex, p1[1] + ey]);
295        let c1b = to_ndc([p1[0] - ex, p1[1] - ey]);
296        let dt = if line.depth_test { 1.0 } else { 0.0 };
297        let vert = |pos: [f32; 2], depth: f32| LineVertex {
298            pos,
299            depth,
300            depth_test: dt,
301            color: line.color,
302        };
303        // Two triangles, cull disabled so winding is irrelevant.
304        out.push(vert(c0a, d0));
305        out.push(vert(c0b, d0));
306        out.push(vert(c1a, d1));
307        out.push(vert(c1a, d1));
308        out.push(vert(c0b, d0));
309        out.push(vert(c1b, d1));
310    }
311    out
312}
313
/// A world-space 2D image-sprite quad for [`GpuRenderer::draw_images_deferred`].
/// `corners` are the four world points `TL, TR, BL, BR` (UVs `(0,0) (1,0)
/// (0,1) (1,1)`); `image` indexes a texture uploaded via
/// [`GpuRenderer::upload_image`]; `tint` is straight RGBA in `0..=1`
/// (multiplied into every texel); `depth_test` occludes the quad behind
/// nearer marched geometry. The facade resolves orientation + back-face
/// culling, so this is pure geometry.
#[derive(Clone, Copy, Debug)]
pub struct GpuImageQuad {
    /// World-space corners in `TL, TR, BL, BR` order.
    pub corners: [[f32; 3]; 4],
    /// Id of the texture to sample, as returned by
    /// [`GpuRenderer::upload_image`].
    pub image: usize,
    /// Straight RGBA multiplier, each channel in `0..=1`.
    pub tint: [f32; 4],
    /// When `true`, the quad is hidden behind nearer marched geometry.
    pub depth_test: bool,
}
328
/// One expanded textured-quad vertex (`build_image_vertices` output).
/// `ndc` is the projected NDC xy; `w` is the source `forward` depth, fed
/// back into a homogeneous clip position so the rasterizer interpolates
/// `uv` perspective-correctly; `depth` is the euclidean world distance
/// (the marcher's `best_t`) for the manual depth test.
///
/// `repr(C)` + `Pod`: the field order is the vertex-buffer layout
/// (11 × `f32` = 44 bytes), so it must not be reordered.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct ImageVertex {
    /// Projected NDC xy.
    ndc: [f32; 2],
    /// Camera-forward distance of the vertex.
    w: f32,
    /// Euclidean distance from the camera.
    depth: f32,
    /// `1.0` when the owning quad requested depth testing, else `0.0`.
    depth_test: f32,
    /// Texture coordinate (interpolated at near-plane crossings).
    uv: [f32; 2],
    /// The owning quad's tint, copied verbatim.
    tint: [f32; 4],
}
344
/// Lazy-built image-sprite pipeline (mirrors [`LineResources`]). The
/// per-draw bind group adds the quad's texture + a sampler to the line
/// pass's uniform + scene-depth bindings.
struct ImageResources {
    /// Render pipeline for the textured quads.
    pipeline: wgpu::RenderPipeline,
    /// Layout used to build the per-draw bind group.
    bgl: wgpu::BindGroupLayout,
    /// Persistent uniform buffer for the pass.
    uniform_buf: wgpu::Buffer,
    // NOTE(review): presumably the same "no scene depth" stand-in as
    // `LineResources::dummy_depth` — confirm at the bind-group build site.
    dummy_depth: wgpu::Buffer,
    /// Sampler bound alongside each quad's texture.
    sampler: wgpu::Sampler,
}
355
/// A retained image-sprite texture (uploaded via
/// [`GpuRenderer::upload_image`], referenced by [`GpuImageQuad::image`]).
struct ImageResident {
    /// View bound into the image pass's per-draw bind group.
    view: wgpu::TextureView,
    // Held so the view stays valid + the texture shows in profiler dumps.
    _texture: wgpu::Texture,
}
363
/// Camera-space textured-quad vertex (near-clip working set): the
/// `(right, down, forward)` components + the texture `uv`.
#[derive(Clone, Copy)]
struct ImgClipV {
    /// Camera-space position; index 2 (`forward`) is what the near-plane
    /// clipper tests.
    cam: [f32; 3],
    /// Texture coordinate carried (and interpolated) through clipping.
    uv: [f32; 2],
}
371
372/// Clip a convex camera-space polygon against the near plane
373/// (`forward >= LINE_NEAR_Z`), interpolating UVs at each crossing.
374fn clip_near_image(poly: &[ImgClipV]) -> Vec<ImgClipV> {
375    let n = poly.len();
376    let mut out: Vec<ImgClipV> = Vec::with_capacity(n + 1);
377    for i in 0..n {
378        let cur = poly[i];
379        let prev = poly[(i + n - 1) % n];
380        let cur_in = cur.cam[2] >= LINE_NEAR_Z;
381        let prev_in = prev.cam[2] >= LINE_NEAR_Z;
382        if cur_in != prev_in {
383            let t = (LINE_NEAR_Z - prev.cam[2]) / (cur.cam[2] - prev.cam[2]);
384            out.push(ImgClipV {
385                cam: [
386                    prev.cam[0] + (cur.cam[0] - prev.cam[0]) * t,
387                    prev.cam[1] + (cur.cam[1] - prev.cam[1]) * t,
388                    LINE_NEAR_Z,
389                ],
390                uv: [
391                    prev.uv[0] + (cur.uv[0] - prev.uv[0]) * t,
392                    prev.uv[1] + (cur.uv[1] - prev.uv[1]) * t,
393                ],
394            });
395        }
396        if cur_in {
397            out.push(cur);
398        }
399    }
400    out
401}
402
403/// Project + near-clip a world-space [`GpuImageQuad`] into perspective-correct
404/// textured-quad vertices for `image.wgsl`. Mirrors the scene-DDA pinhole
405/// (the same one [`build_line_vertices`] uses), carrying each vertex's
406/// euclidean world distance as the depth-test key. Quads fully behind the
407/// near plane produce no vertices.
408fn build_image_vertices(
409    cam: &GpuLineCamera,
410    quad: &GpuImageQuad,
411    w: u32,
412    h: u32,
413    fov_y: f32,
414) -> Vec<ImageVertex> {
415    let aspect = w as f32 / h as f32;
416    let half_h = (fov_y * 0.5).tan();
417    let half_w = half_h * aspect;
418    let dt = if quad.depth_test { 1.0 } else { 0.0 };
419
420    let cam_coords = |p: [f32; 3]| -> [f32; 3] {
421        let d = [p[0] - cam.pos[0], p[1] - cam.pos[1], p[2] - cam.pos[2]];
422        [
423            cam.right[0] * d[0] + cam.right[1] * d[1] + cam.right[2] * d[2],
424            cam.down[0] * d[0] + cam.down[1] * d[1] + cam.down[2] * d[2],
425            cam.forward[0] * d[0] + cam.forward[1] * d[1] + cam.forward[2] * d[2],
426        ]
427    };
428    let project = |v: ImgClipV| -> ImageVertex {
429        let (cx, cy, cz) = (v.cam[0], v.cam[1], v.cam[2]);
430        ImageVertex {
431            ndc: [cx / (cz * half_w), -cy / (cz * half_h)],
432            w: cz,
433            depth: (cx * cx + cy * cy + cz * cz).sqrt(),
434            depth_test: dt,
435            uv: v.uv,
436            tint: quad.tint,
437        }
438    };
439
440    // Per-corner UV: TL(0,0) TR(1,0) BL(0,1) BR(1,1).
441    let uvs = [[0.0, 0.0], [1.0, 0.0], [0.0, 1.0], [1.0, 1.0]];
442    let verts: Vec<ImgClipV> = quad
443        .corners
444        .iter()
445        .zip(uvs)
446        .map(|(c, uv)| ImgClipV {
447            cam: cam_coords(*c),
448            uv,
449        })
450        .collect();
451
452    let mut out = Vec::with_capacity(12);
453    for tri in [[0usize, 1, 2], [1, 3, 2]] {
454        let poly = [verts[tri[0]], verts[tri[1]], verts[tri[2]]];
455        let clipped = clip_near_image(&poly);
456        if clipped.len() < 3 {
457            continue;
458        }
459        for i in 1..clipped.len() - 1 {
460            out.push(project(clipped[0]));
461            out.push(project(clipped[i]));
462            out.push(project(clipped[i + 1]));
463        }
464    }
465    out
466}
467
/// WGPU-backed renderer. Owns the device, queue, and surface
/// bound to the host's window. [`Self::render`] is the GPU.1
/// clear-to-colour path; [`Self::render_chunk`] is GPU.3's
/// single-chunk DDA marcher.
///
/// The window is consumed only at construction — `wgpu`'s
/// `Surface<'static>` keeps its own `Arc` handle to it, so
/// the renderer holds no window field of its own.
pub struct GpuRenderer {
    /// Presentation surface created from the host window.
    surface: wgpu::Surface<'static>,
    /// Current swapchain configuration for `surface`.
    surface_config: wgpu::SurfaceConfiguration,
    /// Logical device all GPU resources are created on.
    device: wgpu::Device,
    /// Submission queue paired with `device`.
    queue: wgpu::Queue,
    // NOTE(review): presumably a human-readable description of the
    // selected adapter, filled in during init — confirm in `finish_init`.
    adapter_info: String,
    // NOTE(review): presumably seeded from
    // `GpuRendererSettings::clear_colour` — confirm in `finish_init`.
    clear_colour: [f64; 3],
    // NOTE(review): presumably a count of rendered frames — confirm where
    // it is incremented.
    frame_count: u32,
    /// Lazy-built on first [`Self::render_chunk`] call; rebuilt when
    /// the swapchain resizes (storage texture must match).
    chunk_dda: Option<ChunkDdaResources>,
    /// Lazy-built on first [`Self::render_grid`] call; same resize
    /// trigger as `chunk_dda`. The two paths share the same blit
    /// pipeline structure but bind different storage layouts.
    grid_dda: Option<GridDdaResources>,
    /// Lazy-built on first [`Self::render_scene`] call. Holds the
    /// multi-grid pipeline + per-grid camera uniforms.
    scene_dda: Option<SceneDdaResources>,
    /// GPU.8 — panoramic sky texture + sampler. Created at
    /// `new` as a 1×1 mid-grey default; [`Self::set_sky_panorama`]
    /// replaces it. The scene-DDA bind group references this each
    /// frame.
    sky_texture: wgpu::Texture,
    sky_view: wgpu::TextureView,
    sky_sampler: wgpu::Sampler,
    /// GPU.8 fog state. `color` is BGRA-style premultiplied (each
    /// channel in [0, 1]); `near` is the world-t distance at which
    /// fog starts kicking in; `far` is the distance at which it's
    /// fully opaque. The shader does
    /// `mix(hit, fog, smoothstep(near, far, t))`.
    fog_color: [f32; 3],
    fog_near: f32,
    fog_far: f32,
    /// GPU.10 — sprites rendered as DDA-marched voxel models (the
    /// precise path; the GPU.9 compute splatter it replaced was
    /// retired in 10.5). Holds the concatenated model registry + the
    /// per-frame instance array; set via [`Self::set_sprite_instances`].
    sprite_registry: Option<sprite_model::SpriteRegistryResident>,
    /// Lazy-built pipeline + uniform for the model-DDA pass.
    sprite_model_dda: Option<SpriteModelDdaResources>,
    /// GPU.10.4 — LOD aggressiveness: step a sprite to the next mip
    /// once a mip-0 voxel projects below this many screen pixels.
    /// Defaults to 4.0 (the empirical sweet spot); the host can tune
    /// via [`Self::set_sprite_lod_px`].
    sprite_lod_px: f32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` is marched at the mip level
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// ladder. `0` disables LOD (always mip-0). Tunable via
    /// [`Self::set_scene_mip_scan_dist`] — the axis-aligned-mip-beams
    /// mitigation (GPU.11.2) pushes it outward if banding appears.
    scene_mip_scan_dist: f32,
    /// Per-face grid side-shades (voxlap setsideshades), packed for the
    /// scene-DDA uniform: `[0]=(top,bot,left,right)`, `[1]=(up,down,_,_)`.
    /// Each is the u8 shade intensity. `[[0;4];2]` = no shading. Set via
    /// [`Self::set_scene_side_shades`].
    scene_side_shades: [[i32; 4]; 2],
    /// Vertical FOV (radians) the last `render_scene` marched with —
    /// cached so [`Self::pixel_ray`] reconstructs the matching view ray
    /// for picking. `0` until the first scene render.
    last_fov_y_rad: f32,
    /// The acquired-but-not-yet-presented swapchain frame from the most
    /// recent deferred render ([`Self::render_scene`] /
    /// [`Self::render_clear_deferred`]). [`Self::present`] shows it as
    /// is; [`Self::paint_egui`] overlays egui first. Lets a host slot a
    /// UI pass between the marcher and present. `None` between present
    /// and the next render.
    pending_frame: Option<(wgpu::SurfaceTexture, wgpu::TextureView)>,
    /// Lazy-built debug-line pipeline (L3.2) — built on the first
    /// [`Self::draw_lines_deferred`] call.
    line_resources: Option<LineResources>,
    /// Persistent debug-line vertex buffer (L3.3) — grown on demand and
    /// reused across frames so a per-frame overlay (hundreds of segments)
    /// costs one `write_buffer`, not a fresh allocation. `line_vbuf_cap`
    /// is its capacity in bytes.
    line_vbuf: Option<wgpu::Buffer>,
    line_vbuf_cap: u64,
    /// Lazy-built image-sprite pipeline — built on the first
    /// [`Self::draw_images_deferred`] call.
    image_resources: Option<ImageResources>,
    /// Persistent image-sprite vertex buffer, grown on demand and reused
    /// across frames (like [`Self::line_vbuf`]).
    image_vbuf: Option<wgpu::Buffer>,
    image_vbuf_cap: u64,
    /// Retained image-sprite textures, indexed by the id
    /// [`Self::upload_image`] returns. A dropped slot is `None` and is
    /// re-used by a later upload.
    images: Vec<Option<ImageResident>>,
    /// Lazy-built `egui-wgpu` paint pipeline; created on the first
    /// [`Self::paint_egui`] call (`hud` feature).
    #[cfg(feature = "hud")]
    egui_renderer: Option<egui_wgpu::Renderer>,
}
561
/// Per-renderer chunk-DDA pipeline state. The compute shader writes
/// into the storage texture; a fullscreen-triangle render pass
/// nearest-neighbour blits it to the swapchain.
struct ChunkDdaResources {
    /// Pixel size the storage texture was built for; the resources are
    /// rebuilt when the swapchain size changes.
    storage_size: (u32, u32),
    /// View of the storage texture the compute pass writes.
    storage_view: wgpu::TextureView,
    /// Uniform buffer for the compute pass (see `ChunkDdaUniform`).
    uniform_buf: wgpu::Buffer,
    /// Bind-group layout of the compute pass.
    bgl_dda: wgpu::BindGroupLayout,
    /// The DDA compute pipeline.
    pipeline_dda: wgpu::ComputePipeline,
    /// Bind group consumed by the blit pass.
    blit_bg: wgpu::BindGroup,
    /// Fullscreen-triangle blit pipeline.
    pipeline_blit: wgpu::RenderPipeline,
    // wgpu BindGroups internally Arc their resources, but we keep
    // the handle so the sampler shows up in profiler dumps.
    _sampler: wgpu::Sampler,
}
577
/// Per-renderer grid-DDA pipeline state, held in `GpuRenderer::grid_dda`.
/// Field-for-field the same shape as `ChunkDdaResources`: a compute
/// pipeline plus the blit pipeline that copies its output to the
/// swapchain.
struct GridDdaResources {
    /// Pixel size the storage texture was built for.
    storage_size: (u32, u32),
    /// View of the storage texture the compute pass writes.
    storage_view: wgpu::TextureView,
    /// Uniform buffer for the compute pass (see `GridDdaUniform`).
    uniform_buf: wgpu::Buffer,
    /// Bind-group layout of the compute pass.
    bgl_dda: wgpu::BindGroupLayout,
    /// The grid-DDA compute pipeline.
    pipeline_dda: wgpu::ComputePipeline,
    /// Bind group consumed by the blit pass.
    blit_bg: wgpu::BindGroup,
    /// Blit pipeline.
    pipeline_blit: wgpu::RenderPipeline,
    // Kept only so the handle stays owned here; never read directly.
    _sampler: wgpu::Sampler,
}
588
/// Per-renderer scene-DDA (multi-grid) pipeline state, held in
/// `GpuRenderer::scene_dda`. Unlike the chunk/grid paths it renders into
/// a storage buffer rather than a storage texture, and it also owns the
/// per-pixel depth buffer plus its readback copy.
struct SceneDdaResources {
    /// Pixel size the framebuffer / depth buffers were built for.
    storage_size: (u32, u32),
    /// Framebuffer as a packed-`rgba8unorm` storage **buffer** (row
    /// stride = width), written by the scene + sprite compute passes
    /// and read by the blit. A buffer (not a storage texture) dodges
    /// Chrome-Dawn's tiled write-texture layout (which produced a
    /// 128×256-tiled image); linear + explicit stride is portable.
    framebuffer: wgpu::Buffer,
    /// Uniform buffer for the compute pass (see `SceneDdaUniform`).
    uniform_buf: wgpu::Buffer,
    /// Bind-group layout of the compute pass.
    bgl_dda: wgpu::BindGroupLayout,
    /// The scene-DDA compute pipeline.
    pipeline_dda: wgpu::ComputePipeline,
    /// Bind group consumed by the blit pass.
    blit_bg: wgpu::BindGroup,
    /// Blit pipeline.
    pipeline_blit: wgpu::RenderPipeline,
    /// GPU.9 — per-pixel world-t depth (f32 bits as u32), sized
    /// `width * height * 4`. The scene pass writes it when sprites
    /// are present; the sprite model-DDA pass reads + composites
    /// against it.
    depth_buffer: wgpu::Buffer,
    /// Picking — a `COPY_DST | MAP_READ` staging copy of `depth_buffer`
    /// so the host can read back the per-pixel world-t after a frame
    /// (e.g. click → which voxel). Same size as `depth_buffer`.
    depth_readback: wgpu::Buffer,
}
612
/// GPU.10.0 — single-sprite model-DDA pipeline: one thread per pixel
/// marches the model voxel volume and composites against the scene
/// depth buffer.
struct SpriteModelDdaResources {
    /// Bind-group layout of the model-DDA compute pass.
    bgl: wgpu::BindGroupLayout,
    /// The model-DDA compute pipeline.
    pipeline: wgpu::ComputePipeline,
    /// Per-frame uniform buffer (see `SpriteModelUniform`).
    uniform_buf: wgpu::Buffer,
}
621
/// Per-frame uniform for the model-DDA pass. Mirrors `Uniform` in
/// `sprite_model_dda.wgsl` (std140). Per-model + per-instance data
/// now live in storage buffers; this holds only the camera, fog, and
/// instance count.
///
/// Layout: four `vec3 + pad` camera rows (64 bytes), a `vec4` fog colour,
/// then eight 4-byte scalars — 112 bytes total. Field order must stay in
/// lockstep with the WGSL declaration.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SpriteModelUniform {
    /// Camera position; `_p0` pads the `vec3` to 16 bytes.
    cam_pos: [f32; 3],
    _p0: f32,
    /// Camera-right basis vector (+ pad).
    cam_right: [f32; 3],
    _p1: f32,
    /// Camera-down basis vector (+ pad).
    cam_down: [f32; 3],
    _p2: f32,
    /// Camera-forward basis vector (+ pad).
    cam_forward: [f32; 3],
    _p3: f32,
    // NOTE(review): the meaning of the 4th component is not visible here
    // (`SceneDdaUniform::fog_color` packs `fog_near` there) — confirm.
    fog_color: [f32; 4],
    /// Render-target size in pixels.
    screen_size: [u32; 2],
    /// Number of sprite instances the pass should consider.
    instance_count: u32,
    fog_far: f32,
    /// Vertical field of view in radians.
    fov_y_rad: f32,
    // NOTE(review): presumably the number of screen tiles per row for
    // instance binning, with `tile_size` = `SPRITE_TILE_SIZE` — confirm
    // at the upload site.
    tiles_x: u32,
    tile_size: u32,
    // Trailing pad that rounds the struct up to a multiple of 16 bytes.
    _p6: f32,
}
646
/// GPU.10.3 — sprite screen-tile edge in pixels for instance binning
/// (tiles are `SPRITE_TILE_SIZE × SPRITE_TILE_SIZE` pixels).
const SPRITE_TILE_SIZE: u32 = 16;
649
650/// Build the per-grid camera storage buffer bound at `scene_dda.wgsl`
651/// binding 15 (read-only). One [`SceneDdaPerGridCamera`] per grid; the
652/// shader only indexes `0..grid_count`. An empty scene pads to one
653/// zeroed element (wgpu rejects a zero-sized storage binding). This
654/// replaces the old fixed `[…; 16]` uniform array, so a scene can hold
655/// any number of grids — the only ceiling is the device's storage size.
656fn upload_grid_cameras(device: &wgpu::Device, cams: &[SceneDdaPerGridCamera]) -> wgpu::Buffer {
657    use wgpu::util::DeviceExt;
658    let one = [SceneDdaPerGridCamera::zeroed()];
659    let src: &[SceneDdaPerGridCamera] = if cams.is_empty() { &one } else { cams };
660    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
661        label: Some("roxlap-gpu scene_dda.grid_cameras"),
662        contents: bytemuck::cast_slice(src),
663        usage: wgpu::BufferUsages::STORAGE,
664    })
665}
666
// The scene_dda bind group + layout wire occupancy pages 1..=3 at
// bindings 12..=14 explicitly; keep that in lockstep with the page
// count. Bump the bindings (here, in the WGSL, and in the bind
// group) if MAX_OCC_PAGES changes.
//
// Compile-time check: the build fails as soon as `scene::MAX_OCC_PAGES`
// is changed without revisiting those hard-wired bindings.
const _: () = assert!(scene::MAX_OCC_PAGES == 4);
672
/// One grid's camera as laid out in the per-grid camera storage buffer
/// (see `upload_grid_cameras`) and embedded as
/// `SceneDdaUniform::sky_cam`. Four `vec3 + pad` rows, 64 bytes total;
/// the pads keep every `vec3` on a 16-byte boundary.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaPerGridCamera {
    /// Camera position.
    pos: [f32; 3],
    _pad0: f32,
    /// Camera-right basis vector.
    right: [f32; 3],
    _pad1: f32,
    /// Camera-down basis vector.
    down: [f32; 3],
    _pad2: f32,
    /// Camera-forward basis vector.
    forward: [f32; 3],
    _pad3: f32,
}
685
686impl SceneDdaPerGridCamera {
687    fn from_camera(c: &Camera) -> Self {
688        Self {
689            pos: c.position,
690            _pad0: 0.0,
691            right: c.right,
692            _pad1: 0.0,
693            down: c.down,
694            _pad2: 0.0,
695            forward: c.forward,
696            _pad3: 0.0,
697        }
698    }
699}
700
/// Uniform block for the scene-DDA (multi-grid) compute pass. `repr(C)` +
/// `Pod` with explicit pads, 176 bytes in total; field order and padding
/// must stay in lockstep with the shader-side declaration.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaUniform {
    /// Vertical field of view in radians.
    fov_y_rad: f32,
    /// Number of grids in the scene; `0` for a sprite-only / empty scene.
    grid_count: u32,
    // NOTE(review): presumably the marcher's outer-loop step budget —
    // confirm in `scene_dda.wgsl`.
    max_outer_steps: u32,
    _pad0: u32,
    /// Render-target size in pixels.
    screen_size: [u32; 2],
    _pad1: [u32; 2],
    /// GPU.8 — `[r, g, b, fog_near]`. The `near` distance is packed
    /// into the colour's alpha channel to keep std140 alignment
    /// tidy (a bare `f32` after the `vec4` would force extra pads).
    fog_color: [f32; 4],
    fog_far: f32,
    /// GPU.9 — `1` when the sprite pass is active (scene pass then
    /// records `best_t` into the depth buffer), `0` otherwise.
    write_depth: u32,
    /// Occupancy paging: words per storage page (see
    /// `scene::split_occupancy_pages`). Only consulted by the shader
    /// when `occ_num_pages > 1`.
    occ_page_words: u32,
    /// Number of real occupancy pages (1 on multi-GiB GPUs → the
    /// shader takes a branch-free single-page read).
    occ_num_pages: u32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` marches at mip
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// count. `0` disables LOD (always mip-0).
    mip_scan_dist: f32,
    // Pads the scalar run out to a 16-byte boundary before `sky_cam`.
    _pad2: u32,
    _pad3: u32,
    _pad4: u32,
    /// World camera used only to derive the per-pixel sky direction —
    /// always valid, so a `grid_count == 0` (sprite-only / empty) scene
    /// still paints a proper sky instead of a degenerate `(0,0,1)`
    /// (whose `atan2(0,0)` sky lookup samples black).
    sky_cam: SceneDdaPerGridCamera,
    /// Per-face side-shade intensities (voxlap setsideshades), each the
    /// u8 shade subtracted from a voxel's brightness byte at a hit.
    /// `side_shades0 = (top, bot, left, right)`,
    /// `side_shades1 = (up, down, _, _)`. All-zero = no shading.
    side_shades0: [i32; 4],
    side_shades1: [i32; 4],
}
745
/// Uniform block for the grid-DDA compute pass (uploaded through
/// `GridDdaResources::uniform_buf`). `repr(C)` + `Pod`, 112 bytes; every
/// `vec3` is followed by a 4-byte slot (a pad or a packed scalar) so rows
/// stay 16-byte aligned.
// NOTE(review): presumably mirrors the uniform struct in the grid-DDA
// WGSL shader — the shader is outside this view, confirm there.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct GridDdaUniform {
    /// Camera position.
    camera_pos: [f32; 3],
    _pad0: f32,
    /// Camera-right basis vector.
    camera_right: [f32; 3],
    _pad1: f32,
    /// Camera-down basis vector.
    camera_down: [f32; 3],
    _pad2: f32,
    /// Camera-forward basis vector; `fov_y_rad` fills its 4th slot.
    camera_forward: [f32; 3],
    /// Vertical field of view in radians.
    fov_y_rad: f32,
    /// Render-target size in pixels.
    screen_size: [u32; 2],
    // NOTE(review): presumably the chunk side length in voxels (voxlap's
    // VSID) — confirm against the grid upload code.
    vsid: u32,
    // NOTE(review): presumably the marcher's outer-loop step budget.
    max_outer_steps: u32,
    // NOTE(review): presumably the grid extent in chunks per axis.
    chunks_dims: [u32; 3],
    _pad3: u32,
    // NOTE(review): presumably the chunk coordinate of the grid's origin.
    origin_chunk: [i32; 3],
    _pad4: u32,
}
765
/// Uniform block for the single-chunk DDA compute pass (uploaded through
/// `ChunkDdaResources::uniform_buf`). `repr(C)` + `Pod`, 80 bytes; the
/// first four rows match `GridDdaUniform`.
// NOTE(review): presumably mirrors the uniform struct in the chunk-DDA
// WGSL shader — the shader is outside this view, confirm there.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct ChunkDdaUniform {
    /// Camera position.
    camera_pos: [f32; 3],
    _pad0: f32,
    /// Camera-right basis vector.
    camera_right: [f32; 3],
    _pad1: f32,
    /// Camera-down basis vector.
    camera_down: [f32; 3],
    _pad2: f32,
    /// Camera-forward basis vector; `fov_y_rad` fills its 4th slot.
    camera_forward: [f32; 3],
    /// Vertical field of view in radians.
    fov_y_rad: f32,
    /// Render-target size in pixels.
    screen_size: [u32; 2],
    // NOTE(review): presumably the chunk side length in voxels (voxlap's
    // VSID) — confirm against the chunk upload code.
    vsid: u32,
    // NOTE(review): presumably the maximum march distance — confirm units
    // in the shader.
    max_scan_dist: u32,
}
781
782impl GpuRenderer {
783    /// Stand up the device + surface + swapchain on `window`. Async
784    /// because `wgpu::Adapter`/`Device` requests are.
785    ///
786    /// `window` is any [`raw-window-handle`] provider (winit, SDL,
787    /// GLFW, …) wrapped in an `Arc`; `size` is its initial physical
788    /// framebuffer size in pixels — passed explicitly so the renderer
789    /// stays decoupled from any one windowing library's size API.
790    ///
791    /// [`raw-window-handle`]: raw_window_handle
792    ///
793    /// # Errors
794    /// Returns [`GpuInitError`] if surface creation, adapter
795    /// selection, or device request fails. Hosts treat any error as
796    /// "fall back to the CPU path".
797    pub async fn new<W>(
798        window: Arc<W>,
799        size: (u32, u32),
800        settings: GpuRendererSettings,
801    ) -> Result<Self, GpuInitError>
802    where
803        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
804    {
805        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle());
806        let surface = instance.create_surface(window.clone())?;
807        let adapter = Self::request_adapter(&instance, Some(&surface), settings).await?;
808        let (device, queue) = Self::request_device(&adapter).await?;
809        Ok(Self::finish_init(
810            &adapter, device, queue, surface, size, settings,
811        ))
812    }
813
814    /// wasm/WebGPU: build the renderer against an HTML `canvas`. No
815    /// `Send + Sync` bound — wgpu's surface/device/queue are `!Send` on
816    /// the `+atomics` shared-memory wasm build, and the browser host is
817    /// single-threaded (`Rc<RefCell<…>>`). The native generic-`W` entry
818    /// (which carries the bound) isn't reachable on wasm.
819    ///
820    /// Probes for an adapter **before** `create_surface`: on wasm,
821    /// creating the surface calls `canvas.getContext("webgpu")`, which
822    /// permanently locks the canvas's context type. If we bound it and
823    /// then found no adapter, a CPU/WebGL2 fallback on the *same* canvas
824    /// (the facade clones the handle, but it's the same DOM element)
825    /// would fail with "no webgl2 context". Probing first leaves the
826    /// canvas pristine when WebGPU is unavailable.
827    ///
828    /// # Errors
829    /// See [`Self::new`].
830    #[cfg(target_arch = "wasm32")]
831    pub async fn new_from_canvas(
832        canvas: web_sys::HtmlCanvasElement,
833        size: (u32, u32),
834        settings: GpuRendererSettings,
835    ) -> Result<Self, GpuInitError> {
836        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle());
837        // Probe adapter AND device before binding the canvas — both
838        // `requestAdapter` and `requestDevice` can fail on wasm, and
839        // `create_surface` permanently locks the canvas to a WebGPU
840        // context. Creating the surface last keeps the canvas pristine
841        // for the CPU/WebGL2 fallback on any GPU-init failure.
842        let adapter = Self::request_adapter(&instance, None, settings).await?;
843        let (device, queue) = Self::request_device(&adapter).await?;
844        let surface = instance.create_surface(wgpu::SurfaceTarget::Canvas(canvas))?;
845        Ok(Self::finish_init(
846            &adapter, device, queue, surface, size, settings,
847        ))
848    }
849
850    /// Pick a GPU adapter at the settings' power preference. `None`
851    /// `compatible_surface` is used on the wasm canvas path so the probe
852    /// doesn't bind the canvas's context (see [`Self::new_from_canvas`]);
853    /// WebGPU exposes a single surface-independent adapter, so this is
854    /// safe there.
855    async fn request_adapter(
856        instance: &wgpu::Instance,
857        compatible_surface: Option<&wgpu::Surface<'static>>,
858        settings: GpuRendererSettings,
859    ) -> Result<wgpu::Adapter, GpuInitError> {
860        let power_preference = match settings.power_preference {
861            PowerPreference::Low => wgpu::PowerPreference::LowPower,
862            PowerPreference::High => wgpu::PowerPreference::HighPerformance,
863        };
864        instance
865            .request_adapter(&wgpu::RequestAdapterOptions {
866                power_preference,
867                compatible_surface,
868                force_fallback_adapter: false,
869            })
870            .await
871            .map_err(|_| GpuInitError::NoAdapter)
872    }
873
874    /// Request the device + queue from `adapter`. Pulled out of
875    /// [`Self::finish_init`] so the wasm canvas path can validate the
876    /// device **before** `create_surface` binds the canvas's WebGPU
877    /// context — if the device request fails (e.g. a browser that
878    /// rejects a wgpu-sent limit), the canvas stays pristine for the
879    /// CPU/WebGL2 fallback instead of being poisoned.
880    async fn request_device(
881        adapter: &wgpu::Adapter,
882    ) -> Result<(wgpu::Device, wgpu::Queue), GpuInitError> {
883        Ok(adapter
884            .request_device(&wgpu::DeviceDescriptor {
885                label: Some("roxlap-gpu device"),
886                required_features: wgpu::Features::empty(),
887                required_limits: pick_required_limits(&adapter.limits()),
888                experimental_features: wgpu::ExperimentalFeatures::disabled(),
889                memory_hints: wgpu::MemoryHints::default(),
890                trace: wgpu::Trace::Off,
891            })
892            .await?)
893    }
894
895    /// Shared swapchain → sky/sampler setup, run after the adapter +
896    /// device + surface exist (the surface comes from a window handle on
897    /// native, or an HTML canvas on wasm — created last on wasm so a
898    /// failed device request never touches the canvas).
899    fn finish_init(
900        adapter: &wgpu::Adapter,
901        device: wgpu::Device,
902        queue: wgpu::Queue,
903        surface: wgpu::Surface<'static>,
904        size: (u32, u32),
905        settings: GpuRendererSettings,
906    ) -> Self {
907        let info = adapter.get_info();
908        let adapter_info = format!(
909            "{name} ({backend:?}, {device_type:?})",
910            name = info.name,
911            backend = info.backend,
912            device_type = info.device_type,
913        );
914
915        let caps = surface.get_capabilities(adapter);
916        // Pick a NON-sRGB, 8-bit swapchain format. Voxlap colours are
917        // already sRGB-encoded (the slab bytes are display-ready,
918        // matching what the CPU softbuffer path writes straight to the
919        // framebuffer with no conversion); an sRGB swapchain would
920        // re-apply the gamma curve, washing the look out. We also
921        // *prefer 8-bit BGRA/RGBA* over any other non-sRGB format: some
922        // adapters (e.g. NVK) advertise a 16-bit-unorm format first,
923        // and wgpu 29 gates `create_view` on 16-bit-norm formats behind
924        // the `TEXTURE_FORMAT_16BIT_NORM` device feature (which we don't
925        // enable, to stay WebGPU-portable). Falls back to the first
926        // non-sRGB format, then `caps.formats[0]`.
927        let surface_format = caps
928            .formats
929            .iter()
930            .copied()
931            .find(|f| {
932                matches!(
933                    f,
934                    wgpu::TextureFormat::Bgra8Unorm | wgpu::TextureFormat::Rgba8Unorm
935                )
936            })
937            .or_else(|| caps.formats.iter().copied().find(|f| !f.is_srgb()))
938            .unwrap_or(caps.formats[0]);
939        let present_mode = if settings.uncapped_present {
940            pick_present_mode(&caps.present_modes)
941        } else {
942            wgpu::PresentMode::Fifo
943        };
944        // GPU.11.2 — surface the present mode: `Fifo` is vsync-capped
945        // (FPS pinned to refresh rate → compute optimisations like the
946        // mip LOD won't show up in the FPS counter). Mailbox/Immediate
947        // are uncapped. Wayland under Mesa frequently offers only Fifo.
948        eprintln!(
949            "roxlap-gpu: present mode = {present_mode:?} (available: {:?})",
950            caps.present_modes,
951        );
952        let (init_w, init_h) = size;
953        let surface_config = wgpu::SurfaceConfiguration {
954            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
955            format: surface_format,
956            width: init_w.max(1),
957            height: init_h.max(1),
958            present_mode,
959            alpha_mode: caps.alpha_modes[0],
960            view_formats: vec![],
961            desired_maximum_frame_latency: 2,
962        };
963        surface.configure(&device, &surface_config);
964
965        // GPU.8 default sky: a 1×1 mid-grey texture. Hosts replace
966        // it via `set_sky_panorama` with a real equirectangular
967        // panorama; the default stops the shader sampling
968        // uninitialised memory before that happens.
969        let default_sky_pixel = [0x80u8, 0x80, 0x80, 0xff];
970        let (sky_texture, sky_view) = create_sky_texture(&device, 1, 1, &default_sky_pixel);
971        queue.write_texture(
972            wgpu::TexelCopyTextureInfo {
973                texture: &sky_texture,
974                mip_level: 0,
975                origin: wgpu::Origin3d::ZERO,
976                aspect: wgpu::TextureAspect::All,
977            },
978            &default_sky_pixel,
979            wgpu::TexelCopyBufferLayout {
980                offset: 0,
981                bytes_per_row: Some(4),
982                rows_per_image: Some(1),
983            },
984            wgpu::Extent3d {
985                width: 1,
986                height: 1,
987                depth_or_array_layers: 1,
988            },
989        );
990        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
991            label: Some("roxlap-gpu sky_sampler"),
992            // Voxlap-convention panorama: u = elevation [0, 1]
993            // (Repeat is a no-op since values don't go outside),
994            // v = azimuth (wraps 360° — Repeat is required).
995            address_mode_u: wgpu::AddressMode::Repeat,
996            address_mode_v: wgpu::AddressMode::Repeat,
997            address_mode_w: wgpu::AddressMode::ClampToEdge,
998            mag_filter: wgpu::FilterMode::Linear,
999            min_filter: wgpu::FilterMode::Linear,
1000            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1001            ..Default::default()
1002        });
1003
1004        Self {
1005            surface,
1006            surface_config,
1007            device,
1008            queue,
1009            adapter_info,
1010            clear_colour: settings.clear_colour,
1011            frame_count: 0,
1012            chunk_dda: None,
1013            grid_dda: None,
1014            scene_dda: None,
1015            sky_texture,
1016            sky_view,
1017            sky_sampler,
1018            // Fog disabled by default — voxlap's CPU rasterizer
1019            // also runs without fog in the scene-demo, so matching
1020            // it means no GPU fog out of the box. Hosts can opt in
1021            // via `set_fog` (e.g. for atmospheric far-LOD masking).
1022            fog_color: [0.66, 0.74, 0.88],
1023            fog_near: 0.0,
1024            fog_far: 1.0e30,
1025            sprite_registry: None,
1026            sprite_model_dda: None,
1027            // GPU.10.4 — default LOD threshold: step to a coarser mip
1028            // once a voxel projects below 4 px. Empirically the best
1029            // quality/cost tradeoff; the host can override.
1030            sprite_lod_px: 4.0,
1031            // GPU.11.1 — matches the CPU demo's mip_scan_dist=64.
1032            scene_mip_scan_dist: 64.0,
1033            scene_side_shades: [[0; 4]; 2],
1034            last_fov_y_rad: 0.0,
1035            pending_frame: None,
1036            line_resources: None,
1037            line_vbuf: None,
1038            line_vbuf_cap: 0,
1039            image_resources: None,
1040            image_vbuf: None,
1041            image_vbuf_cap: 0,
1042            images: Vec::new(),
1043            #[cfg(feature = "hud")]
1044            egui_renderer: None,
1045        }
1046    }
1047
1048    /// Synchronous wrapper for hosts that don't have an async
1049    /// runtime. Internally `pollster::block_on`s [`Self::new`].
1050    ///
1051    /// # Errors
1052    /// See [`Self::new`].
1053    #[cfg(not(target_arch = "wasm32"))]
1054    pub fn new_blocking<W>(
1055        window: Arc<W>,
1056        size: (u32, u32),
1057        settings: GpuRendererSettings,
1058    ) -> Result<Self, GpuInitError>
1059    where
1060        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
1061    {
1062        pollster::block_on(Self::new(window, size, settings))
1063    }
1064
1065    /// Human-readable adapter description — name + backend +
1066    /// device type. The demo host prints this in the title bar.
1067    pub fn adapter_info(&self) -> &str {
1068        &self.adapter_info
1069    }
1070
1071    /// Borrow the underlying wgpu device — hosts use this to build
1072    /// chunk uploads (`GpuChunkResident::upload(gpu.device(), …)`).
1073    pub fn device(&self) -> &wgpu::Device {
1074        &self.device
1075    }
1076
1077    /// Borrow the wgpu queue — hosts use this for read-back paths
1078    /// (`GpuChunkResident::read_voxel_blocking(gpu.device(), gpu.queue(), …)`).
1079    pub fn queue(&self) -> &wgpu::Queue {
1080        &self.queue
1081    }
1082
1083    /// GPU.8 — upload an equirectangular panorama as the scene's
1084    /// sky texture. `rgba` is row-major, `width × height` pixels,
1085    /// 4 bytes per pixel (R, G, B, A). The shader samples it with
1086    /// `u = atan2(dir.x, dir.y) / (2π) + 0.5` (azimuth) and
1087    /// `v = acos(-dir.z) / π` (elevation), matching standard
1088    /// equirectangular layout (top of image = zenith for voxlap's
1089    /// `+z = down` basis).
1090    ///
1091    /// # Panics
1092    /// If `rgba.len() != (width * height * 4) as usize`.
1093    pub fn set_sky_panorama(&mut self, rgba: &[u8], width: u32, height: u32) {
1094        assert_eq!(
1095            rgba.len(),
1096            (width as usize) * (height as usize) * 4,
1097            "set_sky_panorama: expected w*h*4 bytes, got {}",
1098            rgba.len(),
1099        );
1100        let (tex, view) = create_sky_texture(&self.device, width, height, rgba);
1101        // Upload pixel data via `queue.write_texture` so we don't
1102        // have to map the buffer manually.
1103        self.queue.write_texture(
1104            wgpu::TexelCopyTextureInfo {
1105                texture: &tex,
1106                mip_level: 0,
1107                origin: wgpu::Origin3d::ZERO,
1108                aspect: wgpu::TextureAspect::All,
1109            },
1110            rgba,
1111            wgpu::TexelCopyBufferLayout {
1112                offset: 0,
1113                bytes_per_row: Some(width * 4),
1114                rows_per_image: Some(height),
1115            },
1116            wgpu::Extent3d {
1117                width,
1118                height,
1119                depth_or_array_layers: 1,
1120            },
1121        );
1122        self.sky_texture = tex;
1123        self.sky_view = view;
1124    }
1125
1126    /// GPU.8 — set the fog blend. `color` is per-channel [0, 1];
1127    /// `near`/`far` are world-space ray distances in voxel units.
1128    /// Hits with `t < near` show their full colour; hits with
1129    /// `t > far` show `color` exclusively; in between is a
1130    /// smoothstep blend.
1131    pub fn set_fog(&mut self, color: [f32; 3], near: f32, far: f32) {
1132        self.fog_color = color;
1133        self.fog_near = near;
1134        self.fog_far = far.max(near + 1.0);
1135    }
1136
1137    /// Re-configure the swapchain to a new physical size. Call from
1138    /// `WindowEvent::Resized`. Drops the chunk-DDA storage texture
1139    /// so [`Self::render_chunk`] rebuilds it at the new size.
1140    pub fn resize(&mut self, width: u32, height: u32) {
1141        if width == 0 || height == 0 {
1142            return;
1143        }
1144        self.surface_config.width = width;
1145        self.surface_config.height = height;
1146        self.surface.configure(&self.device, &self.surface_config);
1147        self.chunk_dda = None;
1148        self.grid_dda = None;
1149        self.scene_dda = None;
1150    }
1151
1152    /// Acquire the next swapchain frame, or `None` to skip this frame.
1153    /// wgpu 29's `get_current_texture` returns a
1154    /// [`wgpu::CurrentSurfaceTexture`] status enum (was
1155    /// `Result<_, SurfaceError>`): an outdated/lost surface reconfigures
1156    /// and skips, transient statuses just skip.
1157    fn acquire_frame(&self) -> Option<wgpu::SurfaceTexture> {
1158        use wgpu::CurrentSurfaceTexture as C;
1159        match self.surface.get_current_texture() {
1160            C::Success(t) | C::Suboptimal(t) => Some(t),
1161            C::Outdated | C::Lost => {
1162                self.surface.configure(&self.device, &self.surface_config);
1163                None
1164            }
1165            C::Timeout | C::Occluded | C::Validation => None,
1166        }
1167    }
1168
1169    /// GPU.1 render: single render pass clearing the swapchain to a
1170    /// slowly drifting colour, then presenting. Voxels arrive in
1171    /// GPU.3+.
1172    pub fn render(&mut self) {
1173        let Some(surf_tex) = self.acquire_frame() else {
1174            return;
1175        };
1176        let view = surf_tex
1177            .texture
1178            .create_view(&wgpu::TextureViewDescriptor::default());
1179
1180        // Slow colour drift so the user can tell the GPU path is
1181        // actually presenting frames vs. e.g. a frozen window.
1182        // Wrap at 2π/0.005 frames (~1257) so the cast stays exact.
1183        let phase = f64::from(self.frame_count % 1257) * 0.005;
1184        let [r, g, b] = self.clear_colour;
1185        let drift = (phase.sin() * 0.04 + 0.04).clamp(0.0, 0.1);
1186        let clear = wgpu::Color {
1187            r: (r + drift).clamp(0.0, 1.0),
1188            g: (g + drift * 0.5).clamp(0.0, 1.0),
1189            b: (b + drift * 0.25).clamp(0.0, 1.0),
1190            a: 1.0,
1191        };
1192
1193        let mut encoder = self
1194            .device
1195            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1196                label: Some("roxlap-gpu encoder"),
1197            });
1198        {
1199            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1200                label: Some("roxlap-gpu clear"),
1201                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1202                    view: &view,
1203                    depth_slice: None,
1204                    resolve_target: None,
1205                    ops: wgpu::Operations {
1206                        load: wgpu::LoadOp::Clear(clear),
1207                        store: wgpu::StoreOp::Store,
1208                    },
1209                })],
1210                depth_stencil_attachment: None,
1211                timestamp_writes: None,
1212                occlusion_query_set: None,
1213                multiview_mask: None,
1214            });
1215        }
1216        self.queue.submit(std::iter::once(encoder.finish()));
1217        surf_tex.present();
1218        self.frame_count = self.frame_count.wrapping_add(1);
1219    }
1220
1221    /// GPU.3 single-chunk render. Dispatches `chunk_dda.wgsl`
1222    /// against `resident`'s storage buffers, then blits the
1223    /// low-res storage texture to the swapchain. `camera.position`
1224    /// is in **chunk-local** voxel units (host translates from
1225    /// world coords). `max_scan_dist` caps the per-pixel DDA loop —
1226    /// scene-demo wires `+` / `-` through this each frame.
1227    ///
1228    /// # Panics
1229    /// Internally `expect`s the chunk-DDA resources to be built —
1230    /// they are constructed at the top of this function if missing.
1231    /// Cannot fire in normal control flow.
1232    pub fn render_chunk(
1233        &mut self,
1234        resident: &GpuChunkResident,
1235        camera: &Camera,
1236        max_scan_dist: u32,
1237    ) {
1238        let Some(surf_tex) = self.acquire_frame() else {
1239            return;
1240        };
1241        let surf_view = surf_tex
1242            .texture
1243            .create_view(&wgpu::TextureViewDescriptor::default());
1244
1245        let surface_w = self.surface_config.width;
1246        let surface_h = self.surface_config.height;
1247        let surface_format = self.surface_config.format;
1248
1249        // Lazy-build chunk-DDA resources; rebuild when the swapchain
1250        // grew or shrank.
1251        let needs_build = match &self.chunk_dda {
1252            Some(r) => r.storage_size != (surface_w, surface_h),
1253            None => true,
1254        };
1255        if needs_build {
1256            self.chunk_dda = Some(self.build_chunk_dda(surface_w, surface_h, surface_format));
1257        }
1258        let dda = self.chunk_dda.as_ref().expect("just built");
1259
1260        // Update uniforms.
1261        let uniform = ChunkDdaUniform {
1262            camera_pos: camera.position,
1263            _pad0: 0.0,
1264            camera_right: camera.right,
1265            _pad1: 0.0,
1266            camera_down: camera.down,
1267            _pad2: 0.0,
1268            camera_forward: camera.forward,
1269            fov_y_rad: camera.fov_y_rad,
1270            screen_size: [surface_w, surface_h],
1271            vsid: resident.vsid,
1272            max_scan_dist,
1273        };
1274        self.queue
1275            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1276
1277        // Per-frame DDA bind group — references the chunk's buffers
1278        // so we rebuild every frame (the resident can change between
1279        // calls).
1280        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1281            label: Some("roxlap-gpu chunk_dda.bg"),
1282            layout: &dda.bgl_dda,
1283            entries: &[
1284                wgpu::BindGroupEntry {
1285                    binding: 0,
1286                    resource: dda.uniform_buf.as_entire_binding(),
1287                },
1288                wgpu::BindGroupEntry {
1289                    binding: 1,
1290                    resource: resident.occupancy.as_entire_binding(),
1291                },
1292                wgpu::BindGroupEntry {
1293                    binding: 2,
1294                    resource: resident.color_offsets.as_entire_binding(),
1295                },
1296                wgpu::BindGroupEntry {
1297                    binding: 3,
1298                    resource: resident.colors.as_entire_binding(),
1299                },
1300                wgpu::BindGroupEntry {
1301                    binding: 4,
1302                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1303                },
1304            ],
1305        });
1306
1307        let mut encoder = self
1308            .device
1309            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1310                label: Some("roxlap-gpu chunk encoder"),
1311            });
1312        {
1313            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1314                label: Some("roxlap-gpu chunk_dda compute"),
1315                timestamp_writes: None,
1316            });
1317            cpass.set_pipeline(&dda.pipeline_dda);
1318            cpass.set_bind_group(0, &dda_bg, &[]);
1319            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1320        }
1321        {
1322            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1323                label: Some("roxlap-gpu chunk_dda blit"),
1324                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1325                    view: &surf_view,
1326                    depth_slice: None,
1327                    resolve_target: None,
1328                    ops: wgpu::Operations {
1329                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1330                        store: wgpu::StoreOp::Store,
1331                    },
1332                })],
1333                depth_stencil_attachment: None,
1334                timestamp_writes: None,
1335                occlusion_query_set: None,
1336                multiview_mask: None,
1337            });
1338            rpass.set_pipeline(&dda.pipeline_blit);
1339            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1340            rpass.draw(0..3, 0..1);
1341        }
1342        self.queue.submit(std::iter::once(encoder.finish()));
1343        surf_tex.present();
1344        self.frame_count = self.frame_count.wrapping_add(1);
1345    }
1346
1347    fn build_chunk_dda(
1348        &self,
1349        width: u32,
1350        height: u32,
1351        surface_format: wgpu::TextureFormat,
1352    ) -> ChunkDdaResources {
1353        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1354            label: Some("roxlap-gpu chunk_dda.storage"),
1355            size: wgpu::Extent3d {
1356                width,
1357                height,
1358                depth_or_array_layers: 1,
1359            },
1360            mip_level_count: 1,
1361            sample_count: 1,
1362            dimension: wgpu::TextureDimension::D2,
1363            format: wgpu::TextureFormat::Rgba8Unorm,
1364            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1365            view_formats: &[],
1366        });
1367        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1368
1369        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1370            label: Some("roxlap-gpu chunk_dda.uniform"),
1371            size: std::mem::size_of::<ChunkDdaUniform>() as u64,
1372            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1373            mapped_at_creation: false,
1374        });
1375
1376        let dda_shader = self
1377            .device
1378            .create_shader_module(wgpu::ShaderModuleDescriptor {
1379                label: Some("chunk_dda.wgsl"),
1380                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/chunk_dda.wgsl").into()),
1381            });
1382        let bgl_dda = self
1383            .device
1384            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1385                label: Some("roxlap-gpu chunk_dda.bgl"),
1386                entries: &[
1387                    bgl_uniform_entry(0),
1388                    bgl_storage_entry(1, true),
1389                    bgl_storage_entry(2, true),
1390                    bgl_storage_entry(3, true),
1391                    wgpu::BindGroupLayoutEntry {
1392                        binding: 4,
1393                        visibility: wgpu::ShaderStages::COMPUTE,
1394                        ty: wgpu::BindingType::StorageTexture {
1395                            access: wgpu::StorageTextureAccess::WriteOnly,
1396                            format: wgpu::TextureFormat::Rgba8Unorm,
1397                            view_dimension: wgpu::TextureViewDimension::D2,
1398                        },
1399                        count: None,
1400                    },
1401                ],
1402            });
1403        let dda_pl = self
1404            .device
1405            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1406                label: Some("roxlap-gpu chunk_dda.layout"),
1407                bind_group_layouts: &[Some(&bgl_dda)],
1408                immediate_size: 0,
1409            });
1410        let pipeline_dda = self
1411            .device
1412            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1413                label: Some("roxlap-gpu chunk_dda.pipeline"),
1414                layout: Some(&dda_pl),
1415                module: &dda_shader,
1416                entry_point: Some("render_chunk"),
1417                compilation_options: wgpu::PipelineCompilationOptions::default(),
1418                cache: None,
1419            });
1420
1421        // Fullscreen-triangle blit upscales the storage texture into
1422        // the swapchain. Nearest filter keeps the retro pixel look.
1423        let blit_shader = self
1424            .device
1425            .create_shader_module(wgpu::ShaderModuleDescriptor {
1426                label: Some("blit.wgsl"),
1427                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1428            });
1429        let bgl_blit = self
1430            .device
1431            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1432                label: Some("roxlap-gpu chunk_dda.blit_bgl"),
1433                entries: &[
1434                    wgpu::BindGroupLayoutEntry {
1435                        binding: 0,
1436                        visibility: wgpu::ShaderStages::FRAGMENT,
1437                        ty: wgpu::BindingType::Texture {
1438                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1439                            view_dimension: wgpu::TextureViewDimension::D2,
1440                            multisampled: false,
1441                        },
1442                        count: None,
1443                    },
1444                    wgpu::BindGroupLayoutEntry {
1445                        binding: 1,
1446                        visibility: wgpu::ShaderStages::FRAGMENT,
1447                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1448                        count: None,
1449                    },
1450                ],
1451            });
1452        let blit_pl = self
1453            .device
1454            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1455                label: Some("roxlap-gpu chunk_dda.blit_layout"),
1456                bind_group_layouts: &[Some(&bgl_blit)],
1457                immediate_size: 0,
1458            });
1459        let pipeline_blit = self
1460            .device
1461            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1462                label: Some("roxlap-gpu chunk_dda.blit_pipeline"),
1463                layout: Some(&blit_pl),
1464                vertex: wgpu::VertexState {
1465                    module: &blit_shader,
1466                    entry_point: Some("vs_main"),
1467                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1468                    buffers: &[],
1469                },
1470                fragment: Some(wgpu::FragmentState {
1471                    module: &blit_shader,
1472                    entry_point: Some("fs_main"),
1473                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1474                    targets: &[Some(wgpu::ColorTargetState {
1475                        format: surface_format,
1476                        blend: None,
1477                        write_mask: wgpu::ColorWrites::ALL,
1478                    })],
1479                }),
1480                primitive: wgpu::PrimitiveState::default(),
1481                depth_stencil: None,
1482                multisample: wgpu::MultisampleState::default(),
1483                multiview_mask: None,
1484                cache: None,
1485            });
1486        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1487            label: Some("roxlap-gpu chunk_dda.blit_sampler"),
1488            address_mode_u: wgpu::AddressMode::ClampToEdge,
1489            address_mode_v: wgpu::AddressMode::ClampToEdge,
1490            address_mode_w: wgpu::AddressMode::ClampToEdge,
1491            mag_filter: wgpu::FilterMode::Nearest,
1492            min_filter: wgpu::FilterMode::Nearest,
1493            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1494            ..Default::default()
1495        });
1496        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1497            label: Some("roxlap-gpu chunk_dda.blit_bg"),
1498            layout: &bgl_blit,
1499            entries: &[
1500                wgpu::BindGroupEntry {
1501                    binding: 0,
1502                    resource: wgpu::BindingResource::TextureView(&storage_view),
1503                },
1504                wgpu::BindGroupEntry {
1505                    binding: 1,
1506                    resource: wgpu::BindingResource::Sampler(&sampler),
1507                },
1508            ],
1509        });
1510
1511        ChunkDdaResources {
1512            storage_size: (width, height),
1513            storage_view,
1514            uniform_buf,
1515            bgl_dda,
1516            pipeline_dda,
1517            blit_bg,
1518            pipeline_blit,
1519            _sampler: sampler,
1520        }
1521    }
1522
1523    /// GPU.4 render — outer DDA over chunk indices + inner DDA into
1524    /// non-empty chunks. `camera.position` is in **grid-local**
1525    /// voxel units. `max_outer_steps` caps how many chunks the
1526    /// outer DDA may traverse per ray (scene-demo wires `+ / -`
1527    /// through this).
1528    ///
1529    /// # Panics
1530    /// Internally `expect`s the grid-DDA resources to be built;
1531    /// they are constructed at the top of this function if missing.
1532    pub fn render_grid(&mut self, grid: &GpuGridResident, camera: &Camera, max_outer_steps: u32) {
1533        let Some(surf_tex) = self.acquire_frame() else {
1534            return;
1535        };
1536        let surf_view = surf_tex
1537            .texture
1538            .create_view(&wgpu::TextureViewDescriptor::default());
1539
1540        let surface_w = self.surface_config.width;
1541        let surface_h = self.surface_config.height;
1542        let surface_format = self.surface_config.format;
1543
1544        let needs_build = match &self.grid_dda {
1545            Some(r) => r.storage_size != (surface_w, surface_h),
1546            None => true,
1547        };
1548        if needs_build {
1549            self.grid_dda = Some(self.build_grid_dda(surface_w, surface_h, surface_format));
1550        }
1551        let dda = self.grid_dda.as_ref().expect("just built");
1552
1553        let uniform = GridDdaUniform {
1554            camera_pos: camera.position,
1555            _pad0: 0.0,
1556            camera_right: camera.right,
1557            _pad1: 0.0,
1558            camera_down: camera.down,
1559            _pad2: 0.0,
1560            camera_forward: camera.forward,
1561            fov_y_rad: camera.fov_y_rad,
1562            screen_size: [surface_w, surface_h],
1563            vsid: grid.vsid,
1564            max_outer_steps,
1565            chunks_dims: grid.chunks_dims,
1566            _pad3: 0,
1567            origin_chunk: grid.origin_chunk,
1568            _pad4: 0,
1569        };
1570        self.queue
1571            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1572
1573        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1574            label: Some("roxlap-gpu grid_dda.bg"),
1575            layout: &dda.bgl_dda,
1576            entries: &[
1577                wgpu::BindGroupEntry {
1578                    binding: 0,
1579                    resource: dda.uniform_buf.as_entire_binding(),
1580                },
1581                wgpu::BindGroupEntry {
1582                    binding: 1,
1583                    resource: grid.occupancy.as_entire_binding(),
1584                },
1585                wgpu::BindGroupEntry {
1586                    binding: 2,
1587                    resource: grid.color_offsets.as_entire_binding(),
1588                },
1589                wgpu::BindGroupEntry {
1590                    binding: 3,
1591                    resource: grid.colors.as_entire_binding(),
1592                },
1593                wgpu::BindGroupEntry {
1594                    binding: 4,
1595                    resource: grid.chunk_colors_base.as_entire_binding(),
1596                },
1597                wgpu::BindGroupEntry {
1598                    binding: 5,
1599                    resource: grid.chunk_occupancy.as_entire_binding(),
1600                },
1601                wgpu::BindGroupEntry {
1602                    binding: 6,
1603                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1604                },
1605            ],
1606        });
1607
1608        let mut encoder = self
1609            .device
1610            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1611                label: Some("roxlap-gpu grid encoder"),
1612            });
1613        {
1614            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1615                label: Some("roxlap-gpu grid_dda compute"),
1616                timestamp_writes: None,
1617            });
1618            cpass.set_pipeline(&dda.pipeline_dda);
1619            cpass.set_bind_group(0, &dda_bg, &[]);
1620            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1621        }
1622        {
1623            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1624                label: Some("roxlap-gpu grid_dda blit"),
1625                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1626                    view: &surf_view,
1627                    depth_slice: None,
1628                    resolve_target: None,
1629                    ops: wgpu::Operations {
1630                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1631                        store: wgpu::StoreOp::Store,
1632                    },
1633                })],
1634                depth_stencil_attachment: None,
1635                timestamp_writes: None,
1636                occlusion_query_set: None,
1637                multiview_mask: None,
1638            });
1639            rpass.set_pipeline(&dda.pipeline_blit);
1640            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1641            rpass.draw(0..3, 0..1);
1642        }
1643        self.queue.submit(std::iter::once(encoder.finish()));
1644        surf_tex.present();
1645        self.frame_count = self.frame_count.wrapping_add(1);
1646    }
1647
1648    fn build_grid_dda(
1649        &self,
1650        width: u32,
1651        height: u32,
1652        surface_format: wgpu::TextureFormat,
1653    ) -> GridDdaResources {
1654        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1655            label: Some("roxlap-gpu grid_dda.storage"),
1656            size: wgpu::Extent3d {
1657                width,
1658                height,
1659                depth_or_array_layers: 1,
1660            },
1661            mip_level_count: 1,
1662            sample_count: 1,
1663            dimension: wgpu::TextureDimension::D2,
1664            format: wgpu::TextureFormat::Rgba8Unorm,
1665            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1666            view_formats: &[],
1667        });
1668        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1669
1670        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1671            label: Some("roxlap-gpu grid_dda.uniform"),
1672            size: std::mem::size_of::<GridDdaUniform>() as u64,
1673            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1674            mapped_at_creation: false,
1675        });
1676
1677        let dda_shader = self
1678            .device
1679            .create_shader_module(wgpu::ShaderModuleDescriptor {
1680                label: Some("grid_dda.wgsl"),
1681                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/grid_dda.wgsl").into()),
1682            });
1683        let bgl_dda = self
1684            .device
1685            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1686                label: Some("roxlap-gpu grid_dda.bgl"),
1687                entries: &[
1688                    bgl_uniform_entry(0),
1689                    bgl_storage_entry(1, true),
1690                    bgl_storage_entry(2, true),
1691                    bgl_storage_entry(3, true),
1692                    bgl_storage_entry(4, true),
1693                    bgl_storage_entry(5, true),
1694                    wgpu::BindGroupLayoutEntry {
1695                        binding: 6,
1696                        visibility: wgpu::ShaderStages::COMPUTE,
1697                        ty: wgpu::BindingType::StorageTexture {
1698                            access: wgpu::StorageTextureAccess::WriteOnly,
1699                            format: wgpu::TextureFormat::Rgba8Unorm,
1700                            view_dimension: wgpu::TextureViewDimension::D2,
1701                        },
1702                        count: None,
1703                    },
1704                ],
1705            });
1706        let dda_pl = self
1707            .device
1708            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1709                label: Some("roxlap-gpu grid_dda.layout"),
1710                bind_group_layouts: &[Some(&bgl_dda)],
1711                immediate_size: 0,
1712            });
1713        let pipeline_dda = self
1714            .device
1715            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1716                label: Some("roxlap-gpu grid_dda.pipeline"),
1717                layout: Some(&dda_pl),
1718                module: &dda_shader,
1719                entry_point: Some("render_grid"),
1720                compilation_options: wgpu::PipelineCompilationOptions::default(),
1721                cache: None,
1722            });
1723
1724        let blit_shader = self
1725            .device
1726            .create_shader_module(wgpu::ShaderModuleDescriptor {
1727                label: Some("blit.wgsl"),
1728                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1729            });
1730        let bgl_blit = self
1731            .device
1732            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1733                label: Some("roxlap-gpu grid_dda.blit_bgl"),
1734                entries: &[
1735                    wgpu::BindGroupLayoutEntry {
1736                        binding: 0,
1737                        visibility: wgpu::ShaderStages::FRAGMENT,
1738                        ty: wgpu::BindingType::Texture {
1739                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1740                            view_dimension: wgpu::TextureViewDimension::D2,
1741                            multisampled: false,
1742                        },
1743                        count: None,
1744                    },
1745                    wgpu::BindGroupLayoutEntry {
1746                        binding: 1,
1747                        visibility: wgpu::ShaderStages::FRAGMENT,
1748                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1749                        count: None,
1750                    },
1751                ],
1752            });
1753        let blit_pl = self
1754            .device
1755            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1756                label: Some("roxlap-gpu grid_dda.blit_layout"),
1757                bind_group_layouts: &[Some(&bgl_blit)],
1758                immediate_size: 0,
1759            });
1760        let pipeline_blit = self
1761            .device
1762            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1763                label: Some("roxlap-gpu grid_dda.blit_pipeline"),
1764                layout: Some(&blit_pl),
1765                vertex: wgpu::VertexState {
1766                    module: &blit_shader,
1767                    entry_point: Some("vs_main"),
1768                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1769                    buffers: &[],
1770                },
1771                fragment: Some(wgpu::FragmentState {
1772                    module: &blit_shader,
1773                    entry_point: Some("fs_main"),
1774                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1775                    targets: &[Some(wgpu::ColorTargetState {
1776                        format: surface_format,
1777                        blend: None,
1778                        write_mask: wgpu::ColorWrites::ALL,
1779                    })],
1780                }),
1781                primitive: wgpu::PrimitiveState::default(),
1782                depth_stencil: None,
1783                multisample: wgpu::MultisampleState::default(),
1784                multiview_mask: None,
1785                cache: None,
1786            });
1787        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1788            label: Some("roxlap-gpu grid_dda.blit_sampler"),
1789            address_mode_u: wgpu::AddressMode::ClampToEdge,
1790            address_mode_v: wgpu::AddressMode::ClampToEdge,
1791            address_mode_w: wgpu::AddressMode::ClampToEdge,
1792            mag_filter: wgpu::FilterMode::Nearest,
1793            min_filter: wgpu::FilterMode::Nearest,
1794            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1795            ..Default::default()
1796        });
1797        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1798            label: Some("roxlap-gpu grid_dda.blit_bg"),
1799            layout: &bgl_blit,
1800            entries: &[
1801                wgpu::BindGroupEntry {
1802                    binding: 0,
1803                    resource: wgpu::BindingResource::TextureView(&storage_view),
1804                },
1805                wgpu::BindGroupEntry {
1806                    binding: 1,
1807                    resource: wgpu::BindingResource::Sampler(&sampler),
1808                },
1809            ],
1810        });
1811
1812        GridDdaResources {
1813            storage_size: (width, height),
1814            storage_view,
1815            uniform_buf,
1816            bgl_dda,
1817            pipeline_dda,
1818            blit_bg,
1819            pipeline_blit,
1820            _sampler: sampler,
1821        }
1822    }
1823
1824    /// GPU.5 render — multi-grid scene marcher. `cameras[i]` is the
1825    /// world camera transformed into grid `i`'s local frame
1826    /// (caller-supplied; see scene-demo's `redraw_gpu` for the
1827    /// glam-based transform). `fov_y_rad` is the shared vertical
1828    /// FOV; `max_outer_steps` caps per-ray chunk-DDA work for each
1829    /// grid.
1830    ///
1831    /// # Panics
1832    /// If `cameras.len() != scene.grid_count`.
1833    /// `cameras[i]` is grid `i`'s world camera transformed into that
1834    /// grid's local frame (the grid marcher works in grid-local space).
1835    /// `sprite_camera` is the **world** camera: instanced sprites carry
1836    /// world-space positions/transforms, so they must project through
1837    /// the untransformed world camera — not `cameras[0]`, which is only
1838    /// the world camera when grid 0 is at identity.
1839    pub fn render_scene(
1840        &mut self,
1841        scene: &GpuSceneResident,
1842        cameras: &[Camera],
1843        sprite_camera: &Camera,
1844        fov_y_rad: f32,
1845        max_outer_steps: u32,
1846    ) {
1847        assert_eq!(
1848            cameras.len(),
1849            scene.grid_count as usize,
1850            "render_scene: {} cameras supplied, scene has {} grids",
1851            cameras.len(),
1852            scene.grid_count,
1853        );
1854        self.last_fov_y_rad = fov_y_rad; // cached for pixel_ray (picking)
1855
1856        // Deferred present: drop any frame a prior render left
1857        // un-presented (a host that skipped present/paint_egui) so we
1858        // never hold two outstanding swapchain textures.
1859        self.pending_frame = None;
1860        let Some(surf_tex) = self.acquire_frame() else {
1861            return;
1862        };
1863        let surf_view = surf_tex
1864            .texture
1865            .create_view(&wgpu::TextureViewDescriptor::default());
1866
1867        let surface_w = self.surface_config.width;
1868        let surface_h = self.surface_config.height;
1869        let surface_format = self.surface_config.format;
1870
1871        let needs_build = match &self.scene_dda {
1872            Some(r) => r.storage_size != (surface_w, surface_h),
1873            None => true,
1874        };
1875        if needs_build {
1876            self.scene_dda = Some(self.build_scene_dda(surface_w, surface_h, surface_format));
1877        }
1878        // GPU.9 — materialise the sprite pipeline the first frame
1879        // sprites are present (before the immutable `dda` borrow).
1880        // GPU.10.0 — build the model-DDA pipeline the first frame a
1881        // sprite registry is present.
1882        if self.sprite_registry.is_some() && self.sprite_model_dda.is_none() {
1883            self.sprite_model_dda = Some(self.build_sprite_model_dda());
1884        }
1885        // GPU.10.3 — frustum-cull + screen-tile-bin the sprite instances
1886        // (needs &mut self for buffer growth, so before the immutable
1887        // scene_dda borrow). Captures (visible_count, tiles_x); None when
1888        // nothing is in view.
1889        let sprite_pass: Option<(u32, u32)> = if let Some(reg) = self.sprite_registry.as_mut() {
1890            if reg.instance_capacity > 0 {
1891                // World camera — sprite positions/transforms are world-
1892                // space (independent of any grid's transform).
1893                let cam = sprite_camera;
1894                #[allow(clippy::cast_precision_loss)]
1895                let aspect = surface_w as f32 / surface_h as f32;
1896                let half_h = (fov_y_rad * 0.5).tan();
1897                let frustum = sprite_model::ViewFrustum {
1898                    pos: cam.position,
1899                    right: cam.right,
1900                    down: cam.down,
1901                    forward: cam.forward,
1902                    half_w: half_h * aspect,
1903                    half_h,
1904                    far: 1.0e9,
1905                };
1906                let (visible, tiles_x, _tiles_y) = reg.cull_bin_upload(
1907                    &self.device,
1908                    &self.queue,
1909                    &frustum,
1910                    surface_w,
1911                    surface_h,
1912                    SPRITE_TILE_SIZE,
1913                    self.sprite_lod_px,
1914                );
1915                (visible > 0).then_some((visible, tiles_x))
1916            } else {
1917                None
1918            }
1919        } else {
1920            None
1921        };
1922        let dda = self.scene_dda.as_ref().expect("just built");
1923
1924        // Pack per-grid cameras into a runtime-sized storage buffer
1925        // (binding 15) — no fixed cap on grid count.
1926        let cam_vec: Vec<SceneDdaPerGridCamera> = cameras
1927            .iter()
1928            .map(SceneDdaPerGridCamera::from_camera)
1929            .collect();
1930        let grid_cameras = upload_grid_cameras(&self.device, &cam_vec);
1931        let uniform = SceneDdaUniform {
1932            fov_y_rad,
1933            grid_count: scene.grid_count,
1934            max_outer_steps,
1935            _pad0: 0,
1936            screen_size: [surface_w, surface_h],
1937            _pad1: [0; 2],
1938            fog_color: [
1939                self.fog_color[0],
1940                self.fog_color[1],
1941                self.fog_color[2],
1942                self.fog_near,
1943            ],
1944            fog_far: self.fog_far,
1945            // L3.1: always write scene depth. Costs one storage store per
1946            // pixel, and the depth is needed for sprite z-test, sprite-less
1947            // `pick_depth`, and `draw_lines` occlusion alike.
1948            write_depth: 1,
1949            occ_page_words: scene.occupancy_page_words,
1950            occ_num_pages: scene.occupancy_num_pages,
1951            mip_scan_dist: self.scene_mip_scan_dist,
1952            _pad2: 0,
1953            _pad3: 0,
1954            _pad4: 0,
1955            // Sky direction comes from the world (sprite) camera, so a
1956            // grid-less sprite-only scene still paints a real sky.
1957            sky_cam: SceneDdaPerGridCamera::from_camera(sprite_camera),
1958            side_shades0: self.scene_side_shades[0],
1959            side_shades1: self.scene_side_shades[1],
1960        };
1961        self.queue
1962            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1963
1964        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1965            label: Some("roxlap-gpu scene_dda.bg"),
1966            layout: &dda.bgl_dda,
1967            entries: &[
1968                wgpu::BindGroupEntry {
1969                    binding: 0,
1970                    resource: dda.uniform_buf.as_entire_binding(),
1971                },
1972                // Occupancy page 0 at binding 1; pages 1..MAX_OCC_PAGES
1973                // at bindings 12.. (see GPU.X occupancy paging).
1974                wgpu::BindGroupEntry {
1975                    binding: 1,
1976                    resource: scene.occupancy_pages[0].as_entire_binding(),
1977                },
1978                wgpu::BindGroupEntry {
1979                    binding: 2,
1980                    resource: scene.all_color_offsets.as_entire_binding(),
1981                },
1982                wgpu::BindGroupEntry {
1983                    binding: 3,
1984                    resource: scene.all_colors.as_entire_binding(),
1985                },
1986                wgpu::BindGroupEntry {
1987                    binding: 4,
1988                    resource: scene.all_chunk_colors_base.as_entire_binding(),
1989                },
1990                wgpu::BindGroupEntry {
1991                    binding: 5,
1992                    resource: scene.all_chunk_occupancy.as_entire_binding(),
1993                },
1994                wgpu::BindGroupEntry {
1995                    binding: 6,
1996                    resource: scene.grid_static_meta.as_entire_binding(),
1997                },
1998                wgpu::BindGroupEntry {
1999                    binding: 7,
2000                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
2001                },
2002                wgpu::BindGroupEntry {
2003                    binding: 8,
2004                    resource: dda.framebuffer.as_entire_binding(),
2005                },
2006                wgpu::BindGroupEntry {
2007                    binding: 9,
2008                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
2009                },
2010                wgpu::BindGroupEntry {
2011                    binding: 10,
2012                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
2013                },
2014                wgpu::BindGroupEntry {
2015                    binding: 11,
2016                    resource: dda.depth_buffer.as_entire_binding(),
2017                },
2018                wgpu::BindGroupEntry {
2019                    binding: 12,
2020                    resource: scene.occupancy_pages[1].as_entire_binding(),
2021                },
2022                wgpu::BindGroupEntry {
2023                    binding: 13,
2024                    resource: scene.occupancy_pages[2].as_entire_binding(),
2025                },
2026                wgpu::BindGroupEntry {
2027                    binding: 14,
2028                    resource: scene.occupancy_pages[3].as_entire_binding(),
2029                },
2030                wgpu::BindGroupEntry {
2031                    binding: 15,
2032                    resource: grid_cameras.as_entire_binding(),
2033                },
2034            ],
2035        });
2036
2037        // GPU.9 — when sprites are present, build both splatter bind
2038        // groups up front (the splat pass writes the key buffer; the
2039        // resolve pass reads keys + scene depth and writes colour).
2040        // GPU.10.3 — model-DDA bind group + per-frame uniform, using the
2041        // cull/bin results captured above. Per-model + per-instance data
2042        // + the tile lists live in the registry buffers.
2043        let sprite_model_bg = match (&self.sprite_model_dda, &self.sprite_registry, sprite_pass) {
2044            (Some(smd), Some(reg), Some((visible, tiles_x))) => {
2045                // World camera (see the cull pass above) — sprites
2046                // project through it regardless of grid 0's transform.
2047                let cam = sprite_camera;
2048                let uni = SpriteModelUniform {
2049                    cam_pos: cam.position,
2050                    _p0: 0.0,
2051                    cam_right: cam.right,
2052                    _p1: 0.0,
2053                    cam_down: cam.down,
2054                    _p2: 0.0,
2055                    cam_forward: cam.forward,
2056                    _p3: 0.0,
2057                    fog_color: [
2058                        self.fog_color[0],
2059                        self.fog_color[1],
2060                        self.fog_color[2],
2061                        self.fog_near,
2062                    ],
2063                    screen_size: [surface_w, surface_h],
2064                    instance_count: visible,
2065                    fog_far: self.fog_far,
2066                    fov_y_rad,
2067                    tiles_x,
2068                    tile_size: SPRITE_TILE_SIZE,
2069                    _p6: 0.0,
2070                };
2071                self.queue
2072                    .write_buffer(&smd.uniform_buf, 0, bytemuck::bytes_of(&uni));
2073                Some(self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2074                    label: Some("roxlap-gpu sprite_model_dda.bg"),
2075                    layout: &smd.bgl,
2076                    entries: &[
2077                        wgpu::BindGroupEntry {
2078                            binding: 0,
2079                            resource: smd.uniform_buf.as_entire_binding(),
2080                        },
2081                        wgpu::BindGroupEntry {
2082                            binding: 1,
2083                            resource: reg.occupancy.as_entire_binding(),
2084                        },
2085                        wgpu::BindGroupEntry {
2086                            binding: 2,
2087                            resource: reg.colors.as_entire_binding(),
2088                        },
2089                        wgpu::BindGroupEntry {
2090                            binding: 3,
2091                            resource: reg.color_offsets.as_entire_binding(),
2092                        },
2093                        wgpu::BindGroupEntry {
2094                            binding: 4,
2095                            resource: reg.model_meta.as_entire_binding(),
2096                        },
2097                        wgpu::BindGroupEntry {
2098                            binding: 5,
2099                            resource: reg.instances.as_entire_binding(),
2100                        },
2101                        wgpu::BindGroupEntry {
2102                            binding: 6,
2103                            resource: dda.depth_buffer.as_entire_binding(),
2104                        },
2105                        wgpu::BindGroupEntry {
2106                            binding: 7,
2107                            resource: dda.framebuffer.as_entire_binding(),
2108                        },
2109                        wgpu::BindGroupEntry {
2110                            binding: 8,
2111                            resource: reg.tile_ranges.as_entire_binding(),
2112                        },
2113                        wgpu::BindGroupEntry {
2114                            binding: 9,
2115                            resource: reg.tile_instances.as_entire_binding(),
2116                        },
2117                        wgpu::BindGroupEntry {
2118                            binding: 10,
2119                            resource: reg.dirs.as_entire_binding(),
2120                        },
2121                        wgpu::BindGroupEntry {
2122                            binding: 11,
2123                            resource: reg.colmul.as_entire_binding(),
2124                        },
2125                    ],
2126                }))
2127            }
2128            _ => None,
2129        };
2130
2131        let mut encoder = self
2132            .device
2133            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2134                label: Some("roxlap-gpu scene encoder"),
2135            });
2136        {
2137            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
2138                label: Some("roxlap-gpu scene_dda compute"),
2139                timestamp_writes: None,
2140            });
2141            cpass.set_pipeline(&dda.pipeline_dda);
2142            cpass.set_bind_group(0, &dda_bg, &[]);
2143            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
2144        }
2145        // GPU.10 — sprite model-DDA pass: one thread per pixel marches
2146        // the tile's instances + composites against scene depth, after
2147        // the scene pass wrote the depth buffer and before the blit.
2148        if let (Some(smd), Some(bg)) = (&self.sprite_model_dda, &sprite_model_bg) {
2149            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
2150                label: Some("roxlap-gpu sprite_model_dda"),
2151                timestamp_writes: None,
2152            });
2153            cpass.set_pipeline(&smd.pipeline);
2154            cpass.set_bind_group(0, bg, &[]);
2155            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
2156        }
2157        {
2158            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2159                label: Some("roxlap-gpu scene_dda blit"),
2160                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2161                    view: &surf_view,
2162                    depth_slice: None,
2163                    resolve_target: None,
2164                    ops: wgpu::Operations {
2165                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
2166                        store: wgpu::StoreOp::Store,
2167                    },
2168                })],
2169                depth_stencil_attachment: None,
2170                timestamp_writes: None,
2171                occlusion_query_set: None,
2172                multiview_mask: None,
2173            });
2174            rpass.set_pipeline(&dda.pipeline_blit);
2175            rpass.set_bind_group(0, &dda.blit_bg, &[]);
2176            rpass.draw(0..3, 0..1);
2177        }
2178        self.queue.submit(std::iter::once(encoder.finish()));
2179        // Deferred present — the host calls `present` or `paint_egui`.
2180        self.pending_frame = Some((surf_tex, surf_view));
2181        self.frame_count = self.frame_count.wrapping_add(1);
2182    }
2183
2184    /// Like [`Self::render`] (clear to colour) but **deferred**: stashes
2185    /// the frame for [`Self::present`] / [`Self::paint_egui`] instead of
2186    /// presenting. The facade uses this before any grid is resident so a
2187    /// HUD can still be painted over an empty scene.
2188    pub fn render_clear_deferred(&mut self) {
2189        self.pending_frame = None;
2190        let Some(surf_tex) = self.acquire_frame() else {
2191            return;
2192        };
2193        let view = surf_tex
2194            .texture
2195            .create_view(&wgpu::TextureViewDescriptor::default());
2196        let [r, g, b] = self.clear_colour;
2197        let mut encoder = self
2198            .device
2199            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2200                label: Some("roxlap-gpu clear (deferred)"),
2201            });
2202        {
2203            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2204                label: Some("roxlap-gpu clear (deferred)"),
2205                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2206                    view: &view,
2207                    depth_slice: None,
2208                    resolve_target: None,
2209                    ops: wgpu::Operations {
2210                        load: wgpu::LoadOp::Clear(wgpu::Color { r, g, b, a: 1.0 }),
2211                        store: wgpu::StoreOp::Store,
2212                    },
2213                })],
2214                depth_stencil_attachment: None,
2215                timestamp_writes: None,
2216                occlusion_query_set: None,
2217                multiview_mask: None,
2218            });
2219        }
2220        self.queue.submit(std::iter::once(encoder.finish()));
2221        self.pending_frame = Some((surf_tex, view));
2222    }
2223
2224    /// Present the frame stashed by the last deferred render
2225    /// ([`Self::render_scene`] / [`Self::render_clear_deferred`]). No-op
2226    /// if nothing is pending (e.g. the surface was lost mid-render).
2227    pub fn present(&mut self) {
2228        if let Some((surf_tex, _view)) = self.pending_frame.take() {
2229            surf_tex.present();
2230        }
2231    }
2232
2233    /// Draw depth-tested world-space [`GpuLine`]s over the pending frame
2234    /// (L3.2). Projects each endpoint with `cam` (the marcher's pinhole) +
2235    /// the last frame's FOV / surface size, expands to screen-space quads,
2236    /// and runs a `LoadOp::Load` pass into the pending swapchain view — so
2237    /// the lines land on the marched frame and a later `present` /
2238    /// `paint_egui` still finishes it (the pending frame is left intact).
2239    /// Depth-tested lines are occluded by nearer marched geometry (compared
2240    /// against the scene-DDA depth buffer's `best_t`); call after `render`,
2241    /// before `present` / `paint_egui`. No-op if no frame is pending.
2242    pub fn draw_lines_deferred(&mut self, cam: &GpuLineCamera, lines: &[GpuLine]) {
2243        if self.pending_frame.is_none() || lines.is_empty() {
2244            return;
2245        }
2246        let (w, h) = (self.surface_config.width, self.surface_config.height);
2247        let fov = self.last_fov_y_rad;
2248        if w == 0 || h == 0 || fov <= 0.0 {
2249            return; // no frame marched yet — no projection to reuse
2250        }
2251        let verts = build_line_vertices(cam, lines, w, h, fov);
2252        if verts.is_empty() {
2253            return;
2254        }
2255        self.ensure_line_resources();
2256        let res = self.line_resources.as_ref().expect("just built");
2257
2258        // Skip the depth test when there's no scene depth buffer to read
2259        // (sprite-only / empty scene) — bind the 1-word dummy so the layout
2260        // is satisfied; `no_depth = 1` keeps the shader from indexing it.
2261        let no_depth = u32::from(self.scene_dda.is_none());
2262        let params = LineParams {
2263            screen_w: w,
2264            screen_h: h,
2265            depth_bias: LINE_DEPTH_BIAS,
2266            no_depth,
2267        };
2268        self.queue
2269            .write_buffer(&res.uniform_buf, 0, bytemuck::bytes_of(&params));
2270
2271        let depth_resource = match &self.scene_dda {
2272            Some(dda) => dda.depth_buffer.as_entire_binding(),
2273            None => res.dummy_depth.as_entire_binding(),
2274        };
2275        let bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2276            label: Some("roxlap-gpu line.bg"),
2277            layout: &res.bgl,
2278            entries: &[
2279                wgpu::BindGroupEntry {
2280                    binding: 0,
2281                    resource: res.uniform_buf.as_entire_binding(),
2282                },
2283                wgpu::BindGroupEntry {
2284                    binding: 1,
2285                    resource: depth_resource,
2286                },
2287            ],
2288        });
2289
2290        // Grow-only persistent vertex buffer (L3.3): one `write_buffer`
2291        // per overlay, reused across frames. Power-of-two capacity keeps
2292        // re-allocation rare as the segment count drifts.
2293        let needed = std::mem::size_of_val(verts.as_slice()) as u64;
2294        if self.line_vbuf_cap < needed {
2295            let cap = needed.next_power_of_two().max(4096);
2296            self.line_vbuf = Some(self.device.create_buffer(&wgpu::BufferDescriptor {
2297                label: Some("roxlap-gpu line.vbuf"),
2298                size: cap,
2299                usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
2300                mapped_at_creation: false,
2301            }));
2302            self.line_vbuf_cap = cap;
2303        }
2304        let vbuf = self.line_vbuf.as_ref().expect("ensured above");
2305        self.queue
2306            .write_buffer(vbuf, 0, bytemuck::cast_slice(&verts));
2307
2308        let view = &self.pending_frame.as_ref().expect("checked above").1;
2309        let mut encoder = self
2310            .device
2311            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2312                label: Some("roxlap-gpu lines"),
2313            });
2314        {
2315            // `LoadOp::Load` keeps the marcher's frame; the lines draw over
2316            // it. Manual depth test in the FS (no depth-stencil attachment).
2317            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2318                label: Some("roxlap-gpu line paint"),
2319                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2320                    view,
2321                    depth_slice: None,
2322                    resolve_target: None,
2323                    ops: wgpu::Operations {
2324                        load: wgpu::LoadOp::Load,
2325                        store: wgpu::StoreOp::Store,
2326                    },
2327                })],
2328                depth_stencil_attachment: None,
2329                timestamp_writes: None,
2330                occlusion_query_set: None,
2331                multiview_mask: None,
2332            });
2333            pass.set_pipeline(&res.pipeline);
2334            pass.set_bind_group(0, &bg, &[]);
2335            pass.set_vertex_buffer(0, vbuf.slice(..));
2336            pass.draw(0..verts.len() as u32, 0..1);
2337        }
2338        self.queue.submit(std::iter::once(encoder.finish()));
2339        // pending_frame left intact — present/paint_egui finishes the frame.
2340    }
2341
2342    /// Lazy-build the [`LineResources`] (`line.wgsl` pipeline + uniform +
2343    /// dummy depth buffer). The colour target uses the surface format with
2344    /// straight-alpha over-blending; no depth-stencil attachment (the depth
2345    /// test is manual in the fragment shader against the scene depth buffer).
2346    fn ensure_line_resources(&mut self) {
2347        if self.line_resources.is_some() {
2348            return;
2349        }
2350        let shader = self
2351            .device
2352            .create_shader_module(wgpu::ShaderModuleDescriptor {
2353                label: Some("line.wgsl"),
2354                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/line.wgsl").into()),
2355            });
2356        let bgl = self
2357            .device
2358            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2359                label: Some("roxlap-gpu line.bgl"),
2360                entries: &[
2361                    wgpu::BindGroupLayoutEntry {
2362                        binding: 0,
2363                        visibility: wgpu::ShaderStages::FRAGMENT,
2364                        ty: wgpu::BindingType::Buffer {
2365                            ty: wgpu::BufferBindingType::Uniform,
2366                            has_dynamic_offset: false,
2367                            min_binding_size: None,
2368                        },
2369                        count: None,
2370                    },
2371                    wgpu::BindGroupLayoutEntry {
2372                        binding: 1,
2373                        visibility: wgpu::ShaderStages::FRAGMENT,
2374                        ty: wgpu::BindingType::Buffer {
2375                            ty: wgpu::BufferBindingType::Storage { read_only: true },
2376                            has_dynamic_offset: false,
2377                            min_binding_size: None,
2378                        },
2379                        count: None,
2380                    },
2381                ],
2382            });
2383        let layout = self
2384            .device
2385            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2386                label: Some("roxlap-gpu line.layout"),
2387                bind_group_layouts: &[Some(&bgl)],
2388                immediate_size: 0,
2389            });
2390        let pipeline = self
2391            .device
2392            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
2393                label: Some("roxlap-gpu line.pipeline"),
2394                layout: Some(&layout),
2395                vertex: wgpu::VertexState {
2396                    module: &shader,
2397                    entry_point: Some("vs_main"),
2398                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2399                    buffers: &[wgpu::VertexBufferLayout {
2400                        array_stride: std::mem::size_of::<LineVertex>() as u64,
2401                        step_mode: wgpu::VertexStepMode::Vertex,
2402                        attributes: &wgpu::vertex_attr_array![
2403                            0 => Float32x2, // pos (NDC)
2404                            1 => Float32,   // depth
2405                            2 => Float32,   // depth_test
2406                            3 => Float32x4, // color
2407                        ],
2408                    }],
2409                },
2410                fragment: Some(wgpu::FragmentState {
2411                    module: &shader,
2412                    entry_point: Some("fs_main"),
2413                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2414                    targets: &[Some(wgpu::ColorTargetState {
2415                        format: self.surface_config.format,
2416                        blend: Some(wgpu::BlendState::ALPHA_BLENDING),
2417                        write_mask: wgpu::ColorWrites::ALL,
2418                    })],
2419                }),
2420                primitive: wgpu::PrimitiveState {
2421                    cull_mode: None,
2422                    ..Default::default()
2423                },
2424                depth_stencil: None,
2425                multisample: wgpu::MultisampleState::default(),
2426                multiview_mask: None,
2427                cache: None,
2428            });
2429        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2430            label: Some("roxlap-gpu line.uniform"),
2431            size: std::mem::size_of::<LineParams>() as u64,
2432            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2433            mapped_at_creation: false,
2434        });
2435        let dummy_depth = self.device.create_buffer(&wgpu::BufferDescriptor {
2436            label: Some("roxlap-gpu line.dummy_depth"),
2437            size: 4,
2438            usage: wgpu::BufferUsages::STORAGE,
2439            mapped_at_creation: false,
2440        });
2441        self.line_resources = Some(LineResources {
2442            pipeline,
2443            bgl,
2444            uniform_buf,
2445            dummy_depth,
2446        });
2447    }
2448
2449    /// Upload (or replace) an RGBA8 image as a sampled texture, returning
2450    /// a stable id for [`GpuImageQuad::image`]. `rgba` is row-major,
2451    /// `width * height * 4` bytes, straight (un-premultiplied) alpha.
2452    /// Reuses a dropped slot when one exists. Returns `0` for malformed
2453    /// input (an id that draws nothing).
2454    pub fn upload_image(&mut self, rgba: &[u8], width: u32, height: u32) -> usize {
2455        if width == 0 || height == 0 || rgba.len() != (width as usize) * (height as usize) * 4 {
2456            return 0;
2457        }
2458        let texture = self.device.create_texture(&wgpu::TextureDescriptor {
2459            label: Some("roxlap-gpu image_sprite"),
2460            size: wgpu::Extent3d {
2461                width,
2462                height,
2463                depth_or_array_layers: 1,
2464            },
2465            mip_level_count: 1,
2466            sample_count: 1,
2467            dimension: wgpu::TextureDimension::D2,
2468            format: wgpu::TextureFormat::Rgba8Unorm,
2469            usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
2470            view_formats: &[],
2471        });
2472        self.queue.write_texture(
2473            wgpu::TexelCopyTextureInfo {
2474                texture: &texture,
2475                mip_level: 0,
2476                origin: wgpu::Origin3d::ZERO,
2477                aspect: wgpu::TextureAspect::All,
2478            },
2479            rgba,
2480            wgpu::TexelCopyBufferLayout {
2481                offset: 0,
2482                bytes_per_row: Some(width * 4),
2483                rows_per_image: Some(height),
2484            },
2485            wgpu::Extent3d {
2486                width,
2487                height,
2488                depth_or_array_layers: 1,
2489            },
2490        );
2491        let view = texture.create_view(&wgpu::TextureViewDescriptor::default());
2492        let resident = ImageResident {
2493            view,
2494            _texture: texture,
2495        };
2496        if let Some(slot) = self.images.iter().position(Option::is_none) {
2497            self.images[slot] = Some(resident);
2498            slot
2499        } else {
2500            self.images.push(Some(resident));
2501            self.images.len() - 1
2502        }
2503    }
2504
2505    /// Release an image uploaded with [`Self::upload_image`] (the slot
2506    /// becomes reusable).
2507    pub fn drop_image(&mut self, id: usize) {
2508        if let Some(slot) = self.images.get_mut(id) {
2509            *slot = None;
2510        }
2511    }
2512
2513    /// Draw world-space 2D image sprites ([`GpuImageQuad`]) over the
2514    /// pending frame — the textured-quad sibling of
2515    /// [`Self::draw_lines_deferred`]. Projects each quad with `cam` (the
2516    /// marcher's pinhole) + the last frame's FOV / surface size, expands +
2517    /// near-clips to triangles, and runs one `LoadOp::Load` pass with a
2518    /// draw per quad (each binds its own texture). UVs are perspective-correct;
2519    /// depth-tested quads are occluded by nearer marched geometry. Call
2520    /// after `render`, before `present` / `paint_egui`. No-op if no frame
2521    /// is pending.
2522    pub fn draw_images_deferred(&mut self, cam: &GpuLineCamera, quads: &[GpuImageQuad]) {
2523        if self.pending_frame.is_none() || quads.is_empty() {
2524            return;
2525        }
2526        let (w, h) = (self.surface_config.width, self.surface_config.height);
2527        let fov = self.last_fov_y_rad;
2528        if w == 0 || h == 0 || fov <= 0.0 {
2529            return;
2530        }
2531
2532        // Concatenate every quad's verts into one buffer, recording each
2533        // quad's (range, texture) so they share a single render pass.
2534        let mut verts: Vec<ImageVertex> = Vec::new();
2535        let mut draws: Vec<(u32, u32, usize)> = Vec::new();
2536        for quad in quads {
2537            if !matches!(self.images.get(quad.image), Some(Some(_))) {
2538                continue; // dropped / never-uploaded id
2539            }
2540            let v = build_image_vertices(cam, quad, w, h, fov);
2541            if v.is_empty() {
2542                continue;
2543            }
2544            let start = verts.len() as u32;
2545            verts.extend_from_slice(&v);
2546            draws.push((start, verts.len() as u32, quad.image));
2547        }
2548        if draws.is_empty() {
2549            return;
2550        }
2551
2552        self.ensure_image_resources();
2553        let no_depth = u32::from(self.scene_dda.is_none());
2554        let params = LineParams {
2555            screen_w: w,
2556            screen_h: h,
2557            depth_bias: LINE_DEPTH_BIAS,
2558            no_depth,
2559        };
2560        {
2561            let res = self.image_resources.as_ref().expect("just built");
2562            self.queue
2563                .write_buffer(&res.uniform_buf, 0, bytemuck::bytes_of(&params));
2564        }
2565
2566        // Grow-only persistent vertex buffer (mirrors the line vbuf).
2567        let needed = std::mem::size_of_val(verts.as_slice()) as u64;
2568        if self.image_vbuf_cap < needed {
2569            let cap = needed.next_power_of_two().max(4096);
2570            self.image_vbuf = Some(self.device.create_buffer(&wgpu::BufferDescriptor {
2571                label: Some("roxlap-gpu image.vbuf"),
2572                size: cap,
2573                usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
2574                mapped_at_creation: false,
2575            }));
2576            self.image_vbuf_cap = cap;
2577        }
2578        let vbuf = self.image_vbuf.as_ref().expect("ensured above");
2579        self.queue
2580            .write_buffer(vbuf, 0, bytemuck::cast_slice(&verts));
2581
2582        // One bind group per draw (the texture view differs per quad).
2583        let res = self.image_resources.as_ref().expect("just built");
2584        let depth_resource = match &self.scene_dda {
2585            Some(dda) => dda.depth_buffer.as_entire_binding(),
2586            None => res.dummy_depth.as_entire_binding(),
2587        };
2588        let bind_groups: Vec<wgpu::BindGroup> = draws
2589            .iter()
2590            .map(|&(_, _, image_id)| {
2591                let resident = self.images[image_id].as_ref().expect("checked present");
2592                self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2593                    label: Some("roxlap-gpu image.bg"),
2594                    layout: &res.bgl,
2595                    entries: &[
2596                        wgpu::BindGroupEntry {
2597                            binding: 0,
2598                            resource: res.uniform_buf.as_entire_binding(),
2599                        },
2600                        wgpu::BindGroupEntry {
2601                            binding: 1,
2602                            resource: depth_resource.clone(),
2603                        },
2604                        wgpu::BindGroupEntry {
2605                            binding: 2,
2606                            resource: wgpu::BindingResource::TextureView(&resident.view),
2607                        },
2608                        wgpu::BindGroupEntry {
2609                            binding: 3,
2610                            resource: wgpu::BindingResource::Sampler(&res.sampler),
2611                        },
2612                    ],
2613                })
2614            })
2615            .collect();
2616
2617        let view = &self.pending_frame.as_ref().expect("checked above").1;
2618        let mut encoder = self
2619            .device
2620            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2621                label: Some("roxlap-gpu images"),
2622            });
2623        {
2624            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2625                label: Some("roxlap-gpu image paint"),
2626                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2627                    view,
2628                    depth_slice: None,
2629                    resolve_target: None,
2630                    ops: wgpu::Operations {
2631                        load: wgpu::LoadOp::Load,
2632                        store: wgpu::StoreOp::Store,
2633                    },
2634                })],
2635                depth_stencil_attachment: None,
2636                timestamp_writes: None,
2637                occlusion_query_set: None,
2638                multiview_mask: None,
2639            });
2640            pass.set_pipeline(&res.pipeline);
2641            pass.set_vertex_buffer(0, vbuf.slice(..));
2642            for (&(start, end, _), bg) in draws.iter().zip(&bind_groups) {
2643                pass.set_bind_group(0, bg, &[]);
2644                pass.draw(start..end, 0..1);
2645            }
2646        }
2647        self.queue.submit(std::iter::once(encoder.finish()));
2648        // pending_frame left intact — present/paint_egui finishes it.
2649    }
2650
2651    /// Lazy-build the [`ImageResources`] (`image.wgsl` pipeline + uniform +
2652    /// nearest sampler + dummy depth). Straight-alpha over-blend, no
2653    /// depth-stencil attachment (the depth test is manual in the FS).
2654    fn ensure_image_resources(&mut self) {
2655        if self.image_resources.is_some() {
2656            return;
2657        }
2658        let shader = self
2659            .device
2660            .create_shader_module(wgpu::ShaderModuleDescriptor {
2661                label: Some("image.wgsl"),
2662                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/image.wgsl").into()),
2663            });
2664        let bgl = self
2665            .device
2666            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2667                label: Some("roxlap-gpu image.bgl"),
2668                entries: &[
2669                    wgpu::BindGroupLayoutEntry {
2670                        binding: 0,
2671                        visibility: wgpu::ShaderStages::FRAGMENT,
2672                        ty: wgpu::BindingType::Buffer {
2673                            ty: wgpu::BufferBindingType::Uniform,
2674                            has_dynamic_offset: false,
2675                            min_binding_size: None,
2676                        },
2677                        count: None,
2678                    },
2679                    wgpu::BindGroupLayoutEntry {
2680                        binding: 1,
2681                        visibility: wgpu::ShaderStages::FRAGMENT,
2682                        ty: wgpu::BindingType::Buffer {
2683                            ty: wgpu::BufferBindingType::Storage { read_only: true },
2684                            has_dynamic_offset: false,
2685                            min_binding_size: None,
2686                        },
2687                        count: None,
2688                    },
2689                    wgpu::BindGroupLayoutEntry {
2690                        binding: 2,
2691                        visibility: wgpu::ShaderStages::FRAGMENT,
2692                        ty: wgpu::BindingType::Texture {
2693                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
2694                            view_dimension: wgpu::TextureViewDimension::D2,
2695                            multisampled: false,
2696                        },
2697                        count: None,
2698                    },
2699                    wgpu::BindGroupLayoutEntry {
2700                        binding: 3,
2701                        visibility: wgpu::ShaderStages::FRAGMENT,
2702                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
2703                        count: None,
2704                    },
2705                ],
2706            });
2707        let layout = self
2708            .device
2709            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2710                label: Some("roxlap-gpu image.layout"),
2711                bind_group_layouts: &[Some(&bgl)],
2712                immediate_size: 0,
2713            });
2714        let pipeline = self
2715            .device
2716            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
2717                label: Some("roxlap-gpu image.pipeline"),
2718                layout: Some(&layout),
2719                vertex: wgpu::VertexState {
2720                    module: &shader,
2721                    entry_point: Some("vs_main"),
2722                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2723                    buffers: &[wgpu::VertexBufferLayout {
2724                        array_stride: std::mem::size_of::<ImageVertex>() as u64,
2725                        step_mode: wgpu::VertexStepMode::Vertex,
2726                        attributes: &wgpu::vertex_attr_array![
2727                            0 => Float32x2, // ndc
2728                            1 => Float32,   // w
2729                            2 => Float32,   // depth
2730                            3 => Float32,   // depth_test
2731                            4 => Float32x2, // uv
2732                            5 => Float32x4, // tint
2733                        ],
2734                    }],
2735                },
2736                fragment: Some(wgpu::FragmentState {
2737                    module: &shader,
2738                    entry_point: Some("fs_main"),
2739                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2740                    targets: &[Some(wgpu::ColorTargetState {
2741                        format: self.surface_config.format,
2742                        blend: Some(wgpu::BlendState::ALPHA_BLENDING),
2743                        write_mask: wgpu::ColorWrites::ALL,
2744                    })],
2745                }),
2746                primitive: wgpu::PrimitiveState {
2747                    cull_mode: None,
2748                    ..Default::default()
2749                },
2750                depth_stencil: None,
2751                multisample: wgpu::MultisampleState::default(),
2752                multiview_mask: None,
2753                cache: None,
2754            });
2755        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2756            label: Some("roxlap-gpu image.uniform"),
2757            size: std::mem::size_of::<LineParams>() as u64,
2758            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2759            mapped_at_creation: false,
2760        });
2761        let dummy_depth = self.device.create_buffer(&wgpu::BufferDescriptor {
2762            label: Some("roxlap-gpu image.dummy_depth"),
2763            size: 4,
2764            usage: wgpu::BufferUsages::STORAGE,
2765            mapped_at_creation: false,
2766        });
2767        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
2768            label: Some("roxlap-gpu image.sampler"),
2769            // Nearest + clamp: pixel-art references want crisp texels and
2770            // no wrap bleed at the quad edges.
2771            address_mode_u: wgpu::AddressMode::ClampToEdge,
2772            address_mode_v: wgpu::AddressMode::ClampToEdge,
2773            address_mode_w: wgpu::AddressMode::ClampToEdge,
2774            mag_filter: wgpu::FilterMode::Nearest,
2775            min_filter: wgpu::FilterMode::Nearest,
2776            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
2777            ..Default::default()
2778        });
2779        self.image_resources = Some(ImageResources {
2780            pipeline,
2781            bgl,
2782            uniform_buf,
2783            dummy_depth,
2784            sampler,
2785        });
2786    }
2787
2788    /// Project a world point to window pixels under the marcher's
2789    /// vertical-FOV pinhole (the inverse of [`Self::pixel_ray`]), using
2790    /// the last-rendered frame's size + FOV. `None` before the first
2791    /// scene render or for a point at/behind the near plane.
2792    #[must_use]
2793    pub fn project_point(
2794        &self,
2795        cam_pos: [f32; 3],
2796        right: [f32; 3],
2797        down: [f32; 3],
2798        forward: [f32; 3],
2799        world: [f32; 3],
2800    ) -> Option<(f32, f32)> {
2801        let dda = self.scene_dda.as_ref()?;
2802        let (w, h) = dda.storage_size;
2803        if w == 0 || h == 0 || self.last_fov_y_rad <= 0.0 {
2804            return None;
2805        }
2806        let d = [
2807            world[0] - cam_pos[0],
2808            world[1] - cam_pos[1],
2809            world[2] - cam_pos[2],
2810        ];
2811        let cz = forward[0] * d[0] + forward[1] * d[1] + forward[2] * d[2];
2812        if cz < LINE_NEAR_Z {
2813            return None;
2814        }
2815        let cx = right[0] * d[0] + right[1] * d[1] + right[2] * d[2];
2816        let cy = down[0] * d[0] + down[1] * d[1] + down[2] * d[2];
2817        let half_h = (self.last_fov_y_rad * 0.5).tan();
2818        let half_w = half_h * (w as f32 / h as f32);
2819        let ndc_x = (cx / cz) / half_w;
2820        let ndc_y = -(cy / cz) / half_h;
2821        let sx = (ndc_x * 0.5 + 0.5) * w as f32;
2822        let sy = (0.5 - ndc_y * 0.5) * h as f32;
2823        Some((sx, sy))
2824    }
2825
/// Composites an `egui` UI over the pending frame and then presents it
/// (`hud` feature).
///
/// * `jobs`: the host's tessellated primitives
///   (`egui::Context::tessellate`).
/// * `textures`: the per-frame texture delta from `egui::FullOutput`.
/// * `pixels_per_point`: the UI scale.
///
/// The UI is drawn with `LoadOp::Load` in its own encoder, submitted
/// after the scene's, so it lands on top of the world. The egui renderer
/// is created on first use. Does nothing when no frame is pending.
#[cfg(feature = "hud")]
pub fn paint_egui(
    &mut self,
    jobs: &[egui::ClippedPrimitive],
    textures: &egui::TexturesDelta,
    pixels_per_point: f32,
) {
    let Some((frame, target)) = self.pending_frame.take() else {
        return;
    };
    let device = &self.device;
    let queue = &self.queue;
    let surface_format = self.surface_config.format;

    let renderer = self.egui_renderer.get_or_insert_with(|| {
        let options = egui_wgpu::RendererOptions {
            msaa_samples: 1,
            depth_stencil_format: None,
            dithering: false,
            ..Default::default()
        };
        egui_wgpu::Renderer::new(device, surface_format, options)
    });
    let screen = egui_wgpu::ScreenDescriptor {
        size_in_pixels: [self.surface_config.width, self.surface_config.height],
        pixels_per_point,
    };

    // Apply new / changed textures before the buffers that use them.
    textures
        .set
        .iter()
        .for_each(|(id, delta)| renderer.update_texture(device, queue, *id, delta));

    let encoder_desc = wgpu::CommandEncoderDescriptor {
        label: Some("roxlap-gpu egui"),
    };
    let mut encoder = device.create_command_encoder(&encoder_desc);
    // Command buffers returned by egui are submitted ahead of our encoder.
    let mut command_buffers = renderer.update_buffers(device, queue, &mut encoder, jobs, &screen);
    {
        // `LoadOp::Load` preserves the marcher's frame so egui is drawn
        // over it.
        let keep_existing = wgpu::Operations {
            load: wgpu::LoadOp::Load,
            store: wgpu::StoreOp::Store,
        };
        let descriptor = wgpu::RenderPassDescriptor {
            label: Some("roxlap-gpu egui paint"),
            color_attachments: &[Some(wgpu::RenderPassColorAttachment {
                view: &target,
                depth_slice: None,
                resolve_target: None,
                ops: keep_existing,
            })],
            depth_stencil_attachment: None,
            timestamp_writes: None,
            occlusion_query_set: None,
            multiview_mask: None,
        };
        // egui-wgpu 0.29 requires a `'static` pass (see its docs).
        let mut pass = encoder.begin_render_pass(&descriptor).forget_lifetime();
        renderer.render(&mut pass, jobs, &screen);
    }
    // Release textures egui no longer needs.
    for id in &textures.free {
        renderer.free_texture(id);
    }

    command_buffers.push(encoder.finish());
    queue.submit(command_buffers);
    frame.present();
}
2905
2906    fn build_scene_dda(
2907        &self,
2908        width: u32,
2909        height: u32,
2910        surface_format: wgpu::TextureFormat,
2911    ) -> SceneDdaResources {
2912        // Framebuffer as a packed-`rgba8unorm` storage buffer (1 u32 per
2913        // pixel, row stride = `width`). See the struct-field note.
2914        let framebuffer = self.device.create_buffer(&wgpu::BufferDescriptor {
2915            label: Some("roxlap-gpu scene_dda.framebuffer"),
2916            size: u64::from(width) * u64::from(height) * 4,
2917            usage: wgpu::BufferUsages::STORAGE,
2918            mapped_at_creation: false,
2919        });
2920        // Screen size for the blit's pixel→index math (`vec2<u32>`).
2921        let blit_dims = self.device.create_buffer(&wgpu::BufferDescriptor {
2922            label: Some("roxlap-gpu scene_dda.blit_dims"),
2923            size: 8,
2924            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2925            mapped_at_creation: false,
2926        });
2927        self.queue
2928            .write_buffer(&blit_dims, 0, bytemuck::bytes_of(&[width, height]));
2929
2930        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2931            label: Some("roxlap-gpu scene_dda.uniform"),
2932            size: std::mem::size_of::<SceneDdaUniform>() as u64,
2933            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2934            mapped_at_creation: false,
2935        });
2936
2937        // GPU.9 — per-pixel world-t depth (f32 bits as u32). Sized to
2938        // the storage texture; written by the scene pass when sprites
2939        // are active, read+tested by the sprite splatter.
2940        let depth_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
2941            label: Some("roxlap-gpu scene_dda.depth"),
2942            size: u64::from(width) * u64::from(height) * 4,
2943            // COPY_SRC so `read_depth_pixel` can stage it for picking.
2944            usage: wgpu::BufferUsages::STORAGE
2945                | wgpu::BufferUsages::COPY_DST
2946                | wgpu::BufferUsages::COPY_SRC,
2947            mapped_at_creation: false,
2948        });
2949        let depth_readback = self.device.create_buffer(&wgpu::BufferDescriptor {
2950            label: Some("roxlap-gpu scene_dda.depth_readback"),
2951            size: u64::from(width) * u64::from(height) * 4,
2952            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
2953            mapped_at_creation: false,
2954        });
2955        let dda_shader = self
2956            .device
2957            .create_shader_module(wgpu::ShaderModuleDescriptor {
2958                label: Some("scene_dda.wgsl"),
2959                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
2960            });
2961        let bgl_dda = self
2962            .device
2963            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2964                label: Some("roxlap-gpu scene_dda.bgl"),
2965                entries: &[
2966                    bgl_uniform_entry(0),
2967                    bgl_storage_entry(1, true),
2968                    bgl_storage_entry(2, true),
2969                    bgl_storage_entry(3, true),
2970                    bgl_storage_entry(4, true),
2971                    bgl_storage_entry(5, true),
2972                    bgl_storage_entry(6, true),
2973                    bgl_storage_entry(7, true),
2974                    // Framebuffer storage buffer (read-write; the scene +
2975                    // sprite passes write packed pixels into it).
2976                    bgl_storage_entry(8, false),
2977                    // GPU.8 sky panorama + sampler.
2978                    wgpu::BindGroupLayoutEntry {
2979                        binding: 9,
2980                        visibility: wgpu::ShaderStages::COMPUTE,
2981                        ty: wgpu::BindingType::Texture {
2982                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
2983                            view_dimension: wgpu::TextureViewDimension::D2,
2984                            multisampled: false,
2985                        },
2986                        count: None,
2987                    },
2988                    wgpu::BindGroupLayoutEntry {
2989                        binding: 10,
2990                        visibility: wgpu::ShaderStages::COMPUTE,
2991                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
2992                        count: None,
2993                    },
2994                    // GPU.9 — read-write per-pixel depth buffer.
2995                    bgl_storage_entry(11, false),
2996                    // Occupancy pages 1..MAX_OCC_PAGES (page 0 is
2997                    // binding 1). Unused pages bind a dummy buffer.
2998                    bgl_storage_entry(12, true),
2999                    bgl_storage_entry(13, true),
3000                    bgl_storage_entry(14, true),
3001                    // Per-grid cameras (runtime-sized; one per grid).
3002                    bgl_storage_entry(15, true),
3003                ],
3004            });
3005        let dda_pl = self
3006            .device
3007            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3008                label: Some("roxlap-gpu scene_dda.layout"),
3009                bind_group_layouts: &[Some(&bgl_dda)],
3010                immediate_size: 0,
3011            });
3012        let pipeline_dda = self
3013            .device
3014            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
3015                label: Some("roxlap-gpu scene_dda.pipeline"),
3016                layout: Some(&dda_pl),
3017                module: &dda_shader,
3018                entry_point: Some("render_scene"),
3019                compilation_options: wgpu::PipelineCompilationOptions::default(),
3020                cache: None,
3021            });
3022
3023        let blit_shader = self
3024            .device
3025            .create_shader_module(wgpu::ShaderModuleDescriptor {
3026                label: Some("scene_blit.wgsl"),
3027                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_blit.wgsl").into()),
3028            });
3029        let bgl_blit = self
3030            .device
3031            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3032                label: Some("roxlap-gpu scene_dda.blit_bgl"),
3033                entries: &[
3034                    // Framebuffer storage buffer (read-only in the blit).
3035                    wgpu::BindGroupLayoutEntry {
3036                        binding: 0,
3037                        visibility: wgpu::ShaderStages::FRAGMENT,
3038                        ty: wgpu::BindingType::Buffer {
3039                            ty: wgpu::BufferBindingType::Storage { read_only: true },
3040                            has_dynamic_offset: false,
3041                            min_binding_size: None,
3042                        },
3043                        count: None,
3044                    },
3045                    // Screen-size uniform for the pixel→index math.
3046                    wgpu::BindGroupLayoutEntry {
3047                        binding: 1,
3048                        visibility: wgpu::ShaderStages::FRAGMENT,
3049                        ty: wgpu::BindingType::Buffer {
3050                            ty: wgpu::BufferBindingType::Uniform,
3051                            has_dynamic_offset: false,
3052                            min_binding_size: None,
3053                        },
3054                        count: None,
3055                    },
3056                ],
3057            });
3058        let blit_pl = self
3059            .device
3060            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3061                label: Some("roxlap-gpu scene_dda.blit_layout"),
3062                bind_group_layouts: &[Some(&bgl_blit)],
3063                immediate_size: 0,
3064            });
3065        let pipeline_blit = self
3066            .device
3067            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
3068                label: Some("roxlap-gpu scene_dda.blit_pipeline"),
3069                layout: Some(&blit_pl),
3070                vertex: wgpu::VertexState {
3071                    module: &blit_shader,
3072                    entry_point: Some("vs_main"),
3073                    compilation_options: wgpu::PipelineCompilationOptions::default(),
3074                    buffers: &[],
3075                },
3076                fragment: Some(wgpu::FragmentState {
3077                    module: &blit_shader,
3078                    entry_point: Some("fs_main"),
3079                    compilation_options: wgpu::PipelineCompilationOptions::default(),
3080                    targets: &[Some(wgpu::ColorTargetState {
3081                        format: surface_format,
3082                        blend: None,
3083                        write_mask: wgpu::ColorWrites::ALL,
3084                    })],
3085                }),
3086                primitive: wgpu::PrimitiveState::default(),
3087                depth_stencil: None,
3088                multisample: wgpu::MultisampleState::default(),
3089                multiview_mask: None,
3090                cache: None,
3091            });
3092        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
3093            label: Some("roxlap-gpu scene_dda.blit_bg"),
3094            layout: &bgl_blit,
3095            entries: &[
3096                wgpu::BindGroupEntry {
3097                    binding: 0,
3098                    resource: framebuffer.as_entire_binding(),
3099                },
3100                wgpu::BindGroupEntry {
3101                    binding: 1,
3102                    resource: blit_dims.as_entire_binding(),
3103                },
3104            ],
3105        });
3106
3107        SceneDdaResources {
3108            storage_size: (width, height),
3109            framebuffer,
3110            uniform_buf,
3111            bgl_dda,
3112            pipeline_dda,
3113            blit_bg,
3114            pipeline_blit,
3115            depth_buffer,
3116            depth_readback,
3117        }
3118    }
3119
3120    /// Read back the per-pixel world-t depth at window pixel `(x, y)`
3121    /// from the last rendered frame, for screen→world picking. Returns
3122    /// the distance `t` along the (normalised) view ray to the nearest
3123    /// scene-grid surface, so the host reconstructs the world hit as
3124    /// `cam.pos + t * normalize(ray_dir)`. `None` for out-of-bounds
3125    /// pixels, sky / no-hit (the `T_INF` sentinel), or when no scene
3126    /// frame has been rendered.
3127    ///
3128    /// The depth buffer is the SCENE pass's output (terrain + grids),
3129    /// untouched by the sprite pass (which reads it read-only), so a
3130    /// cursor sprite under the pointer does not occlude the pick.
3131    ///
3132    /// Synchronous: copies the depth buffer to a mapped staging buffer
3133    /// and blocks on `device.poll(Wait)`. Cheap enough for click-time
3134    /// picks; do not call it every frame.
3135    ///
3136    /// Requires the last frame to have written depth, which happens
3137    /// when sprites are present (`write_depth`). The pick demo always
3138    /// has a cursor sprite, so this holds.
3139    ///
3140    /// Compiles on wasm, but the wasm facade never calls it: WebGPU's
3141    /// `device.poll` doesn't block for the GPU, so the blocking
3142    /// `recv()` here would hang the single browser thread. Picking is
3143    /// deferred on the wasm GPU path (the facade returns `None`).
3144    #[must_use]
3145    pub fn read_depth_pixel(&self, x: u32, y: u32) -> Option<f32> {
3146        let dda = self.scene_dda.as_ref()?;
3147        let (w, h) = dda.storage_size;
3148        if x >= w || y >= h {
3149            return None;
3150        }
3151        let mut enc = self
3152            .device
3153            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
3154                label: Some("roxlap-gpu depth readback"),
3155            });
3156        let size = u64::from(w) * u64::from(h) * 4;
3157        enc.copy_buffer_to_buffer(&dda.depth_buffer, 0, &dda.depth_readback, 0, size);
3158        self.queue.submit(std::iter::once(enc.finish()));
3159
3160        let slice = dda.depth_readback.slice(..);
3161        let (tx, rx) = std::sync::mpsc::channel();
3162        slice.map_async(wgpu::MapMode::Read, move |r| {
3163            let _ = tx.send(r);
3164        });
3165        self.device.poll(wgpu::PollType::wait_indefinitely()).ok();
3166        rx.recv().ok()?.ok()?;
3167
3168        let t = {
3169            let data = slice.get_mapped_range();
3170            let idx = ((y * w + x) * 4) as usize;
3171            let bytes: [u8; 4] = data[idx..idx + 4].try_into().ok()?;
3172            f32::from_le_bytes(bytes)
3173        };
3174        dda.depth_readback.unmap();
3175
3176        // Reject sky / no-hit (T_INF == 1e30 in the shader) + non-finite.
3177        if !t.is_finite() || t >= 1.0e29 {
3178            return None;
3179        }
3180        Some(t)
3181    }
3182
3183    /// World-space view-ray direction (un-normalised) for window pixel
3184    /// `(x, y)`, under the GPU marcher's projection — the canonical GPU
3185    /// unproject, mirroring `scene_dda.wgsl`'s `render_scene`
3186    /// (vertical-FOV pinhole). Uses the last-rendered frame's target
3187    /// size + FOV; `None` before the first scene render. Pair with
3188    /// [`Self::read_depth_pixel`] for screen→world picking.
3189    #[must_use]
3190    pub fn pixel_ray(
3191        &self,
3192        right: [f64; 3],
3193        down: [f64; 3],
3194        forward: [f64; 3],
3195        x: f64,
3196        y: f64,
3197    ) -> Option<[f64; 3]> {
3198        let dda = self.scene_dda.as_ref()?;
3199        let (w, h) = dda.storage_size;
3200        if w == 0 || h == 0 || self.last_fov_y_rad <= 0.0 {
3201            return None;
3202        }
3203        Some(pinhole_pixel_ray(
3204            right,
3205            down,
3206            forward,
3207            x,
3208            y,
3209            f64::from(w),
3210            f64::from(h),
3211            f64::from(self.last_fov_y_rad),
3212        ))
3213    }
3214
3215    /// GPU.10.1 — upload a sprite model registry + its instances for
3216    /// the DDA path. An empty instance slice clears all sprites.
3217    pub fn set_sprite_instances(
3218        &mut self,
3219        registry: &sprite_model::SpriteModelRegistry,
3220        instances: &[sprite_model::SpriteInstance],
3221    ) {
3222        if instances.is_empty() {
3223            self.sprite_registry = None;
3224            return;
3225        }
3226        self.sprite_registry = Some(sprite_model::SpriteRegistryResident::upload(
3227            &self.device,
3228            registry,
3229            instances,
3230        ));
3231    }
3232
3233    /// Re-pose the already-resident sprite instances in place (no model
3234    /// volume re-upload) — the cheap per-frame path for animated KFA
3235    /// limbs. `instances` must match the last [`Self::set_sprite_instances`]
3236    /// in length + order. No-op if no sprite registry is resident.
3237    pub fn update_sprite_instance_transforms(
3238        &mut self,
3239        instances: &[sprite_model::SpriteInstance],
3240    ) {
3241        if let Some(reg) = self.sprite_registry.as_mut() {
3242            reg.update_transforms(instances);
3243        }
3244    }
3245
3246    /// GPU.12 incremental — re-upload only LOD chain `chain_id`'s entries
3247    /// after an in-place edit of `registry` (carve / recolour), without
3248    /// rebuilding the whole sprite registry. `registry` must be the one
3249    /// last passed to [`Self::set_sprite_instances`] with chain
3250    /// `chain_id` already edited. No-op if no registry is resident.
3251    pub fn update_sprite_model(
3252        &mut self,
3253        registry: &sprite_model::SpriteModelRegistry,
3254        chain_id: u32,
3255    ) {
3256        if let Some(reg) = self.sprite_registry.as_mut() {
3257            reg.update_model(&self.device, &self.queue, registry, chain_id);
3258        }
3259    }
3260
3261    /// Set the per-instance `kv6colmul[256]` lighting tables (voxlap's
3262    /// `update_reflects` output, e.g. via `roxlap_core::sprite::
3263    /// sprite_colmul`), in the same order/length as the last
3264    /// [`Self::set_sprite_instances`]. The GPU sprite pass modulates each
3265    /// voxel by its surface normal's entry — matching the CPU rasteriser.
3266    /// No-op if no sprite registry is resident.
3267    pub fn set_sprite_instance_colmul(&mut self, tables: &[[u64; 256]]) {
3268        if let Some(reg) = self.sprite_registry.as_mut() {
3269            reg.set_instance_colmul(tables);
3270        }
3271    }
3272
3273    /// GPU.10.4 — set the LOD pixel threshold: a sprite steps to the
3274    /// next mip once a mip-0 voxel would project below `px` screen
3275    /// pixels. `1.0` is the natural "no sub-pixel voxels" default;
3276    /// larger values force LOD in closer (useful for inspection).
3277    /// Clamped to ≥ 0.25.
3278    pub fn set_sprite_lod_px(&mut self, px: f32) {
3279        self.sprite_lod_px = px.max(0.25);
3280    }
3281
3282    /// GPU.11.1 — set the scene-grid LOD scan distance (world units).
3283    /// A chunk entered at world-t `t` is marched at mip
3284    /// `floor(log2(max(t, msd) / msd))`, clamped to its grid's mip
3285    /// ladder. `0` disables LOD (always mip-0). Larger values push
3286    /// the coarser mips farther out — the axis-aligned-mip-beams
3287    /// mitigation lever (GPU.11.2). Default 64 (matches CPU
3288    /// `mip_scan_dist`).
3289    pub fn set_scene_mip_scan_dist(&mut self, dist: f32) {
3290        self.scene_mip_scan_dist = dist.max(0.0);
3291    }
3292
3293    /// Set per-face grid side-shading — voxlap's
3294    /// `setsideshades(top, bot, left, right, up, down)`. Each value is
3295    /// subtracted (as a u8, matching the CPU `gcsub` high byte) from a
3296    /// hit voxel's brightness byte before shading, so the scene-DDA pass
3297    /// darkens grid faces the same way the CPU rasteriser does. `[0; 6]`
3298    /// disables it (the default). The hit face is taken from the DDA's
3299    /// last-stepped axis + ray direction.
3300    pub fn set_scene_side_shades(&mut self, s: [i8; 6]) {
3301        // Reinterpret each i8 as u8 (voxlap stamps `sxx` into gcsub's
3302        // high byte verbatim), then pack (top, bot, left, right) /
3303        // (up, down, 0, 0) for the two uniform vec4s.
3304        let v = |i: usize| i32::from(s[i] as u8);
3305        self.scene_side_shades = [[v(0), v(1), v(2), v(3)], [v(4), v(5), 0, 0]];
3306    }
3307
3308    /// GPU.10.1 — build the instanced model-DDA pipeline (one thread
3309    /// per pixel). Lazily invoked the first frame a registry is present.
3310    fn build_sprite_model_dda(&self) -> SpriteModelDdaResources {
3311        let shader = self
3312            .device
3313            .create_shader_module(wgpu::ShaderModuleDescriptor {
3314                label: Some("sprite_model_dda.wgsl"),
3315                source: wgpu::ShaderSource::Wgsl(
3316                    include_str!("../shaders/sprite_model_dda.wgsl").into(),
3317                ),
3318            });
3319        let bgl = self
3320            .device
3321            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3322                label: Some("roxlap-gpu sprite_model_dda.bgl"),
3323                entries: &[
3324                    bgl_uniform_entry(0),
3325                    bgl_storage_entry(1, true),  // occupancy
3326                    bgl_storage_entry(2, true),  // colors
3327                    bgl_storage_entry(3, true),  // color_offsets
3328                    bgl_storage_entry(4, true),  // model_meta
3329                    bgl_storage_entry(5, true),  // instances
3330                    bgl_storage_entry(6, true),  // scene depth
3331                    bgl_storage_entry(7, false), // framebuffer (read-write buffer)
3332                    bgl_storage_entry(8, true),  // tile_ranges
3333                    bgl_storage_entry(9, true),  // tile_instances
3334                    bgl_storage_entry(10, true), // per-voxel dir
3335                    bgl_storage_entry(11, true), // per-instance kv6colmul
3336                ],
3337            });
3338        let pl = self
3339            .device
3340            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3341                label: Some("roxlap-gpu sprite_model_dda.layout"),
3342                bind_group_layouts: &[Some(&bgl)],
3343                immediate_size: 0,
3344            });
3345        let pipeline = self
3346            .device
3347            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
3348                label: Some("roxlap-gpu sprite_model_dda.pipeline"),
3349                layout: Some(&pl),
3350                module: &shader,
3351                entry_point: Some("march"),
3352                compilation_options: wgpu::PipelineCompilationOptions::default(),
3353                cache: None,
3354            });
3355        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
3356            label: Some("roxlap-gpu sprite_model_dda.uniform"),
3357            size: std::mem::size_of::<SpriteModelUniform>() as u64,
3358            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
3359            mapped_at_creation: false,
3360        });
3361        SpriteModelDdaResources {
3362            bgl,
3363            pipeline,
3364            uniform_buf,
3365        }
3366    }
3367}
3368
/// GPU.11 — headless scene-DDA renderer for tests + offline visual
/// gates. Owns the `scene_dda.wgsl` compute pipeline with no surface
/// and no blit pass; renders a [`GpuSceneResident`] to an in-memory
/// RGBA framebuffer via a buffer-to-buffer readback. The per-substage
/// visual gate (render reference scenes, diff PPMs) and the GPU.11.1
/// mip render-diff both ride on this.
pub struct HeadlessSceneRenderer {
    /// Framebuffer width in pixels.
    width: u32,
    /// Framebuffer height in pixels.
    height: u32,
    /// Framebuffer storage buffer (packed `rgba8unorm`, tight rows) —
    /// matches the buffer-output `scene_dda.wgsl` (see its note).
    framebuffer: wgpu::Buffer,
    /// Per-pixel depth storage buffer, `width * height * 4` bytes.
    depth_buffer: wgpu::Buffer,
    /// Uniform buffer sized for one `SceneDdaUniform`; rewritten by
    /// each [`Self::render`] call.
    uniform_buf: wgpu::Buffer,
    /// 1×1 default sky texture backing `sky_view`.
    // NOTE(review): presumably held only to keep the texture alive
    // alongside its view — confirm nothing else reads it.
    _sky_texture: wgpu::Texture,
    /// View of the default sky texture.
    sky_view: wgpu::TextureView,
    /// Sky sampler (linear filtering, repeat addressing on U and V).
    sky_sampler: wgpu::Sampler,
    /// Bind-group layout of the scene-DDA compute pass; the per-render
    /// bind group is created against it.
    bgl: wgpu::BindGroupLayout,
    /// Compute pipeline for the shader's `render_scene` entry point.
    pipeline: wgpu::ComputePipeline,
    /// CPU-mappable staging buffer (`COPY_DST | MAP_READ`), the same
    /// size as `framebuffer`.
    readback: wgpu::Buffer,
    /// Per-face side-shades for the gate render (default none). Packed
    /// `[(top,bot,left,right), (up,down,_,_)]`; set via
    /// [`Self::set_side_shades`].
    side_shades: [[i32; 4]; 2],
}
3394
3395impl HeadlessSceneRenderer {
3396    /// Build the compute pipeline + output/readback resources for a
3397    /// `width × height` framebuffer. Validates `scene_dda.wgsl` and
3398    /// the [`scene::GridStaticMeta`] std430 layout at pipeline /
3399    /// bind-group time.
3400    #[must_use]
3401    pub fn new(device: &wgpu::Device, queue: &wgpu::Queue, width: u32, height: u32) -> Self {
3402        let framebuffer = device.create_buffer(&wgpu::BufferDescriptor {
3403            label: Some("roxlap-gpu headless.framebuffer"),
3404            size: u64::from(width) * u64::from(height) * 4,
3405            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
3406            mapped_at_creation: false,
3407        });
3408
3409        let uniform_buf = device.create_buffer(&wgpu::BufferDescriptor {
3410            label: Some("roxlap-gpu headless.uniform"),
3411            size: std::mem::size_of::<SceneDdaUniform>() as u64,
3412            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
3413            mapped_at_creation: false,
3414        });
3415        let depth_buffer = device.create_buffer(&wgpu::BufferDescriptor {
3416            label: Some("roxlap-gpu headless.depth"),
3417            size: u64::from(width) * u64::from(height) * 4,
3418            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
3419            mapped_at_creation: false,
3420        });
3421
3422        let default_sky_pixel = [120u8, 150, 220, 255];
3423        let (sky_texture, sky_view) = create_sky_texture(device, 1, 1, &default_sky_pixel);
3424        // Upload the default sky texel (create_sky_texture only allocates
3425        // — the texel must be written or the shader samples black, which
3426        // is why a grid-less headless render came back black).
3427        queue.write_texture(
3428            wgpu::TexelCopyTextureInfo {
3429                texture: &sky_texture,
3430                mip_level: 0,
3431                origin: wgpu::Origin3d::ZERO,
3432                aspect: wgpu::TextureAspect::All,
3433            },
3434            &default_sky_pixel,
3435            wgpu::TexelCopyBufferLayout {
3436                offset: 0,
3437                bytes_per_row: Some(4),
3438                rows_per_image: Some(1),
3439            },
3440            wgpu::Extent3d {
3441                width: 1,
3442                height: 1,
3443                depth_or_array_layers: 1,
3444            },
3445        );
3446        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
3447            label: Some("roxlap-gpu headless.sky_sampler"),
3448            address_mode_u: wgpu::AddressMode::Repeat,
3449            address_mode_v: wgpu::AddressMode::Repeat,
3450            mag_filter: wgpu::FilterMode::Linear,
3451            min_filter: wgpu::FilterMode::Linear,
3452            ..Default::default()
3453        });
3454
3455        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
3456            label: Some("scene_dda.wgsl (headless)"),
3457            source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
3458        });
3459        let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
3460            label: Some("roxlap-gpu headless.bgl"),
3461            entries: &[
3462                bgl_uniform_entry(0),
3463                bgl_storage_entry(1, true),
3464                bgl_storage_entry(2, true),
3465                bgl_storage_entry(3, true),
3466                bgl_storage_entry(4, true),
3467                bgl_storage_entry(5, true),
3468                bgl_storage_entry(6, true),
3469                bgl_storage_entry(7, true),
3470                // Framebuffer storage buffer (read-write).
3471                bgl_storage_entry(8, false),
3472                wgpu::BindGroupLayoutEntry {
3473                    binding: 9,
3474                    visibility: wgpu::ShaderStages::COMPUTE,
3475                    ty: wgpu::BindingType::Texture {
3476                        sample_type: wgpu::TextureSampleType::Float { filterable: true },
3477                        view_dimension: wgpu::TextureViewDimension::D2,
3478                        multisampled: false,
3479                    },
3480                    count: None,
3481                },
3482                wgpu::BindGroupLayoutEntry {
3483                    binding: 10,
3484                    visibility: wgpu::ShaderStages::COMPUTE,
3485                    ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
3486                    count: None,
3487                },
3488                bgl_storage_entry(11, false),
3489                bgl_storage_entry(12, true),
3490                bgl_storage_entry(13, true),
3491                bgl_storage_entry(14, true),
3492                // Per-grid cameras (runtime-sized; one per grid).
3493                bgl_storage_entry(15, true),
3494            ],
3495        });
3496        let pl = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
3497            label: Some("roxlap-gpu headless.layout"),
3498            bind_group_layouts: &[Some(&bgl)],
3499            immediate_size: 0,
3500        });
3501        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
3502            label: Some("roxlap-gpu headless.pipeline"),
3503            layout: Some(&pl),
3504            module: &shader,
3505            entry_point: Some("render_scene"),
3506            compilation_options: wgpu::PipelineCompilationOptions::default(),
3507            cache: None,
3508        });
3509
3510        // Readback is a tight buffer-to-buffer copy (no 256-byte row
3511        // padding, unlike the old texture-to-buffer path).
3512        let readback = device.create_buffer(&wgpu::BufferDescriptor {
3513            label: Some("roxlap-gpu headless.readback"),
3514            size: u64::from(width) * u64::from(height) * 4,
3515            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
3516            mapped_at_creation: false,
3517        });
3518
3519        Self {
3520            width,
3521            height,
3522            framebuffer,
3523            depth_buffer,
3524            uniform_buf,
3525            _sky_texture: sky_texture,
3526            sky_view,
3527            sky_sampler,
3528            bgl,
3529            pipeline,
3530            readback,
3531            side_shades: [[0; 4]; 2],
3532        }
3533    }
3534
3535    /// Set per-face side-shades for subsequent [`Self::render`] calls —
3536    /// voxlap `setsideshades(top, bot, left, right, up, down)`, each an
3537    /// i8 stamped as u8 (matching the engine path). Lets the gate test
3538    /// the GPU side-shade darkening.
3539    pub fn set_side_shades(&mut self, s: [i8; 6]) {
3540        let v = |i: usize| i32::from(s[i] as u8);
3541        self.side_shades = [[v(0), v(1), v(2), v(3)], [v(4), v(5), 0, 0]];
3542    }
3543
3544    /// Render `scene` from `cameras` (one per grid) and read the
3545    /// framebuffer back as `width*height` packed `0xAABBGGRR` pixels
3546    /// (R in the low byte). Fog is disabled. `mip_scan_dist` drives
3547    /// the GPU.11.1 scene-grid LOD (`0` = always mip-0). Blocks on
3548    /// readback.
3549    ///
3550    /// # Panics
3551    /// If `cameras.len() != scene.grid_count`.
3552    #[must_use]
3553    #[allow(clippy::too_many_arguments)]
3554    pub fn render(
3555        &self,
3556        device: &wgpu::Device,
3557        queue: &wgpu::Queue,
3558        scene: &GpuSceneResident,
3559        cameras: &[Camera],
3560        fov_y_rad: f32,
3561        max_outer_steps: u32,
3562        mip_scan_dist: f32,
3563    ) -> Vec<u32> {
3564        assert_eq!(
3565            cameras.len(),
3566            scene.grid_count as usize,
3567            "headless render: {} cameras for {} grids",
3568            cameras.len(),
3569            scene.grid_count,
3570        );
3571
3572        let cam_vec: Vec<SceneDdaPerGridCamera> = cameras
3573            .iter()
3574            .map(SceneDdaPerGridCamera::from_camera)
3575            .collect();
3576        let grid_cameras = upload_grid_cameras(device, &cam_vec);
3577        let uniform = SceneDdaUniform {
3578            fov_y_rad,
3579            grid_count: scene.grid_count,
3580            max_outer_steps,
3581            _pad0: 0,
3582            screen_size: [self.width, self.height],
3583            _pad1: [0; 2],
3584            // Fog off: near/far past any reachable t → factor 0.
3585            fog_color: [0.0, 0.0, 0.0, 1.0e29],
3586            fog_far: 1.0e30,
3587            write_depth: 0,
3588            occ_page_words: scene.occupancy_page_words,
3589            occ_num_pages: scene.occupancy_num_pages,
3590            mip_scan_dist,
3591            _pad2: 0,
3592            _pad3: 0,
3593            _pad4: 0,
3594            // Sky direction from the first grid camera (the world frame
3595            // in these tests); a default forward camera when there are
3596            // none (grid_count == 0) so the sky lookup stays valid.
3597            sky_cam: SceneDdaPerGridCamera::from_camera(&cameras.first().copied().unwrap_or(
3598                Camera {
3599                    position: [0.0; 3],
3600                    right: [1.0, 0.0, 0.0],
3601                    down: [0.0, 0.0, 1.0],
3602                    forward: [0.0, 1.0, 0.0],
3603                    fov_y_rad,
3604                },
3605            )),
3606            side_shades0: self.side_shades[0],
3607            side_shades1: self.side_shades[1],
3608        };
3609        queue.write_buffer(&self.uniform_buf, 0, bytemuck::bytes_of(&uniform));
3610
3611        let bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
3612            label: Some("roxlap-gpu headless.bg"),
3613            layout: &self.bgl,
3614            entries: &[
3615                wgpu::BindGroupEntry {
3616                    binding: 0,
3617                    resource: self.uniform_buf.as_entire_binding(),
3618                },
3619                wgpu::BindGroupEntry {
3620                    binding: 1,
3621                    resource: scene.occupancy_pages[0].as_entire_binding(),
3622                },
3623                wgpu::BindGroupEntry {
3624                    binding: 2,
3625                    resource: scene.all_color_offsets.as_entire_binding(),
3626                },
3627                wgpu::BindGroupEntry {
3628                    binding: 3,
3629                    resource: scene.all_colors.as_entire_binding(),
3630                },
3631                wgpu::BindGroupEntry {
3632                    binding: 4,
3633                    resource: scene.all_chunk_colors_base.as_entire_binding(),
3634                },
3635                wgpu::BindGroupEntry {
3636                    binding: 5,
3637                    resource: scene.all_chunk_occupancy.as_entire_binding(),
3638                },
3639                wgpu::BindGroupEntry {
3640                    binding: 6,
3641                    resource: scene.grid_static_meta.as_entire_binding(),
3642                },
3643                wgpu::BindGroupEntry {
3644                    binding: 7,
3645                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
3646                },
3647                wgpu::BindGroupEntry {
3648                    binding: 8,
3649                    resource: self.framebuffer.as_entire_binding(),
3650                },
3651                wgpu::BindGroupEntry {
3652                    binding: 9,
3653                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
3654                },
3655                wgpu::BindGroupEntry {
3656                    binding: 10,
3657                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
3658                },
3659                wgpu::BindGroupEntry {
3660                    binding: 11,
3661                    resource: self.depth_buffer.as_entire_binding(),
3662                },
3663                wgpu::BindGroupEntry {
3664                    binding: 12,
3665                    resource: scene.occupancy_pages[1].as_entire_binding(),
3666                },
3667                wgpu::BindGroupEntry {
3668                    binding: 13,
3669                    resource: scene.occupancy_pages[2].as_entire_binding(),
3670                },
3671                wgpu::BindGroupEntry {
3672                    binding: 14,
3673                    resource: scene.occupancy_pages[3].as_entire_binding(),
3674                },
3675                wgpu::BindGroupEntry {
3676                    binding: 15,
3677                    resource: grid_cameras.as_entire_binding(),
3678                },
3679            ],
3680        });
3681
3682        let mut enc =
3683            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
3684        {
3685            let mut pass = enc.begin_compute_pass(&wgpu::ComputePassDescriptor {
3686                label: Some("roxlap-gpu headless.pass"),
3687                timestamp_writes: None,
3688            });
3689            pass.set_pipeline(&self.pipeline);
3690            pass.set_bind_group(0, &bg, &[]);
3691            pass.dispatch_workgroups(self.width.div_ceil(8), self.height.div_ceil(8), 1);
3692        }
3693        enc.copy_buffer_to_buffer(
3694            &self.framebuffer,
3695            0,
3696            &self.readback,
3697            0,
3698            u64::from(self.width) * u64::from(self.height) * 4,
3699        );
3700        queue.submit(Some(enc.finish()));
3701
3702        let slice = self.readback.slice(..);
3703        let (tx, rx) = std::sync::mpsc::channel();
3704        slice.map_async(wgpu::MapMode::Read, move |r| {
3705            let _ = tx.send(r);
3706        });
3707        device.poll(wgpu::PollType::wait_indefinitely()).ok();
3708        rx.recv().expect("map_async channel").expect("map_async");
3709
3710        let data = slice.get_mapped_range();
3711        // Tight `width*height` packed pixels — the shader's
3712        // `pack4x8unorm(vec4(r,g,b,a))` already yields `0xAABBGGRR`
3713        // little-endian, so a straight u32 read reconstructs each pixel.
3714        let out: Vec<u32> = data
3715            .chunks_exact(4)
3716            .map(|px| u32::from_le_bytes([px[0], px[1], px[2], px[3]]))
3717            .collect();
3718        drop(data);
3719        self.readback.unmap();
3720        out
3721    }
3722}
3723
3724fn bgl_uniform_entry(binding: u32) -> wgpu::BindGroupLayoutEntry {
3725    wgpu::BindGroupLayoutEntry {
3726        binding,
3727        visibility: wgpu::ShaderStages::COMPUTE,
3728        ty: wgpu::BindingType::Buffer {
3729            ty: wgpu::BufferBindingType::Uniform,
3730            has_dynamic_offset: false,
3731            min_binding_size: None,
3732        },
3733        count: None,
3734    }
3735}
3736
3737fn bgl_storage_entry(binding: u32, read_only: bool) -> wgpu::BindGroupLayoutEntry {
3738    wgpu::BindGroupLayoutEntry {
3739        binding,
3740        visibility: wgpu::ShaderStages::COMPUTE,
3741        ty: wgpu::BindingType::Buffer {
3742            ty: wgpu::BufferBindingType::Storage { read_only },
3743            has_dynamic_offset: false,
3744            min_binding_size: None,
3745        },
3746        count: None,
3747    }
3748}
3749
3750/// Create a fresh sky panorama texture sized `width × height` with
3751/// the initial pixel data uploaded via `write_texture`. Used by
3752/// `GpuRenderer::new` (1×1 default) and `set_sky_panorama` (host-
3753/// supplied panorama).
3754fn create_sky_texture(
3755    device: &wgpu::Device,
3756    width: u32,
3757    height: u32,
3758    _initial_pixels: &[u8],
3759) -> (wgpu::Texture, wgpu::TextureView) {
3760    let tex = device.create_texture(&wgpu::TextureDescriptor {
3761        label: Some("roxlap-gpu sky_texture"),
3762        size: wgpu::Extent3d {
3763            width,
3764            height,
3765            depth_or_array_layers: 1,
3766        },
3767        mip_level_count: 1,
3768        sample_count: 1,
3769        dimension: wgpu::TextureDimension::D2,
3770        format: wgpu::TextureFormat::Rgba8Unorm,
3771        usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
3772        view_formats: &[],
3773    });
3774    let view = tex.create_view(&wgpu::TextureViewDescriptor::default());
3775    (tex, view)
3776}
3777
3778/// GPU.4 needs to upload a whole grid (~hundreds of MiB) as a few
3779/// storage buffers. wgpu's default `max_storage_buffer_binding_size`
3780/// is 128 MiB, which is just enough for the demo's 32×32 ground
3781/// occupancy (~128 MiB) but not the colour array. We request as
3782/// much as the adapter is willing to give — most desktop GPUs cap
3783/// individual storage buffers at 2-4 GiB; iGPUs often offer the
3784/// full system memory.
3785pub(crate) fn pick_required_limits(adapter_limits: &wgpu::Limits) -> wgpu::Limits {
3786    wgpu::Limits {
3787        max_storage_buffer_binding_size: adapter_limits.max_storage_buffer_binding_size,
3788        max_buffer_size: adapter_limits.max_buffer_size,
3789        // Occupancy paging adds up to MAX_OCC_PAGES-1 extra storage
3790        // bindings; with the scene's other buffers + the GPU.9 depth
3791        // buffer the scene_dda stage needs ~11. The default cap is 8.
3792        // Both NVK and lavapipe advertise ≫16, so request 16.
3793        max_storage_buffers_per_shader_stage: adapter_limits
3794            .max_storage_buffers_per_shader_stage
3795            .min(16),
3796        ..wgpu::Limits::default()
3797    }
3798}
3799
3800fn pick_present_mode(modes: &[wgpu::PresentMode]) -> wgpu::PresentMode {
3801    // Prefer Mailbox > Immediate > Fifo. Fifo is the universal
3802    // fallback and the only one Wayland-on-Mesa always offers.
3803    for &m in &[wgpu::PresentMode::Mailbox, wgpu::PresentMode::Immediate] {
3804        if modes.contains(&m) {
3805            return m;
3806        }
3807    }
3808    wgpu::PresentMode::Fifo
3809}
3810
/// Un-normalised world-space view-ray direction through the centre of
/// window pixel `(x, y)` for a vertical-FOV pinhole camera — the
/// projection `scene_dda.wgsl`'s `render_scene` uses. Shared by
/// [`GpuRenderer::pixel_ray`]; kept standalone so it can be unit-tested
/// without a device.
///
/// `right`/`down`/`forward` are the camera basis, `w`/`h` the window
/// size in pixels and `fov_y_rad` the vertical field of view in radians.
#[must_use]
#[allow(clippy::too_many_arguments)]
pub fn pinhole_pixel_ray(
    right: [f64; 3],
    down: [f64; 3],
    forward: [f64; 3],
    x: f64,
    y: f64,
    w: f64,
    h: f64,
    fov_y_rad: f64,
) -> [f64; 3] {
    // Half-extents of the image plane at unit distance along `forward`.
    let half_h = (fov_y_rad * 0.5).tan();
    let half_w = half_h * (w / h);

    // Pixel centre → NDC in [-1, 1]; `ndc_y_top` is positive toward the
    // top row of the window.
    let ndc_x = (x + 0.5) / w * 2.0 - 1.0;
    let ndc_y_top = 1.0 - (y + 0.5) / h * 2.0;

    let kx = ndc_x * half_w;
    let ky = ndc_y_top * half_h;

    // `ky` grows upward while `down` points down the screen, hence the minus.
    std::array::from_fn(|axis| forward[axis] + kx * right[axis] - ky * down[axis])
}
3839
#[cfg(test)]
mod pixel_ray_tests {
    use super::pinhole_pixel_ray;

    // Axis-aligned camera basis shared by the ray tests.
    const RIGHT: [f64; 3] = [1.0, 0.0, 0.0];
    const DOWN: [f64; 3] = [0.0, 1.0, 0.0];
    const FWD: [f64; 3] = [0.0, 0.0, 1.0]; // voxlap z-down "look down"

    // The pixel whose centre lands on NDC (0,0) looks straight along
    // `forward`.
    #[test]
    fn centre_pixel_is_forward() {
        let fov = 60_f64.to_radians();
        let d = pinhole_pixel_ray(RIGHT, DOWN, FWD, 639.5, 359.5, 1280.0, 720.0, fov);
        let no_lateral_tilt = d[0].abs() < 1e-9 && d[1].abs() < 1e-9;
        assert!(no_lateral_tilt, "centre ≈ forward, got {d:?}");
        assert!((d[2] - 1.0).abs() < 1e-9);
    }

    // At the very right edge the lateral component equals
    // half_w = tan(fov_y/2)*aspect, i.e. the ray tilts +right by
    // tan(hfov/2).
    #[test]
    fn right_edge_tilts_by_half_w() {
        let fov = 60_f64.to_radians();
        let half_w = (fov * 0.5).tan() * (1280.0 / 720.0);
        let ray = pinhole_pixel_ray(RIGHT, DOWN, FWD, 1279.5, 359.5, 1280.0, 720.0, fov);
        assert!(
            (ray[0] - half_w).abs() < 1e-6,
            "x={}, half_w={half_w}",
            ray[0]
        );
        assert!(ray[0] > 0.0, "right edge tilts +right");
    }

    /// Statically validate every WGSL shader with naga (the same
    /// front-end + validator wgpu runs at pipeline creation), so shader
    /// edits — e.g. the GPU.10 sprite lighting bindings — are caught in
    /// CI without needing a GPU device.
    #[test]
    fn wgsl_shaders_validate() {
        // (file name for diagnostics, embedded shader source)
        const SHADERS: [(&str, &str); 8] = [
            (
                "sprite_model_dda.wgsl",
                include_str!("../shaders/sprite_model_dda.wgsl"),
            ),
            ("scene_dda.wgsl", include_str!("../shaders/scene_dda.wgsl")),
            ("blit.wgsl", include_str!("../shaders/blit.wgsl")),
            ("chunk_dda.wgsl", include_str!("../shaders/chunk_dda.wgsl")),
            ("grid_dda.wgsl", include_str!("../shaders/grid_dda.wgsl")),
            (
                "scene_blit.wgsl",
                include_str!("../shaders/scene_blit.wgsl"),
            ),
            ("line.wgsl", include_str!("../shaders/line.wgsl")),
            ("image.wgsl", include_str!("../shaders/image.wgsl")),
        ];

        let mut validator = naga::valid::Validator::new(
            naga::valid::ValidationFlags::all(),
            naga::valid::Capabilities::all(),
        );
        for (name, src) in SHADERS {
            let module = match naga::front::wgsl::parse_str(src) {
                Ok(module) => module,
                Err(e) => panic!("{name}: WGSL parse failed:\n{}", e.emit_to_string(src)),
            };
            if let Err(e) = validator.validate(&module) {
                panic!("{name}: WGSL validation failed: {e:?}");
            }
        }
    }

    /// A 2×2 world quad centred straight ahead projects to vertices whose
    /// homogeneous `w` equals the camera-forward distance (so the shader's
    /// `clip = ndc·w` recovers perspective-correct UVs) and whose `depth`
    /// is the euclidean range. Verifies geometry without a GPU device.
    #[test]
    fn image_vertices_carry_forward_w_and_euclidean_depth() {
        let cam = crate::GpuLineCamera {
            pos: [0.0, 0.0, 0.0],
            right: [1.0, 0.0, 0.0],
            down: [0.0, 1.0, 0.0],
            forward: [0.0, 0.0, 1.0],
        };
        // Quad 10 units ahead (forward = +Z), spanning x∈[-1,1], y∈[-1,1].
        let corners = [
            [-1.0, -1.0, 10.0], // TL
            [1.0, -1.0, 10.0],  // TR
            [-1.0, 1.0, 10.0],  // BL
            [1.0, 1.0, 10.0],   // BR
        ];
        let quad = crate::GpuImageQuad {
            corners,
            image: 0,
            tint: [1.0, 1.0, 1.0, 1.0],
            depth_test: true,
        };
        let fov = 60_f32.to_radians();
        let verts = crate::build_image_vertices(&cam, &quad, 800, 600, fov);
        assert_eq!(verts.len(), 6, "two triangles, no near-clip");
        for vertex in &verts {
            assert!((vertex.w - 10.0).abs() < 1e-4, "w == forward distance");
            assert!(vertex.depth >= 10.0, "euclidean depth >= forward distance");
            assert_eq!(vertex.depth_test, 1.0);
        }
    }
}