Skip to main content

roxlap_gpu/
lib.rs

1//! WGPU-backed compute-shader renderer scaffold for the roxlap
2//! voxel engine. GPU.1 in `PORTING-GPU.md`.
3//!
4//! GPU.1's job: stand up the device + surface + swapchain on a
5//! host window (any [`raw-window-handle`](raw_window_handle)
6//! provider), present a clear-to-colour frame each render call,
7//! and give the host a one-call opt-in. No voxel marching yet — the
8//! [`examples/probe.rs`](../examples/probe.rs) standalone holds
9//! the empirical FPS baseline from GPU.0.
10//!
11//! Later sub-substages flesh `GpuRenderer::render` out: GPU.2
12//! uploads voxel data, GPU.3 dispatches the inner-DDA compute
13//! shader, GPU.4 layers in chunk skipping, GPU.5 plugs the renderer
14//! into `roxlap-scene::Scene`, …
15//!
16//! ## Host integration shape (GPU.1)
17//!
18//! ```no_run
19//! use std::sync::Arc;
20//! use roxlap_gpu::{GpuRenderer, GpuRendererSettings};
21//! # use winit::window::Window;
22//! # fn pick(w: Arc<Window>, size: (u32, u32)) -> Option<GpuRenderer> {
23//! match GpuRenderer::new_blocking(w, size, GpuRendererSettings::default()) {
24//!     Ok(r) => Some(r),
25//!     Err(e) => {
26//!         eprintln!("GPU init failed: {e}; falling back to CPU");
27//!         None
28//!     }
29//! }
30//! # }
31//! ```
32
33#![allow(clippy::must_use_candidate, clippy::too_many_lines)]
34
35pub mod camera;
36pub mod decompress;
37pub mod grid;
38// Headless rendering is a native-only test/bench aid: it blocks on
39// `pollster` + `device.poll(Wait)`, neither of which exists on wasm.
40#[cfg(not(target_arch = "wasm32"))]
41pub mod headless;
42pub mod resident;
43pub mod scene;
44pub mod sprite_model;
45
46pub use camera::Camera;
47pub use decompress::{decompress_chunk, ChunkUpload, BEDROCK_RGB, CHUNK_Z};
48pub use grid::{bounding_box_of, GpuGridResident, GridUpload};
49#[cfg(not(target_arch = "wasm32"))]
50pub use headless::HeadlessGpu;
51pub use resident::GpuChunkResident;
52pub use scene::{
53    GpuSceneResident, GridRuntimeTransform, GridStaticMeta, RefreshOutcome, SceneUpload,
54};
55pub use sprite_model::{
56    build_sprite_model, SpriteInstance, SpriteInstanceTransform, SpriteModel, SpriteModelRegistry,
57    SpriteRegistryResident,
58};
59
60use std::sync::Arc;
61
62use bytemuck::{Pod, Zeroable};
63use raw_window_handle::{HasDisplayHandle, HasWindowHandle};
64
/// Caller-controllable knobs for [`GpuRenderer::new`]. Defaults
/// target "highest-performance GPU, prefer Mailbox/Immediate over
/// vsync" — i.e. the same configuration the GPU.0 probe used to
/// measure the FPS ceiling.
#[derive(Debug, Clone, Copy)]
pub struct GpuRendererSettings {
    /// Adapter power class to ask for; translated to
    /// `wgpu::PowerPreference` when the adapter is requested.
    pub power_preference: PowerPreference,
    /// Initial clear colour cycled by GPU.1's empty render path.
    /// The voxel-rendering substages overwrite this entirely.
    pub clear_colour: [f64; 3],
    /// Prefer mailbox/immediate when offered; falls back to FIFO if
    /// the surface only supports it (Wayland under Mesa often does).
    /// When `false` the swapchain is configured with `Fifo` directly.
    pub uncapped_present: bool,
}
79
/// Adapter power class selected through
/// [`GpuRendererSettings::power_preference`]. A crate-local mirror of
/// the two `wgpu::PowerPreference` values the renderer requests.
#[derive(Debug, Clone, Copy)]
pub enum PowerPreference {
    /// Requested from wgpu as `PowerPreference::LowPower`.
    Low,
    /// Requested from wgpu as `PowerPreference::HighPerformance`.
    High,
}
85
86impl Default for GpuRendererSettings {
87    fn default() -> Self {
88        Self {
89            power_preference: PowerPreference::High,
90            clear_colour: [0.06, 0.08, 0.12],
91            uncapped_present: true,
92        }
93    }
94}
95
/// Errors `GpuRenderer::new` surfaces to the host. The host's
/// expected flow is "try this, fall back to the CPU path on Err".
#[derive(Debug)]
pub enum GpuInitError {
    /// `Instance::create_surface` rejected the window / canvas handle.
    CreateSurface(wgpu::CreateSurfaceError),
    /// The adapter request failed. The underlying wgpu error is not
    /// retained, so this variant has no [`std::error::Error::source`].
    NoAdapter,
    /// `Adapter::request_device` failed.
    RequestDevice(wgpu::RequestDeviceError),
}
104
105impl std::fmt::Display for GpuInitError {
106    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
107        match self {
108            Self::CreateSurface(e) => write!(f, "create_surface failed: {e}"),
109            Self::NoAdapter => write!(
110                f,
111                "no compatible adapter — does this system have a Vulkan/Metal/DX12 driver?"
112            ),
113            Self::RequestDevice(e) => write!(f, "request_device failed: {e}"),
114        }
115    }
116}
117
118impl std::error::Error for GpuInitError {
119    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
120        match self {
121            Self::CreateSurface(e) => Some(e),
122            Self::RequestDevice(e) => Some(e),
123            Self::NoAdapter => None,
124        }
125    }
126}
127
128impl From<wgpu::CreateSurfaceError> for GpuInitError {
129    fn from(value: wgpu::CreateSurfaceError) -> Self {
130        Self::CreateSurface(value)
131    }
132}
133
134impl From<wgpu::RequestDeviceError> for GpuInitError {
135    fn from(value: wgpu::RequestDeviceError) -> Self {
136        Self::RequestDevice(value)
137    }
138}
139
// NOTE: the paragraph below describes `GpuRenderer` (declared further
// down in this file), not `GpuLine`. It is kept as a plain `//` comment
// so rustdoc does not merge it into `GpuLine`'s documentation, where
// its `Self::…` links would refer to `GpuLine` instead.
//
// WGPU-backed renderer. Owns the device, queue, and surface
// bound to the host's window. `GpuRenderer::render` is the GPU.1
// clear-to-colour path; `GpuRenderer::render_chunk` is GPU.3's
// single-chunk DDA marcher.
//
// The window is consumed only at construction — `wgpu`'s
// `Surface<'static>` keeps its own `Arc` clone of the handle, so
// the renderer holds no window field of its own.

/// A world-space line segment for [`GpuRenderer::draw_lines_deferred`].
/// `color` is straight RGBA in `0..=1` (the alpha drives the over-blend);
/// `width_px` is the screen-space thickness; `depth_test` occludes the
/// segment behind nearer marched geometry.
#[derive(Clone, Copy, Debug)]
pub struct GpuLine {
    /// First endpoint, world space.
    pub a: [f32; 3],
    /// Second endpoint, world space.
    pub b: [f32; 3],
    /// Straight (non-premultiplied) RGBA, each channel in `0..=1`.
    pub color: [f32; 4],
    /// Screen-space thickness in pixels; `build_line_vertices` treats
    /// anything below `1.0` as `1.0`.
    pub width_px: f32,
    /// Whether the segment is occluded by nearer marched geometry.
    pub depth_test: bool,
}
160
/// World camera basis for projecting [`GpuLine`] endpoints — the same
/// pinhole the scene-DDA pass marches with (`right`/`down`/`forward`
/// orthonormal, `pos` in world voxel units).
#[derive(Clone, Copy, Debug)]
pub struct GpuLineCamera {
    /// Eye position in world voxel units.
    pub pos: [f32; 3],
    /// Camera-space +x axis expressed in world space.
    pub right: [f32; 3],
    /// Camera-space +y axis expressed in world space (screen-down).
    pub down: [f32; 3],
    /// View direction; the component along it is what the near plane
    /// clips against.
    pub forward: [f32; 3],
}
171
/// Near plane (camera-forward distance) below which a [`GpuLine`] endpoint
/// is clipped, so the pinhole divide stays finite. `0.0625` = 1/16.
/// `build_line_vertices` uses it both to drop segments that lie wholly
/// behind the plane and as the clip distance for the rest.
const LINE_NEAR_Z: f32 = 0.0625;
/// Depth-test slack (euclidean world distance) so a line resting on the
/// surface it traces doesn't z-fight the marched geometry.
// NOTE(review): presumably uploaded as `LineParams::depth_bias` — confirm
// against the draw path, which is outside this excerpt.
const LINE_DEPTH_BIAS: f32 = 0.5;
178
/// One expanded-quad vertex (`build_line_vertices` output). `pos` is NDC;
/// `depth` is the euclidean world distance of the source endpoint (the
/// marcher's `best_t` metric); `depth_test` is `1.0`/`0.0`.
///
/// `#[repr(C)]` with eight `f32` lanes, i.e. 32 bytes per vertex.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct LineVertex {
    /// Quad corner in NDC (x right, y up).
    pos: [f32; 2],
    /// Euclidean camera-to-endpoint distance, shared by both corners
    /// expanded from the same endpoint.
    depth: f32,
    /// `GpuLine::depth_test` encoded as a float flag.
    depth_test: f32,
    /// Copied unchanged from `GpuLine::color`.
    color: [f32; 4],
}
190
/// `line.wgsl` fragment uniform (std140; 16 bytes).
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct LineParams {
    // NOTE(review): presumably the framebuffer size in pixels, used to
    // index the per-pixel depth buffer — confirm against `line.wgsl`.
    screen_w: u32,
    screen_h: u32,
    /// Depth-test slack applied by the shader.
    depth_bias: f32,
    /// `1` when no scene depth buffer is bound, so the shader must not
    /// index the stand-in buffer (see `LineResources::dummy_depth`).
    no_depth: u32,
}
200
/// Lazy-built debug-line pipeline (L3.2). The bind group is rebuilt each
/// draw (it references the current `scene_dda.depth_buffer`, which the
/// swapchain resize recreates); the pipeline / layout / uniform persist.
struct LineResources {
    /// Render pipeline that draws the expanded line quads.
    pipeline: wgpu::RenderPipeline,
    /// Layout used to rebuild the per-draw bind group.
    bgl: wgpu::BindGroupLayout,
    /// Persistent uniform buffer (see `LineParams`).
    uniform_buf: wgpu::Buffer,
    /// 1-word stand-in bound when no scene depth exists (sprite-only /
    /// empty scene); `no_depth = 1` keeps the shader from indexing it.
    dummy_depth: wgpu::Buffer,
}
212
213/// Project + expand world-space [`GpuLine`]s into screen-space quad
214/// vertices (6 per visible segment) for `line.wgsl`. Mirrors the
215/// scene-DDA pinhole (`forward + ndc_x·half_w·right − ndc_y·half_h·down`)
216/// so lines land on the marched geometry, carrying each endpoint's
217/// euclidean world distance as the depth-test key (= the marcher's
218/// `best_t`). Segments fully behind the near plane are dropped; the rest
219/// are clipped to it.
220fn build_line_vertices(
221    cam: &GpuLineCamera,
222    lines: &[GpuLine],
223    w: u32,
224    h: u32,
225    fov_y: f32,
226) -> Vec<LineVertex> {
227    let aspect = w as f32 / h as f32;
228    let half_h = (fov_y * 0.5).tan();
229    let half_w = half_h * aspect;
230    let (wf, hf) = (w as f32, h as f32);
231
232    let cam_coords = |p: [f32; 3]| -> [f32; 3] {
233        let d = [p[0] - cam.pos[0], p[1] - cam.pos[1], p[2] - cam.pos[2]];
234        [
235            cam.right[0] * d[0] + cam.right[1] * d[1] + cam.right[2] * d[2],
236            cam.down[0] * d[0] + cam.down[1] * d[1] + cam.down[2] * d[2],
237            cam.forward[0] * d[0] + cam.forward[1] * d[1] + cam.forward[2] * d[2],
238        ]
239    };
240    // Camera-space point → (NDC xy, euclidean depth). NDC y is up (+1 top),
241    // matching WebGPU clip space; depth is the marcher's world-t metric.
242    let project = |q: [f32; 3]| -> ([f32; 2], f32) {
243        let inv = 1.0 / q[2];
244        let nx = q[0] * inv / half_w;
245        let ny = -q[1] * inv / half_h;
246        let depth = (q[0] * q[0] + q[1] * q[1] + q[2] * q[2]).sqrt();
247        ([nx, ny], depth)
248    };
249
250    let mut out = Vec::with_capacity(lines.len() * 6);
251    for line in lines {
252        let ca = cam_coords(line.a);
253        let cb = cam_coords(line.b);
254        let (cfa, cfb) = (ca[2], cb[2]);
255        if cfa < LINE_NEAR_Z && cfb < LINE_NEAR_Z {
256            continue;
257        }
258        // Near-clip in segment-parameter space on the forward component.
259        let (mut t0, mut t1) = (0.0f32, 1.0f32);
260        let dz = cfb - cfa;
261        if dz.abs() > f32::EPSILON {
262            let tn = (LINE_NEAR_Z - cfa) / dz;
263            if dz > 0.0 {
264                t0 = t0.max(tn);
265            } else {
266                t1 = t1.min(tn);
267            }
268        }
269        if t0 > t1 {
270            continue;
271        }
272        let lerp3 = |t: f32| {
273            [
274                ca[0] + (cb[0] - ca[0]) * t,
275                ca[1] + (cb[1] - ca[1]) * t,
276                ca[2] + (cb[2] - ca[2]) * t,
277            ]
278        };
279        let (n0, d0) = project(lerp3(t0));
280        let (n1, d1) = project(lerp3(t1));
281
282        // Expand in pixel space for a uniform screen-space thickness.
283        let to_px = |n: [f32; 2]| [(n[0] * 0.5 + 0.5) * wf, (0.5 - n[1] * 0.5) * hf];
284        let to_ndc = |p: [f32; 2]| [p[0] / wf * 2.0 - 1.0, 1.0 - p[1] / hf * 2.0];
285        let p0 = to_px(n0);
286        let p1 = to_px(n1);
287        let (dx, dy) = (p1[0] - p0[0], p1[1] - p0[1]);
288        let len = (dx * dx + dy * dy).sqrt().max(1e-6);
289        let half = line.width_px.max(1.0) * 0.5;
290        let (ex, ey) = (-dy / len * half, dx / len * half);
291
292        let c0a = to_ndc([p0[0] + ex, p0[1] + ey]);
293        let c0b = to_ndc([p0[0] - ex, p0[1] - ey]);
294        let c1a = to_ndc([p1[0] + ex, p1[1] + ey]);
295        let c1b = to_ndc([p1[0] - ex, p1[1] - ey]);
296        let dt = if line.depth_test { 1.0 } else { 0.0 };
297        let vert = |pos: [f32; 2], depth: f32| LineVertex {
298            pos,
299            depth,
300            depth_test: dt,
301            color: line.color,
302        };
303        // Two triangles, cull disabled so winding is irrelevant.
304        out.push(vert(c0a, d0));
305        out.push(vert(c0b, d0));
306        out.push(vert(c1a, d1));
307        out.push(vert(c1a, d1));
308        out.push(vert(c0b, d0));
309        out.push(vert(c1b, d1));
310    }
311    out
312}
313
/// WGPU-backed renderer. Owns the device, queue, and surface
/// bound to the host's window. [`Self::render`] is the GPU.1
/// clear-to-colour path; [`Self::render_chunk`] is GPU.3's
/// single-chunk DDA marcher.
///
/// The window is consumed only at construction — `wgpu`'s
/// `Surface<'static>` keeps its own `Arc` clone of the handle, so
/// the renderer holds no window field of its own.
pub struct GpuRenderer {
    surface: wgpu::Surface<'static>,
    surface_config: wgpu::SurfaceConfiguration,
    device: wgpu::Device,
    queue: wgpu::Queue,
    adapter_info: String,
    clear_colour: [f64; 3],
    frame_count: u32,
    /// Lazy-built on first [`Self::render_chunk`] call; rebuilt when
    /// the swapchain resizes (storage texture must match).
    chunk_dda: Option<ChunkDdaResources>,
    /// Lazy-built on first [`Self::render_grid`] call; same resize
    /// trigger as `chunk_dda`. The two paths share the same blit
    /// pipeline structure but bind different storage layouts.
    grid_dda: Option<GridDdaResources>,
    /// Lazy-built on first [`Self::render_scene`] call. Holds the
    /// multi-grid pipeline + per-grid camera uniforms.
    scene_dda: Option<SceneDdaResources>,
    /// GPU.8 — panoramic sky texture + sampler. Created at
    /// `new` as a 1×1 mid-grey default; [`Self::set_sky_panorama`]
    /// replaces it. The scene-DDA bind group references this each
    /// frame.
    sky_texture: wgpu::Texture,
    sky_view: wgpu::TextureView,
    sky_sampler: wgpu::Sampler,
    /// GPU.8 fog state. `color` is BGRA-style premultiplied (each
    /// channel in [0, 1]); `near` is the world-t distance at which
    /// fog starts kicking in; `far` is the distance at which it's
    /// fully opaque. The shader does
    /// `mix(hit, fog, smoothstep(near, far, t))`.
    fog_color: [f32; 3],
    fog_near: f32,
    fog_far: f32,
    /// GPU.10 — sprites rendered as DDA-marched voxel models (the
    /// precise path; the GPU.9 compute splatter it replaced was
    /// retired in 10.5). Holds the concatenated model registry + the
    /// per-frame instance array; set via [`Self::set_sprite_instances`].
    sprite_registry: Option<sprite_model::SpriteRegistryResident>,
    /// Lazy-built pipeline + uniform for the model-DDA pass.
    sprite_model_dda: Option<SpriteModelDdaResources>,
    /// GPU.10.4 — LOD aggressiveness: step a sprite to the next mip
    /// once a mip-0 voxel projects below this many screen pixels.
    /// Defaults to 4.0 (the empirical sweet spot); the host can tune
    /// via [`Self::set_sprite_lod_px`].
    sprite_lod_px: f32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` is marched at the mip level
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// ladder. `0` disables LOD (always mip-0). Tunable via
    /// [`Self::set_scene_mip_scan_dist`] — the axis-aligned-mip-beams
    /// mitigation (GPU.11.2) pushes it outward if banding appears.
    scene_mip_scan_dist: f32,
    /// Per-face grid side-shades (voxlap setsideshades), packed for the
    /// scene-DDA uniform: `[0]=(top,bot,left,right)`, `[1]=(up,down,_,_)`.
    /// Each is the u8 shade intensity. `[[0;4];2]` = no shading. Set via
    /// [`Self::set_scene_side_shades`].
    scene_side_shades: [[i32; 4]; 2],
    /// Vertical FOV (radians) the last `render_scene` marched with —
    /// cached so [`Self::pixel_ray`] reconstructs the matching view ray
    /// for picking. `0` until the first scene render.
    last_fov_y_rad: f32,
    /// The acquired-but-not-yet-presented swapchain frame from the most
    /// recent deferred render ([`Self::render_scene`] /
    /// [`Self::render_clear_deferred`]). [`Self::present`] shows it as
    /// is; [`Self::paint_egui`] overlays egui first. Lets a host slot a
    /// UI pass between the marcher and present. `None` between present
    /// and the next render.
    pending_frame: Option<(wgpu::SurfaceTexture, wgpu::TextureView)>,
    /// Lazy-built debug-line pipeline (L3.2) — built on the first
    /// [`Self::draw_lines_deferred`] call.
    line_resources: Option<LineResources>,
    /// Persistent debug-line vertex buffer (L3.3) — grown on demand and
    /// reused across frames so a per-frame overlay (hundreds of segments)
    /// costs one `write_buffer`, not a fresh allocation. `line_vbuf_cap`
    /// is its capacity in bytes.
    line_vbuf: Option<wgpu::Buffer>,
    line_vbuf_cap: u64,
    /// Lazy-built `egui-wgpu` paint pipeline; created on the first
    /// [`Self::paint_egui`] call (`hud` feature).
    #[cfg(feature = "hud")]
    egui_renderer: Option<egui_wgpu::Renderer>,
}
396
/// Per-renderer chunk-DDA pipeline state. The compute shader writes
/// into the storage texture; a fullscreen-triangle render pass
/// nearest-neighbour blits it to the swapchain.
struct ChunkDdaResources {
    // NOTE(review): presumably the (width, height) the storage texture
    // was created at, compared against the swapchain to detect a resize
    // — confirm against `render_chunk`.
    storage_size: (u32, u32),
    /// View of the storage texture the compute pass writes.
    storage_view: wgpu::TextureView,
    /// Per-frame uniform buffer (see `ChunkDdaUniform`).
    uniform_buf: wgpu::Buffer,
    /// Bind-group layout of the DDA compute pass.
    bgl_dda: wgpu::BindGroupLayout,
    /// The DDA compute pipeline.
    pipeline_dda: wgpu::ComputePipeline,
    /// Bind group consumed by the blit pass.
    blit_bg: wgpu::BindGroup,
    /// Fullscreen-triangle blit to the swapchain.
    pipeline_blit: wgpu::RenderPipeline,
    // wgpu BindGroups internally Arc their resources, but we keep
    // the handle so the sampler shows up in profiler dumps.
    _sampler: wgpu::Sampler,
}
412
/// Grid-DDA pipeline state held in `GpuRenderer::grid_dda`. Field for
/// field the same shape as `ChunkDdaResources`: a DDA compute pipeline
/// with its layout and uniform, plus a blit pipeline and bind group.
struct GridDdaResources {
    // NOTE(review): presumably the size the storage texture was built
    // for, used to detect swapchain resizes — confirm in `render_grid`.
    storage_size: (u32, u32),
    storage_view: wgpu::TextureView,
    /// Per-frame uniform buffer (see `GridDdaUniform`).
    uniform_buf: wgpu::Buffer,
    bgl_dda: wgpu::BindGroupLayout,
    pipeline_dda: wgpu::ComputePipeline,
    blit_bg: wgpu::BindGroup,
    pipeline_blit: wgpu::RenderPipeline,
    // Unused after construction; kept alive alongside the bind group.
    _sampler: wgpu::Sampler,
}
423
/// Scene-DDA pipeline state held in `GpuRenderer::scene_dda`: the
/// compute pipeline with its layout and uniform, the framebuffer and
/// depth storage buffers it works on, and the blit pipeline + bind
/// group that read the framebuffer.
struct SceneDdaResources {
    // NOTE(review): presumably the (width, height) the buffers below
    // were sized for — confirm against `render_scene`.
    storage_size: (u32, u32),
    /// Framebuffer as a packed-`rgba8unorm` storage **buffer** (row
    /// stride = width), written by the scene + sprite compute passes
    /// and read by the blit. A buffer (not a storage texture) dodges
    /// Chrome-Dawn's tiled write-texture layout (which produced a
    /// 128×256-tiled image); linear + explicit stride is portable.
    framebuffer: wgpu::Buffer,
    /// Per-frame uniform buffer (see `SceneDdaUniform`).
    uniform_buf: wgpu::Buffer,
    bgl_dda: wgpu::BindGroupLayout,
    pipeline_dda: wgpu::ComputePipeline,
    blit_bg: wgpu::BindGroup,
    pipeline_blit: wgpu::RenderPipeline,
    /// GPU.9 — per-pixel world-t depth (f32 bits as u32), sized
    /// `width * height * 4`. The scene pass writes it when sprites
    /// are present; the sprite model-DDA pass reads + composites
    /// against it.
    depth_buffer: wgpu::Buffer,
    /// Picking — a `COPY_DST | MAP_READ` staging copy of `depth_buffer`
    /// so the host can read back the per-pixel world-t after a frame
    /// (e.g. click → which voxel). Same size as `depth_buffer`.
    depth_readback: wgpu::Buffer,
}
447
/// GPU.10.0 — single-sprite model-DDA pipeline: one thread per pixel
/// marches the model voxel volume and composites against the scene
/// depth buffer.
struct SpriteModelDdaResources {
    /// Bind-group layout of the model-DDA compute pass.
    bgl: wgpu::BindGroupLayout,
    /// The model-DDA compute pipeline.
    pipeline: wgpu::ComputePipeline,
    /// Per-frame uniform buffer (see `SpriteModelUniform`).
    uniform_buf: wgpu::Buffer,
}
456
/// Per-frame uniform for the model-DDA pass. Mirrors `Uniform` in
/// `sprite_model_dda.wgsl` (std140). Per-model + per-instance data
/// now live in storage buffers; this holds only the camera, fog, and
/// instance count.
///
/// Layout: `_p0`–`_p3` pad each `[f32; 3]` camera vector out to a
/// 16-byte slot, and `_p6` rounds the struct up to 112 bytes (a
/// multiple of 16).
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SpriteModelUniform {
    cam_pos: [f32; 3],
    _p0: f32,
    cam_right: [f32; 3],
    _p1: f32,
    cam_down: [f32; 3],
    _p2: f32,
    cam_forward: [f32; 3],
    _p3: f32,
    fog_color: [f32; 4],
    screen_size: [u32; 2],
    instance_count: u32,
    fog_far: f32,
    fov_y_rad: f32,
    // NOTE(review): `tiles_x` / `tile_size` presumably describe the
    // screen-tile grid used for instance binning, with `tile_size`
    // carrying `SPRITE_TILE_SIZE` — confirm where the uniform is filled.
    tiles_x: u32,
    tile_size: u32,
    _p6: f32,
}
481
/// GPU.10.3 — sprite screen-tile edge in pixels for instance binning.
// NOTE(review): presumably the value uploaded as
// `SpriteModelUniform::tile_size` — confirm in the sprite pass.
const SPRITE_TILE_SIZE: u32 = 16;
484
485/// Build the per-grid camera storage buffer bound at `scene_dda.wgsl`
486/// binding 15 (read-only). One [`SceneDdaPerGridCamera`] per grid; the
487/// shader only indexes `0..grid_count`. An empty scene pads to one
488/// zeroed element (wgpu rejects a zero-sized storage binding). This
489/// replaces the old fixed `[…; 16]` uniform array, so a scene can hold
490/// any number of grids — the only ceiling is the device's storage size.
491fn upload_grid_cameras(device: &wgpu::Device, cams: &[SceneDdaPerGridCamera]) -> wgpu::Buffer {
492    use wgpu::util::DeviceExt;
493    let one = [SceneDdaPerGridCamera::zeroed()];
494    let src: &[SceneDdaPerGridCamera] = if cams.is_empty() { &one } else { cams };
495    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
496        label: Some("roxlap-gpu scene_dda.grid_cameras"),
497        contents: bytemuck::cast_slice(src),
498        usage: wgpu::BufferUsages::STORAGE,
499    })
500}
501
502// The scene_dda bind group + layout wire occupancy pages 1..=3 at
503// bindings 12..=14 explicitly; keep that in lockstep with the page
504// count. Bump the bindings (here, in the WGSL, and in the bind
505// group) if MAX_OCC_PAGES changes.
506const _: () = assert!(scene::MAX_OCC_PAGES == 4);
507
/// One camera (position + basis) as laid out for the GPU: four
/// `[f32; 3]` vectors, each followed by an `f32` pad so it fills a
/// 16-byte slot — 64 bytes in total. `upload_grid_cameras` uploads a
/// slice of these (one per grid), and `SceneDdaUniform::sky_cam` embeds
/// one.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaPerGridCamera {
    pos: [f32; 3],
    _pad0: f32,
    right: [f32; 3],
    _pad1: f32,
    down: [f32; 3],
    _pad2: f32,
    forward: [f32; 3],
    _pad3: f32,
}
520
521impl SceneDdaPerGridCamera {
522    fn from_camera(c: &Camera) -> Self {
523        Self {
524            pos: c.position,
525            _pad0: 0.0,
526            right: c.right,
527            _pad1: 0.0,
528            down: c.down,
529            _pad2: 0.0,
530            forward: c.forward,
531            _pad3: 0.0,
532        }
533    }
534}
535
/// Per-frame uniform for the scene-DDA compute pass. `#[repr(C)]` with
/// explicit `_pad*` fields keeping each group on a 16-byte boundary;
/// 176 bytes in total.
// NOTE(review): presumably mirrors the uniform struct declared in
// `scene_dda.wgsl` field for field — confirm against the shader.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaUniform {
    fov_y_rad: f32,
    grid_count: u32,
    max_outer_steps: u32,
    _pad0: u32,
    screen_size: [u32; 2],
    _pad1: [u32; 2],
    /// GPU.8 — `[r, g, b, fog_near]`. The `near` distance is packed
    /// into the colour's alpha channel to keep std140 alignment
    /// tidy (a bare `f32` after the `vec4` would force extra pads).
    fog_color: [f32; 4],
    fog_far: f32,
    /// GPU.9 — `1` when the sprite pass is active (scene pass then
    /// records `best_t` into the depth buffer), `0` otherwise.
    write_depth: u32,
    /// Occupancy paging: words per storage page (see
    /// `scene::split_occupancy_pages`). Only consulted by the shader
    /// when `occ_num_pages > 1`.
    occ_page_words: u32,
    /// Number of real occupancy pages (1 on multi-GiB GPUs → the
    /// shader takes a branch-free single-page read).
    occ_num_pages: u32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` marches at mip
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// count. `0` disables LOD (always mip-0).
    mip_scan_dist: f32,
    _pad2: u32,
    _pad3: u32,
    _pad4: u32,
    /// World camera used only to derive the per-pixel sky direction —
    /// always valid, so a `grid_count == 0` (sprite-only / empty) scene
    /// still paints a proper sky instead of a degenerate `(0,0,1)`
    /// (whose `atan2(0,0)` sky lookup samples black).
    sky_cam: SceneDdaPerGridCamera,
    /// Per-face side-shade intensities (voxlap setsideshades), each the
    /// u8 shade subtracted from a voxel's brightness byte at a hit.
    /// `side_shades0 = (top, bot, left, right)`,
    /// `side_shades1 = (up, down, _, _)`. All-zero = no shading.
    side_shades0: [i32; 4],
    side_shades1: [i32; 4],
}
580
/// Uniform for the grid-DDA compute pass: camera position and basis,
/// vertical FOV, screen size, and the grid's extent. `fov_y_rad` rides
/// in the fourth lane after `camera_forward`, and `_pad*` fill the
/// other `[_; 3]` vectors out to 16 bytes; 112 bytes in total.
// NOTE(review): field meanings beyond their names (e.g. the unit of
// `vsid`, whether `origin_chunk` is a minimum corner) are not visible
// in this excerpt — confirm against the grid shader.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct GridDdaUniform {
    camera_pos: [f32; 3],
    _pad0: f32,
    camera_right: [f32; 3],
    _pad1: f32,
    camera_down: [f32; 3],
    _pad2: f32,
    camera_forward: [f32; 3],
    fov_y_rad: f32,
    screen_size: [u32; 2],
    vsid: u32,
    max_outer_steps: u32,
    chunks_dims: [u32; 3],
    _pad3: u32,
    origin_chunk: [i32; 3],
    _pad4: u32,
}
600
/// Uniform for the single-chunk DDA compute pass: camera position and
/// basis, vertical FOV, screen size, `vsid` and the scan-distance cap.
/// `fov_y_rad` rides in the fourth lane after `camera_forward`, and
/// `_pad0`–`_pad2` fill the other vectors out to 16 bytes; 80 bytes in
/// total.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct ChunkDdaUniform {
    camera_pos: [f32; 3],
    _pad0: f32,
    camera_right: [f32; 3],
    _pad1: f32,
    camera_down: [f32; 3],
    _pad2: f32,
    camera_forward: [f32; 3],
    fov_y_rad: f32,
    screen_size: [u32; 2],
    vsid: u32,
    max_scan_dist: u32,
}
616
617impl GpuRenderer {
618    /// Stand up the device + surface + swapchain on `window`. Async
619    /// because `wgpu::Adapter`/`Device` requests are.
620    ///
621    /// `window` is any [`raw-window-handle`] provider (winit, SDL,
622    /// GLFW, …) wrapped in an `Arc`; `size` is its initial physical
623    /// framebuffer size in pixels — passed explicitly so the renderer
624    /// stays decoupled from any one windowing library's size API.
625    ///
626    /// [`raw-window-handle`]: raw_window_handle
627    ///
628    /// # Errors
629    /// Returns [`GpuInitError`] if surface creation, adapter
630    /// selection, or device request fails. Hosts treat any error as
631    /// "fall back to the CPU path".
632    pub async fn new<W>(
633        window: Arc<W>,
634        size: (u32, u32),
635        settings: GpuRendererSettings,
636    ) -> Result<Self, GpuInitError>
637    where
638        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
639    {
640        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle());
641        let surface = instance.create_surface(window.clone())?;
642        let adapter = Self::request_adapter(&instance, Some(&surface), settings).await?;
643        let (device, queue) = Self::request_device(&adapter).await?;
644        Ok(Self::finish_init(
645            &adapter, device, queue, surface, size, settings,
646        ))
647    }
648
    /// wasm/WebGPU: build the renderer against an HTML `canvas`. No
    /// `Send + Sync` bound — wgpu's surface/device/queue are `!Send` on
    /// the `+atomics` shared-memory wasm build, and the browser host is
    /// single-threaded (`Rc<RefCell<…>>`). The native generic-`W` entry
    /// (which carries the bound) isn't reachable on wasm.
    ///
    /// Probes for an adapter **and a device before** `create_surface`:
    /// on wasm, creating the surface calls `canvas.getContext("webgpu")`,
    /// which permanently locks the canvas's context type. If we bound it
    /// and then GPU init failed, a CPU/WebGL2 fallback on the *same*
    /// canvas (the facade clones the handle, but it's the same DOM
    /// element) would fail with "no webgl2 context". Probing first leaves
    /// the canvas pristine when WebGPU is unavailable.
    ///
    /// # Errors
    /// See [`Self::new`].
    #[cfg(target_arch = "wasm32")]
    pub async fn new_from_canvas(
        canvas: web_sys::HtmlCanvasElement,
        size: (u32, u32),
        settings: GpuRendererSettings,
    ) -> Result<Self, GpuInitError> {
        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle());
        // Probe adapter AND device before binding the canvas — both
        // `requestAdapter` and `requestDevice` can fail on wasm, and
        // `create_surface` permanently locks the canvas to a WebGPU
        // context. Creating the surface last keeps the canvas pristine
        // for the CPU/WebGL2 fallback on any GPU-init failure.
        let adapter = Self::request_adapter(&instance, None, settings).await?;
        let (device, queue) = Self::request_device(&adapter).await?;
        // Only now touch the canvas: adapter and device are both known good.
        let surface = instance.create_surface(wgpu::SurfaceTarget::Canvas(canvas))?;
        Ok(Self::finish_init(
            &adapter, device, queue, surface, size, settings,
        ))
    }
684
685    /// Pick a GPU adapter at the settings' power preference. `None`
686    /// `compatible_surface` is used on the wasm canvas path so the probe
687    /// doesn't bind the canvas's context (see [`Self::new_from_canvas`]);
688    /// WebGPU exposes a single surface-independent adapter, so this is
689    /// safe there.
690    async fn request_adapter(
691        instance: &wgpu::Instance,
692        compatible_surface: Option<&wgpu::Surface<'static>>,
693        settings: GpuRendererSettings,
694    ) -> Result<wgpu::Adapter, GpuInitError> {
695        let power_preference = match settings.power_preference {
696            PowerPreference::Low => wgpu::PowerPreference::LowPower,
697            PowerPreference::High => wgpu::PowerPreference::HighPerformance,
698        };
699        instance
700            .request_adapter(&wgpu::RequestAdapterOptions {
701                power_preference,
702                compatible_surface,
703                force_fallback_adapter: false,
704            })
705            .await
706            .map_err(|_| GpuInitError::NoAdapter)
707    }
708
709    /// Request the device + queue from `adapter`. Pulled out of
710    /// [`Self::finish_init`] so the wasm canvas path can validate the
711    /// device **before** `create_surface` binds the canvas's WebGPU
712    /// context — if the device request fails (e.g. a browser that
713    /// rejects a wgpu-sent limit), the canvas stays pristine for the
714    /// CPU/WebGL2 fallback instead of being poisoned.
715    async fn request_device(
716        adapter: &wgpu::Adapter,
717    ) -> Result<(wgpu::Device, wgpu::Queue), GpuInitError> {
718        Ok(adapter
719            .request_device(&wgpu::DeviceDescriptor {
720                label: Some("roxlap-gpu device"),
721                required_features: wgpu::Features::empty(),
722                required_limits: pick_required_limits(&adapter.limits()),
723                experimental_features: wgpu::ExperimentalFeatures::disabled(),
724                memory_hints: wgpu::MemoryHints::default(),
725                trace: wgpu::Trace::Off,
726            })
727            .await?)
728    }
729
730    /// Shared swapchain → sky/sampler setup, run after the adapter +
731    /// device + surface exist (the surface comes from a window handle on
732    /// native, or an HTML canvas on wasm — created last on wasm so a
733    /// failed device request never touches the canvas).
734    fn finish_init(
735        adapter: &wgpu::Adapter,
736        device: wgpu::Device,
737        queue: wgpu::Queue,
738        surface: wgpu::Surface<'static>,
739        size: (u32, u32),
740        settings: GpuRendererSettings,
741    ) -> Self {
742        let info = adapter.get_info();
743        let adapter_info = format!(
744            "{name} ({backend:?}, {device_type:?})",
745            name = info.name,
746            backend = info.backend,
747            device_type = info.device_type,
748        );
749
750        let caps = surface.get_capabilities(adapter);
751        // Pick a NON-sRGB, 8-bit swapchain format. Voxlap colours are
752        // already sRGB-encoded (the slab bytes are display-ready,
753        // matching what the CPU softbuffer path writes straight to the
754        // framebuffer with no conversion); an sRGB swapchain would
755        // re-apply the gamma curve, washing the look out. We also
756        // *prefer 8-bit BGRA/RGBA* over any other non-sRGB format: some
757        // adapters (e.g. NVK) advertise a 16-bit-unorm format first,
758        // and wgpu 29 gates `create_view` on 16-bit-norm formats behind
759        // the `TEXTURE_FORMAT_16BIT_NORM` device feature (which we don't
760        // enable, to stay WebGPU-portable). Falls back to the first
761        // non-sRGB format, then `caps.formats[0]`.
762        let surface_format = caps
763            .formats
764            .iter()
765            .copied()
766            .find(|f| {
767                matches!(
768                    f,
769                    wgpu::TextureFormat::Bgra8Unorm | wgpu::TextureFormat::Rgba8Unorm
770                )
771            })
772            .or_else(|| caps.formats.iter().copied().find(|f| !f.is_srgb()))
773            .unwrap_or(caps.formats[0]);
774        let present_mode = if settings.uncapped_present {
775            pick_present_mode(&caps.present_modes)
776        } else {
777            wgpu::PresentMode::Fifo
778        };
779        // GPU.11.2 — surface the present mode: `Fifo` is vsync-capped
780        // (FPS pinned to refresh rate → compute optimisations like the
781        // mip LOD won't show up in the FPS counter). Mailbox/Immediate
782        // are uncapped. Wayland under Mesa frequently offers only Fifo.
783        eprintln!(
784            "roxlap-gpu: present mode = {present_mode:?} (available: {:?})",
785            caps.present_modes,
786        );
787        let (init_w, init_h) = size;
788        let surface_config = wgpu::SurfaceConfiguration {
789            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
790            format: surface_format,
791            width: init_w.max(1),
792            height: init_h.max(1),
793            present_mode,
794            alpha_mode: caps.alpha_modes[0],
795            view_formats: vec![],
796            desired_maximum_frame_latency: 2,
797        };
798        surface.configure(&device, &surface_config);
799
800        // GPU.8 default sky: a 1×1 mid-grey texture. Hosts replace
801        // it via `set_sky_panorama` with a real equirectangular
802        // panorama; the default stops the shader sampling
803        // uninitialised memory before that happens.
804        let default_sky_pixel = [0x80u8, 0x80, 0x80, 0xff];
805        let (sky_texture, sky_view) = create_sky_texture(&device, 1, 1, &default_sky_pixel);
806        queue.write_texture(
807            wgpu::TexelCopyTextureInfo {
808                texture: &sky_texture,
809                mip_level: 0,
810                origin: wgpu::Origin3d::ZERO,
811                aspect: wgpu::TextureAspect::All,
812            },
813            &default_sky_pixel,
814            wgpu::TexelCopyBufferLayout {
815                offset: 0,
816                bytes_per_row: Some(4),
817                rows_per_image: Some(1),
818            },
819            wgpu::Extent3d {
820                width: 1,
821                height: 1,
822                depth_or_array_layers: 1,
823            },
824        );
825        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
826            label: Some("roxlap-gpu sky_sampler"),
827            // Voxlap-convention panorama: u = elevation [0, 1]
828            // (Repeat is a no-op since values don't go outside),
829            // v = azimuth (wraps 360° — Repeat is required).
830            address_mode_u: wgpu::AddressMode::Repeat,
831            address_mode_v: wgpu::AddressMode::Repeat,
832            address_mode_w: wgpu::AddressMode::ClampToEdge,
833            mag_filter: wgpu::FilterMode::Linear,
834            min_filter: wgpu::FilterMode::Linear,
835            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
836            ..Default::default()
837        });
838
839        Self {
840            surface,
841            surface_config,
842            device,
843            queue,
844            adapter_info,
845            clear_colour: settings.clear_colour,
846            frame_count: 0,
847            chunk_dda: None,
848            grid_dda: None,
849            scene_dda: None,
850            sky_texture,
851            sky_view,
852            sky_sampler,
853            // Fog disabled by default — voxlap's CPU rasterizer
854            // also runs without fog in the scene-demo, so matching
855            // it means no GPU fog out of the box. Hosts can opt in
856            // via `set_fog` (e.g. for atmospheric far-LOD masking).
857            fog_color: [0.66, 0.74, 0.88],
858            fog_near: 0.0,
859            fog_far: 1.0e30,
860            sprite_registry: None,
861            sprite_model_dda: None,
862            // GPU.10.4 — default LOD threshold: step to a coarser mip
863            // once a voxel projects below 4 px. Empirically the best
864            // quality/cost tradeoff; the host can override.
865            sprite_lod_px: 4.0,
866            // GPU.11.1 — matches the CPU demo's mip_scan_dist=64.
867            scene_mip_scan_dist: 64.0,
868            scene_side_shades: [[0; 4]; 2],
869            last_fov_y_rad: 0.0,
870            pending_frame: None,
871            line_resources: None,
872            line_vbuf: None,
873            line_vbuf_cap: 0,
874            #[cfg(feature = "hud")]
875            egui_renderer: None,
876        }
877    }
878
879    /// Synchronous wrapper for hosts that don't have an async
880    /// runtime. Internally `pollster::block_on`s [`Self::new`].
881    ///
882    /// # Errors
883    /// See [`Self::new`].
884    #[cfg(not(target_arch = "wasm32"))]
885    pub fn new_blocking<W>(
886        window: Arc<W>,
887        size: (u32, u32),
888        settings: GpuRendererSettings,
889    ) -> Result<Self, GpuInitError>
890    where
891        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
892    {
893        pollster::block_on(Self::new(window, size, settings))
894    }
895
896    /// Human-readable adapter description — name + backend +
897    /// device type. The demo host prints this in the title bar.
898    pub fn adapter_info(&self) -> &str {
899        &self.adapter_info
900    }
901
902    /// Borrow the underlying wgpu device — hosts use this to build
903    /// chunk uploads (`GpuChunkResident::upload(gpu.device(), …)`).
904    pub fn device(&self) -> &wgpu::Device {
905        &self.device
906    }
907
908    /// Borrow the wgpu queue — hosts use this for read-back paths
909    /// (`GpuChunkResident::read_voxel_blocking(gpu.device(), gpu.queue(), …)`).
910    pub fn queue(&self) -> &wgpu::Queue {
911        &self.queue
912    }
913
914    /// GPU.8 — upload an equirectangular panorama as the scene's
915    /// sky texture. `rgba` is row-major, `width × height` pixels,
916    /// 4 bytes per pixel (R, G, B, A). The shader samples it with
917    /// `u = atan2(dir.x, dir.y) / (2π) + 0.5` (azimuth) and
918    /// `v = acos(-dir.z) / π` (elevation), matching standard
919    /// equirectangular layout (top of image = zenith for voxlap's
920    /// `+z = down` basis).
921    ///
922    /// # Panics
923    /// If `rgba.len() != (width * height * 4) as usize`.
924    pub fn set_sky_panorama(&mut self, rgba: &[u8], width: u32, height: u32) {
925        assert_eq!(
926            rgba.len(),
927            (width as usize) * (height as usize) * 4,
928            "set_sky_panorama: expected w*h*4 bytes, got {}",
929            rgba.len(),
930        );
931        let (tex, view) = create_sky_texture(&self.device, width, height, rgba);
932        // Upload pixel data via `queue.write_texture` so we don't
933        // have to map the buffer manually.
934        self.queue.write_texture(
935            wgpu::TexelCopyTextureInfo {
936                texture: &tex,
937                mip_level: 0,
938                origin: wgpu::Origin3d::ZERO,
939                aspect: wgpu::TextureAspect::All,
940            },
941            rgba,
942            wgpu::TexelCopyBufferLayout {
943                offset: 0,
944                bytes_per_row: Some(width * 4),
945                rows_per_image: Some(height),
946            },
947            wgpu::Extent3d {
948                width,
949                height,
950                depth_or_array_layers: 1,
951            },
952        );
953        self.sky_texture = tex;
954        self.sky_view = view;
955    }
956
957    /// GPU.8 — set the fog blend. `color` is per-channel [0, 1];
958    /// `near`/`far` are world-space ray distances in voxel units.
959    /// Hits with `t < near` show their full colour; hits with
960    /// `t > far` show `color` exclusively; in between is a
961    /// smoothstep blend.
962    pub fn set_fog(&mut self, color: [f32; 3], near: f32, far: f32) {
963        self.fog_color = color;
964        self.fog_near = near;
965        self.fog_far = far.max(near + 1.0);
966    }
967
968    /// Re-configure the swapchain to a new physical size. Call from
969    /// `WindowEvent::Resized`. Drops the chunk-DDA storage texture
970    /// so [`Self::render_chunk`] rebuilds it at the new size.
971    pub fn resize(&mut self, width: u32, height: u32) {
972        if width == 0 || height == 0 {
973            return;
974        }
975        self.surface_config.width = width;
976        self.surface_config.height = height;
977        self.surface.configure(&self.device, &self.surface_config);
978        self.chunk_dda = None;
979        self.grid_dda = None;
980        self.scene_dda = None;
981    }
982
983    /// Acquire the next swapchain frame, or `None` to skip this frame.
984    /// wgpu 29's `get_current_texture` returns a
985    /// [`wgpu::CurrentSurfaceTexture`] status enum (was
986    /// `Result<_, SurfaceError>`): an outdated/lost surface reconfigures
987    /// and skips, transient statuses just skip.
988    fn acquire_frame(&self) -> Option<wgpu::SurfaceTexture> {
989        use wgpu::CurrentSurfaceTexture as C;
990        match self.surface.get_current_texture() {
991            C::Success(t) | C::Suboptimal(t) => Some(t),
992            C::Outdated | C::Lost => {
993                self.surface.configure(&self.device, &self.surface_config);
994                None
995            }
996            C::Timeout | C::Occluded | C::Validation => None,
997        }
998    }
999
1000    /// GPU.1 render: single render pass clearing the swapchain to a
1001    /// slowly drifting colour, then presenting. Voxels arrive in
1002    /// GPU.3+.
1003    pub fn render(&mut self) {
1004        let Some(surf_tex) = self.acquire_frame() else {
1005            return;
1006        };
1007        let view = surf_tex
1008            .texture
1009            .create_view(&wgpu::TextureViewDescriptor::default());
1010
1011        // Slow colour drift so the user can tell the GPU path is
1012        // actually presenting frames vs. e.g. a frozen window.
1013        // Wrap at 2π/0.005 frames (~1257) so the cast stays exact.
1014        let phase = f64::from(self.frame_count % 1257) * 0.005;
1015        let [r, g, b] = self.clear_colour;
1016        let drift = (phase.sin() * 0.04 + 0.04).clamp(0.0, 0.1);
1017        let clear = wgpu::Color {
1018            r: (r + drift).clamp(0.0, 1.0),
1019            g: (g + drift * 0.5).clamp(0.0, 1.0),
1020            b: (b + drift * 0.25).clamp(0.0, 1.0),
1021            a: 1.0,
1022        };
1023
1024        let mut encoder = self
1025            .device
1026            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1027                label: Some("roxlap-gpu encoder"),
1028            });
1029        {
1030            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1031                label: Some("roxlap-gpu clear"),
1032                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1033                    view: &view,
1034                    depth_slice: None,
1035                    resolve_target: None,
1036                    ops: wgpu::Operations {
1037                        load: wgpu::LoadOp::Clear(clear),
1038                        store: wgpu::StoreOp::Store,
1039                    },
1040                })],
1041                depth_stencil_attachment: None,
1042                timestamp_writes: None,
1043                occlusion_query_set: None,
1044                multiview_mask: None,
1045            });
1046        }
1047        self.queue.submit(std::iter::once(encoder.finish()));
1048        surf_tex.present();
1049        self.frame_count = self.frame_count.wrapping_add(1);
1050    }
1051
1052    /// GPU.3 single-chunk render. Dispatches `chunk_dda.wgsl`
1053    /// against `resident`'s storage buffers, then blits the
1054    /// low-res storage texture to the swapchain. `camera.position`
1055    /// is in **chunk-local** voxel units (host translates from
1056    /// world coords). `max_scan_dist` caps the per-pixel DDA loop —
1057    /// scene-demo wires `+` / `-` through this each frame.
1058    ///
1059    /// # Panics
1060    /// Internally `expect`s the chunk-DDA resources to be built —
1061    /// they are constructed at the top of this function if missing.
1062    /// Cannot fire in normal control flow.
1063    pub fn render_chunk(
1064        &mut self,
1065        resident: &GpuChunkResident,
1066        camera: &Camera,
1067        max_scan_dist: u32,
1068    ) {
1069        let Some(surf_tex) = self.acquire_frame() else {
1070            return;
1071        };
1072        let surf_view = surf_tex
1073            .texture
1074            .create_view(&wgpu::TextureViewDescriptor::default());
1075
1076        let surface_w = self.surface_config.width;
1077        let surface_h = self.surface_config.height;
1078        let surface_format = self.surface_config.format;
1079
1080        // Lazy-build chunk-DDA resources; rebuild when the swapchain
1081        // grew or shrank.
1082        let needs_build = match &self.chunk_dda {
1083            Some(r) => r.storage_size != (surface_w, surface_h),
1084            None => true,
1085        };
1086        if needs_build {
1087            self.chunk_dda = Some(self.build_chunk_dda(surface_w, surface_h, surface_format));
1088        }
1089        let dda = self.chunk_dda.as_ref().expect("just built");
1090
1091        // Update uniforms.
1092        let uniform = ChunkDdaUniform {
1093            camera_pos: camera.position,
1094            _pad0: 0.0,
1095            camera_right: camera.right,
1096            _pad1: 0.0,
1097            camera_down: camera.down,
1098            _pad2: 0.0,
1099            camera_forward: camera.forward,
1100            fov_y_rad: camera.fov_y_rad,
1101            screen_size: [surface_w, surface_h],
1102            vsid: resident.vsid,
1103            max_scan_dist,
1104        };
1105        self.queue
1106            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1107
1108        // Per-frame DDA bind group — references the chunk's buffers
1109        // so we rebuild every frame (the resident can change between
1110        // calls).
1111        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1112            label: Some("roxlap-gpu chunk_dda.bg"),
1113            layout: &dda.bgl_dda,
1114            entries: &[
1115                wgpu::BindGroupEntry {
1116                    binding: 0,
1117                    resource: dda.uniform_buf.as_entire_binding(),
1118                },
1119                wgpu::BindGroupEntry {
1120                    binding: 1,
1121                    resource: resident.occupancy.as_entire_binding(),
1122                },
1123                wgpu::BindGroupEntry {
1124                    binding: 2,
1125                    resource: resident.color_offsets.as_entire_binding(),
1126                },
1127                wgpu::BindGroupEntry {
1128                    binding: 3,
1129                    resource: resident.colors.as_entire_binding(),
1130                },
1131                wgpu::BindGroupEntry {
1132                    binding: 4,
1133                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1134                },
1135            ],
1136        });
1137
1138        let mut encoder = self
1139            .device
1140            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1141                label: Some("roxlap-gpu chunk encoder"),
1142            });
1143        {
1144            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1145                label: Some("roxlap-gpu chunk_dda compute"),
1146                timestamp_writes: None,
1147            });
1148            cpass.set_pipeline(&dda.pipeline_dda);
1149            cpass.set_bind_group(0, &dda_bg, &[]);
1150            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1151        }
1152        {
1153            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1154                label: Some("roxlap-gpu chunk_dda blit"),
1155                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1156                    view: &surf_view,
1157                    depth_slice: None,
1158                    resolve_target: None,
1159                    ops: wgpu::Operations {
1160                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1161                        store: wgpu::StoreOp::Store,
1162                    },
1163                })],
1164                depth_stencil_attachment: None,
1165                timestamp_writes: None,
1166                occlusion_query_set: None,
1167                multiview_mask: None,
1168            });
1169            rpass.set_pipeline(&dda.pipeline_blit);
1170            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1171            rpass.draw(0..3, 0..1);
1172        }
1173        self.queue.submit(std::iter::once(encoder.finish()));
1174        surf_tex.present();
1175        self.frame_count = self.frame_count.wrapping_add(1);
1176    }
1177
1178    fn build_chunk_dda(
1179        &self,
1180        width: u32,
1181        height: u32,
1182        surface_format: wgpu::TextureFormat,
1183    ) -> ChunkDdaResources {
1184        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1185            label: Some("roxlap-gpu chunk_dda.storage"),
1186            size: wgpu::Extent3d {
1187                width,
1188                height,
1189                depth_or_array_layers: 1,
1190            },
1191            mip_level_count: 1,
1192            sample_count: 1,
1193            dimension: wgpu::TextureDimension::D2,
1194            format: wgpu::TextureFormat::Rgba8Unorm,
1195            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1196            view_formats: &[],
1197        });
1198        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1199
1200        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1201            label: Some("roxlap-gpu chunk_dda.uniform"),
1202            size: std::mem::size_of::<ChunkDdaUniform>() as u64,
1203            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1204            mapped_at_creation: false,
1205        });
1206
1207        let dda_shader = self
1208            .device
1209            .create_shader_module(wgpu::ShaderModuleDescriptor {
1210                label: Some("chunk_dda.wgsl"),
1211                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/chunk_dda.wgsl").into()),
1212            });
1213        let bgl_dda = self
1214            .device
1215            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1216                label: Some("roxlap-gpu chunk_dda.bgl"),
1217                entries: &[
1218                    bgl_uniform_entry(0),
1219                    bgl_storage_entry(1, true),
1220                    bgl_storage_entry(2, true),
1221                    bgl_storage_entry(3, true),
1222                    wgpu::BindGroupLayoutEntry {
1223                        binding: 4,
1224                        visibility: wgpu::ShaderStages::COMPUTE,
1225                        ty: wgpu::BindingType::StorageTexture {
1226                            access: wgpu::StorageTextureAccess::WriteOnly,
1227                            format: wgpu::TextureFormat::Rgba8Unorm,
1228                            view_dimension: wgpu::TextureViewDimension::D2,
1229                        },
1230                        count: None,
1231                    },
1232                ],
1233            });
1234        let dda_pl = self
1235            .device
1236            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1237                label: Some("roxlap-gpu chunk_dda.layout"),
1238                bind_group_layouts: &[Some(&bgl_dda)],
1239                immediate_size: 0,
1240            });
1241        let pipeline_dda = self
1242            .device
1243            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1244                label: Some("roxlap-gpu chunk_dda.pipeline"),
1245                layout: Some(&dda_pl),
1246                module: &dda_shader,
1247                entry_point: Some("render_chunk"),
1248                compilation_options: wgpu::PipelineCompilationOptions::default(),
1249                cache: None,
1250            });
1251
1252        // Fullscreen-triangle blit upscales the storage texture into
1253        // the swapchain. Nearest filter keeps the retro pixel look.
1254        let blit_shader = self
1255            .device
1256            .create_shader_module(wgpu::ShaderModuleDescriptor {
1257                label: Some("blit.wgsl"),
1258                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1259            });
1260        let bgl_blit = self
1261            .device
1262            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1263                label: Some("roxlap-gpu chunk_dda.blit_bgl"),
1264                entries: &[
1265                    wgpu::BindGroupLayoutEntry {
1266                        binding: 0,
1267                        visibility: wgpu::ShaderStages::FRAGMENT,
1268                        ty: wgpu::BindingType::Texture {
1269                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1270                            view_dimension: wgpu::TextureViewDimension::D2,
1271                            multisampled: false,
1272                        },
1273                        count: None,
1274                    },
1275                    wgpu::BindGroupLayoutEntry {
1276                        binding: 1,
1277                        visibility: wgpu::ShaderStages::FRAGMENT,
1278                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1279                        count: None,
1280                    },
1281                ],
1282            });
1283        let blit_pl = self
1284            .device
1285            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1286                label: Some("roxlap-gpu chunk_dda.blit_layout"),
1287                bind_group_layouts: &[Some(&bgl_blit)],
1288                immediate_size: 0,
1289            });
1290        let pipeline_blit = self
1291            .device
1292            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1293                label: Some("roxlap-gpu chunk_dda.blit_pipeline"),
1294                layout: Some(&blit_pl),
1295                vertex: wgpu::VertexState {
1296                    module: &blit_shader,
1297                    entry_point: Some("vs_main"),
1298                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1299                    buffers: &[],
1300                },
1301                fragment: Some(wgpu::FragmentState {
1302                    module: &blit_shader,
1303                    entry_point: Some("fs_main"),
1304                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1305                    targets: &[Some(wgpu::ColorTargetState {
1306                        format: surface_format,
1307                        blend: None,
1308                        write_mask: wgpu::ColorWrites::ALL,
1309                    })],
1310                }),
1311                primitive: wgpu::PrimitiveState::default(),
1312                depth_stencil: None,
1313                multisample: wgpu::MultisampleState::default(),
1314                multiview_mask: None,
1315                cache: None,
1316            });
1317        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1318            label: Some("roxlap-gpu chunk_dda.blit_sampler"),
1319            address_mode_u: wgpu::AddressMode::ClampToEdge,
1320            address_mode_v: wgpu::AddressMode::ClampToEdge,
1321            address_mode_w: wgpu::AddressMode::ClampToEdge,
1322            mag_filter: wgpu::FilterMode::Nearest,
1323            min_filter: wgpu::FilterMode::Nearest,
1324            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1325            ..Default::default()
1326        });
1327        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1328            label: Some("roxlap-gpu chunk_dda.blit_bg"),
1329            layout: &bgl_blit,
1330            entries: &[
1331                wgpu::BindGroupEntry {
1332                    binding: 0,
1333                    resource: wgpu::BindingResource::TextureView(&storage_view),
1334                },
1335                wgpu::BindGroupEntry {
1336                    binding: 1,
1337                    resource: wgpu::BindingResource::Sampler(&sampler),
1338                },
1339            ],
1340        });
1341
1342        ChunkDdaResources {
1343            storage_size: (width, height),
1344            storage_view,
1345            uniform_buf,
1346            bgl_dda,
1347            pipeline_dda,
1348            blit_bg,
1349            pipeline_blit,
1350            _sampler: sampler,
1351        }
1352    }
1353
1354    /// GPU.4 render — outer DDA over chunk indices + inner DDA into
1355    /// non-empty chunks. `camera.position` is in **grid-local**
1356    /// voxel units. `max_outer_steps` caps how many chunks the
1357    /// outer DDA may traverse per ray (scene-demo wires `+ / -`
1358    /// through this).
1359    ///
1360    /// # Panics
1361    /// Internally `expect`s the grid-DDA resources to be built;
1362    /// they are constructed at the top of this function if missing.
1363    pub fn render_grid(&mut self, grid: &GpuGridResident, camera: &Camera, max_outer_steps: u32) {
1364        let Some(surf_tex) = self.acquire_frame() else {
1365            return;
1366        };
1367        let surf_view = surf_tex
1368            .texture
1369            .create_view(&wgpu::TextureViewDescriptor::default());
1370
1371        let surface_w = self.surface_config.width;
1372        let surface_h = self.surface_config.height;
1373        let surface_format = self.surface_config.format;
1374
1375        let needs_build = match &self.grid_dda {
1376            Some(r) => r.storage_size != (surface_w, surface_h),
1377            None => true,
1378        };
1379        if needs_build {
1380            self.grid_dda = Some(self.build_grid_dda(surface_w, surface_h, surface_format));
1381        }
1382        let dda = self.grid_dda.as_ref().expect("just built");
1383
1384        let uniform = GridDdaUniform {
1385            camera_pos: camera.position,
1386            _pad0: 0.0,
1387            camera_right: camera.right,
1388            _pad1: 0.0,
1389            camera_down: camera.down,
1390            _pad2: 0.0,
1391            camera_forward: camera.forward,
1392            fov_y_rad: camera.fov_y_rad,
1393            screen_size: [surface_w, surface_h],
1394            vsid: grid.vsid,
1395            max_outer_steps,
1396            chunks_dims: grid.chunks_dims,
1397            _pad3: 0,
1398            origin_chunk: grid.origin_chunk,
1399            _pad4: 0,
1400        };
1401        self.queue
1402            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1403
1404        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1405            label: Some("roxlap-gpu grid_dda.bg"),
1406            layout: &dda.bgl_dda,
1407            entries: &[
1408                wgpu::BindGroupEntry {
1409                    binding: 0,
1410                    resource: dda.uniform_buf.as_entire_binding(),
1411                },
1412                wgpu::BindGroupEntry {
1413                    binding: 1,
1414                    resource: grid.occupancy.as_entire_binding(),
1415                },
1416                wgpu::BindGroupEntry {
1417                    binding: 2,
1418                    resource: grid.color_offsets.as_entire_binding(),
1419                },
1420                wgpu::BindGroupEntry {
1421                    binding: 3,
1422                    resource: grid.colors.as_entire_binding(),
1423                },
1424                wgpu::BindGroupEntry {
1425                    binding: 4,
1426                    resource: grid.chunk_colors_base.as_entire_binding(),
1427                },
1428                wgpu::BindGroupEntry {
1429                    binding: 5,
1430                    resource: grid.chunk_occupancy.as_entire_binding(),
1431                },
1432                wgpu::BindGroupEntry {
1433                    binding: 6,
1434                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1435                },
1436            ],
1437        });
1438
1439        let mut encoder = self
1440            .device
1441            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1442                label: Some("roxlap-gpu grid encoder"),
1443            });
1444        {
1445            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1446                label: Some("roxlap-gpu grid_dda compute"),
1447                timestamp_writes: None,
1448            });
1449            cpass.set_pipeline(&dda.pipeline_dda);
1450            cpass.set_bind_group(0, &dda_bg, &[]);
1451            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1452        }
1453        {
1454            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1455                label: Some("roxlap-gpu grid_dda blit"),
1456                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1457                    view: &surf_view,
1458                    depth_slice: None,
1459                    resolve_target: None,
1460                    ops: wgpu::Operations {
1461                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1462                        store: wgpu::StoreOp::Store,
1463                    },
1464                })],
1465                depth_stencil_attachment: None,
1466                timestamp_writes: None,
1467                occlusion_query_set: None,
1468                multiview_mask: None,
1469            });
1470            rpass.set_pipeline(&dda.pipeline_blit);
1471            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1472            rpass.draw(0..3, 0..1);
1473        }
1474        self.queue.submit(std::iter::once(encoder.finish()));
1475        surf_tex.present();
1476        self.frame_count = self.frame_count.wrapping_add(1);
1477    }
1478
1479    fn build_grid_dda(
1480        &self,
1481        width: u32,
1482        height: u32,
1483        surface_format: wgpu::TextureFormat,
1484    ) -> GridDdaResources {
1485        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1486            label: Some("roxlap-gpu grid_dda.storage"),
1487            size: wgpu::Extent3d {
1488                width,
1489                height,
1490                depth_or_array_layers: 1,
1491            },
1492            mip_level_count: 1,
1493            sample_count: 1,
1494            dimension: wgpu::TextureDimension::D2,
1495            format: wgpu::TextureFormat::Rgba8Unorm,
1496            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1497            view_formats: &[],
1498        });
1499        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1500
1501        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1502            label: Some("roxlap-gpu grid_dda.uniform"),
1503            size: std::mem::size_of::<GridDdaUniform>() as u64,
1504            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1505            mapped_at_creation: false,
1506        });
1507
1508        let dda_shader = self
1509            .device
1510            .create_shader_module(wgpu::ShaderModuleDescriptor {
1511                label: Some("grid_dda.wgsl"),
1512                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/grid_dda.wgsl").into()),
1513            });
1514        let bgl_dda = self
1515            .device
1516            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1517                label: Some("roxlap-gpu grid_dda.bgl"),
1518                entries: &[
1519                    bgl_uniform_entry(0),
1520                    bgl_storage_entry(1, true),
1521                    bgl_storage_entry(2, true),
1522                    bgl_storage_entry(3, true),
1523                    bgl_storage_entry(4, true),
1524                    bgl_storage_entry(5, true),
1525                    wgpu::BindGroupLayoutEntry {
1526                        binding: 6,
1527                        visibility: wgpu::ShaderStages::COMPUTE,
1528                        ty: wgpu::BindingType::StorageTexture {
1529                            access: wgpu::StorageTextureAccess::WriteOnly,
1530                            format: wgpu::TextureFormat::Rgba8Unorm,
1531                            view_dimension: wgpu::TextureViewDimension::D2,
1532                        },
1533                        count: None,
1534                    },
1535                ],
1536            });
1537        let dda_pl = self
1538            .device
1539            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1540                label: Some("roxlap-gpu grid_dda.layout"),
1541                bind_group_layouts: &[Some(&bgl_dda)],
1542                immediate_size: 0,
1543            });
1544        let pipeline_dda = self
1545            .device
1546            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1547                label: Some("roxlap-gpu grid_dda.pipeline"),
1548                layout: Some(&dda_pl),
1549                module: &dda_shader,
1550                entry_point: Some("render_grid"),
1551                compilation_options: wgpu::PipelineCompilationOptions::default(),
1552                cache: None,
1553            });
1554
1555        let blit_shader = self
1556            .device
1557            .create_shader_module(wgpu::ShaderModuleDescriptor {
1558                label: Some("blit.wgsl"),
1559                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1560            });
1561        let bgl_blit = self
1562            .device
1563            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1564                label: Some("roxlap-gpu grid_dda.blit_bgl"),
1565                entries: &[
1566                    wgpu::BindGroupLayoutEntry {
1567                        binding: 0,
1568                        visibility: wgpu::ShaderStages::FRAGMENT,
1569                        ty: wgpu::BindingType::Texture {
1570                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1571                            view_dimension: wgpu::TextureViewDimension::D2,
1572                            multisampled: false,
1573                        },
1574                        count: None,
1575                    },
1576                    wgpu::BindGroupLayoutEntry {
1577                        binding: 1,
1578                        visibility: wgpu::ShaderStages::FRAGMENT,
1579                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1580                        count: None,
1581                    },
1582                ],
1583            });
1584        let blit_pl = self
1585            .device
1586            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1587                label: Some("roxlap-gpu grid_dda.blit_layout"),
1588                bind_group_layouts: &[Some(&bgl_blit)],
1589                immediate_size: 0,
1590            });
1591        let pipeline_blit = self
1592            .device
1593            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1594                label: Some("roxlap-gpu grid_dda.blit_pipeline"),
1595                layout: Some(&blit_pl),
1596                vertex: wgpu::VertexState {
1597                    module: &blit_shader,
1598                    entry_point: Some("vs_main"),
1599                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1600                    buffers: &[],
1601                },
1602                fragment: Some(wgpu::FragmentState {
1603                    module: &blit_shader,
1604                    entry_point: Some("fs_main"),
1605                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1606                    targets: &[Some(wgpu::ColorTargetState {
1607                        format: surface_format,
1608                        blend: None,
1609                        write_mask: wgpu::ColorWrites::ALL,
1610                    })],
1611                }),
1612                primitive: wgpu::PrimitiveState::default(),
1613                depth_stencil: None,
1614                multisample: wgpu::MultisampleState::default(),
1615                multiview_mask: None,
1616                cache: None,
1617            });
1618        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1619            label: Some("roxlap-gpu grid_dda.blit_sampler"),
1620            address_mode_u: wgpu::AddressMode::ClampToEdge,
1621            address_mode_v: wgpu::AddressMode::ClampToEdge,
1622            address_mode_w: wgpu::AddressMode::ClampToEdge,
1623            mag_filter: wgpu::FilterMode::Nearest,
1624            min_filter: wgpu::FilterMode::Nearest,
1625            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1626            ..Default::default()
1627        });
1628        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1629            label: Some("roxlap-gpu grid_dda.blit_bg"),
1630            layout: &bgl_blit,
1631            entries: &[
1632                wgpu::BindGroupEntry {
1633                    binding: 0,
1634                    resource: wgpu::BindingResource::TextureView(&storage_view),
1635                },
1636                wgpu::BindGroupEntry {
1637                    binding: 1,
1638                    resource: wgpu::BindingResource::Sampler(&sampler),
1639                },
1640            ],
1641        });
1642
1643        GridDdaResources {
1644            storage_size: (width, height),
1645            storage_view,
1646            uniform_buf,
1647            bgl_dda,
1648            pipeline_dda,
1649            blit_bg,
1650            pipeline_blit,
1651            _sampler: sampler,
1652        }
1653    }
1654
1655    /// GPU.5 render — multi-grid scene marcher. `cameras[i]` is the
1656    /// world camera transformed into grid `i`'s local frame
1657    /// (caller-supplied; see scene-demo's `redraw_gpu` for the
1658    /// glam-based transform). `fov_y_rad` is the shared vertical
1659    /// FOV; `max_outer_steps` caps per-ray chunk-DDA work for each
1660    /// grid.
1661    ///
1662    /// # Panics
1663    /// If `cameras.len() != scene.grid_count`.
1664    /// `cameras[i]` is grid `i`'s world camera transformed into that
1665    /// grid's local frame (the grid marcher works in grid-local space).
1666    /// `sprite_camera` is the **world** camera: instanced sprites carry
1667    /// world-space positions/transforms, so they must project through
1668    /// the untransformed world camera — not `cameras[0]`, which is only
1669    /// the world camera when grid 0 is at identity.
1670    pub fn render_scene(
1671        &mut self,
1672        scene: &GpuSceneResident,
1673        cameras: &[Camera],
1674        sprite_camera: &Camera,
1675        fov_y_rad: f32,
1676        max_outer_steps: u32,
1677    ) {
1678        assert_eq!(
1679            cameras.len(),
1680            scene.grid_count as usize,
1681            "render_scene: {} cameras supplied, scene has {} grids",
1682            cameras.len(),
1683            scene.grid_count,
1684        );
1685        self.last_fov_y_rad = fov_y_rad; // cached for pixel_ray (picking)
1686
1687        // Deferred present: drop any frame a prior render left
1688        // un-presented (a host that skipped present/paint_egui) so we
1689        // never hold two outstanding swapchain textures.
1690        self.pending_frame = None;
1691        let Some(surf_tex) = self.acquire_frame() else {
1692            return;
1693        };
1694        let surf_view = surf_tex
1695            .texture
1696            .create_view(&wgpu::TextureViewDescriptor::default());
1697
1698        let surface_w = self.surface_config.width;
1699        let surface_h = self.surface_config.height;
1700        let surface_format = self.surface_config.format;
1701
1702        let needs_build = match &self.scene_dda {
1703            Some(r) => r.storage_size != (surface_w, surface_h),
1704            None => true,
1705        };
1706        if needs_build {
1707            self.scene_dda = Some(self.build_scene_dda(surface_w, surface_h, surface_format));
1708        }
1709        // GPU.9 — materialise the sprite pipeline the first frame
1710        // sprites are present (before the immutable `dda` borrow).
1711        // GPU.10.0 — build the model-DDA pipeline the first frame a
1712        // sprite registry is present.
1713        if self.sprite_registry.is_some() && self.sprite_model_dda.is_none() {
1714            self.sprite_model_dda = Some(self.build_sprite_model_dda());
1715        }
1716        // GPU.10.3 — frustum-cull + screen-tile-bin the sprite instances
1717        // (needs &mut self for buffer growth, so before the immutable
1718        // scene_dda borrow). Captures (visible_count, tiles_x); None when
1719        // nothing is in view.
1720        let sprite_pass: Option<(u32, u32)> = if let Some(reg) = self.sprite_registry.as_mut() {
1721            if reg.instance_capacity > 0 {
1722                // World camera — sprite positions/transforms are world-
1723                // space (independent of any grid's transform).
1724                let cam = sprite_camera;
1725                #[allow(clippy::cast_precision_loss)]
1726                let aspect = surface_w as f32 / surface_h as f32;
1727                let half_h = (fov_y_rad * 0.5).tan();
1728                let frustum = sprite_model::ViewFrustum {
1729                    pos: cam.position,
1730                    right: cam.right,
1731                    down: cam.down,
1732                    forward: cam.forward,
1733                    half_w: half_h * aspect,
1734                    half_h,
1735                    far: 1.0e9,
1736                };
1737                let (visible, tiles_x, _tiles_y) = reg.cull_bin_upload(
1738                    &self.device,
1739                    &self.queue,
1740                    &frustum,
1741                    surface_w,
1742                    surface_h,
1743                    SPRITE_TILE_SIZE,
1744                    self.sprite_lod_px,
1745                );
1746                (visible > 0).then_some((visible, tiles_x))
1747            } else {
1748                None
1749            }
1750        } else {
1751            None
1752        };
1753        let dda = self.scene_dda.as_ref().expect("just built");
1754
1755        // Pack per-grid cameras into a runtime-sized storage buffer
1756        // (binding 15) — no fixed cap on grid count.
1757        let cam_vec: Vec<SceneDdaPerGridCamera> = cameras
1758            .iter()
1759            .map(SceneDdaPerGridCamera::from_camera)
1760            .collect();
1761        let grid_cameras = upload_grid_cameras(&self.device, &cam_vec);
1762        let uniform = SceneDdaUniform {
1763            fov_y_rad,
1764            grid_count: scene.grid_count,
1765            max_outer_steps,
1766            _pad0: 0,
1767            screen_size: [surface_w, surface_h],
1768            _pad1: [0; 2],
1769            fog_color: [
1770                self.fog_color[0],
1771                self.fog_color[1],
1772                self.fog_color[2],
1773                self.fog_near,
1774            ],
1775            fog_far: self.fog_far,
1776            // L3.1: always write scene depth. Costs one storage store per
1777            // pixel, and the depth is needed for sprite z-test, sprite-less
1778            // `pick_depth`, and `draw_lines` occlusion alike.
1779            write_depth: 1,
1780            occ_page_words: scene.occupancy_page_words,
1781            occ_num_pages: scene.occupancy_num_pages,
1782            mip_scan_dist: self.scene_mip_scan_dist,
1783            _pad2: 0,
1784            _pad3: 0,
1785            _pad4: 0,
1786            // Sky direction comes from the world (sprite) camera, so a
1787            // grid-less sprite-only scene still paints a real sky.
1788            sky_cam: SceneDdaPerGridCamera::from_camera(sprite_camera),
1789            side_shades0: self.scene_side_shades[0],
1790            side_shades1: self.scene_side_shades[1],
1791        };
1792        self.queue
1793            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1794
1795        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1796            label: Some("roxlap-gpu scene_dda.bg"),
1797            layout: &dda.bgl_dda,
1798            entries: &[
1799                wgpu::BindGroupEntry {
1800                    binding: 0,
1801                    resource: dda.uniform_buf.as_entire_binding(),
1802                },
1803                // Occupancy page 0 at binding 1; pages 1..MAX_OCC_PAGES
1804                // at bindings 12.. (see GPU.X occupancy paging).
1805                wgpu::BindGroupEntry {
1806                    binding: 1,
1807                    resource: scene.occupancy_pages[0].as_entire_binding(),
1808                },
1809                wgpu::BindGroupEntry {
1810                    binding: 2,
1811                    resource: scene.all_color_offsets.as_entire_binding(),
1812                },
1813                wgpu::BindGroupEntry {
1814                    binding: 3,
1815                    resource: scene.all_colors.as_entire_binding(),
1816                },
1817                wgpu::BindGroupEntry {
1818                    binding: 4,
1819                    resource: scene.all_chunk_colors_base.as_entire_binding(),
1820                },
1821                wgpu::BindGroupEntry {
1822                    binding: 5,
1823                    resource: scene.all_chunk_occupancy.as_entire_binding(),
1824                },
1825                wgpu::BindGroupEntry {
1826                    binding: 6,
1827                    resource: scene.grid_static_meta.as_entire_binding(),
1828                },
1829                wgpu::BindGroupEntry {
1830                    binding: 7,
1831                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
1832                },
1833                wgpu::BindGroupEntry {
1834                    binding: 8,
1835                    resource: dda.framebuffer.as_entire_binding(),
1836                },
1837                wgpu::BindGroupEntry {
1838                    binding: 9,
1839                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
1840                },
1841                wgpu::BindGroupEntry {
1842                    binding: 10,
1843                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
1844                },
1845                wgpu::BindGroupEntry {
1846                    binding: 11,
1847                    resource: dda.depth_buffer.as_entire_binding(),
1848                },
1849                wgpu::BindGroupEntry {
1850                    binding: 12,
1851                    resource: scene.occupancy_pages[1].as_entire_binding(),
1852                },
1853                wgpu::BindGroupEntry {
1854                    binding: 13,
1855                    resource: scene.occupancy_pages[2].as_entire_binding(),
1856                },
1857                wgpu::BindGroupEntry {
1858                    binding: 14,
1859                    resource: scene.occupancy_pages[3].as_entire_binding(),
1860                },
1861                wgpu::BindGroupEntry {
1862                    binding: 15,
1863                    resource: grid_cameras.as_entire_binding(),
1864                },
1865            ],
1866        });
1867
1868        // GPU.9 — when sprites are present, build both splatter bind
1869        // groups up front (the splat pass writes the key buffer; the
1870        // resolve pass reads keys + scene depth and writes colour).
1871        // GPU.10.3 — model-DDA bind group + per-frame uniform, using the
1872        // cull/bin results captured above. Per-model + per-instance data
1873        // + the tile lists live in the registry buffers.
1874        let sprite_model_bg = match (&self.sprite_model_dda, &self.sprite_registry, sprite_pass) {
1875            (Some(smd), Some(reg), Some((visible, tiles_x))) => {
1876                // World camera (see the cull pass above) — sprites
1877                // project through it regardless of grid 0's transform.
1878                let cam = sprite_camera;
1879                let uni = SpriteModelUniform {
1880                    cam_pos: cam.position,
1881                    _p0: 0.0,
1882                    cam_right: cam.right,
1883                    _p1: 0.0,
1884                    cam_down: cam.down,
1885                    _p2: 0.0,
1886                    cam_forward: cam.forward,
1887                    _p3: 0.0,
1888                    fog_color: [
1889                        self.fog_color[0],
1890                        self.fog_color[1],
1891                        self.fog_color[2],
1892                        self.fog_near,
1893                    ],
1894                    screen_size: [surface_w, surface_h],
1895                    instance_count: visible,
1896                    fog_far: self.fog_far,
1897                    fov_y_rad,
1898                    tiles_x,
1899                    tile_size: SPRITE_TILE_SIZE,
1900                    _p6: 0.0,
1901                };
1902                self.queue
1903                    .write_buffer(&smd.uniform_buf, 0, bytemuck::bytes_of(&uni));
1904                Some(self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1905                    label: Some("roxlap-gpu sprite_model_dda.bg"),
1906                    layout: &smd.bgl,
1907                    entries: &[
1908                        wgpu::BindGroupEntry {
1909                            binding: 0,
1910                            resource: smd.uniform_buf.as_entire_binding(),
1911                        },
1912                        wgpu::BindGroupEntry {
1913                            binding: 1,
1914                            resource: reg.occupancy.as_entire_binding(),
1915                        },
1916                        wgpu::BindGroupEntry {
1917                            binding: 2,
1918                            resource: reg.colors.as_entire_binding(),
1919                        },
1920                        wgpu::BindGroupEntry {
1921                            binding: 3,
1922                            resource: reg.color_offsets.as_entire_binding(),
1923                        },
1924                        wgpu::BindGroupEntry {
1925                            binding: 4,
1926                            resource: reg.model_meta.as_entire_binding(),
1927                        },
1928                        wgpu::BindGroupEntry {
1929                            binding: 5,
1930                            resource: reg.instances.as_entire_binding(),
1931                        },
1932                        wgpu::BindGroupEntry {
1933                            binding: 6,
1934                            resource: dda.depth_buffer.as_entire_binding(),
1935                        },
1936                        wgpu::BindGroupEntry {
1937                            binding: 7,
1938                            resource: dda.framebuffer.as_entire_binding(),
1939                        },
1940                        wgpu::BindGroupEntry {
1941                            binding: 8,
1942                            resource: reg.tile_ranges.as_entire_binding(),
1943                        },
1944                        wgpu::BindGroupEntry {
1945                            binding: 9,
1946                            resource: reg.tile_instances.as_entire_binding(),
1947                        },
1948                        wgpu::BindGroupEntry {
1949                            binding: 10,
1950                            resource: reg.dirs.as_entire_binding(),
1951                        },
1952                        wgpu::BindGroupEntry {
1953                            binding: 11,
1954                            resource: reg.colmul.as_entire_binding(),
1955                        },
1956                    ],
1957                }))
1958            }
1959            _ => None,
1960        };
1961
1962        let mut encoder = self
1963            .device
1964            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1965                label: Some("roxlap-gpu scene encoder"),
1966            });
1967        {
1968            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1969                label: Some("roxlap-gpu scene_dda compute"),
1970                timestamp_writes: None,
1971            });
1972            cpass.set_pipeline(&dda.pipeline_dda);
1973            cpass.set_bind_group(0, &dda_bg, &[]);
1974            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1975        }
1976        // GPU.10 — sprite model-DDA pass: one thread per pixel marches
1977        // the tile's instances + composites against scene depth, after
1978        // the scene pass wrote the depth buffer and before the blit.
1979        if let (Some(smd), Some(bg)) = (&self.sprite_model_dda, &sprite_model_bg) {
1980            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1981                label: Some("roxlap-gpu sprite_model_dda"),
1982                timestamp_writes: None,
1983            });
1984            cpass.set_pipeline(&smd.pipeline);
1985            cpass.set_bind_group(0, bg, &[]);
1986            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1987        }
1988        {
1989            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1990                label: Some("roxlap-gpu scene_dda blit"),
1991                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1992                    view: &surf_view,
1993                    depth_slice: None,
1994                    resolve_target: None,
1995                    ops: wgpu::Operations {
1996                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1997                        store: wgpu::StoreOp::Store,
1998                    },
1999                })],
2000                depth_stencil_attachment: None,
2001                timestamp_writes: None,
2002                occlusion_query_set: None,
2003                multiview_mask: None,
2004            });
2005            rpass.set_pipeline(&dda.pipeline_blit);
2006            rpass.set_bind_group(0, &dda.blit_bg, &[]);
2007            rpass.draw(0..3, 0..1);
2008        }
2009        self.queue.submit(std::iter::once(encoder.finish()));
2010        // Deferred present — the host calls `present` or `paint_egui`.
2011        self.pending_frame = Some((surf_tex, surf_view));
2012        self.frame_count = self.frame_count.wrapping_add(1);
2013    }
2014
2015    /// Like [`Self::render`] (clear to colour) but **deferred**: stashes
2016    /// the frame for [`Self::present`] / [`Self::paint_egui`] instead of
2017    /// presenting. The facade uses this before any grid is resident so a
2018    /// HUD can still be painted over an empty scene.
2019    pub fn render_clear_deferred(&mut self) {
2020        self.pending_frame = None;
2021        let Some(surf_tex) = self.acquire_frame() else {
2022            return;
2023        };
2024        let view = surf_tex
2025            .texture
2026            .create_view(&wgpu::TextureViewDescriptor::default());
2027        let [r, g, b] = self.clear_colour;
2028        let mut encoder = self
2029            .device
2030            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2031                label: Some("roxlap-gpu clear (deferred)"),
2032            });
2033        {
2034            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2035                label: Some("roxlap-gpu clear (deferred)"),
2036                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2037                    view: &view,
2038                    depth_slice: None,
2039                    resolve_target: None,
2040                    ops: wgpu::Operations {
2041                        load: wgpu::LoadOp::Clear(wgpu::Color { r, g, b, a: 1.0 }),
2042                        store: wgpu::StoreOp::Store,
2043                    },
2044                })],
2045                depth_stencil_attachment: None,
2046                timestamp_writes: None,
2047                occlusion_query_set: None,
2048                multiview_mask: None,
2049            });
2050        }
2051        self.queue.submit(std::iter::once(encoder.finish()));
2052        self.pending_frame = Some((surf_tex, view));
2053    }
2054
2055    /// Present the frame stashed by the last deferred render
2056    /// ([`Self::render_scene`] / [`Self::render_clear_deferred`]). No-op
2057    /// if nothing is pending (e.g. the surface was lost mid-render).
2058    pub fn present(&mut self) {
2059        if let Some((surf_tex, _view)) = self.pending_frame.take() {
2060            surf_tex.present();
2061        }
2062    }
2063
2064    /// Draw depth-tested world-space [`GpuLine`]s over the pending frame
2065    /// (L3.2). Projects each endpoint with `cam` (the marcher's pinhole) +
2066    /// the last frame's FOV / surface size, expands to screen-space quads,
2067    /// and runs a `LoadOp::Load` pass into the pending swapchain view — so
2068    /// the lines land on the marched frame and a later `present` /
2069    /// `paint_egui` still finishes it (the pending frame is left intact).
2070    /// Depth-tested lines are occluded by nearer marched geometry (compared
2071    /// against the scene-DDA depth buffer's `best_t`); call after `render`,
2072    /// before `present` / `paint_egui`. No-op if no frame is pending.
2073    pub fn draw_lines_deferred(&mut self, cam: &GpuLineCamera, lines: &[GpuLine]) {
2074        if self.pending_frame.is_none() || lines.is_empty() {
2075            return;
2076        }
2077        let (w, h) = (self.surface_config.width, self.surface_config.height);
2078        let fov = self.last_fov_y_rad;
2079        if w == 0 || h == 0 || fov <= 0.0 {
2080            return; // no frame marched yet — no projection to reuse
2081        }
2082        let verts = build_line_vertices(cam, lines, w, h, fov);
2083        if verts.is_empty() {
2084            return;
2085        }
2086        self.ensure_line_resources();
2087        let res = self.line_resources.as_ref().expect("just built");
2088
2089        // Skip the depth test when there's no scene depth buffer to read
2090        // (sprite-only / empty scene) — bind the 1-word dummy so the layout
2091        // is satisfied; `no_depth = 1` keeps the shader from indexing it.
2092        let no_depth = u32::from(self.scene_dda.is_none());
2093        let params = LineParams {
2094            screen_w: w,
2095            screen_h: h,
2096            depth_bias: LINE_DEPTH_BIAS,
2097            no_depth,
2098        };
2099        self.queue
2100            .write_buffer(&res.uniform_buf, 0, bytemuck::bytes_of(&params));
2101
2102        let depth_resource = match &self.scene_dda {
2103            Some(dda) => dda.depth_buffer.as_entire_binding(),
2104            None => res.dummy_depth.as_entire_binding(),
2105        };
2106        let bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2107            label: Some("roxlap-gpu line.bg"),
2108            layout: &res.bgl,
2109            entries: &[
2110                wgpu::BindGroupEntry {
2111                    binding: 0,
2112                    resource: res.uniform_buf.as_entire_binding(),
2113                },
2114                wgpu::BindGroupEntry {
2115                    binding: 1,
2116                    resource: depth_resource,
2117                },
2118            ],
2119        });
2120
2121        // Grow-only persistent vertex buffer (L3.3): one `write_buffer`
2122        // per overlay, reused across frames. Power-of-two capacity keeps
2123        // re-allocation rare as the segment count drifts.
2124        let needed = std::mem::size_of_val(verts.as_slice()) as u64;
2125        if self.line_vbuf_cap < needed {
2126            let cap = needed.next_power_of_two().max(4096);
2127            self.line_vbuf = Some(self.device.create_buffer(&wgpu::BufferDescriptor {
2128                label: Some("roxlap-gpu line.vbuf"),
2129                size: cap,
2130                usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
2131                mapped_at_creation: false,
2132            }));
2133            self.line_vbuf_cap = cap;
2134        }
2135        let vbuf = self.line_vbuf.as_ref().expect("ensured above");
2136        self.queue
2137            .write_buffer(vbuf, 0, bytemuck::cast_slice(&verts));
2138
2139        let view = &self.pending_frame.as_ref().expect("checked above").1;
2140        let mut encoder = self
2141            .device
2142            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2143                label: Some("roxlap-gpu lines"),
2144            });
2145        {
2146            // `LoadOp::Load` keeps the marcher's frame; the lines draw over
2147            // it. Manual depth test in the FS (no depth-stencil attachment).
2148            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2149                label: Some("roxlap-gpu line paint"),
2150                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2151                    view,
2152                    depth_slice: None,
2153                    resolve_target: None,
2154                    ops: wgpu::Operations {
2155                        load: wgpu::LoadOp::Load,
2156                        store: wgpu::StoreOp::Store,
2157                    },
2158                })],
2159                depth_stencil_attachment: None,
2160                timestamp_writes: None,
2161                occlusion_query_set: None,
2162                multiview_mask: None,
2163            });
2164            pass.set_pipeline(&res.pipeline);
2165            pass.set_bind_group(0, &bg, &[]);
2166            pass.set_vertex_buffer(0, vbuf.slice(..));
2167            pass.draw(0..verts.len() as u32, 0..1);
2168        }
2169        self.queue.submit(std::iter::once(encoder.finish()));
2170        // pending_frame left intact — present/paint_egui finishes the frame.
2171    }
2172
2173    /// Lazy-build the [`LineResources`] (`line.wgsl` pipeline + uniform +
2174    /// dummy depth buffer). The colour target uses the surface format with
2175    /// straight-alpha over-blending; no depth-stencil attachment (the depth
2176    /// test is manual in the fragment shader against the scene depth buffer).
2177    fn ensure_line_resources(&mut self) {
2178        if self.line_resources.is_some() {
2179            return;
2180        }
2181        let shader = self
2182            .device
2183            .create_shader_module(wgpu::ShaderModuleDescriptor {
2184                label: Some("line.wgsl"),
2185                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/line.wgsl").into()),
2186            });
2187        let bgl = self
2188            .device
2189            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2190                label: Some("roxlap-gpu line.bgl"),
2191                entries: &[
2192                    wgpu::BindGroupLayoutEntry {
2193                        binding: 0,
2194                        visibility: wgpu::ShaderStages::FRAGMENT,
2195                        ty: wgpu::BindingType::Buffer {
2196                            ty: wgpu::BufferBindingType::Uniform,
2197                            has_dynamic_offset: false,
2198                            min_binding_size: None,
2199                        },
2200                        count: None,
2201                    },
2202                    wgpu::BindGroupLayoutEntry {
2203                        binding: 1,
2204                        visibility: wgpu::ShaderStages::FRAGMENT,
2205                        ty: wgpu::BindingType::Buffer {
2206                            ty: wgpu::BufferBindingType::Storage { read_only: true },
2207                            has_dynamic_offset: false,
2208                            min_binding_size: None,
2209                        },
2210                        count: None,
2211                    },
2212                ],
2213            });
2214        let layout = self
2215            .device
2216            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2217                label: Some("roxlap-gpu line.layout"),
2218                bind_group_layouts: &[Some(&bgl)],
2219                immediate_size: 0,
2220            });
2221        let pipeline = self
2222            .device
2223            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
2224                label: Some("roxlap-gpu line.pipeline"),
2225                layout: Some(&layout),
2226                vertex: wgpu::VertexState {
2227                    module: &shader,
2228                    entry_point: Some("vs_main"),
2229                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2230                    buffers: &[wgpu::VertexBufferLayout {
2231                        array_stride: std::mem::size_of::<LineVertex>() as u64,
2232                        step_mode: wgpu::VertexStepMode::Vertex,
2233                        attributes: &wgpu::vertex_attr_array![
2234                            0 => Float32x2, // pos (NDC)
2235                            1 => Float32,   // depth
2236                            2 => Float32,   // depth_test
2237                            3 => Float32x4, // color
2238                        ],
2239                    }],
2240                },
2241                fragment: Some(wgpu::FragmentState {
2242                    module: &shader,
2243                    entry_point: Some("fs_main"),
2244                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2245                    targets: &[Some(wgpu::ColorTargetState {
2246                        format: self.surface_config.format,
2247                        blend: Some(wgpu::BlendState::ALPHA_BLENDING),
2248                        write_mask: wgpu::ColorWrites::ALL,
2249                    })],
2250                }),
2251                primitive: wgpu::PrimitiveState {
2252                    cull_mode: None,
2253                    ..Default::default()
2254                },
2255                depth_stencil: None,
2256                multisample: wgpu::MultisampleState::default(),
2257                multiview_mask: None,
2258                cache: None,
2259            });
2260        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2261            label: Some("roxlap-gpu line.uniform"),
2262            size: std::mem::size_of::<LineParams>() as u64,
2263            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2264            mapped_at_creation: false,
2265        });
2266        let dummy_depth = self.device.create_buffer(&wgpu::BufferDescriptor {
2267            label: Some("roxlap-gpu line.dummy_depth"),
2268            size: 4,
2269            usage: wgpu::BufferUsages::STORAGE,
2270            mapped_at_creation: false,
2271        });
2272        self.line_resources = Some(LineResources {
2273            pipeline,
2274            bgl,
2275            uniform_buf,
2276            dummy_depth,
2277        });
2278    }
2279
2280    /// Overlay an `egui` UI on the pending frame, then present it
2281    /// (`hud` feature). `jobs` are the host's tessellated primitives
2282    /// (`egui::Context::tessellate`), `textures` the per-frame texture
2283    /// delta from `egui::FullOutput`, `pixels_per_point` the UI scale.
2284    ///
2285    /// Draws with `LoadOp::Load` over the marcher's frame (a separate
2286    /// encoder submitted after the scene's), so the UI composites on top
2287    /// of the world. No-op if no frame is pending.
2288    #[cfg(feature = "hud")]
2289    pub fn paint_egui(
2290        &mut self,
2291        jobs: &[egui::ClippedPrimitive],
2292        textures: &egui::TexturesDelta,
2293        pixels_per_point: f32,
2294    ) {
2295        let Some((surf_tex, surf_view)) = self.pending_frame.take() else {
2296            return;
2297        };
2298        let format = self.surface_config.format;
2299        let egui_rend = self.egui_renderer.get_or_insert_with(|| {
2300            egui_wgpu::Renderer::new(
2301                &self.device,
2302                format,
2303                egui_wgpu::RendererOptions {
2304                    msaa_samples: 1,
2305                    depth_stencil_format: None,
2306                    dithering: false,
2307                    ..Default::default()
2308                },
2309            )
2310        });
2311
2312        let screen = egui_wgpu::ScreenDescriptor {
2313            size_in_pixels: [self.surface_config.width, self.surface_config.height],
2314            pixels_per_point,
2315        };
2316        for (id, delta) in &textures.set {
2317            egui_rend.update_texture(&self.device, &self.queue, *id, delta);
2318        }
2319        let mut encoder = self
2320            .device
2321            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2322                label: Some("roxlap-gpu egui"),
2323            });
2324        let user_bufs =
2325            egui_rend.update_buffers(&self.device, &self.queue, &mut encoder, jobs, &screen);
2326        {
2327            // `LoadOp::Load` keeps the marcher's frame; egui draws over it.
2328            let mut pass = encoder
2329                .begin_render_pass(&wgpu::RenderPassDescriptor {
2330                    label: Some("roxlap-gpu egui paint"),
2331                    color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2332                        view: &surf_view,
2333                        depth_slice: None,
2334                        resolve_target: None,
2335                        ops: wgpu::Operations {
2336                            load: wgpu::LoadOp::Load,
2337                            store: wgpu::StoreOp::Store,
2338                        },
2339                    })],
2340                    depth_stencil_attachment: None,
2341                    timestamp_writes: None,
2342                    occlusion_query_set: None,
2343                    multiview_mask: None,
2344                })
2345                // egui-wgpu 0.29 requires a `'static` pass (see its docs).
2346                .forget_lifetime();
2347            egui_rend.render(&mut pass, jobs, &screen);
2348        }
2349        for id in &textures.free {
2350            egui_rend.free_texture(id);
2351        }
2352        self.queue.submit(
2353            user_bufs
2354                .into_iter()
2355                .chain(std::iter::once(encoder.finish())),
2356        );
2357        surf_tex.present();
2358    }
2359
2360    fn build_scene_dda(
2361        &self,
2362        width: u32,
2363        height: u32,
2364        surface_format: wgpu::TextureFormat,
2365    ) -> SceneDdaResources {
2366        // Framebuffer as a packed-`rgba8unorm` storage buffer (1 u32 per
2367        // pixel, row stride = `width`). See the struct-field note.
2368        let framebuffer = self.device.create_buffer(&wgpu::BufferDescriptor {
2369            label: Some("roxlap-gpu scene_dda.framebuffer"),
2370            size: u64::from(width) * u64::from(height) * 4,
2371            usage: wgpu::BufferUsages::STORAGE,
2372            mapped_at_creation: false,
2373        });
2374        // Screen size for the blit's pixel→index math (`vec2<u32>`).
2375        let blit_dims = self.device.create_buffer(&wgpu::BufferDescriptor {
2376            label: Some("roxlap-gpu scene_dda.blit_dims"),
2377            size: 8,
2378            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2379            mapped_at_creation: false,
2380        });
2381        self.queue
2382            .write_buffer(&blit_dims, 0, bytemuck::bytes_of(&[width, height]));
2383
2384        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2385            label: Some("roxlap-gpu scene_dda.uniform"),
2386            size: std::mem::size_of::<SceneDdaUniform>() as u64,
2387            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2388            mapped_at_creation: false,
2389        });
2390
2391        // GPU.9 — per-pixel world-t depth (f32 bits as u32). Sized to
2392        // the storage texture; written by the scene pass when sprites
2393        // are active, read+tested by the sprite splatter.
2394        let depth_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
2395            label: Some("roxlap-gpu scene_dda.depth"),
2396            size: u64::from(width) * u64::from(height) * 4,
2397            // COPY_SRC so `read_depth_pixel` can stage it for picking.
2398            usage: wgpu::BufferUsages::STORAGE
2399                | wgpu::BufferUsages::COPY_DST
2400                | wgpu::BufferUsages::COPY_SRC,
2401            mapped_at_creation: false,
2402        });
2403        let depth_readback = self.device.create_buffer(&wgpu::BufferDescriptor {
2404            label: Some("roxlap-gpu scene_dda.depth_readback"),
2405            size: u64::from(width) * u64::from(height) * 4,
2406            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
2407            mapped_at_creation: false,
2408        });
2409        let dda_shader = self
2410            .device
2411            .create_shader_module(wgpu::ShaderModuleDescriptor {
2412                label: Some("scene_dda.wgsl"),
2413                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
2414            });
2415        let bgl_dda = self
2416            .device
2417            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2418                label: Some("roxlap-gpu scene_dda.bgl"),
2419                entries: &[
2420                    bgl_uniform_entry(0),
2421                    bgl_storage_entry(1, true),
2422                    bgl_storage_entry(2, true),
2423                    bgl_storage_entry(3, true),
2424                    bgl_storage_entry(4, true),
2425                    bgl_storage_entry(5, true),
2426                    bgl_storage_entry(6, true),
2427                    bgl_storage_entry(7, true),
2428                    // Framebuffer storage buffer (read-write; the scene +
2429                    // sprite passes write packed pixels into it).
2430                    bgl_storage_entry(8, false),
2431                    // GPU.8 sky panorama + sampler.
2432                    wgpu::BindGroupLayoutEntry {
2433                        binding: 9,
2434                        visibility: wgpu::ShaderStages::COMPUTE,
2435                        ty: wgpu::BindingType::Texture {
2436                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
2437                            view_dimension: wgpu::TextureViewDimension::D2,
2438                            multisampled: false,
2439                        },
2440                        count: None,
2441                    },
2442                    wgpu::BindGroupLayoutEntry {
2443                        binding: 10,
2444                        visibility: wgpu::ShaderStages::COMPUTE,
2445                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
2446                        count: None,
2447                    },
2448                    // GPU.9 — read-write per-pixel depth buffer.
2449                    bgl_storage_entry(11, false),
2450                    // Occupancy pages 1..MAX_OCC_PAGES (page 0 is
2451                    // binding 1). Unused pages bind a dummy buffer.
2452                    bgl_storage_entry(12, true),
2453                    bgl_storage_entry(13, true),
2454                    bgl_storage_entry(14, true),
2455                    // Per-grid cameras (runtime-sized; one per grid).
2456                    bgl_storage_entry(15, true),
2457                ],
2458            });
2459        let dda_pl = self
2460            .device
2461            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2462                label: Some("roxlap-gpu scene_dda.layout"),
2463                bind_group_layouts: &[Some(&bgl_dda)],
2464                immediate_size: 0,
2465            });
2466        let pipeline_dda = self
2467            .device
2468            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
2469                label: Some("roxlap-gpu scene_dda.pipeline"),
2470                layout: Some(&dda_pl),
2471                module: &dda_shader,
2472                entry_point: Some("render_scene"),
2473                compilation_options: wgpu::PipelineCompilationOptions::default(),
2474                cache: None,
2475            });
2476
2477        let blit_shader = self
2478            .device
2479            .create_shader_module(wgpu::ShaderModuleDescriptor {
2480                label: Some("scene_blit.wgsl"),
2481                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_blit.wgsl").into()),
2482            });
2483        let bgl_blit = self
2484            .device
2485            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2486                label: Some("roxlap-gpu scene_dda.blit_bgl"),
2487                entries: &[
2488                    // Framebuffer storage buffer (read-only in the blit).
2489                    wgpu::BindGroupLayoutEntry {
2490                        binding: 0,
2491                        visibility: wgpu::ShaderStages::FRAGMENT,
2492                        ty: wgpu::BindingType::Buffer {
2493                            ty: wgpu::BufferBindingType::Storage { read_only: true },
2494                            has_dynamic_offset: false,
2495                            min_binding_size: None,
2496                        },
2497                        count: None,
2498                    },
2499                    // Screen-size uniform for the pixel→index math.
2500                    wgpu::BindGroupLayoutEntry {
2501                        binding: 1,
2502                        visibility: wgpu::ShaderStages::FRAGMENT,
2503                        ty: wgpu::BindingType::Buffer {
2504                            ty: wgpu::BufferBindingType::Uniform,
2505                            has_dynamic_offset: false,
2506                            min_binding_size: None,
2507                        },
2508                        count: None,
2509                    },
2510                ],
2511            });
2512        let blit_pl = self
2513            .device
2514            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2515                label: Some("roxlap-gpu scene_dda.blit_layout"),
2516                bind_group_layouts: &[Some(&bgl_blit)],
2517                immediate_size: 0,
2518            });
2519        let pipeline_blit = self
2520            .device
2521            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
2522                label: Some("roxlap-gpu scene_dda.blit_pipeline"),
2523                layout: Some(&blit_pl),
2524                vertex: wgpu::VertexState {
2525                    module: &blit_shader,
2526                    entry_point: Some("vs_main"),
2527                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2528                    buffers: &[],
2529                },
2530                fragment: Some(wgpu::FragmentState {
2531                    module: &blit_shader,
2532                    entry_point: Some("fs_main"),
2533                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2534                    targets: &[Some(wgpu::ColorTargetState {
2535                        format: surface_format,
2536                        blend: None,
2537                        write_mask: wgpu::ColorWrites::ALL,
2538                    })],
2539                }),
2540                primitive: wgpu::PrimitiveState::default(),
2541                depth_stencil: None,
2542                multisample: wgpu::MultisampleState::default(),
2543                multiview_mask: None,
2544                cache: None,
2545            });
2546        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2547            label: Some("roxlap-gpu scene_dda.blit_bg"),
2548            layout: &bgl_blit,
2549            entries: &[
2550                wgpu::BindGroupEntry {
2551                    binding: 0,
2552                    resource: framebuffer.as_entire_binding(),
2553                },
2554                wgpu::BindGroupEntry {
2555                    binding: 1,
2556                    resource: blit_dims.as_entire_binding(),
2557                },
2558            ],
2559        });
2560
2561        SceneDdaResources {
2562            storage_size: (width, height),
2563            framebuffer,
2564            uniform_buf,
2565            bgl_dda,
2566            pipeline_dda,
2567            blit_bg,
2568            pipeline_blit,
2569            depth_buffer,
2570            depth_readback,
2571        }
2572    }
2573
2574    /// Read back the per-pixel world-t depth at window pixel `(x, y)`
2575    /// from the last rendered frame, for screen→world picking. Returns
2576    /// the distance `t` along the (normalised) view ray to the nearest
2577    /// scene-grid surface, so the host reconstructs the world hit as
2578    /// `cam.pos + t * normalize(ray_dir)`. `None` for out-of-bounds
2579    /// pixels, sky / no-hit (the `T_INF` sentinel), or when no scene
2580    /// frame has been rendered.
2581    ///
2582    /// The depth buffer is the SCENE pass's output (terrain + grids),
2583    /// untouched by the sprite pass (which reads it read-only), so a
2584    /// cursor sprite under the pointer does not occlude the pick.
2585    ///
2586    /// Synchronous: copies the depth buffer to a mapped staging buffer
2587    /// and blocks on `device.poll(Wait)`. Cheap enough for click-time
2588    /// picks; do not call it every frame.
2589    ///
2590    /// Requires the last frame to have written depth, which happens
2591    /// when sprites are present (`write_depth`). The pick demo always
2592    /// has a cursor sprite, so this holds.
2593    ///
2594    /// Compiles on wasm, but the wasm facade never calls it: WebGPU's
2595    /// `device.poll` doesn't block for the GPU, so the blocking
2596    /// `recv()` here would hang the single browser thread. Picking is
2597    /// deferred on the wasm GPU path (the facade returns `None`).
2598    #[must_use]
2599    pub fn read_depth_pixel(&self, x: u32, y: u32) -> Option<f32> {
2600        let dda = self.scene_dda.as_ref()?;
2601        let (w, h) = dda.storage_size;
2602        if x >= w || y >= h {
2603            return None;
2604        }
2605        let mut enc = self
2606            .device
2607            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2608                label: Some("roxlap-gpu depth readback"),
2609            });
2610        let size = u64::from(w) * u64::from(h) * 4;
2611        enc.copy_buffer_to_buffer(&dda.depth_buffer, 0, &dda.depth_readback, 0, size);
2612        self.queue.submit(std::iter::once(enc.finish()));
2613
2614        let slice = dda.depth_readback.slice(..);
2615        let (tx, rx) = std::sync::mpsc::channel();
2616        slice.map_async(wgpu::MapMode::Read, move |r| {
2617            let _ = tx.send(r);
2618        });
2619        self.device.poll(wgpu::PollType::wait_indefinitely()).ok();
2620        rx.recv().ok()?.ok()?;
2621
2622        let t = {
2623            let data = slice.get_mapped_range();
2624            let idx = ((y * w + x) * 4) as usize;
2625            let bytes: [u8; 4] = data[idx..idx + 4].try_into().ok()?;
2626            f32::from_le_bytes(bytes)
2627        };
2628        dda.depth_readback.unmap();
2629
2630        // Reject sky / no-hit (T_INF == 1e30 in the shader) + non-finite.
2631        if !t.is_finite() || t >= 1.0e29 {
2632            return None;
2633        }
2634        Some(t)
2635    }
2636
2637    /// World-space view-ray direction (un-normalised) for window pixel
2638    /// `(x, y)`, under the GPU marcher's projection — the canonical GPU
2639    /// unproject, mirroring `scene_dda.wgsl`'s `render_scene`
2640    /// (vertical-FOV pinhole). Uses the last-rendered frame's target
2641    /// size + FOV; `None` before the first scene render. Pair with
2642    /// [`Self::read_depth_pixel`] for screen→world picking.
2643    #[must_use]
2644    pub fn pixel_ray(
2645        &self,
2646        right: [f64; 3],
2647        down: [f64; 3],
2648        forward: [f64; 3],
2649        x: f64,
2650        y: f64,
2651    ) -> Option<[f64; 3]> {
2652        let dda = self.scene_dda.as_ref()?;
2653        let (w, h) = dda.storage_size;
2654        if w == 0 || h == 0 || self.last_fov_y_rad <= 0.0 {
2655            return None;
2656        }
2657        Some(pinhole_pixel_ray(
2658            right,
2659            down,
2660            forward,
2661            x,
2662            y,
2663            f64::from(w),
2664            f64::from(h),
2665            f64::from(self.last_fov_y_rad),
2666        ))
2667    }
2668
2669    /// GPU.10.1 — upload a sprite model registry + its instances for
2670    /// the DDA path. An empty instance slice clears all sprites.
2671    pub fn set_sprite_instances(
2672        &mut self,
2673        registry: &sprite_model::SpriteModelRegistry,
2674        instances: &[sprite_model::SpriteInstance],
2675    ) {
2676        if instances.is_empty() {
2677            self.sprite_registry = None;
2678            return;
2679        }
2680        self.sprite_registry = Some(sprite_model::SpriteRegistryResident::upload(
2681            &self.device,
2682            registry,
2683            instances,
2684        ));
2685    }
2686
2687    /// Re-pose the already-resident sprite instances in place (no model
2688    /// volume re-upload) — the cheap per-frame path for animated KFA
2689    /// limbs. `instances` must match the last [`Self::set_sprite_instances`]
2690    /// in length + order. No-op if no sprite registry is resident.
2691    pub fn update_sprite_instance_transforms(
2692        &mut self,
2693        instances: &[sprite_model::SpriteInstance],
2694    ) {
2695        if let Some(reg) = self.sprite_registry.as_mut() {
2696            reg.update_transforms(instances);
2697        }
2698    }
2699
2700    /// GPU.12 incremental — re-upload only LOD chain `chain_id`'s entries
2701    /// after an in-place edit of `registry` (carve / recolour), without
2702    /// rebuilding the whole sprite registry. `registry` must be the one
2703    /// last passed to [`Self::set_sprite_instances`] with chain
2704    /// `chain_id` already edited. No-op if no registry is resident.
2705    pub fn update_sprite_model(
2706        &mut self,
2707        registry: &sprite_model::SpriteModelRegistry,
2708        chain_id: u32,
2709    ) {
2710        if let Some(reg) = self.sprite_registry.as_mut() {
2711            reg.update_model(&self.device, &self.queue, registry, chain_id);
2712        }
2713    }
2714
2715    /// Set the per-instance `kv6colmul[256]` lighting tables (voxlap's
2716    /// `update_reflects` output, e.g. via `roxlap_core::sprite::
2717    /// sprite_colmul`), in the same order/length as the last
2718    /// [`Self::set_sprite_instances`]. The GPU sprite pass modulates each
2719    /// voxel by its surface normal's entry — matching the CPU rasteriser.
2720    /// No-op if no sprite registry is resident.
2721    pub fn set_sprite_instance_colmul(&mut self, tables: &[[u64; 256]]) {
2722        if let Some(reg) = self.sprite_registry.as_mut() {
2723            reg.set_instance_colmul(tables);
2724        }
2725    }
2726
2727    /// GPU.10.4 — set the LOD pixel threshold: a sprite steps to the
2728    /// next mip once a mip-0 voxel would project below `px` screen
2729    /// pixels. `1.0` is the natural "no sub-pixel voxels" default;
2730    /// larger values force LOD in closer (useful for inspection).
2731    /// Clamped to ≥ 0.25.
2732    pub fn set_sprite_lod_px(&mut self, px: f32) {
2733        self.sprite_lod_px = px.max(0.25);
2734    }
2735
2736    /// GPU.11.1 — set the scene-grid LOD scan distance (world units).
2737    /// A chunk entered at world-t `t` is marched at mip
2738    /// `floor(log2(max(t, msd) / msd))`, clamped to its grid's mip
2739    /// ladder. `0` disables LOD (always mip-0). Larger values push
2740    /// the coarser mips farther out — the axis-aligned-mip-beams
2741    /// mitigation lever (GPU.11.2). Default 64 (matches CPU
2742    /// `mip_scan_dist`).
2743    pub fn set_scene_mip_scan_dist(&mut self, dist: f32) {
2744        self.scene_mip_scan_dist = dist.max(0.0);
2745    }
2746
2747    /// Set per-face grid side-shading — voxlap's
2748    /// `setsideshades(top, bot, left, right, up, down)`. Each value is
2749    /// subtracted (as a u8, matching the CPU `gcsub` high byte) from a
2750    /// hit voxel's brightness byte before shading, so the scene-DDA pass
2751    /// darkens grid faces the same way the CPU rasteriser does. `[0; 6]`
2752    /// disables it (the default). The hit face is taken from the DDA's
2753    /// last-stepped axis + ray direction.
2754    pub fn set_scene_side_shades(&mut self, s: [i8; 6]) {
2755        // Reinterpret each i8 as u8 (voxlap stamps `sxx` into gcsub's
2756        // high byte verbatim), then pack (top, bot, left, right) /
2757        // (up, down, 0, 0) for the two uniform vec4s.
2758        let v = |i: usize| i32::from(s[i] as u8);
2759        self.scene_side_shades = [[v(0), v(1), v(2), v(3)], [v(4), v(5), 0, 0]];
2760    }
2761
2762    /// GPU.10.1 — build the instanced model-DDA pipeline (one thread
2763    /// per pixel). Lazily invoked the first frame a registry is present.
2764    fn build_sprite_model_dda(&self) -> SpriteModelDdaResources {
2765        let shader = self
2766            .device
2767            .create_shader_module(wgpu::ShaderModuleDescriptor {
2768                label: Some("sprite_model_dda.wgsl"),
2769                source: wgpu::ShaderSource::Wgsl(
2770                    include_str!("../shaders/sprite_model_dda.wgsl").into(),
2771                ),
2772            });
2773        let bgl = self
2774            .device
2775            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2776                label: Some("roxlap-gpu sprite_model_dda.bgl"),
2777                entries: &[
2778                    bgl_uniform_entry(0),
2779                    bgl_storage_entry(1, true),  // occupancy
2780                    bgl_storage_entry(2, true),  // colors
2781                    bgl_storage_entry(3, true),  // color_offsets
2782                    bgl_storage_entry(4, true),  // model_meta
2783                    bgl_storage_entry(5, true),  // instances
2784                    bgl_storage_entry(6, true),  // scene depth
2785                    bgl_storage_entry(7, false), // framebuffer (read-write buffer)
2786                    bgl_storage_entry(8, true),  // tile_ranges
2787                    bgl_storage_entry(9, true),  // tile_instances
2788                    bgl_storage_entry(10, true), // per-voxel dir
2789                    bgl_storage_entry(11, true), // per-instance kv6colmul
2790                ],
2791            });
2792        let pl = self
2793            .device
2794            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2795                label: Some("roxlap-gpu sprite_model_dda.layout"),
2796                bind_group_layouts: &[Some(&bgl)],
2797                immediate_size: 0,
2798            });
2799        let pipeline = self
2800            .device
2801            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
2802                label: Some("roxlap-gpu sprite_model_dda.pipeline"),
2803                layout: Some(&pl),
2804                module: &shader,
2805                entry_point: Some("march"),
2806                compilation_options: wgpu::PipelineCompilationOptions::default(),
2807                cache: None,
2808            });
2809        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2810            label: Some("roxlap-gpu sprite_model_dda.uniform"),
2811            size: std::mem::size_of::<SpriteModelUniform>() as u64,
2812            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2813            mapped_at_creation: false,
2814        });
2815        SpriteModelDdaResources {
2816            bgl,
2817            pipeline,
2818            uniform_buf,
2819        }
2820    }
2821}
2822
2823/// GPU.11 — headless scene-DDA renderer for tests + offline visual
2824/// gates. Owns the `scene_dda.wgsl` compute pipeline with no surface
2825/// and no blit pass; renders a [`GpuSceneResident`] to an in-memory
2826/// RGBA framebuffer via texture readback. The per-substage visual
2827/// gate (render reference scenes, diff PPMs) and the GPU.11.1 mip
2828/// render-diff both ride on this.
2829pub struct HeadlessSceneRenderer {
2830    width: u32,
2831    height: u32,
2832    /// Framebuffer storage buffer (packed `rgba8unorm`, tight rows) —
2833    /// matches the buffer-output `scene_dda.wgsl` (see its note).
2834    framebuffer: wgpu::Buffer,
2835    depth_buffer: wgpu::Buffer,
2836    uniform_buf: wgpu::Buffer,
2837    _sky_texture: wgpu::Texture,
2838    sky_view: wgpu::TextureView,
2839    sky_sampler: wgpu::Sampler,
2840    bgl: wgpu::BindGroupLayout,
2841    pipeline: wgpu::ComputePipeline,
2842    readback: wgpu::Buffer,
2843    /// Per-face side-shades for the gate render (default none). Packed
2844    /// `[(top,bot,left,right), (up,down,_,_)]`; set via
2845    /// [`Self::set_side_shades`].
2846    side_shades: [[i32; 4]; 2],
2847}
2848
2849impl HeadlessSceneRenderer {
2850    /// Build the compute pipeline + output/readback resources for a
2851    /// `width × height` framebuffer. Validates `scene_dda.wgsl` and
2852    /// the [`scene::GridStaticMeta`] std430 layout at pipeline /
2853    /// bind-group time.
2854    #[must_use]
2855    pub fn new(device: &wgpu::Device, queue: &wgpu::Queue, width: u32, height: u32) -> Self {
2856        let framebuffer = device.create_buffer(&wgpu::BufferDescriptor {
2857            label: Some("roxlap-gpu headless.framebuffer"),
2858            size: u64::from(width) * u64::from(height) * 4,
2859            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
2860            mapped_at_creation: false,
2861        });
2862
2863        let uniform_buf = device.create_buffer(&wgpu::BufferDescriptor {
2864            label: Some("roxlap-gpu headless.uniform"),
2865            size: std::mem::size_of::<SceneDdaUniform>() as u64,
2866            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2867            mapped_at_creation: false,
2868        });
2869        let depth_buffer = device.create_buffer(&wgpu::BufferDescriptor {
2870            label: Some("roxlap-gpu headless.depth"),
2871            size: u64::from(width) * u64::from(height) * 4,
2872            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
2873            mapped_at_creation: false,
2874        });
2875
2876        let default_sky_pixel = [120u8, 150, 220, 255];
2877        let (sky_texture, sky_view) = create_sky_texture(device, 1, 1, &default_sky_pixel);
2878        // Upload the default sky texel (create_sky_texture only allocates
2879        // — the texel must be written or the shader samples black, which
2880        // is why a grid-less headless render came back black).
2881        queue.write_texture(
2882            wgpu::TexelCopyTextureInfo {
2883                texture: &sky_texture,
2884                mip_level: 0,
2885                origin: wgpu::Origin3d::ZERO,
2886                aspect: wgpu::TextureAspect::All,
2887            },
2888            &default_sky_pixel,
2889            wgpu::TexelCopyBufferLayout {
2890                offset: 0,
2891                bytes_per_row: Some(4),
2892                rows_per_image: Some(1),
2893            },
2894            wgpu::Extent3d {
2895                width: 1,
2896                height: 1,
2897                depth_or_array_layers: 1,
2898            },
2899        );
2900        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
2901            label: Some("roxlap-gpu headless.sky_sampler"),
2902            address_mode_u: wgpu::AddressMode::Repeat,
2903            address_mode_v: wgpu::AddressMode::Repeat,
2904            mag_filter: wgpu::FilterMode::Linear,
2905            min_filter: wgpu::FilterMode::Linear,
2906            ..Default::default()
2907        });
2908
2909        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
2910            label: Some("scene_dda.wgsl (headless)"),
2911            source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
2912        });
2913        let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2914            label: Some("roxlap-gpu headless.bgl"),
2915            entries: &[
2916                bgl_uniform_entry(0),
2917                bgl_storage_entry(1, true),
2918                bgl_storage_entry(2, true),
2919                bgl_storage_entry(3, true),
2920                bgl_storage_entry(4, true),
2921                bgl_storage_entry(5, true),
2922                bgl_storage_entry(6, true),
2923                bgl_storage_entry(7, true),
2924                // Framebuffer storage buffer (read-write).
2925                bgl_storage_entry(8, false),
2926                wgpu::BindGroupLayoutEntry {
2927                    binding: 9,
2928                    visibility: wgpu::ShaderStages::COMPUTE,
2929                    ty: wgpu::BindingType::Texture {
2930                        sample_type: wgpu::TextureSampleType::Float { filterable: true },
2931                        view_dimension: wgpu::TextureViewDimension::D2,
2932                        multisampled: false,
2933                    },
2934                    count: None,
2935                },
2936                wgpu::BindGroupLayoutEntry {
2937                    binding: 10,
2938                    visibility: wgpu::ShaderStages::COMPUTE,
2939                    ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
2940                    count: None,
2941                },
2942                bgl_storage_entry(11, false),
2943                bgl_storage_entry(12, true),
2944                bgl_storage_entry(13, true),
2945                bgl_storage_entry(14, true),
2946                // Per-grid cameras (runtime-sized; one per grid).
2947                bgl_storage_entry(15, true),
2948            ],
2949        });
2950        let pl = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2951            label: Some("roxlap-gpu headless.layout"),
2952            bind_group_layouts: &[Some(&bgl)],
2953            immediate_size: 0,
2954        });
2955        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
2956            label: Some("roxlap-gpu headless.pipeline"),
2957            layout: Some(&pl),
2958            module: &shader,
2959            entry_point: Some("render_scene"),
2960            compilation_options: wgpu::PipelineCompilationOptions::default(),
2961            cache: None,
2962        });
2963
2964        // Readback is a tight buffer-to-buffer copy (no 256-byte row
2965        // padding, unlike the old texture-to-buffer path).
2966        let readback = device.create_buffer(&wgpu::BufferDescriptor {
2967            label: Some("roxlap-gpu headless.readback"),
2968            size: u64::from(width) * u64::from(height) * 4,
2969            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
2970            mapped_at_creation: false,
2971        });
2972
2973        Self {
2974            width,
2975            height,
2976            framebuffer,
2977            depth_buffer,
2978            uniform_buf,
2979            _sky_texture: sky_texture,
2980            sky_view,
2981            sky_sampler,
2982            bgl,
2983            pipeline,
2984            readback,
2985            side_shades: [[0; 4]; 2],
2986        }
2987    }
2988
2989    /// Set per-face side-shades for subsequent [`Self::render`] calls —
2990    /// voxlap `setsideshades(top, bot, left, right, up, down)`, each an
2991    /// i8 stamped as u8 (matching the engine path). Lets the gate test
2992    /// the GPU side-shade darkening.
2993    pub fn set_side_shades(&mut self, s: [i8; 6]) {
2994        let v = |i: usize| i32::from(s[i] as u8);
2995        self.side_shades = [[v(0), v(1), v(2), v(3)], [v(4), v(5), 0, 0]];
2996    }
2997
2998    /// Render `scene` from `cameras` (one per grid) and read the
2999    /// framebuffer back as `width*height` packed `0xAABBGGRR` pixels
3000    /// (R in the low byte). Fog is disabled. `mip_scan_dist` drives
3001    /// the GPU.11.1 scene-grid LOD (`0` = always mip-0). Blocks on
3002    /// readback.
3003    ///
3004    /// # Panics
3005    /// If `cameras.len() != scene.grid_count`.
3006    #[must_use]
3007    #[allow(clippy::too_many_arguments)]
3008    pub fn render(
3009        &self,
3010        device: &wgpu::Device,
3011        queue: &wgpu::Queue,
3012        scene: &GpuSceneResident,
3013        cameras: &[Camera],
3014        fov_y_rad: f32,
3015        max_outer_steps: u32,
3016        mip_scan_dist: f32,
3017    ) -> Vec<u32> {
3018        assert_eq!(
3019            cameras.len(),
3020            scene.grid_count as usize,
3021            "headless render: {} cameras for {} grids",
3022            cameras.len(),
3023            scene.grid_count,
3024        );
3025
3026        let cam_vec: Vec<SceneDdaPerGridCamera> = cameras
3027            .iter()
3028            .map(SceneDdaPerGridCamera::from_camera)
3029            .collect();
3030        let grid_cameras = upload_grid_cameras(device, &cam_vec);
3031        let uniform = SceneDdaUniform {
3032            fov_y_rad,
3033            grid_count: scene.grid_count,
3034            max_outer_steps,
3035            _pad0: 0,
3036            screen_size: [self.width, self.height],
3037            _pad1: [0; 2],
3038            // Fog off: near/far past any reachable t → factor 0.
3039            fog_color: [0.0, 0.0, 0.0, 1.0e29],
3040            fog_far: 1.0e30,
3041            write_depth: 0,
3042            occ_page_words: scene.occupancy_page_words,
3043            occ_num_pages: scene.occupancy_num_pages,
3044            mip_scan_dist,
3045            _pad2: 0,
3046            _pad3: 0,
3047            _pad4: 0,
3048            // Sky direction from the first grid camera (the world frame
3049            // in these tests); a default forward camera when there are
3050            // none (grid_count == 0) so the sky lookup stays valid.
3051            sky_cam: SceneDdaPerGridCamera::from_camera(&cameras.first().copied().unwrap_or(
3052                Camera {
3053                    position: [0.0; 3],
3054                    right: [1.0, 0.0, 0.0],
3055                    down: [0.0, 0.0, 1.0],
3056                    forward: [0.0, 1.0, 0.0],
3057                    fov_y_rad,
3058                },
3059            )),
3060            side_shades0: self.side_shades[0],
3061            side_shades1: self.side_shades[1],
3062        };
3063        queue.write_buffer(&self.uniform_buf, 0, bytemuck::bytes_of(&uniform));
3064
3065        let bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
3066            label: Some("roxlap-gpu headless.bg"),
3067            layout: &self.bgl,
3068            entries: &[
3069                wgpu::BindGroupEntry {
3070                    binding: 0,
3071                    resource: self.uniform_buf.as_entire_binding(),
3072                },
3073                wgpu::BindGroupEntry {
3074                    binding: 1,
3075                    resource: scene.occupancy_pages[0].as_entire_binding(),
3076                },
3077                wgpu::BindGroupEntry {
3078                    binding: 2,
3079                    resource: scene.all_color_offsets.as_entire_binding(),
3080                },
3081                wgpu::BindGroupEntry {
3082                    binding: 3,
3083                    resource: scene.all_colors.as_entire_binding(),
3084                },
3085                wgpu::BindGroupEntry {
3086                    binding: 4,
3087                    resource: scene.all_chunk_colors_base.as_entire_binding(),
3088                },
3089                wgpu::BindGroupEntry {
3090                    binding: 5,
3091                    resource: scene.all_chunk_occupancy.as_entire_binding(),
3092                },
3093                wgpu::BindGroupEntry {
3094                    binding: 6,
3095                    resource: scene.grid_static_meta.as_entire_binding(),
3096                },
3097                wgpu::BindGroupEntry {
3098                    binding: 7,
3099                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
3100                },
3101                wgpu::BindGroupEntry {
3102                    binding: 8,
3103                    resource: self.framebuffer.as_entire_binding(),
3104                },
3105                wgpu::BindGroupEntry {
3106                    binding: 9,
3107                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
3108                },
3109                wgpu::BindGroupEntry {
3110                    binding: 10,
3111                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
3112                },
3113                wgpu::BindGroupEntry {
3114                    binding: 11,
3115                    resource: self.depth_buffer.as_entire_binding(),
3116                },
3117                wgpu::BindGroupEntry {
3118                    binding: 12,
3119                    resource: scene.occupancy_pages[1].as_entire_binding(),
3120                },
3121                wgpu::BindGroupEntry {
3122                    binding: 13,
3123                    resource: scene.occupancy_pages[2].as_entire_binding(),
3124                },
3125                wgpu::BindGroupEntry {
3126                    binding: 14,
3127                    resource: scene.occupancy_pages[3].as_entire_binding(),
3128                },
3129                wgpu::BindGroupEntry {
3130                    binding: 15,
3131                    resource: grid_cameras.as_entire_binding(),
3132                },
3133            ],
3134        });
3135
3136        let mut enc =
3137            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
3138        {
3139            let mut pass = enc.begin_compute_pass(&wgpu::ComputePassDescriptor {
3140                label: Some("roxlap-gpu headless.pass"),
3141                timestamp_writes: None,
3142            });
3143            pass.set_pipeline(&self.pipeline);
3144            pass.set_bind_group(0, &bg, &[]);
3145            pass.dispatch_workgroups(self.width.div_ceil(8), self.height.div_ceil(8), 1);
3146        }
3147        enc.copy_buffer_to_buffer(
3148            &self.framebuffer,
3149            0,
3150            &self.readback,
3151            0,
3152            u64::from(self.width) * u64::from(self.height) * 4,
3153        );
3154        queue.submit(Some(enc.finish()));
3155
3156        let slice = self.readback.slice(..);
3157        let (tx, rx) = std::sync::mpsc::channel();
3158        slice.map_async(wgpu::MapMode::Read, move |r| {
3159            let _ = tx.send(r);
3160        });
3161        device.poll(wgpu::PollType::wait_indefinitely()).ok();
3162        rx.recv().expect("map_async channel").expect("map_async");
3163
3164        let data = slice.get_mapped_range();
3165        // Tight `width*height` packed pixels — the shader's
3166        // `pack4x8unorm(vec4(r,g,b,a))` already yields `0xAABBGGRR`
3167        // little-endian, so a straight u32 read reconstructs each pixel.
3168        let out: Vec<u32> = data
3169            .chunks_exact(4)
3170            .map(|px| u32::from_le_bytes([px[0], px[1], px[2], px[3]]))
3171            .collect();
3172        drop(data);
3173        self.readback.unmap();
3174        out
3175    }
3176}
3177
3178fn bgl_uniform_entry(binding: u32) -> wgpu::BindGroupLayoutEntry {
3179    wgpu::BindGroupLayoutEntry {
3180        binding,
3181        visibility: wgpu::ShaderStages::COMPUTE,
3182        ty: wgpu::BindingType::Buffer {
3183            ty: wgpu::BufferBindingType::Uniform,
3184            has_dynamic_offset: false,
3185            min_binding_size: None,
3186        },
3187        count: None,
3188    }
3189}
3190
3191fn bgl_storage_entry(binding: u32, read_only: bool) -> wgpu::BindGroupLayoutEntry {
3192    wgpu::BindGroupLayoutEntry {
3193        binding,
3194        visibility: wgpu::ShaderStages::COMPUTE,
3195        ty: wgpu::BindingType::Buffer {
3196            ty: wgpu::BufferBindingType::Storage { read_only },
3197            has_dynamic_offset: false,
3198            min_binding_size: None,
3199        },
3200        count: None,
3201    }
3202}
3203
3204/// Create a fresh sky panorama texture sized `width × height` with
3205/// the initial pixel data uploaded via `write_texture`. Used by
3206/// `GpuRenderer::new` (1×1 default) and `set_sky_panorama` (host-
3207/// supplied panorama).
3208fn create_sky_texture(
3209    device: &wgpu::Device,
3210    width: u32,
3211    height: u32,
3212    _initial_pixels: &[u8],
3213) -> (wgpu::Texture, wgpu::TextureView) {
3214    let tex = device.create_texture(&wgpu::TextureDescriptor {
3215        label: Some("roxlap-gpu sky_texture"),
3216        size: wgpu::Extent3d {
3217            width,
3218            height,
3219            depth_or_array_layers: 1,
3220        },
3221        mip_level_count: 1,
3222        sample_count: 1,
3223        dimension: wgpu::TextureDimension::D2,
3224        format: wgpu::TextureFormat::Rgba8Unorm,
3225        usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
3226        view_formats: &[],
3227    });
3228    let view = tex.create_view(&wgpu::TextureViewDescriptor::default());
3229    (tex, view)
3230}
3231
3232/// GPU.4 needs to upload a whole grid (~hundreds of MiB) as a few
3233/// storage buffers. wgpu's default `max_storage_buffer_binding_size`
3234/// is 128 MiB, which is just enough for the demo's 32×32 ground
3235/// occupancy (~128 MiB) but not the colour array. We request as
3236/// much as the adapter is willing to give — most desktop GPUs cap
3237/// individual storage buffers at 2-4 GiB; iGPUs often offer the
3238/// full system memory.
3239pub(crate) fn pick_required_limits(adapter_limits: &wgpu::Limits) -> wgpu::Limits {
3240    wgpu::Limits {
3241        max_storage_buffer_binding_size: adapter_limits.max_storage_buffer_binding_size,
3242        max_buffer_size: adapter_limits.max_buffer_size,
3243        // Occupancy paging adds up to MAX_OCC_PAGES-1 extra storage
3244        // bindings; with the scene's other buffers + the GPU.9 depth
3245        // buffer the scene_dda stage needs ~11. The default cap is 8.
3246        // Both NVK and lavapipe advertise ≫16, so request 16.
3247        max_storage_buffers_per_shader_stage: adapter_limits
3248            .max_storage_buffers_per_shader_stage
3249            .min(16),
3250        ..wgpu::Limits::default()
3251    }
3252}
3253
3254fn pick_present_mode(modes: &[wgpu::PresentMode]) -> wgpu::PresentMode {
3255    // Prefer Mailbox > Immediate > Fifo. Fifo is the universal
3256    // fallback and the only one Wayland-on-Mesa always offers.
3257    for &m in &[wgpu::PresentMode::Mailbox, wgpu::PresentMode::Immediate] {
3258        if modes.contains(&m) {
3259            return m;
3260        }
3261    }
3262    wgpu::PresentMode::Fifo
3263}
3264
/// Un-normalised world-space view ray through window pixel `(x, y)`
/// for a pinhole camera with vertical field of view `fov_y_rad` — the
/// same projection `scene_dda.wgsl`'s `render_scene` uses. Shared by
/// [`GpuRenderer::pixel_ray`] and kept free-standing so it can be
/// unit-tested without a device.
///
/// `right` / `down` / `forward` are the camera basis vectors and
/// `w` / `h` the window size in pixels. Rays go through pixel centres
/// (`x + 0.5`, `y + 0.5`); the result is `forward` plus a lateral
/// offset along `right` and a vertical offset against `down`.
#[must_use]
#[allow(clippy::too_many_arguments)]
pub fn pinhole_pixel_ray(
    right: [f64; 3],
    down: [f64; 3],
    forward: [f64; 3],
    x: f64,
    y: f64,
    w: f64,
    h: f64,
    fov_y_rad: f64,
) -> [f64; 3] {
    // Half-extents of the image plane at unit distance along `forward`.
    let tan_half_fov_y = (fov_y_rad * 0.5).tan();
    let tan_half_fov_x = tan_half_fov_y * (w / h);

    // Pixel centre in normalised device coordinates: x grows to the
    // right in [-1, 1]; y is +1 at the top row and -1 at the bottom.
    let ndc_x = (x + 0.5) / w * 2.0 - 1.0;
    let ndc_y_up = 1.0 - (y + 0.5) / h * 2.0;

    let kx = ndc_x * tan_half_fov_x;
    let ky = ndc_y_up * tan_half_fov_y;

    // `ky` is positive towards the top of the frame, hence it is
    // subtracted along `down`.
    std::array::from_fn(|axis| forward[axis] + kx * right[axis] - ky * down[axis])
}
3293
#[cfg(test)]
mod pixel_ray_tests {
    use super::pinhole_pixel_ray;

    // Axis-aligned camera basis shared by the ray tests.
    const RIGHT: [f64; 3] = [1.0, 0.0, 0.0];
    const DOWN: [f64; 3] = [0.0, 1.0, 0.0];
    const FWD: [f64; 3] = [0.0, 0.0, 1.0]; // voxlap z-down "look down"

    // Window size used by every ray test.
    const W: f64 = 1280.0;
    const H: f64 = 720.0;

    /// Ray through pixel `(x, y)` of the shared 1280×720 test frame.
    fn ray_at(x: f64, y: f64, fov_y_rad: f64) -> [f64; 3] {
        pinhole_pixel_ray(RIGHT, DOWN, FWD, x, y, W, H, fov_y_rad)
    }

    // The frame centre (NDC 0,0) must look straight along `forward`.
    #[test]
    fn centre_pixel_is_forward() {
        let d = ray_at(639.5, 359.5, 60_f64.to_radians());
        let lateral_is_zero = d[0].abs() < 1e-9 && d[1].abs() < 1e-9;
        assert!(lateral_is_zero, "centre ≈ forward, got {d:?}");
        assert!((d[2] - 1.0).abs() < 1e-9);
    }

    // At the very right edge the lateral component equals
    // half_w = tan(fov_y/2) * aspect, i.e. the ray tilts +right by
    // tan(hfov/2).
    #[test]
    fn right_edge_tilts_by_half_w() {
        let fov = 60_f64.to_radians();
        let half_w = (fov * 0.5).tan() * (W / H);
        let d = ray_at(1279.5, 359.5, fov);
        assert!((d[0] - half_w).abs() < 1e-6, "x={}, half_w={half_w}", d[0]);
        assert!(d[0] > 0.0, "right edge tilts +right");
    }

    /// Run every WGSL shader through naga's parser and validator — the
    /// same front-end + validator wgpu runs at pipeline creation — so
    /// that shader edits (e.g. the GPU.10 sprite lighting bindings) are
    /// caught in CI without needing a GPU device.
    #[test]
    fn wgsl_shaders_validate() {
        let shaders: [(&str, &str); 7] = [
            (
                "sprite_model_dda.wgsl",
                include_str!("../shaders/sprite_model_dda.wgsl"),
            ),
            ("scene_dda.wgsl", include_str!("../shaders/scene_dda.wgsl")),
            ("blit.wgsl", include_str!("../shaders/blit.wgsl")),
            ("chunk_dda.wgsl", include_str!("../shaders/chunk_dda.wgsl")),
            ("grid_dda.wgsl", include_str!("../shaders/grid_dda.wgsl")),
            (
                "scene_blit.wgsl",
                include_str!("../shaders/scene_blit.wgsl"),
            ),
            ("line.wgsl", include_str!("../shaders/line.wgsl")),
        ];

        // One validator, with every flag and capability enabled, is
        // reused for all shaders.
        let mut validator = naga::valid::Validator::new(
            naga::valid::ValidationFlags::all(),
            naga::valid::Capabilities::all(),
        );
        for (name, src) in shaders {
            let module = match naga::front::wgsl::parse_str(src) {
                Ok(parsed) => parsed,
                Err(e) => panic!("{name}: WGSL parse failed:\n{}", e.emit_to_string(src)),
            };
            if let Err(e) = validator.validate(&module) {
                panic!("{name}: WGSL validation failed: {e:?}");
            }
        }
    }
}