Skip to main content

roxlap_gpu/
lib.rs

1//! WGPU-backed compute-shader renderer scaffold for the roxlap
2//! voxel engine. GPU.1 in `PORTING-GPU.md`.
3//!
4//! GPU.1's job: stand up the device + surface + swapchain on a
5//! host window (any [`raw-window-handle`](raw_window_handle)
6//! provider), present a clear-to-colour frame each render call,
7//! and give the host a one-call opt-in. No voxel marching yet — the
8//! [`examples/probe.rs`](../examples/probe.rs) standalone holds
9//! the empirical FPS baseline from GPU.0.
10//!
11//! Later sub-substages flesh `GpuRenderer::render` out: GPU.2
12//! uploads voxel data, GPU.3 dispatches the inner-DDA compute
13//! shader, GPU.4 layers in chunk skipping, GPU.5 plugs the renderer
14//! into `roxlap-scene::Scene`, …
15//!
16//! ## Host integration shape (GPU.1)
17//!
18//! ```no_run
19//! use std::sync::Arc;
20//! use roxlap_gpu::{GpuRenderer, GpuRendererSettings};
21//! # use winit::window::Window;
22//! # fn pick(w: Arc<Window>, size: (u32, u32)) -> Option<GpuRenderer> {
23//! match GpuRenderer::new_blocking(w, size, GpuRendererSettings::default()) {
24//!     Ok(r) => Some(r),
25//!     Err(e) => {
26//!         eprintln!("GPU init failed: {e}; falling back to CPU");
27//!         None
28//!     }
29//! }
30//! # }
31//! ```
32
33#![allow(clippy::must_use_candidate, clippy::too_many_lines)]
34
35pub mod camera;
36pub mod decompress;
37pub mod grid;
38// Headless rendering is a native-only test/bench aid: it blocks on
39// `pollster` + `device.poll(Wait)`, neither of which exists on wasm.
40#[cfg(not(target_arch = "wasm32"))]
41pub mod headless;
42pub mod resident;
43pub mod scene;
44pub mod sprite_model;
45
46pub use camera::Camera;
47pub use decompress::{decompress_chunk, ChunkUpload, BEDROCK_RGB, CHUNK_Z};
48pub use grid::{bounding_box_of, GpuGridResident, GridUpload};
49#[cfg(not(target_arch = "wasm32"))]
50pub use headless::HeadlessGpu;
51pub use resident::GpuChunkResident;
52pub use scene::{
53    GpuSceneResident, GridRuntimeTransform, GridStaticMeta, RefreshOutcome, SceneUpload,
54    MAX_SCENE_GRIDS,
55};
56pub use sprite_model::{
57    build_sprite_model, SpriteInstance, SpriteInstanceTransform, SpriteModel, SpriteModelRegistry,
58    SpriteRegistryResident,
59};
60
61use std::sync::Arc;
62
63use bytemuck::{Pod, Zeroable};
64use raw_window_handle::{HasDisplayHandle, HasWindowHandle};
65
/// Caller-controllable knobs for [`GpuRenderer::new`]. Defaults
/// target "highest-performance GPU, prefer Mailbox/Immediate over
/// vsync" — i.e. the same configuration the GPU.0 probe used to
/// measure the FPS ceiling.
///
/// Plain data: `Copy`, and comparable with `==` so a host can detect
/// whether its settings changed (no `Eq` — `clear_colour` is `f64`).
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct GpuRendererSettings {
    /// Which class of adapter to ask for (see [`PowerPreference`]).
    pub power_preference: PowerPreference,
    /// Initial clear colour cycled by GPU.1's empty render path.
    /// The voxel-rendering substages overwrite this entirely.
    pub clear_colour: [f64; 3],
    /// Prefer mailbox/immediate when offered; falls back to FIFO if
    /// the surface only supports it (Wayland under Mesa often does).
    pub uncapped_present: bool,
}

/// Adapter power class requested at init; mapped onto
/// `wgpu::PowerPreference` when the adapter is picked.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PowerPreference {
    /// Ask for the low-power adapter (`wgpu::PowerPreference::LowPower`).
    Low,
    /// Ask for the high-performance adapter
    /// (`wgpu::PowerPreference::HighPerformance`).
    High,
}

impl Default for GpuRendererSettings {
    /// High-performance adapter, dark blue-grey clear colour, uncapped
    /// present.
    fn default() -> Self {
        Self {
            power_preference: PowerPreference::High,
            clear_colour: [0.06, 0.08, 0.12],
            uncapped_present: true,
        }
    }
}
96
/// Errors `GpuRenderer::new` surfaces to the host. The host's
/// expected flow is "try this, fall back to the CPU path on Err".
#[derive(Debug)]
pub enum GpuInitError {
    /// `wgpu::Instance::create_surface` rejected the window / canvas.
    CreateSurface(wgpu::CreateSurfaceError),
    /// The adapter request failed. The underlying wgpu error is
    /// discarded, so this variant carries no payload.
    NoAdapter,
    /// The adapter was found but `request_device` failed.
    RequestDevice(wgpu::RequestDeviceError),
}
105
106impl std::fmt::Display for GpuInitError {
107    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
108        match self {
109            Self::CreateSurface(e) => write!(f, "create_surface failed: {e}"),
110            Self::NoAdapter => write!(
111                f,
112                "no compatible adapter — does this system have a Vulkan/Metal/DX12 driver?"
113            ),
114            Self::RequestDevice(e) => write!(f, "request_device failed: {e}"),
115        }
116    }
117}
118
119impl std::error::Error for GpuInitError {
120    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
121        match self {
122            Self::CreateSurface(e) => Some(e),
123            Self::RequestDevice(e) => Some(e),
124            Self::NoAdapter => None,
125        }
126    }
127}
128
129impl From<wgpu::CreateSurfaceError> for GpuInitError {
130    fn from(value: wgpu::CreateSurfaceError) -> Self {
131        Self::CreateSurface(value)
132    }
133}
134
135impl From<wgpu::RequestDeviceError> for GpuInitError {
136    fn from(value: wgpu::RequestDeviceError) -> Self {
137        Self::RequestDevice(value)
138    }
139}
140
// NOTE: the paragraph below describes `GpuRenderer` (declared further
// down in this file), not `GpuLine`. It is kept as a plain `//` comment
// so rustdoc does not prepend it to `GpuLine`'s documentation.
//
// WGPU-backed renderer. Owns the device, queue, and surface
// bound to the host's window. `GpuRenderer::render` is the GPU.1
// clear-to-colour path; `GpuRenderer::render_chunk` is GPU.3's
// single-chunk DDA marcher.
//
// The window is consumed only at construction — `wgpu`'s
// `Surface<'static>` keeps its own `Arc` clone of the handle, so
// the renderer holds no window field of its own.

/// A world-space line segment for [`GpuRenderer::draw_lines_deferred`].
/// `color` is straight RGBA in `0..=1` (the alpha drives the over-blend);
/// `width_px` is the screen-space thickness; `depth_test` occludes the
/// segment behind nearer marched geometry.
#[derive(Clone, Copy, Debug)]
pub struct GpuLine {
    /// Segment start, world space.
    pub a: [f32; 3],
    /// Segment end, world space.
    pub b: [f32; 3],
    /// Straight (non-premultiplied) RGBA, each channel in `0..=1`.
    pub color: [f32; 4],
    /// Screen-space thickness in pixels; `build_line_vertices` clamps
    /// it to at least 1 px.
    pub width_px: f32,
    /// `true` to occlude the segment behind nearer marched geometry.
    pub depth_test: bool,
}
161
/// World camera basis for projecting [`GpuLine`] endpoints — the same
/// pinhole the scene-DDA pass marches with (`right`/`down`/`forward`
/// orthonormal, `pos` in world voxel units).
#[derive(Clone, Copy, Debug)]
pub struct GpuLineCamera {
    /// Camera origin in world voxel units.
    pub pos: [f32; 3],
    /// Camera-space +x axis expressed in world space.
    pub right: [f32; 3],
    /// Camera-space +y axis (screen-down) expressed in world space.
    pub down: [f32; 3],
    /// View direction; a point's component along it is the value the
    /// near-plane clip tests.
    pub forward: [f32; 3],
}
172
/// Near plane (camera-forward distance) below which a [`GpuLine`] endpoint
/// is clipped, so the pinhole divide stays finite. `0.0625` is 1/16.
const LINE_NEAR_Z: f32 = 0.0625;
/// Depth-test slack (euclidean world distance) so a line resting on the
/// surface it traces doesn't z-fight the marched geometry.
const LINE_DEPTH_BIAS: f32 = 0.5;
179
/// One expanded-quad vertex (`build_line_vertices` output). `pos` is NDC;
/// `depth` is the euclidean world distance of the source endpoint (the
/// marcher's `best_t` metric); `depth_test` is `1.0`/`0.0`.
///
/// `#[repr(C)]` with eight `f32`s, so the stride is 32 bytes with no
/// padding.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct LineVertex {
    /// Quad-corner position in NDC (x right, y up).
    pos: [f32; 2],
    /// Euclidean camera distance of the (near-clipped) source endpoint.
    depth: f32,
    /// `1.0` when the owning line requested depth testing, else `0.0`.
    depth_test: f32,
    /// The owning line's RGBA colour, copied unchanged.
    color: [f32; 4],
}
191
/// `line.wgsl` fragment uniform (std140; 16 bytes).
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct LineParams {
    // NOTE(review): presumably the framebuffer size in pixels — confirm
    // against `line.wgsl`.
    screen_w: u32,
    screen_h: u32,
    // NOTE(review): presumably fed from `LINE_DEPTH_BIAS` — confirm at
    // the upload site.
    depth_bias: f32,
    // Presumably set to 1 when only the dummy depth buffer is bound, so
    // the shader skips the depth lookup — confirm against `line.wgsl`.
    no_depth: u32,
}
201
/// Lazy-built debug-line pipeline (L3.2). The bind group is rebuilt each
/// draw (it references the current `scene_dda.depth_buffer`, which the
/// swapchain resize recreates); the pipeline / layout / uniform persist.
struct LineResources {
    /// Render pipeline for the expanded line quads.
    pipeline: wgpu::RenderPipeline,
    /// Layout the per-draw bind group is built against.
    bgl: wgpu::BindGroupLayout,
    /// Uniform buffer; presumably holds a `LineParams` — confirm at the
    /// creation site.
    uniform_buf: wgpu::Buffer,
    /// 1-word stand-in bound when no scene depth exists (sprite-only /
    /// empty scene); `no_depth = 1` keeps the shader from indexing it.
    dummy_depth: wgpu::Buffer,
}
213
214/// Project + expand world-space [`GpuLine`]s into screen-space quad
215/// vertices (6 per visible segment) for `line.wgsl`. Mirrors the
216/// scene-DDA pinhole (`forward + ndc_x·half_w·right − ndc_y·half_h·down`)
217/// so lines land on the marched geometry, carrying each endpoint's
218/// euclidean world distance as the depth-test key (= the marcher's
219/// `best_t`). Segments fully behind the near plane are dropped; the rest
220/// are clipped to it.
221fn build_line_vertices(
222    cam: &GpuLineCamera,
223    lines: &[GpuLine],
224    w: u32,
225    h: u32,
226    fov_y: f32,
227) -> Vec<LineVertex> {
228    let aspect = w as f32 / h as f32;
229    let half_h = (fov_y * 0.5).tan();
230    let half_w = half_h * aspect;
231    let (wf, hf) = (w as f32, h as f32);
232
233    let cam_coords = |p: [f32; 3]| -> [f32; 3] {
234        let d = [p[0] - cam.pos[0], p[1] - cam.pos[1], p[2] - cam.pos[2]];
235        [
236            cam.right[0] * d[0] + cam.right[1] * d[1] + cam.right[2] * d[2],
237            cam.down[0] * d[0] + cam.down[1] * d[1] + cam.down[2] * d[2],
238            cam.forward[0] * d[0] + cam.forward[1] * d[1] + cam.forward[2] * d[2],
239        ]
240    };
241    // Camera-space point → (NDC xy, euclidean depth). NDC y is up (+1 top),
242    // matching WebGPU clip space; depth is the marcher's world-t metric.
243    let project = |q: [f32; 3]| -> ([f32; 2], f32) {
244        let inv = 1.0 / q[2];
245        let nx = q[0] * inv / half_w;
246        let ny = -q[1] * inv / half_h;
247        let depth = (q[0] * q[0] + q[1] * q[1] + q[2] * q[2]).sqrt();
248        ([nx, ny], depth)
249    };
250
251    let mut out = Vec::with_capacity(lines.len() * 6);
252    for line in lines {
253        let ca = cam_coords(line.a);
254        let cb = cam_coords(line.b);
255        let (cfa, cfb) = (ca[2], cb[2]);
256        if cfa < LINE_NEAR_Z && cfb < LINE_NEAR_Z {
257            continue;
258        }
259        // Near-clip in segment-parameter space on the forward component.
260        let (mut t0, mut t1) = (0.0f32, 1.0f32);
261        let dz = cfb - cfa;
262        if dz.abs() > f32::EPSILON {
263            let tn = (LINE_NEAR_Z - cfa) / dz;
264            if dz > 0.0 {
265                t0 = t0.max(tn);
266            } else {
267                t1 = t1.min(tn);
268            }
269        }
270        if t0 > t1 {
271            continue;
272        }
273        let lerp3 = |t: f32| {
274            [
275                ca[0] + (cb[0] - ca[0]) * t,
276                ca[1] + (cb[1] - ca[1]) * t,
277                ca[2] + (cb[2] - ca[2]) * t,
278            ]
279        };
280        let (n0, d0) = project(lerp3(t0));
281        let (n1, d1) = project(lerp3(t1));
282
283        // Expand in pixel space for a uniform screen-space thickness.
284        let to_px = |n: [f32; 2]| [(n[0] * 0.5 + 0.5) * wf, (0.5 - n[1] * 0.5) * hf];
285        let to_ndc = |p: [f32; 2]| [p[0] / wf * 2.0 - 1.0, 1.0 - p[1] / hf * 2.0];
286        let p0 = to_px(n0);
287        let p1 = to_px(n1);
288        let (dx, dy) = (p1[0] - p0[0], p1[1] - p0[1]);
289        let len = (dx * dx + dy * dy).sqrt().max(1e-6);
290        let half = line.width_px.max(1.0) * 0.5;
291        let (ex, ey) = (-dy / len * half, dx / len * half);
292
293        let c0a = to_ndc([p0[0] + ex, p0[1] + ey]);
294        let c0b = to_ndc([p0[0] - ex, p0[1] - ey]);
295        let c1a = to_ndc([p1[0] + ex, p1[1] + ey]);
296        let c1b = to_ndc([p1[0] - ex, p1[1] - ey]);
297        let dt = if line.depth_test { 1.0 } else { 0.0 };
298        let vert = |pos: [f32; 2], depth: f32| LineVertex {
299            pos,
300            depth,
301            depth_test: dt,
302            color: line.color,
303        };
304        // Two triangles, cull disabled so winding is irrelevant.
305        out.push(vert(c0a, d0));
306        out.push(vert(c0b, d0));
307        out.push(vert(c1a, d1));
308        out.push(vert(c1a, d1));
309        out.push(vert(c0b, d0));
310        out.push(vert(c1b, d1));
311    }
312    out
313}
314
/// WGPU-backed renderer. Owns the device, queue, and surface
/// bound to the host's window. [`Self::render`] is the GPU.1
/// clear-to-colour path; [`Self::render_chunk`] is GPU.3's
/// single-chunk DDA marcher.
///
/// The window is consumed only at construction — `wgpu`'s
/// `Surface<'static>` keeps its own `Arc` clone of the handle, so
/// the renderer holds no window field of its own.
pub struct GpuRenderer {
    /// Presentation surface created from the host window / canvas.
    surface: wgpu::Surface<'static>,
    /// Current swapchain configuration (format, size, present mode).
    surface_config: wgpu::SurfaceConfiguration,
    device: wgpu::Device,
    queue: wgpu::Queue,
    /// Human-readable adapter summary, formatted at init as
    /// `"{name} ({backend:?}, {device_type:?})"`.
    adapter_info: String,
    /// Clear colour; presumably seeded from
    /// `GpuRendererSettings::clear_colour` — confirm in `finish_init`.
    clear_colour: [f64; 3],
    // NOTE(review): presumably a count of frames rendered so far —
    // confirm at the increment site.
    frame_count: u32,
    /// Lazy-built on first [`Self::render_chunk`] call; rebuilt when
    /// the swapchain resizes (storage texture must match).
    chunk_dda: Option<ChunkDdaResources>,
    /// Lazy-built on first [`Self::render_grid`] call; same resize
    /// trigger as `chunk_dda`. The two paths share the same blit
    /// pipeline structure but bind different storage layouts.
    grid_dda: Option<GridDdaResources>,
    /// Lazy-built on first [`Self::render_scene`] call. Holds the
    /// multi-grid pipeline + per-grid camera uniforms.
    scene_dda: Option<SceneDdaResources>,
    /// GPU.8 — panoramic sky texture + sampler. Created at
    /// `new` as a 1×1 mid-grey default; [`Self::set_sky_panorama`]
    /// replaces it. The scene-DDA bind group references this each
    /// frame.
    sky_texture: wgpu::Texture,
    sky_view: wgpu::TextureView,
    sky_sampler: wgpu::Sampler,
    /// GPU.8 fog state. `color` is BGRA-style premultiplied (each
    /// channel in [0, 1]); `near` is the world-t distance at which
    /// fog starts kicking in; `far` is the distance at which it's
    /// fully opaque. The shader does
    /// `mix(hit, fog, smoothstep(near, far, t))`.
    fog_color: [f32; 3],
    fog_near: f32,
    fog_far: f32,
    /// GPU.10 — sprites rendered as DDA-marched voxel models (the
    /// precise path; the GPU.9 compute splatter it replaced was
    /// retired in 10.5). Holds the concatenated model registry + the
    /// per-frame instance array; set via [`Self::set_sprite_instances`].
    sprite_registry: Option<sprite_model::SpriteRegistryResident>,
    /// Lazy-built pipeline + uniform for the model-DDA pass.
    sprite_model_dda: Option<SpriteModelDdaResources>,
    /// GPU.10.4 — LOD aggressiveness: step a sprite to the next mip
    /// once a mip-0 voxel projects below this many screen pixels.
    /// Defaults to 4.0 (the empirical sweet spot); the host can tune
    /// via [`Self::set_sprite_lod_px`].
    sprite_lod_px: f32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` is marched at the mip level
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// ladder. `0` disables LOD (always mip-0). Tunable via
    /// [`Self::set_scene_mip_scan_dist`] — the axis-aligned-mip-beams
    /// mitigation (GPU.11.2) pushes it outward if banding appears.
    scene_mip_scan_dist: f32,
    /// Per-face grid side-shades (voxlap setsideshades), packed for the
    /// scene-DDA uniform: `[0]=(top,bot,left,right)`, `[1]=(up,down,_,_)`.
    /// Each is the u8 shade intensity. `[[0;4];2]` = no shading. Set via
    /// [`Self::set_scene_side_shades`].
    scene_side_shades: [[i32; 4]; 2],
    /// Vertical FOV (radians) the last `render_scene` marched with —
    /// cached so [`Self::pixel_ray`] reconstructs the matching view ray
    /// for picking. `0` until the first scene render.
    last_fov_y_rad: f32,
    /// The acquired-but-not-yet-presented swapchain frame from the most
    /// recent deferred render ([`Self::render_scene`] /
    /// [`Self::render_clear_deferred`]). [`Self::present`] shows it as
    /// is; [`Self::paint_egui`] overlays egui first. Lets a host slot a
    /// UI pass between the marcher and present. `None` between present
    /// and the next render.
    pending_frame: Option<(wgpu::SurfaceTexture, wgpu::TextureView)>,
    /// Lazy-built debug-line pipeline (L3.2) — built on the first
    /// [`Self::draw_lines_deferred`] call.
    line_resources: Option<LineResources>,
    /// Persistent debug-line vertex buffer (L3.3) — grown on demand and
    /// reused across frames so a per-frame overlay (hundreds of segments)
    /// costs one `write_buffer`, not a fresh allocation. `line_vbuf_cap`
    /// is its capacity in bytes.
    line_vbuf: Option<wgpu::Buffer>,
    line_vbuf_cap: u64,
    /// Lazy-built `egui-wgpu` paint pipeline; created on the first
    /// [`Self::paint_egui`] call (`hud` feature).
    #[cfg(feature = "hud")]
    egui_renderer: Option<egui_wgpu::Renderer>,
}
397
/// Per-renderer chunk-DDA pipeline state. The compute shader writes
/// into the storage texture; a fullscreen-triangle render pass
/// nearest-neighbour blits it to the swapchain.
struct ChunkDdaResources {
    /// `(width, height)` the storage texture was created with; the owner
    /// rebuilds these resources when the swapchain resizes.
    storage_size: (u32, u32),
    /// View of the storage texture the compute pass writes.
    storage_view: wgpu::TextureView,
    /// Per-frame uniform buffer for the compute pass.
    uniform_buf: wgpu::Buffer,
    /// Bind-group layout of the compute (DDA) pass.
    bgl_dda: wgpu::BindGroupLayout,
    pipeline_dda: wgpu::ComputePipeline,
    /// Bind group for the blit pass.
    blit_bg: wgpu::BindGroup,
    pipeline_blit: wgpu::RenderPipeline,
    // wgpu BindGroups internally Arc their resources, but we keep
    // the handle so the sampler shows up in profiler dumps.
    _sampler: wgpu::Sampler,
}
413
/// Per-renderer grid-DDA pipeline state. Field-for-field the same shape
/// as `ChunkDdaResources` (storage target, uniform, compute pipeline,
/// blit pipeline); kept as a separate type because the grid path binds
/// a different storage layout.
struct GridDdaResources {
    /// `(width, height)` the storage target was created with.
    storage_size: (u32, u32),
    storage_view: wgpu::TextureView,
    uniform_buf: wgpu::Buffer,
    bgl_dda: wgpu::BindGroupLayout,
    pipeline_dda: wgpu::ComputePipeline,
    blit_bg: wgpu::BindGroup,
    pipeline_blit: wgpu::RenderPipeline,
    // Never read; held alongside the bind group that references it.
    _sampler: wgpu::Sampler,
}
424
/// Per-renderer scene-DDA (multi-grid) pipeline state: the framebuffer
/// and depth buffers the compute passes write, plus the compute and blit
/// pipelines.
struct SceneDdaResources {
    /// `(width, height)` the framebuffer / depth buffers were sized for.
    storage_size: (u32, u32),
    /// Framebuffer as a packed-`rgba8unorm` storage **buffer** (row
    /// stride = width), written by the scene + sprite compute passes
    /// and read by the blit. A buffer (not a storage texture) dodges
    /// Chrome-Dawn's tiled write-texture layout (which produced a
    /// 128×256-tiled image); linear + explicit stride is portable.
    framebuffer: wgpu::Buffer,
    /// Per-frame uniform buffer for the scene pass.
    uniform_buf: wgpu::Buffer,
    bgl_dda: wgpu::BindGroupLayout,
    pipeline_dda: wgpu::ComputePipeline,
    blit_bg: wgpu::BindGroup,
    pipeline_blit: wgpu::RenderPipeline,
    /// GPU.9 — per-pixel world-t depth (f32 bits as u32), sized
    /// `width * height * 4`. The scene pass writes it when sprites
    /// are present; the sprite model-DDA pass reads + composites
    /// against it.
    depth_buffer: wgpu::Buffer,
    /// Picking — a `COPY_DST | MAP_READ` staging copy of `depth_buffer`
    /// so the host can read back the per-pixel world-t after a frame
    /// (e.g. click → which voxel). Same size as `depth_buffer`.
    depth_readback: wgpu::Buffer,
}
448
/// GPU.10.0 — single-sprite model-DDA pipeline: one thread per pixel
/// marches the model voxel volume and composites against the scene
/// depth buffer.
struct SpriteModelDdaResources {
    /// Bind-group layout of the model-DDA compute pass.
    bgl: wgpu::BindGroupLayout,
    pipeline: wgpu::ComputePipeline,
    /// Per-frame uniform buffer; presumably holds a
    /// `SpriteModelUniform` — confirm at the upload site.
    uniform_buf: wgpu::Buffer,
}
457
/// Per-frame uniform for the model-DDA pass. Mirrors `Uniform` in
/// `sprite_model_dda.wgsl` (std140). Per-model + per-instance data
/// now live in storage buffers; this holds only the camera, fog, and
/// instance count.
///
/// Layout: each `[f32; 3]` is followed by an explicit `f32` pad so it
/// fills a 16-byte slot; the struct totals 112 bytes (seven 16-byte
/// rows). Field order must stay in lockstep with the WGSL struct.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SpriteModelUniform {
    cam_pos: [f32; 3],
    _p0: f32,
    cam_right: [f32; 3],
    _p1: f32,
    cam_down: [f32; 3],
    _p2: f32,
    cam_forward: [f32; 3],
    _p3: f32,
    fog_color: [f32; 4],
    // Presumably the framebuffer `(width, height)` in pixels — confirm.
    screen_size: [u32; 2],
    instance_count: u32,
    fog_far: f32,
    fov_y_rad: f32,
    // Presumably the number of sprite tiles per screen row — confirm
    // against the binning code.
    tiles_x: u32,
    // Presumably set from `SPRITE_TILE_SIZE` — confirm.
    tile_size: u32,
    // Trailing pad that rounds the last row up to 16 bytes.
    _p6: f32,
}
482
/// `MAX_SCENE_GRIDS` as a `usize`, for use as an array length (see
/// `SceneDdaUniform::cameras`).
const SCENE_MAX_GRIDS: usize = MAX_SCENE_GRIDS as usize;

/// GPU.10.3 — sprite screen-tile edge in pixels for instance binning.
const SPRITE_TILE_SIZE: u32 = 16;

// The scene_dda bind group + layout wire occupancy pages 1..=3 at
// bindings 12..=14 explicitly; keep that in lockstep with the page
// count. Bump the bindings (here, in the WGSL, and in the bind
// group) if MAX_OCC_PAGES changes.
// Compile-time check: the build fails if the page count drifts.
const _: () = assert!(scene::MAX_OCC_PAGES == 4);
493
/// One camera basis as laid out in the scene-DDA uniform: four
/// `[f32; 3]` vectors, each followed by an explicit `f32` pad so it
/// occupies a 16-byte slot (64 bytes total).
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaPerGridCamera {
    pos: [f32; 3],
    _pad0: f32,
    right: [f32; 3],
    _pad1: f32,
    down: [f32; 3],
    _pad2: f32,
    forward: [f32; 3],
    _pad3: f32,
}
506
507impl SceneDdaPerGridCamera {
508    fn from_camera(c: &Camera) -> Self {
509        Self {
510            pos: c.position,
511            _pad0: 0.0,
512            right: c.right,
513            _pad1: 0.0,
514            down: c.down,
515            _pad2: 0.0,
516            forward: c.forward,
517            _pad3: 0.0,
518        }
519    }
520}
521
/// Uniform block for the scene-DDA (multi-grid) pass. `#[repr(C)]` with
/// explicit pads so every run of scalars fills a whole 16-byte row;
/// presumably mirrored by a struct in the scene-DDA WGSL — keep field
/// order in lockstep with it.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaUniform {
    /// Vertical field of view in radians.
    fov_y_rad: f32,
    // Presumably the number of valid entries in `cameras` — confirm.
    grid_count: u32,
    max_outer_steps: u32,
    _pad0: u32,
    // Presumably the framebuffer `(width, height)` in pixels — confirm.
    screen_size: [u32; 2],
    _pad1: [u32; 2],
    /// One camera slot per scene grid, `SCENE_MAX_GRIDS` slots in total.
    cameras: [SceneDdaPerGridCamera; SCENE_MAX_GRIDS],
    /// GPU.8 — `[r, g, b, fog_near]`. The `near` distance is packed
    /// into the colour's alpha channel to keep std140 alignment
    /// tidy (a bare `f32` after the `vec4` would force extra pads).
    fog_color: [f32; 4],
    fog_far: f32,
    /// GPU.9 — `1` when the sprite pass is active (scene pass then
    /// records `best_t` into the depth buffer), `0` otherwise.
    write_depth: u32,
    /// Occupancy paging: words per storage page (see
    /// `scene::split_occupancy_pages`). Only consulted by the shader
    /// when `occ_num_pages > 1`.
    occ_page_words: u32,
    /// Number of real occupancy pages (1 on multi-GiB GPUs → the
    /// shader takes a branch-free single-page read).
    occ_num_pages: u32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` marches at mip
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// count. `0` disables LOD (always mip-0).
    mip_scan_dist: f32,
    // Three pads round the `mip_scan_dist` row up to 16 bytes.
    _pad2: u32,
    _pad3: u32,
    _pad4: u32,
    /// World camera used only to derive the per-pixel sky direction —
    /// always valid, so a `grid_count == 0` (sprite-only / empty) scene
    /// still paints a proper sky instead of a degenerate `(0,0,1)`
    /// (whose `atan2(0,0)` sky lookup samples black).
    sky_cam: SceneDdaPerGridCamera,
    /// Per-face side-shade intensities (voxlap setsideshades), each the
    /// u8 shade subtracted from a voxel's brightness byte at a hit.
    /// `side_shades0 = (top, bot, left, right)`,
    /// `side_shades1 = (up, down, _, _)`. All-zero = no shading.
    side_shades0: [i32; 4],
    side_shades1: [i32; 4],
}
567
/// Uniform block for the grid-DDA pass (presumably the one behind
/// `GpuRenderer::render_grid` — confirm at the upload site). `#[repr(C)]`,
/// 112 bytes; `fov_y_rad` rides in the fourth word of the
/// `camera_forward` row instead of a pad.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct GridDdaUniform {
    camera_pos: [f32; 3],
    _pad0: f32,
    camera_right: [f32; 3],
    _pad1: f32,
    camera_down: [f32; 3],
    _pad2: f32,
    camera_forward: [f32; 3],
    /// Vertical field of view in radians.
    fov_y_rad: f32,
    // Presumably the framebuffer `(width, height)` in pixels — confirm.
    screen_size: [u32; 2],
    vsid: u32,
    max_outer_steps: u32,
    // Presumably the grid extent in chunks along x/y/z — confirm.
    chunks_dims: [u32; 3],
    _pad3: u32,
    // Signed, so the grid's origin chunk may sit at negative coordinates.
    origin_chunk: [i32; 3],
    _pad4: u32,
}
587
/// Uniform block for the single-chunk DDA pass (presumably the one
/// behind `GpuRenderer::render_chunk` — confirm at the upload site).
/// `#[repr(C)]`, 80 bytes; `fov_y_rad` rides in the fourth word of the
/// `camera_forward` row instead of a pad.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct ChunkDdaUniform {
    camera_pos: [f32; 3],
    _pad0: f32,
    camera_right: [f32; 3],
    _pad1: f32,
    camera_down: [f32; 3],
    _pad2: f32,
    camera_forward: [f32; 3],
    /// Vertical field of view in radians.
    fov_y_rad: f32,
    // Presumably the framebuffer `(width, height)` in pixels — confirm.
    screen_size: [u32; 2],
    vsid: u32,
    max_scan_dist: u32,
}
603
604impl GpuRenderer {
605    /// Stand up the device + surface + swapchain on `window`. Async
606    /// because `wgpu::Adapter`/`Device` requests are.
607    ///
608    /// `window` is any [`raw-window-handle`] provider (winit, SDL,
609    /// GLFW, …) wrapped in an `Arc`; `size` is its initial physical
610    /// framebuffer size in pixels — passed explicitly so the renderer
611    /// stays decoupled from any one windowing library's size API.
612    ///
613    /// [`raw-window-handle`]: raw_window_handle
614    ///
615    /// # Errors
616    /// Returns [`GpuInitError`] if surface creation, adapter
617    /// selection, or device request fails. Hosts treat any error as
618    /// "fall back to the CPU path".
619    pub async fn new<W>(
620        window: Arc<W>,
621        size: (u32, u32),
622        settings: GpuRendererSettings,
623    ) -> Result<Self, GpuInitError>
624    where
625        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
626    {
627        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle());
628        let surface = instance.create_surface(window.clone())?;
629        let adapter = Self::request_adapter(&instance, Some(&surface), settings).await?;
630        let (device, queue) = Self::request_device(&adapter).await?;
631        Ok(Self::finish_init(
632            &adapter, device, queue, surface, size, settings,
633        ))
634    }
635
636    /// wasm/WebGPU: build the renderer against an HTML `canvas`. No
637    /// `Send + Sync` bound — wgpu's surface/device/queue are `!Send` on
638    /// the `+atomics` shared-memory wasm build, and the browser host is
639    /// single-threaded (`Rc<RefCell<…>>`). The native generic-`W` entry
640    /// (which carries the bound) isn't reachable on wasm.
641    ///
642    /// Probes for an adapter **before** `create_surface`: on wasm,
643    /// creating the surface calls `canvas.getContext("webgpu")`, which
644    /// permanently locks the canvas's context type. If we bound it and
645    /// then found no adapter, a CPU/WebGL2 fallback on the *same* canvas
646    /// (the facade clones the handle, but it's the same DOM element)
647    /// would fail with "no webgl2 context". Probing first leaves the
648    /// canvas pristine when WebGPU is unavailable.
649    ///
650    /// # Errors
651    /// See [`Self::new`].
652    #[cfg(target_arch = "wasm32")]
653    pub async fn new_from_canvas(
654        canvas: web_sys::HtmlCanvasElement,
655        size: (u32, u32),
656        settings: GpuRendererSettings,
657    ) -> Result<Self, GpuInitError> {
658        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::new_without_display_handle());
659        // Probe adapter AND device before binding the canvas — both
660        // `requestAdapter` and `requestDevice` can fail on wasm, and
661        // `create_surface` permanently locks the canvas to a WebGPU
662        // context. Creating the surface last keeps the canvas pristine
663        // for the CPU/WebGL2 fallback on any GPU-init failure.
664        let adapter = Self::request_adapter(&instance, None, settings).await?;
665        let (device, queue) = Self::request_device(&adapter).await?;
666        let surface = instance.create_surface(wgpu::SurfaceTarget::Canvas(canvas))?;
667        Ok(Self::finish_init(
668            &adapter, device, queue, surface, size, settings,
669        ))
670    }
671
672    /// Pick a GPU adapter at the settings' power preference. `None`
673    /// `compatible_surface` is used on the wasm canvas path so the probe
674    /// doesn't bind the canvas's context (see [`Self::new_from_canvas`]);
675    /// WebGPU exposes a single surface-independent adapter, so this is
676    /// safe there.
677    async fn request_adapter(
678        instance: &wgpu::Instance,
679        compatible_surface: Option<&wgpu::Surface<'static>>,
680        settings: GpuRendererSettings,
681    ) -> Result<wgpu::Adapter, GpuInitError> {
682        let power_preference = match settings.power_preference {
683            PowerPreference::Low => wgpu::PowerPreference::LowPower,
684            PowerPreference::High => wgpu::PowerPreference::HighPerformance,
685        };
686        instance
687            .request_adapter(&wgpu::RequestAdapterOptions {
688                power_preference,
689                compatible_surface,
690                force_fallback_adapter: false,
691            })
692            .await
693            .map_err(|_| GpuInitError::NoAdapter)
694    }
695
696    /// Request the device + queue from `adapter`. Pulled out of
697    /// [`Self::finish_init`] so the wasm canvas path can validate the
698    /// device **before** `create_surface` binds the canvas's WebGPU
699    /// context — if the device request fails (e.g. a browser that
700    /// rejects a wgpu-sent limit), the canvas stays pristine for the
701    /// CPU/WebGL2 fallback instead of being poisoned.
702    async fn request_device(
703        adapter: &wgpu::Adapter,
704    ) -> Result<(wgpu::Device, wgpu::Queue), GpuInitError> {
705        Ok(adapter
706            .request_device(&wgpu::DeviceDescriptor {
707                label: Some("roxlap-gpu device"),
708                required_features: wgpu::Features::empty(),
709                required_limits: pick_required_limits(&adapter.limits()),
710                experimental_features: wgpu::ExperimentalFeatures::disabled(),
711                memory_hints: wgpu::MemoryHints::default(),
712                trace: wgpu::Trace::Off,
713            })
714            .await?)
715    }
716
717    /// Shared swapchain → sky/sampler setup, run after the adapter +
718    /// device + surface exist (the surface comes from a window handle on
719    /// native, or an HTML canvas on wasm — created last on wasm so a
720    /// failed device request never touches the canvas).
721    fn finish_init(
722        adapter: &wgpu::Adapter,
723        device: wgpu::Device,
724        queue: wgpu::Queue,
725        surface: wgpu::Surface<'static>,
726        size: (u32, u32),
727        settings: GpuRendererSettings,
728    ) -> Self {
729        let info = adapter.get_info();
730        let adapter_info = format!(
731            "{name} ({backend:?}, {device_type:?})",
732            name = info.name,
733            backend = info.backend,
734            device_type = info.device_type,
735        );
736
737        let caps = surface.get_capabilities(adapter);
738        // Pick a NON-sRGB, 8-bit swapchain format. Voxlap colours are
739        // already sRGB-encoded (the slab bytes are display-ready,
740        // matching what the CPU softbuffer path writes straight to the
741        // framebuffer with no conversion); an sRGB swapchain would
742        // re-apply the gamma curve, washing the look out. We also
743        // *prefer 8-bit BGRA/RGBA* over any other non-sRGB format: some
744        // adapters (e.g. NVK) advertise a 16-bit-unorm format first,
745        // and wgpu 29 gates `create_view` on 16-bit-norm formats behind
746        // the `TEXTURE_FORMAT_16BIT_NORM` device feature (which we don't
747        // enable, to stay WebGPU-portable). Falls back to the first
748        // non-sRGB format, then `caps.formats[0]`.
749        let surface_format = caps
750            .formats
751            .iter()
752            .copied()
753            .find(|f| {
754                matches!(
755                    f,
756                    wgpu::TextureFormat::Bgra8Unorm | wgpu::TextureFormat::Rgba8Unorm
757                )
758            })
759            .or_else(|| caps.formats.iter().copied().find(|f| !f.is_srgb()))
760            .unwrap_or(caps.formats[0]);
761        let present_mode = if settings.uncapped_present {
762            pick_present_mode(&caps.present_modes)
763        } else {
764            wgpu::PresentMode::Fifo
765        };
766        // GPU.11.2 — surface the present mode: `Fifo` is vsync-capped
767        // (FPS pinned to refresh rate → compute optimisations like the
768        // mip LOD won't show up in the FPS counter). Mailbox/Immediate
769        // are uncapped. Wayland under Mesa frequently offers only Fifo.
770        eprintln!(
771            "roxlap-gpu: present mode = {present_mode:?} (available: {:?})",
772            caps.present_modes,
773        );
774        let (init_w, init_h) = size;
775        let surface_config = wgpu::SurfaceConfiguration {
776            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
777            format: surface_format,
778            width: init_w.max(1),
779            height: init_h.max(1),
780            present_mode,
781            alpha_mode: caps.alpha_modes[0],
782            view_formats: vec![],
783            desired_maximum_frame_latency: 2,
784        };
785        surface.configure(&device, &surface_config);
786
787        // GPU.8 default sky: a 1×1 mid-grey texture. Hosts replace
788        // it via `set_sky_panorama` with a real equirectangular
789        // panorama; the default stops the shader sampling
790        // uninitialised memory before that happens.
791        let default_sky_pixel = [0x80u8, 0x80, 0x80, 0xff];
792        let (sky_texture, sky_view) = create_sky_texture(&device, 1, 1, &default_sky_pixel);
793        queue.write_texture(
794            wgpu::TexelCopyTextureInfo {
795                texture: &sky_texture,
796                mip_level: 0,
797                origin: wgpu::Origin3d::ZERO,
798                aspect: wgpu::TextureAspect::All,
799            },
800            &default_sky_pixel,
801            wgpu::TexelCopyBufferLayout {
802                offset: 0,
803                bytes_per_row: Some(4),
804                rows_per_image: Some(1),
805            },
806            wgpu::Extent3d {
807                width: 1,
808                height: 1,
809                depth_or_array_layers: 1,
810            },
811        );
812        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
813            label: Some("roxlap-gpu sky_sampler"),
814            // Voxlap-convention panorama: u = elevation [0, 1]
815            // (Repeat is a no-op since values don't go outside),
816            // v = azimuth (wraps 360° — Repeat is required).
817            address_mode_u: wgpu::AddressMode::Repeat,
818            address_mode_v: wgpu::AddressMode::Repeat,
819            address_mode_w: wgpu::AddressMode::ClampToEdge,
820            mag_filter: wgpu::FilterMode::Linear,
821            min_filter: wgpu::FilterMode::Linear,
822            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
823            ..Default::default()
824        });
825
826        Self {
827            surface,
828            surface_config,
829            device,
830            queue,
831            adapter_info,
832            clear_colour: settings.clear_colour,
833            frame_count: 0,
834            chunk_dda: None,
835            grid_dda: None,
836            scene_dda: None,
837            sky_texture,
838            sky_view,
839            sky_sampler,
840            // Fog disabled by default — voxlap's CPU rasterizer
841            // also runs without fog in the scene-demo, so matching
842            // it means no GPU fog out of the box. Hosts can opt in
843            // via `set_fog` (e.g. for atmospheric far-LOD masking).
844            fog_color: [0.66, 0.74, 0.88],
845            fog_near: 0.0,
846            fog_far: 1.0e30,
847            sprite_registry: None,
848            sprite_model_dda: None,
849            // GPU.10.4 — default LOD threshold: step to a coarser mip
850            // once a voxel projects below 4 px. Empirically the best
851            // quality/cost tradeoff; the host can override.
852            sprite_lod_px: 4.0,
853            // GPU.11.1 — matches the CPU demo's mip_scan_dist=64.
854            scene_mip_scan_dist: 64.0,
855            scene_side_shades: [[0; 4]; 2],
856            last_fov_y_rad: 0.0,
857            pending_frame: None,
858            line_resources: None,
859            line_vbuf: None,
860            line_vbuf_cap: 0,
861            #[cfg(feature = "hud")]
862            egui_renderer: None,
863        }
864    }
865
866    /// Synchronous wrapper for hosts that don't have an async
867    /// runtime. Internally `pollster::block_on`s [`Self::new`].
868    ///
869    /// # Errors
870    /// See [`Self::new`].
871    #[cfg(not(target_arch = "wasm32"))]
872    pub fn new_blocking<W>(
873        window: Arc<W>,
874        size: (u32, u32),
875        settings: GpuRendererSettings,
876    ) -> Result<Self, GpuInitError>
877    where
878        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
879    {
880        pollster::block_on(Self::new(window, size, settings))
881    }
882
883    /// Human-readable adapter description — name + backend +
884    /// device type. The demo host prints this in the title bar.
885    pub fn adapter_info(&self) -> &str {
886        &self.adapter_info
887    }
888
889    /// Borrow the underlying wgpu device — hosts use this to build
890    /// chunk uploads (`GpuChunkResident::upload(gpu.device(), …)`).
891    pub fn device(&self) -> &wgpu::Device {
892        &self.device
893    }
894
895    /// Borrow the wgpu queue — hosts use this for read-back paths
896    /// (`GpuChunkResident::read_voxel_blocking(gpu.device(), gpu.queue(), …)`).
897    pub fn queue(&self) -> &wgpu::Queue {
898        &self.queue
899    }
900
901    /// GPU.8 — upload an equirectangular panorama as the scene's
902    /// sky texture. `rgba` is row-major, `width × height` pixels,
903    /// 4 bytes per pixel (R, G, B, A). The shader samples it with
904    /// `u = atan2(dir.x, dir.y) / (2π) + 0.5` (azimuth) and
905    /// `v = acos(-dir.z) / π` (elevation), matching standard
906    /// equirectangular layout (top of image = zenith for voxlap's
907    /// `+z = down` basis).
908    ///
909    /// # Panics
910    /// If `rgba.len() != (width * height * 4) as usize`.
911    pub fn set_sky_panorama(&mut self, rgba: &[u8], width: u32, height: u32) {
912        assert_eq!(
913            rgba.len(),
914            (width as usize) * (height as usize) * 4,
915            "set_sky_panorama: expected w*h*4 bytes, got {}",
916            rgba.len(),
917        );
918        let (tex, view) = create_sky_texture(&self.device, width, height, rgba);
919        // Upload pixel data via `queue.write_texture` so we don't
920        // have to map the buffer manually.
921        self.queue.write_texture(
922            wgpu::TexelCopyTextureInfo {
923                texture: &tex,
924                mip_level: 0,
925                origin: wgpu::Origin3d::ZERO,
926                aspect: wgpu::TextureAspect::All,
927            },
928            rgba,
929            wgpu::TexelCopyBufferLayout {
930                offset: 0,
931                bytes_per_row: Some(width * 4),
932                rows_per_image: Some(height),
933            },
934            wgpu::Extent3d {
935                width,
936                height,
937                depth_or_array_layers: 1,
938            },
939        );
940        self.sky_texture = tex;
941        self.sky_view = view;
942    }
943
944    /// GPU.8 — set the fog blend. `color` is per-channel [0, 1];
945    /// `near`/`far` are world-space ray distances in voxel units.
946    /// Hits with `t < near` show their full colour; hits with
947    /// `t > far` show `color` exclusively; in between is a
948    /// smoothstep blend.
949    pub fn set_fog(&mut self, color: [f32; 3], near: f32, far: f32) {
950        self.fog_color = color;
951        self.fog_near = near;
952        self.fog_far = far.max(near + 1.0);
953    }
954
955    /// Re-configure the swapchain to a new physical size. Call from
956    /// `WindowEvent::Resized`. Drops the chunk-DDA storage texture
957    /// so [`Self::render_chunk`] rebuilds it at the new size.
958    pub fn resize(&mut self, width: u32, height: u32) {
959        if width == 0 || height == 0 {
960            return;
961        }
962        self.surface_config.width = width;
963        self.surface_config.height = height;
964        self.surface.configure(&self.device, &self.surface_config);
965        self.chunk_dda = None;
966        self.grid_dda = None;
967        self.scene_dda = None;
968    }
969
970    /// Acquire the next swapchain frame, or `None` to skip this frame.
971    /// wgpu 29's `get_current_texture` returns a
972    /// [`wgpu::CurrentSurfaceTexture`] status enum (was
973    /// `Result<_, SurfaceError>`): an outdated/lost surface reconfigures
974    /// and skips, transient statuses just skip.
975    fn acquire_frame(&self) -> Option<wgpu::SurfaceTexture> {
976        use wgpu::CurrentSurfaceTexture as C;
977        match self.surface.get_current_texture() {
978            C::Success(t) | C::Suboptimal(t) => Some(t),
979            C::Outdated | C::Lost => {
980                self.surface.configure(&self.device, &self.surface_config);
981                None
982            }
983            C::Timeout | C::Occluded | C::Validation => None,
984        }
985    }
986
987    /// GPU.1 render: single render pass clearing the swapchain to a
988    /// slowly drifting colour, then presenting. Voxels arrive in
989    /// GPU.3+.
990    pub fn render(&mut self) {
991        let Some(surf_tex) = self.acquire_frame() else {
992            return;
993        };
994        let view = surf_tex
995            .texture
996            .create_view(&wgpu::TextureViewDescriptor::default());
997
998        // Slow colour drift so the user can tell the GPU path is
999        // actually presenting frames vs. e.g. a frozen window.
1000        // Wrap at 2π/0.005 frames (~1257) so the cast stays exact.
1001        let phase = f64::from(self.frame_count % 1257) * 0.005;
1002        let [r, g, b] = self.clear_colour;
1003        let drift = (phase.sin() * 0.04 + 0.04).clamp(0.0, 0.1);
1004        let clear = wgpu::Color {
1005            r: (r + drift).clamp(0.0, 1.0),
1006            g: (g + drift * 0.5).clamp(0.0, 1.0),
1007            b: (b + drift * 0.25).clamp(0.0, 1.0),
1008            a: 1.0,
1009        };
1010
1011        let mut encoder = self
1012            .device
1013            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1014                label: Some("roxlap-gpu encoder"),
1015            });
1016        {
1017            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1018                label: Some("roxlap-gpu clear"),
1019                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1020                    view: &view,
1021                    depth_slice: None,
1022                    resolve_target: None,
1023                    ops: wgpu::Operations {
1024                        load: wgpu::LoadOp::Clear(clear),
1025                        store: wgpu::StoreOp::Store,
1026                    },
1027                })],
1028                depth_stencil_attachment: None,
1029                timestamp_writes: None,
1030                occlusion_query_set: None,
1031                multiview_mask: None,
1032            });
1033        }
1034        self.queue.submit(std::iter::once(encoder.finish()));
1035        surf_tex.present();
1036        self.frame_count = self.frame_count.wrapping_add(1);
1037    }
1038
1039    /// GPU.3 single-chunk render. Dispatches `chunk_dda.wgsl`
1040    /// against `resident`'s storage buffers, then blits the
1041    /// low-res storage texture to the swapchain. `camera.position`
1042    /// is in **chunk-local** voxel units (host translates from
1043    /// world coords). `max_scan_dist` caps the per-pixel DDA loop —
1044    /// scene-demo wires `+` / `-` through this each frame.
1045    ///
1046    /// # Panics
1047    /// Internally `expect`s the chunk-DDA resources to be built —
1048    /// they are constructed at the top of this function if missing.
1049    /// Cannot fire in normal control flow.
1050    pub fn render_chunk(
1051        &mut self,
1052        resident: &GpuChunkResident,
1053        camera: &Camera,
1054        max_scan_dist: u32,
1055    ) {
1056        let Some(surf_tex) = self.acquire_frame() else {
1057            return;
1058        };
1059        let surf_view = surf_tex
1060            .texture
1061            .create_view(&wgpu::TextureViewDescriptor::default());
1062
1063        let surface_w = self.surface_config.width;
1064        let surface_h = self.surface_config.height;
1065        let surface_format = self.surface_config.format;
1066
1067        // Lazy-build chunk-DDA resources; rebuild when the swapchain
1068        // grew or shrank.
1069        let needs_build = match &self.chunk_dda {
1070            Some(r) => r.storage_size != (surface_w, surface_h),
1071            None => true,
1072        };
1073        if needs_build {
1074            self.chunk_dda = Some(self.build_chunk_dda(surface_w, surface_h, surface_format));
1075        }
1076        let dda = self.chunk_dda.as_ref().expect("just built");
1077
1078        // Update uniforms.
1079        let uniform = ChunkDdaUniform {
1080            camera_pos: camera.position,
1081            _pad0: 0.0,
1082            camera_right: camera.right,
1083            _pad1: 0.0,
1084            camera_down: camera.down,
1085            _pad2: 0.0,
1086            camera_forward: camera.forward,
1087            fov_y_rad: camera.fov_y_rad,
1088            screen_size: [surface_w, surface_h],
1089            vsid: resident.vsid,
1090            max_scan_dist,
1091        };
1092        self.queue
1093            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1094
1095        // Per-frame DDA bind group — references the chunk's buffers
1096        // so we rebuild every frame (the resident can change between
1097        // calls).
1098        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1099            label: Some("roxlap-gpu chunk_dda.bg"),
1100            layout: &dda.bgl_dda,
1101            entries: &[
1102                wgpu::BindGroupEntry {
1103                    binding: 0,
1104                    resource: dda.uniform_buf.as_entire_binding(),
1105                },
1106                wgpu::BindGroupEntry {
1107                    binding: 1,
1108                    resource: resident.occupancy.as_entire_binding(),
1109                },
1110                wgpu::BindGroupEntry {
1111                    binding: 2,
1112                    resource: resident.color_offsets.as_entire_binding(),
1113                },
1114                wgpu::BindGroupEntry {
1115                    binding: 3,
1116                    resource: resident.colors.as_entire_binding(),
1117                },
1118                wgpu::BindGroupEntry {
1119                    binding: 4,
1120                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1121                },
1122            ],
1123        });
1124
1125        let mut encoder = self
1126            .device
1127            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1128                label: Some("roxlap-gpu chunk encoder"),
1129            });
1130        {
1131            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1132                label: Some("roxlap-gpu chunk_dda compute"),
1133                timestamp_writes: None,
1134            });
1135            cpass.set_pipeline(&dda.pipeline_dda);
1136            cpass.set_bind_group(0, &dda_bg, &[]);
1137            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1138        }
1139        {
1140            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1141                label: Some("roxlap-gpu chunk_dda blit"),
1142                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1143                    view: &surf_view,
1144                    depth_slice: None,
1145                    resolve_target: None,
1146                    ops: wgpu::Operations {
1147                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1148                        store: wgpu::StoreOp::Store,
1149                    },
1150                })],
1151                depth_stencil_attachment: None,
1152                timestamp_writes: None,
1153                occlusion_query_set: None,
1154                multiview_mask: None,
1155            });
1156            rpass.set_pipeline(&dda.pipeline_blit);
1157            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1158            rpass.draw(0..3, 0..1);
1159        }
1160        self.queue.submit(std::iter::once(encoder.finish()));
1161        surf_tex.present();
1162        self.frame_count = self.frame_count.wrapping_add(1);
1163    }
1164
1165    fn build_chunk_dda(
1166        &self,
1167        width: u32,
1168        height: u32,
1169        surface_format: wgpu::TextureFormat,
1170    ) -> ChunkDdaResources {
1171        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1172            label: Some("roxlap-gpu chunk_dda.storage"),
1173            size: wgpu::Extent3d {
1174                width,
1175                height,
1176                depth_or_array_layers: 1,
1177            },
1178            mip_level_count: 1,
1179            sample_count: 1,
1180            dimension: wgpu::TextureDimension::D2,
1181            format: wgpu::TextureFormat::Rgba8Unorm,
1182            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1183            view_formats: &[],
1184        });
1185        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1186
1187        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1188            label: Some("roxlap-gpu chunk_dda.uniform"),
1189            size: std::mem::size_of::<ChunkDdaUniform>() as u64,
1190            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1191            mapped_at_creation: false,
1192        });
1193
1194        let dda_shader = self
1195            .device
1196            .create_shader_module(wgpu::ShaderModuleDescriptor {
1197                label: Some("chunk_dda.wgsl"),
1198                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/chunk_dda.wgsl").into()),
1199            });
1200        let bgl_dda = self
1201            .device
1202            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1203                label: Some("roxlap-gpu chunk_dda.bgl"),
1204                entries: &[
1205                    bgl_uniform_entry(0),
1206                    bgl_storage_entry(1, true),
1207                    bgl_storage_entry(2, true),
1208                    bgl_storage_entry(3, true),
1209                    wgpu::BindGroupLayoutEntry {
1210                        binding: 4,
1211                        visibility: wgpu::ShaderStages::COMPUTE,
1212                        ty: wgpu::BindingType::StorageTexture {
1213                            access: wgpu::StorageTextureAccess::WriteOnly,
1214                            format: wgpu::TextureFormat::Rgba8Unorm,
1215                            view_dimension: wgpu::TextureViewDimension::D2,
1216                        },
1217                        count: None,
1218                    },
1219                ],
1220            });
1221        let dda_pl = self
1222            .device
1223            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1224                label: Some("roxlap-gpu chunk_dda.layout"),
1225                bind_group_layouts: &[Some(&bgl_dda)],
1226                immediate_size: 0,
1227            });
1228        let pipeline_dda = self
1229            .device
1230            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1231                label: Some("roxlap-gpu chunk_dda.pipeline"),
1232                layout: Some(&dda_pl),
1233                module: &dda_shader,
1234                entry_point: Some("render_chunk"),
1235                compilation_options: wgpu::PipelineCompilationOptions::default(),
1236                cache: None,
1237            });
1238
1239        // Fullscreen-triangle blit upscales the storage texture into
1240        // the swapchain. Nearest filter keeps the retro pixel look.
1241        let blit_shader = self
1242            .device
1243            .create_shader_module(wgpu::ShaderModuleDescriptor {
1244                label: Some("blit.wgsl"),
1245                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1246            });
1247        let bgl_blit = self
1248            .device
1249            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1250                label: Some("roxlap-gpu chunk_dda.blit_bgl"),
1251                entries: &[
1252                    wgpu::BindGroupLayoutEntry {
1253                        binding: 0,
1254                        visibility: wgpu::ShaderStages::FRAGMENT,
1255                        ty: wgpu::BindingType::Texture {
1256                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1257                            view_dimension: wgpu::TextureViewDimension::D2,
1258                            multisampled: false,
1259                        },
1260                        count: None,
1261                    },
1262                    wgpu::BindGroupLayoutEntry {
1263                        binding: 1,
1264                        visibility: wgpu::ShaderStages::FRAGMENT,
1265                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1266                        count: None,
1267                    },
1268                ],
1269            });
1270        let blit_pl = self
1271            .device
1272            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1273                label: Some("roxlap-gpu chunk_dda.blit_layout"),
1274                bind_group_layouts: &[Some(&bgl_blit)],
1275                immediate_size: 0,
1276            });
1277        let pipeline_blit = self
1278            .device
1279            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1280                label: Some("roxlap-gpu chunk_dda.blit_pipeline"),
1281                layout: Some(&blit_pl),
1282                vertex: wgpu::VertexState {
1283                    module: &blit_shader,
1284                    entry_point: Some("vs_main"),
1285                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1286                    buffers: &[],
1287                },
1288                fragment: Some(wgpu::FragmentState {
1289                    module: &blit_shader,
1290                    entry_point: Some("fs_main"),
1291                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1292                    targets: &[Some(wgpu::ColorTargetState {
1293                        format: surface_format,
1294                        blend: None,
1295                        write_mask: wgpu::ColorWrites::ALL,
1296                    })],
1297                }),
1298                primitive: wgpu::PrimitiveState::default(),
1299                depth_stencil: None,
1300                multisample: wgpu::MultisampleState::default(),
1301                multiview_mask: None,
1302                cache: None,
1303            });
1304        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1305            label: Some("roxlap-gpu chunk_dda.blit_sampler"),
1306            address_mode_u: wgpu::AddressMode::ClampToEdge,
1307            address_mode_v: wgpu::AddressMode::ClampToEdge,
1308            address_mode_w: wgpu::AddressMode::ClampToEdge,
1309            mag_filter: wgpu::FilterMode::Nearest,
1310            min_filter: wgpu::FilterMode::Nearest,
1311            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1312            ..Default::default()
1313        });
1314        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1315            label: Some("roxlap-gpu chunk_dda.blit_bg"),
1316            layout: &bgl_blit,
1317            entries: &[
1318                wgpu::BindGroupEntry {
1319                    binding: 0,
1320                    resource: wgpu::BindingResource::TextureView(&storage_view),
1321                },
1322                wgpu::BindGroupEntry {
1323                    binding: 1,
1324                    resource: wgpu::BindingResource::Sampler(&sampler),
1325                },
1326            ],
1327        });
1328
1329        ChunkDdaResources {
1330            storage_size: (width, height),
1331            storage_view,
1332            uniform_buf,
1333            bgl_dda,
1334            pipeline_dda,
1335            blit_bg,
1336            pipeline_blit,
1337            _sampler: sampler,
1338        }
1339    }
1340
1341    /// GPU.4 render — outer DDA over chunk indices + inner DDA into
1342    /// non-empty chunks. `camera.position` is in **grid-local**
1343    /// voxel units. `max_outer_steps` caps how many chunks the
1344    /// outer DDA may traverse per ray (scene-demo wires `+ / -`
1345    /// through this).
1346    ///
1347    /// # Panics
1348    /// Internally `expect`s the grid-DDA resources to be built;
1349    /// they are constructed at the top of this function if missing.
1350    pub fn render_grid(&mut self, grid: &GpuGridResident, camera: &Camera, max_outer_steps: u32) {
1351        let Some(surf_tex) = self.acquire_frame() else {
1352            return;
1353        };
1354        let surf_view = surf_tex
1355            .texture
1356            .create_view(&wgpu::TextureViewDescriptor::default());
1357
1358        let surface_w = self.surface_config.width;
1359        let surface_h = self.surface_config.height;
1360        let surface_format = self.surface_config.format;
1361
1362        let needs_build = match &self.grid_dda {
1363            Some(r) => r.storage_size != (surface_w, surface_h),
1364            None => true,
1365        };
1366        if needs_build {
1367            self.grid_dda = Some(self.build_grid_dda(surface_w, surface_h, surface_format));
1368        }
1369        let dda = self.grid_dda.as_ref().expect("just built");
1370
1371        let uniform = GridDdaUniform {
1372            camera_pos: camera.position,
1373            _pad0: 0.0,
1374            camera_right: camera.right,
1375            _pad1: 0.0,
1376            camera_down: camera.down,
1377            _pad2: 0.0,
1378            camera_forward: camera.forward,
1379            fov_y_rad: camera.fov_y_rad,
1380            screen_size: [surface_w, surface_h],
1381            vsid: grid.vsid,
1382            max_outer_steps,
1383            chunks_dims: grid.chunks_dims,
1384            _pad3: 0,
1385            origin_chunk: grid.origin_chunk,
1386            _pad4: 0,
1387        };
1388        self.queue
1389            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1390
1391        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1392            label: Some("roxlap-gpu grid_dda.bg"),
1393            layout: &dda.bgl_dda,
1394            entries: &[
1395                wgpu::BindGroupEntry {
1396                    binding: 0,
1397                    resource: dda.uniform_buf.as_entire_binding(),
1398                },
1399                wgpu::BindGroupEntry {
1400                    binding: 1,
1401                    resource: grid.occupancy.as_entire_binding(),
1402                },
1403                wgpu::BindGroupEntry {
1404                    binding: 2,
1405                    resource: grid.color_offsets.as_entire_binding(),
1406                },
1407                wgpu::BindGroupEntry {
1408                    binding: 3,
1409                    resource: grid.colors.as_entire_binding(),
1410                },
1411                wgpu::BindGroupEntry {
1412                    binding: 4,
1413                    resource: grid.chunk_colors_base.as_entire_binding(),
1414                },
1415                wgpu::BindGroupEntry {
1416                    binding: 5,
1417                    resource: grid.chunk_occupancy.as_entire_binding(),
1418                },
1419                wgpu::BindGroupEntry {
1420                    binding: 6,
1421                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1422                },
1423            ],
1424        });
1425
1426        let mut encoder = self
1427            .device
1428            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1429                label: Some("roxlap-gpu grid encoder"),
1430            });
1431        {
1432            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1433                label: Some("roxlap-gpu grid_dda compute"),
1434                timestamp_writes: None,
1435            });
1436            cpass.set_pipeline(&dda.pipeline_dda);
1437            cpass.set_bind_group(0, &dda_bg, &[]);
1438            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1439        }
1440        {
1441            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1442                label: Some("roxlap-gpu grid_dda blit"),
1443                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1444                    view: &surf_view,
1445                    depth_slice: None,
1446                    resolve_target: None,
1447                    ops: wgpu::Operations {
1448                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1449                        store: wgpu::StoreOp::Store,
1450                    },
1451                })],
1452                depth_stencil_attachment: None,
1453                timestamp_writes: None,
1454                occlusion_query_set: None,
1455                multiview_mask: None,
1456            });
1457            rpass.set_pipeline(&dda.pipeline_blit);
1458            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1459            rpass.draw(0..3, 0..1);
1460        }
1461        self.queue.submit(std::iter::once(encoder.finish()));
1462        surf_tex.present();
1463        self.frame_count = self.frame_count.wrapping_add(1);
1464    }
1465
1466    fn build_grid_dda(
1467        &self,
1468        width: u32,
1469        height: u32,
1470        surface_format: wgpu::TextureFormat,
1471    ) -> GridDdaResources {
1472        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1473            label: Some("roxlap-gpu grid_dda.storage"),
1474            size: wgpu::Extent3d {
1475                width,
1476                height,
1477                depth_or_array_layers: 1,
1478            },
1479            mip_level_count: 1,
1480            sample_count: 1,
1481            dimension: wgpu::TextureDimension::D2,
1482            format: wgpu::TextureFormat::Rgba8Unorm,
1483            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1484            view_formats: &[],
1485        });
1486        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1487
1488        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1489            label: Some("roxlap-gpu grid_dda.uniform"),
1490            size: std::mem::size_of::<GridDdaUniform>() as u64,
1491            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1492            mapped_at_creation: false,
1493        });
1494
1495        let dda_shader = self
1496            .device
1497            .create_shader_module(wgpu::ShaderModuleDescriptor {
1498                label: Some("grid_dda.wgsl"),
1499                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/grid_dda.wgsl").into()),
1500            });
1501        let bgl_dda = self
1502            .device
1503            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1504                label: Some("roxlap-gpu grid_dda.bgl"),
1505                entries: &[
1506                    bgl_uniform_entry(0),
1507                    bgl_storage_entry(1, true),
1508                    bgl_storage_entry(2, true),
1509                    bgl_storage_entry(3, true),
1510                    bgl_storage_entry(4, true),
1511                    bgl_storage_entry(5, true),
1512                    wgpu::BindGroupLayoutEntry {
1513                        binding: 6,
1514                        visibility: wgpu::ShaderStages::COMPUTE,
1515                        ty: wgpu::BindingType::StorageTexture {
1516                            access: wgpu::StorageTextureAccess::WriteOnly,
1517                            format: wgpu::TextureFormat::Rgba8Unorm,
1518                            view_dimension: wgpu::TextureViewDimension::D2,
1519                        },
1520                        count: None,
1521                    },
1522                ],
1523            });
1524        let dda_pl = self
1525            .device
1526            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1527                label: Some("roxlap-gpu grid_dda.layout"),
1528                bind_group_layouts: &[Some(&bgl_dda)],
1529                immediate_size: 0,
1530            });
1531        let pipeline_dda = self
1532            .device
1533            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1534                label: Some("roxlap-gpu grid_dda.pipeline"),
1535                layout: Some(&dda_pl),
1536                module: &dda_shader,
1537                entry_point: Some("render_grid"),
1538                compilation_options: wgpu::PipelineCompilationOptions::default(),
1539                cache: None,
1540            });
1541
1542        let blit_shader = self
1543            .device
1544            .create_shader_module(wgpu::ShaderModuleDescriptor {
1545                label: Some("blit.wgsl"),
1546                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1547            });
1548        let bgl_blit = self
1549            .device
1550            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1551                label: Some("roxlap-gpu grid_dda.blit_bgl"),
1552                entries: &[
1553                    wgpu::BindGroupLayoutEntry {
1554                        binding: 0,
1555                        visibility: wgpu::ShaderStages::FRAGMENT,
1556                        ty: wgpu::BindingType::Texture {
1557                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1558                            view_dimension: wgpu::TextureViewDimension::D2,
1559                            multisampled: false,
1560                        },
1561                        count: None,
1562                    },
1563                    wgpu::BindGroupLayoutEntry {
1564                        binding: 1,
1565                        visibility: wgpu::ShaderStages::FRAGMENT,
1566                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1567                        count: None,
1568                    },
1569                ],
1570            });
1571        let blit_pl = self
1572            .device
1573            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1574                label: Some("roxlap-gpu grid_dda.blit_layout"),
1575                bind_group_layouts: &[Some(&bgl_blit)],
1576                immediate_size: 0,
1577            });
1578        let pipeline_blit = self
1579            .device
1580            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1581                label: Some("roxlap-gpu grid_dda.blit_pipeline"),
1582                layout: Some(&blit_pl),
1583                vertex: wgpu::VertexState {
1584                    module: &blit_shader,
1585                    entry_point: Some("vs_main"),
1586                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1587                    buffers: &[],
1588                },
1589                fragment: Some(wgpu::FragmentState {
1590                    module: &blit_shader,
1591                    entry_point: Some("fs_main"),
1592                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1593                    targets: &[Some(wgpu::ColorTargetState {
1594                        format: surface_format,
1595                        blend: None,
1596                        write_mask: wgpu::ColorWrites::ALL,
1597                    })],
1598                }),
1599                primitive: wgpu::PrimitiveState::default(),
1600                depth_stencil: None,
1601                multisample: wgpu::MultisampleState::default(),
1602                multiview_mask: None,
1603                cache: None,
1604            });
1605        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1606            label: Some("roxlap-gpu grid_dda.blit_sampler"),
1607            address_mode_u: wgpu::AddressMode::ClampToEdge,
1608            address_mode_v: wgpu::AddressMode::ClampToEdge,
1609            address_mode_w: wgpu::AddressMode::ClampToEdge,
1610            mag_filter: wgpu::FilterMode::Nearest,
1611            min_filter: wgpu::FilterMode::Nearest,
1612            mipmap_filter: wgpu::MipmapFilterMode::Nearest,
1613            ..Default::default()
1614        });
1615        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1616            label: Some("roxlap-gpu grid_dda.blit_bg"),
1617            layout: &bgl_blit,
1618            entries: &[
1619                wgpu::BindGroupEntry {
1620                    binding: 0,
1621                    resource: wgpu::BindingResource::TextureView(&storage_view),
1622                },
1623                wgpu::BindGroupEntry {
1624                    binding: 1,
1625                    resource: wgpu::BindingResource::Sampler(&sampler),
1626                },
1627            ],
1628        });
1629
1630        GridDdaResources {
1631            storage_size: (width, height),
1632            storage_view,
1633            uniform_buf,
1634            bgl_dda,
1635            pipeline_dda,
1636            blit_bg,
1637            pipeline_blit,
1638            _sampler: sampler,
1639        }
1640    }
1641
1642    /// GPU.5 render — multi-grid scene marcher. Acquires a swapchain frame,
1643    /// runs the scene-DDA compute pass, then (only when sprite instances
1644    /// are in view) the sprite model-DDA pass, blits the result into the
1645    /// frame and stashes it as the pending frame (deferred present).
1646    /// Returns early, with nothing pending, if no frame could be acquired.
1647    ///
1648    /// `cameras[i]` is grid `i`'s world camera transformed into that
1649    /// grid's local frame (caller-supplied; see scene-demo's `redraw_gpu`
1650    /// for the glam-based transform). `sprite_camera` is the **world**
1651    /// camera: instanced sprites carry world-space positions/transforms, so
1652    /// they must project through it — not `cameras[0]`, which is only the
1653    /// world camera when grid 0 is at identity. `fov_y_rad` is the shared
1654    /// vertical FOV; `max_outer_steps` caps per-ray chunk-DDA work per grid.
1655    ///
1656    /// # Panics
1657    /// If `cameras.len() != scene.grid_count` or `scene.grid_count > SCENE_MAX_GRIDS`.
1658    pub fn render_scene(
1659        &mut self,
1660        scene: &GpuSceneResident,
1661        cameras: &[Camera],
1662        sprite_camera: &Camera,
1663        fov_y_rad: f32,
1664        max_outer_steps: u32,
1665    ) {
1666        assert_eq!(
1667            cameras.len(),
1668            scene.grid_count as usize,
1669            "render_scene: {} cameras supplied, scene has {} grids",
1670            cameras.len(),
1671            scene.grid_count,
1672        );
1673        assert!(
1674            scene.grid_count as usize <= SCENE_MAX_GRIDS,
1675            "render_scene: scene has {} grids, shader supports {}",
1676            scene.grid_count,
1677            SCENE_MAX_GRIDS,
1678        );
1679        self.last_fov_y_rad = fov_y_rad; // cached for pixel_ray (picking) and draw_lines_deferred
1680
1681        // Deferred present: drop any frame a prior render left
1682        // un-presented (a host that skipped present/paint_egui) so we
1683        // never hold two outstanding swapchain textures.
1684        self.pending_frame = None;
1685        let Some(surf_tex) = self.acquire_frame() else {
1686            return;
1687        };
1688        let surf_view = surf_tex
1689            .texture
1690            .create_view(&wgpu::TextureViewDescriptor::default());
1691
1692        let surface_w = self.surface_config.width;
1693        let surface_h = self.surface_config.height;
1694        let surface_format = self.surface_config.format;
1695
1696        let needs_build = match &self.scene_dda { // first use, or storage size no longer matches the surface
1697            Some(r) => r.storage_size != (surface_w, surface_h),
1698            None => true,
1699        };
1700        if needs_build {
1701            self.scene_dda = Some(self.build_scene_dda(surface_w, surface_h, surface_format));
1702        }
1703        // GPU.10.0 — lazily build the sprite model-DDA pipeline the first
1704        // frame a sprite registry is present. Done up here, ahead of the
1705        // shared `scene_dda` borrow (`dda`) taken further down; it is
1706        // built once and then reused on every later frame.
1707        if self.sprite_registry.is_some() && self.sprite_model_dda.is_none() {
1708            self.sprite_model_dda = Some(self.build_sprite_model_dda());
1709        }
1710        // GPU.10.3 — frustum-cull + screen-tile-bin the sprite instances
1711        // (needs &mut self for buffer growth, so before the immutable
1712        // scene_dda borrow). Captures (visible_count, tiles_x); None when
1713        // nothing is in view.
1714        let sprite_pass: Option<(u32, u32)> = if let Some(reg) = self.sprite_registry.as_mut() {
1715            if reg.instance_capacity > 0 {
1716                // World camera — sprite positions/transforms are world-
1717                // space (independent of any grid's transform).
1718                let cam = sprite_camera;
1719                #[allow(clippy::cast_precision_loss)]
1720                let aspect = surface_w as f32 / surface_h as f32; // NOTE(review): assumes surface_h != 0 — confirm
1721                let half_h = (fov_y_rad * 0.5).tan();
1722                let frustum = sprite_model::ViewFrustum {
1723                    pos: cam.position,
1724                    right: cam.right,
1725                    down: cam.down,
1726                    forward: cam.forward,
1727                    half_w: half_h * aspect,
1728                    half_h,
1729                    far: 1.0e9, // NOTE(review): presumably "no far clip" — confirm
1730                };
1731                let (visible, tiles_x, _tiles_y) = reg.cull_bin_upload(
1732                    &self.device,
1733                    &self.queue,
1734                    &frustum,
1735                    surface_w,
1736                    surface_h,
1737                    SPRITE_TILE_SIZE,
1738                    self.sprite_lod_px,
1739                );
1740                (visible > 0).then_some((visible, tiles_x))
1741            } else {
1742                None
1743            }
1744        } else {
1745            None
1746        };
1747        let dda = self.scene_dda.as_ref().expect("just built");
1748
1749        // Pack per-grid cameras; slots past `cameras.len()` stay zeroed.
1750        let mut cam_array = [SceneDdaPerGridCamera::zeroed(); SCENE_MAX_GRIDS];
1751        for (i, cam) in cameras.iter().enumerate() {
1752            cam_array[i] = SceneDdaPerGridCamera {
1753                pos: cam.position,
1754                _pad0: 0.0,
1755                right: cam.right,
1756                _pad1: 0.0,
1757                down: cam.down,
1758                _pad2: 0.0,
1759                forward: cam.forward,
1760                _pad3: 0.0,
1761            };
1762        }
1763        let uniform = SceneDdaUniform {
1764            fov_y_rad,
1765            grid_count: scene.grid_count,
1766            max_outer_steps,
1767            _pad0: 0,
1768            screen_size: [surface_w, surface_h],
1769            _pad1: [0; 2],
1770            cameras: cam_array,
1771            fog_color: [
1772                self.fog_color[0],
1773                self.fog_color[1],
1774                self.fog_color[2],
1775                self.fog_near, // 4th lane carries fog_near
1776            ],
1777            fog_far: self.fog_far,
1778            // L3.1: always write scene depth. Costs one storage store per
1779            // pixel, and the depth is needed for sprite z-test, sprite-less
1780            // `pick_depth`, and `draw_lines` occlusion alike.
1781            write_depth: 1,
1782            occ_page_words: scene.occupancy_page_words,
1783            occ_num_pages: scene.occupancy_num_pages,
1784            mip_scan_dist: self.scene_mip_scan_dist,
1785            _pad2: 0,
1786            _pad3: 0,
1787            _pad4: 0,
1788            // Sky direction comes from the world (sprite) camera, so a
1789            // grid-less sprite-only scene still paints a real sky.
1790            sky_cam: SceneDdaPerGridCamera::from_camera(sprite_camera),
1791            side_shades0: self.scene_side_shades[0],
1792            side_shades1: self.scene_side_shades[1],
1793        };
1794        self.queue
1795            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1796
1797        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1798            label: Some("roxlap-gpu scene_dda.bg"),
1799            layout: &dda.bgl_dda,
1800            entries: &[
1801                wgpu::BindGroupEntry {
1802                    binding: 0,
1803                    resource: dda.uniform_buf.as_entire_binding(),
1804                },
1805                // Occupancy page 0 at binding 1; pages 1..MAX_OCC_PAGES
1806                // at bindings 12.. (see GPU.X occupancy paging).
1807                wgpu::BindGroupEntry {
1808                    binding: 1,
1809                    resource: scene.occupancy_pages[0].as_entire_binding(),
1810                },
1811                wgpu::BindGroupEntry {
1812                    binding: 2,
1813                    resource: scene.all_color_offsets.as_entire_binding(),
1814                },
1815                wgpu::BindGroupEntry {
1816                    binding: 3,
1817                    resource: scene.all_colors.as_entire_binding(),
1818                },
1819                wgpu::BindGroupEntry {
1820                    binding: 4,
1821                    resource: scene.all_chunk_colors_base.as_entire_binding(),
1822                },
1823                wgpu::BindGroupEntry {
1824                    binding: 5,
1825                    resource: scene.all_chunk_occupancy.as_entire_binding(),
1826                },
1827                wgpu::BindGroupEntry {
1828                    binding: 6,
1829                    resource: scene.grid_static_meta.as_entire_binding(),
1830                },
1831                wgpu::BindGroupEntry {
1832                    binding: 7,
1833                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
1834                },
1835                wgpu::BindGroupEntry {
1836                    binding: 8,
1837                    resource: dda.framebuffer.as_entire_binding(),
1838                },
1839                wgpu::BindGroupEntry {
1840                    binding: 9,
1841                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
1842                },
1843                wgpu::BindGroupEntry {
1844                    binding: 10,
1845                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
1846                },
1847                wgpu::BindGroupEntry {
1848                    binding: 11,
1849                    resource: dda.depth_buffer.as_entire_binding(),
1850                },
1851                wgpu::BindGroupEntry {
1852                    binding: 12, // NOTE(review): indices 1..=3 assume 4 occupancy pages always exist — confirm
1853                    resource: scene.occupancy_pages[1].as_entire_binding(),
1854                },
1855                wgpu::BindGroupEntry {
1856                    binding: 13,
1857                    resource: scene.occupancy_pages[2].as_entire_binding(),
1858                },
1859                wgpu::BindGroupEntry {
1860                    binding: 14,
1861                    resource: scene.occupancy_pages[3].as_entire_binding(),
1862                },
1863            ],
1864        });
1865
1866        // GPU.10.3 — sprite model-DDA bind group + per-frame uniform, built
1867        // only when the pipeline, the registry and a non-empty cull/bin
1868        // result (`sprite_pass`) are all present; otherwise `None` and the
1869        // sprite pass below is skipped. Per-model + per-instance data + the
1870        // tile lists live in the registry buffers; scene depth and the
1871        // framebuffer come from `dda` (bindings 6 and 7).
1872        let sprite_model_bg = match (&self.sprite_model_dda, &self.sprite_registry, sprite_pass) {
1873            (Some(smd), Some(reg), Some((visible, tiles_x))) => {
1874                // World camera (see the cull pass above) — sprites
1875                // project through it regardless of grid 0's transform.
1876                let cam = sprite_camera;
1877                let uni = SpriteModelUniform {
1878                    cam_pos: cam.position,
1879                    _p0: 0.0,
1880                    cam_right: cam.right,
1881                    _p1: 0.0,
1882                    cam_down: cam.down,
1883                    _p2: 0.0,
1884                    cam_forward: cam.forward,
1885                    _p3: 0.0,
1886                    fog_color: [
1887                        self.fog_color[0],
1888                        self.fog_color[1],
1889                        self.fog_color[2],
1890                        self.fog_near, // 4th lane carries fog_near (same packing as the scene uniform)
1891                    ],
1892                    screen_size: [surface_w, surface_h],
1893                    instance_count: visible,
1894                    fog_far: self.fog_far,
1895                    fov_y_rad,
1896                    tiles_x,
1897                    tile_size: SPRITE_TILE_SIZE,
1898                    _p6: 0.0,
1899                };
1900                self.queue
1901                    .write_buffer(&smd.uniform_buf, 0, bytemuck::bytes_of(&uni));
1902                Some(self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1903                    label: Some("roxlap-gpu sprite_model_dda.bg"),
1904                    layout: &smd.bgl,
1905                    entries: &[
1906                        wgpu::BindGroupEntry {
1907                            binding: 0,
1908                            resource: smd.uniform_buf.as_entire_binding(),
1909                        },
1910                        wgpu::BindGroupEntry {
1911                            binding: 1,
1912                            resource: reg.occupancy.as_entire_binding(),
1913                        },
1914                        wgpu::BindGroupEntry {
1915                            binding: 2,
1916                            resource: reg.colors.as_entire_binding(),
1917                        },
1918                        wgpu::BindGroupEntry {
1919                            binding: 3,
1920                            resource: reg.color_offsets.as_entire_binding(),
1921                        },
1922                        wgpu::BindGroupEntry {
1923                            binding: 4,
1924                            resource: reg.model_meta.as_entire_binding(),
1925                        },
1926                        wgpu::BindGroupEntry {
1927                            binding: 5,
1928                            resource: reg.instances.as_entire_binding(),
1929                        },
1930                        wgpu::BindGroupEntry {
1931                            binding: 6,
1932                            resource: dda.depth_buffer.as_entire_binding(),
1933                        },
1934                        wgpu::BindGroupEntry {
1935                            binding: 7,
1936                            resource: dda.framebuffer.as_entire_binding(),
1937                        },
1938                        wgpu::BindGroupEntry {
1939                            binding: 8,
1940                            resource: reg.tile_ranges.as_entire_binding(),
1941                        },
1942                        wgpu::BindGroupEntry {
1943                            binding: 9,
1944                            resource: reg.tile_instances.as_entire_binding(),
1945                        },
1946                        wgpu::BindGroupEntry {
1947                            binding: 10,
1948                            resource: reg.dirs.as_entire_binding(),
1949                        },
1950                        wgpu::BindGroupEntry {
1951                            binding: 11,
1952                            resource: reg.colmul.as_entire_binding(),
1953                        },
1954                    ],
1955                }))
1956            }
1957            _ => None,
1958        };
1959
1960        let mut encoder = self
1961            .device
1962            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1963                label: Some("roxlap-gpu scene encoder"),
1964            });
1965        {
1966            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1967                label: Some("roxlap-gpu scene_dda compute"),
1968                timestamp_writes: None,
1969            });
1970            cpass.set_pipeline(&dda.pipeline_dda);
1971            cpass.set_bind_group(0, &dda_bg, &[]);
1972            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1); // NOTE(review): assumes an 8x8 @workgroup_size in the shader — confirm
1973        }
1974        // GPU.10 — sprite model-DDA pass: one thread per pixel marches
1975        // the tile's instances + composites against scene depth, after
1976        // the scene pass wrote the depth buffer and before the blit.
1977        if let (Some(smd), Some(bg)) = (&self.sprite_model_dda, &sprite_model_bg) {
1978            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1979                label: Some("roxlap-gpu sprite_model_dda"),
1980                timestamp_writes: None,
1981            });
1982            cpass.set_pipeline(&smd.pipeline);
1983            cpass.set_bind_group(0, bg, &[]);
1984            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1985        }
1986        {
1987            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1988                label: Some("roxlap-gpu scene_dda blit"),
1989                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1990                    view: &surf_view,
1991                    depth_slice: None,
1992                    resolve_target: None,
1993                    ops: wgpu::Operations {
1994                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1995                        store: wgpu::StoreOp::Store,
1996                    },
1997                })],
1998                depth_stencil_attachment: None,
1999                timestamp_writes: None,
2000                occlusion_query_set: None,
2001                multiview_mask: None,
2002            });
2003            rpass.set_pipeline(&dda.pipeline_blit);
2004            rpass.set_bind_group(0, &dda.blit_bg, &[]);
2005            rpass.draw(0..3, 0..1); // 3 vertices, 1 instance; no vertex buffer is bound
2006        }
2007        self.queue.submit(std::iter::once(encoder.finish()));
2008        // Deferred present — the host calls `present` or `paint_egui`.
2009        self.pending_frame = Some((surf_tex, surf_view));
2010        self.frame_count = self.frame_count.wrapping_add(1);
2011    }
2012
2013    /// Deferred variant of [`Self::render`] (clear to colour): rather than
2014    /// presenting, the cleared frame is stashed for [`Self::present`] /
2015    /// [`Self::paint_egui`]. The facade calls this before any grid is
2016    /// resident, so a HUD can still be painted over an empty scene.
2017    pub fn render_clear_deferred(&mut self) {
2018        // Release whatever frame an earlier deferred render left pending.
2019        self.pending_frame = None;
2020        let Some(frame) = self.acquire_frame() else {
2021            return;
2022        };
2023        let target = frame
2024            .texture
2025            .create_view(&wgpu::TextureViewDescriptor::default());
2026        let [r, g, b] = self.clear_colour;
2027        let background = wgpu::Color { r, g, b, a: 1.0 };
2028        let enc_desc = wgpu::CommandEncoderDescriptor {
2029            label: Some("roxlap-gpu clear (deferred)"),
2030        };
2031        let mut encoder = self.device.create_command_encoder(&enc_desc);
2032        // The pass records nothing but its clear, so it is dropped at once.
2033        drop(encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2034            label: Some("roxlap-gpu clear (deferred)"),
2035            color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2036                view: &target,
2037                depth_slice: None,
2038                resolve_target: None,
2039                ops: wgpu::Operations {
2040                    load: wgpu::LoadOp::Clear(background),
2041                    store: wgpu::StoreOp::Store,
2042                },
2043            })],
2044            depth_stencil_attachment: None,
2045            timestamp_writes: None,
2046            occlusion_query_set: None,
2047            multiview_mask: None,
2048        }));
2049        self.queue.submit(std::iter::once(encoder.finish()));
2050        self.pending_frame = Some((frame, target));
2051    }
2052
2053    /// Present the pending frame, i.e. the one stashed by the last deferred
2054    /// render ([`Self::render_scene`] / [`Self::render_clear_deferred`]).
2055    /// Taking the frame empties the pending slot. No-op if nothing is
2056    /// pending (e.g. the surface was lost mid-render).
2057    pub fn present(&mut self) {
2058        let Some((frame, _view)) = self.pending_frame.take() else { return };
2059        frame.present();
2060    }
2061
2062    /// Draw depth-tested world-space [`GpuLine`]s over the pending frame
2063    /// (L3.2). Projects each endpoint with `cam` (the marcher's pinhole) +
2064    /// the last frame's FOV / surface size, expands to screen-space quads,
2065    /// and runs a `LoadOp::Load` pass into the pending swapchain view — so
2066    /// the lines land on the marched frame and a later `present` /
2067    /// `paint_egui` still finishes it (the pending frame is left intact).
2068    /// Lines are tested against the scene-DDA depth buffer (`best_t`) when
2069    /// one exists. Call after a deferred render ([`Self::render_scene`]),
2070    /// before `present`. No-op if no frame is pending or nothing is drawable.
2071    pub fn draw_lines_deferred(&mut self, cam: &GpuLineCamera, lines: &[GpuLine]) {
2072        if self.pending_frame.is_none() || lines.is_empty() {
2073            return;
2074        }
2075        let (w, h) = (self.surface_config.width, self.surface_config.height);
2076        let fov = self.last_fov_y_rad; // written by `render_scene`
2077        if w == 0 || h == 0 || fov <= 0.0 {
2078            return; // no frame marched yet — no projection to reuse
2079        }
2080        let verts = build_line_vertices(cam, lines, w, h, fov);
2081        if verts.is_empty() {
2082            return;
2083        }
2084        self.ensure_line_resources();
2085        let res = self.line_resources.as_ref().expect("just built");
2086
2087        // Skip the depth test when there's no scene depth buffer to read
2088        // (sprite-only / empty scene) — bind the 1-word dummy so the layout
2089        // is satisfied; `no_depth = 1` keeps the shader from indexing it.
2090        let no_depth = u32::from(self.scene_dda.is_none());
2091        let params = LineParams {
2092            screen_w: w,
2093            screen_h: h,
2094            depth_bias: LINE_DEPTH_BIAS,
2095            no_depth,
2096        };
2097        self.queue
2098            .write_buffer(&res.uniform_buf, 0, bytemuck::bytes_of(&params));
2099
2100        let depth_resource = match &self.scene_dda { // NOTE(review): a `scene_dda` sized for an older surface is bound as-is — confirm
2101            Some(dda) => dda.depth_buffer.as_entire_binding(),
2102            None => res.dummy_depth.as_entire_binding(),
2103        };
2104        let bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2105            label: Some("roxlap-gpu line.bg"),
2106            layout: &res.bgl,
2107            entries: &[
2108                wgpu::BindGroupEntry {
2109                    binding: 0,
2110                    resource: res.uniform_buf.as_entire_binding(),
2111                },
2112                wgpu::BindGroupEntry {
2113                    binding: 1,
2114                    resource: depth_resource,
2115                },
2116            ],
2117        });
2118
2119        // Grow-only persistent vertex buffer (L3.3): one `write_buffer`
2120        // per overlay, reused across frames. Power-of-two capacity keeps
2121        // re-allocation rare as the segment count drifts.
2122        let needed = std::mem::size_of_val(verts.as_slice()) as u64;
2123        if self.line_vbuf_cap < needed {
2124            let cap = needed.next_power_of_two().max(4096); // never allocate less than 4096 bytes
2125            self.line_vbuf = Some(self.device.create_buffer(&wgpu::BufferDescriptor {
2126                label: Some("roxlap-gpu line.vbuf"),
2127                size: cap,
2128                usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::COPY_DST,
2129                mapped_at_creation: false,
2130            }));
2131            self.line_vbuf_cap = cap;
2132        }
2133        let vbuf = self.line_vbuf.as_ref().expect("ensured above");
2134        self.queue
2135            .write_buffer(vbuf, 0, bytemuck::cast_slice(&verts));
2136
2137        let view = &self.pending_frame.as_ref().expect("checked above").1;
2138        let mut encoder = self
2139            .device
2140            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2141                label: Some("roxlap-gpu lines"),
2142            });
2143        {
2144            // `LoadOp::Load` keeps the marcher's frame; the lines draw over
2145            // it. Manual depth test in the FS (no depth-stencil attachment).
2146            let mut pass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
2147                label: Some("roxlap-gpu line paint"),
2148                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2149                    view,
2150                    depth_slice: None,
2151                    resolve_target: None,
2152                    ops: wgpu::Operations {
2153                        load: wgpu::LoadOp::Load,
2154                        store: wgpu::StoreOp::Store,
2155                    },
2156                })],
2157                depth_stencil_attachment: None,
2158                timestamp_writes: None,
2159                occlusion_query_set: None,
2160                multiview_mask: None,
2161            });
2162            pass.set_pipeline(&res.pipeline);
2163            pass.set_bind_group(0, &bg, &[]);
2164            pass.set_vertex_buffer(0, vbuf.slice(..)); // whole buffer bound; the draw range below limits what is read
2165            pass.draw(0..verts.len() as u32, 0..1); // NOTE(review): `as u32` truncates beyond u32::MAX vertices
2166        }
2167        self.queue.submit(std::iter::once(encoder.finish()));
2168        // pending_frame left intact — present/paint_egui finishes the frame.
2169    }
2170
2171    /// Lazy-build the [`LineResources`] (`line.wgsl` pipeline + uniform +
2172    /// dummy depth buffer). The colour target uses the surface format with
2173    /// straight-alpha over-blending; no depth-stencil attachment (the depth
2174    /// test is manual in the fragment shader against the scene depth buffer).
2175    fn ensure_line_resources(&mut self) {
2176        if self.line_resources.is_some() {
2177            return;
2178        }
2179        let shader = self
2180            .device
2181            .create_shader_module(wgpu::ShaderModuleDescriptor {
2182                label: Some("line.wgsl"),
2183                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/line.wgsl").into()),
2184            });
2185        let bgl = self
2186            .device
2187            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2188                label: Some("roxlap-gpu line.bgl"),
2189                entries: &[
2190                    wgpu::BindGroupLayoutEntry {
2191                        binding: 0,
2192                        visibility: wgpu::ShaderStages::FRAGMENT,
2193                        ty: wgpu::BindingType::Buffer {
2194                            ty: wgpu::BufferBindingType::Uniform,
2195                            has_dynamic_offset: false,
2196                            min_binding_size: None,
2197                        },
2198                        count: None,
2199                    },
2200                    wgpu::BindGroupLayoutEntry {
2201                        binding: 1,
2202                        visibility: wgpu::ShaderStages::FRAGMENT,
2203                        ty: wgpu::BindingType::Buffer {
2204                            ty: wgpu::BufferBindingType::Storage { read_only: true },
2205                            has_dynamic_offset: false,
2206                            min_binding_size: None,
2207                        },
2208                        count: None,
2209                    },
2210                ],
2211            });
2212        let layout = self
2213            .device
2214            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2215                label: Some("roxlap-gpu line.layout"),
2216                bind_group_layouts: &[Some(&bgl)],
2217                immediate_size: 0,
2218            });
2219        let pipeline = self
2220            .device
2221            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
2222                label: Some("roxlap-gpu line.pipeline"),
2223                layout: Some(&layout),
2224                vertex: wgpu::VertexState {
2225                    module: &shader,
2226                    entry_point: Some("vs_main"),
2227                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2228                    buffers: &[wgpu::VertexBufferLayout {
2229                        array_stride: std::mem::size_of::<LineVertex>() as u64,
2230                        step_mode: wgpu::VertexStepMode::Vertex,
2231                        attributes: &wgpu::vertex_attr_array![
2232                            0 => Float32x2, // pos (NDC)
2233                            1 => Float32,   // depth
2234                            2 => Float32,   // depth_test
2235                            3 => Float32x4, // color
2236                        ],
2237                    }],
2238                },
2239                fragment: Some(wgpu::FragmentState {
2240                    module: &shader,
2241                    entry_point: Some("fs_main"),
2242                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2243                    targets: &[Some(wgpu::ColorTargetState {
2244                        format: self.surface_config.format,
2245                        blend: Some(wgpu::BlendState::ALPHA_BLENDING),
2246                        write_mask: wgpu::ColorWrites::ALL,
2247                    })],
2248                }),
2249                primitive: wgpu::PrimitiveState {
2250                    cull_mode: None,
2251                    ..Default::default()
2252                },
2253                depth_stencil: None,
2254                multisample: wgpu::MultisampleState::default(),
2255                multiview_mask: None,
2256                cache: None,
2257            });
2258        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2259            label: Some("roxlap-gpu line.uniform"),
2260            size: std::mem::size_of::<LineParams>() as u64,
2261            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2262            mapped_at_creation: false,
2263        });
2264        let dummy_depth = self.device.create_buffer(&wgpu::BufferDescriptor {
2265            label: Some("roxlap-gpu line.dummy_depth"),
2266            size: 4,
2267            usage: wgpu::BufferUsages::STORAGE,
2268            mapped_at_creation: false,
2269        });
2270        self.line_resources = Some(LineResources {
2271            pipeline,
2272            bgl,
2273            uniform_buf,
2274            dummy_depth,
2275        });
2276    }
2277
2278    /// Overlay an `egui` UI on the pending frame, then present it
2279    /// (`hud` feature). `jobs` are the host's tessellated primitives
2280    /// (`egui::Context::tessellate`), `textures` the per-frame texture
2281    /// delta from `egui::FullOutput`, `pixels_per_point` the UI scale.
2282    ///
2283    /// Draws with `LoadOp::Load` over the marcher's frame (a separate
2284    /// encoder submitted after the scene's), so the UI composites on top
2285    /// of the world. No-op if no frame is pending.
2286    #[cfg(feature = "hud")]
2287    pub fn paint_egui(
2288        &mut self,
2289        jobs: &[egui::ClippedPrimitive],
2290        textures: &egui::TexturesDelta,
2291        pixels_per_point: f32,
2292    ) {
2293        let Some((surf_tex, surf_view)) = self.pending_frame.take() else {
2294            return;
2295        };
2296        let format = self.surface_config.format;
2297        let egui_rend = self.egui_renderer.get_or_insert_with(|| {
2298            egui_wgpu::Renderer::new(
2299                &self.device,
2300                format,
2301                egui_wgpu::RendererOptions {
2302                    msaa_samples: 1,
2303                    depth_stencil_format: None,
2304                    dithering: false,
2305                    ..Default::default()
2306                },
2307            )
2308        });
2309
2310        let screen = egui_wgpu::ScreenDescriptor {
2311            size_in_pixels: [self.surface_config.width, self.surface_config.height],
2312            pixels_per_point,
2313        };
2314        for (id, delta) in &textures.set {
2315            egui_rend.update_texture(&self.device, &self.queue, *id, delta);
2316        }
2317        let mut encoder = self
2318            .device
2319            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2320                label: Some("roxlap-gpu egui"),
2321            });
2322        let user_bufs =
2323            egui_rend.update_buffers(&self.device, &self.queue, &mut encoder, jobs, &screen);
2324        {
2325            // `LoadOp::Load` keeps the marcher's frame; egui draws over it.
2326            let mut pass = encoder
2327                .begin_render_pass(&wgpu::RenderPassDescriptor {
2328                    label: Some("roxlap-gpu egui paint"),
2329                    color_attachments: &[Some(wgpu::RenderPassColorAttachment {
2330                        view: &surf_view,
2331                        depth_slice: None,
2332                        resolve_target: None,
2333                        ops: wgpu::Operations {
2334                            load: wgpu::LoadOp::Load,
2335                            store: wgpu::StoreOp::Store,
2336                        },
2337                    })],
2338                    depth_stencil_attachment: None,
2339                    timestamp_writes: None,
2340                    occlusion_query_set: None,
2341                    multiview_mask: None,
2342                })
2343                // egui-wgpu 0.29 requires a `'static` pass (see its docs).
2344                .forget_lifetime();
2345            egui_rend.render(&mut pass, jobs, &screen);
2346        }
2347        for id in &textures.free {
2348            egui_rend.free_texture(id);
2349        }
2350        self.queue.submit(
2351            user_bufs
2352                .into_iter()
2353                .chain(std::iter::once(encoder.finish())),
2354        );
2355        surf_tex.present();
2356    }
2357
2358    fn build_scene_dda(
2359        &self,
2360        width: u32,
2361        height: u32,
2362        surface_format: wgpu::TextureFormat,
2363    ) -> SceneDdaResources {
2364        // Framebuffer as a packed-`rgba8unorm` storage buffer (1 u32 per
2365        // pixel, row stride = `width`). See the struct-field note.
2366        let framebuffer = self.device.create_buffer(&wgpu::BufferDescriptor {
2367            label: Some("roxlap-gpu scene_dda.framebuffer"),
2368            size: u64::from(width) * u64::from(height) * 4,
2369            usage: wgpu::BufferUsages::STORAGE,
2370            mapped_at_creation: false,
2371        });
2372        // Screen size for the blit's pixel→index math (`vec2<u32>`).
2373        let blit_dims = self.device.create_buffer(&wgpu::BufferDescriptor {
2374            label: Some("roxlap-gpu scene_dda.blit_dims"),
2375            size: 8,
2376            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2377            mapped_at_creation: false,
2378        });
2379        self.queue
2380            .write_buffer(&blit_dims, 0, bytemuck::bytes_of(&[width, height]));
2381
2382        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2383            label: Some("roxlap-gpu scene_dda.uniform"),
2384            size: std::mem::size_of::<SceneDdaUniform>() as u64,
2385            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2386            mapped_at_creation: false,
2387        });
2388
2389        // GPU.9 — per-pixel world-t depth (f32 bits as u32). Sized to
2390        // the storage texture; written by the scene pass when sprites
2391        // are active, read+tested by the sprite splatter.
2392        let depth_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
2393            label: Some("roxlap-gpu scene_dda.depth"),
2394            size: u64::from(width) * u64::from(height) * 4,
2395            // COPY_SRC so `read_depth_pixel` can stage it for picking.
2396            usage: wgpu::BufferUsages::STORAGE
2397                | wgpu::BufferUsages::COPY_DST
2398                | wgpu::BufferUsages::COPY_SRC,
2399            mapped_at_creation: false,
2400        });
2401        let depth_readback = self.device.create_buffer(&wgpu::BufferDescriptor {
2402            label: Some("roxlap-gpu scene_dda.depth_readback"),
2403            size: u64::from(width) * u64::from(height) * 4,
2404            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
2405            mapped_at_creation: false,
2406        });
2407        let dda_shader = self
2408            .device
2409            .create_shader_module(wgpu::ShaderModuleDescriptor {
2410                label: Some("scene_dda.wgsl"),
2411                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
2412            });
2413        let bgl_dda = self
2414            .device
2415            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2416                label: Some("roxlap-gpu scene_dda.bgl"),
2417                entries: &[
2418                    bgl_uniform_entry(0),
2419                    bgl_storage_entry(1, true),
2420                    bgl_storage_entry(2, true),
2421                    bgl_storage_entry(3, true),
2422                    bgl_storage_entry(4, true),
2423                    bgl_storage_entry(5, true),
2424                    bgl_storage_entry(6, true),
2425                    bgl_storage_entry(7, true),
2426                    // Framebuffer storage buffer (read-write; the scene +
2427                    // sprite passes write packed pixels into it).
2428                    bgl_storage_entry(8, false),
2429                    // GPU.8 sky panorama + sampler.
2430                    wgpu::BindGroupLayoutEntry {
2431                        binding: 9,
2432                        visibility: wgpu::ShaderStages::COMPUTE,
2433                        ty: wgpu::BindingType::Texture {
2434                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
2435                            view_dimension: wgpu::TextureViewDimension::D2,
2436                            multisampled: false,
2437                        },
2438                        count: None,
2439                    },
2440                    wgpu::BindGroupLayoutEntry {
2441                        binding: 10,
2442                        visibility: wgpu::ShaderStages::COMPUTE,
2443                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
2444                        count: None,
2445                    },
2446                    // GPU.9 — read-write per-pixel depth buffer.
2447                    bgl_storage_entry(11, false),
2448                    // Occupancy pages 1..MAX_OCC_PAGES (page 0 is
2449                    // binding 1). Unused pages bind a dummy buffer.
2450                    bgl_storage_entry(12, true),
2451                    bgl_storage_entry(13, true),
2452                    bgl_storage_entry(14, true),
2453                ],
2454            });
2455        let dda_pl = self
2456            .device
2457            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2458                label: Some("roxlap-gpu scene_dda.layout"),
2459                bind_group_layouts: &[Some(&bgl_dda)],
2460                immediate_size: 0,
2461            });
2462        let pipeline_dda = self
2463            .device
2464            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
2465                label: Some("roxlap-gpu scene_dda.pipeline"),
2466                layout: Some(&dda_pl),
2467                module: &dda_shader,
2468                entry_point: Some("render_scene"),
2469                compilation_options: wgpu::PipelineCompilationOptions::default(),
2470                cache: None,
2471            });
2472
2473        let blit_shader = self
2474            .device
2475            .create_shader_module(wgpu::ShaderModuleDescriptor {
2476                label: Some("scene_blit.wgsl"),
2477                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_blit.wgsl").into()),
2478            });
2479        let bgl_blit = self
2480            .device
2481            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2482                label: Some("roxlap-gpu scene_dda.blit_bgl"),
2483                entries: &[
2484                    // Framebuffer storage buffer (read-only in the blit).
2485                    wgpu::BindGroupLayoutEntry {
2486                        binding: 0,
2487                        visibility: wgpu::ShaderStages::FRAGMENT,
2488                        ty: wgpu::BindingType::Buffer {
2489                            ty: wgpu::BufferBindingType::Storage { read_only: true },
2490                            has_dynamic_offset: false,
2491                            min_binding_size: None,
2492                        },
2493                        count: None,
2494                    },
2495                    // Screen-size uniform for the pixel→index math.
2496                    wgpu::BindGroupLayoutEntry {
2497                        binding: 1,
2498                        visibility: wgpu::ShaderStages::FRAGMENT,
2499                        ty: wgpu::BindingType::Buffer {
2500                            ty: wgpu::BufferBindingType::Uniform,
2501                            has_dynamic_offset: false,
2502                            min_binding_size: None,
2503                        },
2504                        count: None,
2505                    },
2506                ],
2507            });
2508        let blit_pl = self
2509            .device
2510            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2511                label: Some("roxlap-gpu scene_dda.blit_layout"),
2512                bind_group_layouts: &[Some(&bgl_blit)],
2513                immediate_size: 0,
2514            });
2515        let pipeline_blit = self
2516            .device
2517            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
2518                label: Some("roxlap-gpu scene_dda.blit_pipeline"),
2519                layout: Some(&blit_pl),
2520                vertex: wgpu::VertexState {
2521                    module: &blit_shader,
2522                    entry_point: Some("vs_main"),
2523                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2524                    buffers: &[],
2525                },
2526                fragment: Some(wgpu::FragmentState {
2527                    module: &blit_shader,
2528                    entry_point: Some("fs_main"),
2529                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2530                    targets: &[Some(wgpu::ColorTargetState {
2531                        format: surface_format,
2532                        blend: None,
2533                        write_mask: wgpu::ColorWrites::ALL,
2534                    })],
2535                }),
2536                primitive: wgpu::PrimitiveState::default(),
2537                depth_stencil: None,
2538                multisample: wgpu::MultisampleState::default(),
2539                multiview_mask: None,
2540                cache: None,
2541            });
2542        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2543            label: Some("roxlap-gpu scene_dda.blit_bg"),
2544            layout: &bgl_blit,
2545            entries: &[
2546                wgpu::BindGroupEntry {
2547                    binding: 0,
2548                    resource: framebuffer.as_entire_binding(),
2549                },
2550                wgpu::BindGroupEntry {
2551                    binding: 1,
2552                    resource: blit_dims.as_entire_binding(),
2553                },
2554            ],
2555        });
2556
2557        SceneDdaResources {
2558            storage_size: (width, height),
2559            framebuffer,
2560            uniform_buf,
2561            bgl_dda,
2562            pipeline_dda,
2563            blit_bg,
2564            pipeline_blit,
2565            depth_buffer,
2566            depth_readback,
2567        }
2568    }
2569
2570    /// Read back the per-pixel world-t depth at window pixel `(x, y)`
2571    /// from the last rendered frame, for screen→world picking. Returns
2572    /// the distance `t` along the (normalised) view ray to the nearest
2573    /// scene-grid surface, so the host reconstructs the world hit as
2574    /// `cam.pos + t * normalize(ray_dir)`. `None` for out-of-bounds
2575    /// pixels, sky / no-hit (the `T_INF` sentinel), or when no scene
2576    /// frame has been rendered.
2577    ///
2578    /// The depth buffer is the SCENE pass's output (terrain + grids),
2579    /// untouched by the sprite pass (which reads it read-only), so a
2580    /// cursor sprite under the pointer does not occlude the pick.
2581    ///
2582    /// Synchronous: copies the depth buffer to a mapped staging buffer
2583    /// and blocks on `device.poll(Wait)`. Cheap enough for click-time
2584    /// picks; do not call it every frame.
2585    ///
2586    /// Requires the last frame to have written depth, which happens
2587    /// when sprites are present (`write_depth`). The pick demo always
2588    /// has a cursor sprite, so this holds.
2589    ///
2590    /// Compiles on wasm, but the wasm facade never calls it: WebGPU's
2591    /// `device.poll` doesn't block for the GPU, so the blocking
2592    /// `recv()` here would hang the single browser thread. Picking is
2593    /// deferred on the wasm GPU path (the facade returns `None`).
2594    #[must_use]
2595    pub fn read_depth_pixel(&self, x: u32, y: u32) -> Option<f32> {
2596        let dda = self.scene_dda.as_ref()?;
2597        let (w, h) = dda.storage_size;
2598        if x >= w || y >= h {
2599            return None;
2600        }
2601        let mut enc = self
2602            .device
2603            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2604                label: Some("roxlap-gpu depth readback"),
2605            });
2606        let size = u64::from(w) * u64::from(h) * 4;
2607        enc.copy_buffer_to_buffer(&dda.depth_buffer, 0, &dda.depth_readback, 0, size);
2608        self.queue.submit(std::iter::once(enc.finish()));
2609
2610        let slice = dda.depth_readback.slice(..);
2611        let (tx, rx) = std::sync::mpsc::channel();
2612        slice.map_async(wgpu::MapMode::Read, move |r| {
2613            let _ = tx.send(r);
2614        });
2615        self.device.poll(wgpu::PollType::wait_indefinitely()).ok();
2616        rx.recv().ok()?.ok()?;
2617
2618        let t = {
2619            let data = slice.get_mapped_range();
2620            let idx = ((y * w + x) * 4) as usize;
2621            let bytes: [u8; 4] = data[idx..idx + 4].try_into().ok()?;
2622            f32::from_le_bytes(bytes)
2623        };
2624        dda.depth_readback.unmap();
2625
2626        // Reject sky / no-hit (T_INF == 1e30 in the shader) + non-finite.
2627        if !t.is_finite() || t >= 1.0e29 {
2628            return None;
2629        }
2630        Some(t)
2631    }
2632
2633    /// World-space view-ray direction (un-normalised) for window pixel
2634    /// `(x, y)`, under the GPU marcher's projection — the canonical GPU
2635    /// unproject, mirroring `scene_dda.wgsl`'s `render_scene`
2636    /// (vertical-FOV pinhole). Uses the last-rendered frame's target
2637    /// size + FOV; `None` before the first scene render. Pair with
2638    /// [`Self::read_depth_pixel`] for screen→world picking.
2639    #[must_use]
2640    pub fn pixel_ray(
2641        &self,
2642        right: [f64; 3],
2643        down: [f64; 3],
2644        forward: [f64; 3],
2645        x: f64,
2646        y: f64,
2647    ) -> Option<[f64; 3]> {
2648        let dda = self.scene_dda.as_ref()?;
2649        let (w, h) = dda.storage_size;
2650        if w == 0 || h == 0 || self.last_fov_y_rad <= 0.0 {
2651            return None;
2652        }
2653        Some(pinhole_pixel_ray(
2654            right,
2655            down,
2656            forward,
2657            x,
2658            y,
2659            f64::from(w),
2660            f64::from(h),
2661            f64::from(self.last_fov_y_rad),
2662        ))
2663    }
2664
2665    /// GPU.10.1 — upload a sprite model registry + its instances for
2666    /// the DDA path. An empty instance slice clears all sprites.
2667    pub fn set_sprite_instances(
2668        &mut self,
2669        registry: &sprite_model::SpriteModelRegistry,
2670        instances: &[sprite_model::SpriteInstance],
2671    ) {
2672        if instances.is_empty() {
2673            self.sprite_registry = None;
2674            return;
2675        }
2676        self.sprite_registry = Some(sprite_model::SpriteRegistryResident::upload(
2677            &self.device,
2678            registry,
2679            instances,
2680        ));
2681    }
2682
2683    /// Re-pose the already-resident sprite instances in place (no model
2684    /// volume re-upload) — the cheap per-frame path for animated KFA
2685    /// limbs. `instances` must match the last [`Self::set_sprite_instances`]
2686    /// in length + order. No-op if no sprite registry is resident.
2687    pub fn update_sprite_instance_transforms(
2688        &mut self,
2689        instances: &[sprite_model::SpriteInstance],
2690    ) {
2691        if let Some(reg) = self.sprite_registry.as_mut() {
2692            reg.update_transforms(instances);
2693        }
2694    }
2695
2696    /// GPU.12 incremental — re-upload only LOD chain `chain_id`'s entries
2697    /// after an in-place edit of `registry` (carve / recolour), without
2698    /// rebuilding the whole sprite registry. `registry` must be the one
2699    /// last passed to [`Self::set_sprite_instances`] with chain
2700    /// `chain_id` already edited. No-op if no registry is resident.
2701    pub fn update_sprite_model(
2702        &mut self,
2703        registry: &sprite_model::SpriteModelRegistry,
2704        chain_id: u32,
2705    ) {
2706        if let Some(reg) = self.sprite_registry.as_mut() {
2707            reg.update_model(&self.device, &self.queue, registry, chain_id);
2708        }
2709    }
2710
2711    /// Set the per-instance `kv6colmul[256]` lighting tables (voxlap's
2712    /// `update_reflects` output, e.g. via `roxlap_core::sprite::
2713    /// sprite_colmul`), in the same order/length as the last
2714    /// [`Self::set_sprite_instances`]. The GPU sprite pass modulates each
2715    /// voxel by its surface normal's entry — matching the CPU rasteriser.
2716    /// No-op if no sprite registry is resident.
2717    pub fn set_sprite_instance_colmul(&mut self, tables: &[[u64; 256]]) {
2718        if let Some(reg) = self.sprite_registry.as_mut() {
2719            reg.set_instance_colmul(tables);
2720        }
2721    }
2722
2723    /// GPU.10.4 — set the LOD pixel threshold: a sprite steps to the
2724    /// next mip once a mip-0 voxel would project below `px` screen
2725    /// pixels. `1.0` is the natural "no sub-pixel voxels" default;
2726    /// larger values force LOD in closer (useful for inspection).
2727    /// Clamped to ≥ 0.25.
2728    pub fn set_sprite_lod_px(&mut self, px: f32) {
2729        self.sprite_lod_px = px.max(0.25);
2730    }
2731
2732    /// GPU.11.1 — set the scene-grid LOD scan distance (world units).
2733    /// A chunk entered at world-t `t` is marched at mip
2734    /// `floor(log2(max(t, msd) / msd))`, clamped to its grid's mip
2735    /// ladder. `0` disables LOD (always mip-0). Larger values push
2736    /// the coarser mips farther out — the axis-aligned-mip-beams
2737    /// mitigation lever (GPU.11.2). Default 64 (matches CPU
2738    /// `mip_scan_dist`).
2739    pub fn set_scene_mip_scan_dist(&mut self, dist: f32) {
2740        self.scene_mip_scan_dist = dist.max(0.0);
2741    }
2742
2743    /// Set per-face grid side-shading — voxlap's
2744    /// `setsideshades(top, bot, left, right, up, down)`. Each value is
2745    /// subtracted (as a u8, matching the CPU `gcsub` high byte) from a
2746    /// hit voxel's brightness byte before shading, so the scene-DDA pass
2747    /// darkens grid faces the same way the CPU rasteriser does. `[0; 6]`
2748    /// disables it (the default). The hit face is taken from the DDA's
2749    /// last-stepped axis + ray direction.
2750    pub fn set_scene_side_shades(&mut self, s: [i8; 6]) {
2751        // Reinterpret each i8 as u8 (voxlap stamps `sxx` into gcsub's
2752        // high byte verbatim), then pack (top, bot, left, right) /
2753        // (up, down, 0, 0) for the two uniform vec4s.
2754        let v = |i: usize| i32::from(s[i] as u8);
2755        self.scene_side_shades = [[v(0), v(1), v(2), v(3)], [v(4), v(5), 0, 0]];
2756    }
2757
2758    /// GPU.10.1 — build the instanced model-DDA pipeline (one thread
2759    /// per pixel). Lazily invoked the first frame a registry is present.
2760    fn build_sprite_model_dda(&self) -> SpriteModelDdaResources {
2761        let shader = self
2762            .device
2763            .create_shader_module(wgpu::ShaderModuleDescriptor {
2764                label: Some("sprite_model_dda.wgsl"),
2765                source: wgpu::ShaderSource::Wgsl(
2766                    include_str!("../shaders/sprite_model_dda.wgsl").into(),
2767                ),
2768            });
2769        let bgl = self
2770            .device
2771            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2772                label: Some("roxlap-gpu sprite_model_dda.bgl"),
2773                entries: &[
2774                    bgl_uniform_entry(0),
2775                    bgl_storage_entry(1, true),  // occupancy
2776                    bgl_storage_entry(2, true),  // colors
2777                    bgl_storage_entry(3, true),  // color_offsets
2778                    bgl_storage_entry(4, true),  // model_meta
2779                    bgl_storage_entry(5, true),  // instances
2780                    bgl_storage_entry(6, true),  // scene depth
2781                    bgl_storage_entry(7, false), // framebuffer (read-write buffer)
2782                    bgl_storage_entry(8, true),  // tile_ranges
2783                    bgl_storage_entry(9, true),  // tile_instances
2784                    bgl_storage_entry(10, true), // per-voxel dir
2785                    bgl_storage_entry(11, true), // per-instance kv6colmul
2786                ],
2787            });
2788        let pl = self
2789            .device
2790            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2791                label: Some("roxlap-gpu sprite_model_dda.layout"),
2792                bind_group_layouts: &[Some(&bgl)],
2793                immediate_size: 0,
2794            });
2795        let pipeline = self
2796            .device
2797            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
2798                label: Some("roxlap-gpu sprite_model_dda.pipeline"),
2799                layout: Some(&pl),
2800                module: &shader,
2801                entry_point: Some("march"),
2802                compilation_options: wgpu::PipelineCompilationOptions::default(),
2803                cache: None,
2804            });
2805        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2806            label: Some("roxlap-gpu sprite_model_dda.uniform"),
2807            size: std::mem::size_of::<SpriteModelUniform>() as u64,
2808            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2809            mapped_at_creation: false,
2810        });
2811        SpriteModelDdaResources {
2812            bgl,
2813            pipeline,
2814            uniform_buf,
2815        }
2816    }
2817}
2818
/// GPU.11 — headless scene-DDA renderer for tests + offline visual
/// gates. Owns the `scene_dda.wgsl` compute pipeline with no surface
/// and no blit pass; renders a [`GpuSceneResident`] to an in-memory
/// RGBA framebuffer via texture readback. The per-substage visual
/// gate (render reference scenes, diff PPMs) and the GPU.11.1 mip
/// render-diff both ride on this.
pub struct HeadlessSceneRenderer {
    /// Framebuffer width in pixels, as passed to [`Self::new`].
    width: u32,
    /// Framebuffer height in pixels, as passed to [`Self::new`].
    height: u32,
    /// Framebuffer storage buffer (packed `rgba8unorm`, tight rows) —
    /// matches the buffer-output `scene_dda.wgsl` (see its note).
    framebuffer: wgpu::Buffer,
    /// Per-pixel depth storage buffer (`width * height * 4` bytes).
    depth_buffer: wgpu::Buffer,
    /// Uniform buffer sized for one `SceneDdaUniform`.
    uniform_buf: wgpu::Buffer,
    /// Sky texture behind `sky_view`; created as a 1×1 default-colour
    /// texel. NOTE(review): the underscore prefix suggests it is held only
    /// to keep the texture alive — confirm.
    _sky_texture: wgpu::Texture,
    /// View of the sky texture.
    sky_view: wgpu::TextureView,
    /// Sky sampler (repeat addressing, linear min/mag filtering).
    sky_sampler: wgpu::Sampler,
    /// Bind group layout of the scene-DDA compute pipeline.
    bgl: wgpu::BindGroupLayout,
    /// The compute pipeline built from `scene_dda.wgsl`.
    pipeline: wgpu::ComputePipeline,
    /// NOTE(review): presumably the CPU-mappable staging buffer the
    /// framebuffer is copied into for readback — confirm against `new`.
    readback: wgpu::Buffer,
    /// Per-face side-shades for the gate render (default none). Packed
    /// `[(top,bot,left,right), (up,down,_,_)]`; set via
    /// [`Self::set_side_shades`].
    side_shades: [[i32; 4]; 2],
}
2844
2845impl HeadlessSceneRenderer {
2846    /// Build the compute pipeline + output/readback resources for a
2847    /// `width × height` framebuffer. Validates `scene_dda.wgsl` and
2848    /// the [`scene::GridStaticMeta`] std430 layout at pipeline /
2849    /// bind-group time.
2850    #[must_use]
2851    pub fn new(device: &wgpu::Device, queue: &wgpu::Queue, width: u32, height: u32) -> Self {
2852        let framebuffer = device.create_buffer(&wgpu::BufferDescriptor {
2853            label: Some("roxlap-gpu headless.framebuffer"),
2854            size: u64::from(width) * u64::from(height) * 4,
2855            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
2856            mapped_at_creation: false,
2857        });
2858
2859        let uniform_buf = device.create_buffer(&wgpu::BufferDescriptor {
2860            label: Some("roxlap-gpu headless.uniform"),
2861            size: std::mem::size_of::<SceneDdaUniform>() as u64,
2862            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2863            mapped_at_creation: false,
2864        });
2865        let depth_buffer = device.create_buffer(&wgpu::BufferDescriptor {
2866            label: Some("roxlap-gpu headless.depth"),
2867            size: u64::from(width) * u64::from(height) * 4,
2868            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
2869            mapped_at_creation: false,
2870        });
2871
2872        let default_sky_pixel = [120u8, 150, 220, 255];
2873        let (sky_texture, sky_view) = create_sky_texture(device, 1, 1, &default_sky_pixel);
2874        // Upload the default sky texel (create_sky_texture only allocates
2875        // — the texel must be written or the shader samples black, which
2876        // is why a grid-less headless render came back black).
2877        queue.write_texture(
2878            wgpu::TexelCopyTextureInfo {
2879                texture: &sky_texture,
2880                mip_level: 0,
2881                origin: wgpu::Origin3d::ZERO,
2882                aspect: wgpu::TextureAspect::All,
2883            },
2884            &default_sky_pixel,
2885            wgpu::TexelCopyBufferLayout {
2886                offset: 0,
2887                bytes_per_row: Some(4),
2888                rows_per_image: Some(1),
2889            },
2890            wgpu::Extent3d {
2891                width: 1,
2892                height: 1,
2893                depth_or_array_layers: 1,
2894            },
2895        );
2896        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
2897            label: Some("roxlap-gpu headless.sky_sampler"),
2898            address_mode_u: wgpu::AddressMode::Repeat,
2899            address_mode_v: wgpu::AddressMode::Repeat,
2900            mag_filter: wgpu::FilterMode::Linear,
2901            min_filter: wgpu::FilterMode::Linear,
2902            ..Default::default()
2903        });
2904
2905        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
2906            label: Some("scene_dda.wgsl (headless)"),
2907            source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
2908        });
2909        let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2910            label: Some("roxlap-gpu headless.bgl"),
2911            entries: &[
2912                bgl_uniform_entry(0),
2913                bgl_storage_entry(1, true),
2914                bgl_storage_entry(2, true),
2915                bgl_storage_entry(3, true),
2916                bgl_storage_entry(4, true),
2917                bgl_storage_entry(5, true),
2918                bgl_storage_entry(6, true),
2919                bgl_storage_entry(7, true),
2920                // Framebuffer storage buffer (read-write).
2921                bgl_storage_entry(8, false),
2922                wgpu::BindGroupLayoutEntry {
2923                    binding: 9,
2924                    visibility: wgpu::ShaderStages::COMPUTE,
2925                    ty: wgpu::BindingType::Texture {
2926                        sample_type: wgpu::TextureSampleType::Float { filterable: true },
2927                        view_dimension: wgpu::TextureViewDimension::D2,
2928                        multisampled: false,
2929                    },
2930                    count: None,
2931                },
2932                wgpu::BindGroupLayoutEntry {
2933                    binding: 10,
2934                    visibility: wgpu::ShaderStages::COMPUTE,
2935                    ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
2936                    count: None,
2937                },
2938                bgl_storage_entry(11, false),
2939                bgl_storage_entry(12, true),
2940                bgl_storage_entry(13, true),
2941                bgl_storage_entry(14, true),
2942            ],
2943        });
2944        let pl = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2945            label: Some("roxlap-gpu headless.layout"),
2946            bind_group_layouts: &[Some(&bgl)],
2947            immediate_size: 0,
2948        });
2949        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
2950            label: Some("roxlap-gpu headless.pipeline"),
2951            layout: Some(&pl),
2952            module: &shader,
2953            entry_point: Some("render_scene"),
2954            compilation_options: wgpu::PipelineCompilationOptions::default(),
2955            cache: None,
2956        });
2957
2958        // Readback is a tight buffer-to-buffer copy (no 256-byte row
2959        // padding, unlike the old texture-to-buffer path).
2960        let readback = device.create_buffer(&wgpu::BufferDescriptor {
2961            label: Some("roxlap-gpu headless.readback"),
2962            size: u64::from(width) * u64::from(height) * 4,
2963            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
2964            mapped_at_creation: false,
2965        });
2966
2967        Self {
2968            width,
2969            height,
2970            framebuffer,
2971            depth_buffer,
2972            uniform_buf,
2973            _sky_texture: sky_texture,
2974            sky_view,
2975            sky_sampler,
2976            bgl,
2977            pipeline,
2978            readback,
2979            side_shades: [[0; 4]; 2],
2980        }
2981    }
2982
2983    /// Set per-face side-shades for subsequent [`Self::render`] calls —
2984    /// voxlap `setsideshades(top, bot, left, right, up, down)`, each an
2985    /// i8 stamped as u8 (matching the engine path). Lets the gate test
2986    /// the GPU side-shade darkening.
2987    pub fn set_side_shades(&mut self, s: [i8; 6]) {
2988        let v = |i: usize| i32::from(s[i] as u8);
2989        self.side_shades = [[v(0), v(1), v(2), v(3)], [v(4), v(5), 0, 0]];
2990    }
2991
2992    /// Render `scene` from `cameras` (one per grid) and read the
2993    /// framebuffer back as `width*height` packed `0xAABBGGRR` pixels
2994    /// (R in the low byte). Fog is disabled. `mip_scan_dist` drives
2995    /// the GPU.11.1 scene-grid LOD (`0` = always mip-0). Blocks on
2996    /// readback.
2997    ///
2998    /// # Panics
2999    /// If `cameras.len() != scene.grid_count`.
3000    #[must_use]
3001    #[allow(clippy::too_many_arguments)]
3002    pub fn render(
3003        &self,
3004        device: &wgpu::Device,
3005        queue: &wgpu::Queue,
3006        scene: &GpuSceneResident,
3007        cameras: &[Camera],
3008        fov_y_rad: f32,
3009        max_outer_steps: u32,
3010        mip_scan_dist: f32,
3011    ) -> Vec<u32> {
3012        assert_eq!(
3013            cameras.len(),
3014            scene.grid_count as usize,
3015            "headless render: {} cameras for {} grids",
3016            cameras.len(),
3017            scene.grid_count,
3018        );
3019
3020        let mut cam_array = [SceneDdaPerGridCamera::zeroed(); SCENE_MAX_GRIDS];
3021        for (i, cam) in cameras.iter().enumerate() {
3022            cam_array[i] = SceneDdaPerGridCamera {
3023                pos: cam.position,
3024                _pad0: 0.0,
3025                right: cam.right,
3026                _pad1: 0.0,
3027                down: cam.down,
3028                _pad2: 0.0,
3029                forward: cam.forward,
3030                _pad3: 0.0,
3031            };
3032        }
3033        let uniform = SceneDdaUniform {
3034            fov_y_rad,
3035            grid_count: scene.grid_count,
3036            max_outer_steps,
3037            _pad0: 0,
3038            screen_size: [self.width, self.height],
3039            _pad1: [0; 2],
3040            cameras: cam_array,
3041            // Fog off: near/far past any reachable t → factor 0.
3042            fog_color: [0.0, 0.0, 0.0, 1.0e29],
3043            fog_far: 1.0e30,
3044            write_depth: 0,
3045            occ_page_words: scene.occupancy_page_words,
3046            occ_num_pages: scene.occupancy_num_pages,
3047            mip_scan_dist,
3048            _pad2: 0,
3049            _pad3: 0,
3050            _pad4: 0,
3051            // Sky direction from the first grid camera (the world frame
3052            // in these tests); a default forward camera when there are
3053            // none (grid_count == 0) so the sky lookup stays valid.
3054            sky_cam: SceneDdaPerGridCamera::from_camera(&cameras.first().copied().unwrap_or(
3055                Camera {
3056                    position: [0.0; 3],
3057                    right: [1.0, 0.0, 0.0],
3058                    down: [0.0, 0.0, 1.0],
3059                    forward: [0.0, 1.0, 0.0],
3060                    fov_y_rad,
3061                },
3062            )),
3063            side_shades0: self.side_shades[0],
3064            side_shades1: self.side_shades[1],
3065        };
3066        queue.write_buffer(&self.uniform_buf, 0, bytemuck::bytes_of(&uniform));
3067
3068        let bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
3069            label: Some("roxlap-gpu headless.bg"),
3070            layout: &self.bgl,
3071            entries: &[
3072                wgpu::BindGroupEntry {
3073                    binding: 0,
3074                    resource: self.uniform_buf.as_entire_binding(),
3075                },
3076                wgpu::BindGroupEntry {
3077                    binding: 1,
3078                    resource: scene.occupancy_pages[0].as_entire_binding(),
3079                },
3080                wgpu::BindGroupEntry {
3081                    binding: 2,
3082                    resource: scene.all_color_offsets.as_entire_binding(),
3083                },
3084                wgpu::BindGroupEntry {
3085                    binding: 3,
3086                    resource: scene.all_colors.as_entire_binding(),
3087                },
3088                wgpu::BindGroupEntry {
3089                    binding: 4,
3090                    resource: scene.all_chunk_colors_base.as_entire_binding(),
3091                },
3092                wgpu::BindGroupEntry {
3093                    binding: 5,
3094                    resource: scene.all_chunk_occupancy.as_entire_binding(),
3095                },
3096                wgpu::BindGroupEntry {
3097                    binding: 6,
3098                    resource: scene.grid_static_meta.as_entire_binding(),
3099                },
3100                wgpu::BindGroupEntry {
3101                    binding: 7,
3102                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
3103                },
3104                wgpu::BindGroupEntry {
3105                    binding: 8,
3106                    resource: self.framebuffer.as_entire_binding(),
3107                },
3108                wgpu::BindGroupEntry {
3109                    binding: 9,
3110                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
3111                },
3112                wgpu::BindGroupEntry {
3113                    binding: 10,
3114                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
3115                },
3116                wgpu::BindGroupEntry {
3117                    binding: 11,
3118                    resource: self.depth_buffer.as_entire_binding(),
3119                },
3120                wgpu::BindGroupEntry {
3121                    binding: 12,
3122                    resource: scene.occupancy_pages[1].as_entire_binding(),
3123                },
3124                wgpu::BindGroupEntry {
3125                    binding: 13,
3126                    resource: scene.occupancy_pages[2].as_entire_binding(),
3127                },
3128                wgpu::BindGroupEntry {
3129                    binding: 14,
3130                    resource: scene.occupancy_pages[3].as_entire_binding(),
3131                },
3132            ],
3133        });
3134
3135        let mut enc =
3136            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
3137        {
3138            let mut pass = enc.begin_compute_pass(&wgpu::ComputePassDescriptor {
3139                label: Some("roxlap-gpu headless.pass"),
3140                timestamp_writes: None,
3141            });
3142            pass.set_pipeline(&self.pipeline);
3143            pass.set_bind_group(0, &bg, &[]);
3144            pass.dispatch_workgroups(self.width.div_ceil(8), self.height.div_ceil(8), 1);
3145        }
3146        enc.copy_buffer_to_buffer(
3147            &self.framebuffer,
3148            0,
3149            &self.readback,
3150            0,
3151            u64::from(self.width) * u64::from(self.height) * 4,
3152        );
3153        queue.submit(Some(enc.finish()));
3154
3155        let slice = self.readback.slice(..);
3156        let (tx, rx) = std::sync::mpsc::channel();
3157        slice.map_async(wgpu::MapMode::Read, move |r| {
3158            let _ = tx.send(r);
3159        });
3160        device.poll(wgpu::PollType::wait_indefinitely()).ok();
3161        rx.recv().expect("map_async channel").expect("map_async");
3162
3163        let data = slice.get_mapped_range();
3164        // Tight `width*height` packed pixels — the shader's
3165        // `pack4x8unorm(vec4(r,g,b,a))` already yields `0xAABBGGRR`
3166        // little-endian, so a straight u32 read reconstructs each pixel.
3167        let out: Vec<u32> = data
3168            .chunks_exact(4)
3169            .map(|px| u32::from_le_bytes([px[0], px[1], px[2], px[3]]))
3170            .collect();
3171        drop(data);
3172        self.readback.unmap();
3173        out
3174    }
3175}
3176
3177fn bgl_uniform_entry(binding: u32) -> wgpu::BindGroupLayoutEntry {
3178    wgpu::BindGroupLayoutEntry {
3179        binding,
3180        visibility: wgpu::ShaderStages::COMPUTE,
3181        ty: wgpu::BindingType::Buffer {
3182            ty: wgpu::BufferBindingType::Uniform,
3183            has_dynamic_offset: false,
3184            min_binding_size: None,
3185        },
3186        count: None,
3187    }
3188}
3189
3190fn bgl_storage_entry(binding: u32, read_only: bool) -> wgpu::BindGroupLayoutEntry {
3191    wgpu::BindGroupLayoutEntry {
3192        binding,
3193        visibility: wgpu::ShaderStages::COMPUTE,
3194        ty: wgpu::BindingType::Buffer {
3195            ty: wgpu::BufferBindingType::Storage { read_only },
3196            has_dynamic_offset: false,
3197            min_binding_size: None,
3198        },
3199        count: None,
3200    }
3201}
3202
3203/// Create a fresh sky panorama texture sized `width × height` with
3204/// the initial pixel data uploaded via `write_texture`. Used by
3205/// `GpuRenderer::new` (1×1 default) and `set_sky_panorama` (host-
3206/// supplied panorama).
3207fn create_sky_texture(
3208    device: &wgpu::Device,
3209    width: u32,
3210    height: u32,
3211    _initial_pixels: &[u8],
3212) -> (wgpu::Texture, wgpu::TextureView) {
3213    let tex = device.create_texture(&wgpu::TextureDescriptor {
3214        label: Some("roxlap-gpu sky_texture"),
3215        size: wgpu::Extent3d {
3216            width,
3217            height,
3218            depth_or_array_layers: 1,
3219        },
3220        mip_level_count: 1,
3221        sample_count: 1,
3222        dimension: wgpu::TextureDimension::D2,
3223        format: wgpu::TextureFormat::Rgba8Unorm,
3224        usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
3225        view_formats: &[],
3226    });
3227    let view = tex.create_view(&wgpu::TextureViewDescriptor::default());
3228    (tex, view)
3229}
3230
3231/// GPU.4 needs to upload a whole grid (~hundreds of MiB) as a few
3232/// storage buffers. wgpu's default `max_storage_buffer_binding_size`
3233/// is 128 MiB, which is just enough for the demo's 32×32 ground
3234/// occupancy (~128 MiB) but not the colour array. We request as
3235/// much as the adapter is willing to give — most desktop GPUs cap
3236/// individual storage buffers at 2-4 GiB; iGPUs often offer the
3237/// full system memory.
3238pub(crate) fn pick_required_limits(adapter_limits: &wgpu::Limits) -> wgpu::Limits {
3239    wgpu::Limits {
3240        max_storage_buffer_binding_size: adapter_limits.max_storage_buffer_binding_size,
3241        max_buffer_size: adapter_limits.max_buffer_size,
3242        // Occupancy paging adds up to MAX_OCC_PAGES-1 extra storage
3243        // bindings; with the scene's other buffers + the GPU.9 depth
3244        // buffer the scene_dda stage needs ~11. The default cap is 8.
3245        // Both NVK and lavapipe advertise ≫16, so request 16.
3246        max_storage_buffers_per_shader_stage: adapter_limits
3247            .max_storage_buffers_per_shader_stage
3248            .min(16),
3249        ..wgpu::Limits::default()
3250    }
3251}
3252
3253fn pick_present_mode(modes: &[wgpu::PresentMode]) -> wgpu::PresentMode {
3254    // Prefer Mailbox > Immediate > Fifo. Fifo is the universal
3255    // fallback and the only one Wayland-on-Mesa always offers.
3256    for &m in &[wgpu::PresentMode::Mailbox, wgpu::PresentMode::Immediate] {
3257        if modes.contains(&m) {
3258            return m;
3259        }
3260    }
3261    wgpu::PresentMode::Fifo
3262}
3263
/// Un-normalised world-space view-ray direction through window pixel
/// `(x, y)` of a `w × h` frame, for a pinhole camera with vertical
/// field of view `fov_y_rad` — the projection `scene_dda.wgsl`'s
/// `render_scene` uses. Shared by [`GpuRenderer::pixel_ray`]; kept
/// standalone so it can be unit-tested without a device.
///
/// `right` / `down` / `forward` are the camera basis. The ray is
/// `forward + kx·right − ky·down`, where `kx` and `ky` are the pixel
/// centre's offsets on the image plane at unit distance (`ky` positive
/// towards the top of the frame, hence the subtraction along `down`).
#[must_use]
#[allow(clippy::too_many_arguments)]
pub fn pinhole_pixel_ray(
    right: [f64; 3],
    down: [f64; 3],
    forward: [f64; 3],
    x: f64,
    y: f64,
    w: f64,
    h: f64,
    fov_y_rad: f64,
) -> [f64; 3] {
    // Half-extents of the image plane at unit distance along `forward`.
    let tan_half_fov_y = (fov_y_rad * 0.5).tan();
    let tan_half_fov_x = tan_half_fov_y * (w / h);

    // Pixel centre in normalised device coordinates: x grows to the
    // right in [-1, 1], y grows upwards in [-1, 1].
    let ndc_right = (x + 0.5) / w * 2.0 - 1.0;
    let ndc_up = 1.0 - (y + 0.5) / h * 2.0;

    let kx = ndc_right * tan_half_fov_x;
    let ky = ndc_up * tan_half_fov_y;

    std::array::from_fn(|axis| forward[axis] + kx * right[axis] - ky * down[axis])
}
3292
#[cfg(test)]
mod pixel_ray_tests {
    use super::pinhole_pixel_ray;

    const RIGHT: [f64; 3] = [1.0, 0.0, 0.0];
    const DOWN: [f64; 3] = [0.0, 1.0, 0.0];
    const FWD: [f64; 3] = [0.0, 0.0, 1.0]; // voxlap z-down "look down"

    // Frame size shared by the projection tests.
    const FRAME_W: f64 = 1280.0;
    const FRAME_H: f64 = 720.0;

    // Ray through window position `(x, y)` of the shared test frame,
    // using the fixed RIGHT / DOWN / FWD basis.
    fn ray_at(x: f64, y: f64, fov_y_rad: f64) -> [f64; 3] {
        pinhole_pixel_ray(RIGHT, DOWN, FWD, x, y, FRAME_W, FRAME_H, fov_y_rad)
    }

    // The frame centre (NDC 0,0) must look straight along `forward`.
    #[test]
    fn centre_pixel_is_forward() {
        let d = ray_at(639.5, 359.5, 60_f64.to_radians());
        let no_lateral_tilt = d[0].abs() < 1e-9 && d[1].abs() < 1e-9;
        assert!(no_lateral_tilt, "centre ≈ forward, got {d:?}");
        assert!((d[2] - 1.0).abs() < 1e-9);
    }

    // At the very right edge the lateral component equals
    // half_w = tan(fov_y/2) * aspect, i.e. the ray tilts +right by
    // tan(hfov/2).
    #[test]
    fn right_edge_tilts_by_half_w() {
        let fov = 60_f64.to_radians();
        let half_w = (fov * 0.5).tan() * (FRAME_W / FRAME_H);
        let d = ray_at(1279.5, 359.5, fov);
        assert!((d[0] - half_w).abs() < 1e-6, "x={}, half_w={half_w}", d[0]);
        assert!(d[0] > 0.0, "right edge tilts +right");
    }

    /// Run every WGSL shader through naga's parser and validator (the
    /// same front-end + validator wgpu runs at pipeline creation), so
    /// shader edits — e.g. the GPU.10 sprite lighting bindings — are
    /// caught in CI without needing a GPU device.
    #[test]
    fn wgsl_shaders_validate() {
        let shaders: &[(&str, &str)] = &[
            (
                "sprite_model_dda.wgsl",
                include_str!("../shaders/sprite_model_dda.wgsl"),
            ),
            ("scene_dda.wgsl", include_str!("../shaders/scene_dda.wgsl")),
            ("blit.wgsl", include_str!("../shaders/blit.wgsl")),
            ("chunk_dda.wgsl", include_str!("../shaders/chunk_dda.wgsl")),
            ("grid_dda.wgsl", include_str!("../shaders/grid_dda.wgsl")),
            (
                "scene_blit.wgsl",
                include_str!("../shaders/scene_blit.wgsl"),
            ),
            ("line.wgsl", include_str!("../shaders/line.wgsl")),
        ];

        // A single validator with every flag and capability enabled is
        // reused for all shaders.
        let mut validator = naga::valid::Validator::new(
            naga::valid::ValidationFlags::all(),
            naga::valid::Capabilities::all(),
        );

        for &(name, src) in shaders {
            let module = match naga::front::wgsl::parse_str(src) {
                Ok(parsed) => parsed,
                Err(e) => panic!("{name}: WGSL parse failed:\n{}", e.emit_to_string(src)),
            };
            if let Err(e) = validator.validate(&module) {
                panic!("{name}: WGSL validation failed: {e:?}");
            }
        }
    }
}