Skip to main content

roxlap_gpu/
lib.rs

1//! WGPU-backed compute-shader renderer scaffold for the roxlap
2//! voxel engine. GPU.1 in `PORTING-GPU.md`.
3//!
4//! GPU.1's job: stand up the device + surface + swapchain on a
5//! host window (any [`raw-window-handle`](raw_window_handle)
6//! provider), present a clear-to-colour frame each render call,
7//! and give the host a one-call opt-in. No voxel marching yet — the
8//! [`examples/probe.rs`](../examples/probe.rs) standalone holds
9//! the empirical FPS baseline from GPU.0.
10//!
11//! Later sub-substages flesh `GpuRenderer::render` out: GPU.2
12//! uploads voxel data, GPU.3 dispatches the inner-DDA compute
13//! shader, GPU.4 layers in chunk skipping, GPU.5 plugs the renderer
14//! into `roxlap-scene::Scene`, …
15//!
16//! ## Host integration shape (GPU.1)
17//!
18//! ```no_run
19//! use std::sync::Arc;
20//! use roxlap_gpu::{GpuRenderer, GpuRendererSettings};
21//! # use winit::window::Window;
22//! # fn pick(w: Arc<Window>, size: (u32, u32)) -> Option<GpuRenderer> {
23//! match GpuRenderer::new_blocking(w, size, GpuRendererSettings::default()) {
24//!     Ok(r) => Some(r),
25//!     Err(e) => {
26//!         eprintln!("GPU init failed: {e}; falling back to CPU");
27//!         None
28//!     }
29//! }
30//! # }
31//! ```
32
33#![allow(clippy::must_use_candidate, clippy::too_many_lines)]
34
35pub mod camera;
36pub mod decompress;
37pub mod grid;
38pub mod headless;
39pub mod resident;
40pub mod scene;
41pub mod sprite_model;
42
43pub use camera::Camera;
44pub use decompress::{decompress_chunk, ChunkUpload, BEDROCK_RGB, CHUNK_Z};
45pub use grid::{bounding_box_of, GpuGridResident, GridUpload};
46pub use headless::HeadlessGpu;
47pub use resident::GpuChunkResident;
48pub use scene::{
49    GpuSceneResident, GridRuntimeTransform, GridStaticMeta, RefreshOutcome, SceneUpload,
50    MAX_SCENE_GRIDS,
51};
52pub use sprite_model::{
53    build_sprite_model, SpriteInstance, SpriteInstanceTransform, SpriteModel, SpriteModelRegistry,
54    SpriteRegistryResident,
55};
56
57use std::sync::Arc;
58
59use bytemuck::{Pod, Zeroable};
60use raw_window_handle::{HasDisplayHandle, HasWindowHandle};
61
/// Caller-controllable knobs for [`GpuRenderer::new`]. Defaults
/// target "highest-performance GPU, prefer Mailbox/Immediate over
/// vsync" — i.e. the same configuration the GPU.0 probe used to
/// measure the FPS ceiling.
///
/// The struct is `Copy`; it is passed to the constructor by value.
#[derive(Debug, Clone, Copy)]
pub struct GpuRendererSettings {
    /// Adapter class to request. [`GpuRenderer::new`] maps this onto
    /// the corresponding `wgpu::PowerPreference`.
    pub power_preference: PowerPreference,
    /// Initial clear colour cycled by GPU.1's empty render path,
    /// as `[r, g, b]`.
    /// The voxel-rendering substages overwrite this entirely.
    pub clear_colour: [f64; 3],
    /// Prefer mailbox/immediate when offered; falls back to FIFO if
    /// the surface only supports it (Wayland under Mesa often does).
    /// When `false`, [`GpuRenderer::new`] always selects FIFO.
    pub uncapped_present: bool,
}
76
/// Adapter power/performance class requested by [`GpuRenderer::new`].
///
/// A crate-local stand-in for `wgpu::PowerPreference`, so hosts can
/// fill in [`GpuRendererSettings`] without naming `wgpu` types.
/// Derives the common comparison/hash traits so settings can be
/// compared or used as map keys.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PowerPreference {
    /// Requests `wgpu::PowerPreference::LowPower`.
    Low,
    /// Requests `wgpu::PowerPreference::HighPerformance`. This is the
    /// value used by `GpuRendererSettings::default()`.
    High,
}
82
83impl Default for GpuRendererSettings {
84    fn default() -> Self {
85        Self {
86            power_preference: PowerPreference::High,
87            clear_colour: [0.06, 0.08, 0.12],
88            uncapped_present: true,
89        }
90    }
91}
92
/// Errors `GpuRenderer::new` surfaces to the host. The host's
/// expected flow is "try this, fall back to the CPU path on Err".
///
/// The wrapped `wgpu` errors are exposed through
/// `std::error::Error::source`.
#[derive(Debug)]
pub enum GpuInitError {
    /// `wgpu::Instance::create_surface` failed for the host window.
    CreateSurface(wgpu::CreateSurfaceError),
    /// No adapter compatible with the surface was available.
    NoAdapter,
    /// `wgpu::Adapter::request_device` failed.
    RequestDevice(wgpu::RequestDeviceError),
}
101
102impl std::fmt::Display for GpuInitError {
103    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104        match self {
105            Self::CreateSurface(e) => write!(f, "create_surface failed: {e}"),
106            Self::NoAdapter => write!(
107                f,
108                "no compatible adapter — does this system have a Vulkan/Metal/DX12 driver?"
109            ),
110            Self::RequestDevice(e) => write!(f, "request_device failed: {e}"),
111        }
112    }
113}
114
115impl std::error::Error for GpuInitError {
116    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
117        match self {
118            Self::CreateSurface(e) => Some(e),
119            Self::RequestDevice(e) => Some(e),
120            Self::NoAdapter => None,
121        }
122    }
123}
124
125impl From<wgpu::CreateSurfaceError> for GpuInitError {
126    fn from(value: wgpu::CreateSurfaceError) -> Self {
127        Self::CreateSurface(value)
128    }
129}
130
131impl From<wgpu::RequestDeviceError> for GpuInitError {
132    fn from(value: wgpu::RequestDeviceError) -> Self {
133        Self::RequestDevice(value)
134    }
135}
136
/// WGPU-backed renderer. Owns the device, queue, and surface
/// bound to the host's window. [`Self::render`] is the GPU.1
/// clear-to-colour path; [`Self::render_chunk`] is GPU.3's
/// single-chunk DDA marcher.
///
/// The window is consumed only at construction — `wgpu`'s
/// `Surface<'static>` keeps its own `Arc` clone of the handle, so
/// the renderer holds no window field of its own.
pub struct GpuRenderer {
    /// Presentation surface created from the host window at
    /// construction.
    surface: wgpu::Surface<'static>,
    /// Current swapchain configuration. [`Self::resize`] updates the
    /// width/height and re-applies it; the render paths re-apply it
    /// when the surface reports `Outdated`/`Lost`.
    surface_config: wgpu::SurfaceConfiguration,
    /// Logical device; lent to hosts via [`Self::device`].
    device: wgpu::Device,
    /// Submission queue; lent to hosts via [`Self::queue`].
    queue: wgpu::Queue,
    /// `"{name} ({backend:?}, {device_type:?})"` description of the
    /// selected adapter, captured at construction and returned by
    /// [`Self::adapter_info`].
    adapter_info: String,
    /// Base `[r, g, b]` clear colour copied from
    /// `GpuRendererSettings::clear_colour`; [`Self::render`] adds a
    /// small time-varying drift on top.
    clear_colour: [f64; 3],
    /// Wrapping count of frames presented by [`Self::render`]; drives
    /// the clear-colour drift phase.
    frame_count: u32,
    /// Lazy-built on first [`Self::render_chunk`] call; rebuilt when
    /// the swapchain resizes (storage texture must match).
    chunk_dda: Option<ChunkDdaResources>,
    /// Lazy-built on first [`Self::render_grid`] call; same resize
    /// trigger as `chunk_dda`. The two paths share the same blit
    /// pipeline structure but bind different storage layouts.
    grid_dda: Option<GridDdaResources>,
    /// Lazy-built on first [`Self::render_scene`] call. Holds the
    /// multi-grid pipeline + per-grid camera uniforms.
    scene_dda: Option<SceneDdaResources>,
    /// GPU.8 — panoramic sky texture + sampler. Created at
    /// `new` as a 1×1 mid-grey default; [`Self::set_sky_panorama`]
    /// replaces it. The scene-DDA bind group references this each
    /// frame.
    sky_texture: wgpu::Texture,
    sky_view: wgpu::TextureView,
    sky_sampler: wgpu::Sampler,
    /// GPU.8 fog state. `color` is BGRA-style premultiplied (each
    /// channel in [0, 1]); `near` is the world-t distance at which
    /// fog starts kicking in; `far` is the distance at which it's
    /// fully opaque. The shader does
    /// `mix(hit, fog, smoothstep(near, far, t))`.
    fog_color: [f32; 3],
    fog_near: f32,
    fog_far: f32,
    /// GPU.10 — sprites rendered as DDA-marched voxel models (the
    /// precise path; the GPU.9 compute splatter it replaced was
    /// retired in 10.5). Holds the concatenated model registry + the
    /// per-frame instance array; set via [`Self::set_sprite_instances`].
    sprite_registry: Option<sprite_model::SpriteRegistryResident>,
    /// Lazy-built pipeline + uniform for the model-DDA pass.
    sprite_model_dda: Option<SpriteModelDdaResources>,
    /// GPU.10.4 — LOD aggressiveness: step a sprite to the next mip
    /// once a mip-0 voxel projects below this many screen pixels.
    /// Defaults to 4.0 (the empirical sweet spot); the host can tune
    /// via [`Self::set_sprite_lod_px`].
    sprite_lod_px: f32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` is marched at the mip level
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// ladder. `0` disables LOD (always mip-0). Tunable via
    /// [`Self::set_scene_mip_scan_dist`] — the axis-aligned-mip-beams
    /// mitigation (GPU.11.2) pushes it outward if banding appears.
    scene_mip_scan_dist: f32,
    /// Per-face grid side-shades (voxlap setsideshades), packed for the
    /// scene-DDA uniform: `[0]=(top,bot,left,right)`, `[1]=(up,down,_,_)`.
    /// Each is the u8 shade intensity. `[[0;4];2]` = no shading. Set via
    /// [`Self::set_scene_side_shades`].
    scene_side_shades: [[i32; 4]; 2],
    /// Vertical FOV (radians) the last `render_scene` marched with —
    /// cached so [`Self::pixel_ray`] reconstructs the matching view ray
    /// for picking. `0` until the first scene render.
    last_fov_y_rad: f32,
    /// The acquired-but-not-yet-presented swapchain frame from the most
    /// recent deferred render ([`Self::render_scene`] /
    /// [`Self::render_clear_deferred`]). [`Self::present`] shows it as
    /// is; [`Self::paint_egui`] overlays egui first. Lets a host slot a
    /// UI pass between the marcher and present. `None` between present
    /// and the next render.
    pending_frame: Option<(wgpu::SurfaceTexture, wgpu::TextureView)>,
    /// Lazy-built `egui-wgpu` paint pipeline; created on the first
    /// [`Self::paint_egui`] call (`hud` feature).
    #[cfg(feature = "hud")]
    egui_renderer: Option<egui_wgpu::Renderer>,
}
218
/// Per-renderer chunk-DDA pipeline state. The compute shader writes
/// into the storage texture; a fullscreen-triangle render pass
/// nearest-neighbour blits it to the swapchain.
struct ChunkDdaResources {
    /// Swapchain `(width, height)` these resources were built for;
    /// `render_chunk` rebuilds them when this no longer matches the
    /// surface configuration.
    storage_size: (u32, u32),
    /// View of the storage texture; bound at binding 4 of the
    /// per-frame DDA bind group.
    storage_view: wgpu::TextureView,
    /// Receives a fresh `ChunkDdaUniform` on every `render_chunk`
    /// call; bound at binding 0.
    uniform_buf: wgpu::Buffer,
    /// Layout used for the DDA bind group that `render_chunk`
    /// recreates every frame.
    bgl_dda: wgpu::BindGroupLayout,
    /// Compute pipeline for the DDA pass.
    pipeline_dda: wgpu::ComputePipeline,
    /// Bind group for the blit pass — presumably the storage texture
    /// plus sampler; its construction is outside this view (TODO
    /// confirm).
    blit_bg: wgpu::BindGroup,
    /// Render pipeline for the fullscreen-triangle blit.
    pipeline_blit: wgpu::RenderPipeline,
    // wgpu BindGroups internally Arc their resources, but we keep
    // the handle so the sampler shows up in profiler dumps.
    _sampler: wgpu::Sampler,
}
234
/// Per-renderer grid-DDA pipeline state, stored in
/// `GpuRenderer::grid_dda`. Carries the same field set as
/// `ChunkDdaResources`; per the renderer's field docs the two paths
/// share the blit pipeline structure but bind different storage
/// layouts.
struct GridDdaResources {
    /// Swapchain `(width, height)` the storage texture was built for
    /// — presumably compared against the surface size to trigger a
    /// rebuild, as in the chunk path (TODO confirm in `render_grid`).
    storage_size: (u32, u32),
    /// View of the storage texture the compute pass writes.
    storage_view: wgpu::TextureView,
    /// Uniform buffer for the grid-DDA pass (presumably holds a
    /// `GridDdaUniform` — confirm against `render_grid`).
    uniform_buf: wgpu::Buffer,
    /// Bind group layout for the DDA compute pass.
    bgl_dda: wgpu::BindGroupLayout,
    /// Compute pipeline for the DDA pass.
    pipeline_dda: wgpu::ComputePipeline,
    /// Bind group for the blit pass.
    blit_bg: wgpu::BindGroup,
    /// Render pipeline for the blit to the swapchain.
    pipeline_blit: wgpu::RenderPipeline,
    // Held but never read directly (underscore-prefixed).
    _sampler: wgpu::Sampler,
}
245
/// Per-renderer scene-DDA pipeline state, stored in
/// `GpuRenderer::scene_dda`. Same field set as the chunk/grid
/// resource structs, plus the per-pixel depth buffer and its
/// read-back staging copy.
struct SceneDdaResources {
    /// Swapchain `(width, height)` the storage texture was built for
    /// — presumably the rebuild trigger, as in the chunk path (TODO
    /// confirm in `render_scene`).
    storage_size: (u32, u32),
    /// View of the storage texture the compute pass writes.
    storage_view: wgpu::TextureView,
    /// Uniform buffer for the scene-DDA pass (presumably holds a
    /// `SceneDdaUniform` — confirm against `render_scene`).
    uniform_buf: wgpu::Buffer,
    /// Bind group layout for the DDA compute pass.
    bgl_dda: wgpu::BindGroupLayout,
    /// Compute pipeline for the DDA pass.
    pipeline_dda: wgpu::ComputePipeline,
    /// Bind group for the blit pass.
    blit_bg: wgpu::BindGroup,
    /// Render pipeline for the blit to the swapchain.
    pipeline_blit: wgpu::RenderPipeline,
    // Held but never read directly (underscore-prefixed).
    _sampler: wgpu::Sampler,
    /// GPU.9 — per-pixel world-t depth (f32 bits as u32), sized
    /// `width * height * 4`. The scene pass writes it when sprites
    /// are present; the sprite model-DDA pass reads + composites
    /// against it.
    depth_buffer: wgpu::Buffer,
    /// Picking — a `COPY_DST | MAP_READ` staging copy of `depth_buffer`
    /// so the host can read back the per-pixel world-t after a frame
    /// (e.g. click → which voxel). Same size as `depth_buffer`.
    depth_readback: wgpu::Buffer,
}
265
/// GPU.10.0 — single-sprite model-DDA pipeline: one thread per pixel
/// marches the model voxel volume and composites against the scene
/// depth buffer.
///
/// Stored lazily in `GpuRenderer::sprite_model_dda`.
struct SpriteModelDdaResources {
    /// Bind group layout for the model-DDA compute pass.
    bgl: wgpu::BindGroupLayout,
    /// Compute pipeline for the model-DDA pass.
    pipeline: wgpu::ComputePipeline,
    /// Per-frame uniform buffer (presumably holds a
    /// `SpriteModelUniform` — confirm against the sprite pass).
    uniform_buf: wgpu::Buffer,
}
274
/// Per-frame uniform for the model-DDA pass. Mirrors `Uniform` in
/// `sprite_model_dda.wgsl` (std140). Per-model + per-instance data
/// now live in storage buffers; this holds only the camera, fog, and
/// instance count.
///
/// Field order and the explicit `_p*` padding are part of the layout
/// contract with the shader: each `[f32; 3]` is followed by one `f32`
/// so every vector occupies 16 bytes, and the struct totals 112
/// bytes (a multiple of 16). Do not reorder fields.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SpriteModelUniform {
    // Camera position and basis vectors, one padded vec3 each.
    cam_pos: [f32; 3],
    _p0: f32,
    cam_right: [f32; 3],
    _p1: f32,
    cam_down: [f32; 3],
    _p2: f32,
    cam_forward: [f32; 3],
    _p3: f32,
    // NOTE(review): `SceneDdaUniform::fog_color` packs `fog_near`
    // into the 4th component; presumably the same here — confirm
    // against `sprite_model_dda.wgsl`.
    fog_color: [f32; 4],
    // Render target size in pixels (presumably `[width, height]`).
    screen_size: [u32; 2],
    instance_count: u32,
    fog_far: f32,
    fov_y_rad: f32,
    // Presumably the number of screen tiles per row for instance
    // binning, with `tile_size` the tile edge in pixels (see
    // `SPRITE_TILE_SIZE`) — TODO confirm at the fill site.
    tiles_x: u32,
    tile_size: u32,
    // Tail padding; brings the struct to 112 bytes.
    _p6: f32,
}
299
/// `MAX_SCENE_GRIDS` as a `usize`, for use as the array length of
/// `SceneDdaUniform::cameras`.
const SCENE_MAX_GRIDS: usize = MAX_SCENE_GRIDS as usize;

/// GPU.10.3 — sprite screen-tile edge in pixels for instance binning.
const SPRITE_TILE_SIZE: u32 = 16;

// The scene_dda bind group + layout wire occupancy pages 1..=3 at
// bindings 12..=14 explicitly; keep that in lockstep with the page
// count. Bump the bindings (here, in the WGSL, and in the bind
// group) if MAX_OCC_PAGES changes.
// Compile-time check: the build fails if the page count drifts.
const _: () = assert!(scene::MAX_OCC_PAGES == 4);
310
/// One camera (position + right/down/forward basis) as laid out in
/// the scene-DDA uniform. Each `[f32; 3]` is followed by an explicit
/// `f32` pad so every vector occupies 16 bytes; the struct is 64
/// bytes. Used both for the per-grid `cameras` array and for
/// `sky_cam` in `SceneDdaUniform`.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaPerGridCamera {
    pos: [f32; 3],
    _pad0: f32,
    right: [f32; 3],
    _pad1: f32,
    down: [f32; 3],
    _pad2: f32,
    forward: [f32; 3],
    _pad3: f32,
}
323
324impl SceneDdaPerGridCamera {
325    fn from_camera(c: &Camera) -> Self {
326        Self {
327            pos: c.position,
328            _pad0: 0.0,
329            right: c.right,
330            _pad1: 0.0,
331            down: c.down,
332            _pad2: 0.0,
333            forward: c.forward,
334            _pad3: 0.0,
335        }
336    }
337}
338
/// Uniform block for the multi-grid scene-DDA pass (presumably
/// mirrors the uniform struct in the scene-DDA WGSL shader — confirm
/// against the shader source). Field order and the explicit `_pad*`
/// members are part of the GPU layout contract: they keep every
/// group of scalars on a 16-byte boundary. Do not reorder fields.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaUniform {
    /// Vertical field of view in radians.
    fov_y_rad: f32,
    /// Number of grids in the scene; `0` is a valid sprite-only /
    /// empty scene (see `sky_cam`).
    grid_count: u32,
    max_outer_steps: u32,
    _pad0: u32,
    /// Render target size in pixels (presumably `[width, height]`).
    screen_size: [u32; 2],
    _pad1: [u32; 2],
    /// One camera slot per possible grid (`SCENE_MAX_GRIDS` entries);
    /// presumably only the first `grid_count` are meaningful — TODO
    /// confirm at the fill site.
    cameras: [SceneDdaPerGridCamera; SCENE_MAX_GRIDS],
    /// GPU.8 — `[r, g, b, fog_near]`. The `near` distance is packed
    /// into the colour's alpha channel to keep std140 alignment
    /// tidy (a bare `f32` after the `vec4` would force extra pads).
    fog_color: [f32; 4],
    fog_far: f32,
    /// GPU.9 — `1` when the sprite pass is active (scene pass then
    /// records `best_t` into the depth buffer), `0` otherwise.
    write_depth: u32,
    /// Occupancy paging: words per storage page (see
    /// `scene::split_occupancy_pages`). Only consulted by the shader
    /// when `occ_num_pages > 1`.
    occ_page_words: u32,
    /// Number of real occupancy pages (1 on multi-GiB GPUs → the
    /// shader takes a branch-free single-page read).
    occ_num_pages: u32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` marches at mip
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// count. `0` disables LOD (always mip-0).
    mip_scan_dist: f32,
    _pad2: u32,
    _pad3: u32,
    _pad4: u32,
    /// World camera used only to derive the per-pixel sky direction —
    /// always valid, so a `grid_count == 0` (sprite-only / empty) scene
    /// still paints a proper sky instead of a degenerate `(0,0,1)`
    /// (whose `atan2(0,0)` sky lookup samples black).
    sky_cam: SceneDdaPerGridCamera,
    /// Per-face side-shade intensities (voxlap setsideshades), each the
    /// u8 shade subtracted from a voxel's brightness byte at a hit.
    /// `side_shades0 = (top, bot, left, right)`,
    /// `side_shades1 = (up, down, _, _)`. All-zero = no shading.
    side_shades0: [i32; 4],
    side_shades1: [i32; 4],
}
384
/// Uniform block for the grid-DDA pass (presumably consumed by
/// `render_grid` and its WGSL shader — confirm there). Camera
/// vectors are padded to 16 bytes each, with `fov_y_rad` occupying
/// the slot after `camera_forward`; the struct is 112 bytes. Field
/// order is part of the GPU layout contract. Do not reorder fields.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct GridDdaUniform {
    camera_pos: [f32; 3],
    _pad0: f32,
    camera_right: [f32; 3],
    _pad1: f32,
    camera_down: [f32; 3],
    _pad2: f32,
    camera_forward: [f32; 3],
    // Takes the place of the pad after `camera_forward`.
    fov_y_rad: f32,
    // Render target size in pixels (presumably `[width, height]`).
    screen_size: [u32; 2],
    vsid: u32,
    max_outer_steps: u32,
    // Presumably the grid extent in chunks per axis — TODO confirm.
    chunks_dims: [u32; 3],
    _pad3: u32,
    // Signed, presumably the chunk coordinate of the grid origin —
    // TODO confirm at the fill site.
    origin_chunk: [i32; 3],
    _pad4: u32,
}
404
/// Uniform block for the single-chunk DDA pass; `render_chunk`
/// fills one per frame and writes it to the chunk-DDA uniform
/// buffer. Camera vectors are padded to 16 bytes each, with
/// `fov_y_rad` occupying the slot after `camera_forward`; the struct
/// is 80 bytes. Field order is part of the GPU layout contract. Do
/// not reorder fields.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct ChunkDdaUniform {
    // Copied from `Camera::position` (chunk-local voxel units per
    // `render_chunk`'s docs).
    camera_pos: [f32; 3],
    _pad0: f32,
    camera_right: [f32; 3],
    _pad1: f32,
    camera_down: [f32; 3],
    _pad2: f32,
    camera_forward: [f32; 3],
    // Takes the place of the pad after `camera_forward`.
    fov_y_rad: f32,
    // `[surface width, surface height]` in pixels.
    screen_size: [u32; 2],
    // Copied from the resident chunk's `vsid`.
    vsid: u32,
    // Caps the per-pixel DDA loop.
    max_scan_dist: u32,
}
420
421impl GpuRenderer {
422    /// Stand up the device + surface + swapchain on `window`. Async
423    /// because `wgpu::Adapter`/`Device` requests are.
424    ///
425    /// `window` is any [`raw-window-handle`] provider (winit, SDL,
426    /// GLFW, …) wrapped in an `Arc`; `size` is its initial physical
427    /// framebuffer size in pixels — passed explicitly so the renderer
428    /// stays decoupled from any one windowing library's size API.
429    ///
430    /// [`raw-window-handle`]: raw_window_handle
431    ///
432    /// # Errors
433    /// Returns [`GpuInitError`] if surface creation, adapter
434    /// selection, or device request fails. Hosts treat any error as
435    /// "fall back to the CPU path".
436    pub async fn new<W>(
437        window: Arc<W>,
438        size: (u32, u32),
439        settings: GpuRendererSettings,
440    ) -> Result<Self, GpuInitError>
441    where
442        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
443    {
444        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::default());
445        let surface = instance.create_surface(window.clone())?;
446        let power_preference = match settings.power_preference {
447            PowerPreference::Low => wgpu::PowerPreference::LowPower,
448            PowerPreference::High => wgpu::PowerPreference::HighPerformance,
449        };
450        let adapter = instance
451            .request_adapter(&wgpu::RequestAdapterOptions {
452                power_preference,
453                compatible_surface: Some(&surface),
454                force_fallback_adapter: false,
455            })
456            .await
457            .ok_or(GpuInitError::NoAdapter)?;
458
459        let info = adapter.get_info();
460        let adapter_info = format!(
461            "{name} ({backend:?}, {device_type:?})",
462            name = info.name,
463            backend = info.backend,
464            device_type = info.device_type,
465        );
466
467        let (device, queue) = adapter
468            .request_device(
469                &wgpu::DeviceDescriptor {
470                    label: Some("roxlap-gpu device"),
471                    required_features: wgpu::Features::empty(),
472                    required_limits: pick_required_limits(&adapter.limits()),
473                    memory_hints: wgpu::MemoryHints::default(),
474                },
475                None,
476            )
477            .await?;
478
479        let caps = surface.get_capabilities(&adapter);
480        // Pick a NON-sRGB swapchain format. Voxlap colours are
481        // already sRGB-encoded (the slab bytes are display-ready,
482        // matching what the CPU softbuffer path writes straight to
483        // the framebuffer with no conversion). An sRGB swapchain
484        // would re-apply the gamma curve on top, producing a
485        // washed-out / pastel look that diverges from the CPU
486        // renderer. Falls back to `caps.formats[0]` only if every
487        // offered format is sRGB.
488        let surface_format = caps
489            .formats
490            .iter()
491            .copied()
492            .find(|f| !f.is_srgb())
493            .unwrap_or(caps.formats[0]);
494        let present_mode = if settings.uncapped_present {
495            pick_present_mode(&caps.present_modes)
496        } else {
497            wgpu::PresentMode::Fifo
498        };
499        // GPU.11.2 — surface the present mode: `Fifo` is vsync-capped
500        // (FPS pinned to refresh rate → compute optimisations like the
501        // mip LOD won't show up in the FPS counter). Mailbox/Immediate
502        // are uncapped. Wayland under Mesa frequently offers only Fifo.
503        eprintln!(
504            "roxlap-gpu: present mode = {present_mode:?} (available: {:?})",
505            caps.present_modes,
506        );
507        let (init_w, init_h) = size;
508        let surface_config = wgpu::SurfaceConfiguration {
509            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
510            format: surface_format,
511            width: init_w.max(1),
512            height: init_h.max(1),
513            present_mode,
514            alpha_mode: caps.alpha_modes[0],
515            view_formats: vec![],
516            desired_maximum_frame_latency: 2,
517        };
518        surface.configure(&device, &surface_config);
519
520        // GPU.8 default sky: a 1×1 mid-grey texture. Hosts replace
521        // it via `set_sky_panorama` with a real equirectangular
522        // panorama; the default stops the shader sampling
523        // uninitialised memory before that happens.
524        let default_sky_pixel = [0x80u8, 0x80, 0x80, 0xff];
525        let (sky_texture, sky_view) = create_sky_texture(&device, 1, 1, &default_sky_pixel);
526        queue.write_texture(
527            wgpu::ImageCopyTexture {
528                texture: &sky_texture,
529                mip_level: 0,
530                origin: wgpu::Origin3d::ZERO,
531                aspect: wgpu::TextureAspect::All,
532            },
533            &default_sky_pixel,
534            wgpu::ImageDataLayout {
535                offset: 0,
536                bytes_per_row: Some(4),
537                rows_per_image: Some(1),
538            },
539            wgpu::Extent3d {
540                width: 1,
541                height: 1,
542                depth_or_array_layers: 1,
543            },
544        );
545        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
546            label: Some("roxlap-gpu sky_sampler"),
547            // Voxlap-convention panorama: u = elevation [0, 1]
548            // (Repeat is a no-op since values don't go outside),
549            // v = azimuth (wraps 360° — Repeat is required).
550            address_mode_u: wgpu::AddressMode::Repeat,
551            address_mode_v: wgpu::AddressMode::Repeat,
552            address_mode_w: wgpu::AddressMode::ClampToEdge,
553            mag_filter: wgpu::FilterMode::Linear,
554            min_filter: wgpu::FilterMode::Linear,
555            mipmap_filter: wgpu::FilterMode::Nearest,
556            ..Default::default()
557        });
558
559        Ok(Self {
560            surface,
561            surface_config,
562            device,
563            queue,
564            adapter_info,
565            clear_colour: settings.clear_colour,
566            frame_count: 0,
567            chunk_dda: None,
568            grid_dda: None,
569            scene_dda: None,
570            sky_texture,
571            sky_view,
572            sky_sampler,
573            // Fog disabled by default — voxlap's CPU rasterizer
574            // also runs without fog in the scene-demo, so matching
575            // it means no GPU fog out of the box. Hosts can opt in
576            // via `set_fog` (e.g. for atmospheric far-LOD masking).
577            fog_color: [0.66, 0.74, 0.88],
578            fog_near: 0.0,
579            fog_far: 1.0e30,
580            sprite_registry: None,
581            sprite_model_dda: None,
582            // GPU.10.4 — default LOD threshold: step to a coarser mip
583            // once a voxel projects below 4 px. Empirically the best
584            // quality/cost tradeoff; the host can override.
585            sprite_lod_px: 4.0,
586            // GPU.11.1 — matches the CPU demo's mip_scan_dist=64.
587            scene_mip_scan_dist: 64.0,
588            scene_side_shades: [[0; 4]; 2],
589            last_fov_y_rad: 0.0,
590            pending_frame: None,
591            #[cfg(feature = "hud")]
592            egui_renderer: None,
593        })
594    }
595
596    /// Synchronous wrapper for hosts that don't have an async
597    /// runtime. Internally `pollster::block_on`s [`Self::new`].
598    ///
599    /// # Errors
600    /// See [`Self::new`].
601    pub fn new_blocking<W>(
602        window: Arc<W>,
603        size: (u32, u32),
604        settings: GpuRendererSettings,
605    ) -> Result<Self, GpuInitError>
606    where
607        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
608    {
609        pollster::block_on(Self::new(window, size, settings))
610    }
611
612    /// Human-readable adapter description — name + backend +
613    /// device type. The demo host prints this in the title bar.
614    pub fn adapter_info(&self) -> &str {
615        &self.adapter_info
616    }
617
618    /// Borrow the underlying wgpu device — hosts use this to build
619    /// chunk uploads (`GpuChunkResident::upload(gpu.device(), …)`).
620    pub fn device(&self) -> &wgpu::Device {
621        &self.device
622    }
623
624    /// Borrow the wgpu queue — hosts use this for read-back paths
625    /// (`GpuChunkResident::read_voxel_blocking(gpu.device(), gpu.queue(), …)`).
626    pub fn queue(&self) -> &wgpu::Queue {
627        &self.queue
628    }
629
630    /// GPU.8 — upload an equirectangular panorama as the scene's
631    /// sky texture. `rgba` is row-major, `width × height` pixels,
632    /// 4 bytes per pixel (R, G, B, A). The shader samples it with
633    /// `u = atan2(dir.x, dir.y) / (2π) + 0.5` (azimuth) and
634    /// `v = acos(-dir.z) / π` (elevation), matching standard
635    /// equirectangular layout (top of image = zenith for voxlap's
636    /// `+z = down` basis).
637    ///
638    /// # Panics
639    /// If `rgba.len() != (width * height * 4) as usize`.
640    pub fn set_sky_panorama(&mut self, rgba: &[u8], width: u32, height: u32) {
641        assert_eq!(
642            rgba.len(),
643            (width as usize) * (height as usize) * 4,
644            "set_sky_panorama: expected w*h*4 bytes, got {}",
645            rgba.len(),
646        );
647        let (tex, view) = create_sky_texture(&self.device, width, height, rgba);
648        // Upload pixel data via `queue.write_texture` so we don't
649        // have to map the buffer manually.
650        self.queue.write_texture(
651            wgpu::ImageCopyTexture {
652                texture: &tex,
653                mip_level: 0,
654                origin: wgpu::Origin3d::ZERO,
655                aspect: wgpu::TextureAspect::All,
656            },
657            rgba,
658            wgpu::ImageDataLayout {
659                offset: 0,
660                bytes_per_row: Some(width * 4),
661                rows_per_image: Some(height),
662            },
663            wgpu::Extent3d {
664                width,
665                height,
666                depth_or_array_layers: 1,
667            },
668        );
669        self.sky_texture = tex;
670        self.sky_view = view;
671    }
672
673    /// GPU.8 — set the fog blend. `color` is per-channel [0, 1];
674    /// `near`/`far` are world-space ray distances in voxel units.
675    /// Hits with `t < near` show their full colour; hits with
676    /// `t > far` show `color` exclusively; in between is a
677    /// smoothstep blend.
678    pub fn set_fog(&mut self, color: [f32; 3], near: f32, far: f32) {
679        self.fog_color = color;
680        self.fog_near = near;
681        self.fog_far = far.max(near + 1.0);
682    }
683
684    /// Re-configure the swapchain to a new physical size. Call from
685    /// `WindowEvent::Resized`. Drops the chunk-DDA storage texture
686    /// so [`Self::render_chunk`] rebuilds it at the new size.
687    pub fn resize(&mut self, width: u32, height: u32) {
688        if width == 0 || height == 0 {
689            return;
690        }
691        self.surface_config.width = width;
692        self.surface_config.height = height;
693        self.surface.configure(&self.device, &self.surface_config);
694        self.chunk_dda = None;
695        self.grid_dda = None;
696        self.scene_dda = None;
697    }
698
699    /// GPU.1 render: single render pass clearing the swapchain to a
700    /// slowly drifting colour, then presenting. Voxels arrive in
701    /// GPU.3+.
702    pub fn render(&mut self) {
703        let surf_tex = match self.surface.get_current_texture() {
704            Ok(t) => t,
705            Err(wgpu::SurfaceError::Outdated | wgpu::SurfaceError::Lost) => {
706                self.surface.configure(&self.device, &self.surface_config);
707                return;
708            }
709            Err(e) => {
710                eprintln!("roxlap-gpu surface error: {e:?}");
711                return;
712            }
713        };
714        let view = surf_tex
715            .texture
716            .create_view(&wgpu::TextureViewDescriptor::default());
717
718        // Slow colour drift so the user can tell the GPU path is
719        // actually presenting frames vs. e.g. a frozen window.
720        // Wrap at 2π/0.005 frames (~1257) so the cast stays exact.
721        let phase = f64::from(self.frame_count % 1257) * 0.005;
722        let [r, g, b] = self.clear_colour;
723        let drift = (phase.sin() * 0.04 + 0.04).clamp(0.0, 0.1);
724        let clear = wgpu::Color {
725            r: (r + drift).clamp(0.0, 1.0),
726            g: (g + drift * 0.5).clamp(0.0, 1.0),
727            b: (b + drift * 0.25).clamp(0.0, 1.0),
728            a: 1.0,
729        };
730
731        let mut encoder = self
732            .device
733            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
734                label: Some("roxlap-gpu encoder"),
735            });
736        {
737            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
738                label: Some("roxlap-gpu clear"),
739                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
740                    view: &view,
741                    resolve_target: None,
742                    ops: wgpu::Operations {
743                        load: wgpu::LoadOp::Clear(clear),
744                        store: wgpu::StoreOp::Store,
745                    },
746                })],
747                depth_stencil_attachment: None,
748                timestamp_writes: None,
749                occlusion_query_set: None,
750            });
751        }
752        self.queue.submit(std::iter::once(encoder.finish()));
753        surf_tex.present();
754        self.frame_count = self.frame_count.wrapping_add(1);
755    }
756
757    /// GPU.3 single-chunk render. Dispatches `chunk_dda.wgsl`
758    /// against `resident`'s storage buffers, then blits the
759    /// low-res storage texture to the swapchain. `camera.position`
760    /// is in **chunk-local** voxel units (host translates from
761    /// world coords). `max_scan_dist` caps the per-pixel DDA loop —
762    /// scene-demo wires `+` / `-` through this each frame.
763    ///
764    /// # Panics
765    /// Internally `expect`s the chunk-DDA resources to be built —
766    /// they are constructed at the top of this function if missing.
767    /// Cannot fire in normal control flow.
768    pub fn render_chunk(
769        &mut self,
770        resident: &GpuChunkResident,
771        camera: &Camera,
772        max_scan_dist: u32,
773    ) {
774        let surf_tex = match self.surface.get_current_texture() {
775            Ok(t) => t,
776            Err(wgpu::SurfaceError::Outdated | wgpu::SurfaceError::Lost) => {
777                self.surface.configure(&self.device, &self.surface_config);
778                return;
779            }
780            Err(e) => {
781                eprintln!("roxlap-gpu surface error: {e:?}");
782                return;
783            }
784        };
785        let surf_view = surf_tex
786            .texture
787            .create_view(&wgpu::TextureViewDescriptor::default());
788
789        let surface_w = self.surface_config.width;
790        let surface_h = self.surface_config.height;
791        let surface_format = self.surface_config.format;
792
793        // Lazy-build chunk-DDA resources; rebuild when the swapchain
794        // grew or shrank.
795        let needs_build = match &self.chunk_dda {
796            Some(r) => r.storage_size != (surface_w, surface_h),
797            None => true,
798        };
799        if needs_build {
800            self.chunk_dda = Some(self.build_chunk_dda(surface_w, surface_h, surface_format));
801        }
802        let dda = self.chunk_dda.as_ref().expect("just built");
803
804        // Update uniforms.
805        let uniform = ChunkDdaUniform {
806            camera_pos: camera.position,
807            _pad0: 0.0,
808            camera_right: camera.right,
809            _pad1: 0.0,
810            camera_down: camera.down,
811            _pad2: 0.0,
812            camera_forward: camera.forward,
813            fov_y_rad: camera.fov_y_rad,
814            screen_size: [surface_w, surface_h],
815            vsid: resident.vsid,
816            max_scan_dist,
817        };
818        self.queue
819            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
820
821        // Per-frame DDA bind group — references the chunk's buffers
822        // so we rebuild every frame (the resident can change between
823        // calls).
824        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
825            label: Some("roxlap-gpu chunk_dda.bg"),
826            layout: &dda.bgl_dda,
827            entries: &[
828                wgpu::BindGroupEntry {
829                    binding: 0,
830                    resource: dda.uniform_buf.as_entire_binding(),
831                },
832                wgpu::BindGroupEntry {
833                    binding: 1,
834                    resource: resident.occupancy.as_entire_binding(),
835                },
836                wgpu::BindGroupEntry {
837                    binding: 2,
838                    resource: resident.color_offsets.as_entire_binding(),
839                },
840                wgpu::BindGroupEntry {
841                    binding: 3,
842                    resource: resident.colors.as_entire_binding(),
843                },
844                wgpu::BindGroupEntry {
845                    binding: 4,
846                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
847                },
848            ],
849        });
850
851        let mut encoder = self
852            .device
853            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
854                label: Some("roxlap-gpu chunk encoder"),
855            });
856        {
857            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
858                label: Some("roxlap-gpu chunk_dda compute"),
859                timestamp_writes: None,
860            });
861            cpass.set_pipeline(&dda.pipeline_dda);
862            cpass.set_bind_group(0, &dda_bg, &[]);
863            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
864        }
865        {
866            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
867                label: Some("roxlap-gpu chunk_dda blit"),
868                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
869                    view: &surf_view,
870                    resolve_target: None,
871                    ops: wgpu::Operations {
872                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
873                        store: wgpu::StoreOp::Store,
874                    },
875                })],
876                depth_stencil_attachment: None,
877                timestamp_writes: None,
878                occlusion_query_set: None,
879            });
880            rpass.set_pipeline(&dda.pipeline_blit);
881            rpass.set_bind_group(0, &dda.blit_bg, &[]);
882            rpass.draw(0..3, 0..1);
883        }
884        self.queue.submit(std::iter::once(encoder.finish()));
885        surf_tex.present();
886        self.frame_count = self.frame_count.wrapping_add(1);
887    }
888
889    fn build_chunk_dda(
890        &self,
891        width: u32,
892        height: u32,
893        surface_format: wgpu::TextureFormat,
894    ) -> ChunkDdaResources {
895        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
896            label: Some("roxlap-gpu chunk_dda.storage"),
897            size: wgpu::Extent3d {
898                width,
899                height,
900                depth_or_array_layers: 1,
901            },
902            mip_level_count: 1,
903            sample_count: 1,
904            dimension: wgpu::TextureDimension::D2,
905            format: wgpu::TextureFormat::Rgba8Unorm,
906            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
907            view_formats: &[],
908        });
909        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
910
911        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
912            label: Some("roxlap-gpu chunk_dda.uniform"),
913            size: std::mem::size_of::<ChunkDdaUniform>() as u64,
914            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
915            mapped_at_creation: false,
916        });
917
918        let dda_shader = self
919            .device
920            .create_shader_module(wgpu::ShaderModuleDescriptor {
921                label: Some("chunk_dda.wgsl"),
922                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/chunk_dda.wgsl").into()),
923            });
924        let bgl_dda = self
925            .device
926            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
927                label: Some("roxlap-gpu chunk_dda.bgl"),
928                entries: &[
929                    bgl_uniform_entry(0),
930                    bgl_storage_entry(1, true),
931                    bgl_storage_entry(2, true),
932                    bgl_storage_entry(3, true),
933                    wgpu::BindGroupLayoutEntry {
934                        binding: 4,
935                        visibility: wgpu::ShaderStages::COMPUTE,
936                        ty: wgpu::BindingType::StorageTexture {
937                            access: wgpu::StorageTextureAccess::WriteOnly,
938                            format: wgpu::TextureFormat::Rgba8Unorm,
939                            view_dimension: wgpu::TextureViewDimension::D2,
940                        },
941                        count: None,
942                    },
943                ],
944            });
945        let dda_pl = self
946            .device
947            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
948                label: Some("roxlap-gpu chunk_dda.layout"),
949                bind_group_layouts: &[&bgl_dda],
950                push_constant_ranges: &[],
951            });
952        let pipeline_dda = self
953            .device
954            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
955                label: Some("roxlap-gpu chunk_dda.pipeline"),
956                layout: Some(&dda_pl),
957                module: &dda_shader,
958                entry_point: "render_chunk",
959                compilation_options: wgpu::PipelineCompilationOptions::default(),
960                cache: None,
961            });
962
963        // Fullscreen-triangle blit upscales the storage texture into
964        // the swapchain. Nearest filter keeps the retro pixel look.
965        let blit_shader = self
966            .device
967            .create_shader_module(wgpu::ShaderModuleDescriptor {
968                label: Some("blit.wgsl"),
969                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
970            });
971        let bgl_blit = self
972            .device
973            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
974                label: Some("roxlap-gpu chunk_dda.blit_bgl"),
975                entries: &[
976                    wgpu::BindGroupLayoutEntry {
977                        binding: 0,
978                        visibility: wgpu::ShaderStages::FRAGMENT,
979                        ty: wgpu::BindingType::Texture {
980                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
981                            view_dimension: wgpu::TextureViewDimension::D2,
982                            multisampled: false,
983                        },
984                        count: None,
985                    },
986                    wgpu::BindGroupLayoutEntry {
987                        binding: 1,
988                        visibility: wgpu::ShaderStages::FRAGMENT,
989                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
990                        count: None,
991                    },
992                ],
993            });
994        let blit_pl = self
995            .device
996            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
997                label: Some("roxlap-gpu chunk_dda.blit_layout"),
998                bind_group_layouts: &[&bgl_blit],
999                push_constant_ranges: &[],
1000            });
1001        let pipeline_blit = self
1002            .device
1003            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1004                label: Some("roxlap-gpu chunk_dda.blit_pipeline"),
1005                layout: Some(&blit_pl),
1006                vertex: wgpu::VertexState {
1007                    module: &blit_shader,
1008                    entry_point: "vs_main",
1009                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1010                    buffers: &[],
1011                },
1012                fragment: Some(wgpu::FragmentState {
1013                    module: &blit_shader,
1014                    entry_point: "fs_main",
1015                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1016                    targets: &[Some(wgpu::ColorTargetState {
1017                        format: surface_format,
1018                        blend: None,
1019                        write_mask: wgpu::ColorWrites::ALL,
1020                    })],
1021                }),
1022                primitive: wgpu::PrimitiveState::default(),
1023                depth_stencil: None,
1024                multisample: wgpu::MultisampleState::default(),
1025                multiview: None,
1026                cache: None,
1027            });
1028        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1029            label: Some("roxlap-gpu chunk_dda.blit_sampler"),
1030            address_mode_u: wgpu::AddressMode::ClampToEdge,
1031            address_mode_v: wgpu::AddressMode::ClampToEdge,
1032            address_mode_w: wgpu::AddressMode::ClampToEdge,
1033            mag_filter: wgpu::FilterMode::Nearest,
1034            min_filter: wgpu::FilterMode::Nearest,
1035            mipmap_filter: wgpu::FilterMode::Nearest,
1036            ..Default::default()
1037        });
1038        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1039            label: Some("roxlap-gpu chunk_dda.blit_bg"),
1040            layout: &bgl_blit,
1041            entries: &[
1042                wgpu::BindGroupEntry {
1043                    binding: 0,
1044                    resource: wgpu::BindingResource::TextureView(&storage_view),
1045                },
1046                wgpu::BindGroupEntry {
1047                    binding: 1,
1048                    resource: wgpu::BindingResource::Sampler(&sampler),
1049                },
1050            ],
1051        });
1052
1053        ChunkDdaResources {
1054            storage_size: (width, height),
1055            storage_view,
1056            uniform_buf,
1057            bgl_dda,
1058            pipeline_dda,
1059            blit_bg,
1060            pipeline_blit,
1061            _sampler: sampler,
1062        }
1063    }
1064
1065    /// GPU.4 render — outer DDA over chunk indices + inner DDA into
1066    /// non-empty chunks. `camera.position` is in **grid-local**
1067    /// voxel units. `max_outer_steps` caps how many chunks the
1068    /// outer DDA may traverse per ray (scene-demo wires `+ / -`
1069    /// through this).
1070    ///
1071    /// # Panics
1072    /// Internally `expect`s the grid-DDA resources to be built;
1073    /// they are constructed at the top of this function if missing.
1074    pub fn render_grid(&mut self, grid: &GpuGridResident, camera: &Camera, max_outer_steps: u32) {
1075        let surf_tex = match self.surface.get_current_texture() {
1076            Ok(t) => t,
1077            Err(wgpu::SurfaceError::Outdated | wgpu::SurfaceError::Lost) => {
1078                self.surface.configure(&self.device, &self.surface_config);
1079                return;
1080            }
1081            Err(e) => {
1082                eprintln!("roxlap-gpu surface error: {e:?}");
1083                return;
1084            }
1085        };
1086        let surf_view = surf_tex
1087            .texture
1088            .create_view(&wgpu::TextureViewDescriptor::default());
1089
1090        let surface_w = self.surface_config.width;
1091        let surface_h = self.surface_config.height;
1092        let surface_format = self.surface_config.format;
1093
1094        let needs_build = match &self.grid_dda {
1095            Some(r) => r.storage_size != (surface_w, surface_h),
1096            None => true,
1097        };
1098        if needs_build {
1099            self.grid_dda = Some(self.build_grid_dda(surface_w, surface_h, surface_format));
1100        }
1101        let dda = self.grid_dda.as_ref().expect("just built");
1102
1103        let uniform = GridDdaUniform {
1104            camera_pos: camera.position,
1105            _pad0: 0.0,
1106            camera_right: camera.right,
1107            _pad1: 0.0,
1108            camera_down: camera.down,
1109            _pad2: 0.0,
1110            camera_forward: camera.forward,
1111            fov_y_rad: camera.fov_y_rad,
1112            screen_size: [surface_w, surface_h],
1113            vsid: grid.vsid,
1114            max_outer_steps,
1115            chunks_dims: grid.chunks_dims,
1116            _pad3: 0,
1117            origin_chunk: grid.origin_chunk,
1118            _pad4: 0,
1119        };
1120        self.queue
1121            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1122
1123        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1124            label: Some("roxlap-gpu grid_dda.bg"),
1125            layout: &dda.bgl_dda,
1126            entries: &[
1127                wgpu::BindGroupEntry {
1128                    binding: 0,
1129                    resource: dda.uniform_buf.as_entire_binding(),
1130                },
1131                wgpu::BindGroupEntry {
1132                    binding: 1,
1133                    resource: grid.occupancy.as_entire_binding(),
1134                },
1135                wgpu::BindGroupEntry {
1136                    binding: 2,
1137                    resource: grid.color_offsets.as_entire_binding(),
1138                },
1139                wgpu::BindGroupEntry {
1140                    binding: 3,
1141                    resource: grid.colors.as_entire_binding(),
1142                },
1143                wgpu::BindGroupEntry {
1144                    binding: 4,
1145                    resource: grid.chunk_colors_base.as_entire_binding(),
1146                },
1147                wgpu::BindGroupEntry {
1148                    binding: 5,
1149                    resource: grid.chunk_occupancy.as_entire_binding(),
1150                },
1151                wgpu::BindGroupEntry {
1152                    binding: 6,
1153                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1154                },
1155            ],
1156        });
1157
1158        let mut encoder = self
1159            .device
1160            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1161                label: Some("roxlap-gpu grid encoder"),
1162            });
1163        {
1164            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1165                label: Some("roxlap-gpu grid_dda compute"),
1166                timestamp_writes: None,
1167            });
1168            cpass.set_pipeline(&dda.pipeline_dda);
1169            cpass.set_bind_group(0, &dda_bg, &[]);
1170            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1171        }
1172        {
1173            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1174                label: Some("roxlap-gpu grid_dda blit"),
1175                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1176                    view: &surf_view,
1177                    resolve_target: None,
1178                    ops: wgpu::Operations {
1179                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1180                        store: wgpu::StoreOp::Store,
1181                    },
1182                })],
1183                depth_stencil_attachment: None,
1184                timestamp_writes: None,
1185                occlusion_query_set: None,
1186            });
1187            rpass.set_pipeline(&dda.pipeline_blit);
1188            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1189            rpass.draw(0..3, 0..1);
1190        }
1191        self.queue.submit(std::iter::once(encoder.finish()));
1192        surf_tex.present();
1193        self.frame_count = self.frame_count.wrapping_add(1);
1194    }
1195
1196    fn build_grid_dda(
1197        &self,
1198        width: u32,
1199        height: u32,
1200        surface_format: wgpu::TextureFormat,
1201    ) -> GridDdaResources {
1202        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1203            label: Some("roxlap-gpu grid_dda.storage"),
1204            size: wgpu::Extent3d {
1205                width,
1206                height,
1207                depth_or_array_layers: 1,
1208            },
1209            mip_level_count: 1,
1210            sample_count: 1,
1211            dimension: wgpu::TextureDimension::D2,
1212            format: wgpu::TextureFormat::Rgba8Unorm,
1213            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1214            view_formats: &[],
1215        });
1216        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1217
1218        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1219            label: Some("roxlap-gpu grid_dda.uniform"),
1220            size: std::mem::size_of::<GridDdaUniform>() as u64,
1221            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1222            mapped_at_creation: false,
1223        });
1224
1225        let dda_shader = self
1226            .device
1227            .create_shader_module(wgpu::ShaderModuleDescriptor {
1228                label: Some("grid_dda.wgsl"),
1229                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/grid_dda.wgsl").into()),
1230            });
1231        let bgl_dda = self
1232            .device
1233            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1234                label: Some("roxlap-gpu grid_dda.bgl"),
1235                entries: &[
1236                    bgl_uniform_entry(0),
1237                    bgl_storage_entry(1, true),
1238                    bgl_storage_entry(2, true),
1239                    bgl_storage_entry(3, true),
1240                    bgl_storage_entry(4, true),
1241                    bgl_storage_entry(5, true),
1242                    wgpu::BindGroupLayoutEntry {
1243                        binding: 6,
1244                        visibility: wgpu::ShaderStages::COMPUTE,
1245                        ty: wgpu::BindingType::StorageTexture {
1246                            access: wgpu::StorageTextureAccess::WriteOnly,
1247                            format: wgpu::TextureFormat::Rgba8Unorm,
1248                            view_dimension: wgpu::TextureViewDimension::D2,
1249                        },
1250                        count: None,
1251                    },
1252                ],
1253            });
1254        let dda_pl = self
1255            .device
1256            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1257                label: Some("roxlap-gpu grid_dda.layout"),
1258                bind_group_layouts: &[&bgl_dda],
1259                push_constant_ranges: &[],
1260            });
1261        let pipeline_dda = self
1262            .device
1263            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1264                label: Some("roxlap-gpu grid_dda.pipeline"),
1265                layout: Some(&dda_pl),
1266                module: &dda_shader,
1267                entry_point: "render_grid",
1268                compilation_options: wgpu::PipelineCompilationOptions::default(),
1269                cache: None,
1270            });
1271
1272        let blit_shader = self
1273            .device
1274            .create_shader_module(wgpu::ShaderModuleDescriptor {
1275                label: Some("blit.wgsl"),
1276                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1277            });
1278        let bgl_blit = self
1279            .device
1280            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1281                label: Some("roxlap-gpu grid_dda.blit_bgl"),
1282                entries: &[
1283                    wgpu::BindGroupLayoutEntry {
1284                        binding: 0,
1285                        visibility: wgpu::ShaderStages::FRAGMENT,
1286                        ty: wgpu::BindingType::Texture {
1287                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1288                            view_dimension: wgpu::TextureViewDimension::D2,
1289                            multisampled: false,
1290                        },
1291                        count: None,
1292                    },
1293                    wgpu::BindGroupLayoutEntry {
1294                        binding: 1,
1295                        visibility: wgpu::ShaderStages::FRAGMENT,
1296                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1297                        count: None,
1298                    },
1299                ],
1300            });
1301        let blit_pl = self
1302            .device
1303            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1304                label: Some("roxlap-gpu grid_dda.blit_layout"),
1305                bind_group_layouts: &[&bgl_blit],
1306                push_constant_ranges: &[],
1307            });
1308        let pipeline_blit = self
1309            .device
1310            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1311                label: Some("roxlap-gpu grid_dda.blit_pipeline"),
1312                layout: Some(&blit_pl),
1313                vertex: wgpu::VertexState {
1314                    module: &blit_shader,
1315                    entry_point: "vs_main",
1316                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1317                    buffers: &[],
1318                },
1319                fragment: Some(wgpu::FragmentState {
1320                    module: &blit_shader,
1321                    entry_point: "fs_main",
1322                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1323                    targets: &[Some(wgpu::ColorTargetState {
1324                        format: surface_format,
1325                        blend: None,
1326                        write_mask: wgpu::ColorWrites::ALL,
1327                    })],
1328                }),
1329                primitive: wgpu::PrimitiveState::default(),
1330                depth_stencil: None,
1331                multisample: wgpu::MultisampleState::default(),
1332                multiview: None,
1333                cache: None,
1334            });
1335        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1336            label: Some("roxlap-gpu grid_dda.blit_sampler"),
1337            address_mode_u: wgpu::AddressMode::ClampToEdge,
1338            address_mode_v: wgpu::AddressMode::ClampToEdge,
1339            address_mode_w: wgpu::AddressMode::ClampToEdge,
1340            mag_filter: wgpu::FilterMode::Nearest,
1341            min_filter: wgpu::FilterMode::Nearest,
1342            mipmap_filter: wgpu::FilterMode::Nearest,
1343            ..Default::default()
1344        });
1345        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1346            label: Some("roxlap-gpu grid_dda.blit_bg"),
1347            layout: &bgl_blit,
1348            entries: &[
1349                wgpu::BindGroupEntry {
1350                    binding: 0,
1351                    resource: wgpu::BindingResource::TextureView(&storage_view),
1352                },
1353                wgpu::BindGroupEntry {
1354                    binding: 1,
1355                    resource: wgpu::BindingResource::Sampler(&sampler),
1356                },
1357            ],
1358        });
1359
1360        GridDdaResources {
1361            storage_size: (width, height),
1362            storage_view,
1363            uniform_buf,
1364            bgl_dda,
1365            pipeline_dda,
1366            blit_bg,
1367            pipeline_blit,
1368            _sampler: sampler,
1369        }
1370    }
1371
1372    /// GPU.5 render — multi-grid scene marcher. `cameras[i]` is the
1373    /// world camera transformed into grid `i`'s local frame
1374    /// (caller-supplied; see scene-demo's `redraw_gpu` for the
1375    /// glam-based transform). `fov_y_rad` is the shared vertical
1376    /// FOV; `max_outer_steps` caps per-ray chunk-DDA work for each
1377    /// grid.
1378    ///
1379    /// # Panics
1380    /// If `cameras.len() != scene.grid_count` or
1381    /// `scene.grid_count > MAX_SCENE_GRIDS`.
1382    /// `cameras[i]` is grid `i`'s world camera transformed into that
1383    /// grid's local frame (the grid marcher works in grid-local space).
1384    /// `sprite_camera` is the **world** camera: instanced sprites carry
1385    /// world-space positions/transforms, so they must project through
1386    /// the untransformed world camera — not `cameras[0]`, which is only
1387    /// the world camera when grid 0 is at identity.
1388    pub fn render_scene(
1389        &mut self,
1390        scene: &GpuSceneResident,
1391        cameras: &[Camera],
1392        sprite_camera: &Camera,
1393        fov_y_rad: f32,
1394        max_outer_steps: u32,
1395    ) {
1396        assert_eq!(
1397            cameras.len(),
1398            scene.grid_count as usize,
1399            "render_scene: {} cameras supplied, scene has {} grids",
1400            cameras.len(),
1401            scene.grid_count,
1402        );
1403        assert!(
1404            scene.grid_count as usize <= SCENE_MAX_GRIDS,
1405            "render_scene: scene has {} grids, shader supports {}",
1406            scene.grid_count,
1407            SCENE_MAX_GRIDS,
1408        );
1409        self.last_fov_y_rad = fov_y_rad; // cached for pixel_ray (picking)
1410
1411        // Deferred present: drop any frame a prior render left
1412        // un-presented (a host that skipped present/paint_egui) so we
1413        // never hold two outstanding swapchain textures.
1414        self.pending_frame = None;
1415        let surf_tex = match self.surface.get_current_texture() {
1416            Ok(t) => t,
1417            Err(wgpu::SurfaceError::Outdated | wgpu::SurfaceError::Lost) => {
1418                self.surface.configure(&self.device, &self.surface_config);
1419                return;
1420            }
1421            Err(e) => {
1422                eprintln!("roxlap-gpu surface error: {e:?}");
1423                return;
1424            }
1425        };
1426        let surf_view = surf_tex
1427            .texture
1428            .create_view(&wgpu::TextureViewDescriptor::default());
1429
1430        let surface_w = self.surface_config.width;
1431        let surface_h = self.surface_config.height;
1432        let surface_format = self.surface_config.format;
1433
1434        let needs_build = match &self.scene_dda {
1435            Some(r) => r.storage_size != (surface_w, surface_h),
1436            None => true,
1437        };
1438        if needs_build {
1439            self.scene_dda = Some(self.build_scene_dda(surface_w, surface_h, surface_format));
1440        }
1441        // GPU.9 — materialise the sprite pipeline the first frame
1442        // sprites are present (before the immutable `dda` borrow).
1443        // GPU.10.0 — build the model-DDA pipeline the first frame a
1444        // sprite registry is present.
1445        if self.sprite_registry.is_some() && self.sprite_model_dda.is_none() {
1446            self.sprite_model_dda = Some(self.build_sprite_model_dda());
1447        }
1448        // GPU.10.3 — frustum-cull + screen-tile-bin the sprite instances
1449        // (needs &mut self for buffer growth, so before the immutable
1450        // scene_dda borrow). Captures (visible_count, tiles_x); None when
1451        // nothing is in view.
1452        let sprite_pass: Option<(u32, u32)> = if let Some(reg) = self.sprite_registry.as_mut() {
1453            if reg.instance_capacity > 0 {
1454                // World camera — sprite positions/transforms are world-
1455                // space (independent of any grid's transform).
1456                let cam = sprite_camera;
1457                #[allow(clippy::cast_precision_loss)]
1458                let aspect = surface_w as f32 / surface_h as f32;
1459                let half_h = (fov_y_rad * 0.5).tan();
1460                let frustum = sprite_model::ViewFrustum {
1461                    pos: cam.position,
1462                    right: cam.right,
1463                    down: cam.down,
1464                    forward: cam.forward,
1465                    half_w: half_h * aspect,
1466                    half_h,
1467                    far: 1.0e9,
1468                };
1469                let (visible, tiles_x, _tiles_y) = reg.cull_bin_upload(
1470                    &self.device,
1471                    &self.queue,
1472                    &frustum,
1473                    surface_w,
1474                    surface_h,
1475                    SPRITE_TILE_SIZE,
1476                    self.sprite_lod_px,
1477                );
1478                (visible > 0).then_some((visible, tiles_x))
1479            } else {
1480                None
1481            }
1482        } else {
1483            None
1484        };
1485        let dda = self.scene_dda.as_ref().expect("just built");
1486
1487        // Pack per-grid cameras.
1488        let mut cam_array = [SceneDdaPerGridCamera::zeroed(); SCENE_MAX_GRIDS];
1489        for (i, cam) in cameras.iter().enumerate() {
1490            cam_array[i] = SceneDdaPerGridCamera {
1491                pos: cam.position,
1492                _pad0: 0.0,
1493                right: cam.right,
1494                _pad1: 0.0,
1495                down: cam.down,
1496                _pad2: 0.0,
1497                forward: cam.forward,
1498                _pad3: 0.0,
1499            };
1500        }
1501        let uniform = SceneDdaUniform {
1502            fov_y_rad,
1503            grid_count: scene.grid_count,
1504            max_outer_steps,
1505            _pad0: 0,
1506            screen_size: [surface_w, surface_h],
1507            _pad1: [0; 2],
1508            cameras: cam_array,
1509            fog_color: [
1510                self.fog_color[0],
1511                self.fog_color[1],
1512                self.fog_color[2],
1513                self.fog_near,
1514            ],
1515            fog_far: self.fog_far,
1516            write_depth: u32::from(self.sprite_registry.is_some()),
1517            occ_page_words: scene.occupancy_page_words,
1518            occ_num_pages: scene.occupancy_num_pages,
1519            mip_scan_dist: self.scene_mip_scan_dist,
1520            _pad2: 0,
1521            _pad3: 0,
1522            _pad4: 0,
1523            // Sky direction comes from the world (sprite) camera, so a
1524            // grid-less sprite-only scene still paints a real sky.
1525            sky_cam: SceneDdaPerGridCamera::from_camera(sprite_camera),
1526            side_shades0: self.scene_side_shades[0],
1527            side_shades1: self.scene_side_shades[1],
1528        };
1529        self.queue
1530            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1531
1532        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1533            label: Some("roxlap-gpu scene_dda.bg"),
1534            layout: &dda.bgl_dda,
1535            entries: &[
1536                wgpu::BindGroupEntry {
1537                    binding: 0,
1538                    resource: dda.uniform_buf.as_entire_binding(),
1539                },
1540                // Occupancy page 0 at binding 1; pages 1..MAX_OCC_PAGES
1541                // at bindings 12.. (see GPU.X occupancy paging).
1542                wgpu::BindGroupEntry {
1543                    binding: 1,
1544                    resource: scene.occupancy_pages[0].as_entire_binding(),
1545                },
1546                wgpu::BindGroupEntry {
1547                    binding: 2,
1548                    resource: scene.all_color_offsets.as_entire_binding(),
1549                },
1550                wgpu::BindGroupEntry {
1551                    binding: 3,
1552                    resource: scene.all_colors.as_entire_binding(),
1553                },
1554                wgpu::BindGroupEntry {
1555                    binding: 4,
1556                    resource: scene.all_chunk_colors_base.as_entire_binding(),
1557                },
1558                wgpu::BindGroupEntry {
1559                    binding: 5,
1560                    resource: scene.all_chunk_occupancy.as_entire_binding(),
1561                },
1562                wgpu::BindGroupEntry {
1563                    binding: 6,
1564                    resource: scene.grid_static_meta.as_entire_binding(),
1565                },
1566                wgpu::BindGroupEntry {
1567                    binding: 7,
1568                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
1569                },
1570                wgpu::BindGroupEntry {
1571                    binding: 8,
1572                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1573                },
1574                wgpu::BindGroupEntry {
1575                    binding: 9,
1576                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
1577                },
1578                wgpu::BindGroupEntry {
1579                    binding: 10,
1580                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
1581                },
1582                wgpu::BindGroupEntry {
1583                    binding: 11,
1584                    resource: dda.depth_buffer.as_entire_binding(),
1585                },
1586                wgpu::BindGroupEntry {
1587                    binding: 12,
1588                    resource: scene.occupancy_pages[1].as_entire_binding(),
1589                },
1590                wgpu::BindGroupEntry {
1591                    binding: 13,
1592                    resource: scene.occupancy_pages[2].as_entire_binding(),
1593                },
1594                wgpu::BindGroupEntry {
1595                    binding: 14,
1596                    resource: scene.occupancy_pages[3].as_entire_binding(),
1597                },
1598            ],
1599        });
1600
1601        // GPU.9 — when sprites are present, build both splatter bind
1602        // groups up front (the splat pass writes the key buffer; the
1603        // resolve pass reads keys + scene depth and writes colour).
1604        // GPU.10.3 — model-DDA bind group + per-frame uniform, using the
1605        // cull/bin results captured above. Per-model + per-instance data
1606        // + the tile lists live in the registry buffers.
1607        let sprite_model_bg = match (&self.sprite_model_dda, &self.sprite_registry, sprite_pass) {
1608            (Some(smd), Some(reg), Some((visible, tiles_x))) => {
1609                // World camera (see the cull pass above) — sprites
1610                // project through it regardless of grid 0's transform.
1611                let cam = sprite_camera;
1612                let uni = SpriteModelUniform {
1613                    cam_pos: cam.position,
1614                    _p0: 0.0,
1615                    cam_right: cam.right,
1616                    _p1: 0.0,
1617                    cam_down: cam.down,
1618                    _p2: 0.0,
1619                    cam_forward: cam.forward,
1620                    _p3: 0.0,
1621                    fog_color: [
1622                        self.fog_color[0],
1623                        self.fog_color[1],
1624                        self.fog_color[2],
1625                        self.fog_near,
1626                    ],
1627                    screen_size: [surface_w, surface_h],
1628                    instance_count: visible,
1629                    fog_far: self.fog_far,
1630                    fov_y_rad,
1631                    tiles_x,
1632                    tile_size: SPRITE_TILE_SIZE,
1633                    _p6: 0.0,
1634                };
1635                self.queue
1636                    .write_buffer(&smd.uniform_buf, 0, bytemuck::bytes_of(&uni));
1637                Some(self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1638                    label: Some("roxlap-gpu sprite_model_dda.bg"),
1639                    layout: &smd.bgl,
1640                    entries: &[
1641                        wgpu::BindGroupEntry {
1642                            binding: 0,
1643                            resource: smd.uniform_buf.as_entire_binding(),
1644                        },
1645                        wgpu::BindGroupEntry {
1646                            binding: 1,
1647                            resource: reg.occupancy.as_entire_binding(),
1648                        },
1649                        wgpu::BindGroupEntry {
1650                            binding: 2,
1651                            resource: reg.colors.as_entire_binding(),
1652                        },
1653                        wgpu::BindGroupEntry {
1654                            binding: 3,
1655                            resource: reg.color_offsets.as_entire_binding(),
1656                        },
1657                        wgpu::BindGroupEntry {
1658                            binding: 4,
1659                            resource: reg.model_meta.as_entire_binding(),
1660                        },
1661                        wgpu::BindGroupEntry {
1662                            binding: 5,
1663                            resource: reg.instances.as_entire_binding(),
1664                        },
1665                        wgpu::BindGroupEntry {
1666                            binding: 6,
1667                            resource: dda.depth_buffer.as_entire_binding(),
1668                        },
1669                        wgpu::BindGroupEntry {
1670                            binding: 7,
1671                            resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1672                        },
1673                        wgpu::BindGroupEntry {
1674                            binding: 8,
1675                            resource: reg.tile_ranges.as_entire_binding(),
1676                        },
1677                        wgpu::BindGroupEntry {
1678                            binding: 9,
1679                            resource: reg.tile_instances.as_entire_binding(),
1680                        },
1681                        wgpu::BindGroupEntry {
1682                            binding: 10,
1683                            resource: reg.dirs.as_entire_binding(),
1684                        },
1685                        wgpu::BindGroupEntry {
1686                            binding: 11,
1687                            resource: reg.colmul.as_entire_binding(),
1688                        },
1689                    ],
1690                }))
1691            }
1692            _ => None,
1693        };
1694
1695        let mut encoder = self
1696            .device
1697            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1698                label: Some("roxlap-gpu scene encoder"),
1699            });
1700        {
1701            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1702                label: Some("roxlap-gpu scene_dda compute"),
1703                timestamp_writes: None,
1704            });
1705            cpass.set_pipeline(&dda.pipeline_dda);
1706            cpass.set_bind_group(0, &dda_bg, &[]);
1707            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1708        }
1709        // GPU.10 — sprite model-DDA pass: one thread per pixel marches
1710        // the tile's instances + composites against scene depth, after
1711        // the scene pass wrote the depth buffer and before the blit.
1712        if let (Some(smd), Some(bg)) = (&self.sprite_model_dda, &sprite_model_bg) {
1713            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1714                label: Some("roxlap-gpu sprite_model_dda"),
1715                timestamp_writes: None,
1716            });
1717            cpass.set_pipeline(&smd.pipeline);
1718            cpass.set_bind_group(0, bg, &[]);
1719            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1720        }
1721        {
1722            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1723                label: Some("roxlap-gpu scene_dda blit"),
1724                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1725                    view: &surf_view,
1726                    resolve_target: None,
1727                    ops: wgpu::Operations {
1728                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1729                        store: wgpu::StoreOp::Store,
1730                    },
1731                })],
1732                depth_stencil_attachment: None,
1733                timestamp_writes: None,
1734                occlusion_query_set: None,
1735            });
1736            rpass.set_pipeline(&dda.pipeline_blit);
1737            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1738            rpass.draw(0..3, 0..1);
1739        }
1740        self.queue.submit(std::iter::once(encoder.finish()));
1741        // Deferred present — the host calls `present` or `paint_egui`.
1742        self.pending_frame = Some((surf_tex, surf_view));
1743        self.frame_count = self.frame_count.wrapping_add(1);
1744    }
1745
1746    /// Like [`Self::render`] (clear to colour) but **deferred**: stashes
1747    /// the frame for [`Self::present`] / [`Self::paint_egui`] instead of
1748    /// presenting. The facade uses this before any grid is resident so a
1749    /// HUD can still be painted over an empty scene.
1750    pub fn render_clear_deferred(&mut self) {
1751        self.pending_frame = None;
1752        let surf_tex = match self.surface.get_current_texture() {
1753            Ok(t) => t,
1754            Err(wgpu::SurfaceError::Outdated | wgpu::SurfaceError::Lost) => {
1755                self.surface.configure(&self.device, &self.surface_config);
1756                return;
1757            }
1758            Err(e) => {
1759                eprintln!("roxlap-gpu surface error: {e:?}");
1760                return;
1761            }
1762        };
1763        let view = surf_tex
1764            .texture
1765            .create_view(&wgpu::TextureViewDescriptor::default());
1766        let [r, g, b] = self.clear_colour;
1767        let mut encoder = self
1768            .device
1769            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1770                label: Some("roxlap-gpu clear (deferred)"),
1771            });
1772        {
1773            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1774                label: Some("roxlap-gpu clear (deferred)"),
1775                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1776                    view: &view,
1777                    resolve_target: None,
1778                    ops: wgpu::Operations {
1779                        load: wgpu::LoadOp::Clear(wgpu::Color { r, g, b, a: 1.0 }),
1780                        store: wgpu::StoreOp::Store,
1781                    },
1782                })],
1783                depth_stencil_attachment: None,
1784                timestamp_writes: None,
1785                occlusion_query_set: None,
1786            });
1787        }
1788        self.queue.submit(std::iter::once(encoder.finish()));
1789        self.pending_frame = Some((surf_tex, view));
1790    }
1791
1792    /// Present the frame stashed by the last deferred render
1793    /// ([`Self::render_scene`] / [`Self::render_clear_deferred`]). No-op
1794    /// if nothing is pending (e.g. the surface was lost mid-render).
1795    pub fn present(&mut self) {
1796        if let Some((surf_tex, _view)) = self.pending_frame.take() {
1797            surf_tex.present();
1798        }
1799    }
1800
1801    /// Overlay an `egui` UI on the pending frame, then present it
1802    /// (`hud` feature). `jobs` are the host's tessellated primitives
1803    /// (`egui::Context::tessellate`), `textures` the per-frame texture
1804    /// delta from `egui::FullOutput`, `pixels_per_point` the UI scale.
1805    ///
1806    /// Draws with `LoadOp::Load` over the marcher's frame (a separate
1807    /// encoder submitted after the scene's), so the UI composites on top
1808    /// of the world. No-op if no frame is pending.
1809    #[cfg(feature = "hud")]
1810    pub fn paint_egui(
1811        &mut self,
1812        jobs: &[egui::ClippedPrimitive],
1813        textures: &egui::TexturesDelta,
1814        pixels_per_point: f32,
1815    ) {
1816        let Some((surf_tex, surf_view)) = self.pending_frame.take() else {
1817            return;
1818        };
1819        let format = self.surface_config.format;
1820        let egui_rend = self
1821            .egui_renderer
1822            .get_or_insert_with(|| egui_wgpu::Renderer::new(&self.device, format, None, 1, false));
1823
1824        let screen = egui_wgpu::ScreenDescriptor {
1825            size_in_pixels: [self.surface_config.width, self.surface_config.height],
1826            pixels_per_point,
1827        };
1828        for (id, delta) in &textures.set {
1829            egui_rend.update_texture(&self.device, &self.queue, *id, delta);
1830        }
1831        let mut encoder = self
1832            .device
1833            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1834                label: Some("roxlap-gpu egui"),
1835            });
1836        let user_bufs =
1837            egui_rend.update_buffers(&self.device, &self.queue, &mut encoder, jobs, &screen);
1838        {
1839            // `LoadOp::Load` keeps the marcher's frame; egui draws over it.
1840            let mut pass = encoder
1841                .begin_render_pass(&wgpu::RenderPassDescriptor {
1842                    label: Some("roxlap-gpu egui paint"),
1843                    color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1844                        view: &surf_view,
1845                        resolve_target: None,
1846                        ops: wgpu::Operations {
1847                            load: wgpu::LoadOp::Load,
1848                            store: wgpu::StoreOp::Store,
1849                        },
1850                    })],
1851                    depth_stencil_attachment: None,
1852                    timestamp_writes: None,
1853                    occlusion_query_set: None,
1854                })
1855                // egui-wgpu 0.29 requires a `'static` pass (see its docs).
1856                .forget_lifetime();
1857            egui_rend.render(&mut pass, jobs, &screen);
1858        }
1859        for id in &textures.free {
1860            egui_rend.free_texture(id);
1861        }
1862        self.queue.submit(
1863            user_bufs
1864                .into_iter()
1865                .chain(std::iter::once(encoder.finish())),
1866        );
1867        surf_tex.present();
1868    }
1869
1870    fn build_scene_dda(
1871        &self,
1872        width: u32,
1873        height: u32,
1874        surface_format: wgpu::TextureFormat,
1875    ) -> SceneDdaResources {
1876        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1877            label: Some("roxlap-gpu scene_dda.storage"),
1878            size: wgpu::Extent3d {
1879                width,
1880                height,
1881                depth_or_array_layers: 1,
1882            },
1883            mip_level_count: 1,
1884            sample_count: 1,
1885            dimension: wgpu::TextureDimension::D2,
1886            format: wgpu::TextureFormat::Rgba8Unorm,
1887            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1888            view_formats: &[],
1889        });
1890        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1891
1892        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1893            label: Some("roxlap-gpu scene_dda.uniform"),
1894            size: std::mem::size_of::<SceneDdaUniform>() as u64,
1895            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1896            mapped_at_creation: false,
1897        });
1898
1899        // GPU.9 — per-pixel world-t depth (f32 bits as u32). Sized to
1900        // the storage texture; written by the scene pass when sprites
1901        // are active, read+tested by the sprite splatter.
1902        let depth_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
1903            label: Some("roxlap-gpu scene_dda.depth"),
1904            size: u64::from(width) * u64::from(height) * 4,
1905            // COPY_SRC so `read_depth_pixel` can stage it for picking.
1906            usage: wgpu::BufferUsages::STORAGE
1907                | wgpu::BufferUsages::COPY_DST
1908                | wgpu::BufferUsages::COPY_SRC,
1909            mapped_at_creation: false,
1910        });
1911        let depth_readback = self.device.create_buffer(&wgpu::BufferDescriptor {
1912            label: Some("roxlap-gpu scene_dda.depth_readback"),
1913            size: u64::from(width) * u64::from(height) * 4,
1914            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
1915            mapped_at_creation: false,
1916        });
1917        let dda_shader = self
1918            .device
1919            .create_shader_module(wgpu::ShaderModuleDescriptor {
1920                label: Some("scene_dda.wgsl"),
1921                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
1922            });
1923        let bgl_dda = self
1924            .device
1925            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1926                label: Some("roxlap-gpu scene_dda.bgl"),
1927                entries: &[
1928                    bgl_uniform_entry(0),
1929                    bgl_storage_entry(1, true),
1930                    bgl_storage_entry(2, true),
1931                    bgl_storage_entry(3, true),
1932                    bgl_storage_entry(4, true),
1933                    bgl_storage_entry(5, true),
1934                    bgl_storage_entry(6, true),
1935                    bgl_storage_entry(7, true),
1936                    wgpu::BindGroupLayoutEntry {
1937                        binding: 8,
1938                        visibility: wgpu::ShaderStages::COMPUTE,
1939                        ty: wgpu::BindingType::StorageTexture {
1940                            access: wgpu::StorageTextureAccess::WriteOnly,
1941                            format: wgpu::TextureFormat::Rgba8Unorm,
1942                            view_dimension: wgpu::TextureViewDimension::D2,
1943                        },
1944                        count: None,
1945                    },
1946                    // GPU.8 sky panorama + sampler.
1947                    wgpu::BindGroupLayoutEntry {
1948                        binding: 9,
1949                        visibility: wgpu::ShaderStages::COMPUTE,
1950                        ty: wgpu::BindingType::Texture {
1951                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
1952                            view_dimension: wgpu::TextureViewDimension::D2,
1953                            multisampled: false,
1954                        },
1955                        count: None,
1956                    },
1957                    wgpu::BindGroupLayoutEntry {
1958                        binding: 10,
1959                        visibility: wgpu::ShaderStages::COMPUTE,
1960                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
1961                        count: None,
1962                    },
1963                    // GPU.9 — read-write per-pixel depth buffer.
1964                    bgl_storage_entry(11, false),
1965                    // Occupancy pages 1..MAX_OCC_PAGES (page 0 is
1966                    // binding 1). Unused pages bind a dummy buffer.
1967                    bgl_storage_entry(12, true),
1968                    bgl_storage_entry(13, true),
1969                    bgl_storage_entry(14, true),
1970                ],
1971            });
1972        let dda_pl = self
1973            .device
1974            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1975                label: Some("roxlap-gpu scene_dda.layout"),
1976                bind_group_layouts: &[&bgl_dda],
1977                push_constant_ranges: &[],
1978            });
1979        let pipeline_dda = self
1980            .device
1981            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1982                label: Some("roxlap-gpu scene_dda.pipeline"),
1983                layout: Some(&dda_pl),
1984                module: &dda_shader,
1985                entry_point: "render_scene",
1986                compilation_options: wgpu::PipelineCompilationOptions::default(),
1987                cache: None,
1988            });
1989
1990        let blit_shader = self
1991            .device
1992            .create_shader_module(wgpu::ShaderModuleDescriptor {
1993                label: Some("blit.wgsl"),
1994                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1995            });
1996        let bgl_blit = self
1997            .device
1998            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1999                label: Some("roxlap-gpu scene_dda.blit_bgl"),
2000                entries: &[
2001                    wgpu::BindGroupLayoutEntry {
2002                        binding: 0,
2003                        visibility: wgpu::ShaderStages::FRAGMENT,
2004                        ty: wgpu::BindingType::Texture {
2005                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
2006                            view_dimension: wgpu::TextureViewDimension::D2,
2007                            multisampled: false,
2008                        },
2009                        count: None,
2010                    },
2011                    wgpu::BindGroupLayoutEntry {
2012                        binding: 1,
2013                        visibility: wgpu::ShaderStages::FRAGMENT,
2014                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
2015                        count: None,
2016                    },
2017                ],
2018            });
2019        let blit_pl = self
2020            .device
2021            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2022                label: Some("roxlap-gpu scene_dda.blit_layout"),
2023                bind_group_layouts: &[&bgl_blit],
2024                push_constant_ranges: &[],
2025            });
2026        let pipeline_blit = self
2027            .device
2028            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
2029                label: Some("roxlap-gpu scene_dda.blit_pipeline"),
2030                layout: Some(&blit_pl),
2031                vertex: wgpu::VertexState {
2032                    module: &blit_shader,
2033                    entry_point: "vs_main",
2034                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2035                    buffers: &[],
2036                },
2037                fragment: Some(wgpu::FragmentState {
2038                    module: &blit_shader,
2039                    entry_point: "fs_main",
2040                    compilation_options: wgpu::PipelineCompilationOptions::default(),
2041                    targets: &[Some(wgpu::ColorTargetState {
2042                        format: surface_format,
2043                        blend: None,
2044                        write_mask: wgpu::ColorWrites::ALL,
2045                    })],
2046                }),
2047                primitive: wgpu::PrimitiveState::default(),
2048                depth_stencil: None,
2049                multisample: wgpu::MultisampleState::default(),
2050                multiview: None,
2051                cache: None,
2052            });
2053        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
2054            label: Some("roxlap-gpu scene_dda.blit_sampler"),
2055            address_mode_u: wgpu::AddressMode::ClampToEdge,
2056            address_mode_v: wgpu::AddressMode::ClampToEdge,
2057            address_mode_w: wgpu::AddressMode::ClampToEdge,
2058            mag_filter: wgpu::FilterMode::Nearest,
2059            min_filter: wgpu::FilterMode::Nearest,
2060            mipmap_filter: wgpu::FilterMode::Nearest,
2061            ..Default::default()
2062        });
2063        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2064            label: Some("roxlap-gpu scene_dda.blit_bg"),
2065            layout: &bgl_blit,
2066            entries: &[
2067                wgpu::BindGroupEntry {
2068                    binding: 0,
2069                    resource: wgpu::BindingResource::TextureView(&storage_view),
2070                },
2071                wgpu::BindGroupEntry {
2072                    binding: 1,
2073                    resource: wgpu::BindingResource::Sampler(&sampler),
2074                },
2075            ],
2076        });
2077
2078        SceneDdaResources {
2079            storage_size: (width, height),
2080            storage_view,
2081            uniform_buf,
2082            bgl_dda,
2083            pipeline_dda,
2084            blit_bg,
2085            pipeline_blit,
2086            _sampler: sampler,
2087            depth_buffer,
2088            depth_readback,
2089        }
2090    }
2091
2092    /// Read back the per-pixel world-t depth at window pixel `(x, y)`
2093    /// from the last rendered frame, for screen→world picking. Returns
2094    /// the distance `t` along the (normalised) view ray to the nearest
2095    /// scene-grid surface, so the host reconstructs the world hit as
2096    /// `cam.pos + t * normalize(ray_dir)`. `None` for out-of-bounds
2097    /// pixels, sky / no-hit (the `T_INF` sentinel), or when no scene
2098    /// frame has been rendered.
2099    ///
2100    /// The depth buffer is the SCENE pass's output (terrain + grids),
2101    /// untouched by the sprite pass (which reads it read-only), so a
2102    /// cursor sprite under the pointer does not occlude the pick.
2103    ///
2104    /// Synchronous: copies the depth buffer to a mapped staging buffer
2105    /// and blocks on `device.poll(Wait)`. Cheap enough for click-time
2106    /// picks; do not call it every frame.
2107    ///
2108    /// Requires the last frame to have written depth, which happens
2109    /// when sprites are present (`write_depth`). The pick demo always
2110    /// has a cursor sprite, so this holds.
2111    #[must_use]
2112    pub fn read_depth_pixel(&self, x: u32, y: u32) -> Option<f32> {
2113        let dda = self.scene_dda.as_ref()?;
2114        let (w, h) = dda.storage_size;
2115        if x >= w || y >= h {
2116            return None;
2117        }
2118        let mut enc = self
2119            .device
2120            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2121                label: Some("roxlap-gpu depth readback"),
2122            });
2123        let size = u64::from(w) * u64::from(h) * 4;
2124        enc.copy_buffer_to_buffer(&dda.depth_buffer, 0, &dda.depth_readback, 0, size);
2125        self.queue.submit(std::iter::once(enc.finish()));
2126
2127        let slice = dda.depth_readback.slice(..);
2128        let (tx, rx) = std::sync::mpsc::channel();
2129        slice.map_async(wgpu::MapMode::Read, move |r| {
2130            let _ = tx.send(r);
2131        });
2132        self.device.poll(wgpu::Maintain::Wait);
2133        rx.recv().ok()?.ok()?;
2134
2135        let t = {
2136            let data = slice.get_mapped_range();
2137            let idx = ((y * w + x) * 4) as usize;
2138            let bytes: [u8; 4] = data[idx..idx + 4].try_into().ok()?;
2139            f32::from_le_bytes(bytes)
2140        };
2141        dda.depth_readback.unmap();
2142
2143        // Reject sky / no-hit (T_INF == 1e30 in the shader) + non-finite.
2144        if !t.is_finite() || t >= 1.0e29 {
2145            return None;
2146        }
2147        Some(t)
2148    }
2149
2150    /// World-space view-ray direction (un-normalised) for window pixel
2151    /// `(x, y)`, under the GPU marcher's projection — the canonical GPU
2152    /// unproject, mirroring `scene_dda.wgsl`'s `render_scene`
2153    /// (vertical-FOV pinhole). Uses the last-rendered frame's target
2154    /// size + FOV; `None` before the first scene render. Pair with
2155    /// [`Self::read_depth_pixel`] for screen→world picking.
2156    #[must_use]
2157    pub fn pixel_ray(
2158        &self,
2159        right: [f64; 3],
2160        down: [f64; 3],
2161        forward: [f64; 3],
2162        x: f64,
2163        y: f64,
2164    ) -> Option<[f64; 3]> {
2165        let dda = self.scene_dda.as_ref()?;
2166        let (w, h) = dda.storage_size;
2167        if w == 0 || h == 0 || self.last_fov_y_rad <= 0.0 {
2168            return None;
2169        }
2170        Some(pinhole_pixel_ray(
2171            right,
2172            down,
2173            forward,
2174            x,
2175            y,
2176            f64::from(w),
2177            f64::from(h),
2178            f64::from(self.last_fov_y_rad),
2179        ))
2180    }
2181
2182    /// GPU.10.1 — upload a sprite model registry + its instances for
2183    /// the DDA path. An empty instance slice clears all sprites.
2184    pub fn set_sprite_instances(
2185        &mut self,
2186        registry: &sprite_model::SpriteModelRegistry,
2187        instances: &[sprite_model::SpriteInstance],
2188    ) {
2189        if instances.is_empty() {
2190            self.sprite_registry = None;
2191            return;
2192        }
2193        self.sprite_registry = Some(sprite_model::SpriteRegistryResident::upload(
2194            &self.device,
2195            registry,
2196            instances,
2197        ));
2198    }
2199
2200    /// Re-pose the already-resident sprite instances in place (no model
2201    /// volume re-upload) — the cheap per-frame path for animated KFA
2202    /// limbs. `instances` must match the last [`Self::set_sprite_instances`]
2203    /// in length + order. No-op if no sprite registry is resident.
2204    pub fn update_sprite_instance_transforms(
2205        &mut self,
2206        instances: &[sprite_model::SpriteInstance],
2207    ) {
2208        if let Some(reg) = self.sprite_registry.as_mut() {
2209            reg.update_transforms(instances);
2210        }
2211    }
2212
2213    /// Set the per-instance `kv6colmul[256]` lighting tables (voxlap's
2214    /// `update_reflects` output, e.g. via `roxlap_core::sprite::
2215    /// sprite_colmul`), in the same order/length as the last
2216    /// [`Self::set_sprite_instances`]. The GPU sprite pass modulates each
2217    /// voxel by its surface normal's entry — matching the CPU rasteriser.
2218    /// No-op if no sprite registry is resident.
2219    pub fn set_sprite_instance_colmul(&mut self, tables: &[[u64; 256]]) {
2220        if let Some(reg) = self.sprite_registry.as_mut() {
2221            reg.set_instance_colmul(tables);
2222        }
2223    }
2224
2225    /// GPU.10.4 — set the LOD pixel threshold: a sprite steps to the
2226    /// next mip once a mip-0 voxel would project below `px` screen
2227    /// pixels. `1.0` is the natural "no sub-pixel voxels" default;
2228    /// larger values force LOD in closer (useful for inspection).
2229    /// Clamped to ≥ 0.25.
2230    pub fn set_sprite_lod_px(&mut self, px: f32) {
2231        self.sprite_lod_px = px.max(0.25);
2232    }
2233
2234    /// GPU.11.1 — set the scene-grid LOD scan distance (world units).
2235    /// A chunk entered at world-t `t` is marched at mip
2236    /// `floor(log2(max(t, msd) / msd))`, clamped to its grid's mip
2237    /// ladder. `0` disables LOD (always mip-0). Larger values push
2238    /// the coarser mips farther out — the axis-aligned-mip-beams
2239    /// mitigation lever (GPU.11.2). Default 64 (matches CPU
2240    /// `mip_scan_dist`).
2241    pub fn set_scene_mip_scan_dist(&mut self, dist: f32) {
2242        self.scene_mip_scan_dist = dist.max(0.0);
2243    }
2244
2245    /// Set per-face grid side-shading — voxlap's
2246    /// `setsideshades(top, bot, left, right, up, down)`. Each value is
2247    /// subtracted (as a u8, matching the CPU `gcsub` high byte) from a
2248    /// hit voxel's brightness byte before shading, so the scene-DDA pass
2249    /// darkens grid faces the same way the CPU rasteriser does. `[0; 6]`
2250    /// disables it (the default). The hit face is taken from the DDA's
2251    /// last-stepped axis + ray direction.
2252    pub fn set_scene_side_shades(&mut self, s: [i8; 6]) {
2253        // Reinterpret each i8 as u8 (voxlap stamps `sxx` into gcsub's
2254        // high byte verbatim), then pack (top, bot, left, right) /
2255        // (up, down, 0, 0) for the two uniform vec4s.
2256        let v = |i: usize| i32::from(s[i] as u8);
2257        self.scene_side_shades = [[v(0), v(1), v(2), v(3)], [v(4), v(5), 0, 0]];
2258    }
2259
2260    /// GPU.10.1 — build the instanced model-DDA pipeline (one thread
2261    /// per pixel). Lazily invoked the first frame a registry is present.
2262    fn build_sprite_model_dda(&self) -> SpriteModelDdaResources {
2263        let shader = self
2264            .device
2265            .create_shader_module(wgpu::ShaderModuleDescriptor {
2266                label: Some("sprite_model_dda.wgsl"),
2267                source: wgpu::ShaderSource::Wgsl(
2268                    include_str!("../shaders/sprite_model_dda.wgsl").into(),
2269                ),
2270            });
2271        let bgl = self
2272            .device
2273            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2274                label: Some("roxlap-gpu sprite_model_dda.bgl"),
2275                entries: &[
2276                    bgl_uniform_entry(0),
2277                    bgl_storage_entry(1, true), // occupancy
2278                    bgl_storage_entry(2, true), // colors
2279                    bgl_storage_entry(3, true), // color_offsets
2280                    bgl_storage_entry(4, true), // model_meta
2281                    bgl_storage_entry(5, true), // instances
2282                    bgl_storage_entry(6, true), // scene depth
2283                    wgpu::BindGroupLayoutEntry {
2284                        binding: 7,
2285                        visibility: wgpu::ShaderStages::COMPUTE,
2286                        ty: wgpu::BindingType::StorageTexture {
2287                            access: wgpu::StorageTextureAccess::WriteOnly,
2288                            format: wgpu::TextureFormat::Rgba8Unorm,
2289                            view_dimension: wgpu::TextureViewDimension::D2,
2290                        },
2291                        count: None,
2292                    },
2293                    bgl_storage_entry(8, true),  // tile_ranges
2294                    bgl_storage_entry(9, true),  // tile_instances
2295                    bgl_storage_entry(10, true), // per-voxel dir
2296                    bgl_storage_entry(11, true), // per-instance kv6colmul
2297                ],
2298            });
2299        let pl = self
2300            .device
2301            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2302                label: Some("roxlap-gpu sprite_model_dda.layout"),
2303                bind_group_layouts: &[&bgl],
2304                push_constant_ranges: &[],
2305            });
2306        let pipeline = self
2307            .device
2308            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
2309                label: Some("roxlap-gpu sprite_model_dda.pipeline"),
2310                layout: Some(&pl),
2311                module: &shader,
2312                entry_point: "march",
2313                compilation_options: wgpu::PipelineCompilationOptions::default(),
2314                cache: None,
2315            });
2316        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2317            label: Some("roxlap-gpu sprite_model_dda.uniform"),
2318            size: std::mem::size_of::<SpriteModelUniform>() as u64,
2319            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2320            mapped_at_creation: false,
2321        });
2322        SpriteModelDdaResources {
2323            bgl,
2324            pipeline,
2325            uniform_buf,
2326        }
2327    }
2328}
2329
2330/// GPU.11 — headless scene-DDA renderer for tests + offline visual
2331/// gates. Owns the `scene_dda.wgsl` compute pipeline with no surface
2332/// and no blit pass; renders a [`GpuSceneResident`] to an in-memory
2333/// RGBA framebuffer via texture readback. The per-substage visual
2334/// gate (render reference scenes, diff PPMs) and the GPU.11.1 mip
2335/// render-diff both ride on this.
2336pub struct HeadlessSceneRenderer {
2337    width: u32,
2338    height: u32,
2339    output_tex: wgpu::Texture,
2340    output_view: wgpu::TextureView,
2341    depth_buffer: wgpu::Buffer,
2342    uniform_buf: wgpu::Buffer,
2343    _sky_texture: wgpu::Texture,
2344    sky_view: wgpu::TextureView,
2345    sky_sampler: wgpu::Sampler,
2346    bgl: wgpu::BindGroupLayout,
2347    pipeline: wgpu::ComputePipeline,
2348    readback: wgpu::Buffer,
2349    padded_bytes_per_row: u32,
2350    /// Per-face side-shades for the gate render (default none). Packed
2351    /// `[(top,bot,left,right), (up,down,_,_)]`; set via
2352    /// [`Self::set_side_shades`].
2353    side_shades: [[i32; 4]; 2],
2354}
2355
2356impl HeadlessSceneRenderer {
2357    /// Build the compute pipeline + output/readback resources for a
2358    /// `width × height` framebuffer. Validates `scene_dda.wgsl` and
2359    /// the [`scene::GridStaticMeta`] std430 layout at pipeline /
2360    /// bind-group time.
2361    #[must_use]
2362    pub fn new(device: &wgpu::Device, queue: &wgpu::Queue, width: u32, height: u32) -> Self {
2363        let output_tex = device.create_texture(&wgpu::TextureDescriptor {
2364            label: Some("roxlap-gpu headless.output"),
2365            size: wgpu::Extent3d {
2366                width,
2367                height,
2368                depth_or_array_layers: 1,
2369            },
2370            mip_level_count: 1,
2371            sample_count: 1,
2372            dimension: wgpu::TextureDimension::D2,
2373            format: wgpu::TextureFormat::Rgba8Unorm,
2374            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::COPY_SRC,
2375            view_formats: &[],
2376        });
2377        let output_view = output_tex.create_view(&wgpu::TextureViewDescriptor::default());
2378
2379        let uniform_buf = device.create_buffer(&wgpu::BufferDescriptor {
2380            label: Some("roxlap-gpu headless.uniform"),
2381            size: std::mem::size_of::<SceneDdaUniform>() as u64,
2382            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2383            mapped_at_creation: false,
2384        });
2385        let depth_buffer = device.create_buffer(&wgpu::BufferDescriptor {
2386            label: Some("roxlap-gpu headless.depth"),
2387            size: u64::from(width) * u64::from(height) * 4,
2388            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
2389            mapped_at_creation: false,
2390        });
2391
2392        let default_sky_pixel = [120u8, 150, 220, 255];
2393        let (sky_texture, sky_view) = create_sky_texture(device, 1, 1, &default_sky_pixel);
2394        // Upload the default sky texel (create_sky_texture only allocates
2395        // — the texel must be written or the shader samples black, which
2396        // is why a grid-less headless render came back black).
2397        queue.write_texture(
2398            wgpu::ImageCopyTexture {
2399                texture: &sky_texture,
2400                mip_level: 0,
2401                origin: wgpu::Origin3d::ZERO,
2402                aspect: wgpu::TextureAspect::All,
2403            },
2404            &default_sky_pixel,
2405            wgpu::ImageDataLayout {
2406                offset: 0,
2407                bytes_per_row: Some(4),
2408                rows_per_image: Some(1),
2409            },
2410            wgpu::Extent3d {
2411                width: 1,
2412                height: 1,
2413                depth_or_array_layers: 1,
2414            },
2415        );
2416        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
2417            label: Some("roxlap-gpu headless.sky_sampler"),
2418            address_mode_u: wgpu::AddressMode::Repeat,
2419            address_mode_v: wgpu::AddressMode::Repeat,
2420            mag_filter: wgpu::FilterMode::Linear,
2421            min_filter: wgpu::FilterMode::Linear,
2422            ..Default::default()
2423        });
2424
2425        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
2426            label: Some("scene_dda.wgsl (headless)"),
2427            source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
2428        });
2429        let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2430            label: Some("roxlap-gpu headless.bgl"),
2431            entries: &[
2432                bgl_uniform_entry(0),
2433                bgl_storage_entry(1, true),
2434                bgl_storage_entry(2, true),
2435                bgl_storage_entry(3, true),
2436                bgl_storage_entry(4, true),
2437                bgl_storage_entry(5, true),
2438                bgl_storage_entry(6, true),
2439                bgl_storage_entry(7, true),
2440                wgpu::BindGroupLayoutEntry {
2441                    binding: 8,
2442                    visibility: wgpu::ShaderStages::COMPUTE,
2443                    ty: wgpu::BindingType::StorageTexture {
2444                        access: wgpu::StorageTextureAccess::WriteOnly,
2445                        format: wgpu::TextureFormat::Rgba8Unorm,
2446                        view_dimension: wgpu::TextureViewDimension::D2,
2447                    },
2448                    count: None,
2449                },
2450                wgpu::BindGroupLayoutEntry {
2451                    binding: 9,
2452                    visibility: wgpu::ShaderStages::COMPUTE,
2453                    ty: wgpu::BindingType::Texture {
2454                        sample_type: wgpu::TextureSampleType::Float { filterable: true },
2455                        view_dimension: wgpu::TextureViewDimension::D2,
2456                        multisampled: false,
2457                    },
2458                    count: None,
2459                },
2460                wgpu::BindGroupLayoutEntry {
2461                    binding: 10,
2462                    visibility: wgpu::ShaderStages::COMPUTE,
2463                    ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
2464                    count: None,
2465                },
2466                bgl_storage_entry(11, false),
2467                bgl_storage_entry(12, true),
2468                bgl_storage_entry(13, true),
2469                bgl_storage_entry(14, true),
2470            ],
2471        });
2472        let pl = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2473            label: Some("roxlap-gpu headless.layout"),
2474            bind_group_layouts: &[&bgl],
2475            push_constant_ranges: &[],
2476        });
2477        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
2478            label: Some("roxlap-gpu headless.pipeline"),
2479            layout: Some(&pl),
2480            module: &shader,
2481            entry_point: "render_scene",
2482            compilation_options: wgpu::PipelineCompilationOptions::default(),
2483            cache: None,
2484        });
2485
2486        // Readback buffer: row pitch must be 256-aligned for
2487        // copy_texture_to_buffer.
2488        let padded_bytes_per_row = (width * 4).div_ceil(256) * 256;
2489        let readback = device.create_buffer(&wgpu::BufferDescriptor {
2490            label: Some("roxlap-gpu headless.readback"),
2491            size: u64::from(padded_bytes_per_row) * u64::from(height),
2492            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
2493            mapped_at_creation: false,
2494        });
2495
2496        Self {
2497            width,
2498            height,
2499            output_tex,
2500            output_view,
2501            depth_buffer,
2502            uniform_buf,
2503            _sky_texture: sky_texture,
2504            sky_view,
2505            sky_sampler,
2506            bgl,
2507            pipeline,
2508            readback,
2509            padded_bytes_per_row,
2510            side_shades: [[0; 4]; 2],
2511        }
2512    }
2513
2514    /// Set per-face side-shades for subsequent [`Self::render`] calls —
2515    /// voxlap `setsideshades(top, bot, left, right, up, down)`, each an
2516    /// i8 stamped as u8 (matching the engine path). Lets the gate test
2517    /// the GPU side-shade darkening.
2518    pub fn set_side_shades(&mut self, s: [i8; 6]) {
2519        let v = |i: usize| i32::from(s[i] as u8);
2520        self.side_shades = [[v(0), v(1), v(2), v(3)], [v(4), v(5), 0, 0]];
2521    }
2522
2523    /// Render `scene` from `cameras` (one per grid) and read the
2524    /// framebuffer back as `width*height` packed `0xAABBGGRR` pixels
2525    /// (R in the low byte). Fog is disabled. `mip_scan_dist` drives
2526    /// the GPU.11.1 scene-grid LOD (`0` = always mip-0). Blocks on
2527    /// readback.
2528    ///
2529    /// # Panics
2530    /// If `cameras.len() != scene.grid_count`.
2531    #[must_use]
2532    #[allow(clippy::too_many_arguments)]
2533    pub fn render(
2534        &self,
2535        device: &wgpu::Device,
2536        queue: &wgpu::Queue,
2537        scene: &GpuSceneResident,
2538        cameras: &[Camera],
2539        fov_y_rad: f32,
2540        max_outer_steps: u32,
2541        mip_scan_dist: f32,
2542    ) -> Vec<u32> {
2543        assert_eq!(
2544            cameras.len(),
2545            scene.grid_count as usize,
2546            "headless render: {} cameras for {} grids",
2547            cameras.len(),
2548            scene.grid_count,
2549        );
2550
2551        let mut cam_array = [SceneDdaPerGridCamera::zeroed(); SCENE_MAX_GRIDS];
2552        for (i, cam) in cameras.iter().enumerate() {
2553            cam_array[i] = SceneDdaPerGridCamera {
2554                pos: cam.position,
2555                _pad0: 0.0,
2556                right: cam.right,
2557                _pad1: 0.0,
2558                down: cam.down,
2559                _pad2: 0.0,
2560                forward: cam.forward,
2561                _pad3: 0.0,
2562            };
2563        }
2564        let uniform = SceneDdaUniform {
2565            fov_y_rad,
2566            grid_count: scene.grid_count,
2567            max_outer_steps,
2568            _pad0: 0,
2569            screen_size: [self.width, self.height],
2570            _pad1: [0; 2],
2571            cameras: cam_array,
2572            // Fog off: near/far past any reachable t → factor 0.
2573            fog_color: [0.0, 0.0, 0.0, 1.0e29],
2574            fog_far: 1.0e30,
2575            write_depth: 0,
2576            occ_page_words: scene.occupancy_page_words,
2577            occ_num_pages: scene.occupancy_num_pages,
2578            mip_scan_dist,
2579            _pad2: 0,
2580            _pad3: 0,
2581            _pad4: 0,
2582            // Sky direction from the first grid camera (the world frame
2583            // in these tests); a default forward camera when there are
2584            // none (grid_count == 0) so the sky lookup stays valid.
2585            sky_cam: SceneDdaPerGridCamera::from_camera(&cameras.first().copied().unwrap_or(
2586                Camera {
2587                    position: [0.0; 3],
2588                    right: [1.0, 0.0, 0.0],
2589                    down: [0.0, 0.0, 1.0],
2590                    forward: [0.0, 1.0, 0.0],
2591                    fov_y_rad,
2592                },
2593            )),
2594            side_shades0: self.side_shades[0],
2595            side_shades1: self.side_shades[1],
2596        };
2597        queue.write_buffer(&self.uniform_buf, 0, bytemuck::bytes_of(&uniform));
2598
2599        let bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
2600            label: Some("roxlap-gpu headless.bg"),
2601            layout: &self.bgl,
2602            entries: &[
2603                wgpu::BindGroupEntry {
2604                    binding: 0,
2605                    resource: self.uniform_buf.as_entire_binding(),
2606                },
2607                wgpu::BindGroupEntry {
2608                    binding: 1,
2609                    resource: scene.occupancy_pages[0].as_entire_binding(),
2610                },
2611                wgpu::BindGroupEntry {
2612                    binding: 2,
2613                    resource: scene.all_color_offsets.as_entire_binding(),
2614                },
2615                wgpu::BindGroupEntry {
2616                    binding: 3,
2617                    resource: scene.all_colors.as_entire_binding(),
2618                },
2619                wgpu::BindGroupEntry {
2620                    binding: 4,
2621                    resource: scene.all_chunk_colors_base.as_entire_binding(),
2622                },
2623                wgpu::BindGroupEntry {
2624                    binding: 5,
2625                    resource: scene.all_chunk_occupancy.as_entire_binding(),
2626                },
2627                wgpu::BindGroupEntry {
2628                    binding: 6,
2629                    resource: scene.grid_static_meta.as_entire_binding(),
2630                },
2631                wgpu::BindGroupEntry {
2632                    binding: 7,
2633                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
2634                },
2635                wgpu::BindGroupEntry {
2636                    binding: 8,
2637                    resource: wgpu::BindingResource::TextureView(&self.output_view),
2638                },
2639                wgpu::BindGroupEntry {
2640                    binding: 9,
2641                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
2642                },
2643                wgpu::BindGroupEntry {
2644                    binding: 10,
2645                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
2646                },
2647                wgpu::BindGroupEntry {
2648                    binding: 11,
2649                    resource: self.depth_buffer.as_entire_binding(),
2650                },
2651                wgpu::BindGroupEntry {
2652                    binding: 12,
2653                    resource: scene.occupancy_pages[1].as_entire_binding(),
2654                },
2655                wgpu::BindGroupEntry {
2656                    binding: 13,
2657                    resource: scene.occupancy_pages[2].as_entire_binding(),
2658                },
2659                wgpu::BindGroupEntry {
2660                    binding: 14,
2661                    resource: scene.occupancy_pages[3].as_entire_binding(),
2662                },
2663            ],
2664        });
2665
2666        let mut enc =
2667            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
2668        {
2669            let mut pass = enc.begin_compute_pass(&wgpu::ComputePassDescriptor {
2670                label: Some("roxlap-gpu headless.pass"),
2671                timestamp_writes: None,
2672            });
2673            pass.set_pipeline(&self.pipeline);
2674            pass.set_bind_group(0, &bg, &[]);
2675            pass.dispatch_workgroups(self.width.div_ceil(8), self.height.div_ceil(8), 1);
2676        }
2677        enc.copy_texture_to_buffer(
2678            wgpu::ImageCopyTexture {
2679                texture: &self.output_tex,
2680                mip_level: 0,
2681                origin: wgpu::Origin3d::ZERO,
2682                aspect: wgpu::TextureAspect::All,
2683            },
2684            wgpu::ImageCopyBuffer {
2685                buffer: &self.readback,
2686                layout: wgpu::ImageDataLayout {
2687                    offset: 0,
2688                    bytes_per_row: Some(self.padded_bytes_per_row),
2689                    rows_per_image: Some(self.height),
2690                },
2691            },
2692            wgpu::Extent3d {
2693                width: self.width,
2694                height: self.height,
2695                depth_or_array_layers: 1,
2696            },
2697        );
2698        queue.submit(Some(enc.finish()));
2699
2700        let slice = self.readback.slice(..);
2701        let (tx, rx) = std::sync::mpsc::channel();
2702        slice.map_async(wgpu::MapMode::Read, move |r| {
2703            let _ = tx.send(r);
2704        });
2705        device.poll(wgpu::Maintain::Wait);
2706        rx.recv().expect("map_async channel").expect("map_async");
2707
2708        let data = slice.get_mapped_range();
2709        let mut out = Vec::with_capacity((self.width * self.height) as usize);
2710        let pitch = self.padded_bytes_per_row as usize;
2711        for y in 0..self.height as usize {
2712            let row = &data[y * pitch..y * pitch + self.width as usize * 4];
2713            for px in row.chunks_exact(4) {
2714                out.push(
2715                    u32::from(px[0])
2716                        | (u32::from(px[1]) << 8)
2717                        | (u32::from(px[2]) << 16)
2718                        | (u32::from(px[3]) << 24),
2719                );
2720            }
2721        }
2722        drop(data);
2723        self.readback.unmap();
2724        out
2725    }
2726}
2727
2728fn bgl_uniform_entry(binding: u32) -> wgpu::BindGroupLayoutEntry {
2729    wgpu::BindGroupLayoutEntry {
2730        binding,
2731        visibility: wgpu::ShaderStages::COMPUTE,
2732        ty: wgpu::BindingType::Buffer {
2733            ty: wgpu::BufferBindingType::Uniform,
2734            has_dynamic_offset: false,
2735            min_binding_size: None,
2736        },
2737        count: None,
2738    }
2739}
2740
2741fn bgl_storage_entry(binding: u32, read_only: bool) -> wgpu::BindGroupLayoutEntry {
2742    wgpu::BindGroupLayoutEntry {
2743        binding,
2744        visibility: wgpu::ShaderStages::COMPUTE,
2745        ty: wgpu::BindingType::Buffer {
2746            ty: wgpu::BufferBindingType::Storage { read_only },
2747            has_dynamic_offset: false,
2748            min_binding_size: None,
2749        },
2750        count: None,
2751    }
2752}
2753
2754/// Create a fresh sky panorama texture sized `width × height` with
2755/// the initial pixel data uploaded via `write_texture`. Used by
2756/// `GpuRenderer::new` (1×1 default) and `set_sky_panorama` (host-
2757/// supplied panorama).
2758fn create_sky_texture(
2759    device: &wgpu::Device,
2760    width: u32,
2761    height: u32,
2762    _initial_pixels: &[u8],
2763) -> (wgpu::Texture, wgpu::TextureView) {
2764    let tex = device.create_texture(&wgpu::TextureDescriptor {
2765        label: Some("roxlap-gpu sky_texture"),
2766        size: wgpu::Extent3d {
2767            width,
2768            height,
2769            depth_or_array_layers: 1,
2770        },
2771        mip_level_count: 1,
2772        sample_count: 1,
2773        dimension: wgpu::TextureDimension::D2,
2774        format: wgpu::TextureFormat::Rgba8Unorm,
2775        usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
2776        view_formats: &[],
2777    });
2778    let view = tex.create_view(&wgpu::TextureViewDescriptor::default());
2779    (tex, view)
2780}
2781
2782/// GPU.4 needs to upload a whole grid (~hundreds of MiB) as a few
2783/// storage buffers. wgpu's default `max_storage_buffer_binding_size`
2784/// is 128 MiB, which is just enough for the demo's 32×32 ground
2785/// occupancy (~128 MiB) but not the colour array. We request as
2786/// much as the adapter is willing to give — most desktop GPUs cap
2787/// individual storage buffers at 2-4 GiB; iGPUs often offer the
2788/// full system memory.
2789pub(crate) fn pick_required_limits(adapter_limits: &wgpu::Limits) -> wgpu::Limits {
2790    wgpu::Limits {
2791        max_storage_buffer_binding_size: adapter_limits.max_storage_buffer_binding_size,
2792        max_buffer_size: adapter_limits.max_buffer_size,
2793        // Occupancy paging adds up to MAX_OCC_PAGES-1 extra storage
2794        // bindings; with the scene's other buffers + the GPU.9 depth
2795        // buffer the scene_dda stage needs ~11. The default cap is 8.
2796        // Both NVK and lavapipe advertise ≫16, so request 16.
2797        max_storage_buffers_per_shader_stage: adapter_limits
2798            .max_storage_buffers_per_shader_stage
2799            .min(16),
2800        ..wgpu::Limits::default()
2801    }
2802}
2803
2804fn pick_present_mode(modes: &[wgpu::PresentMode]) -> wgpu::PresentMode {
2805    // Prefer Mailbox > Immediate > Fifo. Fifo is the universal
2806    // fallback and the only one Wayland-on-Mesa always offers.
2807    for &m in &[wgpu::PresentMode::Mailbox, wgpu::PresentMode::Immediate] {
2808        if modes.contains(&m) {
2809            return m;
2810        }
2811    }
2812    wgpu::PresentMode::Fifo
2813}
2814
/// View-ray direction in world space (not normalised) through window
/// pixel `(x, y)` of a `w × h` frame, for a pinhole camera with
/// vertical field of view `fov_y_rad` — the projection
/// `scene_dda.wgsl`'s `render_scene` uses.
///
/// `right` / `down` / `forward` are the camera basis. The ray passes
/// through the pixel centre (`x + 0.5`, `y + 0.5`); the result is
/// `forward` plus a lateral offset along `right` and a vertical offset
/// along `down`, so the frame centre yields exactly `forward`.
///
/// Shared by [`GpuRenderer::pixel_ray`]; kept as a free function so it
/// can be unit-tested without a device.
#[must_use]
#[allow(clippy::too_many_arguments)]
pub fn pinhole_pixel_ray(
    right: [f64; 3],
    down: [f64; 3],
    forward: [f64; 3],
    x: f64,
    y: f64,
    w: f64,
    h: f64,
    fov_y_rad: f64,
) -> [f64; 3] {
    // Half-extents of the image plane at unit distance along `forward`.
    let tan_half_fov_y = (fov_y_rad * 0.5).tan();
    let tan_half_fov_x = tan_half_fov_y * (w / h);

    // Pixel centre in normalised device coordinates: x runs -1..1 left
    // to right, y runs +1 at the top row down to -1 at the bottom.
    let ndc_x = (x + 0.5) / w * 2.0 - 1.0;
    let ndc_y_up = 1.0 - (y + 0.5) / h * 2.0;

    let kx = ndc_x * tan_half_fov_x;
    let ky = ndc_y_up * tan_half_fov_y;

    // `ky` is measured upward, hence the subtraction along `down`.
    std::array::from_fn(|axis| forward[axis] + kx * right[axis] - ky * down[axis])
}
2843
#[cfg(test)]
mod pixel_ray_tests {
    use super::pinhole_pixel_ray;

    // Camera basis shared by the unprojection tests.
    const RIGHT: [f64; 3] = [1.0, 0.0, 0.0];
    const DOWN: [f64; 3] = [0.0, 1.0, 0.0];
    const FWD: [f64; 3] = [0.0, 0.0, 1.0]; // voxlap z-down "look down"

    // Frame size shared by the unprojection tests.
    const FRAME_W: f64 = 1280.0;
    const FRAME_H: f64 = 720.0;

    /// Unproject pixel `(x, y)` of the shared test frame with the
    /// shared camera basis.
    fn ray_at(x: f64, y: f64, fov_y_rad: f64) -> [f64; 3] {
        pinhole_pixel_ray(RIGHT, DOWN, FWD, x, y, FRAME_W, FRAME_H, fov_y_rad)
    }

    // The frame centre (NDC 0,0) looks straight along `forward`.
    #[test]
    fn centre_pixel_is_forward() {
        let d = ray_at(639.5, 359.5, 60_f64.to_radians());
        assert!(
            d[0].abs() < 1e-9 && d[1].abs() < 1e-9,
            "centre ≈ forward, got {d:?}"
        );
        assert!((d[2] - 1.0).abs() < 1e-9);
    }

    // At the very right edge the lateral component equals
    // half_w = tan(fov_y/2) * aspect, i.e. the ray tilts +right by
    // tan(hfov/2).
    #[test]
    fn right_edge_tilts_by_half_w() {
        let fov = 60_f64.to_radians();
        let d = ray_at(1279.5, 359.5, fov);
        let half_w = (fov * 0.5).tan() * (FRAME_W / FRAME_H);
        assert!((d[0] - half_w).abs() < 1e-6, "x={}, half_w={half_w}", d[0]);
        assert!(d[0] > 0.0, "right edge tilts +right");
    }

    /// Parse and validate every WGSL shader with naga (the same
    /// front-end + validator wgpu runs at pipeline creation), so that
    /// shader edits — e.g. the GPU.10 sprite lighting bindings — are
    /// caught in CI without a GPU device.
    #[test]
    fn wgsl_shaders_validate() {
        let shaders: &[(&str, &str)] = &[
            (
                "sprite_model_dda.wgsl",
                include_str!("../shaders/sprite_model_dda.wgsl"),
            ),
            ("scene_dda.wgsl", include_str!("../shaders/scene_dda.wgsl")),
            ("blit.wgsl", include_str!("../shaders/blit.wgsl")),
            ("chunk_dda.wgsl", include_str!("../shaders/chunk_dda.wgsl")),
            ("grid_dda.wgsl", include_str!("../shaders/grid_dda.wgsl")),
        ];
        // One validator, with every check and capability enabled,
        // reused across all shaders.
        let mut validator = naga::valid::Validator::new(
            naga::valid::ValidationFlags::all(),
            naga::valid::Capabilities::all(),
        );
        for &(name, src) in shaders {
            let module = match naga::front::wgsl::parse_str(src) {
                Ok(module) => module,
                Err(e) => panic!("{name}: WGSL parse failed:\n{}", e.emit_to_string(src)),
            };
            if let Err(e) = validator.validate(&module) {
                panic!("{name}: WGSL validation failed: {e:?}");
            }
        }
    }
}