Skip to main content

roxlap_gpu/
lib.rs

1//! WGPU-backed compute-shader renderer scaffold for the roxlap
2//! voxel engine. GPU.1 in `PORTING-GPU.md`.
3//!
4//! GPU.1's job: stand up the device + surface + swapchain on a
5//! host window (any [`raw-window-handle`](raw_window_handle)
6//! provider), present a clear-to-colour frame each render call,
7//! and give the host a one-call opt-in. No voxel marching yet — the
8//! [`examples/probe.rs`](../examples/probe.rs) standalone holds
9//! the empirical FPS baseline from GPU.0.
10//!
11//! Later sub-substages flesh `GpuRenderer::render` out: GPU.2
12//! uploads voxel data, GPU.3 dispatches the inner-DDA compute
13//! shader, GPU.4 layers in chunk skipping, GPU.5 plugs the renderer
14//! into `roxlap-scene::Scene`, …
15//!
16//! ## Host integration shape (GPU.1)
17//!
18//! ```no_run
19//! use std::sync::Arc;
20//! use roxlap_gpu::{GpuRenderer, GpuRendererSettings};
21//! # use winit::window::Window;
22//! # fn pick(w: Arc<Window>, size: (u32, u32)) -> Option<GpuRenderer> {
23//! match GpuRenderer::new_blocking(w, size, GpuRendererSettings::default()) {
24//!     Ok(r) => Some(r),
25//!     Err(e) => {
26//!         eprintln!("GPU init failed: {e}; falling back to CPU");
27//!         None
28//!     }
29//! }
30//! # }
31//! ```
32
33#![allow(clippy::must_use_candidate, clippy::too_many_lines)]
34
35pub mod camera;
36pub mod decompress;
37pub mod grid;
38pub mod headless;
39pub mod resident;
40pub mod scene;
41pub mod sprite_model;
42
43pub use camera::Camera;
44pub use decompress::{decompress_chunk, ChunkUpload, BEDROCK_RGB, CHUNK_Z};
45pub use grid::{bounding_box_of, GpuGridResident, GridUpload};
46pub use headless::HeadlessGpu;
47pub use resident::GpuChunkResident;
48pub use scene::{
49    GpuSceneResident, GridRuntimeTransform, GridStaticMeta, RefreshOutcome, SceneUpload,
50    MAX_SCENE_GRIDS,
51};
52pub use sprite_model::{
53    build_sprite_model, SpriteInstance, SpriteInstanceTransform, SpriteModel, SpriteModelRegistry,
54    SpriteRegistryResident,
55};
56
57use std::sync::Arc;
58
59use bytemuck::{Pod, Zeroable};
60use raw_window_handle::{HasDisplayHandle, HasWindowHandle};
61
62/// Caller-controllable knobs for [`GpuRenderer::new`]. Defaults
63/// target "highest-performance GPU, prefer Mailbox/Immediate over
64/// vsync" — i.e. the same configuration the GPU.0 probe used to
65/// measure the FPS ceiling.
66#[derive(Debug, Clone, Copy)]
67pub struct GpuRendererSettings {
68    pub power_preference: PowerPreference,
69    /// Initial clear colour cycled by GPU.1's empty render path.
70    /// The voxel-rendering substages overwrite this entirely.
71    pub clear_colour: [f64; 3],
72    /// Prefer mailbox/immediate when offered; falls back to FIFO if
73    /// the surface only supports it (Wayland under Mesa often does).
74    pub uncapped_present: bool,
75}
76
/// Which class of adapter [`GpuRenderer::new`] should ask wgpu for.
///
/// This is a crate-local mirror of `wgpu::PowerPreference` so hosts
/// can fill in [`GpuRendererSettings`] without naming wgpu types.
/// The equality and hashing derives let hosts compare settings or use
/// the preference as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PowerPreference {
    /// Request the low-power adapter (`wgpu::PowerPreference::LowPower`).
    Low,
    /// Request the high-performance adapter
    /// (`wgpu::PowerPreference::HighPerformance`).
    High,
}
82
83impl Default for GpuRendererSettings {
84    fn default() -> Self {
85        Self {
86            power_preference: PowerPreference::High,
87            clear_colour: [0.06, 0.08, 0.12],
88            uncapped_present: true,
89        }
90    }
91}
92
93/// Errors `GpuRenderer::new` surfaces to the host. The host's
94/// expected flow is "try this, fall back to the CPU path on Err".
95#[derive(Debug)]
96pub enum GpuInitError {
97    CreateSurface(wgpu::CreateSurfaceError),
98    NoAdapter,
99    RequestDevice(wgpu::RequestDeviceError),
100}
101
102impl std::fmt::Display for GpuInitError {
103    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
104        match self {
105            Self::CreateSurface(e) => write!(f, "create_surface failed: {e}"),
106            Self::NoAdapter => write!(
107                f,
108                "no compatible adapter — does this system have a Vulkan/Metal/DX12 driver?"
109            ),
110            Self::RequestDevice(e) => write!(f, "request_device failed: {e}"),
111        }
112    }
113}
114
115impl std::error::Error for GpuInitError {
116    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
117        match self {
118            Self::CreateSurface(e) => Some(e),
119            Self::RequestDevice(e) => Some(e),
120            Self::NoAdapter => None,
121        }
122    }
123}
124
125impl From<wgpu::CreateSurfaceError> for GpuInitError {
126    fn from(value: wgpu::CreateSurfaceError) -> Self {
127        Self::CreateSurface(value)
128    }
129}
130
131impl From<wgpu::RequestDeviceError> for GpuInitError {
132    fn from(value: wgpu::RequestDeviceError) -> Self {
133        Self::RequestDevice(value)
134    }
135}
136
137/// WGPU-backed renderer. Owns the device, queue, and surface
138/// bound to the host's window. [`Self::render`] is the GPU.1
139/// clear-to-colour path; [`Self::render_chunk`] is GPU.3's
140/// single-chunk DDA marcher.
141///
142/// The window is consumed only at construction — `wgpu`'s
143/// `Surface<'static>` keeps its own `Arc` clone of the handle, so
144/// the renderer holds no window field of its own.
145pub struct GpuRenderer {
146    surface: wgpu::Surface<'static>,
147    surface_config: wgpu::SurfaceConfiguration,
148    device: wgpu::Device,
149    queue: wgpu::Queue,
150    adapter_info: String,
151    clear_colour: [f64; 3],
152    frame_count: u32,
153    /// Lazy-built on first [`Self::render_chunk`] call; rebuilt when
154    /// the swapchain resizes (storage texture must match).
155    chunk_dda: Option<ChunkDdaResources>,
156    /// Lazy-built on first [`Self::render_grid`] call; same resize
157    /// trigger as `chunk_dda`. The two paths share the same blit
158    /// pipeline structure but bind different storage layouts.
159    grid_dda: Option<GridDdaResources>,
160    /// Lazy-built on first [`Self::render_scene`] call. Holds the
161    /// multi-grid pipeline + per-grid camera uniforms.
162    scene_dda: Option<SceneDdaResources>,
163    /// GPU.8 — panoramic sky texture + sampler. Created at
164    /// `new` as a 1×1 mid-grey default; [`Self::set_sky_panorama`]
165    /// replaces it. The scene-DDA bind group references this each
166    /// frame.
167    sky_texture: wgpu::Texture,
168    sky_view: wgpu::TextureView,
169    sky_sampler: wgpu::Sampler,
170    /// GPU.8 fog state. `color` is BGRA-style premultiplied (each
171    /// channel in [0, 1]); `near` is the world-t distance at which
172    /// fog starts kicking in; `far` is the distance at which it's
173    /// fully opaque. The shader does
174    /// `mix(hit, fog, smoothstep(near, far, t))`.
175    fog_color: [f32; 3],
176    fog_near: f32,
177    fog_far: f32,
178    /// GPU.10 — sprites rendered as DDA-marched voxel models (the
179    /// precise path; the GPU.9 compute splatter it replaced was
180    /// retired in 10.5). Holds the concatenated model registry + the
181    /// per-frame instance array; set via [`Self::set_sprite_instances`].
182    sprite_registry: Option<sprite_model::SpriteRegistryResident>,
183    /// Lazy-built pipeline + uniform for the model-DDA pass.
184    sprite_model_dda: Option<SpriteModelDdaResources>,
185    /// GPU.10.4 — LOD aggressiveness: step a sprite to the next mip
186    /// once a mip-0 voxel projects below this many screen pixels.
187    /// Defaults to 4.0 (the empirical sweet spot); the host can tune
188    /// via [`Self::set_sprite_lod_px`].
189    sprite_lod_px: f32,
190    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
191    /// entered at world-t `t` is marched at the mip level
192    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
193    /// ladder. `0` disables LOD (always mip-0). Tunable via
194    /// [`Self::set_scene_mip_scan_dist`] — the axis-aligned-mip-beams
195    /// mitigation (GPU.11.2) pushes it outward if banding appears.
196    scene_mip_scan_dist: f32,
197    /// Vertical FOV (radians) the last `render_scene` marched with —
198    /// cached so [`Self::pixel_ray`] reconstructs the matching view ray
199    /// for picking. `0` until the first scene render.
200    last_fov_y_rad: f32,
201    /// The acquired-but-not-yet-presented swapchain frame from the most
202    /// recent deferred render ([`Self::render_scene`] /
203    /// [`Self::render_clear_deferred`]). [`Self::present`] shows it as
204    /// is; [`Self::paint_egui`] overlays egui first. Lets a host slot a
205    /// UI pass between the marcher and present. `None` between present
206    /// and the next render.
207    pending_frame: Option<(wgpu::SurfaceTexture, wgpu::TextureView)>,
208    /// Lazy-built `egui-wgpu` paint pipeline; created on the first
209    /// [`Self::paint_egui`] call (`hud` feature).
210    #[cfg(feature = "hud")]
211    egui_renderer: Option<egui_wgpu::Renderer>,
212}
213
214/// Per-renderer chunk-DDA pipeline state. The compute shader writes
215/// into the storage texture; a fullscreen-triangle render pass
216/// nearest-neighbour blits it to the swapchain.
217struct ChunkDdaResources {
218    storage_size: (u32, u32),
219    storage_view: wgpu::TextureView,
220    uniform_buf: wgpu::Buffer,
221    bgl_dda: wgpu::BindGroupLayout,
222    pipeline_dda: wgpu::ComputePipeline,
223    blit_bg: wgpu::BindGroup,
224    pipeline_blit: wgpu::RenderPipeline,
225    // wgpu BindGroups internally Arc their resources, but we keep
226    // the handle so the sampler shows up in profiler dumps.
227    _sampler: wgpu::Sampler,
228}
229
230struct GridDdaResources {
231    storage_size: (u32, u32),
232    storage_view: wgpu::TextureView,
233    uniform_buf: wgpu::Buffer,
234    bgl_dda: wgpu::BindGroupLayout,
235    pipeline_dda: wgpu::ComputePipeline,
236    blit_bg: wgpu::BindGroup,
237    pipeline_blit: wgpu::RenderPipeline,
238    _sampler: wgpu::Sampler,
239}
240
241struct SceneDdaResources {
242    storage_size: (u32, u32),
243    storage_view: wgpu::TextureView,
244    uniform_buf: wgpu::Buffer,
245    bgl_dda: wgpu::BindGroupLayout,
246    pipeline_dda: wgpu::ComputePipeline,
247    blit_bg: wgpu::BindGroup,
248    pipeline_blit: wgpu::RenderPipeline,
249    _sampler: wgpu::Sampler,
250    /// GPU.9 — per-pixel world-t depth (f32 bits as u32), sized
251    /// `width * height * 4`. The scene pass writes it when sprites
252    /// are present; the sprite model-DDA pass reads + composites
253    /// against it.
254    depth_buffer: wgpu::Buffer,
255    /// Picking — a `COPY_DST | MAP_READ` staging copy of `depth_buffer`
256    /// so the host can read back the per-pixel world-t after a frame
257    /// (e.g. click → which voxel). Same size as `depth_buffer`.
258    depth_readback: wgpu::Buffer,
259}
260
261/// GPU.10.0 — single-sprite model-DDA pipeline: one thread per pixel
262/// marches the model voxel volume and composites against the scene
263/// depth buffer.
264struct SpriteModelDdaResources {
265    bgl: wgpu::BindGroupLayout,
266    pipeline: wgpu::ComputePipeline,
267    uniform_buf: wgpu::Buffer,
268}
269
270/// Per-frame uniform for the model-DDA pass. Mirrors `Uniform` in
271/// `sprite_model_dda.wgsl` (std140). Per-model + per-instance data
272/// now live in storage buffers; this holds only the camera, fog, and
273/// instance count.
274#[repr(C)]
275#[derive(Clone, Copy, Pod, Zeroable)]
276struct SpriteModelUniform {
277    cam_pos: [f32; 3],
278    _p0: f32,
279    cam_right: [f32; 3],
280    _p1: f32,
281    cam_down: [f32; 3],
282    _p2: f32,
283    cam_forward: [f32; 3],
284    _p3: f32,
285    fog_color: [f32; 4],
286    screen_size: [u32; 2],
287    instance_count: u32,
288    fog_far: f32,
289    fov_y_rad: f32,
290    tiles_x: u32,
291    tile_size: u32,
292    _p6: f32,
293}
294
295const SCENE_MAX_GRIDS: usize = MAX_SCENE_GRIDS as usize;
296
297/// GPU.10.3 — sprite screen-tile edge in pixels for instance binning.
298const SPRITE_TILE_SIZE: u32 = 16;
299
300// The scene_dda bind group + layout wire occupancy pages 1..=3 at
301// bindings 12..=14 explicitly; keep that in lockstep with the page
302// count. Bump the bindings (here, in the WGSL, and in the bind
303// group) if MAX_OCC_PAGES changes.
304const _: () = assert!(scene::MAX_OCC_PAGES == 4);
305
306#[repr(C)]
307#[derive(Clone, Copy, Pod, Zeroable)]
308struct SceneDdaPerGridCamera {
309    pos: [f32; 3],
310    _pad0: f32,
311    right: [f32; 3],
312    _pad1: f32,
313    down: [f32; 3],
314    _pad2: f32,
315    forward: [f32; 3],
316    _pad3: f32,
317}
318
319#[repr(C)]
320#[derive(Clone, Copy, Pod, Zeroable)]
321struct SceneDdaUniform {
322    fov_y_rad: f32,
323    grid_count: u32,
324    max_outer_steps: u32,
325    _pad0: u32,
326    screen_size: [u32; 2],
327    _pad1: [u32; 2],
328    cameras: [SceneDdaPerGridCamera; SCENE_MAX_GRIDS],
329    /// GPU.8 — `[r, g, b, fog_near]`. The `near` distance is packed
330    /// into the colour's alpha channel to keep std140 alignment
331    /// tidy (a bare `f32` after the `vec4` would force extra pads).
332    fog_color: [f32; 4],
333    fog_far: f32,
334    /// GPU.9 — `1` when the sprite pass is active (scene pass then
335    /// records `best_t` into the depth buffer), `0` otherwise.
336    write_depth: u32,
337    /// Occupancy paging: words per storage page (see
338    /// `scene::split_occupancy_pages`). Only consulted by the shader
339    /// when `occ_num_pages > 1`.
340    occ_page_words: u32,
341    /// Number of real occupancy pages (1 on multi-GiB GPUs → the
342    /// shader takes a branch-free single-page read).
343    occ_num_pages: u32,
344    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
345    /// entered at world-t `t` marches at mip
346    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
347    /// count. `0` disables LOD (always mip-0).
348    mip_scan_dist: f32,
349    _pad2: u32,
350    _pad3: u32,
351    _pad4: u32,
352}
353
354#[repr(C)]
355#[derive(Clone, Copy, Pod, Zeroable)]
356struct GridDdaUniform {
357    camera_pos: [f32; 3],
358    _pad0: f32,
359    camera_right: [f32; 3],
360    _pad1: f32,
361    camera_down: [f32; 3],
362    _pad2: f32,
363    camera_forward: [f32; 3],
364    fov_y_rad: f32,
365    screen_size: [u32; 2],
366    vsid: u32,
367    max_outer_steps: u32,
368    chunks_dims: [u32; 3],
369    _pad3: u32,
370    origin_chunk: [i32; 3],
371    _pad4: u32,
372}
373
374#[repr(C)]
375#[derive(Clone, Copy, Pod, Zeroable)]
376struct ChunkDdaUniform {
377    camera_pos: [f32; 3],
378    _pad0: f32,
379    camera_right: [f32; 3],
380    _pad1: f32,
381    camera_down: [f32; 3],
382    _pad2: f32,
383    camera_forward: [f32; 3],
384    fov_y_rad: f32,
385    screen_size: [u32; 2],
386    vsid: u32,
387    max_scan_dist: u32,
388}
389
390impl GpuRenderer {
391    /// Stand up the device + surface + swapchain on `window`. Async
392    /// because `wgpu::Adapter`/`Device` requests are.
393    ///
394    /// `window` is any [`raw-window-handle`] provider (winit, SDL,
395    /// GLFW, …) wrapped in an `Arc`; `size` is its initial physical
396    /// framebuffer size in pixels — passed explicitly so the renderer
397    /// stays decoupled from any one windowing library's size API.
398    ///
399    /// [`raw-window-handle`]: raw_window_handle
400    ///
401    /// # Errors
402    /// Returns [`GpuInitError`] if surface creation, adapter
403    /// selection, or device request fails. Hosts treat any error as
404    /// "fall back to the CPU path".
405    pub async fn new<W>(
406        window: Arc<W>,
407        size: (u32, u32),
408        settings: GpuRendererSettings,
409    ) -> Result<Self, GpuInitError>
410    where
411        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
412    {
413        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::default());
414        let surface = instance.create_surface(window.clone())?;
415        let power_preference = match settings.power_preference {
416            PowerPreference::Low => wgpu::PowerPreference::LowPower,
417            PowerPreference::High => wgpu::PowerPreference::HighPerformance,
418        };
419        let adapter = instance
420            .request_adapter(&wgpu::RequestAdapterOptions {
421                power_preference,
422                compatible_surface: Some(&surface),
423                force_fallback_adapter: false,
424            })
425            .await
426            .ok_or(GpuInitError::NoAdapter)?;
427
428        let info = adapter.get_info();
429        let adapter_info = format!(
430            "{name} ({backend:?}, {device_type:?})",
431            name = info.name,
432            backend = info.backend,
433            device_type = info.device_type,
434        );
435
436        let (device, queue) = adapter
437            .request_device(
438                &wgpu::DeviceDescriptor {
439                    label: Some("roxlap-gpu device"),
440                    required_features: wgpu::Features::empty(),
441                    required_limits: pick_required_limits(&adapter.limits()),
442                    memory_hints: wgpu::MemoryHints::default(),
443                },
444                None,
445            )
446            .await?;
447
448        let caps = surface.get_capabilities(&adapter);
449        // Pick a NON-sRGB swapchain format. Voxlap colours are
450        // already sRGB-encoded (the slab bytes are display-ready,
451        // matching what the CPU softbuffer path writes straight to
452        // the framebuffer with no conversion). An sRGB swapchain
453        // would re-apply the gamma curve on top, producing a
454        // washed-out / pastel look that diverges from the CPU
455        // renderer. Falls back to `caps.formats[0]` only if every
456        // offered format is sRGB.
457        let surface_format = caps
458            .formats
459            .iter()
460            .copied()
461            .find(|f| !f.is_srgb())
462            .unwrap_or(caps.formats[0]);
463        let present_mode = if settings.uncapped_present {
464            pick_present_mode(&caps.present_modes)
465        } else {
466            wgpu::PresentMode::Fifo
467        };
468        // GPU.11.2 — surface the present mode: `Fifo` is vsync-capped
469        // (FPS pinned to refresh rate → compute optimisations like the
470        // mip LOD won't show up in the FPS counter). Mailbox/Immediate
471        // are uncapped. Wayland under Mesa frequently offers only Fifo.
472        eprintln!(
473            "roxlap-gpu: present mode = {present_mode:?} (available: {:?})",
474            caps.present_modes,
475        );
476        let (init_w, init_h) = size;
477        let surface_config = wgpu::SurfaceConfiguration {
478            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
479            format: surface_format,
480            width: init_w.max(1),
481            height: init_h.max(1),
482            present_mode,
483            alpha_mode: caps.alpha_modes[0],
484            view_formats: vec![],
485            desired_maximum_frame_latency: 2,
486        };
487        surface.configure(&device, &surface_config);
488
489        // GPU.8 default sky: a 1×1 mid-grey texture. Hosts replace
490        // it via `set_sky_panorama` with a real equirectangular
491        // panorama; the default stops the shader sampling
492        // uninitialised memory before that happens.
493        let default_sky_pixel = [0x80u8, 0x80, 0x80, 0xff];
494        let (sky_texture, sky_view) = create_sky_texture(&device, 1, 1, &default_sky_pixel);
495        queue.write_texture(
496            wgpu::ImageCopyTexture {
497                texture: &sky_texture,
498                mip_level: 0,
499                origin: wgpu::Origin3d::ZERO,
500                aspect: wgpu::TextureAspect::All,
501            },
502            &default_sky_pixel,
503            wgpu::ImageDataLayout {
504                offset: 0,
505                bytes_per_row: Some(4),
506                rows_per_image: Some(1),
507            },
508            wgpu::Extent3d {
509                width: 1,
510                height: 1,
511                depth_or_array_layers: 1,
512            },
513        );
514        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
515            label: Some("roxlap-gpu sky_sampler"),
516            // Voxlap-convention panorama: u = elevation [0, 1]
517            // (Repeat is a no-op since values don't go outside),
518            // v = azimuth (wraps 360° — Repeat is required).
519            address_mode_u: wgpu::AddressMode::Repeat,
520            address_mode_v: wgpu::AddressMode::Repeat,
521            address_mode_w: wgpu::AddressMode::ClampToEdge,
522            mag_filter: wgpu::FilterMode::Linear,
523            min_filter: wgpu::FilterMode::Linear,
524            mipmap_filter: wgpu::FilterMode::Nearest,
525            ..Default::default()
526        });
527
528        Ok(Self {
529            surface,
530            surface_config,
531            device,
532            queue,
533            adapter_info,
534            clear_colour: settings.clear_colour,
535            frame_count: 0,
536            chunk_dda: None,
537            grid_dda: None,
538            scene_dda: None,
539            sky_texture,
540            sky_view,
541            sky_sampler,
542            // Fog disabled by default — voxlap's CPU rasterizer
543            // also runs without fog in the scene-demo, so matching
544            // it means no GPU fog out of the box. Hosts can opt in
545            // via `set_fog` (e.g. for atmospheric far-LOD masking).
546            fog_color: [0.66, 0.74, 0.88],
547            fog_near: 0.0,
548            fog_far: 1.0e30,
549            sprite_registry: None,
550            sprite_model_dda: None,
551            // GPU.10.4 — default LOD threshold: step to a coarser mip
552            // once a voxel projects below 4 px. Empirically the best
553            // quality/cost tradeoff; the host can override.
554            sprite_lod_px: 4.0,
555            // GPU.11.1 — matches the CPU demo's mip_scan_dist=64.
556            scene_mip_scan_dist: 64.0,
557            last_fov_y_rad: 0.0,
558            pending_frame: None,
559            #[cfg(feature = "hud")]
560            egui_renderer: None,
561        })
562    }
563
564    /// Synchronous wrapper for hosts that don't have an async
565    /// runtime. Internally `pollster::block_on`s [`Self::new`].
566    ///
567    /// # Errors
568    /// See [`Self::new`].
569    pub fn new_blocking<W>(
570        window: Arc<W>,
571        size: (u32, u32),
572        settings: GpuRendererSettings,
573    ) -> Result<Self, GpuInitError>
574    where
575        W: HasWindowHandle + HasDisplayHandle + Send + Sync + 'static,
576    {
577        pollster::block_on(Self::new(window, size, settings))
578    }
579
580    /// Human-readable adapter description — name + backend +
581    /// device type. The demo host prints this in the title bar.
582    pub fn adapter_info(&self) -> &str {
583        &self.adapter_info
584    }
585
586    /// Borrow the underlying wgpu device — hosts use this to build
587    /// chunk uploads (`GpuChunkResident::upload(gpu.device(), …)`).
588    pub fn device(&self) -> &wgpu::Device {
589        &self.device
590    }
591
592    /// Borrow the wgpu queue — hosts use this for read-back paths
593    /// (`GpuChunkResident::read_voxel_blocking(gpu.device(), gpu.queue(), …)`).
594    pub fn queue(&self) -> &wgpu::Queue {
595        &self.queue
596    }
597
598    /// GPU.8 — upload an equirectangular panorama as the scene's
599    /// sky texture. `rgba` is row-major, `width × height` pixels,
600    /// 4 bytes per pixel (R, G, B, A). The shader samples it with
601    /// `u = atan2(dir.x, dir.y) / (2π) + 0.5` (azimuth) and
602    /// `v = acos(-dir.z) / π` (elevation), matching standard
603    /// equirectangular layout (top of image = zenith for voxlap's
604    /// `+z = down` basis).
605    ///
606    /// # Panics
607    /// If `rgba.len() != (width * height * 4) as usize`.
608    pub fn set_sky_panorama(&mut self, rgba: &[u8], width: u32, height: u32) {
609        assert_eq!(
610            rgba.len(),
611            (width as usize) * (height as usize) * 4,
612            "set_sky_panorama: expected w*h*4 bytes, got {}",
613            rgba.len(),
614        );
615        let (tex, view) = create_sky_texture(&self.device, width, height, rgba);
616        // Upload pixel data via `queue.write_texture` so we don't
617        // have to map the buffer manually.
618        self.queue.write_texture(
619            wgpu::ImageCopyTexture {
620                texture: &tex,
621                mip_level: 0,
622                origin: wgpu::Origin3d::ZERO,
623                aspect: wgpu::TextureAspect::All,
624            },
625            rgba,
626            wgpu::ImageDataLayout {
627                offset: 0,
628                bytes_per_row: Some(width * 4),
629                rows_per_image: Some(height),
630            },
631            wgpu::Extent3d {
632                width,
633                height,
634                depth_or_array_layers: 1,
635            },
636        );
637        self.sky_texture = tex;
638        self.sky_view = view;
639    }
640
641    /// GPU.8 — set the fog blend. `color` is per-channel [0, 1];
642    /// `near`/`far` are world-space ray distances in voxel units.
643    /// Hits with `t < near` show their full colour; hits with
644    /// `t > far` show `color` exclusively; in between is a
645    /// smoothstep blend.
646    pub fn set_fog(&mut self, color: [f32; 3], near: f32, far: f32) {
647        self.fog_color = color;
648        self.fog_near = near;
649        self.fog_far = far.max(near + 1.0);
650    }
651
652    /// Re-configure the swapchain to a new physical size. Call from
653    /// `WindowEvent::Resized`. Drops the chunk-DDA storage texture
654    /// so [`Self::render_chunk`] rebuilds it at the new size.
655    pub fn resize(&mut self, width: u32, height: u32) {
656        if width == 0 || height == 0 {
657            return;
658        }
659        self.surface_config.width = width;
660        self.surface_config.height = height;
661        self.surface.configure(&self.device, &self.surface_config);
662        self.chunk_dda = None;
663        self.grid_dda = None;
664        self.scene_dda = None;
665    }
666
667    /// GPU.1 render: single render pass clearing the swapchain to a
668    /// slowly drifting colour, then presenting. Voxels arrive in
669    /// GPU.3+.
670    pub fn render(&mut self) {
671        let surf_tex = match self.surface.get_current_texture() {
672            Ok(t) => t,
673            Err(wgpu::SurfaceError::Outdated | wgpu::SurfaceError::Lost) => {
674                self.surface.configure(&self.device, &self.surface_config);
675                return;
676            }
677            Err(e) => {
678                eprintln!("roxlap-gpu surface error: {e:?}");
679                return;
680            }
681        };
682        let view = surf_tex
683            .texture
684            .create_view(&wgpu::TextureViewDescriptor::default());
685
686        // Slow colour drift so the user can tell the GPU path is
687        // actually presenting frames vs. e.g. a frozen window.
688        // Wrap at 2π/0.005 frames (~1257) so the cast stays exact.
689        let phase = f64::from(self.frame_count % 1257) * 0.005;
690        let [r, g, b] = self.clear_colour;
691        let drift = (phase.sin() * 0.04 + 0.04).clamp(0.0, 0.1);
692        let clear = wgpu::Color {
693            r: (r + drift).clamp(0.0, 1.0),
694            g: (g + drift * 0.5).clamp(0.0, 1.0),
695            b: (b + drift * 0.25).clamp(0.0, 1.0),
696            a: 1.0,
697        };
698
699        let mut encoder = self
700            .device
701            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
702                label: Some("roxlap-gpu encoder"),
703            });
704        {
705            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
706                label: Some("roxlap-gpu clear"),
707                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
708                    view: &view,
709                    resolve_target: None,
710                    ops: wgpu::Operations {
711                        load: wgpu::LoadOp::Clear(clear),
712                        store: wgpu::StoreOp::Store,
713                    },
714                })],
715                depth_stencil_attachment: None,
716                timestamp_writes: None,
717                occlusion_query_set: None,
718            });
719        }
720        self.queue.submit(std::iter::once(encoder.finish()));
721        surf_tex.present();
722        self.frame_count = self.frame_count.wrapping_add(1);
723    }
724
725    /// GPU.3 single-chunk render. Dispatches `chunk_dda.wgsl`
726    /// against `resident`'s storage buffers, then blits the
727    /// low-res storage texture to the swapchain. `camera.position`
728    /// is in **chunk-local** voxel units (host translates from
729    /// world coords). `max_scan_dist` caps the per-pixel DDA loop —
730    /// scene-demo wires `+` / `-` through this each frame.
731    ///
732    /// # Panics
733    /// Internally `expect`s the chunk-DDA resources to be built —
734    /// they are constructed at the top of this function if missing.
735    /// Cannot fire in normal control flow.
736    pub fn render_chunk(
737        &mut self,
738        resident: &GpuChunkResident,
739        camera: &Camera,
740        max_scan_dist: u32,
741    ) {
742        let surf_tex = match self.surface.get_current_texture() {
743            Ok(t) => t,
744            Err(wgpu::SurfaceError::Outdated | wgpu::SurfaceError::Lost) => {
745                self.surface.configure(&self.device, &self.surface_config);
746                return;
747            }
748            Err(e) => {
749                eprintln!("roxlap-gpu surface error: {e:?}");
750                return;
751            }
752        };
753        let surf_view = surf_tex
754            .texture
755            .create_view(&wgpu::TextureViewDescriptor::default());
756
757        let surface_w = self.surface_config.width;
758        let surface_h = self.surface_config.height;
759        let surface_format = self.surface_config.format;
760
761        // Lazy-build chunk-DDA resources; rebuild when the swapchain
762        // grew or shrank.
763        let needs_build = match &self.chunk_dda {
764            Some(r) => r.storage_size != (surface_w, surface_h),
765            None => true,
766        };
767        if needs_build {
768            self.chunk_dda = Some(self.build_chunk_dda(surface_w, surface_h, surface_format));
769        }
770        let dda = self.chunk_dda.as_ref().expect("just built");
771
772        // Update uniforms.
773        let uniform = ChunkDdaUniform {
774            camera_pos: camera.position,
775            _pad0: 0.0,
776            camera_right: camera.right,
777            _pad1: 0.0,
778            camera_down: camera.down,
779            _pad2: 0.0,
780            camera_forward: camera.forward,
781            fov_y_rad: camera.fov_y_rad,
782            screen_size: [surface_w, surface_h],
783            vsid: resident.vsid,
784            max_scan_dist,
785        };
786        self.queue
787            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
788
789        // Per-frame DDA bind group — references the chunk's buffers
790        // so we rebuild every frame (the resident can change between
791        // calls).
792        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
793            label: Some("roxlap-gpu chunk_dda.bg"),
794            layout: &dda.bgl_dda,
795            entries: &[
796                wgpu::BindGroupEntry {
797                    binding: 0,
798                    resource: dda.uniform_buf.as_entire_binding(),
799                },
800                wgpu::BindGroupEntry {
801                    binding: 1,
802                    resource: resident.occupancy.as_entire_binding(),
803                },
804                wgpu::BindGroupEntry {
805                    binding: 2,
806                    resource: resident.color_offsets.as_entire_binding(),
807                },
808                wgpu::BindGroupEntry {
809                    binding: 3,
810                    resource: resident.colors.as_entire_binding(),
811                },
812                wgpu::BindGroupEntry {
813                    binding: 4,
814                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
815                },
816            ],
817        });
818
819        let mut encoder = self
820            .device
821            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
822                label: Some("roxlap-gpu chunk encoder"),
823            });
824        {
825            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
826                label: Some("roxlap-gpu chunk_dda compute"),
827                timestamp_writes: None,
828            });
829            cpass.set_pipeline(&dda.pipeline_dda);
830            cpass.set_bind_group(0, &dda_bg, &[]);
831            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
832        }
833        {
834            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
835                label: Some("roxlap-gpu chunk_dda blit"),
836                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
837                    view: &surf_view,
838                    resolve_target: None,
839                    ops: wgpu::Operations {
840                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
841                        store: wgpu::StoreOp::Store,
842                    },
843                })],
844                depth_stencil_attachment: None,
845                timestamp_writes: None,
846                occlusion_query_set: None,
847            });
848            rpass.set_pipeline(&dda.pipeline_blit);
849            rpass.set_bind_group(0, &dda.blit_bg, &[]);
850            rpass.draw(0..3, 0..1);
851        }
852        self.queue.submit(std::iter::once(encoder.finish()));
853        surf_tex.present();
854        self.frame_count = self.frame_count.wrapping_add(1);
855    }
856
857    fn build_chunk_dda(
858        &self,
859        width: u32,
860        height: u32,
861        surface_format: wgpu::TextureFormat,
862    ) -> ChunkDdaResources {
863        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
864            label: Some("roxlap-gpu chunk_dda.storage"),
865            size: wgpu::Extent3d {
866                width,
867                height,
868                depth_or_array_layers: 1,
869            },
870            mip_level_count: 1,
871            sample_count: 1,
872            dimension: wgpu::TextureDimension::D2,
873            format: wgpu::TextureFormat::Rgba8Unorm,
874            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
875            view_formats: &[],
876        });
877        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
878
879        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
880            label: Some("roxlap-gpu chunk_dda.uniform"),
881            size: std::mem::size_of::<ChunkDdaUniform>() as u64,
882            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
883            mapped_at_creation: false,
884        });
885
886        let dda_shader = self
887            .device
888            .create_shader_module(wgpu::ShaderModuleDescriptor {
889                label: Some("chunk_dda.wgsl"),
890                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/chunk_dda.wgsl").into()),
891            });
892        let bgl_dda = self
893            .device
894            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
895                label: Some("roxlap-gpu chunk_dda.bgl"),
896                entries: &[
897                    bgl_uniform_entry(0),
898                    bgl_storage_entry(1, true),
899                    bgl_storage_entry(2, true),
900                    bgl_storage_entry(3, true),
901                    wgpu::BindGroupLayoutEntry {
902                        binding: 4,
903                        visibility: wgpu::ShaderStages::COMPUTE,
904                        ty: wgpu::BindingType::StorageTexture {
905                            access: wgpu::StorageTextureAccess::WriteOnly,
906                            format: wgpu::TextureFormat::Rgba8Unorm,
907                            view_dimension: wgpu::TextureViewDimension::D2,
908                        },
909                        count: None,
910                    },
911                ],
912            });
913        let dda_pl = self
914            .device
915            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
916                label: Some("roxlap-gpu chunk_dda.layout"),
917                bind_group_layouts: &[&bgl_dda],
918                push_constant_ranges: &[],
919            });
920        let pipeline_dda = self
921            .device
922            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
923                label: Some("roxlap-gpu chunk_dda.pipeline"),
924                layout: Some(&dda_pl),
925                module: &dda_shader,
926                entry_point: "render_chunk",
927                compilation_options: wgpu::PipelineCompilationOptions::default(),
928                cache: None,
929            });
930
931        // Fullscreen-triangle blit upscales the storage texture into
932        // the swapchain. Nearest filter keeps the retro pixel look.
933        let blit_shader = self
934            .device
935            .create_shader_module(wgpu::ShaderModuleDescriptor {
936                label: Some("blit.wgsl"),
937                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
938            });
939        let bgl_blit = self
940            .device
941            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
942                label: Some("roxlap-gpu chunk_dda.blit_bgl"),
943                entries: &[
944                    wgpu::BindGroupLayoutEntry {
945                        binding: 0,
946                        visibility: wgpu::ShaderStages::FRAGMENT,
947                        ty: wgpu::BindingType::Texture {
948                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
949                            view_dimension: wgpu::TextureViewDimension::D2,
950                            multisampled: false,
951                        },
952                        count: None,
953                    },
954                    wgpu::BindGroupLayoutEntry {
955                        binding: 1,
956                        visibility: wgpu::ShaderStages::FRAGMENT,
957                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
958                        count: None,
959                    },
960                ],
961            });
962        let blit_pl = self
963            .device
964            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
965                label: Some("roxlap-gpu chunk_dda.blit_layout"),
966                bind_group_layouts: &[&bgl_blit],
967                push_constant_ranges: &[],
968            });
969        let pipeline_blit = self
970            .device
971            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
972                label: Some("roxlap-gpu chunk_dda.blit_pipeline"),
973                layout: Some(&blit_pl),
974                vertex: wgpu::VertexState {
975                    module: &blit_shader,
976                    entry_point: "vs_main",
977                    compilation_options: wgpu::PipelineCompilationOptions::default(),
978                    buffers: &[],
979                },
980                fragment: Some(wgpu::FragmentState {
981                    module: &blit_shader,
982                    entry_point: "fs_main",
983                    compilation_options: wgpu::PipelineCompilationOptions::default(),
984                    targets: &[Some(wgpu::ColorTargetState {
985                        format: surface_format,
986                        blend: None,
987                        write_mask: wgpu::ColorWrites::ALL,
988                    })],
989                }),
990                primitive: wgpu::PrimitiveState::default(),
991                depth_stencil: None,
992                multisample: wgpu::MultisampleState::default(),
993                multiview: None,
994                cache: None,
995            });
996        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
997            label: Some("roxlap-gpu chunk_dda.blit_sampler"),
998            address_mode_u: wgpu::AddressMode::ClampToEdge,
999            address_mode_v: wgpu::AddressMode::ClampToEdge,
1000            address_mode_w: wgpu::AddressMode::ClampToEdge,
1001            mag_filter: wgpu::FilterMode::Nearest,
1002            min_filter: wgpu::FilterMode::Nearest,
1003            mipmap_filter: wgpu::FilterMode::Nearest,
1004            ..Default::default()
1005        });
1006        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1007            label: Some("roxlap-gpu chunk_dda.blit_bg"),
1008            layout: &bgl_blit,
1009            entries: &[
1010                wgpu::BindGroupEntry {
1011                    binding: 0,
1012                    resource: wgpu::BindingResource::TextureView(&storage_view),
1013                },
1014                wgpu::BindGroupEntry {
1015                    binding: 1,
1016                    resource: wgpu::BindingResource::Sampler(&sampler),
1017                },
1018            ],
1019        });
1020
1021        ChunkDdaResources {
1022            storage_size: (width, height),
1023            storage_view,
1024            uniform_buf,
1025            bgl_dda,
1026            pipeline_dda,
1027            blit_bg,
1028            pipeline_blit,
1029            _sampler: sampler,
1030        }
1031    }
1032
1033    /// GPU.4 render — outer DDA over chunk indices + inner DDA into
1034    /// non-empty chunks. `camera.position` is in **grid-local**
1035    /// voxel units. `max_outer_steps` caps how many chunks the
1036    /// outer DDA may traverse per ray (scene-demo wires `+ / -`
1037    /// through this).
1038    ///
1039    /// # Panics
1040    /// Internally `expect`s the grid-DDA resources to be built;
1041    /// they are constructed at the top of this function if missing.
1042    pub fn render_grid(&mut self, grid: &GpuGridResident, camera: &Camera, max_outer_steps: u32) {
1043        let surf_tex = match self.surface.get_current_texture() {
1044            Ok(t) => t,
1045            Err(wgpu::SurfaceError::Outdated | wgpu::SurfaceError::Lost) => {
1046                self.surface.configure(&self.device, &self.surface_config);
1047                return;
1048            }
1049            Err(e) => {
1050                eprintln!("roxlap-gpu surface error: {e:?}");
1051                return;
1052            }
1053        };
1054        let surf_view = surf_tex
1055            .texture
1056            .create_view(&wgpu::TextureViewDescriptor::default());
1057
1058        let surface_w = self.surface_config.width;
1059        let surface_h = self.surface_config.height;
1060        let surface_format = self.surface_config.format;
1061
1062        let needs_build = match &self.grid_dda {
1063            Some(r) => r.storage_size != (surface_w, surface_h),
1064            None => true,
1065        };
1066        if needs_build {
1067            self.grid_dda = Some(self.build_grid_dda(surface_w, surface_h, surface_format));
1068        }
1069        let dda = self.grid_dda.as_ref().expect("just built");
1070
1071        let uniform = GridDdaUniform {
1072            camera_pos: camera.position,
1073            _pad0: 0.0,
1074            camera_right: camera.right,
1075            _pad1: 0.0,
1076            camera_down: camera.down,
1077            _pad2: 0.0,
1078            camera_forward: camera.forward,
1079            fov_y_rad: camera.fov_y_rad,
1080            screen_size: [surface_w, surface_h],
1081            vsid: grid.vsid,
1082            max_outer_steps,
1083            chunks_dims: grid.chunks_dims,
1084            _pad3: 0,
1085            origin_chunk: grid.origin_chunk,
1086            _pad4: 0,
1087        };
1088        self.queue
1089            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1090
1091        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1092            label: Some("roxlap-gpu grid_dda.bg"),
1093            layout: &dda.bgl_dda,
1094            entries: &[
1095                wgpu::BindGroupEntry {
1096                    binding: 0,
1097                    resource: dda.uniform_buf.as_entire_binding(),
1098                },
1099                wgpu::BindGroupEntry {
1100                    binding: 1,
1101                    resource: grid.occupancy.as_entire_binding(),
1102                },
1103                wgpu::BindGroupEntry {
1104                    binding: 2,
1105                    resource: grid.color_offsets.as_entire_binding(),
1106                },
1107                wgpu::BindGroupEntry {
1108                    binding: 3,
1109                    resource: grid.colors.as_entire_binding(),
1110                },
1111                wgpu::BindGroupEntry {
1112                    binding: 4,
1113                    resource: grid.chunk_colors_base.as_entire_binding(),
1114                },
1115                wgpu::BindGroupEntry {
1116                    binding: 5,
1117                    resource: grid.chunk_occupancy.as_entire_binding(),
1118                },
1119                wgpu::BindGroupEntry {
1120                    binding: 6,
1121                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1122                },
1123            ],
1124        });
1125
1126        let mut encoder = self
1127            .device
1128            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1129                label: Some("roxlap-gpu grid encoder"),
1130            });
1131        {
1132            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1133                label: Some("roxlap-gpu grid_dda compute"),
1134                timestamp_writes: None,
1135            });
1136            cpass.set_pipeline(&dda.pipeline_dda);
1137            cpass.set_bind_group(0, &dda_bg, &[]);
1138            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1139        }
1140        {
1141            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1142                label: Some("roxlap-gpu grid_dda blit"),
1143                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1144                    view: &surf_view,
1145                    resolve_target: None,
1146                    ops: wgpu::Operations {
1147                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1148                        store: wgpu::StoreOp::Store,
1149                    },
1150                })],
1151                depth_stencil_attachment: None,
1152                timestamp_writes: None,
1153                occlusion_query_set: None,
1154            });
1155            rpass.set_pipeline(&dda.pipeline_blit);
1156            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1157            rpass.draw(0..3, 0..1);
1158        }
1159        self.queue.submit(std::iter::once(encoder.finish()));
1160        surf_tex.present();
1161        self.frame_count = self.frame_count.wrapping_add(1);
1162    }
1163
1164    fn build_grid_dda(
1165        &self,
1166        width: u32,
1167        height: u32,
1168        surface_format: wgpu::TextureFormat,
1169    ) -> GridDdaResources {
1170        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1171            label: Some("roxlap-gpu grid_dda.storage"),
1172            size: wgpu::Extent3d {
1173                width,
1174                height,
1175                depth_or_array_layers: 1,
1176            },
1177            mip_level_count: 1,
1178            sample_count: 1,
1179            dimension: wgpu::TextureDimension::D2,
1180            format: wgpu::TextureFormat::Rgba8Unorm,
1181            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1182            view_formats: &[],
1183        });
1184        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1185
1186        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1187            label: Some("roxlap-gpu grid_dda.uniform"),
1188            size: std::mem::size_of::<GridDdaUniform>() as u64,
1189            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1190            mapped_at_creation: false,
1191        });
1192
1193        let dda_shader = self
1194            .device
1195            .create_shader_module(wgpu::ShaderModuleDescriptor {
1196                label: Some("grid_dda.wgsl"),
1197                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/grid_dda.wgsl").into()),
1198            });
1199        let bgl_dda = self
1200            .device
1201            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1202                label: Some("roxlap-gpu grid_dda.bgl"),
1203                entries: &[
1204                    bgl_uniform_entry(0),
1205                    bgl_storage_entry(1, true),
1206                    bgl_storage_entry(2, true),
1207                    bgl_storage_entry(3, true),
1208                    bgl_storage_entry(4, true),
1209                    bgl_storage_entry(5, true),
1210                    wgpu::BindGroupLayoutEntry {
1211                        binding: 6,
1212                        visibility: wgpu::ShaderStages::COMPUTE,
1213                        ty: wgpu::BindingType::StorageTexture {
1214                            access: wgpu::StorageTextureAccess::WriteOnly,
1215                            format: wgpu::TextureFormat::Rgba8Unorm,
1216                            view_dimension: wgpu::TextureViewDimension::D2,
1217                        },
1218                        count: None,
1219                    },
1220                ],
1221            });
1222        let dda_pl = self
1223            .device
1224            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1225                label: Some("roxlap-gpu grid_dda.layout"),
1226                bind_group_layouts: &[&bgl_dda],
1227                push_constant_ranges: &[],
1228            });
1229        let pipeline_dda = self
1230            .device
1231            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1232                label: Some("roxlap-gpu grid_dda.pipeline"),
1233                layout: Some(&dda_pl),
1234                module: &dda_shader,
1235                entry_point: "render_grid",
1236                compilation_options: wgpu::PipelineCompilationOptions::default(),
1237                cache: None,
1238            });
1239
1240        let blit_shader = self
1241            .device
1242            .create_shader_module(wgpu::ShaderModuleDescriptor {
1243                label: Some("blit.wgsl"),
1244                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1245            });
1246        let bgl_blit = self
1247            .device
1248            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1249                label: Some("roxlap-gpu grid_dda.blit_bgl"),
1250                entries: &[
1251                    wgpu::BindGroupLayoutEntry {
1252                        binding: 0,
1253                        visibility: wgpu::ShaderStages::FRAGMENT,
1254                        ty: wgpu::BindingType::Texture {
1255                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1256                            view_dimension: wgpu::TextureViewDimension::D2,
1257                            multisampled: false,
1258                        },
1259                        count: None,
1260                    },
1261                    wgpu::BindGroupLayoutEntry {
1262                        binding: 1,
1263                        visibility: wgpu::ShaderStages::FRAGMENT,
1264                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1265                        count: None,
1266                    },
1267                ],
1268            });
1269        let blit_pl = self
1270            .device
1271            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1272                label: Some("roxlap-gpu grid_dda.blit_layout"),
1273                bind_group_layouts: &[&bgl_blit],
1274                push_constant_ranges: &[],
1275            });
1276        let pipeline_blit = self
1277            .device
1278            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1279                label: Some("roxlap-gpu grid_dda.blit_pipeline"),
1280                layout: Some(&blit_pl),
1281                vertex: wgpu::VertexState {
1282                    module: &blit_shader,
1283                    entry_point: "vs_main",
1284                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1285                    buffers: &[],
1286                },
1287                fragment: Some(wgpu::FragmentState {
1288                    module: &blit_shader,
1289                    entry_point: "fs_main",
1290                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1291                    targets: &[Some(wgpu::ColorTargetState {
1292                        format: surface_format,
1293                        blend: None,
1294                        write_mask: wgpu::ColorWrites::ALL,
1295                    })],
1296                }),
1297                primitive: wgpu::PrimitiveState::default(),
1298                depth_stencil: None,
1299                multisample: wgpu::MultisampleState::default(),
1300                multiview: None,
1301                cache: None,
1302            });
1303        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1304            label: Some("roxlap-gpu grid_dda.blit_sampler"),
1305            address_mode_u: wgpu::AddressMode::ClampToEdge,
1306            address_mode_v: wgpu::AddressMode::ClampToEdge,
1307            address_mode_w: wgpu::AddressMode::ClampToEdge,
1308            mag_filter: wgpu::FilterMode::Nearest,
1309            min_filter: wgpu::FilterMode::Nearest,
1310            mipmap_filter: wgpu::FilterMode::Nearest,
1311            ..Default::default()
1312        });
1313        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1314            label: Some("roxlap-gpu grid_dda.blit_bg"),
1315            layout: &bgl_blit,
1316            entries: &[
1317                wgpu::BindGroupEntry {
1318                    binding: 0,
1319                    resource: wgpu::BindingResource::TextureView(&storage_view),
1320                },
1321                wgpu::BindGroupEntry {
1322                    binding: 1,
1323                    resource: wgpu::BindingResource::Sampler(&sampler),
1324                },
1325            ],
1326        });
1327
1328        GridDdaResources {
1329            storage_size: (width, height),
1330            storage_view,
1331            uniform_buf,
1332            bgl_dda,
1333            pipeline_dda,
1334            blit_bg,
1335            pipeline_blit,
1336            _sampler: sampler,
1337        }
1338    }
1339
    /// GPU.5 render — multi-grid scene marcher. `cameras[i]` is the
    /// world camera transformed into grid `i`'s local frame
    /// (caller-supplied; see scene-demo's `redraw_gpu` for the
    /// glam-based transform). `fov_y_rad` is the shared vertical
    /// FOV; `max_outer_steps` caps per-ray chunk-DDA work for each
    /// grid.
    ///
    /// # Panics
    /// If `cameras.len() != scene.grid_count` or
    /// `scene.grid_count > MAX_SCENE_GRIDS`.
    ///
    /// # Camera frames
    /// `cameras[i]` is grid `i`'s world camera transformed into that
    /// grid's local frame (the grid marcher works in grid-local space).
    /// `sprite_camera` is the **world** camera: instanced sprites carry
    /// world-space positions/transforms, so they must project through
    /// the untransformed world camera — not `cameras[0]`, which is only
    /// the world camera when grid 0 is at identity.
1356    pub fn render_scene(
1357        &mut self,
1358        scene: &GpuSceneResident,
1359        cameras: &[Camera],
1360        sprite_camera: &Camera,
1361        fov_y_rad: f32,
1362        max_outer_steps: u32,
1363    ) {
1364        assert_eq!(
1365            cameras.len(),
1366            scene.grid_count as usize,
1367            "render_scene: {} cameras supplied, scene has {} grids",
1368            cameras.len(),
1369            scene.grid_count,
1370        );
1371        assert!(
1372            scene.grid_count as usize <= SCENE_MAX_GRIDS,
1373            "render_scene: scene has {} grids, shader supports {}",
1374            scene.grid_count,
1375            SCENE_MAX_GRIDS,
1376        );
1377        self.last_fov_y_rad = fov_y_rad; // cached for pixel_ray (picking)
1378
1379        // Deferred present: drop any frame a prior render left
1380        // un-presented (a host that skipped present/paint_egui) so we
1381        // never hold two outstanding swapchain textures.
1382        self.pending_frame = None;
1383        let surf_tex = match self.surface.get_current_texture() {
1384            Ok(t) => t,
1385            Err(wgpu::SurfaceError::Outdated | wgpu::SurfaceError::Lost) => {
1386                self.surface.configure(&self.device, &self.surface_config);
1387                return;
1388            }
1389            Err(e) => {
1390                eprintln!("roxlap-gpu surface error: {e:?}");
1391                return;
1392            }
1393        };
1394        let surf_view = surf_tex
1395            .texture
1396            .create_view(&wgpu::TextureViewDescriptor::default());
1397
1398        let surface_w = self.surface_config.width;
1399        let surface_h = self.surface_config.height;
1400        let surface_format = self.surface_config.format;
1401
1402        let needs_build = match &self.scene_dda {
1403            Some(r) => r.storage_size != (surface_w, surface_h),
1404            None => true,
1405        };
1406        if needs_build {
1407            self.scene_dda = Some(self.build_scene_dda(surface_w, surface_h, surface_format));
1408        }
1409        // GPU.9 — materialise the sprite pipeline the first frame
1410        // sprites are present (before the immutable `dda` borrow).
1411        // GPU.10.0 — build the model-DDA pipeline the first frame a
1412        // sprite registry is present.
1413        if self.sprite_registry.is_some() && self.sprite_model_dda.is_none() {
1414            self.sprite_model_dda = Some(self.build_sprite_model_dda());
1415        }
1416        // GPU.10.3 — frustum-cull + screen-tile-bin the sprite instances
1417        // (needs &mut self for buffer growth, so before the immutable
1418        // scene_dda borrow). Captures (visible_count, tiles_x); None when
1419        // nothing is in view.
1420        let sprite_pass: Option<(u32, u32)> = if let Some(reg) = self.sprite_registry.as_mut() {
1421            if reg.instance_capacity > 0 {
1422                // World camera — sprite positions/transforms are world-
1423                // space (independent of any grid's transform).
1424                let cam = sprite_camera;
1425                #[allow(clippy::cast_precision_loss)]
1426                let aspect = surface_w as f32 / surface_h as f32;
1427                let half_h = (fov_y_rad * 0.5).tan();
1428                let frustum = sprite_model::ViewFrustum {
1429                    pos: cam.position,
1430                    right: cam.right,
1431                    down: cam.down,
1432                    forward: cam.forward,
1433                    half_w: half_h * aspect,
1434                    half_h,
1435                    far: 1.0e9,
1436                };
1437                let (visible, tiles_x, _tiles_y) = reg.cull_bin_upload(
1438                    &self.device,
1439                    &self.queue,
1440                    &frustum,
1441                    surface_w,
1442                    surface_h,
1443                    SPRITE_TILE_SIZE,
1444                    self.sprite_lod_px,
1445                );
1446                (visible > 0).then_some((visible, tiles_x))
1447            } else {
1448                None
1449            }
1450        } else {
1451            None
1452        };
1453        let dda = self.scene_dda.as_ref().expect("just built");
1454
1455        // Pack per-grid cameras.
1456        let mut cam_array = [SceneDdaPerGridCamera::zeroed(); SCENE_MAX_GRIDS];
1457        for (i, cam) in cameras.iter().enumerate() {
1458            cam_array[i] = SceneDdaPerGridCamera {
1459                pos: cam.position,
1460                _pad0: 0.0,
1461                right: cam.right,
1462                _pad1: 0.0,
1463                down: cam.down,
1464                _pad2: 0.0,
1465                forward: cam.forward,
1466                _pad3: 0.0,
1467            };
1468        }
1469        let uniform = SceneDdaUniform {
1470            fov_y_rad,
1471            grid_count: scene.grid_count,
1472            max_outer_steps,
1473            _pad0: 0,
1474            screen_size: [surface_w, surface_h],
1475            _pad1: [0; 2],
1476            cameras: cam_array,
1477            fog_color: [
1478                self.fog_color[0],
1479                self.fog_color[1],
1480                self.fog_color[2],
1481                self.fog_near,
1482            ],
1483            fog_far: self.fog_far,
1484            write_depth: u32::from(self.sprite_registry.is_some()),
1485            occ_page_words: scene.occupancy_page_words,
1486            occ_num_pages: scene.occupancy_num_pages,
1487            mip_scan_dist: self.scene_mip_scan_dist,
1488            _pad2: 0,
1489            _pad3: 0,
1490            _pad4: 0,
1491        };
1492        self.queue
1493            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1494
1495        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1496            label: Some("roxlap-gpu scene_dda.bg"),
1497            layout: &dda.bgl_dda,
1498            entries: &[
1499                wgpu::BindGroupEntry {
1500                    binding: 0,
1501                    resource: dda.uniform_buf.as_entire_binding(),
1502                },
1503                // Occupancy page 0 at binding 1; pages 1..MAX_OCC_PAGES
1504                // at bindings 12.. (see GPU.X occupancy paging).
1505                wgpu::BindGroupEntry {
1506                    binding: 1,
1507                    resource: scene.occupancy_pages[0].as_entire_binding(),
1508                },
1509                wgpu::BindGroupEntry {
1510                    binding: 2,
1511                    resource: scene.all_color_offsets.as_entire_binding(),
1512                },
1513                wgpu::BindGroupEntry {
1514                    binding: 3,
1515                    resource: scene.all_colors.as_entire_binding(),
1516                },
1517                wgpu::BindGroupEntry {
1518                    binding: 4,
1519                    resource: scene.all_chunk_colors_base.as_entire_binding(),
1520                },
1521                wgpu::BindGroupEntry {
1522                    binding: 5,
1523                    resource: scene.all_chunk_occupancy.as_entire_binding(),
1524                },
1525                wgpu::BindGroupEntry {
1526                    binding: 6,
1527                    resource: scene.grid_static_meta.as_entire_binding(),
1528                },
1529                wgpu::BindGroupEntry {
1530                    binding: 7,
1531                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
1532                },
1533                wgpu::BindGroupEntry {
1534                    binding: 8,
1535                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1536                },
1537                wgpu::BindGroupEntry {
1538                    binding: 9,
1539                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
1540                },
1541                wgpu::BindGroupEntry {
1542                    binding: 10,
1543                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
1544                },
1545                wgpu::BindGroupEntry {
1546                    binding: 11,
1547                    resource: dda.depth_buffer.as_entire_binding(),
1548                },
1549                wgpu::BindGroupEntry {
1550                    binding: 12,
1551                    resource: scene.occupancy_pages[1].as_entire_binding(),
1552                },
1553                wgpu::BindGroupEntry {
1554                    binding: 13,
1555                    resource: scene.occupancy_pages[2].as_entire_binding(),
1556                },
1557                wgpu::BindGroupEntry {
1558                    binding: 14,
1559                    resource: scene.occupancy_pages[3].as_entire_binding(),
1560                },
1561            ],
1562        });
1563
1564        // GPU.9 — when sprites are present, build both splatter bind
1565        // groups up front (the splat pass writes the key buffer; the
1566        // resolve pass reads keys + scene depth and writes colour).
1567        // GPU.10.3 — model-DDA bind group + per-frame uniform, using the
1568        // cull/bin results captured above. Per-model + per-instance data
1569        // + the tile lists live in the registry buffers.
1570        let sprite_model_bg = match (&self.sprite_model_dda, &self.sprite_registry, sprite_pass) {
1571            (Some(smd), Some(reg), Some((visible, tiles_x))) => {
1572                // World camera (see the cull pass above) — sprites
1573                // project through it regardless of grid 0's transform.
1574                let cam = sprite_camera;
1575                let uni = SpriteModelUniform {
1576                    cam_pos: cam.position,
1577                    _p0: 0.0,
1578                    cam_right: cam.right,
1579                    _p1: 0.0,
1580                    cam_down: cam.down,
1581                    _p2: 0.0,
1582                    cam_forward: cam.forward,
1583                    _p3: 0.0,
1584                    fog_color: [
1585                        self.fog_color[0],
1586                        self.fog_color[1],
1587                        self.fog_color[2],
1588                        self.fog_near,
1589                    ],
1590                    screen_size: [surface_w, surface_h],
1591                    instance_count: visible,
1592                    fog_far: self.fog_far,
1593                    fov_y_rad,
1594                    tiles_x,
1595                    tile_size: SPRITE_TILE_SIZE,
1596                    _p6: 0.0,
1597                };
1598                self.queue
1599                    .write_buffer(&smd.uniform_buf, 0, bytemuck::bytes_of(&uni));
1600                Some(self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1601                    label: Some("roxlap-gpu sprite_model_dda.bg"),
1602                    layout: &smd.bgl,
1603                    entries: &[
1604                        wgpu::BindGroupEntry {
1605                            binding: 0,
1606                            resource: smd.uniform_buf.as_entire_binding(),
1607                        },
1608                        wgpu::BindGroupEntry {
1609                            binding: 1,
1610                            resource: reg.occupancy.as_entire_binding(),
1611                        },
1612                        wgpu::BindGroupEntry {
1613                            binding: 2,
1614                            resource: reg.colors.as_entire_binding(),
1615                        },
1616                        wgpu::BindGroupEntry {
1617                            binding: 3,
1618                            resource: reg.color_offsets.as_entire_binding(),
1619                        },
1620                        wgpu::BindGroupEntry {
1621                            binding: 4,
1622                            resource: reg.model_meta.as_entire_binding(),
1623                        },
1624                        wgpu::BindGroupEntry {
1625                            binding: 5,
1626                            resource: reg.instances.as_entire_binding(),
1627                        },
1628                        wgpu::BindGroupEntry {
1629                            binding: 6,
1630                            resource: dda.depth_buffer.as_entire_binding(),
1631                        },
1632                        wgpu::BindGroupEntry {
1633                            binding: 7,
1634                            resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1635                        },
1636                        wgpu::BindGroupEntry {
1637                            binding: 8,
1638                            resource: reg.tile_ranges.as_entire_binding(),
1639                        },
1640                        wgpu::BindGroupEntry {
1641                            binding: 9,
1642                            resource: reg.tile_instances.as_entire_binding(),
1643                        },
1644                    ],
1645                }))
1646            }
1647            _ => None,
1648        };
1649
1650        let mut encoder = self
1651            .device
1652            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1653                label: Some("roxlap-gpu scene encoder"),
1654            });
1655        {
1656            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1657                label: Some("roxlap-gpu scene_dda compute"),
1658                timestamp_writes: None,
1659            });
1660            cpass.set_pipeline(&dda.pipeline_dda);
1661            cpass.set_bind_group(0, &dda_bg, &[]);
1662            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1663        }
1664        // GPU.10 — sprite model-DDA pass: one thread per pixel marches
1665        // the tile's instances + composites against scene depth, after
1666        // the scene pass wrote the depth buffer and before the blit.
1667        if let (Some(smd), Some(bg)) = (&self.sprite_model_dda, &sprite_model_bg) {
1668            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1669                label: Some("roxlap-gpu sprite_model_dda"),
1670                timestamp_writes: None,
1671            });
1672            cpass.set_pipeline(&smd.pipeline);
1673            cpass.set_bind_group(0, bg, &[]);
1674            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1675        }
1676        {
1677            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1678                label: Some("roxlap-gpu scene_dda blit"),
1679                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1680                    view: &surf_view,
1681                    resolve_target: None,
1682                    ops: wgpu::Operations {
1683                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1684                        store: wgpu::StoreOp::Store,
1685                    },
1686                })],
1687                depth_stencil_attachment: None,
1688                timestamp_writes: None,
1689                occlusion_query_set: None,
1690            });
1691            rpass.set_pipeline(&dda.pipeline_blit);
1692            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1693            rpass.draw(0..3, 0..1);
1694        }
1695        self.queue.submit(std::iter::once(encoder.finish()));
1696        // Deferred present — the host calls `present` or `paint_egui`.
1697        self.pending_frame = Some((surf_tex, surf_view));
1698        self.frame_count = self.frame_count.wrapping_add(1);
1699    }
1700
1701    /// Like [`Self::render`] (clear to colour) but **deferred**: stashes
1702    /// the frame for [`Self::present`] / [`Self::paint_egui`] instead of
1703    /// presenting. The facade uses this before any grid is resident so a
1704    /// HUD can still be painted over an empty scene.
1705    pub fn render_clear_deferred(&mut self) {
1706        self.pending_frame = None;
1707        let surf_tex = match self.surface.get_current_texture() {
1708            Ok(t) => t,
1709            Err(wgpu::SurfaceError::Outdated | wgpu::SurfaceError::Lost) => {
1710                self.surface.configure(&self.device, &self.surface_config);
1711                return;
1712            }
1713            Err(e) => {
1714                eprintln!("roxlap-gpu surface error: {e:?}");
1715                return;
1716            }
1717        };
1718        let view = surf_tex
1719            .texture
1720            .create_view(&wgpu::TextureViewDescriptor::default());
1721        let [r, g, b] = self.clear_colour;
1722        let mut encoder = self
1723            .device
1724            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1725                label: Some("roxlap-gpu clear (deferred)"),
1726            });
1727        {
1728            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1729                label: Some("roxlap-gpu clear (deferred)"),
1730                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1731                    view: &view,
1732                    resolve_target: None,
1733                    ops: wgpu::Operations {
1734                        load: wgpu::LoadOp::Clear(wgpu::Color { r, g, b, a: 1.0 }),
1735                        store: wgpu::StoreOp::Store,
1736                    },
1737                })],
1738                depth_stencil_attachment: None,
1739                timestamp_writes: None,
1740                occlusion_query_set: None,
1741            });
1742        }
1743        self.queue.submit(std::iter::once(encoder.finish()));
1744        self.pending_frame = Some((surf_tex, view));
1745    }
1746
1747    /// Present the frame stashed by the last deferred render
1748    /// ([`Self::render_scene`] / [`Self::render_clear_deferred`]). No-op
1749    /// if nothing is pending (e.g. the surface was lost mid-render).
1750    pub fn present(&mut self) {
1751        if let Some((surf_tex, _view)) = self.pending_frame.take() {
1752            surf_tex.present();
1753        }
1754    }
1755
1756    /// Overlay an `egui` UI on the pending frame, then present it
1757    /// (`hud` feature). `jobs` are the host's tessellated primitives
1758    /// (`egui::Context::tessellate`), `textures` the per-frame texture
1759    /// delta from `egui::FullOutput`, `pixels_per_point` the UI scale.
1760    ///
1761    /// Draws with `LoadOp::Load` over the marcher's frame (a separate
1762    /// encoder submitted after the scene's), so the UI composites on top
1763    /// of the world. No-op if no frame is pending.
1764    #[cfg(feature = "hud")]
1765    pub fn paint_egui(
1766        &mut self,
1767        jobs: &[egui::ClippedPrimitive],
1768        textures: &egui::TexturesDelta,
1769        pixels_per_point: f32,
1770    ) {
1771        let Some((surf_tex, surf_view)) = self.pending_frame.take() else {
1772            return;
1773        };
1774        let format = self.surface_config.format;
1775        let egui_rend = self
1776            .egui_renderer
1777            .get_or_insert_with(|| egui_wgpu::Renderer::new(&self.device, format, None, 1, false));
1778
1779        let screen = egui_wgpu::ScreenDescriptor {
1780            size_in_pixels: [self.surface_config.width, self.surface_config.height],
1781            pixels_per_point,
1782        };
1783        for (id, delta) in &textures.set {
1784            egui_rend.update_texture(&self.device, &self.queue, *id, delta);
1785        }
1786        let mut encoder = self
1787            .device
1788            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1789                label: Some("roxlap-gpu egui"),
1790            });
1791        let user_bufs =
1792            egui_rend.update_buffers(&self.device, &self.queue, &mut encoder, jobs, &screen);
1793        {
1794            // `LoadOp::Load` keeps the marcher's frame; egui draws over it.
1795            let mut pass = encoder
1796                .begin_render_pass(&wgpu::RenderPassDescriptor {
1797                    label: Some("roxlap-gpu egui paint"),
1798                    color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1799                        view: &surf_view,
1800                        resolve_target: None,
1801                        ops: wgpu::Operations {
1802                            load: wgpu::LoadOp::Load,
1803                            store: wgpu::StoreOp::Store,
1804                        },
1805                    })],
1806                    depth_stencil_attachment: None,
1807                    timestamp_writes: None,
1808                    occlusion_query_set: None,
1809                })
1810                // egui-wgpu 0.29 requires a `'static` pass (see its docs).
1811                .forget_lifetime();
1812            egui_rend.render(&mut pass, jobs, &screen);
1813        }
1814        for id in &textures.free {
1815            egui_rend.free_texture(id);
1816        }
1817        self.queue.submit(
1818            user_bufs
1819                .into_iter()
1820                .chain(std::iter::once(encoder.finish())),
1821        );
1822        surf_tex.present();
1823    }
1824
1825    fn build_scene_dda(
1826        &self,
1827        width: u32,
1828        height: u32,
1829        surface_format: wgpu::TextureFormat,
1830    ) -> SceneDdaResources {
1831        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1832            label: Some("roxlap-gpu scene_dda.storage"),
1833            size: wgpu::Extent3d {
1834                width,
1835                height,
1836                depth_or_array_layers: 1,
1837            },
1838            mip_level_count: 1,
1839            sample_count: 1,
1840            dimension: wgpu::TextureDimension::D2,
1841            format: wgpu::TextureFormat::Rgba8Unorm,
1842            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1843            view_formats: &[],
1844        });
1845        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1846
1847        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1848            label: Some("roxlap-gpu scene_dda.uniform"),
1849            size: std::mem::size_of::<SceneDdaUniform>() as u64,
1850            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1851            mapped_at_creation: false,
1852        });
1853
1854        // GPU.9 — per-pixel world-t depth (f32 bits as u32). Sized to
1855        // the storage texture; written by the scene pass when sprites
1856        // are active, read+tested by the sprite splatter.
1857        let depth_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
1858            label: Some("roxlap-gpu scene_dda.depth"),
1859            size: u64::from(width) * u64::from(height) * 4,
1860            // COPY_SRC so `read_depth_pixel` can stage it for picking.
1861            usage: wgpu::BufferUsages::STORAGE
1862                | wgpu::BufferUsages::COPY_DST
1863                | wgpu::BufferUsages::COPY_SRC,
1864            mapped_at_creation: false,
1865        });
1866        let depth_readback = self.device.create_buffer(&wgpu::BufferDescriptor {
1867            label: Some("roxlap-gpu scene_dda.depth_readback"),
1868            size: u64::from(width) * u64::from(height) * 4,
1869            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
1870            mapped_at_creation: false,
1871        });
1872        let dda_shader = self
1873            .device
1874            .create_shader_module(wgpu::ShaderModuleDescriptor {
1875                label: Some("scene_dda.wgsl"),
1876                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
1877            });
1878        let bgl_dda = self
1879            .device
1880            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1881                label: Some("roxlap-gpu scene_dda.bgl"),
1882                entries: &[
1883                    bgl_uniform_entry(0),
1884                    bgl_storage_entry(1, true),
1885                    bgl_storage_entry(2, true),
1886                    bgl_storage_entry(3, true),
1887                    bgl_storage_entry(4, true),
1888                    bgl_storage_entry(5, true),
1889                    bgl_storage_entry(6, true),
1890                    bgl_storage_entry(7, true),
1891                    wgpu::BindGroupLayoutEntry {
1892                        binding: 8,
1893                        visibility: wgpu::ShaderStages::COMPUTE,
1894                        ty: wgpu::BindingType::StorageTexture {
1895                            access: wgpu::StorageTextureAccess::WriteOnly,
1896                            format: wgpu::TextureFormat::Rgba8Unorm,
1897                            view_dimension: wgpu::TextureViewDimension::D2,
1898                        },
1899                        count: None,
1900                    },
1901                    // GPU.8 sky panorama + sampler.
1902                    wgpu::BindGroupLayoutEntry {
1903                        binding: 9,
1904                        visibility: wgpu::ShaderStages::COMPUTE,
1905                        ty: wgpu::BindingType::Texture {
1906                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
1907                            view_dimension: wgpu::TextureViewDimension::D2,
1908                            multisampled: false,
1909                        },
1910                        count: None,
1911                    },
1912                    wgpu::BindGroupLayoutEntry {
1913                        binding: 10,
1914                        visibility: wgpu::ShaderStages::COMPUTE,
1915                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
1916                        count: None,
1917                    },
1918                    // GPU.9 — read-write per-pixel depth buffer.
1919                    bgl_storage_entry(11, false),
1920                    // Occupancy pages 1..MAX_OCC_PAGES (page 0 is
1921                    // binding 1). Unused pages bind a dummy buffer.
1922                    bgl_storage_entry(12, true),
1923                    bgl_storage_entry(13, true),
1924                    bgl_storage_entry(14, true),
1925                ],
1926            });
1927        let dda_pl = self
1928            .device
1929            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1930                label: Some("roxlap-gpu scene_dda.layout"),
1931                bind_group_layouts: &[&bgl_dda],
1932                push_constant_ranges: &[],
1933            });
1934        let pipeline_dda = self
1935            .device
1936            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1937                label: Some("roxlap-gpu scene_dda.pipeline"),
1938                layout: Some(&dda_pl),
1939                module: &dda_shader,
1940                entry_point: "render_scene",
1941                compilation_options: wgpu::PipelineCompilationOptions::default(),
1942                cache: None,
1943            });
1944
1945        let blit_shader = self
1946            .device
1947            .create_shader_module(wgpu::ShaderModuleDescriptor {
1948                label: Some("blit.wgsl"),
1949                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1950            });
1951        let bgl_blit = self
1952            .device
1953            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1954                label: Some("roxlap-gpu scene_dda.blit_bgl"),
1955                entries: &[
1956                    wgpu::BindGroupLayoutEntry {
1957                        binding: 0,
1958                        visibility: wgpu::ShaderStages::FRAGMENT,
1959                        ty: wgpu::BindingType::Texture {
1960                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1961                            view_dimension: wgpu::TextureViewDimension::D2,
1962                            multisampled: false,
1963                        },
1964                        count: None,
1965                    },
1966                    wgpu::BindGroupLayoutEntry {
1967                        binding: 1,
1968                        visibility: wgpu::ShaderStages::FRAGMENT,
1969                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1970                        count: None,
1971                    },
1972                ],
1973            });
1974        let blit_pl = self
1975            .device
1976            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1977                label: Some("roxlap-gpu scene_dda.blit_layout"),
1978                bind_group_layouts: &[&bgl_blit],
1979                push_constant_ranges: &[],
1980            });
1981        let pipeline_blit = self
1982            .device
1983            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1984                label: Some("roxlap-gpu scene_dda.blit_pipeline"),
1985                layout: Some(&blit_pl),
1986                vertex: wgpu::VertexState {
1987                    module: &blit_shader,
1988                    entry_point: "vs_main",
1989                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1990                    buffers: &[],
1991                },
1992                fragment: Some(wgpu::FragmentState {
1993                    module: &blit_shader,
1994                    entry_point: "fs_main",
1995                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1996                    targets: &[Some(wgpu::ColorTargetState {
1997                        format: surface_format,
1998                        blend: None,
1999                        write_mask: wgpu::ColorWrites::ALL,
2000                    })],
2001                }),
2002                primitive: wgpu::PrimitiveState::default(),
2003                depth_stencil: None,
2004                multisample: wgpu::MultisampleState::default(),
2005                multiview: None,
2006                cache: None,
2007            });
2008        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
2009            label: Some("roxlap-gpu scene_dda.blit_sampler"),
2010            address_mode_u: wgpu::AddressMode::ClampToEdge,
2011            address_mode_v: wgpu::AddressMode::ClampToEdge,
2012            address_mode_w: wgpu::AddressMode::ClampToEdge,
2013            mag_filter: wgpu::FilterMode::Nearest,
2014            min_filter: wgpu::FilterMode::Nearest,
2015            mipmap_filter: wgpu::FilterMode::Nearest,
2016            ..Default::default()
2017        });
2018        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
2019            label: Some("roxlap-gpu scene_dda.blit_bg"),
2020            layout: &bgl_blit,
2021            entries: &[
2022                wgpu::BindGroupEntry {
2023                    binding: 0,
2024                    resource: wgpu::BindingResource::TextureView(&storage_view),
2025                },
2026                wgpu::BindGroupEntry {
2027                    binding: 1,
2028                    resource: wgpu::BindingResource::Sampler(&sampler),
2029                },
2030            ],
2031        });
2032
2033        SceneDdaResources {
2034            storage_size: (width, height),
2035            storage_view,
2036            uniform_buf,
2037            bgl_dda,
2038            pipeline_dda,
2039            blit_bg,
2040            pipeline_blit,
2041            _sampler: sampler,
2042            depth_buffer,
2043            depth_readback,
2044        }
2045    }
2046
2047    /// Read back the per-pixel world-t depth at window pixel `(x, y)`
2048    /// from the last rendered frame, for screen→world picking. Returns
2049    /// the distance `t` along the (normalised) view ray to the nearest
2050    /// scene-grid surface, so the host reconstructs the world hit as
2051    /// `cam.pos + t * normalize(ray_dir)`. `None` for out-of-bounds
2052    /// pixels, sky / no-hit (the `T_INF` sentinel), or when no scene
2053    /// frame has been rendered.
2054    ///
2055    /// The depth buffer is the SCENE pass's output (terrain + grids),
2056    /// untouched by the sprite pass (which reads it read-only), so a
2057    /// cursor sprite under the pointer does not occlude the pick.
2058    ///
2059    /// Synchronous: copies the depth buffer to a mapped staging buffer
2060    /// and blocks on `device.poll(Wait)`. Cheap enough for click-time
2061    /// picks; do not call it every frame.
2062    ///
2063    /// Requires the last frame to have written depth, which happens
2064    /// when sprites are present (`write_depth`). The pick demo always
2065    /// has a cursor sprite, so this holds.
2066    #[must_use]
2067    pub fn read_depth_pixel(&self, x: u32, y: u32) -> Option<f32> {
2068        let dda = self.scene_dda.as_ref()?;
2069        let (w, h) = dda.storage_size;
2070        if x >= w || y >= h {
2071            return None;
2072        }
2073        let mut enc = self
2074            .device
2075            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
2076                label: Some("roxlap-gpu depth readback"),
2077            });
2078        let size = u64::from(w) * u64::from(h) * 4;
2079        enc.copy_buffer_to_buffer(&dda.depth_buffer, 0, &dda.depth_readback, 0, size);
2080        self.queue.submit(std::iter::once(enc.finish()));
2081
2082        let slice = dda.depth_readback.slice(..);
2083        let (tx, rx) = std::sync::mpsc::channel();
2084        slice.map_async(wgpu::MapMode::Read, move |r| {
2085            let _ = tx.send(r);
2086        });
2087        self.device.poll(wgpu::Maintain::Wait);
2088        rx.recv().ok()?.ok()?;
2089
2090        let t = {
2091            let data = slice.get_mapped_range();
2092            let idx = ((y * w + x) * 4) as usize;
2093            let bytes: [u8; 4] = data[idx..idx + 4].try_into().ok()?;
2094            f32::from_le_bytes(bytes)
2095        };
2096        dda.depth_readback.unmap();
2097
2098        // Reject sky / no-hit (T_INF == 1e30 in the shader) + non-finite.
2099        if !t.is_finite() || t >= 1.0e29 {
2100            return None;
2101        }
2102        Some(t)
2103    }
2104
2105    /// World-space view-ray direction (un-normalised) for window pixel
2106    /// `(x, y)`, under the GPU marcher's projection — the canonical GPU
2107    /// unproject, mirroring `scene_dda.wgsl`'s `render_scene`
2108    /// (vertical-FOV pinhole). Uses the last-rendered frame's target
2109    /// size + FOV; `None` before the first scene render. Pair with
2110    /// [`Self::read_depth_pixel`] for screen→world picking.
2111    #[must_use]
2112    pub fn pixel_ray(
2113        &self,
2114        right: [f64; 3],
2115        down: [f64; 3],
2116        forward: [f64; 3],
2117        x: f64,
2118        y: f64,
2119    ) -> Option<[f64; 3]> {
2120        let dda = self.scene_dda.as_ref()?;
2121        let (w, h) = dda.storage_size;
2122        if w == 0 || h == 0 || self.last_fov_y_rad <= 0.0 {
2123            return None;
2124        }
2125        Some(pinhole_pixel_ray(
2126            right,
2127            down,
2128            forward,
2129            x,
2130            y,
2131            f64::from(w),
2132            f64::from(h),
2133            f64::from(self.last_fov_y_rad),
2134        ))
2135    }
2136
2137    /// GPU.10.1 — upload a sprite model registry + its instances for
2138    /// the DDA path. An empty instance slice clears all sprites.
2139    pub fn set_sprite_instances(
2140        &mut self,
2141        registry: &sprite_model::SpriteModelRegistry,
2142        instances: &[sprite_model::SpriteInstance],
2143    ) {
2144        if instances.is_empty() {
2145            self.sprite_registry = None;
2146            return;
2147        }
2148        self.sprite_registry = Some(sprite_model::SpriteRegistryResident::upload(
2149            &self.device,
2150            registry,
2151            instances,
2152        ));
2153    }
2154
2155    /// GPU.10.4 — set the LOD pixel threshold: a sprite steps to the
2156    /// next mip once a mip-0 voxel would project below `px` screen
2157    /// pixels. `1.0` is the natural "no sub-pixel voxels" default;
2158    /// larger values force LOD in closer (useful for inspection).
2159    /// Clamped to ≥ 0.25.
2160    pub fn set_sprite_lod_px(&mut self, px: f32) {
2161        self.sprite_lod_px = px.max(0.25);
2162    }
2163
2164    /// GPU.11.1 — set the scene-grid LOD scan distance (world units).
2165    /// A chunk entered at world-t `t` is marched at mip
2166    /// `floor(log2(max(t, msd) / msd))`, clamped to its grid's mip
2167    /// ladder. `0` disables LOD (always mip-0). Larger values push
2168    /// the coarser mips farther out — the axis-aligned-mip-beams
2169    /// mitigation lever (GPU.11.2). Default 64 (matches CPU
2170    /// `mip_scan_dist`).
2171    pub fn set_scene_mip_scan_dist(&mut self, dist: f32) {
2172        self.scene_mip_scan_dist = dist.max(0.0);
2173    }
2174
2175    /// GPU.10.1 — build the instanced model-DDA pipeline (one thread
2176    /// per pixel). Lazily invoked the first frame a registry is present.
2177    fn build_sprite_model_dda(&self) -> SpriteModelDdaResources {
2178        let shader = self
2179            .device
2180            .create_shader_module(wgpu::ShaderModuleDescriptor {
2181                label: Some("sprite_model_dda.wgsl"),
2182                source: wgpu::ShaderSource::Wgsl(
2183                    include_str!("../shaders/sprite_model_dda.wgsl").into(),
2184                ),
2185            });
2186        let bgl = self
2187            .device
2188            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2189                label: Some("roxlap-gpu sprite_model_dda.bgl"),
2190                entries: &[
2191                    bgl_uniform_entry(0),
2192                    bgl_storage_entry(1, true), // occupancy
2193                    bgl_storage_entry(2, true), // colors
2194                    bgl_storage_entry(3, true), // color_offsets
2195                    bgl_storage_entry(4, true), // model_meta
2196                    bgl_storage_entry(5, true), // instances
2197                    bgl_storage_entry(6, true), // scene depth
2198                    wgpu::BindGroupLayoutEntry {
2199                        binding: 7,
2200                        visibility: wgpu::ShaderStages::COMPUTE,
2201                        ty: wgpu::BindingType::StorageTexture {
2202                            access: wgpu::StorageTextureAccess::WriteOnly,
2203                            format: wgpu::TextureFormat::Rgba8Unorm,
2204                            view_dimension: wgpu::TextureViewDimension::D2,
2205                        },
2206                        count: None,
2207                    },
2208                    bgl_storage_entry(8, true), // tile_ranges
2209                    bgl_storage_entry(9, true), // tile_instances
2210                ],
2211            });
2212        let pl = self
2213            .device
2214            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2215                label: Some("roxlap-gpu sprite_model_dda.layout"),
2216                bind_group_layouts: &[&bgl],
2217                push_constant_ranges: &[],
2218            });
2219        let pipeline = self
2220            .device
2221            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
2222                label: Some("roxlap-gpu sprite_model_dda.pipeline"),
2223                layout: Some(&pl),
2224                module: &shader,
2225                entry_point: "march",
2226                compilation_options: wgpu::PipelineCompilationOptions::default(),
2227                cache: None,
2228            });
2229        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2230            label: Some("roxlap-gpu sprite_model_dda.uniform"),
2231            size: std::mem::size_of::<SpriteModelUniform>() as u64,
2232            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2233            mapped_at_creation: false,
2234        });
2235        SpriteModelDdaResources {
2236            bgl,
2237            pipeline,
2238            uniform_buf,
2239        }
2240    }
2241}
2242
/// GPU.11 — headless scene-DDA renderer for tests + offline visual
/// gates. Owns the `scene_dda.wgsl` compute pipeline with no surface
/// and no blit pass; renders a [`GpuSceneResident`] to an in-memory
/// RGBA framebuffer via texture readback. The per-substage visual
/// gate (render reference scenes, diff PPMs) and the GPU.11.1 mip
/// render-diff both ride on this.
///
/// Every field is created once in [`HeadlessSceneRenderer::new`] and
/// reused by each [`HeadlessSceneRenderer::render`] call.
pub struct HeadlessSceneRenderer {
    /// Framebuffer width in pixels.
    width: u32,
    /// Framebuffer height in pixels.
    height: u32,
    /// `Rgba8Unorm` colour target: written by the compute pass, then
    /// copied into `readback`.
    output_tex: wgpu::Texture,
    /// Default view of `output_tex`, bound at binding 8.
    output_view: wgpu::TextureView,
    /// Storage buffer of `width * height * 4` bytes, bound read-write
    /// at binding 11.
    depth_buffer: wgpu::Buffer,
    /// Uniform buffer sized for `SceneDdaUniform`; rewritten on every
    /// render.
    uniform_buf: wgpu::Buffer,
    /// 1×1 sky texture; stored but never read directly — only its
    /// view is bound.
    _sky_texture: wgpu::Texture,
    /// View of the sky texture, bound at binding 9.
    sky_view: wgpu::TextureView,
    /// Repeat-addressed, linearly filtered sampler bound at binding 10.
    sky_sampler: wgpu::Sampler,
    /// Layout used to build the per-render bind group.
    bgl: wgpu::BindGroupLayout,
    /// Compute pipeline running the `render_scene` entry point.
    pipeline: wgpu::ComputePipeline,
    /// `COPY_DST | MAP_READ` buffer the framebuffer is copied into for
    /// CPU readback.
    readback: wgpu::Buffer,
    /// Bytes per row in `readback`: `width * 4` rounded up to a
    /// multiple of 256.
    padded_bytes_per_row: u32,
}
2264
2265impl HeadlessSceneRenderer {
2266    /// Build the compute pipeline + output/readback resources for a
2267    /// `width × height` framebuffer. Validates `scene_dda.wgsl` and
2268    /// the [`scene::GridStaticMeta`] std430 layout at pipeline /
2269    /// bind-group time.
2270    #[must_use]
2271    pub fn new(device: &wgpu::Device, width: u32, height: u32) -> Self {
2272        let output_tex = device.create_texture(&wgpu::TextureDescriptor {
2273            label: Some("roxlap-gpu headless.output"),
2274            size: wgpu::Extent3d {
2275                width,
2276                height,
2277                depth_or_array_layers: 1,
2278            },
2279            mip_level_count: 1,
2280            sample_count: 1,
2281            dimension: wgpu::TextureDimension::D2,
2282            format: wgpu::TextureFormat::Rgba8Unorm,
2283            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::COPY_SRC,
2284            view_formats: &[],
2285        });
2286        let output_view = output_tex.create_view(&wgpu::TextureViewDescriptor::default());
2287
2288        let uniform_buf = device.create_buffer(&wgpu::BufferDescriptor {
2289            label: Some("roxlap-gpu headless.uniform"),
2290            size: std::mem::size_of::<SceneDdaUniform>() as u64,
2291            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2292            mapped_at_creation: false,
2293        });
2294        let depth_buffer = device.create_buffer(&wgpu::BufferDescriptor {
2295            label: Some("roxlap-gpu headless.depth"),
2296            size: u64::from(width) * u64::from(height) * 4,
2297            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
2298            mapped_at_creation: false,
2299        });
2300
2301        let default_sky_pixel = [120u8, 150, 220, 255];
2302        let (sky_texture, sky_view) = create_sky_texture(device, 1, 1, &default_sky_pixel);
2303        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
2304            label: Some("roxlap-gpu headless.sky_sampler"),
2305            address_mode_u: wgpu::AddressMode::Repeat,
2306            address_mode_v: wgpu::AddressMode::Repeat,
2307            mag_filter: wgpu::FilterMode::Linear,
2308            min_filter: wgpu::FilterMode::Linear,
2309            ..Default::default()
2310        });
2311
2312        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
2313            label: Some("scene_dda.wgsl (headless)"),
2314            source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
2315        });
2316        let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2317            label: Some("roxlap-gpu headless.bgl"),
2318            entries: &[
2319                bgl_uniform_entry(0),
2320                bgl_storage_entry(1, true),
2321                bgl_storage_entry(2, true),
2322                bgl_storage_entry(3, true),
2323                bgl_storage_entry(4, true),
2324                bgl_storage_entry(5, true),
2325                bgl_storage_entry(6, true),
2326                bgl_storage_entry(7, true),
2327                wgpu::BindGroupLayoutEntry {
2328                    binding: 8,
2329                    visibility: wgpu::ShaderStages::COMPUTE,
2330                    ty: wgpu::BindingType::StorageTexture {
2331                        access: wgpu::StorageTextureAccess::WriteOnly,
2332                        format: wgpu::TextureFormat::Rgba8Unorm,
2333                        view_dimension: wgpu::TextureViewDimension::D2,
2334                    },
2335                    count: None,
2336                },
2337                wgpu::BindGroupLayoutEntry {
2338                    binding: 9,
2339                    visibility: wgpu::ShaderStages::COMPUTE,
2340                    ty: wgpu::BindingType::Texture {
2341                        sample_type: wgpu::TextureSampleType::Float { filterable: true },
2342                        view_dimension: wgpu::TextureViewDimension::D2,
2343                        multisampled: false,
2344                    },
2345                    count: None,
2346                },
2347                wgpu::BindGroupLayoutEntry {
2348                    binding: 10,
2349                    visibility: wgpu::ShaderStages::COMPUTE,
2350                    ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
2351                    count: None,
2352                },
2353                bgl_storage_entry(11, false),
2354                bgl_storage_entry(12, true),
2355                bgl_storage_entry(13, true),
2356                bgl_storage_entry(14, true),
2357            ],
2358        });
2359        let pl = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2360            label: Some("roxlap-gpu headless.layout"),
2361            bind_group_layouts: &[&bgl],
2362            push_constant_ranges: &[],
2363        });
2364        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
2365            label: Some("roxlap-gpu headless.pipeline"),
2366            layout: Some(&pl),
2367            module: &shader,
2368            entry_point: "render_scene",
2369            compilation_options: wgpu::PipelineCompilationOptions::default(),
2370            cache: None,
2371        });
2372
2373        // Readback buffer: row pitch must be 256-aligned for
2374        // copy_texture_to_buffer.
2375        let padded_bytes_per_row = (width * 4).div_ceil(256) * 256;
2376        let readback = device.create_buffer(&wgpu::BufferDescriptor {
2377            label: Some("roxlap-gpu headless.readback"),
2378            size: u64::from(padded_bytes_per_row) * u64::from(height),
2379            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
2380            mapped_at_creation: false,
2381        });
2382
2383        Self {
2384            width,
2385            height,
2386            output_tex,
2387            output_view,
2388            depth_buffer,
2389            uniform_buf,
2390            _sky_texture: sky_texture,
2391            sky_view,
2392            sky_sampler,
2393            bgl,
2394            pipeline,
2395            readback,
2396            padded_bytes_per_row,
2397        }
2398    }
2399
2400    /// Render `scene` from `cameras` (one per grid) and read the
2401    /// framebuffer back as `width*height` packed `0xAABBGGRR` pixels
2402    /// (R in the low byte). Fog is disabled. `mip_scan_dist` drives
2403    /// the GPU.11.1 scene-grid LOD (`0` = always mip-0). Blocks on
2404    /// readback.
2405    ///
2406    /// # Panics
2407    /// If `cameras.len() != scene.grid_count`.
2408    #[must_use]
2409    #[allow(clippy::too_many_arguments)]
2410    pub fn render(
2411        &self,
2412        device: &wgpu::Device,
2413        queue: &wgpu::Queue,
2414        scene: &GpuSceneResident,
2415        cameras: &[Camera],
2416        fov_y_rad: f32,
2417        max_outer_steps: u32,
2418        mip_scan_dist: f32,
2419    ) -> Vec<u32> {
2420        assert_eq!(
2421            cameras.len(),
2422            scene.grid_count as usize,
2423            "headless render: {} cameras for {} grids",
2424            cameras.len(),
2425            scene.grid_count,
2426        );
2427
2428        let mut cam_array = [SceneDdaPerGridCamera::zeroed(); SCENE_MAX_GRIDS];
2429        for (i, cam) in cameras.iter().enumerate() {
2430            cam_array[i] = SceneDdaPerGridCamera {
2431                pos: cam.position,
2432                _pad0: 0.0,
2433                right: cam.right,
2434                _pad1: 0.0,
2435                down: cam.down,
2436                _pad2: 0.0,
2437                forward: cam.forward,
2438                _pad3: 0.0,
2439            };
2440        }
2441        let uniform = SceneDdaUniform {
2442            fov_y_rad,
2443            grid_count: scene.grid_count,
2444            max_outer_steps,
2445            _pad0: 0,
2446            screen_size: [self.width, self.height],
2447            _pad1: [0; 2],
2448            cameras: cam_array,
2449            // Fog off: near/far past any reachable t → factor 0.
2450            fog_color: [0.0, 0.0, 0.0, 1.0e29],
2451            fog_far: 1.0e30,
2452            write_depth: 0,
2453            occ_page_words: scene.occupancy_page_words,
2454            occ_num_pages: scene.occupancy_num_pages,
2455            mip_scan_dist,
2456            _pad2: 0,
2457            _pad3: 0,
2458            _pad4: 0,
2459        };
2460        queue.write_buffer(&self.uniform_buf, 0, bytemuck::bytes_of(&uniform));
2461
2462        let bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
2463            label: Some("roxlap-gpu headless.bg"),
2464            layout: &self.bgl,
2465            entries: &[
2466                wgpu::BindGroupEntry {
2467                    binding: 0,
2468                    resource: self.uniform_buf.as_entire_binding(),
2469                },
2470                wgpu::BindGroupEntry {
2471                    binding: 1,
2472                    resource: scene.occupancy_pages[0].as_entire_binding(),
2473                },
2474                wgpu::BindGroupEntry {
2475                    binding: 2,
2476                    resource: scene.all_color_offsets.as_entire_binding(),
2477                },
2478                wgpu::BindGroupEntry {
2479                    binding: 3,
2480                    resource: scene.all_colors.as_entire_binding(),
2481                },
2482                wgpu::BindGroupEntry {
2483                    binding: 4,
2484                    resource: scene.all_chunk_colors_base.as_entire_binding(),
2485                },
2486                wgpu::BindGroupEntry {
2487                    binding: 5,
2488                    resource: scene.all_chunk_occupancy.as_entire_binding(),
2489                },
2490                wgpu::BindGroupEntry {
2491                    binding: 6,
2492                    resource: scene.grid_static_meta.as_entire_binding(),
2493                },
2494                wgpu::BindGroupEntry {
2495                    binding: 7,
2496                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
2497                },
2498                wgpu::BindGroupEntry {
2499                    binding: 8,
2500                    resource: wgpu::BindingResource::TextureView(&self.output_view),
2501                },
2502                wgpu::BindGroupEntry {
2503                    binding: 9,
2504                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
2505                },
2506                wgpu::BindGroupEntry {
2507                    binding: 10,
2508                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
2509                },
2510                wgpu::BindGroupEntry {
2511                    binding: 11,
2512                    resource: self.depth_buffer.as_entire_binding(),
2513                },
2514                wgpu::BindGroupEntry {
2515                    binding: 12,
2516                    resource: scene.occupancy_pages[1].as_entire_binding(),
2517                },
2518                wgpu::BindGroupEntry {
2519                    binding: 13,
2520                    resource: scene.occupancy_pages[2].as_entire_binding(),
2521                },
2522                wgpu::BindGroupEntry {
2523                    binding: 14,
2524                    resource: scene.occupancy_pages[3].as_entire_binding(),
2525                },
2526            ],
2527        });
2528
2529        let mut enc =
2530            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
2531        {
2532            let mut pass = enc.begin_compute_pass(&wgpu::ComputePassDescriptor {
2533                label: Some("roxlap-gpu headless.pass"),
2534                timestamp_writes: None,
2535            });
2536            pass.set_pipeline(&self.pipeline);
2537            pass.set_bind_group(0, &bg, &[]);
2538            pass.dispatch_workgroups(self.width.div_ceil(8), self.height.div_ceil(8), 1);
2539        }
2540        enc.copy_texture_to_buffer(
2541            wgpu::ImageCopyTexture {
2542                texture: &self.output_tex,
2543                mip_level: 0,
2544                origin: wgpu::Origin3d::ZERO,
2545                aspect: wgpu::TextureAspect::All,
2546            },
2547            wgpu::ImageCopyBuffer {
2548                buffer: &self.readback,
2549                layout: wgpu::ImageDataLayout {
2550                    offset: 0,
2551                    bytes_per_row: Some(self.padded_bytes_per_row),
2552                    rows_per_image: Some(self.height),
2553                },
2554            },
2555            wgpu::Extent3d {
2556                width: self.width,
2557                height: self.height,
2558                depth_or_array_layers: 1,
2559            },
2560        );
2561        queue.submit(Some(enc.finish()));
2562
2563        let slice = self.readback.slice(..);
2564        let (tx, rx) = std::sync::mpsc::channel();
2565        slice.map_async(wgpu::MapMode::Read, move |r| {
2566            let _ = tx.send(r);
2567        });
2568        device.poll(wgpu::Maintain::Wait);
2569        rx.recv().expect("map_async channel").expect("map_async");
2570
2571        let data = slice.get_mapped_range();
2572        let mut out = Vec::with_capacity((self.width * self.height) as usize);
2573        let pitch = self.padded_bytes_per_row as usize;
2574        for y in 0..self.height as usize {
2575            let row = &data[y * pitch..y * pitch + self.width as usize * 4];
2576            for px in row.chunks_exact(4) {
2577                out.push(
2578                    u32::from(px[0])
2579                        | (u32::from(px[1]) << 8)
2580                        | (u32::from(px[2]) << 16)
2581                        | (u32::from(px[3]) << 24),
2582                );
2583            }
2584        }
2585        drop(data);
2586        self.readback.unmap();
2587        out
2588    }
2589}
2590
2591fn bgl_uniform_entry(binding: u32) -> wgpu::BindGroupLayoutEntry {
2592    wgpu::BindGroupLayoutEntry {
2593        binding,
2594        visibility: wgpu::ShaderStages::COMPUTE,
2595        ty: wgpu::BindingType::Buffer {
2596            ty: wgpu::BufferBindingType::Uniform,
2597            has_dynamic_offset: false,
2598            min_binding_size: None,
2599        },
2600        count: None,
2601    }
2602}
2603
2604fn bgl_storage_entry(binding: u32, read_only: bool) -> wgpu::BindGroupLayoutEntry {
2605    wgpu::BindGroupLayoutEntry {
2606        binding,
2607        visibility: wgpu::ShaderStages::COMPUTE,
2608        ty: wgpu::BindingType::Buffer {
2609            ty: wgpu::BufferBindingType::Storage { read_only },
2610            has_dynamic_offset: false,
2611            min_binding_size: None,
2612        },
2613        count: None,
2614    }
2615}
2616
2617/// Create a fresh sky panorama texture sized `width × height` with
2618/// the initial pixel data uploaded via `write_texture`. Used by
2619/// `GpuRenderer::new` (1×1 default) and `set_sky_panorama` (host-
2620/// supplied panorama).
2621fn create_sky_texture(
2622    device: &wgpu::Device,
2623    width: u32,
2624    height: u32,
2625    _initial_pixels: &[u8],
2626) -> (wgpu::Texture, wgpu::TextureView) {
2627    let tex = device.create_texture(&wgpu::TextureDescriptor {
2628        label: Some("roxlap-gpu sky_texture"),
2629        size: wgpu::Extent3d {
2630            width,
2631            height,
2632            depth_or_array_layers: 1,
2633        },
2634        mip_level_count: 1,
2635        sample_count: 1,
2636        dimension: wgpu::TextureDimension::D2,
2637        format: wgpu::TextureFormat::Rgba8Unorm,
2638        usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
2639        view_formats: &[],
2640    });
2641    let view = tex.create_view(&wgpu::TextureViewDescriptor::default());
2642    (tex, view)
2643}
2644
2645/// GPU.4 needs to upload a whole grid (~hundreds of MiB) as a few
2646/// storage buffers. wgpu's default `max_storage_buffer_binding_size`
2647/// is 128 MiB, which is just enough for the demo's 32×32 ground
2648/// occupancy (~128 MiB) but not the colour array. We request as
2649/// much as the adapter is willing to give — most desktop GPUs cap
2650/// individual storage buffers at 2-4 GiB; iGPUs often offer the
2651/// full system memory.
2652pub(crate) fn pick_required_limits(adapter_limits: &wgpu::Limits) -> wgpu::Limits {
2653    wgpu::Limits {
2654        max_storage_buffer_binding_size: adapter_limits.max_storage_buffer_binding_size,
2655        max_buffer_size: adapter_limits.max_buffer_size,
2656        // Occupancy paging adds up to MAX_OCC_PAGES-1 extra storage
2657        // bindings; with the scene's other buffers + the GPU.9 depth
2658        // buffer the scene_dda stage needs ~11. The default cap is 8.
2659        // Both NVK and lavapipe advertise ≫16, so request 16.
2660        max_storage_buffers_per_shader_stage: adapter_limits
2661            .max_storage_buffers_per_shader_stage
2662            .min(16),
2663        ..wgpu::Limits::default()
2664    }
2665}
2666
2667fn pick_present_mode(modes: &[wgpu::PresentMode]) -> wgpu::PresentMode {
2668    // Prefer Mailbox > Immediate > Fifo. Fifo is the universal
2669    // fallback and the only one Wayland-on-Mesa always offers.
2670    for &m in &[wgpu::PresentMode::Mailbox, wgpu::PresentMode::Immediate] {
2671        if modes.contains(&m) {
2672            return m;
2673        }
2674    }
2675    wgpu::PresentMode::Fifo
2676}
2677
/// Un-normalised world-space view ray through window pixel `(x, y)`
/// of a `w × h` frame under a vertical-FOV pinhole — the projection
/// `scene_dda.wgsl`'s `render_scene` uses. Shared by
/// [`GpuRenderer::pixel_ray`]; standalone so it's unit-testable without
/// a device.
///
/// `right`/`down`/`forward` are the camera basis. The pixel is sampled
/// at its centre (`+0.5`), the horizontal extent is the vertical one
/// scaled by the aspect ratio `w / h`, and the result is
/// `forward + kx·right − ky·down`, where `ky` is positive towards the
/// top of the frame.
#[must_use]
#[allow(clippy::too_many_arguments)]
pub fn pinhole_pixel_ray(
    right: [f64; 3],
    down: [f64; 3],
    forward: [f64; 3],
    x: f64,
    y: f64,
    w: f64,
    h: f64,
    fov_y_rad: f64,
) -> [f64; 3] {
    let tan_half_fov_y = (fov_y_rad * 0.5).tan();
    let tan_half_fov_x = tan_half_fov_y * (w / h);

    // Pixel centre in [-1, 1] NDC (y measured upwards), scaled onto
    // the image plane at unit distance along `forward`.
    let horizontal = ((x + 0.5) / w * 2.0 - 1.0) * tan_half_fov_x;
    let vertical = (1.0 - (y + 0.5) / h * 2.0) * tan_half_fov_y;

    std::array::from_fn(|axis| forward[axis] + horizontal * right[axis] - vertical * down[axis])
}
2706
#[cfg(test)]
mod pixel_ray_tests {
    use super::pinhole_pixel_ray;

    const RIGHT: [f64; 3] = [1.0, 0.0, 0.0];
    const DOWN: [f64; 3] = [0.0, 1.0, 0.0];
    const FWD: [f64; 3] = [0.0, 0.0, 1.0]; // voxlap z-down "look down"

    // Window size shared by every case.
    const WIDTH: f64 = 1280.0;
    const HEIGHT: f64 = 720.0;

    // Ray through window pixel `(x, y)` using the axis-aligned basis
    // above.
    fn ray(x: f64, y: f64, fov_y_rad: f64) -> [f64; 3] {
        pinhole_pixel_ray(RIGHT, DOWN, FWD, x, y, WIDTH, HEIGHT, fov_y_rad)
    }

    // The frame centre (NDC 0,0) looks straight along `forward`.
    #[test]
    fn centre_pixel_is_forward() {
        let d = ray(639.5, 359.5, 60_f64.to_radians());
        assert!(
            d[0].abs() < 1e-9 && d[1].abs() < 1e-9,
            "centre ≈ forward, got {d:?}"
        );
        assert!((d[2] - 1.0).abs() < 1e-9);
    }

    // At the very right edge the lateral component equals
    // half_w = tan(fov_y/2)*aspect, i.e. the ray tilts +right by
    // tan(hfov/2).
    #[test]
    fn right_edge_tilts_by_half_w() {
        let fov = 60_f64.to_radians();
        let d = ray(1279.5, 359.5, fov);
        let half_w = (fov * 0.5).tan() * (WIDTH / HEIGHT);
        assert!((d[0] - half_w).abs() < 1e-6, "x={}, half_w={half_w}", d[0]);
        assert!(d[0] > 0.0, "right edge tilts +right");
    }
}