Skip to main content

roxlap_gpu/
lib.rs

1//! WGPU-backed compute-shader renderer scaffold for the roxlap
2//! voxel engine. GPU.1 in `PORTING-GPU.md`.
3//!
4//! GPU.1's job: stand up the device + surface + swapchain on a
5//! winit window, present a clear-to-colour frame each render call,
6//! and give the host a one-call opt-in. No voxel marching yet — the
7//! [`examples/probe.rs`](../examples/probe.rs) standalone holds
8//! the empirical FPS baseline from GPU.0.
9//!
10//! Later sub-substages flesh `GpuRenderer::render` out: GPU.2
11//! uploads voxel data, GPU.3 dispatches the inner-DDA compute
12//! shader, GPU.4 layers in chunk skipping, GPU.5 plugs the renderer
13//! into `roxlap-scene::Scene`, …
14//!
15//! ## Host integration shape (GPU.1)
16//!
17//! ```no_run
18//! use std::sync::Arc;
19//! use roxlap_gpu::{GpuRenderer, GpuRendererSettings};
20//! # use winit::window::Window;
21//! # fn pick(w: Arc<Window>) -> Option<GpuRenderer> {
22//! match GpuRenderer::new_blocking(w, GpuRendererSettings::default()) {
23//!     Ok(r) => Some(r),
24//!     Err(e) => {
25//!         eprintln!("GPU init failed: {e}; falling back to CPU");
26//!         None
27//!     }
28//! }
29//! # }
30//! ```
31
32#![allow(clippy::must_use_candidate, clippy::too_many_lines)]
33
34pub mod camera;
35pub mod decompress;
36pub mod grid;
37pub mod headless;
38pub mod resident;
39pub mod scene;
40pub mod sprite_model;
41
42pub use camera::Camera;
43pub use decompress::{decompress_chunk, ChunkUpload, BEDROCK_RGB, CHUNK_Z};
44pub use grid::{bounding_box_of, GpuGridResident, GridUpload};
45pub use headless::HeadlessGpu;
46pub use resident::GpuChunkResident;
47pub use scene::{
48    GpuSceneResident, GridRuntimeTransform, GridStaticMeta, RefreshOutcome, SceneUpload,
49    MAX_SCENE_GRIDS,
50};
51pub use sprite_model::{
52    build_sprite_model, SpriteInstance, SpriteInstanceTransform, SpriteModel, SpriteModelRegistry,
53    SpriteRegistryResident,
54};
55
56use std::sync::Arc;
57
58use bytemuck::{Pod, Zeroable};
59use winit::window::Window;
60
/// Caller-controllable knobs for [`GpuRenderer::new`]. Defaults
/// target "highest-performance GPU, prefer Mailbox/Immediate over
/// vsync" — i.e. the same configuration the GPU.0 probe used to
/// measure the FPS ceiling.
///
/// The struct is `Copy`, so hosts can build one, tweak a field, and
/// pass it by value.
#[derive(Debug, Clone, Copy)]
pub struct GpuRendererSettings {
    /// Which class of adapter to ask for. [`GpuRenderer::new`] maps
    /// this onto `wgpu::PowerPreference` when requesting the adapter.
    pub power_preference: PowerPreference,
    /// Initial clear colour cycled by GPU.1's empty render path.
    /// The voxel-rendering substages overwrite this entirely.
    /// Components are `[r, g, b]`; `render` clamps each channel to
    /// `[0, 1]` after adding its drift.
    pub clear_colour: [f64; 3],
    /// Prefer mailbox/immediate when offered; falls back to FIFO if
    /// the surface only supports it (Wayland under Mesa often does).
    /// When `false`, `new` always configures `PresentMode::Fifo`.
    pub uncapped_present: bool,
}
75
/// Adapter class the host would like [`GpuRenderer::new`] to pick.
///
/// This is a wgpu-independent mirror of the adapter power preference
/// so hosts do not need to name wgpu types in their settings. The
/// type is `Copy` and comparable (`PartialEq`/`Eq`/`Hash`), so it can
/// be diffed against a previous configuration or used as a map key.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum PowerPreference {
    /// Ask for the low-power adapter (mapped to
    /// `wgpu::PowerPreference::LowPower`).
    Low,
    /// Ask for the high-performance adapter (mapped to
    /// `wgpu::PowerPreference::HighPerformance`).
    High,
}
81
82impl Default for GpuRendererSettings {
83    fn default() -> Self {
84        Self {
85            power_preference: PowerPreference::High,
86            clear_colour: [0.06, 0.08, 0.12],
87            uncapped_present: true,
88        }
89    }
90}
91
/// Errors `GpuRenderer::new` surfaces to the host. The host's
/// expected flow is "try this, fall back to the CPU path on Err".
#[derive(Debug)]
pub enum GpuInitError {
    /// `wgpu::Instance::create_surface` rejected the window; wraps
    /// the underlying wgpu error.
    CreateSurface(wgpu::CreateSurfaceError),
    /// The adapter request came back empty — nothing on this system
    /// can drive the surface.
    NoAdapter,
    /// `wgpu::Adapter::request_device` failed; wraps the underlying
    /// wgpu error.
    RequestDevice(wgpu::RequestDeviceError),
}
100
101impl std::fmt::Display for GpuInitError {
102    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
103        match self {
104            Self::CreateSurface(e) => write!(f, "create_surface failed: {e}"),
105            Self::NoAdapter => write!(
106                f,
107                "no compatible adapter — does this system have a Vulkan/Metal/DX12 driver?"
108            ),
109            Self::RequestDevice(e) => write!(f, "request_device failed: {e}"),
110        }
111    }
112}
113
114impl std::error::Error for GpuInitError {
115    fn source(&self) -> Option<&(dyn std::error::Error + 'static)> {
116        match self {
117            Self::CreateSurface(e) => Some(e),
118            Self::RequestDevice(e) => Some(e),
119            Self::NoAdapter => None,
120        }
121    }
122}
123
124impl From<wgpu::CreateSurfaceError> for GpuInitError {
125    fn from(value: wgpu::CreateSurfaceError) -> Self {
126        Self::CreateSurface(value)
127    }
128}
129
130impl From<wgpu::RequestDeviceError> for GpuInitError {
131    fn from(value: wgpu::RequestDeviceError) -> Self {
132        Self::RequestDevice(value)
133    }
134}
135
/// WGPU-backed renderer. Owns the device, queue, and surface
/// bound to the host's winit window. [`Self::render`] is the GPU.1
/// clear-to-colour path; [`Self::render_chunk`] is GPU.3's
/// single-chunk DDA marcher.
pub struct GpuRenderer {
    /// Shared handle to the host window the surface was created from.
    window: Arc<Window>,
    /// Presentation surface for `window`.
    surface: wgpu::Surface<'static>,
    /// Current swapchain configuration. `resize` updates the size and
    /// re-applies it; the render paths re-apply it unchanged when the
    /// surface reports `Outdated`/`Lost`.
    surface_config: wgpu::SurfaceConfiguration,
    /// Logical device all GPU resources are created on.
    device: wgpu::Device,
    /// Submission queue paired with `device`.
    queue: wgpu::Queue,
    /// `"name (backend, device type)"` string captured from the
    /// adapter at construction time.
    adapter_info: String,
    /// Base `[r, g, b]` clear colour from the settings; `render` adds
    /// a small per-frame drift on top.
    clear_colour: [f64; 3],
    /// Frames presented so far (wrapping). Drives the colour-drift
    /// phase in `render`.
    frame_count: u32,
    /// Lazy-built on first [`Self::render_chunk`] call; rebuilt when
    /// the swapchain resizes (storage texture must match).
    chunk_dda: Option<ChunkDdaResources>,
    /// Lazy-built on first [`Self::render_grid`] call; same resize
    /// trigger as `chunk_dda`. The two paths share the same blit
    /// pipeline structure but bind different storage layouts.
    grid_dda: Option<GridDdaResources>,
    /// Lazy-built on first [`Self::render_scene`] call. Holds the
    /// multi-grid pipeline + per-grid camera uniforms.
    scene_dda: Option<SceneDdaResources>,
    /// GPU.8 — panoramic sky texture + sampler. Created at
    /// `new` as a 1×1 mid-grey default; [`Self::set_sky_panorama`]
    /// replaces it. The scene-DDA bind group references this each
    /// frame.
    sky_texture: wgpu::Texture,
    sky_view: wgpu::TextureView,
    sky_sampler: wgpu::Sampler,
    /// GPU.8 fog state. `color` is BGRA-style premultiplied (each
    /// channel in [0, 1]); `near` is the world-t distance at which
    /// fog starts kicking in; `far` is the distance at which it's
    /// fully opaque. The shader does
    /// `mix(hit, fog, smoothstep(near, far, t))`.
    // NOTE(review): the field is a 3-component array, so the
    // "BGRA-style" wording above may be stale — confirm against the
    // shader's channel order.
    fog_color: [f32; 3],
    fog_near: f32,
    fog_far: f32,
    /// GPU.10 — sprites rendered as DDA-marched voxel models (the
    /// precise path; the GPU.9 compute splatter it replaced was
    /// retired in 10.5). Holds the concatenated model registry + the
    /// per-frame instance array; set via [`Self::set_sprite_instances`].
    sprite_registry: Option<sprite_model::SpriteRegistryResident>,
    /// Lazy-built pipeline + uniform for the model-DDA pass.
    sprite_model_dda: Option<SpriteModelDdaResources>,
    /// GPU.10.4 — LOD aggressiveness: step a sprite to the next mip
    /// once a mip-0 voxel projects below this many screen pixels.
    /// Defaults to 4.0 (the empirical sweet spot); the host can tune
    /// via [`Self::set_sprite_lod_px`].
    sprite_lod_px: f32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` is marched at the mip level
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// ladder. `0` disables LOD (always mip-0). Tunable via
    /// [`Self::set_scene_mip_scan_dist`] — the axis-aligned-mip-beams
    /// mitigation (GPU.11.2) pushes it outward if banding appears.
    scene_mip_scan_dist: f32,
    /// Vertical FOV (radians) the last `render_scene` marched with —
    /// cached so [`Self::pixel_ray`] reconstructs the matching view ray
    /// for picking. `0` until the first scene render.
    last_fov_y_rad: f32,
}
198
/// Per-renderer chunk-DDA pipeline state. The compute shader writes
/// into the storage texture; a fullscreen-triangle render pass
/// nearest-neighbour blits it to the swapchain.
struct ChunkDdaResources {
    /// `(width, height)` the storage texture was built for.
    /// `render_chunk` rebuilds the whole struct when this no longer
    /// matches the swapchain size.
    storage_size: (u32, u32),
    /// View of the storage texture; bound at binding 4 of the
    /// per-frame DDA bind group.
    storage_view: wgpu::TextureView,
    /// Holds one `ChunkDdaUniform`, rewritten every frame and bound at
    /// binding 0.
    uniform_buf: wgpu::Buffer,
    /// Layout used to build the per-frame DDA bind group.
    bgl_dda: wgpu::BindGroupLayout,
    /// Compute pipeline for the DDA march.
    pipeline_dda: wgpu::ComputePipeline,
    /// Bind group used by the blit render pass (group 0).
    blit_bg: wgpu::BindGroup,
    /// Render pipeline for the blit; drawn with three vertices.
    pipeline_blit: wgpu::RenderPipeline,
    // wgpu BindGroups internally Arc their resources, but we keep
    // the handle so the sampler shows up in profiler dumps.
    _sampler: wgpu::Sampler,
}
214
/// Per-renderer grid-DDA pipeline state, cached in
/// `GpuRenderer::grid_dda`. Field-for-field the same shape as
/// `ChunkDdaResources`.
// NOTE(review): the code that builds and consumes this struct is not
// in view; the per-field roles are presumed to mirror
// `ChunkDdaResources` — confirm against `render_grid`.
struct GridDdaResources {
    /// `(width, height)` the storage texture was built for.
    storage_size: (u32, u32),
    storage_view: wgpu::TextureView,
    uniform_buf: wgpu::Buffer,
    bgl_dda: wgpu::BindGroupLayout,
    pipeline_dda: wgpu::ComputePipeline,
    blit_bg: wgpu::BindGroup,
    pipeline_blit: wgpu::RenderPipeline,
    // Kept alive alongside the bind group; not read directly.
    _sampler: wgpu::Sampler,
}
225
/// Per-renderer scene-DDA (multi-grid) pipeline state, cached in
/// `GpuRenderer::scene_dda`. Same core fields as the chunk and grid
/// variants, plus the per-pixel depth buffer and its readback copy.
// NOTE(review): the builder/consumer (`render_scene`) is not in view;
// roles of the undocumented fields are presumed to mirror
// `ChunkDdaResources` — confirm.
struct SceneDdaResources {
    /// `(width, height)` the storage texture was built for.
    storage_size: (u32, u32),
    storage_view: wgpu::TextureView,
    uniform_buf: wgpu::Buffer,
    bgl_dda: wgpu::BindGroupLayout,
    pipeline_dda: wgpu::ComputePipeline,
    blit_bg: wgpu::BindGroup,
    pipeline_blit: wgpu::RenderPipeline,
    // Kept alive alongside the bind group; not read directly.
    _sampler: wgpu::Sampler,
    /// GPU.9 — per-pixel world-t depth (f32 bits as u32), sized
    /// `width * height * 4`. The scene pass writes it when sprites
    /// are present; the sprite model-DDA pass reads + composites
    /// against it.
    depth_buffer: wgpu::Buffer,
    /// Picking — a `COPY_DST | MAP_READ` staging copy of `depth_buffer`
    /// so the host can read back the per-pixel world-t after a frame
    /// (e.g. click → which voxel). Same size as `depth_buffer`.
    depth_readback: wgpu::Buffer,
}
245
/// GPU.10.0 — single-sprite model-DDA pipeline: one thread per pixel
/// marches the model voxel volume and composites against the scene
/// depth buffer.
struct SpriteModelDdaResources {
    /// Bind group layout for the model-DDA pass.
    bgl: wgpu::BindGroupLayout,
    /// Compute pipeline for the model-DDA pass.
    pipeline: wgpu::ComputePipeline,
    /// Uniform buffer for the pass; presumably holds one
    /// `SpriteModelUniform` — confirm against the code that writes it.
    uniform_buf: wgpu::Buffer,
}
254
/// Per-frame uniform for the model-DDA pass. Mirrors `Uniform` in
/// `sprite_model_dda.wgsl` (std140). Per-model + per-instance data
/// now live in storage buffers; this holds the camera basis, fog,
/// screen size, instance count, FOV, and sprite-tile parameters.
///
/// Layout: four 16-byte camera rows, one 16-byte fog vector, then
/// two 16-byte rows of scalars — 112 bytes in total. Field order and
/// padding are part of the GPU contract; do not reorder.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SpriteModelUniform {
    // Each 3-float vector is followed by one float of padding so it
    // fills a 16-byte slot.
    cam_pos: [f32; 3],
    _p0: f32,
    cam_right: [f32; 3],
    _p1: f32,
    cam_down: [f32; 3],
    _p2: f32,
    cam_forward: [f32; 3],
    _p3: f32,
    // NOTE(review): meaning of the 4th component is not visible here;
    // `SceneDdaUniform::fog_color` packs `fog_near` there — confirm
    // this struct does the same.
    fog_color: [f32; 4],
    screen_size: [u32; 2],
    instance_count: u32,
    fog_far: f32,
    fov_y_rad: f32,
    // Presumably the tile-grid width and tile edge used for instance
    // binning (see `SPRITE_TILE_SIZE`) — confirm against the WGSL.
    tiles_x: u32,
    tile_size: u32,
    // Tail padding rounding the struct up to a 16-byte multiple.
    _p6: f32,
}
279
/// `MAX_SCENE_GRIDS` as a `usize`, for use as an array length (see
/// `SceneDdaUniform::cameras`).
const SCENE_MAX_GRIDS: usize = MAX_SCENE_GRIDS as usize;

/// GPU.10.3 — sprite screen-tile edge in pixels for instance binning.
const SPRITE_TILE_SIZE: u32 = 16;

// The scene_dda bind group + layout wire occupancy pages 1..=3 at
// bindings 12..=14 explicitly; keep that in lockstep with the page
// count. Bump the bindings (here, in the WGSL, and in the bind
// group) if MAX_OCC_PAGES changes.
// Compile-time check: the build fails if the page count drifts from 4.
const _: () = assert!(scene::MAX_OCC_PAGES == 4);
290
/// One camera entry in `SceneDdaUniform::cameras`: a position plus a
/// right/down/forward basis. Each 3-float vector is padded to 16
/// bytes, so one entry is 64 bytes. Field order is part of the GPU
/// layout; do not reorder.
// NOTE(review): presumably one entry per scene grid, expressed in that
// grid's local frame — confirm against `render_scene`.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaPerGridCamera {
    pos: [f32; 3],
    _pad0: f32,
    right: [f32; 3],
    _pad1: f32,
    down: [f32; 3],
    _pad2: f32,
    forward: [f32; 3],
    _pad3: f32,
}
303
/// Uniform block for the scene (multi-grid) DDA pass.
///
/// Layout: a 32-byte header, `SCENE_MAX_GRIDS` 64-byte camera
/// entries, then three 16-byte rows (fog colour, fog/depth/occupancy
/// scalars, LOD distance + padding). Field order and padding are
/// part of the GPU contract; do not reorder.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct SceneDdaUniform {
    /// Vertical field of view in radians.
    fov_y_rad: f32,
    /// Presumably the number of valid entries at the front of
    /// `cameras` — confirm against `render_scene`.
    grid_count: u32,
    // NOTE(review): looks like a cap on outer (chunk-level) DDA steps
    // per ray — confirm against the WGSL.
    max_outer_steps: u32,
    _pad0: u32,
    /// Target size in pixels, `[width, height]`.
    screen_size: [u32; 2],
    _pad1: [u32; 2],
    /// Fixed-capacity array of per-grid cameras.
    cameras: [SceneDdaPerGridCamera; SCENE_MAX_GRIDS],
    /// GPU.8 — `[r, g, b, fog_near]`. The `near` distance is packed
    /// into the colour's alpha channel to keep std140 alignment
    /// tidy (a bare `f32` after the `vec4` would force extra pads).
    fog_color: [f32; 4],
    fog_far: f32,
    /// GPU.9 — `1` when the sprite pass is active (scene pass then
    /// records `best_t` into the depth buffer), `0` otherwise.
    write_depth: u32,
    /// Occupancy paging: words per storage page (see
    /// `scene::split_occupancy_pages`). Only consulted by the shader
    /// when `occ_num_pages > 1`.
    occ_page_words: u32,
    /// Number of real occupancy pages (1 on multi-GiB GPUs → the
    /// shader takes a branch-free single-page read).
    occ_num_pages: u32,
    /// GPU.11.1 — scene-grid LOD scan distance (world units). A chunk
    /// entered at world-t `t` marches at mip
    /// `floor(log2(max(t, msd) / msd))`, clamped to the grid's mip
    /// count. `0` disables LOD (always mip-0).
    mip_scan_dist: f32,
    // Tail padding rounding the final row up to 16 bytes.
    _pad2: u32,
    _pad3: u32,
    _pad4: u32,
}
338
/// Uniform block for the single-grid DDA pass: camera position and
/// basis, FOV, screen size, and the grid's chunk layout. Seven
/// 16-byte rows, 112 bytes in total. Field order and padding are
/// part of the GPU contract; do not reorder.
// NOTE(review): the code that fills this struct is not in view; the
// per-field notes below are inferred from names and the parallel
// `ChunkDdaUniform` — confirm against `render_grid` and the WGSL.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct GridDdaUniform {
    camera_pos: [f32; 3],
    _pad0: f32,
    camera_right: [f32; 3],
    _pad1: f32,
    camera_down: [f32; 3],
    _pad2: f32,
    // `fov_y_rad` occupies the 4th float of the forward vector's
    // 16-byte slot, so no explicit pad is needed here.
    camera_forward: [f32; 3],
    fov_y_rad: f32,
    screen_size: [u32; 2],
    vsid: u32,
    max_outer_steps: u32,
    // Presumably the grid extent in chunks per axis — TODO confirm.
    chunks_dims: [u32; 3],
    _pad3: u32,
    // Presumably the chunk coordinate of the grid's origin — TODO confirm.
    origin_chunk: [i32; 3],
    _pad4: u32,
}
358
/// Uniform block for the single-chunk DDA pass; `render_chunk` fills
/// one per frame and writes it to the chunk-DDA uniform buffer. Five
/// 16-byte rows, 80 bytes in total. Field order and padding are part
/// of the GPU contract; do not reorder.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable)]
struct ChunkDdaUniform {
    /// Camera position (chunk-local voxel units, per `render_chunk`).
    camera_pos: [f32; 3],
    _pad0: f32,
    camera_right: [f32; 3],
    _pad1: f32,
    camera_down: [f32; 3],
    _pad2: f32,
    // `fov_y_rad` occupies the 4th float of the forward vector's
    // 16-byte slot, so no explicit pad is needed here.
    camera_forward: [f32; 3],
    fov_y_rad: f32,
    /// Swapchain size in pixels, `[width, height]`.
    screen_size: [u32; 2],
    /// Copied from the resident chunk's `vsid`.
    vsid: u32,
    /// Caller-supplied cap on the per-pixel DDA loop.
    max_scan_dist: u32,
}
374
375impl GpuRenderer {
376    /// Stand up the device + surface + swapchain on `window`. Async
377    /// because `wgpu::Adapter`/`Device` requests are.
378    ///
379    /// # Errors
380    /// Returns [`GpuInitError`] if surface creation, adapter
381    /// selection, or device request fails. Hosts treat any error as
382    /// "fall back to the CPU path".
383    pub async fn new(
384        window: Arc<Window>,
385        settings: GpuRendererSettings,
386    ) -> Result<Self, GpuInitError> {
387        let instance = wgpu::Instance::new(wgpu::InstanceDescriptor::default());
388        let surface = instance.create_surface(window.clone())?;
389        let power_preference = match settings.power_preference {
390            PowerPreference::Low => wgpu::PowerPreference::LowPower,
391            PowerPreference::High => wgpu::PowerPreference::HighPerformance,
392        };
393        let adapter = instance
394            .request_adapter(&wgpu::RequestAdapterOptions {
395                power_preference,
396                compatible_surface: Some(&surface),
397                force_fallback_adapter: false,
398            })
399            .await
400            .ok_or(GpuInitError::NoAdapter)?;
401
402        let info = adapter.get_info();
403        let adapter_info = format!(
404            "{name} ({backend:?}, {device_type:?})",
405            name = info.name,
406            backend = info.backend,
407            device_type = info.device_type,
408        );
409
410        let (device, queue) = adapter
411            .request_device(
412                &wgpu::DeviceDescriptor {
413                    label: Some("roxlap-gpu device"),
414                    required_features: wgpu::Features::empty(),
415                    required_limits: pick_required_limits(&adapter.limits()),
416                    memory_hints: wgpu::MemoryHints::default(),
417                },
418                None,
419            )
420            .await?;
421
422        let caps = surface.get_capabilities(&adapter);
423        // Pick a NON-sRGB swapchain format. Voxlap colours are
424        // already sRGB-encoded (the slab bytes are display-ready,
425        // matching what the CPU softbuffer path writes straight to
426        // the framebuffer with no conversion). An sRGB swapchain
427        // would re-apply the gamma curve on top, producing a
428        // washed-out / pastel look that diverges from the CPU
429        // renderer. Falls back to `caps.formats[0]` only if every
430        // offered format is sRGB.
431        let surface_format = caps
432            .formats
433            .iter()
434            .copied()
435            .find(|f| !f.is_srgb())
436            .unwrap_or(caps.formats[0]);
437        let present_mode = if settings.uncapped_present {
438            pick_present_mode(&caps.present_modes)
439        } else {
440            wgpu::PresentMode::Fifo
441        };
442        // GPU.11.2 — surface the present mode: `Fifo` is vsync-capped
443        // (FPS pinned to refresh rate → compute optimisations like the
444        // mip LOD won't show up in the FPS counter). Mailbox/Immediate
445        // are uncapped. Wayland under Mesa frequently offers only Fifo.
446        eprintln!(
447            "roxlap-gpu: present mode = {present_mode:?} (available: {:?})",
448            caps.present_modes,
449        );
450        let physical = window.inner_size();
451        let surface_config = wgpu::SurfaceConfiguration {
452            usage: wgpu::TextureUsages::RENDER_ATTACHMENT,
453            format: surface_format,
454            width: physical.width.max(1),
455            height: physical.height.max(1),
456            present_mode,
457            alpha_mode: caps.alpha_modes[0],
458            view_formats: vec![],
459            desired_maximum_frame_latency: 2,
460        };
461        surface.configure(&device, &surface_config);
462
463        // GPU.8 default sky: a 1×1 mid-grey texture. Hosts replace
464        // it via `set_sky_panorama` with a real equirectangular
465        // panorama; the default stops the shader sampling
466        // uninitialised memory before that happens.
467        let default_sky_pixel = [0x80u8, 0x80, 0x80, 0xff];
468        let (sky_texture, sky_view) = create_sky_texture(&device, 1, 1, &default_sky_pixel);
469        queue.write_texture(
470            wgpu::ImageCopyTexture {
471                texture: &sky_texture,
472                mip_level: 0,
473                origin: wgpu::Origin3d::ZERO,
474                aspect: wgpu::TextureAspect::All,
475            },
476            &default_sky_pixel,
477            wgpu::ImageDataLayout {
478                offset: 0,
479                bytes_per_row: Some(4),
480                rows_per_image: Some(1),
481            },
482            wgpu::Extent3d {
483                width: 1,
484                height: 1,
485                depth_or_array_layers: 1,
486            },
487        );
488        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
489            label: Some("roxlap-gpu sky_sampler"),
490            // Voxlap-convention panorama: u = elevation [0, 1]
491            // (Repeat is a no-op since values don't go outside),
492            // v = azimuth (wraps 360° — Repeat is required).
493            address_mode_u: wgpu::AddressMode::Repeat,
494            address_mode_v: wgpu::AddressMode::Repeat,
495            address_mode_w: wgpu::AddressMode::ClampToEdge,
496            mag_filter: wgpu::FilterMode::Linear,
497            min_filter: wgpu::FilterMode::Linear,
498            mipmap_filter: wgpu::FilterMode::Nearest,
499            ..Default::default()
500        });
501
502        Ok(Self {
503            window,
504            surface,
505            surface_config,
506            device,
507            queue,
508            adapter_info,
509            clear_colour: settings.clear_colour,
510            frame_count: 0,
511            chunk_dda: None,
512            grid_dda: None,
513            scene_dda: None,
514            sky_texture,
515            sky_view,
516            sky_sampler,
517            // Fog disabled by default — voxlap's CPU rasterizer
518            // also runs without fog in the scene-demo, so matching
519            // it means no GPU fog out of the box. Hosts can opt in
520            // via `set_fog` (e.g. for atmospheric far-LOD masking).
521            fog_color: [0.66, 0.74, 0.88],
522            fog_near: 0.0,
523            fog_far: 1.0e30,
524            sprite_registry: None,
525            sprite_model_dda: None,
526            // GPU.10.4 — default LOD threshold: step to a coarser mip
527            // once a voxel projects below 4 px. Empirically the best
528            // quality/cost tradeoff; the host can override.
529            sprite_lod_px: 4.0,
530            // GPU.11.1 — matches the CPU demo's mip_scan_dist=64.
531            scene_mip_scan_dist: 64.0,
532            last_fov_y_rad: 0.0,
533        })
534    }
535
536    /// Synchronous wrapper for hosts that don't have an async
537    /// runtime. Internally `pollster::block_on`s [`Self::new`].
538    ///
539    /// # Errors
540    /// See [`Self::new`].
541    pub fn new_blocking(
542        window: Arc<Window>,
543        settings: GpuRendererSettings,
544    ) -> Result<Self, GpuInitError> {
545        pollster::block_on(Self::new(window, settings))
546    }
547
548    /// Human-readable adapter description — name + backend +
549    /// device type. The demo host prints this in the title bar.
550    pub fn adapter_info(&self) -> &str {
551        &self.adapter_info
552    }
553
554    pub fn window(&self) -> &Window {
555        &self.window
556    }
557
558    /// Borrow the underlying wgpu device — hosts use this to build
559    /// chunk uploads (`GpuChunkResident::upload(gpu.device(), …)`).
560    pub fn device(&self) -> &wgpu::Device {
561        &self.device
562    }
563
564    /// Borrow the wgpu queue — hosts use this for read-back paths
565    /// (`GpuChunkResident::read_voxel_blocking(gpu.device(), gpu.queue(), …)`).
566    pub fn queue(&self) -> &wgpu::Queue {
567        &self.queue
568    }
569
570    /// GPU.8 — upload an equirectangular panorama as the scene's
571    /// sky texture. `rgba` is row-major, `width × height` pixels,
572    /// 4 bytes per pixel (R, G, B, A). The shader samples it with
573    /// `u = atan2(dir.x, dir.y) / (2π) + 0.5` (azimuth) and
574    /// `v = acos(-dir.z) / π` (elevation), matching standard
575    /// equirectangular layout (top of image = zenith for voxlap's
576    /// `+z = down` basis).
577    ///
578    /// # Panics
579    /// If `rgba.len() != (width * height * 4) as usize`.
580    pub fn set_sky_panorama(&mut self, rgba: &[u8], width: u32, height: u32) {
581        assert_eq!(
582            rgba.len(),
583            (width as usize) * (height as usize) * 4,
584            "set_sky_panorama: expected w*h*4 bytes, got {}",
585            rgba.len(),
586        );
587        let (tex, view) = create_sky_texture(&self.device, width, height, rgba);
588        // Upload pixel data via `queue.write_texture` so we don't
589        // have to map the buffer manually.
590        self.queue.write_texture(
591            wgpu::ImageCopyTexture {
592                texture: &tex,
593                mip_level: 0,
594                origin: wgpu::Origin3d::ZERO,
595                aspect: wgpu::TextureAspect::All,
596            },
597            rgba,
598            wgpu::ImageDataLayout {
599                offset: 0,
600                bytes_per_row: Some(width * 4),
601                rows_per_image: Some(height),
602            },
603            wgpu::Extent3d {
604                width,
605                height,
606                depth_or_array_layers: 1,
607            },
608        );
609        self.sky_texture = tex;
610        self.sky_view = view;
611    }
612
613    /// GPU.8 — set the fog blend. `color` is per-channel [0, 1];
614    /// `near`/`far` are world-space ray distances in voxel units.
615    /// Hits with `t < near` show their full colour; hits with
616    /// `t > far` show `color` exclusively; in between is a
617    /// smoothstep blend.
618    pub fn set_fog(&mut self, color: [f32; 3], near: f32, far: f32) {
619        self.fog_color = color;
620        self.fog_near = near;
621        self.fog_far = far.max(near + 1.0);
622    }
623
624    /// Re-configure the swapchain to a new physical size. Call from
625    /// `WindowEvent::Resized`. Drops the chunk-DDA storage texture
626    /// so [`Self::render_chunk`] rebuilds it at the new size.
627    pub fn resize(&mut self, width: u32, height: u32) {
628        if width == 0 || height == 0 {
629            return;
630        }
631        self.surface_config.width = width;
632        self.surface_config.height = height;
633        self.surface.configure(&self.device, &self.surface_config);
634        self.chunk_dda = None;
635        self.grid_dda = None;
636        self.scene_dda = None;
637    }
638
639    /// GPU.1 render: single render pass clearing the swapchain to a
640    /// slowly drifting colour, then presenting. Voxels arrive in
641    /// GPU.3+.
642    pub fn render(&mut self) {
643        let surf_tex = match self.surface.get_current_texture() {
644            Ok(t) => t,
645            Err(wgpu::SurfaceError::Outdated | wgpu::SurfaceError::Lost) => {
646                self.surface.configure(&self.device, &self.surface_config);
647                return;
648            }
649            Err(e) => {
650                eprintln!("roxlap-gpu surface error: {e:?}");
651                return;
652            }
653        };
654        let view = surf_tex
655            .texture
656            .create_view(&wgpu::TextureViewDescriptor::default());
657
658        // Slow colour drift so the user can tell the GPU path is
659        // actually presenting frames vs. e.g. a frozen window.
660        // Wrap at 2π/0.005 frames (~1257) so the cast stays exact.
661        let phase = f64::from(self.frame_count % 1257) * 0.005;
662        let [r, g, b] = self.clear_colour;
663        let drift = (phase.sin() * 0.04 + 0.04).clamp(0.0, 0.1);
664        let clear = wgpu::Color {
665            r: (r + drift).clamp(0.0, 1.0),
666            g: (g + drift * 0.5).clamp(0.0, 1.0),
667            b: (b + drift * 0.25).clamp(0.0, 1.0),
668            a: 1.0,
669        };
670
671        let mut encoder = self
672            .device
673            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
674                label: Some("roxlap-gpu encoder"),
675            });
676        {
677            let _rp = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
678                label: Some("roxlap-gpu clear"),
679                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
680                    view: &view,
681                    resolve_target: None,
682                    ops: wgpu::Operations {
683                        load: wgpu::LoadOp::Clear(clear),
684                        store: wgpu::StoreOp::Store,
685                    },
686                })],
687                depth_stencil_attachment: None,
688                timestamp_writes: None,
689                occlusion_query_set: None,
690            });
691        }
692        self.queue.submit(std::iter::once(encoder.finish()));
693        surf_tex.present();
694        self.frame_count = self.frame_count.wrapping_add(1);
695    }
696
697    /// GPU.3 single-chunk render. Dispatches `chunk_dda.wgsl`
698    /// against `resident`'s storage buffers, then blits the
699    /// low-res storage texture to the swapchain. `camera.position`
700    /// is in **chunk-local** voxel units (host translates from
701    /// world coords). `max_scan_dist` caps the per-pixel DDA loop —
702    /// scene-demo wires `+` / `-` through this each frame.
703    ///
704    /// # Panics
705    /// Internally `expect`s the chunk-DDA resources to be built —
706    /// they are constructed at the top of this function if missing.
707    /// Cannot fire in normal control flow.
708    pub fn render_chunk(
709        &mut self,
710        resident: &GpuChunkResident,
711        camera: &Camera,
712        max_scan_dist: u32,
713    ) {
714        let surf_tex = match self.surface.get_current_texture() {
715            Ok(t) => t,
716            Err(wgpu::SurfaceError::Outdated | wgpu::SurfaceError::Lost) => {
717                self.surface.configure(&self.device, &self.surface_config);
718                return;
719            }
720            Err(e) => {
721                eprintln!("roxlap-gpu surface error: {e:?}");
722                return;
723            }
724        };
725        let surf_view = surf_tex
726            .texture
727            .create_view(&wgpu::TextureViewDescriptor::default());
728
729        let surface_w = self.surface_config.width;
730        let surface_h = self.surface_config.height;
731        let surface_format = self.surface_config.format;
732
733        // Lazy-build chunk-DDA resources; rebuild when the swapchain
734        // grew or shrank.
735        let needs_build = match &self.chunk_dda {
736            Some(r) => r.storage_size != (surface_w, surface_h),
737            None => true,
738        };
739        if needs_build {
740            self.chunk_dda = Some(self.build_chunk_dda(surface_w, surface_h, surface_format));
741        }
742        let dda = self.chunk_dda.as_ref().expect("just built");
743
744        // Update uniforms.
745        let uniform = ChunkDdaUniform {
746            camera_pos: camera.position,
747            _pad0: 0.0,
748            camera_right: camera.right,
749            _pad1: 0.0,
750            camera_down: camera.down,
751            _pad2: 0.0,
752            camera_forward: camera.forward,
753            fov_y_rad: camera.fov_y_rad,
754            screen_size: [surface_w, surface_h],
755            vsid: resident.vsid,
756            max_scan_dist,
757        };
758        self.queue
759            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
760
761        // Per-frame DDA bind group — references the chunk's buffers
762        // so we rebuild every frame (the resident can change between
763        // calls).
764        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
765            label: Some("roxlap-gpu chunk_dda.bg"),
766            layout: &dda.bgl_dda,
767            entries: &[
768                wgpu::BindGroupEntry {
769                    binding: 0,
770                    resource: dda.uniform_buf.as_entire_binding(),
771                },
772                wgpu::BindGroupEntry {
773                    binding: 1,
774                    resource: resident.occupancy.as_entire_binding(),
775                },
776                wgpu::BindGroupEntry {
777                    binding: 2,
778                    resource: resident.color_offsets.as_entire_binding(),
779                },
780                wgpu::BindGroupEntry {
781                    binding: 3,
782                    resource: resident.colors.as_entire_binding(),
783                },
784                wgpu::BindGroupEntry {
785                    binding: 4,
786                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
787                },
788            ],
789        });
790
791        let mut encoder = self
792            .device
793            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
794                label: Some("roxlap-gpu chunk encoder"),
795            });
796        {
797            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
798                label: Some("roxlap-gpu chunk_dda compute"),
799                timestamp_writes: None,
800            });
801            cpass.set_pipeline(&dda.pipeline_dda);
802            cpass.set_bind_group(0, &dda_bg, &[]);
803            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
804        }
805        {
806            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
807                label: Some("roxlap-gpu chunk_dda blit"),
808                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
809                    view: &surf_view,
810                    resolve_target: None,
811                    ops: wgpu::Operations {
812                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
813                        store: wgpu::StoreOp::Store,
814                    },
815                })],
816                depth_stencil_attachment: None,
817                timestamp_writes: None,
818                occlusion_query_set: None,
819            });
820            rpass.set_pipeline(&dda.pipeline_blit);
821            rpass.set_bind_group(0, &dda.blit_bg, &[]);
822            rpass.draw(0..3, 0..1);
823        }
824        self.queue.submit(std::iter::once(encoder.finish()));
825        surf_tex.present();
826        self.frame_count = self.frame_count.wrapping_add(1);
827    }
828
829    fn build_chunk_dda(
830        &self,
831        width: u32,
832        height: u32,
833        surface_format: wgpu::TextureFormat,
834    ) -> ChunkDdaResources {
835        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
836            label: Some("roxlap-gpu chunk_dda.storage"),
837            size: wgpu::Extent3d {
838                width,
839                height,
840                depth_or_array_layers: 1,
841            },
842            mip_level_count: 1,
843            sample_count: 1,
844            dimension: wgpu::TextureDimension::D2,
845            format: wgpu::TextureFormat::Rgba8Unorm,
846            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
847            view_formats: &[],
848        });
849        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
850
851        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
852            label: Some("roxlap-gpu chunk_dda.uniform"),
853            size: std::mem::size_of::<ChunkDdaUniform>() as u64,
854            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
855            mapped_at_creation: false,
856        });
857
858        let dda_shader = self
859            .device
860            .create_shader_module(wgpu::ShaderModuleDescriptor {
861                label: Some("chunk_dda.wgsl"),
862                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/chunk_dda.wgsl").into()),
863            });
864        let bgl_dda = self
865            .device
866            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
867                label: Some("roxlap-gpu chunk_dda.bgl"),
868                entries: &[
869                    bgl_uniform_entry(0),
870                    bgl_storage_entry(1, true),
871                    bgl_storage_entry(2, true),
872                    bgl_storage_entry(3, true),
873                    wgpu::BindGroupLayoutEntry {
874                        binding: 4,
875                        visibility: wgpu::ShaderStages::COMPUTE,
876                        ty: wgpu::BindingType::StorageTexture {
877                            access: wgpu::StorageTextureAccess::WriteOnly,
878                            format: wgpu::TextureFormat::Rgba8Unorm,
879                            view_dimension: wgpu::TextureViewDimension::D2,
880                        },
881                        count: None,
882                    },
883                ],
884            });
885        let dda_pl = self
886            .device
887            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
888                label: Some("roxlap-gpu chunk_dda.layout"),
889                bind_group_layouts: &[&bgl_dda],
890                push_constant_ranges: &[],
891            });
892        let pipeline_dda = self
893            .device
894            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
895                label: Some("roxlap-gpu chunk_dda.pipeline"),
896                layout: Some(&dda_pl),
897                module: &dda_shader,
898                entry_point: "render_chunk",
899                compilation_options: wgpu::PipelineCompilationOptions::default(),
900                cache: None,
901            });
902
903        // Fullscreen-triangle blit upscales the storage texture into
904        // the swapchain. Nearest filter keeps the retro pixel look.
905        let blit_shader = self
906            .device
907            .create_shader_module(wgpu::ShaderModuleDescriptor {
908                label: Some("blit.wgsl"),
909                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
910            });
911        let bgl_blit = self
912            .device
913            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
914                label: Some("roxlap-gpu chunk_dda.blit_bgl"),
915                entries: &[
916                    wgpu::BindGroupLayoutEntry {
917                        binding: 0,
918                        visibility: wgpu::ShaderStages::FRAGMENT,
919                        ty: wgpu::BindingType::Texture {
920                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
921                            view_dimension: wgpu::TextureViewDimension::D2,
922                            multisampled: false,
923                        },
924                        count: None,
925                    },
926                    wgpu::BindGroupLayoutEntry {
927                        binding: 1,
928                        visibility: wgpu::ShaderStages::FRAGMENT,
929                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
930                        count: None,
931                    },
932                ],
933            });
934        let blit_pl = self
935            .device
936            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
937                label: Some("roxlap-gpu chunk_dda.blit_layout"),
938                bind_group_layouts: &[&bgl_blit],
939                push_constant_ranges: &[],
940            });
941        let pipeline_blit = self
942            .device
943            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
944                label: Some("roxlap-gpu chunk_dda.blit_pipeline"),
945                layout: Some(&blit_pl),
946                vertex: wgpu::VertexState {
947                    module: &blit_shader,
948                    entry_point: "vs_main",
949                    compilation_options: wgpu::PipelineCompilationOptions::default(),
950                    buffers: &[],
951                },
952                fragment: Some(wgpu::FragmentState {
953                    module: &blit_shader,
954                    entry_point: "fs_main",
955                    compilation_options: wgpu::PipelineCompilationOptions::default(),
956                    targets: &[Some(wgpu::ColorTargetState {
957                        format: surface_format,
958                        blend: None,
959                        write_mask: wgpu::ColorWrites::ALL,
960                    })],
961                }),
962                primitive: wgpu::PrimitiveState::default(),
963                depth_stencil: None,
964                multisample: wgpu::MultisampleState::default(),
965                multiview: None,
966                cache: None,
967            });
968        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
969            label: Some("roxlap-gpu chunk_dda.blit_sampler"),
970            address_mode_u: wgpu::AddressMode::ClampToEdge,
971            address_mode_v: wgpu::AddressMode::ClampToEdge,
972            address_mode_w: wgpu::AddressMode::ClampToEdge,
973            mag_filter: wgpu::FilterMode::Nearest,
974            min_filter: wgpu::FilterMode::Nearest,
975            mipmap_filter: wgpu::FilterMode::Nearest,
976            ..Default::default()
977        });
978        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
979            label: Some("roxlap-gpu chunk_dda.blit_bg"),
980            layout: &bgl_blit,
981            entries: &[
982                wgpu::BindGroupEntry {
983                    binding: 0,
984                    resource: wgpu::BindingResource::TextureView(&storage_view),
985                },
986                wgpu::BindGroupEntry {
987                    binding: 1,
988                    resource: wgpu::BindingResource::Sampler(&sampler),
989                },
990            ],
991        });
992
993        ChunkDdaResources {
994            storage_size: (width, height),
995            storage_view,
996            uniform_buf,
997            bgl_dda,
998            pipeline_dda,
999            blit_bg,
1000            pipeline_blit,
1001            _sampler: sampler,
1002        }
1003    }
1004
1005    /// GPU.4 render — outer DDA over chunk indices + inner DDA into
1006    /// non-empty chunks. `camera.position` is in **grid-local**
1007    /// voxel units. `max_outer_steps` caps how many chunks the
1008    /// outer DDA may traverse per ray (scene-demo wires `+ / -`
1009    /// through this).
1010    ///
1011    /// # Panics
1012    /// Internally `expect`s the grid-DDA resources to be built;
1013    /// they are constructed at the top of this function if missing.
1014    pub fn render_grid(&mut self, grid: &GpuGridResident, camera: &Camera, max_outer_steps: u32) {
1015        let surf_tex = match self.surface.get_current_texture() {
1016            Ok(t) => t,
1017            Err(wgpu::SurfaceError::Outdated | wgpu::SurfaceError::Lost) => {
1018                self.surface.configure(&self.device, &self.surface_config);
1019                return;
1020            }
1021            Err(e) => {
1022                eprintln!("roxlap-gpu surface error: {e:?}");
1023                return;
1024            }
1025        };
1026        let surf_view = surf_tex
1027            .texture
1028            .create_view(&wgpu::TextureViewDescriptor::default());
1029
1030        let surface_w = self.surface_config.width;
1031        let surface_h = self.surface_config.height;
1032        let surface_format = self.surface_config.format;
1033
1034        let needs_build = match &self.grid_dda {
1035            Some(r) => r.storage_size != (surface_w, surface_h),
1036            None => true,
1037        };
1038        if needs_build {
1039            self.grid_dda = Some(self.build_grid_dda(surface_w, surface_h, surface_format));
1040        }
1041        let dda = self.grid_dda.as_ref().expect("just built");
1042
1043        let uniform = GridDdaUniform {
1044            camera_pos: camera.position,
1045            _pad0: 0.0,
1046            camera_right: camera.right,
1047            _pad1: 0.0,
1048            camera_down: camera.down,
1049            _pad2: 0.0,
1050            camera_forward: camera.forward,
1051            fov_y_rad: camera.fov_y_rad,
1052            screen_size: [surface_w, surface_h],
1053            vsid: grid.vsid,
1054            max_outer_steps,
1055            chunks_dims: grid.chunks_dims,
1056            _pad3: 0,
1057            origin_chunk: grid.origin_chunk,
1058            _pad4: 0,
1059        };
1060        self.queue
1061            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1062
1063        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1064            label: Some("roxlap-gpu grid_dda.bg"),
1065            layout: &dda.bgl_dda,
1066            entries: &[
1067                wgpu::BindGroupEntry {
1068                    binding: 0,
1069                    resource: dda.uniform_buf.as_entire_binding(),
1070                },
1071                wgpu::BindGroupEntry {
1072                    binding: 1,
1073                    resource: grid.occupancy.as_entire_binding(),
1074                },
1075                wgpu::BindGroupEntry {
1076                    binding: 2,
1077                    resource: grid.color_offsets.as_entire_binding(),
1078                },
1079                wgpu::BindGroupEntry {
1080                    binding: 3,
1081                    resource: grid.colors.as_entire_binding(),
1082                },
1083                wgpu::BindGroupEntry {
1084                    binding: 4,
1085                    resource: grid.chunk_colors_base.as_entire_binding(),
1086                },
1087                wgpu::BindGroupEntry {
1088                    binding: 5,
1089                    resource: grid.chunk_occupancy.as_entire_binding(),
1090                },
1091                wgpu::BindGroupEntry {
1092                    binding: 6,
1093                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1094                },
1095            ],
1096        });
1097
1098        let mut encoder = self
1099            .device
1100            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1101                label: Some("roxlap-gpu grid encoder"),
1102            });
1103        {
1104            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1105                label: Some("roxlap-gpu grid_dda compute"),
1106                timestamp_writes: None,
1107            });
1108            cpass.set_pipeline(&dda.pipeline_dda);
1109            cpass.set_bind_group(0, &dda_bg, &[]);
1110            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1111        }
1112        {
1113            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1114                label: Some("roxlap-gpu grid_dda blit"),
1115                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1116                    view: &surf_view,
1117                    resolve_target: None,
1118                    ops: wgpu::Operations {
1119                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1120                        store: wgpu::StoreOp::Store,
1121                    },
1122                })],
1123                depth_stencil_attachment: None,
1124                timestamp_writes: None,
1125                occlusion_query_set: None,
1126            });
1127            rpass.set_pipeline(&dda.pipeline_blit);
1128            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1129            rpass.draw(0..3, 0..1);
1130        }
1131        self.queue.submit(std::iter::once(encoder.finish()));
1132        surf_tex.present();
1133        self.frame_count = self.frame_count.wrapping_add(1);
1134    }
1135
1136    fn build_grid_dda(
1137        &self,
1138        width: u32,
1139        height: u32,
1140        surface_format: wgpu::TextureFormat,
1141    ) -> GridDdaResources {
1142        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1143            label: Some("roxlap-gpu grid_dda.storage"),
1144            size: wgpu::Extent3d {
1145                width,
1146                height,
1147                depth_or_array_layers: 1,
1148            },
1149            mip_level_count: 1,
1150            sample_count: 1,
1151            dimension: wgpu::TextureDimension::D2,
1152            format: wgpu::TextureFormat::Rgba8Unorm,
1153            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1154            view_formats: &[],
1155        });
1156        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1157
1158        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1159            label: Some("roxlap-gpu grid_dda.uniform"),
1160            size: std::mem::size_of::<GridDdaUniform>() as u64,
1161            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1162            mapped_at_creation: false,
1163        });
1164
1165        let dda_shader = self
1166            .device
1167            .create_shader_module(wgpu::ShaderModuleDescriptor {
1168                label: Some("grid_dda.wgsl"),
1169                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/grid_dda.wgsl").into()),
1170            });
1171        let bgl_dda = self
1172            .device
1173            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1174                label: Some("roxlap-gpu grid_dda.bgl"),
1175                entries: &[
1176                    bgl_uniform_entry(0),
1177                    bgl_storage_entry(1, true),
1178                    bgl_storage_entry(2, true),
1179                    bgl_storage_entry(3, true),
1180                    bgl_storage_entry(4, true),
1181                    bgl_storage_entry(5, true),
1182                    wgpu::BindGroupLayoutEntry {
1183                        binding: 6,
1184                        visibility: wgpu::ShaderStages::COMPUTE,
1185                        ty: wgpu::BindingType::StorageTexture {
1186                            access: wgpu::StorageTextureAccess::WriteOnly,
1187                            format: wgpu::TextureFormat::Rgba8Unorm,
1188                            view_dimension: wgpu::TextureViewDimension::D2,
1189                        },
1190                        count: None,
1191                    },
1192                ],
1193            });
1194        let dda_pl = self
1195            .device
1196            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1197                label: Some("roxlap-gpu grid_dda.layout"),
1198                bind_group_layouts: &[&bgl_dda],
1199                push_constant_ranges: &[],
1200            });
1201        let pipeline_dda = self
1202            .device
1203            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1204                label: Some("roxlap-gpu grid_dda.pipeline"),
1205                layout: Some(&dda_pl),
1206                module: &dda_shader,
1207                entry_point: "render_grid",
1208                compilation_options: wgpu::PipelineCompilationOptions::default(),
1209                cache: None,
1210            });
1211
1212        let blit_shader = self
1213            .device
1214            .create_shader_module(wgpu::ShaderModuleDescriptor {
1215                label: Some("blit.wgsl"),
1216                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1217            });
1218        let bgl_blit = self
1219            .device
1220            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1221                label: Some("roxlap-gpu grid_dda.blit_bgl"),
1222                entries: &[
1223                    wgpu::BindGroupLayoutEntry {
1224                        binding: 0,
1225                        visibility: wgpu::ShaderStages::FRAGMENT,
1226                        ty: wgpu::BindingType::Texture {
1227                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1228                            view_dimension: wgpu::TextureViewDimension::D2,
1229                            multisampled: false,
1230                        },
1231                        count: None,
1232                    },
1233                    wgpu::BindGroupLayoutEntry {
1234                        binding: 1,
1235                        visibility: wgpu::ShaderStages::FRAGMENT,
1236                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1237                        count: None,
1238                    },
1239                ],
1240            });
1241        let blit_pl = self
1242            .device
1243            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1244                label: Some("roxlap-gpu grid_dda.blit_layout"),
1245                bind_group_layouts: &[&bgl_blit],
1246                push_constant_ranges: &[],
1247            });
1248        let pipeline_blit = self
1249            .device
1250            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1251                label: Some("roxlap-gpu grid_dda.blit_pipeline"),
1252                layout: Some(&blit_pl),
1253                vertex: wgpu::VertexState {
1254                    module: &blit_shader,
1255                    entry_point: "vs_main",
1256                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1257                    buffers: &[],
1258                },
1259                fragment: Some(wgpu::FragmentState {
1260                    module: &blit_shader,
1261                    entry_point: "fs_main",
1262                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1263                    targets: &[Some(wgpu::ColorTargetState {
1264                        format: surface_format,
1265                        blend: None,
1266                        write_mask: wgpu::ColorWrites::ALL,
1267                    })],
1268                }),
1269                primitive: wgpu::PrimitiveState::default(),
1270                depth_stencil: None,
1271                multisample: wgpu::MultisampleState::default(),
1272                multiview: None,
1273                cache: None,
1274            });
1275        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1276            label: Some("roxlap-gpu grid_dda.blit_sampler"),
1277            address_mode_u: wgpu::AddressMode::ClampToEdge,
1278            address_mode_v: wgpu::AddressMode::ClampToEdge,
1279            address_mode_w: wgpu::AddressMode::ClampToEdge,
1280            mag_filter: wgpu::FilterMode::Nearest,
1281            min_filter: wgpu::FilterMode::Nearest,
1282            mipmap_filter: wgpu::FilterMode::Nearest,
1283            ..Default::default()
1284        });
1285        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1286            label: Some("roxlap-gpu grid_dda.blit_bg"),
1287            layout: &bgl_blit,
1288            entries: &[
1289                wgpu::BindGroupEntry {
1290                    binding: 0,
1291                    resource: wgpu::BindingResource::TextureView(&storage_view),
1292                },
1293                wgpu::BindGroupEntry {
1294                    binding: 1,
1295                    resource: wgpu::BindingResource::Sampler(&sampler),
1296                },
1297            ],
1298        });
1299
1300        GridDdaResources {
1301            storage_size: (width, height),
1302            storage_view,
1303            uniform_buf,
1304            bgl_dda,
1305            pipeline_dda,
1306            blit_bg,
1307            pipeline_blit,
1308            _sampler: sampler,
1309        }
1310    }
1311
1312    /// GPU.5 render — multi-grid scene marcher. `cameras[i]` is the
1313    /// world camera transformed into grid `i`'s local frame
1314    /// (caller-supplied; see scene-demo's `redraw_gpu` for the
1315    /// glam-based transform). `fov_y_rad` is the shared vertical
1316    /// FOV; `max_outer_steps` caps per-ray chunk-DDA work for each
1317    /// grid.
1318    ///
1319    /// # Panics
1320    /// If `cameras.len() != scene.grid_count` or
1321    /// `scene.grid_count > MAX_SCENE_GRIDS`.
1322    pub fn render_scene(
1323        &mut self,
1324        scene: &GpuSceneResident,
1325        cameras: &[Camera],
1326        fov_y_rad: f32,
1327        max_outer_steps: u32,
1328    ) {
1329        assert_eq!(
1330            cameras.len(),
1331            scene.grid_count as usize,
1332            "render_scene: {} cameras supplied, scene has {} grids",
1333            cameras.len(),
1334            scene.grid_count,
1335        );
1336        assert!(
1337            scene.grid_count as usize <= SCENE_MAX_GRIDS,
1338            "render_scene: scene has {} grids, shader supports {}",
1339            scene.grid_count,
1340            SCENE_MAX_GRIDS,
1341        );
1342        self.last_fov_y_rad = fov_y_rad; // cached for pixel_ray (picking)
1343
1344        let surf_tex = match self.surface.get_current_texture() {
1345            Ok(t) => t,
1346            Err(wgpu::SurfaceError::Outdated | wgpu::SurfaceError::Lost) => {
1347                self.surface.configure(&self.device, &self.surface_config);
1348                return;
1349            }
1350            Err(e) => {
1351                eprintln!("roxlap-gpu surface error: {e:?}");
1352                return;
1353            }
1354        };
1355        let surf_view = surf_tex
1356            .texture
1357            .create_view(&wgpu::TextureViewDescriptor::default());
1358
1359        let surface_w = self.surface_config.width;
1360        let surface_h = self.surface_config.height;
1361        let surface_format = self.surface_config.format;
1362
1363        let needs_build = match &self.scene_dda {
1364            Some(r) => r.storage_size != (surface_w, surface_h),
1365            None => true,
1366        };
1367        if needs_build {
1368            self.scene_dda = Some(self.build_scene_dda(surface_w, surface_h, surface_format));
1369        }
1370        // GPU.9 — materialise the sprite pipeline the first frame
1371        // sprites are present (before the immutable `dda` borrow).
1372        // GPU.10.0 — build the model-DDA pipeline the first frame a
1373        // sprite registry is present.
1374        if self.sprite_registry.is_some() && self.sprite_model_dda.is_none() {
1375            self.sprite_model_dda = Some(self.build_sprite_model_dda());
1376        }
1377        // GPU.10.3 — frustum-cull + screen-tile-bin the sprite instances
1378        // (needs &mut self for buffer growth, so before the immutable
1379        // scene_dda borrow). Captures (visible_count, tiles_x); None when
1380        // nothing is in view.
1381        let sprite_pass: Option<(u32, u32)> = if let Some(reg) = self.sprite_registry.as_mut() {
1382            if !cameras.is_empty() && reg.instance_capacity > 0 {
1383                let cam = &cameras[0];
1384                #[allow(clippy::cast_precision_loss)]
1385                let aspect = surface_w as f32 / surface_h as f32;
1386                let half_h = (fov_y_rad * 0.5).tan();
1387                let frustum = sprite_model::ViewFrustum {
1388                    pos: cam.position,
1389                    right: cam.right,
1390                    down: cam.down,
1391                    forward: cam.forward,
1392                    half_w: half_h * aspect,
1393                    half_h,
1394                    far: 1.0e9,
1395                };
1396                let (visible, tiles_x, _tiles_y) = reg.cull_bin_upload(
1397                    &self.device,
1398                    &self.queue,
1399                    &frustum,
1400                    surface_w,
1401                    surface_h,
1402                    SPRITE_TILE_SIZE,
1403                    self.sprite_lod_px,
1404                );
1405                (visible > 0).then_some((visible, tiles_x))
1406            } else {
1407                None
1408            }
1409        } else {
1410            None
1411        };
1412        let dda = self.scene_dda.as_ref().expect("just built");
1413
1414        // Pack per-grid cameras.
1415        let mut cam_array = [SceneDdaPerGridCamera::zeroed(); SCENE_MAX_GRIDS];
1416        for (i, cam) in cameras.iter().enumerate() {
1417            cam_array[i] = SceneDdaPerGridCamera {
1418                pos: cam.position,
1419                _pad0: 0.0,
1420                right: cam.right,
1421                _pad1: 0.0,
1422                down: cam.down,
1423                _pad2: 0.0,
1424                forward: cam.forward,
1425                _pad3: 0.0,
1426            };
1427        }
1428        let uniform = SceneDdaUniform {
1429            fov_y_rad,
1430            grid_count: scene.grid_count,
1431            max_outer_steps,
1432            _pad0: 0,
1433            screen_size: [surface_w, surface_h],
1434            _pad1: [0; 2],
1435            cameras: cam_array,
1436            fog_color: [
1437                self.fog_color[0],
1438                self.fog_color[1],
1439                self.fog_color[2],
1440                self.fog_near,
1441            ],
1442            fog_far: self.fog_far,
1443            write_depth: u32::from(self.sprite_registry.is_some()),
1444            occ_page_words: scene.occupancy_page_words,
1445            occ_num_pages: scene.occupancy_num_pages,
1446            mip_scan_dist: self.scene_mip_scan_dist,
1447            _pad2: 0,
1448            _pad3: 0,
1449            _pad4: 0,
1450        };
1451        self.queue
1452            .write_buffer(&dda.uniform_buf, 0, bytemuck::bytes_of(&uniform));
1453
1454        let dda_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1455            label: Some("roxlap-gpu scene_dda.bg"),
1456            layout: &dda.bgl_dda,
1457            entries: &[
1458                wgpu::BindGroupEntry {
1459                    binding: 0,
1460                    resource: dda.uniform_buf.as_entire_binding(),
1461                },
1462                // Occupancy page 0 at binding 1; pages 1..MAX_OCC_PAGES
1463                // at bindings 12.. (see GPU.X occupancy paging).
1464                wgpu::BindGroupEntry {
1465                    binding: 1,
1466                    resource: scene.occupancy_pages[0].as_entire_binding(),
1467                },
1468                wgpu::BindGroupEntry {
1469                    binding: 2,
1470                    resource: scene.all_color_offsets.as_entire_binding(),
1471                },
1472                wgpu::BindGroupEntry {
1473                    binding: 3,
1474                    resource: scene.all_colors.as_entire_binding(),
1475                },
1476                wgpu::BindGroupEntry {
1477                    binding: 4,
1478                    resource: scene.all_chunk_colors_base.as_entire_binding(),
1479                },
1480                wgpu::BindGroupEntry {
1481                    binding: 5,
1482                    resource: scene.all_chunk_occupancy.as_entire_binding(),
1483                },
1484                wgpu::BindGroupEntry {
1485                    binding: 6,
1486                    resource: scene.grid_static_meta.as_entire_binding(),
1487                },
1488                wgpu::BindGroupEntry {
1489                    binding: 7,
1490                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
1491                },
1492                wgpu::BindGroupEntry {
1493                    binding: 8,
1494                    resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1495                },
1496                wgpu::BindGroupEntry {
1497                    binding: 9,
1498                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
1499                },
1500                wgpu::BindGroupEntry {
1501                    binding: 10,
1502                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
1503                },
1504                wgpu::BindGroupEntry {
1505                    binding: 11,
1506                    resource: dda.depth_buffer.as_entire_binding(),
1507                },
1508                wgpu::BindGroupEntry {
1509                    binding: 12,
1510                    resource: scene.occupancy_pages[1].as_entire_binding(),
1511                },
1512                wgpu::BindGroupEntry {
1513                    binding: 13,
1514                    resource: scene.occupancy_pages[2].as_entire_binding(),
1515                },
1516                wgpu::BindGroupEntry {
1517                    binding: 14,
1518                    resource: scene.occupancy_pages[3].as_entire_binding(),
1519                },
1520            ],
1521        });
1522
1523        // GPU.9 — when sprites are present, build both splatter bind
1524        // groups up front (the splat pass writes the key buffer; the
1525        // resolve pass reads keys + scene depth and writes colour).
1526        // GPU.10.3 — model-DDA bind group + per-frame uniform, using the
1527        // cull/bin results captured above. Per-model + per-instance data
1528        // + the tile lists live in the registry buffers.
1529        let sprite_model_bg = match (&self.sprite_model_dda, &self.sprite_registry, sprite_pass) {
1530            (Some(smd), Some(reg), Some((visible, tiles_x))) => {
1531                let cam = &cameras[0];
1532                let uni = SpriteModelUniform {
1533                    cam_pos: cam.position,
1534                    _p0: 0.0,
1535                    cam_right: cam.right,
1536                    _p1: 0.0,
1537                    cam_down: cam.down,
1538                    _p2: 0.0,
1539                    cam_forward: cam.forward,
1540                    _p3: 0.0,
1541                    fog_color: [
1542                        self.fog_color[0],
1543                        self.fog_color[1],
1544                        self.fog_color[2],
1545                        self.fog_near,
1546                    ],
1547                    screen_size: [surface_w, surface_h],
1548                    instance_count: visible,
1549                    fog_far: self.fog_far,
1550                    fov_y_rad,
1551                    tiles_x,
1552                    tile_size: SPRITE_TILE_SIZE,
1553                    _p6: 0.0,
1554                };
1555                self.queue
1556                    .write_buffer(&smd.uniform_buf, 0, bytemuck::bytes_of(&uni));
1557                Some(self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1558                    label: Some("roxlap-gpu sprite_model_dda.bg"),
1559                    layout: &smd.bgl,
1560                    entries: &[
1561                        wgpu::BindGroupEntry {
1562                            binding: 0,
1563                            resource: smd.uniform_buf.as_entire_binding(),
1564                        },
1565                        wgpu::BindGroupEntry {
1566                            binding: 1,
1567                            resource: reg.occupancy.as_entire_binding(),
1568                        },
1569                        wgpu::BindGroupEntry {
1570                            binding: 2,
1571                            resource: reg.colors.as_entire_binding(),
1572                        },
1573                        wgpu::BindGroupEntry {
1574                            binding: 3,
1575                            resource: reg.color_offsets.as_entire_binding(),
1576                        },
1577                        wgpu::BindGroupEntry {
1578                            binding: 4,
1579                            resource: reg.model_meta.as_entire_binding(),
1580                        },
1581                        wgpu::BindGroupEntry {
1582                            binding: 5,
1583                            resource: reg.instances.as_entire_binding(),
1584                        },
1585                        wgpu::BindGroupEntry {
1586                            binding: 6,
1587                            resource: dda.depth_buffer.as_entire_binding(),
1588                        },
1589                        wgpu::BindGroupEntry {
1590                            binding: 7,
1591                            resource: wgpu::BindingResource::TextureView(&dda.storage_view),
1592                        },
1593                        wgpu::BindGroupEntry {
1594                            binding: 8,
1595                            resource: reg.tile_ranges.as_entire_binding(),
1596                        },
1597                        wgpu::BindGroupEntry {
1598                            binding: 9,
1599                            resource: reg.tile_instances.as_entire_binding(),
1600                        },
1601                    ],
1602                }))
1603            }
1604            _ => None,
1605        };
1606
1607        let mut encoder = self
1608            .device
1609            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1610                label: Some("roxlap-gpu scene encoder"),
1611            });
1612        {
1613            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1614                label: Some("roxlap-gpu scene_dda compute"),
1615                timestamp_writes: None,
1616            });
1617            cpass.set_pipeline(&dda.pipeline_dda);
1618            cpass.set_bind_group(0, &dda_bg, &[]);
1619            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1620        }
1621        // GPU.10 — sprite model-DDA pass: one thread per pixel marches
1622        // the tile's instances + composites against scene depth, after
1623        // the scene pass wrote the depth buffer and before the blit.
1624        if let (Some(smd), Some(bg)) = (&self.sprite_model_dda, &sprite_model_bg) {
1625            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
1626                label: Some("roxlap-gpu sprite_model_dda"),
1627                timestamp_writes: None,
1628            });
1629            cpass.set_pipeline(&smd.pipeline);
1630            cpass.set_bind_group(0, bg, &[]);
1631            cpass.dispatch_workgroups(surface_w.div_ceil(8), surface_h.div_ceil(8), 1);
1632        }
1633        {
1634            let mut rpass = encoder.begin_render_pass(&wgpu::RenderPassDescriptor {
1635                label: Some("roxlap-gpu scene_dda blit"),
1636                color_attachments: &[Some(wgpu::RenderPassColorAttachment {
1637                    view: &surf_view,
1638                    resolve_target: None,
1639                    ops: wgpu::Operations {
1640                        load: wgpu::LoadOp::Clear(wgpu::Color::BLACK),
1641                        store: wgpu::StoreOp::Store,
1642                    },
1643                })],
1644                depth_stencil_attachment: None,
1645                timestamp_writes: None,
1646                occlusion_query_set: None,
1647            });
1648            rpass.set_pipeline(&dda.pipeline_blit);
1649            rpass.set_bind_group(0, &dda.blit_bg, &[]);
1650            rpass.draw(0..3, 0..1);
1651        }
1652        self.queue.submit(std::iter::once(encoder.finish()));
1653        surf_tex.present();
1654        self.frame_count = self.frame_count.wrapping_add(1);
1655    }
1656
1657    fn build_scene_dda(
1658        &self,
1659        width: u32,
1660        height: u32,
1661        surface_format: wgpu::TextureFormat,
1662    ) -> SceneDdaResources {
1663        let storage_tex = self.device.create_texture(&wgpu::TextureDescriptor {
1664            label: Some("roxlap-gpu scene_dda.storage"),
1665            size: wgpu::Extent3d {
1666                width,
1667                height,
1668                depth_or_array_layers: 1,
1669            },
1670            mip_level_count: 1,
1671            sample_count: 1,
1672            dimension: wgpu::TextureDimension::D2,
1673            format: wgpu::TextureFormat::Rgba8Unorm,
1674            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::TEXTURE_BINDING,
1675            view_formats: &[],
1676        });
1677        let storage_view = storage_tex.create_view(&wgpu::TextureViewDescriptor::default());
1678
1679        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
1680            label: Some("roxlap-gpu scene_dda.uniform"),
1681            size: std::mem::size_of::<SceneDdaUniform>() as u64,
1682            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
1683            mapped_at_creation: false,
1684        });
1685
1686        // GPU.9 — per-pixel world-t depth (f32 bits as u32). Sized to
1687        // the storage texture; written by the scene pass when sprites
1688        // are active, read+tested by the sprite splatter.
1689        let depth_buffer = self.device.create_buffer(&wgpu::BufferDescriptor {
1690            label: Some("roxlap-gpu scene_dda.depth"),
1691            size: u64::from(width) * u64::from(height) * 4,
1692            // COPY_SRC so `read_depth_pixel` can stage it for picking.
1693            usage: wgpu::BufferUsages::STORAGE
1694                | wgpu::BufferUsages::COPY_DST
1695                | wgpu::BufferUsages::COPY_SRC,
1696            mapped_at_creation: false,
1697        });
1698        let depth_readback = self.device.create_buffer(&wgpu::BufferDescriptor {
1699            label: Some("roxlap-gpu scene_dda.depth_readback"),
1700            size: u64::from(width) * u64::from(height) * 4,
1701            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
1702            mapped_at_creation: false,
1703        });
1704        let dda_shader = self
1705            .device
1706            .create_shader_module(wgpu::ShaderModuleDescriptor {
1707                label: Some("scene_dda.wgsl"),
1708                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
1709            });
1710        let bgl_dda = self
1711            .device
1712            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1713                label: Some("roxlap-gpu scene_dda.bgl"),
1714                entries: &[
1715                    bgl_uniform_entry(0),
1716                    bgl_storage_entry(1, true),
1717                    bgl_storage_entry(2, true),
1718                    bgl_storage_entry(3, true),
1719                    bgl_storage_entry(4, true),
1720                    bgl_storage_entry(5, true),
1721                    bgl_storage_entry(6, true),
1722                    bgl_storage_entry(7, true),
1723                    wgpu::BindGroupLayoutEntry {
1724                        binding: 8,
1725                        visibility: wgpu::ShaderStages::COMPUTE,
1726                        ty: wgpu::BindingType::StorageTexture {
1727                            access: wgpu::StorageTextureAccess::WriteOnly,
1728                            format: wgpu::TextureFormat::Rgba8Unorm,
1729                            view_dimension: wgpu::TextureViewDimension::D2,
1730                        },
1731                        count: None,
1732                    },
1733                    // GPU.8 sky panorama + sampler.
1734                    wgpu::BindGroupLayoutEntry {
1735                        binding: 9,
1736                        visibility: wgpu::ShaderStages::COMPUTE,
1737                        ty: wgpu::BindingType::Texture {
1738                            sample_type: wgpu::TextureSampleType::Float { filterable: true },
1739                            view_dimension: wgpu::TextureViewDimension::D2,
1740                            multisampled: false,
1741                        },
1742                        count: None,
1743                    },
1744                    wgpu::BindGroupLayoutEntry {
1745                        binding: 10,
1746                        visibility: wgpu::ShaderStages::COMPUTE,
1747                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
1748                        count: None,
1749                    },
1750                    // GPU.9 — read-write per-pixel depth buffer.
1751                    bgl_storage_entry(11, false),
1752                    // Occupancy pages 1..MAX_OCC_PAGES (page 0 is
1753                    // binding 1). Unused pages bind a dummy buffer.
1754                    bgl_storage_entry(12, true),
1755                    bgl_storage_entry(13, true),
1756                    bgl_storage_entry(14, true),
1757                ],
1758            });
1759        let dda_pl = self
1760            .device
1761            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1762                label: Some("roxlap-gpu scene_dda.layout"),
1763                bind_group_layouts: &[&bgl_dda],
1764                push_constant_ranges: &[],
1765            });
1766        let pipeline_dda = self
1767            .device
1768            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
1769                label: Some("roxlap-gpu scene_dda.pipeline"),
1770                layout: Some(&dda_pl),
1771                module: &dda_shader,
1772                entry_point: "render_scene",
1773                compilation_options: wgpu::PipelineCompilationOptions::default(),
1774                cache: None,
1775            });
1776
1777        let blit_shader = self
1778            .device
1779            .create_shader_module(wgpu::ShaderModuleDescriptor {
1780                label: Some("blit.wgsl"),
1781                source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/blit.wgsl").into()),
1782            });
1783        let bgl_blit = self
1784            .device
1785            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
1786                label: Some("roxlap-gpu scene_dda.blit_bgl"),
1787                entries: &[
1788                    wgpu::BindGroupLayoutEntry {
1789                        binding: 0,
1790                        visibility: wgpu::ShaderStages::FRAGMENT,
1791                        ty: wgpu::BindingType::Texture {
1792                            sample_type: wgpu::TextureSampleType::Float { filterable: false },
1793                            view_dimension: wgpu::TextureViewDimension::D2,
1794                            multisampled: false,
1795                        },
1796                        count: None,
1797                    },
1798                    wgpu::BindGroupLayoutEntry {
1799                        binding: 1,
1800                        visibility: wgpu::ShaderStages::FRAGMENT,
1801                        ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::NonFiltering),
1802                        count: None,
1803                    },
1804                ],
1805            });
1806        let blit_pl = self
1807            .device
1808            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
1809                label: Some("roxlap-gpu scene_dda.blit_layout"),
1810                bind_group_layouts: &[&bgl_blit],
1811                push_constant_ranges: &[],
1812            });
1813        let pipeline_blit = self
1814            .device
1815            .create_render_pipeline(&wgpu::RenderPipelineDescriptor {
1816                label: Some("roxlap-gpu scene_dda.blit_pipeline"),
1817                layout: Some(&blit_pl),
1818                vertex: wgpu::VertexState {
1819                    module: &blit_shader,
1820                    entry_point: "vs_main",
1821                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1822                    buffers: &[],
1823                },
1824                fragment: Some(wgpu::FragmentState {
1825                    module: &blit_shader,
1826                    entry_point: "fs_main",
1827                    compilation_options: wgpu::PipelineCompilationOptions::default(),
1828                    targets: &[Some(wgpu::ColorTargetState {
1829                        format: surface_format,
1830                        blend: None,
1831                        write_mask: wgpu::ColorWrites::ALL,
1832                    })],
1833                }),
1834                primitive: wgpu::PrimitiveState::default(),
1835                depth_stencil: None,
1836                multisample: wgpu::MultisampleState::default(),
1837                multiview: None,
1838                cache: None,
1839            });
1840        let sampler = self.device.create_sampler(&wgpu::SamplerDescriptor {
1841            label: Some("roxlap-gpu scene_dda.blit_sampler"),
1842            address_mode_u: wgpu::AddressMode::ClampToEdge,
1843            address_mode_v: wgpu::AddressMode::ClampToEdge,
1844            address_mode_w: wgpu::AddressMode::ClampToEdge,
1845            mag_filter: wgpu::FilterMode::Nearest,
1846            min_filter: wgpu::FilterMode::Nearest,
1847            mipmap_filter: wgpu::FilterMode::Nearest,
1848            ..Default::default()
1849        });
1850        let blit_bg = self.device.create_bind_group(&wgpu::BindGroupDescriptor {
1851            label: Some("roxlap-gpu scene_dda.blit_bg"),
1852            layout: &bgl_blit,
1853            entries: &[
1854                wgpu::BindGroupEntry {
1855                    binding: 0,
1856                    resource: wgpu::BindingResource::TextureView(&storage_view),
1857                },
1858                wgpu::BindGroupEntry {
1859                    binding: 1,
1860                    resource: wgpu::BindingResource::Sampler(&sampler),
1861                },
1862            ],
1863        });
1864
1865        SceneDdaResources {
1866            storage_size: (width, height),
1867            storage_view,
1868            uniform_buf,
1869            bgl_dda,
1870            pipeline_dda,
1871            blit_bg,
1872            pipeline_blit,
1873            _sampler: sampler,
1874            depth_buffer,
1875            depth_readback,
1876        }
1877    }
1878
1879    /// Read back the per-pixel world-t depth at window pixel `(x, y)`
1880    /// from the last rendered frame, for screen→world picking. Returns
1881    /// the distance `t` along the (normalised) view ray to the nearest
1882    /// scene-grid surface, so the host reconstructs the world hit as
1883    /// `cam.pos + t * normalize(ray_dir)`. `None` for out-of-bounds
1884    /// pixels, sky / no-hit (the `T_INF` sentinel), or when no scene
1885    /// frame has been rendered.
1886    ///
1887    /// The depth buffer is the SCENE pass's output (terrain + grids),
1888    /// untouched by the sprite pass (which reads it read-only), so a
1889    /// cursor sprite under the pointer does not occlude the pick.
1890    ///
1891    /// Synchronous: copies the depth buffer to a mapped staging buffer
1892    /// and blocks on `device.poll(Wait)`. Cheap enough for click-time
1893    /// picks; do not call it every frame.
1894    ///
1895    /// Requires the last frame to have written depth, which happens
1896    /// when sprites are present (`write_depth`). The pick demo always
1897    /// has a cursor sprite, so this holds.
1898    #[must_use]
1899    pub fn read_depth_pixel(&self, x: u32, y: u32) -> Option<f32> {
1900        let dda = self.scene_dda.as_ref()?;
1901        let (w, h) = dda.storage_size;
1902        if x >= w || y >= h {
1903            return None;
1904        }
1905        let mut enc = self
1906            .device
1907            .create_command_encoder(&wgpu::CommandEncoderDescriptor {
1908                label: Some("roxlap-gpu depth readback"),
1909            });
1910        let size = u64::from(w) * u64::from(h) * 4;
1911        enc.copy_buffer_to_buffer(&dda.depth_buffer, 0, &dda.depth_readback, 0, size);
1912        self.queue.submit(std::iter::once(enc.finish()));
1913
1914        let slice = dda.depth_readback.slice(..);
1915        let (tx, rx) = std::sync::mpsc::channel();
1916        slice.map_async(wgpu::MapMode::Read, move |r| {
1917            let _ = tx.send(r);
1918        });
1919        self.device.poll(wgpu::Maintain::Wait);
1920        rx.recv().ok()?.ok()?;
1921
1922        let t = {
1923            let data = slice.get_mapped_range();
1924            let idx = ((y * w + x) * 4) as usize;
1925            let bytes: [u8; 4] = data[idx..idx + 4].try_into().ok()?;
1926            f32::from_le_bytes(bytes)
1927        };
1928        dda.depth_readback.unmap();
1929
1930        // Reject sky / no-hit (T_INF == 1e30 in the shader) + non-finite.
1931        if !t.is_finite() || t >= 1.0e29 {
1932            return None;
1933        }
1934        Some(t)
1935    }
1936
1937    /// World-space view-ray direction (un-normalised) for window pixel
1938    /// `(x, y)`, under the GPU marcher's projection — the canonical GPU
1939    /// unproject, mirroring `scene_dda.wgsl`'s `render_scene`
1940    /// (vertical-FOV pinhole). Uses the last-rendered frame's target
1941    /// size + FOV; `None` before the first scene render. Pair with
1942    /// [`Self::read_depth_pixel`] for screen→world picking.
1943    #[must_use]
1944    pub fn pixel_ray(
1945        &self,
1946        right: [f64; 3],
1947        down: [f64; 3],
1948        forward: [f64; 3],
1949        x: f64,
1950        y: f64,
1951    ) -> Option<[f64; 3]> {
1952        let dda = self.scene_dda.as_ref()?;
1953        let (w, h) = dda.storage_size;
1954        if w == 0 || h == 0 || self.last_fov_y_rad <= 0.0 {
1955            return None;
1956        }
1957        Some(pinhole_pixel_ray(
1958            right,
1959            down,
1960            forward,
1961            x,
1962            y,
1963            f64::from(w),
1964            f64::from(h),
1965            f64::from(self.last_fov_y_rad),
1966        ))
1967    }
1968
1969    /// GPU.10.1 — upload a sprite model registry + its instances for
1970    /// the DDA path. An empty instance slice clears all sprites.
1971    pub fn set_sprite_instances(
1972        &mut self,
1973        registry: &sprite_model::SpriteModelRegistry,
1974        instances: &[sprite_model::SpriteInstance],
1975    ) {
1976        if instances.is_empty() {
1977            self.sprite_registry = None;
1978            return;
1979        }
1980        self.sprite_registry = Some(sprite_model::SpriteRegistryResident::upload(
1981            &self.device,
1982            registry,
1983            instances,
1984        ));
1985    }
1986
1987    /// GPU.10.4 — set the LOD pixel threshold: a sprite steps to the
1988    /// next mip once a mip-0 voxel would project below `px` screen
1989    /// pixels. `1.0` is the natural "no sub-pixel voxels" default;
1990    /// larger values force LOD in closer (useful for inspection).
1991    /// Clamped to ≥ 0.25.
1992    pub fn set_sprite_lod_px(&mut self, px: f32) {
1993        self.sprite_lod_px = px.max(0.25);
1994    }
1995
1996    /// GPU.11.1 — set the scene-grid LOD scan distance (world units).
1997    /// A chunk entered at world-t `t` is marched at mip
1998    /// `floor(log2(max(t, msd) / msd))`, clamped to its grid's mip
1999    /// ladder. `0` disables LOD (always mip-0). Larger values push
2000    /// the coarser mips farther out — the axis-aligned-mip-beams
2001    /// mitigation lever (GPU.11.2). Default 64 (matches CPU
2002    /// `mip_scan_dist`).
2003    pub fn set_scene_mip_scan_dist(&mut self, dist: f32) {
2004        self.scene_mip_scan_dist = dist.max(0.0);
2005    }
2006
2007    /// GPU.10.1 — build the instanced model-DDA pipeline (one thread
2008    /// per pixel). Lazily invoked the first frame a registry is present.
2009    fn build_sprite_model_dda(&self) -> SpriteModelDdaResources {
2010        let shader = self
2011            .device
2012            .create_shader_module(wgpu::ShaderModuleDescriptor {
2013                label: Some("sprite_model_dda.wgsl"),
2014                source: wgpu::ShaderSource::Wgsl(
2015                    include_str!("../shaders/sprite_model_dda.wgsl").into(),
2016                ),
2017            });
2018        let bgl = self
2019            .device
2020            .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2021                label: Some("roxlap-gpu sprite_model_dda.bgl"),
2022                entries: &[
2023                    bgl_uniform_entry(0),
2024                    bgl_storage_entry(1, true), // occupancy
2025                    bgl_storage_entry(2, true), // colors
2026                    bgl_storage_entry(3, true), // color_offsets
2027                    bgl_storage_entry(4, true), // model_meta
2028                    bgl_storage_entry(5, true), // instances
2029                    bgl_storage_entry(6, true), // scene depth
2030                    wgpu::BindGroupLayoutEntry {
2031                        binding: 7,
2032                        visibility: wgpu::ShaderStages::COMPUTE,
2033                        ty: wgpu::BindingType::StorageTexture {
2034                            access: wgpu::StorageTextureAccess::WriteOnly,
2035                            format: wgpu::TextureFormat::Rgba8Unorm,
2036                            view_dimension: wgpu::TextureViewDimension::D2,
2037                        },
2038                        count: None,
2039                    },
2040                    bgl_storage_entry(8, true), // tile_ranges
2041                    bgl_storage_entry(9, true), // tile_instances
2042                ],
2043            });
2044        let pl = self
2045            .device
2046            .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2047                label: Some("roxlap-gpu sprite_model_dda.layout"),
2048                bind_group_layouts: &[&bgl],
2049                push_constant_ranges: &[],
2050            });
2051        let pipeline = self
2052            .device
2053            .create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
2054                label: Some("roxlap-gpu sprite_model_dda.pipeline"),
2055                layout: Some(&pl),
2056                module: &shader,
2057                entry_point: "march",
2058                compilation_options: wgpu::PipelineCompilationOptions::default(),
2059                cache: None,
2060            });
2061        let uniform_buf = self.device.create_buffer(&wgpu::BufferDescriptor {
2062            label: Some("roxlap-gpu sprite_model_dda.uniform"),
2063            size: std::mem::size_of::<SpriteModelUniform>() as u64,
2064            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2065            mapped_at_creation: false,
2066        });
2067        SpriteModelDdaResources {
2068            bgl,
2069            pipeline,
2070            uniform_buf,
2071        }
2072    }
2073}
2074
/// GPU.11 — headless scene-DDA renderer for tests + offline visual
/// gates. Owns the `scene_dda.wgsl` compute pipeline with no surface
/// and no blit pass; renders a [`GpuSceneResident`] to an in-memory
/// RGBA framebuffer via texture readback. The per-substage visual
/// gate (render reference scenes, diff PPMs) and the GPU.11.1 mip
/// render-diff both ride on this.
///
/// All resources are created once in [`HeadlessSceneRenderer::new`]
/// for a fixed framebuffer size.
pub struct HeadlessSceneRenderer {
    /// Framebuffer width in pixels, as passed to `new`.
    width: u32,
    /// Framebuffer height in pixels, as passed to `new`.
    height: u32,
    /// `Rgba8Unorm` colour target (`STORAGE_BINDING | COPY_SRC`).
    output_tex: wgpu::Texture,
    /// Default view of `output_tex`.
    output_view: wgpu::TextureView,
    /// Storage buffer sized `width * height * 4` bytes.
    depth_buffer: wgpu::Buffer,
    /// Uniform buffer sized for one `SceneDdaUniform`.
    uniform_buf: wgpu::Buffer,
    /// Sky texture, created in `new` as a single default-coloured
    /// pixel; stored alongside `sky_view` but not otherwise read
    /// (hence the underscore).
    _sky_texture: wgpu::Texture,
    /// View of the sky texture.
    sky_view: wgpu::TextureView,
    /// Linear-filtering, repeat-addressed sampler for the sky.
    sky_sampler: wgpu::Sampler,
    /// Bind-group layout of the scene-DDA compute pass.
    bgl: wgpu::BindGroupLayout,
    /// Compute pipeline for the `render_scene` entry point.
    pipeline: wgpu::ComputePipeline,
    /// CPU-mappable buffer (`COPY_DST | MAP_READ`) holding
    /// `padded_bytes_per_row * height` bytes.
    readback: wgpu::Buffer,
    /// Row pitch of `readback`: `width * 4` rounded up to a multiple
    /// of 256.
    padded_bytes_per_row: u32,
}
2096
2097impl HeadlessSceneRenderer {
2098    /// Build the compute pipeline + output/readback resources for a
2099    /// `width × height` framebuffer. Validates `scene_dda.wgsl` and
2100    /// the [`scene::GridStaticMeta`] std430 layout at pipeline /
2101    /// bind-group time.
2102    #[must_use]
2103    pub fn new(device: &wgpu::Device, width: u32, height: u32) -> Self {
2104        let output_tex = device.create_texture(&wgpu::TextureDescriptor {
2105            label: Some("roxlap-gpu headless.output"),
2106            size: wgpu::Extent3d {
2107                width,
2108                height,
2109                depth_or_array_layers: 1,
2110            },
2111            mip_level_count: 1,
2112            sample_count: 1,
2113            dimension: wgpu::TextureDimension::D2,
2114            format: wgpu::TextureFormat::Rgba8Unorm,
2115            usage: wgpu::TextureUsages::STORAGE_BINDING | wgpu::TextureUsages::COPY_SRC,
2116            view_formats: &[],
2117        });
2118        let output_view = output_tex.create_view(&wgpu::TextureViewDescriptor::default());
2119
2120        let uniform_buf = device.create_buffer(&wgpu::BufferDescriptor {
2121            label: Some("roxlap-gpu headless.uniform"),
2122            size: std::mem::size_of::<SceneDdaUniform>() as u64,
2123            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
2124            mapped_at_creation: false,
2125        });
2126        let depth_buffer = device.create_buffer(&wgpu::BufferDescriptor {
2127            label: Some("roxlap-gpu headless.depth"),
2128            size: u64::from(width) * u64::from(height) * 4,
2129            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
2130            mapped_at_creation: false,
2131        });
2132
2133        let default_sky_pixel = [120u8, 150, 220, 255];
2134        let (sky_texture, sky_view) = create_sky_texture(device, 1, 1, &default_sky_pixel);
2135        let sky_sampler = device.create_sampler(&wgpu::SamplerDescriptor {
2136            label: Some("roxlap-gpu headless.sky_sampler"),
2137            address_mode_u: wgpu::AddressMode::Repeat,
2138            address_mode_v: wgpu::AddressMode::Repeat,
2139            mag_filter: wgpu::FilterMode::Linear,
2140            min_filter: wgpu::FilterMode::Linear,
2141            ..Default::default()
2142        });
2143
2144        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
2145            label: Some("scene_dda.wgsl (headless)"),
2146            source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/scene_dda.wgsl").into()),
2147        });
2148        let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
2149            label: Some("roxlap-gpu headless.bgl"),
2150            entries: &[
2151                bgl_uniform_entry(0),
2152                bgl_storage_entry(1, true),
2153                bgl_storage_entry(2, true),
2154                bgl_storage_entry(3, true),
2155                bgl_storage_entry(4, true),
2156                bgl_storage_entry(5, true),
2157                bgl_storage_entry(6, true),
2158                bgl_storage_entry(7, true),
2159                wgpu::BindGroupLayoutEntry {
2160                    binding: 8,
2161                    visibility: wgpu::ShaderStages::COMPUTE,
2162                    ty: wgpu::BindingType::StorageTexture {
2163                        access: wgpu::StorageTextureAccess::WriteOnly,
2164                        format: wgpu::TextureFormat::Rgba8Unorm,
2165                        view_dimension: wgpu::TextureViewDimension::D2,
2166                    },
2167                    count: None,
2168                },
2169                wgpu::BindGroupLayoutEntry {
2170                    binding: 9,
2171                    visibility: wgpu::ShaderStages::COMPUTE,
2172                    ty: wgpu::BindingType::Texture {
2173                        sample_type: wgpu::TextureSampleType::Float { filterable: true },
2174                        view_dimension: wgpu::TextureViewDimension::D2,
2175                        multisampled: false,
2176                    },
2177                    count: None,
2178                },
2179                wgpu::BindGroupLayoutEntry {
2180                    binding: 10,
2181                    visibility: wgpu::ShaderStages::COMPUTE,
2182                    ty: wgpu::BindingType::Sampler(wgpu::SamplerBindingType::Filtering),
2183                    count: None,
2184                },
2185                bgl_storage_entry(11, false),
2186                bgl_storage_entry(12, true),
2187                bgl_storage_entry(13, true),
2188                bgl_storage_entry(14, true),
2189            ],
2190        });
2191        let pl = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
2192            label: Some("roxlap-gpu headless.layout"),
2193            bind_group_layouts: &[&bgl],
2194            push_constant_ranges: &[],
2195        });
2196        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
2197            label: Some("roxlap-gpu headless.pipeline"),
2198            layout: Some(&pl),
2199            module: &shader,
2200            entry_point: "render_scene",
2201            compilation_options: wgpu::PipelineCompilationOptions::default(),
2202            cache: None,
2203        });
2204
2205        // Readback buffer: row pitch must be 256-aligned for
2206        // copy_texture_to_buffer.
2207        let padded_bytes_per_row = (width * 4).div_ceil(256) * 256;
2208        let readback = device.create_buffer(&wgpu::BufferDescriptor {
2209            label: Some("roxlap-gpu headless.readback"),
2210            size: u64::from(padded_bytes_per_row) * u64::from(height),
2211            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
2212            mapped_at_creation: false,
2213        });
2214
2215        Self {
2216            width,
2217            height,
2218            output_tex,
2219            output_view,
2220            depth_buffer,
2221            uniform_buf,
2222            _sky_texture: sky_texture,
2223            sky_view,
2224            sky_sampler,
2225            bgl,
2226            pipeline,
2227            readback,
2228            padded_bytes_per_row,
2229        }
2230    }
2231
2232    /// Render `scene` from `cameras` (one per grid) and read the
2233    /// framebuffer back as `width*height` packed `0xAABBGGRR` pixels
2234    /// (R in the low byte). Fog is disabled. `mip_scan_dist` drives
2235    /// the GPU.11.1 scene-grid LOD (`0` = always mip-0). Blocks on
2236    /// readback.
2237    ///
2238    /// # Panics
2239    /// If `cameras.len() != scene.grid_count`.
2240    #[must_use]
2241    #[allow(clippy::too_many_arguments)]
2242    pub fn render(
2243        &self,
2244        device: &wgpu::Device,
2245        queue: &wgpu::Queue,
2246        scene: &GpuSceneResident,
2247        cameras: &[Camera],
2248        fov_y_rad: f32,
2249        max_outer_steps: u32,
2250        mip_scan_dist: f32,
2251    ) -> Vec<u32> {
2252        assert_eq!(
2253            cameras.len(),
2254            scene.grid_count as usize,
2255            "headless render: {} cameras for {} grids",
2256            cameras.len(),
2257            scene.grid_count,
2258        );
2259
2260        let mut cam_array = [SceneDdaPerGridCamera::zeroed(); SCENE_MAX_GRIDS];
2261        for (i, cam) in cameras.iter().enumerate() {
2262            cam_array[i] = SceneDdaPerGridCamera {
2263                pos: cam.position,
2264                _pad0: 0.0,
2265                right: cam.right,
2266                _pad1: 0.0,
2267                down: cam.down,
2268                _pad2: 0.0,
2269                forward: cam.forward,
2270                _pad3: 0.0,
2271            };
2272        }
2273        let uniform = SceneDdaUniform {
2274            fov_y_rad,
2275            grid_count: scene.grid_count,
2276            max_outer_steps,
2277            _pad0: 0,
2278            screen_size: [self.width, self.height],
2279            _pad1: [0; 2],
2280            cameras: cam_array,
2281            // Fog off: near/far past any reachable t → factor 0.
2282            fog_color: [0.0, 0.0, 0.0, 1.0e29],
2283            fog_far: 1.0e30,
2284            write_depth: 0,
2285            occ_page_words: scene.occupancy_page_words,
2286            occ_num_pages: scene.occupancy_num_pages,
2287            mip_scan_dist,
2288            _pad2: 0,
2289            _pad3: 0,
2290            _pad4: 0,
2291        };
2292        queue.write_buffer(&self.uniform_buf, 0, bytemuck::bytes_of(&uniform));
2293
2294        let bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
2295            label: Some("roxlap-gpu headless.bg"),
2296            layout: &self.bgl,
2297            entries: &[
2298                wgpu::BindGroupEntry {
2299                    binding: 0,
2300                    resource: self.uniform_buf.as_entire_binding(),
2301                },
2302                wgpu::BindGroupEntry {
2303                    binding: 1,
2304                    resource: scene.occupancy_pages[0].as_entire_binding(),
2305                },
2306                wgpu::BindGroupEntry {
2307                    binding: 2,
2308                    resource: scene.all_color_offsets.as_entire_binding(),
2309                },
2310                wgpu::BindGroupEntry {
2311                    binding: 3,
2312                    resource: scene.all_colors.as_entire_binding(),
2313                },
2314                wgpu::BindGroupEntry {
2315                    binding: 4,
2316                    resource: scene.all_chunk_colors_base.as_entire_binding(),
2317                },
2318                wgpu::BindGroupEntry {
2319                    binding: 5,
2320                    resource: scene.all_chunk_occupancy.as_entire_binding(),
2321                },
2322                wgpu::BindGroupEntry {
2323                    binding: 6,
2324                    resource: scene.grid_static_meta.as_entire_binding(),
2325                },
2326                wgpu::BindGroupEntry {
2327                    binding: 7,
2328                    resource: scene.all_slot_chunk_idx.as_entire_binding(),
2329                },
2330                wgpu::BindGroupEntry {
2331                    binding: 8,
2332                    resource: wgpu::BindingResource::TextureView(&self.output_view),
2333                },
2334                wgpu::BindGroupEntry {
2335                    binding: 9,
2336                    resource: wgpu::BindingResource::TextureView(&self.sky_view),
2337                },
2338                wgpu::BindGroupEntry {
2339                    binding: 10,
2340                    resource: wgpu::BindingResource::Sampler(&self.sky_sampler),
2341                },
2342                wgpu::BindGroupEntry {
2343                    binding: 11,
2344                    resource: self.depth_buffer.as_entire_binding(),
2345                },
2346                wgpu::BindGroupEntry {
2347                    binding: 12,
2348                    resource: scene.occupancy_pages[1].as_entire_binding(),
2349                },
2350                wgpu::BindGroupEntry {
2351                    binding: 13,
2352                    resource: scene.occupancy_pages[2].as_entire_binding(),
2353                },
2354                wgpu::BindGroupEntry {
2355                    binding: 14,
2356                    resource: scene.occupancy_pages[3].as_entire_binding(),
2357                },
2358            ],
2359        });
2360
2361        let mut enc =
2362            device.create_command_encoder(&wgpu::CommandEncoderDescriptor { label: None });
2363        {
2364            let mut pass = enc.begin_compute_pass(&wgpu::ComputePassDescriptor {
2365                label: Some("roxlap-gpu headless.pass"),
2366                timestamp_writes: None,
2367            });
2368            pass.set_pipeline(&self.pipeline);
2369            pass.set_bind_group(0, &bg, &[]);
2370            pass.dispatch_workgroups(self.width.div_ceil(8), self.height.div_ceil(8), 1);
2371        }
2372        enc.copy_texture_to_buffer(
2373            wgpu::ImageCopyTexture {
2374                texture: &self.output_tex,
2375                mip_level: 0,
2376                origin: wgpu::Origin3d::ZERO,
2377                aspect: wgpu::TextureAspect::All,
2378            },
2379            wgpu::ImageCopyBuffer {
2380                buffer: &self.readback,
2381                layout: wgpu::ImageDataLayout {
2382                    offset: 0,
2383                    bytes_per_row: Some(self.padded_bytes_per_row),
2384                    rows_per_image: Some(self.height),
2385                },
2386            },
2387            wgpu::Extent3d {
2388                width: self.width,
2389                height: self.height,
2390                depth_or_array_layers: 1,
2391            },
2392        );
2393        queue.submit(Some(enc.finish()));
2394
2395        let slice = self.readback.slice(..);
2396        let (tx, rx) = std::sync::mpsc::channel();
2397        slice.map_async(wgpu::MapMode::Read, move |r| {
2398            let _ = tx.send(r);
2399        });
2400        device.poll(wgpu::Maintain::Wait);
2401        rx.recv().expect("map_async channel").expect("map_async");
2402
2403        let data = slice.get_mapped_range();
2404        let mut out = Vec::with_capacity((self.width * self.height) as usize);
2405        let pitch = self.padded_bytes_per_row as usize;
2406        for y in 0..self.height as usize {
2407            let row = &data[y * pitch..y * pitch + self.width as usize * 4];
2408            for px in row.chunks_exact(4) {
2409                out.push(
2410                    u32::from(px[0])
2411                        | (u32::from(px[1]) << 8)
2412                        | (u32::from(px[2]) << 16)
2413                        | (u32::from(px[3]) << 24),
2414                );
2415            }
2416        }
2417        drop(data);
2418        self.readback.unmap();
2419        out
2420    }
2421}
2422
2423fn bgl_uniform_entry(binding: u32) -> wgpu::BindGroupLayoutEntry {
2424    wgpu::BindGroupLayoutEntry {
2425        binding,
2426        visibility: wgpu::ShaderStages::COMPUTE,
2427        ty: wgpu::BindingType::Buffer {
2428            ty: wgpu::BufferBindingType::Uniform,
2429            has_dynamic_offset: false,
2430            min_binding_size: None,
2431        },
2432        count: None,
2433    }
2434}
2435
2436fn bgl_storage_entry(binding: u32, read_only: bool) -> wgpu::BindGroupLayoutEntry {
2437    wgpu::BindGroupLayoutEntry {
2438        binding,
2439        visibility: wgpu::ShaderStages::COMPUTE,
2440        ty: wgpu::BindingType::Buffer {
2441            ty: wgpu::BufferBindingType::Storage { read_only },
2442            has_dynamic_offset: false,
2443            min_binding_size: None,
2444        },
2445        count: None,
2446    }
2447}
2448
2449/// Create a fresh sky panorama texture sized `width × height` with
2450/// the initial pixel data uploaded via `write_texture`. Used by
2451/// `GpuRenderer::new` (1×1 default) and `set_sky_panorama` (host-
2452/// supplied panorama).
2453fn create_sky_texture(
2454    device: &wgpu::Device,
2455    width: u32,
2456    height: u32,
2457    _initial_pixels: &[u8],
2458) -> (wgpu::Texture, wgpu::TextureView) {
2459    let tex = device.create_texture(&wgpu::TextureDescriptor {
2460        label: Some("roxlap-gpu sky_texture"),
2461        size: wgpu::Extent3d {
2462            width,
2463            height,
2464            depth_or_array_layers: 1,
2465        },
2466        mip_level_count: 1,
2467        sample_count: 1,
2468        dimension: wgpu::TextureDimension::D2,
2469        format: wgpu::TextureFormat::Rgba8Unorm,
2470        usage: wgpu::TextureUsages::TEXTURE_BINDING | wgpu::TextureUsages::COPY_DST,
2471        view_formats: &[],
2472    });
2473    let view = tex.create_view(&wgpu::TextureViewDescriptor::default());
2474    (tex, view)
2475}
2476
2477/// GPU.4 needs to upload a whole grid (~hundreds of MiB) as a few
2478/// storage buffers. wgpu's default `max_storage_buffer_binding_size`
2479/// is 128 MiB, which is just enough for the demo's 32×32 ground
2480/// occupancy (~128 MiB) but not the colour array. We request as
2481/// much as the adapter is willing to give — most desktop GPUs cap
2482/// individual storage buffers at 2-4 GiB; iGPUs often offer the
2483/// full system memory.
2484pub(crate) fn pick_required_limits(adapter_limits: &wgpu::Limits) -> wgpu::Limits {
2485    wgpu::Limits {
2486        max_storage_buffer_binding_size: adapter_limits.max_storage_buffer_binding_size,
2487        max_buffer_size: adapter_limits.max_buffer_size,
2488        // Occupancy paging adds up to MAX_OCC_PAGES-1 extra storage
2489        // bindings; with the scene's other buffers + the GPU.9 depth
2490        // buffer the scene_dda stage needs ~11. The default cap is 8.
2491        // Both NVK and lavapipe advertise ≫16, so request 16.
2492        max_storage_buffers_per_shader_stage: adapter_limits
2493            .max_storage_buffers_per_shader_stage
2494            .min(16),
2495        ..wgpu::Limits::default()
2496    }
2497}
2498
2499fn pick_present_mode(modes: &[wgpu::PresentMode]) -> wgpu::PresentMode {
2500    // Prefer Mailbox > Immediate > Fifo. Fifo is the universal
2501    // fallback and the only one Wayland-on-Mesa always offers.
2502    for &m in &[wgpu::PresentMode::Mailbox, wgpu::PresentMode::Immediate] {
2503        if modes.contains(&m) {
2504            return m;
2505        }
2506    }
2507    wgpu::PresentMode::Fifo
2508}
2509
/// Un-normalised world-space view-ray direction through window pixel
/// `(x, y)` for a vertical-FOV pinhole camera — the same projection
/// `scene_dda.wgsl`'s `render_scene` uses. Shared by
/// [`GpuRenderer::pixel_ray`]; kept free-standing so it can be
/// unit-tested without a device.
///
/// `right`/`down`/`forward` are the camera basis, `w`/`h` the window
/// size in pixels and `fov_y_rad` the vertical field of view in
/// radians. The ray passes through the pixel centre (`+0.5`); the
/// result is `forward` plus offsets along `right` and against `down`
/// (pixels above the frame centre tilt away from `down`).
#[must_use]
#[allow(clippy::too_many_arguments)]
pub fn pinhole_pixel_ray(
    right: [f64; 3],
    down: [f64; 3],
    forward: [f64; 3],
    x: f64,
    y: f64,
    w: f64,
    h: f64,
    fov_y_rad: f64,
) -> [f64; 3] {
    // Half-extents of the image plane at unit distance along `forward`.
    let tan_half_y = (fov_y_rad * 0.5).tan();
    let tan_half_x = tan_half_y * (w / h);

    // Pixel centre in NDC: x runs -1 (left) → +1 (right), y runs
    // +1 (top) → -1 (bottom).
    let ndc_x = (x + 0.5) / w * 2.0 - 1.0;
    let ndc_y_top = 1.0 - (y + 0.5) / h * 2.0;

    let kx = ndc_x * tan_half_x;
    let ky = ndc_y_top * tan_half_y;

    std::array::from_fn(|axis| forward[axis] + kx * right[axis] - ky * down[axis])
}
2538
#[cfg(test)]
mod pixel_ray_tests {
    use super::pinhole_pixel_ray;

    // Axis-aligned camera basis shared by every case.
    const RIGHT: [f64; 3] = [1.0, 0.0, 0.0];
    const DOWN: [f64; 3] = [0.0, 1.0, 0.0];
    const FWD: [f64; 3] = [0.0, 0.0, 1.0]; // voxlap z-down "look down"

    // 1280×720 window; `MID_Y` is the sub-pixel row whose centre lands
    // exactly on NDC y = 0.
    const W: f64 = 1280.0;
    const H: f64 = 720.0;
    const MID_Y: f64 = 359.5;

    /// Ray through `(x, y)` of the shared 1280×720 frame.
    fn ray_at(x: f64, y: f64, fov_y_rad: f64) -> [f64; 3] {
        pinhole_pixel_ray(RIGHT, DOWN, FWD, x, y, W, H, fov_y_rad)
    }

    // Frame centre (NDC 0,0) points straight along `forward`.
    #[test]
    fn centre_pixel_is_forward() {
        let d = ray_at(639.5, MID_Y, 60_f64.to_radians());
        let lateral_is_zero = d[0].abs() < 1e-9 && d[1].abs() < 1e-9;
        assert!(lateral_is_zero, "centre ≈ forward, got {d:?}");
        assert!((d[2] - 1.0).abs() < 1e-9);
    }

    // Right edge pixel tilts +right by tan(hfov/2); the lateral
    // component equals half_w = tan(fov_y/2)*aspect at the very edge.
    #[test]
    fn right_edge_tilts_by_half_w() {
        let fov = 60_f64.to_radians();
        let half_w = (fov * 0.5).tan() * (1280.0 / 720.0);
        let d = ray_at(1279.5, MID_Y, fov);
        assert!((d[0] - half_w).abs() < 1e-6, "x={}, half_w={half_w}", d[0]);
        assert!(d[0] > 0.0, "right edge tilts +right");
    }
}
2577}