Skip to main content

roxlap_gpu/
resident.rs

1//! GPU.2 — chunk-resident storage buffers + debug read-back.
2//!
3//! Uploads a [`ChunkUpload`] as three storage buffers (occupancy,
4//! per-column colour offsets, packed colour array) on a wgpu
5//! device. The [`GpuChunkResident::read_voxel_blocking`] helper
6//! dispatches the `debug_read.wgsl` shader to extract a single
7//! voxel's colour via map-async readback, validating the round trip
8//! demanded by `PORTING-GPU.md` §GPU.2.
9
10#![allow(clippy::too_many_lines, clippy::missing_panics_doc)]
11
12use std::num::NonZeroU64;
13
14use bytemuck::{Pod, Zeroable};
15use wgpu::util::DeviceExt;
16
17use crate::decompress::{ChunkUpload, CHUNK_Z};
18
19/// Uniform handed to `debug_read.wgsl` — the voxel coordinate to
20/// probe plus the chunk extents the shader needs to index occupancy.
21#[repr(C)]
22#[derive(Clone, Copy, Pod, Zeroable)]
23struct ProbeUniform {
24    coord: [u32; 3],
25    vsid: u32,
26    chunk_z: u32,
27    _pad: [u32; 3],
28}
29
30/// GPU-side storage for one decompressed chunk. Owns its buffers;
31/// dropping releases them.
32pub struct GpuChunkResident {
33    pub vsid: u32,
34    pub occupancy: wgpu::Buffer,
35    pub color_offsets: wgpu::Buffer,
36    pub colors: wgpu::Buffer,
37    pub occupancy_bytes: u64,
38    pub color_offsets_bytes: u64,
39    pub colors_bytes: u64,
40
41    // Debug-read scaffolding. In GPU.3+ the main render shader
42    // consumes the storage buffers directly; for GPU.2 these are
43    // the only consumer.
44    probe_uniform: wgpu::Buffer,
45    probe_output: wgpu::Buffer,
46    probe_readback: wgpu::Buffer,
47    probe_bg: wgpu::BindGroup,
48    probe_pipeline: wgpu::ComputePipeline,
49}
50
51impl GpuChunkResident {
52    /// Upload `chunk` to `device`. Single-shot allocation; no
53    /// streaming machinery yet — that arrives in GPU.6 / GPU.7.
54    pub fn upload(device: &wgpu::Device, chunk: &ChunkUpload) -> Self {
55        let occupancy = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
56            label: Some("roxlap-gpu chunk.occupancy"),
57            contents: bytemuck::cast_slice(&chunk.occupancy),
58            usage: wgpu::BufferUsages::STORAGE,
59        });
60        let color_offsets = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
61            label: Some("roxlap-gpu chunk.color_offsets"),
62            contents: bytemuck::cast_slice(&chunk.color_offsets),
63            usage: wgpu::BufferUsages::STORAGE,
64        });
65        let colors = device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
66            label: Some("roxlap-gpu chunk.colors"),
67            contents: bytemuck::cast_slice(&chunk.colors),
68            usage: wgpu::BufferUsages::STORAGE,
69        });
70
71        let occupancy_bytes = (chunk.occupancy.len() * 4) as u64;
72        let color_offsets_bytes = (chunk.color_offsets.len() * 4) as u64;
73        let colors_bytes = (chunk.colors.len() * 4) as u64;
74
75        // Debug-read scaffolding ----------------------------------------------
76        let probe_uniform = device.create_buffer(&wgpu::BufferDescriptor {
77            label: Some("roxlap-gpu chunk.probe_uniform"),
78            size: std::mem::size_of::<ProbeUniform>() as u64,
79            usage: wgpu::BufferUsages::UNIFORM | wgpu::BufferUsages::COPY_DST,
80            mapped_at_creation: false,
81        });
82        let probe_output = device.create_buffer(&wgpu::BufferDescriptor {
83            label: Some("roxlap-gpu chunk.probe_output"),
84            size: 4,
85            usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_SRC,
86            mapped_at_creation: false,
87        });
88        let probe_readback = device.create_buffer(&wgpu::BufferDescriptor {
89            label: Some("roxlap-gpu chunk.probe_readback"),
90            size: 4,
91            usage: wgpu::BufferUsages::MAP_READ | wgpu::BufferUsages::COPY_DST,
92            mapped_at_creation: false,
93        });
94
95        let shader = device.create_shader_module(wgpu::ShaderModuleDescriptor {
96            label: Some("debug_read.wgsl"),
97            source: wgpu::ShaderSource::Wgsl(include_str!("../shaders/debug_read.wgsl").into()),
98        });
99
100        let bgl = device.create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
101            label: Some("roxlap-gpu chunk.probe_bgl"),
102            entries: &[
103                bgl_uniform_entry(0),
104                bgl_storage_entry(1, true),
105                bgl_storage_entry(2, true),
106                bgl_storage_entry(3, true),
107                bgl_storage_entry(4, false),
108            ],
109        });
110        let pl = device.create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
111            label: Some("roxlap-gpu chunk.probe_layout"),
112            bind_group_layouts: &[Some(&bgl)],
113            immediate_size: 0,
114        });
115        let pipeline = device.create_compute_pipeline(&wgpu::ComputePipelineDescriptor {
116            label: Some("roxlap-gpu chunk.probe_pipeline"),
117            layout: Some(&pl),
118            module: &shader,
119            entry_point: Some("debug_read"),
120            compilation_options: wgpu::PipelineCompilationOptions::default(),
121            cache: None,
122        });
123        let bg = device.create_bind_group(&wgpu::BindGroupDescriptor {
124            label: Some("roxlap-gpu chunk.probe_bg"),
125            layout: &bgl,
126            entries: &[
127                wgpu::BindGroupEntry {
128                    binding: 0,
129                    resource: probe_uniform.as_entire_binding(),
130                },
131                wgpu::BindGroupEntry {
132                    binding: 1,
133                    resource: occupancy.as_entire_binding(),
134                },
135                wgpu::BindGroupEntry {
136                    binding: 2,
137                    resource: color_offsets.as_entire_binding(),
138                },
139                wgpu::BindGroupEntry {
140                    binding: 3,
141                    resource: colors.as_entire_binding(),
142                },
143                wgpu::BindGroupEntry {
144                    binding: 4,
145                    resource: probe_output.as_entire_binding(),
146                },
147            ],
148        });
149
150        Self {
151            vsid: chunk.vsid,
152            occupancy,
153            color_offsets,
154            colors,
155            occupancy_bytes,
156            color_offsets_bytes,
157            colors_bytes,
158            probe_uniform,
159            probe_output,
160            probe_readback,
161            probe_bg: bg,
162            probe_pipeline: pipeline,
163        }
164    }
165
166    /// Total resident bytes (occupancy + offsets + colours) — for
167    /// the upload-time benchmark.
168    pub fn resident_bytes(&self) -> u64 {
169        self.occupancy_bytes + self.color_offsets_bytes + self.colors_bytes
170    }
171
172    /// Round-trip read of a single voxel via the debug shader.
173    /// Returns `Some(rgb)` for a solid voxel (textured or bedrock),
174    /// `None` for empty / out-of-bounds.
175    ///
176    /// Blocks until the GPU finishes; not intended for the render
177    /// hot path. The GPU.2 validation test is the only caller (native).
178    pub fn read_voxel_blocking(
179        &self,
180        device: &wgpu::Device,
181        queue: &wgpu::Queue,
182        x: u32,
183        y: u32,
184        z: u32,
185    ) -> Option<u32> {
186        let uniform = ProbeUniform {
187            coord: [x, y, z],
188            vsid: self.vsid,
189            chunk_z: CHUNK_Z,
190            _pad: [0; 3],
191        };
192        queue.write_buffer(&self.probe_uniform, 0, bytemuck::bytes_of(&uniform));
193
194        let mut encoder = device.create_command_encoder(&wgpu::CommandEncoderDescriptor {
195            label: Some("roxlap-gpu chunk.read_voxel"),
196        });
197        {
198            let mut cpass = encoder.begin_compute_pass(&wgpu::ComputePassDescriptor {
199                label: Some("roxlap-gpu chunk.debug_read"),
200                timestamp_writes: None,
201            });
202            cpass.set_pipeline(&self.probe_pipeline);
203            cpass.set_bind_group(0, &self.probe_bg, &[]);
204            cpass.dispatch_workgroups(1, 1, 1);
205        }
206        encoder.copy_buffer_to_buffer(&self.probe_output, 0, &self.probe_readback, 0, 4);
207        queue.submit(std::iter::once(encoder.finish()));
208
209        // Map the readback buffer. wgpu's map_async runs the
210        // callback when the device.poll(Wait) services it; pollster
211        // turns the resulting future into a blocking wait.
212        let slice = self.probe_readback.slice(..);
213        let (tx, rx) = std::sync::mpsc::channel();
214        slice.map_async(wgpu::MapMode::Read, move |res| {
215            tx.send(res).expect("send map result");
216        });
217        device.poll(wgpu::PollType::wait_indefinitely()).ok();
218        rx.recv()
219            .expect("recv map result")
220            .expect("map_async returned an error");
221
222        let bytes = slice.get_mapped_range();
223        let value = u32::from_le_bytes(bytes[..4].try_into().expect("4 bytes"));
224        drop(bytes);
225        self.probe_readback.unmap();
226
227        if value == 0 {
228            None
229        } else {
230            Some(value)
231        }
232    }
233}
234
235fn bgl_uniform_entry(binding: u32) -> wgpu::BindGroupLayoutEntry {
236    wgpu::BindGroupLayoutEntry {
237        binding,
238        visibility: wgpu::ShaderStages::COMPUTE,
239        ty: wgpu::BindingType::Buffer {
240            ty: wgpu::BufferBindingType::Uniform,
241            has_dynamic_offset: false,
242            min_binding_size: NonZeroU64::new(std::mem::size_of::<ProbeUniform>() as u64),
243        },
244        count: None,
245    }
246}
247
248fn bgl_storage_entry(binding: u32, read_only: bool) -> wgpu::BindGroupLayoutEntry {
249    wgpu::BindGroupLayoutEntry {
250        binding,
251        visibility: wgpu::ShaderStages::COMPUTE,
252        ty: wgpu::BindingType::Buffer {
253            ty: wgpu::BufferBindingType::Storage { read_only },
254            has_dynamic_offset: false,
255            min_binding_size: None,
256        },
257        count: None,
258    }
259}