roxlap_gpu/grid.rs
1//! GPU.4 — grid-of-chunks upload + storage layout.
2//!
3//! Concatenates every chunk of one `roxlap-scene::Grid` into a few
4//! flat storage buffers so a single compute dispatch can outer-DDA
5//! through chunk-space + inner-DDA into any chunk it hits.
6//!
7#![allow(
8 clippy::cast_sign_loss,
9 clippy::cast_lossless,
10 clippy::doc_markdown,
11 clippy::field_reassign_with_default
12)]
13
14//! Memory layout (post-bedrock-strip):
15//!
16//! * `occupancy[meta_idx]` — one chunk's 128 KiB occupancy slice
17//! starts at `meta_idx * vsid² * OCC_WORDS_PER_COLUMN` u32 words.
18//! Uniform per chunk (all chunks are vsid² × CHUNK_Z voxels).
19//! * `color_offsets[meta_idx]` — one chunk's `vsid² + 1` u32
20//! offsets start at `meta_idx * (vsid² + 1)` u32 words. Uniform
21//! per chunk.
22//! * `colors` — variable per chunk. Per-chunk base index lives in
23//! `chunk_colors_base[meta_idx]`.
24//! * `chunk_occupancy` — 1 bit per chunk position. Bit at
25//! `meta_idx` set iff that chunk has any textured voxels. The
26//! outer DDA uses this to skip empty chunks in one step.
27//!
28//! The `meta_idx` for a chunk at `(chx, chy, chz)` is its row-major
29//! offset within the grid's `chunks_dims` bounding box:
30//!
31//! ```text
32//! rel = chunk_idx - origin_chunk
33//! meta_idx = rel.x + rel.y * chunks_dims.x + rel.z * chunks_dims.x * chunks_dims.y
34//! ```
35
36use wgpu::util::DeviceExt;
37
38use crate::decompress::{ChunkUpload, CHUNK_Z, OCC_WORDS_PER_COLUMN};
39
40/// CPU-side aggregation of a grid's chunks ready to upload. Host
41/// (e.g. `roxlap-scene-demo`) builds this by iterating its
42/// `roxlap-scene::Grid` and calling [`crate::decompress_chunk`] per
43/// materialised chunk.
44pub struct GridUpload {
45 /// Shared XY extent of every chunk in voxels. Matches
46 /// `roxlap-scene::CHUNK_SIZE_XY = 128`.
47 pub vsid: u32,
48 /// Lowest chunk index present in the grid `(min_chx, min_chy,
49 /// min_chz)`. The grid's bounding box runs from `origin_chunk`
50 /// to `origin_chunk + chunks_dims` exclusive.
51 pub origin_chunk: [i32; 3],
52 /// Chunk-count along each axis = `max - min + 1`.
53 pub chunks_dims: [u32; 3],
54 /// GPU.7 slot-pool dimensions for modular chunk indexing.
55 /// Every component MUST be a power of 2. A chunk at index
56 /// `(chx, chy, chz)` maps to slot
57 /// `(chx & (pool_dims.x - 1), chy & (pool_dims.y - 1),
58 /// chz & (pool_dims.z - 1))`. As long as
59 /// `pool_dims_axis ≥ active_range_along_axis`, no two
60 /// simultaneously-resident chunks collide. Set this larger than
61 /// `chunks_dims` only when streaming may install chunks at
62 /// indices outside the initial bbox.
63 pub pool_dims: [u32; 3],
64 /// `(chunk_idx, decompressed)` pairs. Chunks outside the
65 /// pool's collision-free active range are still accepted —
66 /// modular indexing will assign them slots; the caller is
67 /// responsible for avoiding collisions with other resident
68 /// chunks.
69 pub chunks: Vec<([i32; 3], ChunkUpload)>,
70}
71
72impl GridUpload {
73 #[must_use]
74 pub fn total_chunks(&self) -> u32 {
75 self.chunks_dims[0] * self.chunks_dims[1] * self.chunks_dims[2]
76 }
77
78 /// Default GPU.7 [`Self::pool_dims`] derived from
79 /// `chunks_dims` — each axis rounded up to the next power of 2.
80 /// Use this when the grid is static + slots map 1:1 to bbox
81 /// positions; for streaming grids, callers should pick a
82 /// larger pool that covers `2 × r_active_chunks + 1` along
83 /// each axis.
84 #[must_use]
85 pub fn default_pool_dims(chunks_dims: [u32; 3]) -> [u32; 3] {
86 [
87 ceil_pow2(chunks_dims[0]),
88 ceil_pow2(chunks_dims[1]),
89 ceil_pow2(chunks_dims[2]),
90 ]
91 }
92
93 /// Linear chunk index `(meta_idx)` for `(chx, chy, chz)` in the
94 /// grid's row-major bounding-box order. `None` if the index is
95 /// outside the grid.
96 #[must_use]
97 pub fn meta_idx_of(&self, chunk_idx: [i32; 3]) -> Option<u32> {
98 let dx = chunk_idx[0] - self.origin_chunk[0];
99 let dy = chunk_idx[1] - self.origin_chunk[1];
100 let dz = chunk_idx[2] - self.origin_chunk[2];
101 if dx < 0
102 || dy < 0
103 || dz < 0
104 || (dx as u32) >= self.chunks_dims[0]
105 || (dy as u32) >= self.chunks_dims[1]
106 || (dz as u32) >= self.chunks_dims[2]
107 {
108 return None;
109 }
110 Some(
111 (dx as u32)
112 + (dy as u32) * self.chunks_dims[0]
113 + (dz as u32) * self.chunks_dims[0] * self.chunks_dims[1],
114 )
115 }
116}
117
118/// GPU-resident storage for one grid's chunks. Lives until the
119/// host drops it; in GPU.6 (edit invalidation) we'll re-upload
120/// individual chunks via partial buffer writes.
121pub struct GpuGridResident {
122 pub vsid: u32,
123 pub origin_chunk: [i32; 3],
124 pub chunks_dims: [u32; 3],
125 pub total_chunks: u32,
126 pub occupancy: wgpu::Buffer,
127 pub color_offsets: wgpu::Buffer,
128 pub colors: wgpu::Buffer,
129 pub chunk_colors_base: wgpu::Buffer,
130 pub chunk_occupancy: wgpu::Buffer,
131 pub occupancy_bytes: u64,
132 pub color_offsets_bytes: u64,
133 pub colors_bytes: u64,
134}
135
136impl GpuGridResident {
137 /// Pack + upload `info`. All buffers are sized to fit
138 /// `total_chunks` regardless of which chunks are actually
139 /// present in `info.chunks` — missing chunks have their
140 /// `chunk_occupancy` bit clear and their per-chunk slices
141 /// zero-filled, which the GPU.4 marcher reads as "no voxels
142 /// here, skip".
143 ///
144 /// # Panics
145 /// If a chunk's `vsid` doesn't match `info.vsid`.
146 pub fn upload(device: &wgpu::Device, info: &GridUpload) -> Self {
147 let vsid = info.vsid;
148 let vsid_usize = vsid as usize;
149 let cols_per_chunk = vsid_usize * vsid_usize;
150 let occ_words_per_chunk = cols_per_chunk * (OCC_WORDS_PER_COLUMN as usize);
151 let offsets_words_per_chunk = cols_per_chunk + 1;
152
153 let total_chunks = info.total_chunks();
154 let total_chunks_usize = total_chunks as usize;
155
156 let mut occupancy = vec![0u32; total_chunks_usize * occ_words_per_chunk];
157 let mut color_offsets = vec![0u32; total_chunks_usize * offsets_words_per_chunk];
158 let mut chunk_colors_base = vec![0u32; total_chunks_usize];
159 let mut chunk_occupancy = vec![0u32; total_chunks_usize.div_ceil(32)];
160 let mut colors: Vec<u32> = Vec::new();
161
162 let mut populated = 0u32;
163 for (chunk_idx, chunk) in &info.chunks {
164 let Some(meta_idx) = info.meta_idx_of(*chunk_idx) else {
165 continue;
166 };
167 assert_eq!(
168 chunk.vsid, vsid,
169 "GpuGridResident: chunk vsid {} disagrees with grid vsid {}",
170 chunk.vsid, vsid,
171 );
172 let meta_idx_us = meta_idx as usize;
173
174 // Per-chunk occupancy slice.
175 let occ_start = meta_idx_us * occ_words_per_chunk;
176 occupancy[occ_start..occ_start + occ_words_per_chunk].copy_from_slice(&chunk.occupancy);
177
178 // Per-chunk color_offsets slice. Rebase: the per-column
179 // offsets in `chunk.color_offsets` are local to this
180 // chunk's colours; the GPU shader adds
181 // `chunk_colors_base[meta_idx]` on the outside, so the
182 // values copy in verbatim.
183 let off_start = meta_idx_us * offsets_words_per_chunk;
184 color_offsets[off_start..off_start + offsets_words_per_chunk]
185 .copy_from_slice(&chunk.color_offsets);
186
187 // Append this chunk's colours; record the base.
188 chunk_colors_base[meta_idx_us] =
189 u32::try_from(colors.len()).expect("colours fit in u32");
190 colors.extend_from_slice(&chunk.colors);
191
192 // Mark chunk as non-empty iff it has any colour entries
193 // (every textured voxel writes one). Empty chunks (all
194 // air) skip cheaply in the outer DDA.
195 if !chunk.colors.is_empty() {
196 chunk_occupancy[meta_idx_us >> 5] |= 1u32 << (meta_idx_us & 31);
197 populated += 1;
198 }
199 }
200
201 // Storage buffers can't be empty — `colors` may be all-zero
202 // length when every chunk is air. Pad with one sentinel u32
203 // (never read because no chunk's `chunk_occupancy` bit is
204 // set in that case).
205 if colors.is_empty() {
206 colors.push(0);
207 }
208
209 let occupancy_buf = create_storage(device, "roxlap-gpu grid.occupancy", &occupancy);
210 let color_offsets_buf =
211 create_storage(device, "roxlap-gpu grid.color_offsets", &color_offsets);
212 let colors_buf = create_storage(device, "roxlap-gpu grid.colors", &colors);
213 let chunk_colors_base_buf = create_storage(
214 device,
215 "roxlap-gpu grid.chunk_colors_base",
216 &chunk_colors_base,
217 );
218 let chunk_occupancy_buf =
219 create_storage(device, "roxlap-gpu grid.chunk_occupancy", &chunk_occupancy);
220
221 let occupancy_bytes = (occupancy.len() * 4) as u64;
222 let color_offsets_bytes = (color_offsets.len() * 4) as u64;
223 let colors_bytes = (colors.len() * 4) as u64;
224 let _ = populated; // reserved for future telemetry
225
226 Self {
227 vsid,
228 origin_chunk: info.origin_chunk,
229 chunks_dims: info.chunks_dims,
230 total_chunks,
231 occupancy: occupancy_buf,
232 color_offsets: color_offsets_buf,
233 colors: colors_buf,
234 chunk_colors_base: chunk_colors_base_buf,
235 chunk_occupancy: chunk_occupancy_buf,
236 occupancy_bytes,
237 color_offsets_bytes,
238 colors_bytes,
239 }
240 }
241
242 /// Total resident bytes — sum of all five storage buffers.
243 /// Used by the demo's startup print.
244 pub fn resident_bytes(&self) -> u64 {
245 self.occupancy_bytes
246 + self.color_offsets_bytes
247 + self.colors_bytes
248 + (self.total_chunks as u64) * 4 // chunk_colors_base
249 + (u64::from(self.total_chunks).div_ceil(32)) * 4 // chunk_occupancy
250 }
251}
252
253fn create_storage(device: &wgpu::Device, label: &str, data: &[u32]) -> wgpu::Buffer {
254 device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
255 label: Some(label),
256 contents: bytemuck::cast_slice(data),
257 usage: wgpu::BufferUsages::STORAGE,
258 })
259}
260
/// Round `n` up to the nearest power of 2. `0` and `1` both return
/// `1`. Used to derive a GPU.7 [`GridUpload::pool_dims`] from a
/// non-pow2 `chunks_dims`.
///
/// # Panics
/// If `n` is greater than 2³¹: the next power of two would be 2³²,
/// which does not fit in a `u32`. Failing loudly here avoids a
/// silently wrong (too small) pool dimension.
#[must_use]
pub fn ceil_pow2(n: u32) -> u32 {
    n.checked_next_power_of_two()
        .expect("ceil_pow2: no power of two >= n fits in u32")
}
271
/// Compute the smallest axis-aligned bounding box that contains
/// every chunk index yielded by `chunks`. Returns
/// `Some((min_corner, dims))` where `dims[axis] = max - min + 1`,
/// or `None` if `chunks` is empty.
///
/// Extents are computed without intermediate `i32` overflow, so
/// spans wider than `i32::MAX` are reported correctly.
///
/// # Panics
/// If an axis spans the entire `i32` range (`i32::MIN..=i32::MAX`):
/// its extent of 2³² does not fit in a `u32`.
#[must_use]
pub fn bounding_box_of(chunks: impl IntoIterator<Item = [i32; 3]>) -> Option<([i32; 3], [u32; 3])> {
    let mut indices = chunks.into_iter();
    // Seeding with the first element doubles as the emptiness check.
    let first = indices.next()?;
    let (min, max) = indices.fold((first, first), |(mut lo, mut hi), idx| {
        for axis in 0..3 {
            lo[axis] = lo[axis].min(idx[axis]);
            hi[axis] = hi[axis].max(idx[axis]);
        }
        (lo, hi)
    });
    // `abs_diff` yields the exact `max - min` as a `u32`, even when
    // that difference would overflow `i32`.
    let extent = |axis: usize| {
        max[axis]
            .abs_diff(min[axis])
            .checked_add(1)
            .expect("bounding_box_of: axis extent does not fit in u32")
    };
    Some((min, [extent(0), extent(1), extent(2)]))
}
302
303/// Number of u32 words a single chunk's per-chunk occupancy slice
304/// occupies in the concatenated grid occupancy buffer. Useful for
305/// host-side memory budgeting.
306#[must_use]
307pub fn occ_words_per_chunk(vsid: u32) -> u32 {
308 vsid * vsid * OCC_WORDS_PER_COLUMN
309}
310
311/// Z-extent of every chunk — re-export of the `CHUNK_Z` constant
312/// so hosts can budget without pulling `crate::decompress` in.
313pub const GRID_CHUNK_Z: u32 = CHUNK_Z;