Skip to main content

roxlap_gpu/
grid.rs

1//! GPU.4 — grid-of-chunks upload + storage layout.
2//!
3//! Concatenates every chunk of one `roxlap-scene::Grid` into a few
4//! flat storage buffers so a single compute dispatch can outer-DDA
5//! through chunk-space + inner-DDA into any chunk it hits.
6//!
7#![allow(
8    clippy::cast_sign_loss,
9    clippy::cast_lossless,
10    clippy::doc_markdown,
11    clippy::field_reassign_with_default
12)]
13
14//! Memory layout (post-bedrock-strip):
15//!
16//! * `occupancy[meta_idx]` — one chunk's 128 KiB occupancy slice
17//!   starts at `meta_idx * vsid² * OCC_WORDS_PER_COLUMN` u32 words.
18//!   Uniform per chunk (all chunks are vsid² × CHUNK_Z voxels).
19//! * `color_offsets[meta_idx]` — one chunk's `vsid² + 1` u32
20//!   offsets start at `meta_idx * (vsid² + 1)` u32 words. Uniform
21//!   per chunk.
22//! * `colors` — variable per chunk. Per-chunk base index lives in
23//!   `chunk_colors_base[meta_idx]`.
24//! * `chunk_occupancy` — 1 bit per chunk position. Bit at
25//!   `meta_idx` set iff that chunk has any textured voxels. The
26//!   outer DDA uses this to skip empty chunks in one step.
27//!
28//! The `meta_idx` for a chunk at `(chx, chy, chz)` is its row-major
29//! offset within the grid's `chunks_dims` bounding box:
30//!
31//! ```text
32//! rel = chunk_idx - origin_chunk
33//! meta_idx = rel.x + rel.y * chunks_dims.x + rel.z * chunks_dims.x * chunks_dims.y
34//! ```
35
36use wgpu::util::DeviceExt;
37
38use crate::decompress::{ChunkUpload, CHUNK_Z, OCC_WORDS_PER_COLUMN};
39
40/// CPU-side aggregation of a grid's chunks ready to upload. Host
41/// (e.g. `roxlap-scene-demo`) builds this by iterating its
42/// `roxlap-scene::Grid` and calling [`crate::decompress_chunk`] per
43/// materialised chunk.
44pub struct GridUpload {
45    /// Shared XY extent of every chunk in voxels. Matches
46    /// `roxlap-scene::CHUNK_SIZE_XY = 128`.
47    pub vsid: u32,
48    /// Lowest chunk index present in the grid `(min_chx, min_chy,
49    /// min_chz)`. The grid's bounding box runs from `origin_chunk`
50    /// to `origin_chunk + chunks_dims` exclusive.
51    pub origin_chunk: [i32; 3],
52    /// Chunk-count along each axis = `max - min + 1`.
53    pub chunks_dims: [u32; 3],
54    /// GPU.7 slot-pool dimensions for modular chunk indexing.
55    /// Every component MUST be a power of 2. A chunk at index
56    /// `(chx, chy, chz)` maps to slot
57    /// `(chx & (pool_dims.x - 1), chy & (pool_dims.y - 1),
58    /// chz & (pool_dims.z - 1))`. As long as
59    /// `pool_dims_axis ≥ active_range_along_axis`, no two
60    /// simultaneously-resident chunks collide. Set this larger than
61    /// `chunks_dims` only when streaming may install chunks at
62    /// indices outside the initial bbox.
63    pub pool_dims: [u32; 3],
64    /// `(chunk_idx, decompressed)` pairs. Chunks outside the
65    /// pool's collision-free active range are still accepted —
66    /// modular indexing will assign them slots; the caller is
67    /// responsible for avoiding collisions with other resident
68    /// chunks.
69    pub chunks: Vec<([i32; 3], ChunkUpload)>,
70}
71
72impl GridUpload {
73    #[must_use]
74    pub fn total_chunks(&self) -> u32 {
75        self.chunks_dims[0] * self.chunks_dims[1] * self.chunks_dims[2]
76    }
77
78    /// Default GPU.7 [`Self::pool_dims`] derived from
79    /// `chunks_dims` — each axis rounded up to the next power of 2.
80    /// Use this when the grid is static + slots map 1:1 to bbox
81    /// positions; for streaming grids, callers should pick a
82    /// larger pool that covers `2 × r_active_chunks + 1` along
83    /// each axis.
84    #[must_use]
85    pub fn default_pool_dims(chunks_dims: [u32; 3]) -> [u32; 3] {
86        [
87            ceil_pow2(chunks_dims[0]),
88            ceil_pow2(chunks_dims[1]),
89            ceil_pow2(chunks_dims[2]),
90        ]
91    }
92
93    /// Linear chunk index `(meta_idx)` for `(chx, chy, chz)` in the
94    /// grid's row-major bounding-box order. `None` if the index is
95    /// outside the grid.
96    #[must_use]
97    pub fn meta_idx_of(&self, chunk_idx: [i32; 3]) -> Option<u32> {
98        let dx = chunk_idx[0] - self.origin_chunk[0];
99        let dy = chunk_idx[1] - self.origin_chunk[1];
100        let dz = chunk_idx[2] - self.origin_chunk[2];
101        if dx < 0
102            || dy < 0
103            || dz < 0
104            || (dx as u32) >= self.chunks_dims[0]
105            || (dy as u32) >= self.chunks_dims[1]
106            || (dz as u32) >= self.chunks_dims[2]
107        {
108            return None;
109        }
110        Some(
111            (dx as u32)
112                + (dy as u32) * self.chunks_dims[0]
113                + (dz as u32) * self.chunks_dims[0] * self.chunks_dims[1],
114        )
115    }
116}
117
118/// GPU-resident storage for one grid's chunks. Lives until the
119/// host drops it; in GPU.6 (edit invalidation) we'll re-upload
120/// individual chunks via partial buffer writes.
121pub struct GpuGridResident {
122    pub vsid: u32,
123    pub origin_chunk: [i32; 3],
124    pub chunks_dims: [u32; 3],
125    pub total_chunks: u32,
126    pub occupancy: wgpu::Buffer,
127    pub color_offsets: wgpu::Buffer,
128    pub colors: wgpu::Buffer,
129    pub chunk_colors_base: wgpu::Buffer,
130    pub chunk_occupancy: wgpu::Buffer,
131    pub occupancy_bytes: u64,
132    pub color_offsets_bytes: u64,
133    pub colors_bytes: u64,
134}
135
136impl GpuGridResident {
137    /// Pack + upload `info`. All buffers are sized to fit
138    /// `total_chunks` regardless of which chunks are actually
139    /// present in `info.chunks` — missing chunks have their
140    /// `chunk_occupancy` bit clear and their per-chunk slices
141    /// zero-filled, which the GPU.4 marcher reads as "no voxels
142    /// here, skip".
143    ///
144    /// # Panics
145    /// If a chunk's `vsid` doesn't match `info.vsid`.
146    pub fn upload(device: &wgpu::Device, info: &GridUpload) -> Self {
147        let vsid = info.vsid;
148        let vsid_usize = vsid as usize;
149        let cols_per_chunk = vsid_usize * vsid_usize;
150        let occ_words_per_chunk = cols_per_chunk * (OCC_WORDS_PER_COLUMN as usize);
151        let offsets_words_per_chunk = cols_per_chunk + 1;
152
153        let total_chunks = info.total_chunks();
154        let total_chunks_usize = total_chunks as usize;
155
156        let mut occupancy = vec![0u32; total_chunks_usize * occ_words_per_chunk];
157        let mut color_offsets = vec![0u32; total_chunks_usize * offsets_words_per_chunk];
158        let mut chunk_colors_base = vec![0u32; total_chunks_usize];
159        let mut chunk_occupancy = vec![0u32; total_chunks_usize.div_ceil(32)];
160        let mut colors: Vec<u32> = Vec::new();
161
162        let mut populated = 0u32;
163        for (chunk_idx, chunk) in &info.chunks {
164            let Some(meta_idx) = info.meta_idx_of(*chunk_idx) else {
165                continue;
166            };
167            assert_eq!(
168                chunk.vsid, vsid,
169                "GpuGridResident: chunk vsid {} disagrees with grid vsid {}",
170                chunk.vsid, vsid,
171            );
172            let meta_idx_us = meta_idx as usize;
173
174            // Per-chunk occupancy slice.
175            let occ_start = meta_idx_us * occ_words_per_chunk;
176            occupancy[occ_start..occ_start + occ_words_per_chunk].copy_from_slice(&chunk.occupancy);
177
178            // Per-chunk color_offsets slice. Rebase: the per-column
179            // offsets in `chunk.color_offsets` are local to this
180            // chunk's colours; the GPU shader adds
181            // `chunk_colors_base[meta_idx]` on the outside, so the
182            // values copy in verbatim.
183            let off_start = meta_idx_us * offsets_words_per_chunk;
184            color_offsets[off_start..off_start + offsets_words_per_chunk]
185                .copy_from_slice(&chunk.color_offsets);
186
187            // Append this chunk's colours; record the base.
188            chunk_colors_base[meta_idx_us] =
189                u32::try_from(colors.len()).expect("colours fit in u32");
190            colors.extend_from_slice(&chunk.colors);
191
192            // Mark chunk as non-empty iff it has any colour entries
193            // (every textured voxel writes one). Empty chunks (all
194            // air) skip cheaply in the outer DDA.
195            if !chunk.colors.is_empty() {
196                chunk_occupancy[meta_idx_us >> 5] |= 1u32 << (meta_idx_us & 31);
197                populated += 1;
198            }
199        }
200
201        // Storage buffers can't be empty — `colors` may be all-zero
202        // length when every chunk is air. Pad with one sentinel u32
203        // (never read because no chunk's `chunk_occupancy` bit is
204        // set in that case).
205        if colors.is_empty() {
206            colors.push(0);
207        }
208
209        let occupancy_buf = create_storage(device, "roxlap-gpu grid.occupancy", &occupancy);
210        let color_offsets_buf =
211            create_storage(device, "roxlap-gpu grid.color_offsets", &color_offsets);
212        let colors_buf = create_storage(device, "roxlap-gpu grid.colors", &colors);
213        let chunk_colors_base_buf = create_storage(
214            device,
215            "roxlap-gpu grid.chunk_colors_base",
216            &chunk_colors_base,
217        );
218        let chunk_occupancy_buf =
219            create_storage(device, "roxlap-gpu grid.chunk_occupancy", &chunk_occupancy);
220
221        let occupancy_bytes = (occupancy.len() * 4) as u64;
222        let color_offsets_bytes = (color_offsets.len() * 4) as u64;
223        let colors_bytes = (colors.len() * 4) as u64;
224        let _ = populated; // reserved for future telemetry
225
226        Self {
227            vsid,
228            origin_chunk: info.origin_chunk,
229            chunks_dims: info.chunks_dims,
230            total_chunks,
231            occupancy: occupancy_buf,
232            color_offsets: color_offsets_buf,
233            colors: colors_buf,
234            chunk_colors_base: chunk_colors_base_buf,
235            chunk_occupancy: chunk_occupancy_buf,
236            occupancy_bytes,
237            color_offsets_bytes,
238            colors_bytes,
239        }
240    }
241
242    /// Total resident bytes — sum of all five storage buffers.
243    /// Used by the demo's startup print.
244    pub fn resident_bytes(&self) -> u64 {
245        self.occupancy_bytes
246            + self.color_offsets_bytes
247            + self.colors_bytes
248            + (self.total_chunks as u64) * 4 // chunk_colors_base
249            + (u64::from(self.total_chunks).div_ceil(32)) * 4 // chunk_occupancy
250    }
251}
252
253fn create_storage(device: &wgpu::Device, label: &str, data: &[u32]) -> wgpu::Buffer {
254    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
255        label: Some(label),
256        contents: bytemuck::cast_slice(data),
257        usage: wgpu::BufferUsages::STORAGE,
258    })
259}
260
/// Rounds `n` up to the nearest power of 2. `0` and `1` both return `1`.
///
/// Used to derive a GPU.7 [`GridUpload::pool_dims`] from a non-pow2
/// `chunks_dims`.
///
/// # Panics
/// If `n` is greater than `2^31`: the next power of two would be `2^32`,
/// which does not fit in a `u32`.
#[must_use]
pub fn ceil_pow2(n: u32) -> u32 {
    // The standard-library helper already maps 0 to 1 and reports the
    // unrepresentable case as `None` rather than shifting out of range.
    n.checked_next_power_of_two()
        .expect("ceil_pow2: next power of two does not fit in u32")
}
271
/// Computes the smallest axis-aligned box of chunk indices that contains
/// every index yielded by `chunks`.
///
/// Returns `Some((min, dims))`, where `min` is the per-axis minimum index
/// and `dims[i] = max[i] - min[i] + 1`, or `None` if `chunks` yields
/// nothing.
///
/// # Panics
/// If the indices along one axis span the entire `i32` range, because
/// that extent (`2^32`) does not fit in a `u32`.
#[must_use]
pub fn bounding_box_of(chunks: impl IntoIterator<Item = [i32; 3]>) -> Option<([i32; 3], [u32; 3])> {
    let mut indices = chunks.into_iter();
    // Seeding both corners with the first index handles the empty case
    // (early `None`) without sentinel values.
    let first = indices.next()?;
    let (min, max) = indices.fold((first, first), |(mut lo, mut hi), idx| {
        for axis in 0..3 {
            lo[axis] = lo[axis].min(idx[axis]);
            hi[axis] = hi[axis].max(idx[axis]);
        }
        (lo, hi)
    });

    // `abs_diff` returns the exact unsigned distance, so spans wider than
    // `i32::MAX` no longer overflow the way `max - min` did in `i32`.
    let extent = |axis: usize| -> u32 {
        max[axis]
            .abs_diff(min[axis])
            .checked_add(1)
            .expect("bounding_box_of: axis extent does not fit in u32")
    };
    Some((min, [extent(0), extent(1), extent(2)]))
}
302
303/// Number of u32 words a single chunk's per-chunk occupancy slice
304/// occupies in the concatenated grid occupancy buffer. Useful for
305/// host-side memory budgeting.
306#[must_use]
307pub fn occ_words_per_chunk(vsid: u32) -> u32 {
308    vsid * vsid * OCC_WORDS_PER_COLUMN
309}
310
311/// Z-extent of every chunk — re-export of the `CHUNK_Z` constant
312/// so hosts can budget without pulling `crate::decompress` in.
313pub const GRID_CHUNK_Z: u32 = CHUNK_Z;