Skip to main content

roxlap_gpu/
sprite_model.rs

1//! GPU.10 — KV6 sprite as a DDA-marchable voxel model.
2//!
3//! Unlike the GPU.9 splatter (one thread per voxel, screen-space
4//! squares, overdraw + atomic contention), a sprite model is a small
5//! voxel volume the precise ray-DDA marches one ray per pixel —
6//! crisp, correct occlusion, no overdraw. This is the GPU.10.0 single
7//! sprite; instancing + tiling + LOD come in later sub-substages.
8//!
9//! The volume reuses the chunk occupancy/colour scheme but sized to
10//! the KV6 bbox: per-column occupancy bitmask (`occ_words_per_col`
11//! u32s, `CHUNK_Z`-style 32-bits-per-word), a flat colour array in
12//! ascending-z order per column, and a `color_offsets` prefix table.
13//! The shader finds a voxel's colour by `offset[col] + popcount(bits
14//! below z)`, so colours MUST be ascending-z (we sort per column).
15
16#![allow(
17    clippy::cast_precision_loss,
18    clippy::cast_possible_truncation,
19    clippy::cast_possible_wrap,
20    clippy::cast_sign_loss,
21    clippy::many_single_char_names,
22    clippy::similar_names
23)]
24
25use bytemuck::{Pod, Zeroable};
26use roxlap_formats::kv6::Kv6;
27use roxlap_formats::sprite::Sprite;
28
29/// CPU-built voxel volume for one KV6 model.
30#[derive(Debug, Clone)]
31pub struct SpriteModel {
32    /// Voxel extent `(mx, my, mz)`.
33    pub dims: [u32; 3],
34    /// `ceil(mz / 32)` — u32 words of occupancy per (x, y) column.
35    pub occ_words_per_col: u32,
36    /// KV6 pivot in model-local voxel space.
37    pub pivot: [f32; 3],
38    /// Per-column occupancy bitmask, `mx * my * occ_words_per_col`.
39    pub occupancy: Vec<u32>,
40    /// Voxel colours, ascending z within each column.
41    pub colors: Vec<u32>,
42    /// Prefix sums: `color_offsets[col]` is the first colour index of
43    /// column `col`; length `mx * my + 1`.
44    pub color_offsets: Vec<u32>,
45    /// World-space size of one voxel of this model (GPU.10.4 LOD): 1.0
46    /// at mip-0, doubling each [`SpriteModel::downsample`]. The shader
47    /// divides the local ray by this so a coarse voxel spans the right
48    /// world extent and the march `t` stays in world units.
49    pub voxel_world_size: f32,
50}
51
52/// Build the DDA volume from a KV6. Columns are packed in
53/// `x + y*mx` order; each column's voxels are sorted ascending by z
54/// so the shader's popcount-rank colour lookup is correct.
55///
56/// # Panics
57/// If the KV6's `ylen` counters disagree with `voxels.len()` (a
58/// malformed model).
59#[must_use]
60pub fn build_sprite_model(kv6: &Kv6) -> SpriteModel {
61    let (mx, my, mz) = (kv6.xsiz, kv6.ysiz, kv6.zsiz);
62    let occ_words_per_col = mz.div_ceil(32).max(1);
63    let cols = (mx * my) as usize;
64
65    let mut occupancy = vec![0u32; cols * occ_words_per_col as usize];
66    let mut color_offsets = vec![0u32; cols + 1];
67    let mut colors: Vec<u32> = Vec::with_capacity(kv6.voxels.len());
68
69    // Pass 1 — consume voxels in KV6 storage order (x-outer / y-inner)
70    // into per-column buckets keyed by `col = x + y*mx`.
71    let mut buckets: Vec<Vec<(u16, u32)>> = vec![Vec::new(); cols];
72    let mut voxel_iter = kv6.voxels.iter();
73    for x in 0..mx {
74        for y in 0..my {
75            let col = (x + y * mx) as usize;
76            let count = kv6.ylen[x as usize][y as usize];
77            for _ in 0..count {
78                let v = voxel_iter.next().expect("KV6 ylen / voxels.len mismatch");
79                buckets[col].push((v.z, v.col));
80            }
81        }
82    }
83
84    // Pass 2 — emit in COLUMN-INDEX order so `color_offsets` is a true
85    // monotonic prefix sum (the shader indexes by `col` either way, but
86    // structural edits / mip rebuilds rely on monotonic offsets). Each
87    // column's voxels sorted ascending z for the popcount-rank lookup.
88    for (col, bucket) in buckets.iter_mut().enumerate() {
89        color_offsets[col] = colors.len() as u32;
90        bucket.sort_by_key(|(z, _)| *z);
91        for &(z, col_rgba) in bucket.iter() {
92            let z = u32::from(z);
93            let base = col * occ_words_per_col as usize + (z >> 5) as usize;
94            occupancy[base] |= 1u32 << (z & 31);
95            colors.push(col_rgba);
96        }
97    }
98    color_offsets[cols] = colors.len() as u32;
99
100    SpriteModel {
101        dims: [mx, my, mz],
102        occ_words_per_col,
103        pivot: [kv6.xpiv, kv6.ypiv, kv6.zpiv],
104        occupancy,
105        color_offsets,
106        colors,
107        voxel_world_size: 1.0,
108    }
109}
110
111/// Per-instance transform consumed by the model-DDA shader: the
112/// inverse model→world rotation (so a world ray can be brought into
113/// model-local space) plus the instance's world position. Stored as
114/// three padded columns for std140/std430 (`mat3x3` 16-byte columns).
115#[repr(C)]
116#[derive(Clone, Copy, Pod, Zeroable, Debug)]
117pub struct SpriteInstanceTransform {
118    /// Inverse of `[s | h | f]`, column-major, each column padded to
119    /// `vec4`. `inv_rot * v = c0*v.x + c1*v.y + c2*v.z`.
120    pub inv_rot: [[f32; 4]; 3],
121    /// Instance world position (the KV6 pivot maps here).
122    pub pos: [f32; 3],
123    _pad: f32,
124}
125
126impl SpriteInstanceTransform {
127    /// Build from a sprite pose. `s/h/f` are the model→world basis
128    /// columns; we invert them so the shader can map world→local.
129    #[must_use]
130    pub fn from_sprite(sprite: &Sprite) -> Self {
131        let inv = mat3_inverse([sprite.s, sprite.h, sprite.f]);
132        Self {
133            inv_rot: [
134                [inv[0][0], inv[0][1], inv[0][2], 0.0],
135                [inv[1][0], inv[1][1], inv[1][2], 0.0],
136                [inv[2][0], inv[2][1], inv[2][2], 0.0],
137            ],
138            pos: sprite.p,
139            _pad: 0.0,
140        }
141    }
142}
143
144/// A registry of sprite models. Instances reference a model by
145/// `model_id`, which is a **LOD chain** id: each chain holds one or
146/// more concrete mip levels (finest first; GPU.10.4), and the renderer
147/// picks the level per instance by distance. Identical KV6s are added
148/// once and shared by many instances. **Copy-on-modify**:
149/// [`Self::fork`] deep-copies a chain so edits to the fork leave the
150/// parent (and its instances) intact.
151#[derive(Debug, Clone, Default)]
152pub struct SpriteModelRegistry {
153    /// Concrete mip-level volumes (the GPU buffers concatenate these).
154    entries: Vec<SpriteModel>,
155    /// `chains[model_id]` = entry ids, finest (mip-0) first.
156    chains: Vec<Vec<u32>>,
157}
158
159impl SpriteModelRegistry {
160    #[must_use]
161    pub fn new() -> Self {
162        Self::default()
163    }
164
165    fn push_entry(&mut self, model: SpriteModel) -> u32 {
166        let id = self.entries.len() as u32;
167        self.entries.push(model);
168        id
169    }
170
171    /// Register a single-level (no-LOD) model; returns its `model_id`.
172    pub fn add(&mut self, model: SpriteModel) -> u32 {
173        let e = self.push_entry(model);
174        let id = self.chains.len() as u32;
175        self.chains.push(vec![e]);
176        id
177    }
178
179    /// Register a model with up to `max_levels` LOD mips (each a 2×
180    /// [`SpriteModel::downsample`] of the previous; stops early once a
181    /// level collapses to 1³). Returns its `model_id`.
182    pub fn add_lod(&mut self, model: SpriteModel, max_levels: u32) -> u32 {
183        let mut levels = vec![self.push_entry(model.clone())];
184        let mut cur = model;
185        for _ in 1..max_levels.max(1) {
186            if cur.dims == [1, 1, 1] {
187                break;
188            }
189            cur = cur.downsample();
190            levels.push(self.push_entry(cur.clone()));
191        }
192        let id = self.chains.len() as u32;
193        self.chains.push(levels);
194        id
195    }
196
197    /// Copy-on-modify: deep-copy every level of chain `parent` into new
198    /// entries + a new chain, and return its `model_id`. The fork owns
199    /// independent voxel data, so mutating it does not affect the
200    /// parent or any instance still pointing at it.
201    ///
202    /// # Panics
203    /// If `parent` is not a registered `model_id`.
204    pub fn fork(&mut self, parent: u32) -> u32 {
205        let src = self.chains[parent as usize].clone();
206        let levels: Vec<u32> = src
207            .iter()
208            .map(|&e| {
209                let copy = self.entries[e as usize].clone();
210                self.push_entry(copy)
211            })
212            .collect();
213        let id = self.chains.len() as u32;
214        self.chains.push(levels);
215        id
216    }
217
218    /// The finest (mip-0) model of chain `id`.
219    #[must_use]
220    pub fn model(&self, id: u32) -> &SpriteModel {
221        &self.entries[self.chains[id as usize][0] as usize]
222    }
223
224    /// Mutable access to the finest (mip-0) model for editing — the
225    /// copy-on-modify entry point (typically on a [`Self::fork`]).
226    /// After a *structural* edit (occupancy/dims), call
227    /// [`Self::rebuild_lod`] so the coarser mips match; a pure recolour
228    /// can use [`Self::recolor_chain`] instead.
229    pub fn model_mut(&mut self, id: u32) -> &mut SpriteModel {
230        let e = self.chains[id as usize][0] as usize;
231        &mut self.entries[e]
232    }
233
234    /// Recolour every LOD level of chain `id` (so a forked tint shows
235    /// at all distances).
236    pub fn recolor_chain(&mut self, id: u32, f: impl Fn(u32) -> u32 + Copy) {
237        for li in 0..self.chains[id as usize].len() {
238            let e = self.chains[id as usize][li] as usize;
239            self.entries[e].recolor(f);
240        }
241    }
242
243    /// Regenerate chain `id`'s coarser mip levels from its (possibly
244    /// just-edited) mip-0. Run after a structural edit via
245    /// [`Self::model_mut`] so the LOD ladder stays consistent. No-op
246    /// for a single-level (no-LOD) chain.
247    pub fn rebuild_lod(&mut self, id: u32) {
248        let levels = self.chains[id as usize].clone();
249        if levels.len() <= 1 {
250            return;
251        }
252        let mut cur = self.entries[levels[0] as usize].clone();
253        for &e in &levels[1..] {
254            cur = cur.downsample();
255            self.entries[e as usize] = cur.clone();
256        }
257    }
258
259    /// Number of LOD chains (distinct `model_id`s).
260    #[must_use]
261    pub fn len(&self) -> usize {
262        self.chains.len()
263    }
264
265    #[must_use]
266    pub fn is_empty(&self) -> bool {
267        self.chains.is_empty()
268    }
269}
270
271impl SpriteModel {
272    /// Recolour every voxel via `f(old_rgba) -> new_rgba`. Structure
273    /// (occupancy / offsets) is untouched, so this is a cheap in-place
274    /// edit — handy on a [`SpriteModelRegistry::fork`] to make a tinted
275    /// variant. For structural edits, mutate the public occupancy /
276    /// colours / dims directly (via `model_mut`) then rebuild the LOD.
277    pub fn recolor(&mut self, f: impl Fn(u32) -> u32) {
278        for c in &mut self.colors {
279            *c = f(*c);
280        }
281    }
282
283    /// GPU.12 — structural edit of a single voxel within the model's
284    /// existing bounds. `Some(rgba)` sets/replaces the voxel at
285    /// `(x, y, z)`; `None` clears it. Maintains the ascending-z colour
286    /// invariant by inserting/removing at the voxel's popcount rank and
287    /// shifting the affected columns' `color_offsets`. Returns `true`
288    /// if the model changed. Out-of-bounds coordinates are ignored
289    /// (returns `false`) — growing `dims` is a separate concern.
290    ///
291    /// After editing, call [`SpriteModelRegistry::rebuild_lod`] to
292    /// refresh coarser mips, then re-upload via `set_sprite_instances`.
293    pub fn set_voxel(&mut self, x: u32, y: u32, z: u32, color: Option<u32>) -> bool {
294        if x >= self.dims[0] || y >= self.dims[1] || z >= self.dims[2] {
295            return false;
296        }
297        let owpc = self.occ_words_per_col as usize;
298        let cols = (self.dims[0] * self.dims[1]) as usize;
299        let col = (x + y * self.dims[0]) as usize;
300        let base = col * owpc;
301        let zw = (z >> 5) as usize;
302        let zb = z & 31;
303
304        // Rank = solid voxels strictly below z in this column.
305        let mut rank = 0usize;
306        for w in 0..zw {
307            rank += self.occupancy[base + w].count_ones() as usize;
308        }
309        let below_mask = if zb > 0 { (1u32 << zb) - 1 } else { 0 };
310        rank += (self.occupancy[base + zw] & below_mask).count_ones() as usize;
311        let idx = self.color_offsets[col] as usize + rank;
312        let was_set = (self.occupancy[base + zw] >> zb) & 1 == 1;
313
314        if let Some(rgba) = color {
315            if was_set {
316                self.colors[idx] = rgba; // replace in place
317            } else {
318                self.occupancy[base + zw] |= 1u32 << zb;
319                self.colors.insert(idx, rgba);
320                for c in &mut self.color_offsets[col + 1..=cols] {
321                    *c += 1;
322                }
323            }
324            true
325        } else {
326            if !was_set {
327                return false;
328            }
329            self.occupancy[base + zw] &= !(1u32 << zb);
330            self.colors.remove(idx);
331            for c in &mut self.color_offsets[col + 1..=cols] {
332                *c -= 1;
333            }
334            true
335        }
336    }
337
338    /// Radius of a bounding sphere centred at the instance position
339    /// (the pivot maps there): the farthest bbox corner from the
340    /// pivot. Used for frustum culling. Assumes a unit basis; scaled
341    /// instances would multiply this by their max basis length.
342    #[must_use]
343    pub fn bound_radius(&self) -> f32 {
344        let mut r2 = 0.0_f32;
345        for &cx in &[0.0, self.dims[0] as f32] {
346            for &cy in &[0.0, self.dims[1] as f32] {
347                for &cz in &[0.0, self.dims[2] as f32] {
348                    let d = [cx - self.pivot[0], cy - self.pivot[1], cz - self.pivot[2]];
349                    r2 = r2.max(d[0] * d[0] + d[1] * d[1] + d[2] * d[2]);
350                }
351            }
352        }
353        r2.sqrt()
354    }
355
356    /// GPU.10.4 — 2× voxel downsample for the next LOD level. A coarse
357    /// voxel is solid if any of its 2×2×2 fine voxels is, coloured by
358    /// their per-channel average. Dims/pivot halve and
359    /// `voxel_world_size` doubles, so the coarse model occupies the
360    /// same world box at half the resolution (origin-corner aligned).
361    #[must_use]
362    #[allow(clippy::manual_checked_ops)] // `n > 0` guards 4 divisions, not one checked_div
363    pub fn downsample(&self) -> SpriteModel {
364        let [fx, fy, fz] = self.dims;
365        let fidx = |x: u32, y: u32, z: u32| (x + y * fx + z * fx * fy) as usize;
366
367        // Reconstruct dense fine voxels (solid flag + colour).
368        let mut solid = vec![false; (fx * fy * fz) as usize];
369        let mut fine = vec![0u32; (fx * fy * fz) as usize];
370        for x in 0..fx {
371            for y in 0..fy {
372                let col = (x + y * fx) as usize;
373                let base = col * self.occ_words_per_col as usize;
374                let off = self.color_offsets[col] as usize;
375                let mut seen = 0usize;
376                for z in 0..fz {
377                    let w = base + (z >> 5) as usize;
378                    if (self.occupancy[w] >> (z & 31)) & 1 == 1 {
379                        fine[fidx(x, y, z)] = self.colors[off + seen];
380                        solid[fidx(x, y, z)] = true;
381                        seen += 1;
382                    }
383                }
384            }
385        }
386
387        let nx = fx.div_ceil(2).max(1);
388        let ny = fy.div_ceil(2).max(1);
389        let nz = fz.div_ceil(2).max(1);
390        let owpc = nz.div_ceil(32).max(1);
391        let cols = (nx * ny) as usize;
392        let mut occupancy = vec![0u32; cols * owpc as usize];
393        let mut color_offsets = vec![0u32; cols + 1];
394        let mut colors: Vec<u32> = Vec::new();
395
396        // Emit in column-index order (`ccol = cx + cy*nx`), cy outer,
397        // so `color_offsets` is a monotonic prefix sum like build's.
398        for cy in 0..ny {
399            for cx in 0..nx {
400                let ccol = (cx + cy * nx) as usize;
401                color_offsets[ccol] = colors.len() as u32;
402                for cz in 0..nz {
403                    let (mut a, mut r, mut g, mut b, mut n) = (0u32, 0u32, 0u32, 0u32, 0u32);
404                    for dz in 0..2 {
405                        for dy in 0..2 {
406                            for dx in 0..2 {
407                                let (x, y, z) = (2 * cx + dx, 2 * cy + dy, 2 * cz + dz);
408                                if x < fx && y < fy && z < fz && solid[fidx(x, y, z)] {
409                                    let c = fine[fidx(x, y, z)];
410                                    a += (c >> 24) & 0xff;
411                                    r += (c >> 16) & 0xff;
412                                    g += (c >> 8) & 0xff;
413                                    b += c & 0xff;
414                                    n += 1;
415                                }
416                            }
417                        }
418                    }
419                    if n > 0 {
420                        let avg = ((a / n) << 24) | ((r / n) << 16) | ((g / n) << 8) | (b / n);
421                        let base = ccol * owpc as usize + (cz >> 5) as usize;
422                        occupancy[base] |= 1u32 << (cz & 31);
423                        colors.push(avg);
424                    }
425                }
426            }
427        }
428        color_offsets[cols] = colors.len() as u32;
429
430        SpriteModel {
431            dims: [nx, ny, nz],
432            occ_words_per_col: owpc,
433            pivot: [
434                self.pivot[0] * 0.5,
435                self.pivot[1] * 0.5,
436                self.pivot[2] * 0.5,
437            ],
438            occupancy,
439            colors,
440            color_offsets,
441            voxel_world_size: self.voxel_world_size * 2.0,
442        }
443    }
444}
445
446/// View frustum for CPU instance culling, in world space. Built each
447/// frame from the world camera. `half_w`/`half_h` are the tangents of
448/// the half-FOV (so the side planes are `|x| <= half_w * z` etc. in
449/// camera space).
450#[derive(Clone, Copy, Debug)]
451pub struct ViewFrustum {
452    pub pos: [f32; 3],
453    pub right: [f32; 3],
454    pub down: [f32; 3],
455    pub forward: [f32; 3],
456    pub half_w: f32,
457    pub half_h: f32,
458    pub far: f32,
459}
460
461/// CPU cull record: the GPU instance + its world bounding sphere.
462#[derive(Clone, Copy)]
463struct CullInstance {
464    /// Instance transform + a placeholder `model_id`; the cull
465    /// overwrites `model_id` with the distance-chosen LOD entry.
466    gpu: SpriteInstanceGpu,
467    /// LOD chain this instance draws (the user-facing `model_id`).
468    chain_id: u32,
469    center: [f32; 3],
470    radius: f32,
471}
472
473fn dot3(a: [f32; 3], b: [f32; 3]) -> f32 {
474    a[0] * b[0] + a[1] * b[1] + a[2] * b[2]
475}
476
477/// One sprite instance: a model reference + world pose.
478#[derive(Debug, Clone, Copy)]
479pub struct SpriteInstance {
480    pub model_id: u32,
481    pub transform: SpriteInstanceTransform,
482}
483
484/// GPU per-model metadata: where this model's data starts in the
485/// shared registry buffers + its dims/pivot. Mirrors `ModelMeta` in
486/// the shader (std430, 48 bytes).
487#[repr(C)]
488#[derive(Clone, Copy, Pod, Zeroable, Debug)]
489struct SpriteModelMeta {
490    occupancy_offset: u32,
491    colors_offset: u32,
492    color_offsets_offset: u32,
493    occ_words_per_col: u32,
494    dims: [u32; 3],
495    _pad0: u32,
496    pivot: [f32; 3],
497    /// GPU.10.4 — world size of one voxel of this (mip) entry.
498    voxel_world_size: f32,
499}
500
501/// GPU per-instance record. Mirrors `Instance` in the shader (std430,
502/// 64 bytes): inverse rotation columns + position + model id.
503#[repr(C)]
504#[derive(Clone, Copy, Pod, Zeroable, Debug)]
505struct SpriteInstanceGpu {
506    inv_rot0: [f32; 4],
507    inv_rot1: [f32; 4],
508    inv_rot2: [f32; 4],
509    pos: [f32; 3],
510    model_id: u32,
511}
512
513/// Invert a 3×3 matrix given as basis columns `[c0, c1, c2]`,
514/// returning the inverse as columns. For an orthonormal basis this is
515/// the transpose; the general path covers rotation + non-unit scale.
516#[must_use]
517fn mat3_inverse(cols: [[f32; 3]; 3]) -> [[f32; 3]; 3] {
518    let [a, b, c] = cols; // columns
519                          // Determinant via scalar triple product a · (b × c).
520    let cross = |u: [f32; 3], v: [f32; 3]| {
521        [
522            u[1] * v[2] - u[2] * v[1],
523            u[2] * v[0] - u[0] * v[2],
524            u[0] * v[1] - u[1] * v[0],
525        ]
526    };
527    let bc = cross(b, c);
528    let ca = cross(c, a);
529    let ab = cross(a, b);
530    let det = a[0] * bc[0] + a[1] * bc[1] + a[2] * bc[2];
531    let inv_det = if det.abs() < 1e-12 { 0.0 } else { 1.0 / det };
532    // Inverse rows are (b×c, c×a, a×b)/det; return as columns of the
533    // inverse, i.e. transpose of those rows.
534    [
535        [bc[0] * inv_det, ca[0] * inv_det, ab[0] * inv_det],
536        [bc[1] * inv_det, ca[1] * inv_det, ab[1] * inv_det],
537        [bc[2] * inv_det, ca[2] * inv_det, ab[2] * inv_det],
538    ]
539}
540
541/// GPU-resident registry + instances: every model's occupancy /
542/// colours / offsets concatenated into shared storage buffers, a
543/// per-model metadata table, and a capacity-sized instance buffer
544/// rewritten each frame with the frustum-visible subset (GPU.10.2).
545/// One bind group serves all models (same approach as the multi-grid
546/// scene).
547pub struct SpriteRegistryResident {
548    pub occupancy: wgpu::Buffer,
549    pub colors: wgpu::Buffer,
550    pub color_offsets: wgpu::Buffer,
551    pub model_meta: wgpu::Buffer,
552    /// Holds up to `instance_capacity` instances; the visible subset
553    /// is packed into `[0, count)` each frame by [`Self::cull_bin_upload`].
554    pub instances: wgpu::Buffer,
555    pub instance_capacity: u32,
556    /// GPU.10.3 — per-tile `(offset, count)` into `tile_instances`,
557    /// flat `2 * tiles_x * tiles_y` u32s. Grown to fit the screen.
558    pub tile_ranges: wgpu::Buffer,
559    tile_ranges_cap: u32,
560    /// GPU.10.3 — flat list of visible-instance indices grouped by
561    /// tile. Grown to fit the per-frame total.
562    pub tile_instances: wgpu::Buffer,
563    tile_instances_cap: u32,
564    /// CPU cull records (full set), with precomputed bounding spheres.
565    cull: Vec<CullInstance>,
566    /// GPU.10.4 — LOD chains: `chains[chain_id]` = entry ids, finest
567    /// first. The cull picks a level by distance and writes its entry
568    /// id into the packed instance's `model_id`.
569    chains: Vec<Vec<u32>>,
570}
571
572impl SpriteRegistryResident {
573    /// Concatenate `registry`'s models into shared buffers and prepare
574    /// `instances` for per-frame culling. Model-relative indices stay
575    /// as built; the shader adds each model's base offset from the
576    /// metadata table.
577    #[must_use]
578    pub fn upload(
579        device: &wgpu::Device,
580        registry: &SpriteModelRegistry,
581        instances: &[SpriteInstance],
582    ) -> Self {
583        let mut all_occ: Vec<u32> = Vec::new();
584        let mut all_colors: Vec<u32> = Vec::new();
585        let mut all_offsets: Vec<u32> = Vec::new();
586        let mut meta: Vec<SpriteModelMeta> = Vec::with_capacity(registry.entries.len());
587
588        // One meta + concatenated data per concrete (mip-level) entry.
589        for m in &registry.entries {
590            meta.push(SpriteModelMeta {
591                occupancy_offset: all_occ.len() as u32,
592                colors_offset: all_colors.len() as u32,
593                color_offsets_offset: all_offsets.len() as u32,
594                occ_words_per_col: m.occ_words_per_col,
595                dims: m.dims,
596                _pad0: 0,
597                pivot: m.pivot,
598                voxel_world_size: m.voxel_world_size,
599            });
600            all_occ.extend_from_slice(&m.occupancy);
601            all_colors.extend_from_slice(&m.colors);
602            all_offsets.extend_from_slice(&m.color_offsets);
603        }
604
605        // Per-instance cull records: sphere centred at the instance
606        // position, radius from the chain's finest (mip-0) model.
607        let cull: Vec<CullInstance> = instances
608            .iter()
609            .map(|i| CullInstance {
610                gpu: SpriteInstanceGpu {
611                    inv_rot0: i.transform.inv_rot[0],
612                    inv_rot1: i.transform.inv_rot[1],
613                    inv_rot2: i.transform.inv_rot[2],
614                    pos: i.transform.pos,
615                    model_id: i.model_id, // placeholder; cull rewrites
616                },
617                chain_id: i.model_id,
618                center: i.transform.pos,
619                radius: registry.model(i.model_id).bound_radius(),
620            })
621            .collect();
622
623        // Capacity buffer (COPY_DST so cull can rewrite it each frame),
624        // seeded with the full set so frame 0 is valid pre-cull.
625        let seed: Vec<SpriteInstanceGpu> = cull.iter().map(|c| c.gpu).collect();
626        let instances_buf = {
627            use wgpu::util::DeviceExt;
628            let one = [SpriteInstanceGpu::zeroed()];
629            let src: &[SpriteInstanceGpu] = if seed.is_empty() { &one } else { &seed };
630            device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
631                label: Some("roxlap-gpu sprite_reg.instances"),
632                contents: bytemuck::cast_slice(src),
633                usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
634            })
635        };
636
637        let tile_ranges = storage_dst_u32(device, "roxlap-gpu sprite_reg.tile_ranges", 1);
638        let tile_instances = storage_dst_u32(device, "roxlap-gpu sprite_reg.tile_instances", 1);
639        Self {
640            occupancy: storage_u32(device, "roxlap-gpu sprite_reg.occupancy", &all_occ),
641            colors: storage_u32(device, "roxlap-gpu sprite_reg.colors", &all_colors),
642            color_offsets: storage_u32(device, "roxlap-gpu sprite_reg.color_offsets", &all_offsets),
643            model_meta: storage_pod(device, "roxlap-gpu sprite_reg.model_meta", &meta),
644            instances: instances_buf,
645            instance_capacity: cull.len() as u32,
646            tile_ranges,
647            tile_ranges_cap: 1,
648            tile_instances,
649            tile_instances_cap: 1,
650            cull,
651            chains: registry.chains.clone(),
652        }
653    }
654
655    /// GPU.10.3 — frustum-cull, pack the visible subset into the
656    /// instance buffer, then bin those instances into screen tiles:
657    /// project each visible bounding sphere to a screen AABB and append
658    /// its (visible) index to every overlapped tile. Uploads the
659    /// instance buffer + `tile_ranges` (per-tile offset/count) +
660    /// `tile_instances` (flat grouped indices), growing the tile
661    /// buffers as needed. Returns `(visible_count, tiles_x, tiles_y)`.
662    #[allow(clippy::too_many_arguments)]
663    pub fn cull_bin_upload(
664        &mut self,
665        device: &wgpu::Device,
666        queue: &wgpu::Queue,
667        f: &ViewFrustum,
668        screen_w: u32,
669        screen_h: u32,
670        tile_size: u32,
671        lod_px: f32,
672    ) -> (u32, u32, u32) {
673        let tiles_x = screen_w.div_ceil(tile_size).max(1);
674        let tiles_y = screen_h.div_ceil(tile_size).max(1);
675        let n_tiles = (tiles_x * tiles_y) as usize;
676
677        let nw = (1.0 + f.half_w * f.half_w).sqrt();
678        let nh = (1.0 + f.half_h * f.half_h).sqrt();
679        let cx = screen_w as f32 * 0.5;
680        let cy = screen_h as f32 * 0.5;
681        let px_per_world = cx / f.half_w; // isotropic: == cy/half_h
682        let ts = tile_size as f32;
683        let tx_max = tiles_x as i32 - 1;
684        let ty_max = tiles_y as i32 - 1;
685
686        let mut visible: Vec<SpriteInstanceGpu> = Vec::with_capacity(self.cull.len());
687        // Per-visible tile AABB (tx0, tx1, ty0, ty1) for the bin pass.
688        let mut boxes: Vec<[i32; 4]> = Vec::with_capacity(self.cull.len());
689        let mut counts = vec![0u32; n_tiles];
690
691        for ci in &self.cull {
692            let rel = [
693                ci.center[0] - f.pos[0],
694                ci.center[1] - f.pos[1],
695                ci.center[2] - f.pos[2],
696            ];
697            let z = dot3(rel, f.forward);
698            let r = ci.radius;
699            if z + r < 0.0 || z - r > f.far {
700                continue; // behind / beyond far
701            }
702            let x = dot3(rel, f.right);
703            if (x - f.half_w * z) > r * nw || (-x - f.half_w * z) > r * nw {
704                continue; // right / left
705            }
706            let y = dot3(rel, f.down);
707            if (y - f.half_h * z) > r * nh || (-y - f.half_h * z) > r * nh {
708                continue; // bottom / top
709            }
710
711            // Visible: project the sphere to a screen AABB → tile range.
712            let (tx0, tx1, ty0, ty1) = if z > 1e-3 {
713                let sx = cx + (x / z) * px_per_world;
714                let sy = cy + (y / z) * px_per_world;
715                let sr = (r / z) * px_per_world;
716                (
717                    (((sx - sr) / ts).floor() as i32).clamp(0, tx_max),
718                    (((sx + sr) / ts).floor() as i32).clamp(0, tx_max),
719                    (((sy - sr) / ts).floor() as i32).clamp(0, ty_max),
720                    (((sy + sr) / ts).floor() as i32).clamp(0, ty_max),
721                )
722            } else {
723                // Sphere crosses the camera plane — cover all tiles.
724                (0, tx_max, 0, ty_max)
725            };
726            // GPU.10.4 — pick the LOD level by projected voxel size:
727            // choose the coarsest level whose voxel still covers at
728            // least `lod_px` screen pixels, i.e. step up once a mip-0
729            // voxel would be smaller than that. `lod_px = 1` is the
730            // natural "don't go sub-pixel" threshold; larger values
731            // force LOD in closer (tuning/inspection).
732            let chain = &self.chains[ci.chain_id as usize];
733            let level = if z > 1e-3 && chain.len() > 1 {
734                let voxel_px = px_per_world / z; // mip-0 voxel screen size
735                ((lod_px / voxel_px).log2().ceil().max(0.0) as usize).min(chain.len() - 1)
736            } else {
737                0
738            };
739            let mut g = ci.gpu;
740            g.model_id = chain[level];
741            visible.push(g);
742            boxes.push([tx0, tx1, ty0, ty1]);
743            for ty in ty0..=ty1 {
744                for tx in tx0..=tx1 {
745                    counts[(ty * tiles_x as i32 + tx) as usize] += 1;
746                }
747            }
748        }
749
750        if visible.is_empty() {
751            return (0, tiles_x, tiles_y);
752        }
753
754        // Prefix-sum counts → per-tile offsets; build the flat grouped
755        // index list.
756        let mut tile_ranges = vec![0u32; n_tiles * 2];
757        let mut running = 0u32;
758        for t in 0..n_tiles {
759            tile_ranges[2 * t] = running; // offset
760            tile_ranges[2 * t + 1] = counts[t]; // count
761            running += counts[t];
762        }
763        let total = running as usize;
764        let mut tile_instances = vec![0u32; total.max(1)];
765        let mut cursor: Vec<u32> = (0..n_tiles).map(|t| tile_ranges[2 * t]).collect();
766        for (vis_idx, b) in boxes.iter().enumerate() {
767            for ty in b[2]..=b[3] {
768                for tx in b[0]..=b[1] {
769                    let t = (ty * tiles_x as i32 + tx) as usize;
770                    tile_instances[cursor[t] as usize] = vis_idx as u32;
771                    cursor[t] += 1;
772                }
773            }
774        }
775
776        // Upload: instances + (grown) tile buffers. Grow a tile buffer
777        // only when this frame needs more than its capacity (wgpu has
778        // no Clone on Buffer, so we replace the field in place).
779        queue.write_buffer(&self.instances, 0, bytemuck::cast_slice(&visible));
780        let need_ranges = tile_ranges.len() as u32;
781        if need_ranges > self.tile_ranges_cap {
782            self.tile_ranges_cap = need_ranges.next_power_of_two();
783            self.tile_ranges = storage_dst_u32(
784                device,
785                "roxlap-gpu sprite_reg.tile_ranges",
786                self.tile_ranges_cap,
787            );
788        }
789        let need_inst = tile_instances.len() as u32;
790        if need_inst > self.tile_instances_cap {
791            self.tile_instances_cap = need_inst.next_power_of_two();
792            self.tile_instances = storage_dst_u32(
793                device,
794                "roxlap-gpu sprite_reg.tile_instances",
795                self.tile_instances_cap,
796            );
797        }
798        queue.write_buffer(&self.tile_ranges, 0, bytemuck::cast_slice(&tile_ranges));
799        queue.write_buffer(
800            &self.tile_instances,
801            0,
802            bytemuck::cast_slice(&tile_instances),
803        );
804
805        (visible.len() as u32, tiles_x, tiles_y)
806    }
807}
808
809/// Create a STORAGE buffer of u32s; pads empty input (wgpu rejects
810/// zero-sized storage bindings).
811fn storage_u32(device: &wgpu::Device, label: &str, data: &[u32]) -> wgpu::Buffer {
812    use wgpu::util::DeviceExt;
813    let bytes: &[u8] = if data.is_empty() {
814        bytemuck::cast_slice(&[0u32])
815    } else {
816        bytemuck::cast_slice(data)
817    };
818    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
819        label: Some(label),
820        contents: bytes,
821        usage: wgpu::BufferUsages::STORAGE,
822    })
823}
824
825/// Create an uninitialised `STORAGE | COPY_DST` `u32` buffer of `cap`
826/// words (≥1). Written each frame via `queue.write_buffer`.
827fn storage_dst_u32(device: &wgpu::Device, label: &str, cap: u32) -> wgpu::Buffer {
828    device.create_buffer(&wgpu::BufferDescriptor {
829        label: Some(label),
830        size: u64::from(cap.max(1)) * 4,
831        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
832        mapped_at_creation: false,
833    })
834}
835
836/// Create a STORAGE buffer of Pod records; pads empty input with one
837/// zeroed `T`.
838fn storage_pod<T: Pod + Zeroable>(device: &wgpu::Device, label: &str, data: &[T]) -> wgpu::Buffer {
839    use wgpu::util::DeviceExt;
840    let one = [T::zeroed()];
841    let src: &[T] = if data.is_empty() { &one } else { data };
842    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
843        label: Some(label),
844        contents: bytemuck::cast_slice(src),
845        usage: wgpu::BufferUsages::STORAGE,
846    })
847}
848
849#[cfg(test)]
850mod tests {
851    use super::*;
852    use roxlap_formats::kv6::{Kv6, Voxel};
853
854    /// 2×1 kv6: column (0,0) has voxels at z=5 (red) and z=1 (green)
855    /// stored OUT of z-order; column (1,0) has one voxel at z=3.
856    fn kv6_unsorted() -> Kv6 {
857        let mk = |z, col| Voxel {
858            col,
859            z,
860            vis: 0,
861            dir: 0,
862        };
863        Kv6 {
864            xsiz: 2,
865            ysiz: 1,
866            zsiz: 8,
867            xpiv: 0.0,
868            ypiv: 0.0,
869            zpiv: 0.0,
870            voxels: vec![mk(5, 0xAA), mk(1, 0xBB), mk(3, 0xCC)],
871            xlen: vec![2, 1],
872            ylen: vec![vec![2], vec![1]],
873            palette: None,
874        }
875    }
876
877    #[test]
878    fn occupancy_bits_set_at_voxel_z() {
879        let m = build_sprite_model(&kv6_unsorted());
880        assert_eq!(m.dims, [2, 1, 8]);
881        assert_eq!(m.occ_words_per_col, 1); // ceil(8/32)
882                                            // col 0: bits 1 and 5; col 1: bit 3.
883        assert_eq!(m.occupancy[0], (1 << 1) | (1 << 5));
884        assert_eq!(m.occupancy[1], 1 << 3);
885    }
886
887    #[test]
888    fn colors_are_ascending_z_for_rank_lookup() {
889        let m = build_sprite_model(&kv6_unsorted());
890        // col 0 sorted ascending z ⇒ z=1 (green 0xBB) before z=5 (0xAA).
891        assert_eq!(m.color_offsets, vec![0, 2, 3]);
892        assert_eq!(&m.colors, &[0xBB, 0xAA, 0xCC]);
893    }
894
895    #[test]
896    fn identity_basis_inverts_to_identity() {
897        let inv = mat3_inverse([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]);
898        assert_eq!(inv, [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]);
899    }
900
901    #[test]
902    fn fork_is_independent_of_parent() {
903        let mut reg = SpriteModelRegistry::new();
904        let base = reg.add(build_sprite_model(&kv6_unsorted()));
905        let forked = reg.fork(base);
906        assert_ne!(base, forked);
907        // Recolour only the fork.
908        reg.model_mut(forked).recolor(|_| 0x11);
909        // Parent colours untouched; fork fully overwritten.
910        assert_eq!(&reg.model(base).colors, &[0xBB, 0xAA, 0xCC]);
911        assert_eq!(&reg.model(forked).colors, &[0x11, 0x11, 0x11]);
912    }
913
914    #[test]
915    fn registry_gpu_structs_have_expected_sizes() {
916        assert_eq!(std::mem::size_of::<SpriteModelMeta>(), 48);
917        assert_eq!(std::mem::size_of::<SpriteInstanceGpu>(), 64);
918    }
919
920    #[test]
921    fn add_lod_builds_halving_mip_chain() {
922        let mut reg = SpriteModelRegistry::new();
923        // 8×8×8 single voxel-filled column model would be ideal, but
924        // kv6_unsorted is 2×1×8 → mips: 2×1×8 → 1×1×4 → 1×1×2 → 1×1×1.
925        let id = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
926        let m0 = reg.model(id);
927        assert_eq!(m0.dims, [2, 1, 8]);
928        assert!((m0.voxel_world_size - 1.0).abs() < 1e-6);
929    }
930
931    /// kv6 from explicit voxels, ordered x-major/y-inner to match
932    /// `build_sprite_model`'s column walk.
933    fn kv6_from(xsiz: u32, ysiz: u32, zsiz: u32, voxels: &[(u32, u32, u16, u32)]) -> Kv6 {
934        let mut ylen = vec![vec![0u16; ysiz as usize]; xsiz as usize];
935        let mut flat = Vec::new();
936        for x in 0..xsiz {
937            for y in 0..ysiz {
938                let mut col: Vec<(u16, u32)> = voxels
939                    .iter()
940                    .filter(|(vx, vy, _, _)| *vx == x && *vy == y)
941                    .map(|(_, _, z, c)| (*z, *c))
942                    .collect();
943                col.sort_by_key(|(z, _)| *z);
944                ylen[x as usize][y as usize] = col.len() as u16;
945                for (z, c) in col {
946                    flat.push(Voxel {
947                        col: c,
948                        z,
949                        vis: 0,
950                        dir: 0,
951                    });
952                }
953            }
954        }
955        let xlen = ylen
956            .iter()
957            .map(|c| c.iter().map(|&v| u32::from(v)).sum())
958            .collect();
959        Kv6 {
960            xsiz,
961            ysiz,
962            zsiz,
963            xpiv: 0.0,
964            ypiv: 0.0,
965            zpiv: 0.0,
966            voxels: flat,
967            xlen,
968            ylen,
969            palette: None,
970        }
971    }
972
973    fn offsets_consistent(m: &SpriteModel) -> bool {
974        let cols = (m.dims[0] * m.dims[1]) as usize;
975        if m.color_offsets.len() != cols + 1 {
976            return false;
977        }
978        // Monotonic non-decreasing + last == colors.len + each column's
979        // span == its solid-voxel count.
980        for w in m.color_offsets.windows(2) {
981            if w[1] < w[0] {
982                return false;
983            }
984        }
985        m.color_offsets[cols] as usize == m.colors.len()
986    }
987
988    #[test]
989    fn carve_two_layers_keeps_offsets_consistent() {
990        // Mirror the demo's carve: columns with voxels at varied z,
991        // some sharing z=0/z=1, some not.
992        let kv6 = kv6_from(
993            3,
994            2,
995            8,
996            &[
997                (0, 0, 0, 0xA0),
998                (0, 0, 1, 0xA1),
999                (0, 0, 5, 0xA5),
1000                (1, 0, 1, 0xB1),
1001                (2, 1, 0, 0xC0),
1002                (2, 1, 3, 0xC3),
1003            ],
1004        );
1005        let mut m = build_sprite_model(&kv6);
1006        assert!(offsets_consistent(&m));
1007        for z in 0..2u32 {
1008            for y in 0..m.dims[1] {
1009                for x in 0..m.dims[0] {
1010                    m.set_voxel(x, y, z, None);
1011                }
1012            }
1013            assert!(offsets_consistent(&m), "inconsistent after carving z={z}");
1014            // downsample must not panic on the carved model.
1015            let _ = m.downsample();
1016        }
1017    }
1018
1019    #[test]
1020    fn set_voxel_inserts_replaces_and_clears() {
1021        // col 0 starts with z=1 (0xBB), z=5 (0xAA); col 1 with z=3 (0xCC).
1022        let mut m = build_sprite_model(&kv6_unsorted());
1023
1024        // Insert z=3 into col 0 (between z=1 and z=5) → rank 1.
1025        assert!(m.set_voxel(0, 0, 3, Some(0x55)));
1026        assert_eq!(m.occupancy[0], (1 << 1) | (1 << 3) | (1 << 5));
1027        // col 0 colours ascending z: 0xBB(z1), 0x55(z3), 0xAA(z5).
1028        assert_eq!(m.color_offsets, vec![0, 3, 4]);
1029        assert_eq!(&m.colors, &[0xBB, 0x55, 0xAA, 0xCC]);
1030
1031        // Replace z=3 in place (no offset shift).
1032        assert!(m.set_voxel(0, 0, 3, Some(0x66)));
1033        assert_eq!(&m.colors, &[0xBB, 0x66, 0xAA, 0xCC]);
1034        assert_eq!(m.color_offsets, vec![0, 3, 4]);
1035
1036        // Clear z=1 (rank 0) from col 0.
1037        assert!(m.set_voxel(0, 0, 1, None));
1038        assert_eq!(m.occupancy[0], (1 << 3) | (1 << 5));
1039        assert_eq!(m.color_offsets, vec![0, 2, 3]);
1040        assert_eq!(&m.colors, &[0x66, 0xAA, 0xCC]);
1041
1042        // No-ops: clear an empty voxel, edit out of bounds.
1043        assert!(!m.set_voxel(0, 0, 2, None));
1044        assert!(!m.set_voxel(9, 0, 0, Some(1)));
1045    }
1046
1047    #[test]
1048    fn rebuild_lod_refreshes_coarse_levels_from_mip0() {
1049        let mut reg = SpriteModelRegistry::new();
1050        let id = reg.add_lod(build_sprite_model(&kv6_unsorted()), 3);
1051        // Recolour mip-0 only via model_mut, then rebuild the ladder.
1052        reg.model_mut(id).recolor(|_| 0x0000_2000);
1053        reg.rebuild_lod(id);
1054        // The mip-1 average of all-0x2000 voxels is still 0x2000.
1055        let lvl1_entry = reg.chains[id as usize][1] as usize;
1056        assert!(reg.entries[lvl1_entry]
1057            .colors
1058            .iter()
1059            .all(|&c| c == 0x0000_2000));
1060    }
1061}