Skip to main content

roxlap_gpu/
sprite_model.rs

1//! GPU.10 — KV6 sprite as a DDA-marchable voxel model.
2//!
3//! Unlike the GPU.9 splatter (one thread per voxel, screen-space
4//! squares, overdraw + atomic contention), a sprite model is a small
5//! voxel volume the precise ray-DDA marches one ray per pixel —
6//! crisp, correct occlusion, no overdraw. This is the GPU.10.0 single
7//! sprite; instancing + tiling + LOD come in later sub-substages.
8//!
9//! The volume reuses the chunk occupancy/colour scheme but sized to
10//! the KV6 bbox: per-column occupancy bitmask (`occ_words_per_col`
11//! u32s, `CHUNK_Z`-style 32-bits-per-word), a flat colour array in
12//! ascending-z order per column, and a `color_offsets` prefix table.
13//! The shader finds a voxel's colour by `offset[col] + popcount(bits
14//! below z)`, so colours MUST be ascending-z (we sort per column).
15
16#![allow(
17    clippy::cast_precision_loss,
18    clippy::cast_possible_truncation,
19    clippy::cast_possible_wrap,
20    clippy::cast_sign_loss,
21    clippy::many_single_char_names,
22    clippy::similar_names
23)]
24
25use bytemuck::{Pod, Zeroable};
26use roxlap_formats::kv6::Kv6;
27use roxlap_formats::sprite::Sprite;
28
/// Voxel volume for a single KV6 model, assembled on the CPU.
#[derive(Clone, Debug)]
pub struct SpriteModel {
    /// Size of the volume in voxels: `(mx, my, mz)`.
    pub dims: [u32; 3],
    /// How many `u32` occupancy words each `(x, y)` column owns —
    /// `ceil(mz / 32)`.
    pub occ_words_per_col: u32,
    /// The KV6 pivot, in model-local voxel coordinates.
    pub pivot: [f32; 3],
    /// Occupancy bitmasks, column after column:
    /// `mx * my * occ_words_per_col` words in total.
    pub occupancy: Vec<u32>,
    /// Colour of every solid voxel; within a column the entries run in
    /// ascending z.
    pub colors: Vec<u32>,
    /// Surface-normal index of every solid voxel (`Kv6::Voxel::dir`,
    /// 0..256), stored parallel to [`colors`](Self::colors). The GPU
    /// sprite shader looks up the per-instance `kv6colmul` lighting
    /// table with it, mirroring the CPU rasteriser's normal-based
    /// shading.
    pub dirs: Vec<u32>,
    /// Prefix-sum table of length `mx * my + 1`: `color_offsets[col]`
    /// is the index of column `col`'s first colour.
    pub color_offsets: Vec<u32>,
    /// Edge length of one voxel of this model in world units
    /// (GPU.10.4 LOD). Mip-0 uses 1.0 and every
    /// [`SpriteModel::downsample`] doubles it. The shader divides the
    /// local ray by this value so that a coarse voxel covers the right
    /// world extent and the march `t` remains in world units.
    pub voxel_world_size: f32,
}
56
57/// Build the DDA volume from a KV6. Columns are packed in
58/// `x + y*mx` order; each column's voxels are sorted ascending by z
59/// so the shader's popcount-rank colour lookup is correct.
60///
61/// # Panics
62/// If the KV6's `ylen` counters disagree with `voxels.len()` (a
63/// malformed model).
64#[must_use]
65pub fn build_sprite_model(kv6: &Kv6) -> SpriteModel {
66    let (mx, my, mz) = (kv6.xsiz, kv6.ysiz, kv6.zsiz);
67    let occ_words_per_col = mz.div_ceil(32).max(1);
68    let cols = (mx * my) as usize;
69
70    let mut occupancy = vec![0u32; cols * occ_words_per_col as usize];
71    let mut color_offsets = vec![0u32; cols + 1];
72    let mut colors: Vec<u32> = Vec::with_capacity(kv6.voxels.len());
73    let mut dirs: Vec<u32> = Vec::with_capacity(kv6.voxels.len());
74
75    // Pass 1 — consume voxels in KV6 storage order (x-outer / y-inner)
76    // into per-column buckets keyed by `col = x + y*mx`. Each entry is
77    // `(z, colour, normal-dir)`.
78    let mut buckets: Vec<Vec<(u16, u32, u8)>> = vec![Vec::new(); cols];
79    let mut voxel_iter = kv6.voxels.iter();
80    for x in 0..mx {
81        for y in 0..my {
82            let col = (x + y * mx) as usize;
83            let count = kv6.ylen[x as usize][y as usize];
84            for _ in 0..count {
85                let v = voxel_iter.next().expect("KV6 ylen / voxels.len mismatch");
86                buckets[col].push((v.z, v.col, v.dir));
87            }
88        }
89    }
90
91    // Pass 2 — emit in COLUMN-INDEX order so `color_offsets` is a true
92    // monotonic prefix sum (the shader indexes by `col` either way, but
93    // structural edits / mip rebuilds rely on monotonic offsets). Each
94    // column's voxels sorted ascending z for the popcount-rank lookup.
95    for (col, bucket) in buckets.iter_mut().enumerate() {
96        color_offsets[col] = colors.len() as u32;
97        bucket.sort_by_key(|(z, _, _)| *z);
98        for &(z, col_rgba, dir) in bucket.iter() {
99            let z = u32::from(z);
100            let base = col * occ_words_per_col as usize + (z >> 5) as usize;
101            occupancy[base] |= 1u32 << (z & 31);
102            colors.push(col_rgba);
103            dirs.push(u32::from(dir));
104        }
105    }
106    color_offsets[cols] = colors.len() as u32;
107
108    SpriteModel {
109        dims: [mx, my, mz],
110        occ_words_per_col,
111        pivot: [kv6.xpiv, kv6.ypiv, kv6.zpiv],
112        occupancy,
113        color_offsets,
114        colors,
115        dirs,
116        voxel_world_size: 1.0,
117    }
118}
119
120/// Per-instance transform consumed by the model-DDA shader: the
121/// inverse model→world rotation (so a world ray can be brought into
122/// model-local space) plus the instance's world position. Stored as
123/// three padded columns for std140/std430 (`mat3x3` 16-byte columns).
124#[repr(C)]
125#[derive(Clone, Copy, Pod, Zeroable, Debug)]
126pub struct SpriteInstanceTransform {
127    /// Inverse of `[s | h | f]`, column-major, each column padded to
128    /// `vec4`. `inv_rot * v = c0*v.x + c1*v.y + c2*v.z`.
129    pub inv_rot: [[f32; 4]; 3],
130    /// Instance world position (the KV6 pivot maps here).
131    pub pos: [f32; 3],
132    _pad: f32,
133}
134
135impl SpriteInstanceTransform {
136    /// Build from a sprite pose. `s/h/f` are the model→world basis
137    /// columns; we invert them so the shader can map world→local.
138    #[must_use]
139    pub fn from_sprite(sprite: &Sprite) -> Self {
140        let inv = mat3_inverse([sprite.s, sprite.h, sprite.f]);
141        Self {
142            inv_rot: [
143                [inv[0][0], inv[0][1], inv[0][2], 0.0],
144                [inv[1][0], inv[1][1], inv[1][2], 0.0],
145                [inv[2][0], inv[2][1], inv[2][2], 0.0],
146            ],
147            pos: sprite.p,
148            _pad: 0.0,
149        }
150    }
151}
152
153/// A registry of sprite models. Instances reference a model by
154/// `model_id`, which is a **LOD chain** id: each chain holds one or
155/// more concrete mip levels (finest first; GPU.10.4), and the renderer
156/// picks the level per instance by distance. Identical KV6s are added
157/// once and shared by many instances. **Copy-on-modify**:
158/// [`Self::fork`] deep-copies a chain so edits to the fork leave the
159/// parent (and its instances) intact.
160#[derive(Debug, Clone, Default)]
161pub struct SpriteModelRegistry {
162    /// Concrete mip-level volumes (the GPU buffers concatenate these).
163    entries: Vec<SpriteModel>,
164    /// `chains[model_id]` = entry ids, finest (mip-0) first.
165    chains: Vec<Vec<u32>>,
166}
167
168impl SpriteModelRegistry {
169    #[must_use]
170    pub fn new() -> Self {
171        Self::default()
172    }
173
174    fn push_entry(&mut self, model: SpriteModel) -> u32 {
175        let id = self.entries.len() as u32;
176        self.entries.push(model);
177        id
178    }
179
180    /// Register a single-level (no-LOD) model; returns its `model_id`.
181    pub fn add(&mut self, model: SpriteModel) -> u32 {
182        let e = self.push_entry(model);
183        let id = self.chains.len() as u32;
184        self.chains.push(vec![e]);
185        id
186    }
187
188    /// Register a model with up to `max_levels` LOD mips (each a 2×
189    /// [`SpriteModel::downsample`] of the previous; stops early once a
190    /// level collapses to 1³). Returns its `model_id`.
191    pub fn add_lod(&mut self, model: SpriteModel, max_levels: u32) -> u32 {
192        let mut levels = vec![self.push_entry(model.clone())];
193        let mut cur = model;
194        for _ in 1..max_levels.max(1) {
195            if cur.dims == [1, 1, 1] {
196                break;
197            }
198            cur = cur.downsample();
199            levels.push(self.push_entry(cur.clone()));
200        }
201        let id = self.chains.len() as u32;
202        self.chains.push(levels);
203        id
204    }
205
206    /// Copy-on-modify: deep-copy every level of chain `parent` into new
207    /// entries + a new chain, and return its `model_id`. The fork owns
208    /// independent voxel data, so mutating it does not affect the
209    /// parent or any instance still pointing at it.
210    ///
211    /// # Panics
212    /// If `parent` is not a registered `model_id`.
213    pub fn fork(&mut self, parent: u32) -> u32 {
214        let src = self.chains[parent as usize].clone();
215        let levels: Vec<u32> = src
216            .iter()
217            .map(|&e| {
218                let copy = self.entries[e as usize].clone();
219                self.push_entry(copy)
220            })
221            .collect();
222        let id = self.chains.len() as u32;
223        self.chains.push(levels);
224        id
225    }
226
227    /// The finest (mip-0) model of chain `id`.
228    #[must_use]
229    pub fn model(&self, id: u32) -> &SpriteModel {
230        &self.entries[self.chains[id as usize][0] as usize]
231    }
232
233    /// Mutable access to the finest (mip-0) model for editing — the
234    /// copy-on-modify entry point (typically on a [`Self::fork`]).
235    /// After a *structural* edit (occupancy/dims), call
236    /// [`Self::rebuild_lod`] so the coarser mips match; a pure recolour
237    /// can use [`Self::recolor_chain`] instead.
238    pub fn model_mut(&mut self, id: u32) -> &mut SpriteModel {
239        let e = self.chains[id as usize][0] as usize;
240        &mut self.entries[e]
241    }
242
243    /// Recolour every LOD level of chain `id` (so a forked tint shows
244    /// at all distances).
245    pub fn recolor_chain(&mut self, id: u32, f: impl Fn(u32) -> u32 + Copy) {
246        for li in 0..self.chains[id as usize].len() {
247            let e = self.chains[id as usize][li] as usize;
248            self.entries[e].recolor(f);
249        }
250    }
251
252    /// Regenerate chain `id`'s coarser mip levels from its (possibly
253    /// just-edited) mip-0. Run after a structural edit via
254    /// [`Self::model_mut`] so the LOD ladder stays consistent. No-op
255    /// for a single-level (no-LOD) chain.
256    pub fn rebuild_lod(&mut self, id: u32) {
257        let levels = self.chains[id as usize].clone();
258        if levels.len() <= 1 {
259            return;
260        }
261        let mut cur = self.entries[levels[0] as usize].clone();
262        for &e in &levels[1..] {
263            cur = cur.downsample();
264            self.entries[e as usize] = cur.clone();
265        }
266    }
267
268    /// Free chain `chain_id`'s voxel data **in place**: replace each of
269    /// its LOD entries with [`SpriteModel::empty`] and clear the chain.
270    /// Entry ids and every other `model_id` are **preserved** (the chain
271    /// becomes empty, its entries become placeholders), so no id remap is
272    /// needed and the resident registry's entry alignment stays intact.
273    ///
274    /// This is safe to pair with the resident side because
275    /// [`SpriteRegistryResident::remove_model`] tombstones the same
276    /// entries (`dead[e]`) and [`compact`](SpriteRegistryResident::compact)
277    /// reads only live entries — so the resident never touches the empty
278    /// placeholders left here. Call `remove_model` (resident) **before**
279    /// this so those tombstones are set. No-op if `chain_id` is out of
280    /// range or already removed.
281    pub fn remove(&mut self, chain_id: u32) {
282        let Some(entries) = self.chains.get(chain_id as usize) else {
283            return;
284        };
285        // Clone the small id list so we can mutate `entries` while iterating.
286        let entries = entries.clone();
287        for e in entries {
288            self.entries[e as usize] = SpriteModel::empty();
289        }
290        self.chains[chain_id as usize] = Vec::new(); // tombstone (slot kept)
291    }
292
293    /// Whether `chain_id` is a live (registered, not [`removed`](Self::remove))
294    /// model. `false` for an out-of-range id or a tombstoned chain.
295    #[must_use]
296    pub fn is_live(&self, chain_id: u32) -> bool {
297        self.chains
298            .get(chain_id as usize)
299            .is_some_and(|c| !c.is_empty())
300    }
301
302    /// Number of LOD chains (distinct `model_id`s). Counts tombstoned
303    /// (removed) chains too — ids are never reused, so this is also the
304    /// next id that [`Self::add`] / [`Self::add_lod`] will mint.
305    #[must_use]
306    pub fn len(&self) -> usize {
307        self.chains.len()
308    }
309
310    #[must_use]
311    pub fn is_empty(&self) -> bool {
312        self.chains.is_empty()
313    }
314}
315
316impl SpriteModel {
317    /// An empty (zero-voxel, zero-extent) placeholder model. Used by
318    /// [`SpriteModelRegistry::remove`] to free a removed chain's voxel
319    /// data while keeping its entry slot, so ids stay stable. Carries no
320    /// occupancy/colours; `color_offsets` is the single-element prefix
321    /// `[0]` (`cols + 1` with `cols == 0`), keeping the structural
322    /// invariant intact for any code that inspects it.
323    #[must_use]
324    pub fn empty() -> Self {
325        Self {
326            dims: [0, 0, 0],
327            occ_words_per_col: 1,
328            pivot: [0.0, 0.0, 0.0],
329            occupancy: Vec::new(),
330            colors: Vec::new(),
331            dirs: Vec::new(),
332            color_offsets: vec![0],
333            voxel_world_size: 1.0,
334        }
335    }
336
337    /// Recolour every voxel via `f(old_rgba) -> new_rgba`. Structure
338    /// (occupancy / offsets) is untouched, so this is a cheap in-place
339    /// edit — handy on a [`SpriteModelRegistry::fork`] to make a tinted
340    /// variant. For structural edits, mutate the public occupancy /
341    /// colours / dims directly (via `model_mut`) then rebuild the LOD.
342    pub fn recolor(&mut self, f: impl Fn(u32) -> u32) {
343        for c in &mut self.colors {
344            *c = f(*c);
345        }
346    }
347
348    /// GPU.12 — structural edit of a single voxel within the model's
349    /// existing bounds. `Some(rgba)` sets/replaces the voxel at
350    /// `(x, y, z)`; `None` clears it. Maintains the ascending-z colour
351    /// invariant by inserting/removing at the voxel's popcount rank and
352    /// shifting the affected columns' `color_offsets`. Returns `true`
353    /// if the model changed. Out-of-bounds coordinates are ignored
354    /// (returns `false`) — growing `dims` is a separate concern.
355    ///
356    /// After editing, call [`SpriteModelRegistry::rebuild_lod`] to
357    /// refresh coarser mips, then re-upload via `set_sprite_instances`.
358    pub fn set_voxel(&mut self, x: u32, y: u32, z: u32, color: Option<u32>) -> bool {
359        if x >= self.dims[0] || y >= self.dims[1] || z >= self.dims[2] {
360            return false;
361        }
362        let owpc = self.occ_words_per_col as usize;
363        let cols = (self.dims[0] * self.dims[1]) as usize;
364        let col = (x + y * self.dims[0]) as usize;
365        let base = col * owpc;
366        let zw = (z >> 5) as usize;
367        let zb = z & 31;
368
369        // Rank = solid voxels strictly below z in this column.
370        let mut rank = 0usize;
371        for w in 0..zw {
372            rank += self.occupancy[base + w].count_ones() as usize;
373        }
374        let below_mask = if zb > 0 { (1u32 << zb) - 1 } else { 0 };
375        rank += (self.occupancy[base + zw] & below_mask).count_ones() as usize;
376        let idx = self.color_offsets[col] as usize + rank;
377        let was_set = (self.occupancy[base + zw] >> zb) & 1 == 1;
378
379        if let Some(rgba) = color {
380            if was_set {
381                self.colors[idx] = rgba; // replace in place (keeps dir)
382            } else {
383                self.occupancy[base + zw] |= 1u32 << zb;
384                self.colors.insert(idx, rgba);
385                // No normal supplied by this API — default to dir 0 (the
386                // sole caller, the carve hotkey, only ever clears).
387                self.dirs.insert(idx, 0);
388                for c in &mut self.color_offsets[col + 1..=cols] {
389                    *c += 1;
390                }
391            }
392            true
393        } else {
394            if !was_set {
395                return false;
396            }
397            self.occupancy[base + zw] &= !(1u32 << zb);
398            self.colors.remove(idx);
399            self.dirs.remove(idx);
400            for c in &mut self.color_offsets[col + 1..=cols] {
401                *c -= 1;
402            }
403            true
404        }
405    }
406
407    /// Radius of a bounding sphere centred at the instance position
408    /// (the pivot maps there): the farthest bbox corner from the
409    /// pivot. Used for frustum culling. Assumes a unit basis; scaled
410    /// instances would multiply this by their max basis length.
411    #[must_use]
412    pub fn bound_radius(&self) -> f32 {
413        let mut r2 = 0.0_f32;
414        for &cx in &[0.0, self.dims[0] as f32] {
415            for &cy in &[0.0, self.dims[1] as f32] {
416                for &cz in &[0.0, self.dims[2] as f32] {
417                    let d = [cx - self.pivot[0], cy - self.pivot[1], cz - self.pivot[2]];
418                    r2 = r2.max(d[0] * d[0] + d[1] * d[1] + d[2] * d[2]);
419                }
420            }
421        }
422        r2.sqrt()
423    }
424
425    /// GPU.10.4 — 2× voxel downsample for the next LOD level. A coarse
426    /// voxel is solid if any of its 2×2×2 fine voxels is, coloured by
427    /// their per-channel average. Dims/pivot halve and
428    /// `voxel_world_size` doubles, so the coarse model occupies the
429    /// same world box at half the resolution (origin-corner aligned).
430    #[must_use]
431    #[allow(clippy::manual_checked_ops)] // `n > 0` guards 4 divisions, not one checked_div
432    pub fn downsample(&self) -> SpriteModel {
433        let [fx, fy, fz] = self.dims;
434        let fidx = |x: u32, y: u32, z: u32| (x + y * fx + z * fx * fy) as usize;
435
436        // Reconstruct dense fine voxels (solid flag + colour + normal).
437        let mut solid = vec![false; (fx * fy * fz) as usize];
438        let mut fine = vec![0u32; (fx * fy * fz) as usize];
439        let mut fine_dir = vec![0u32; (fx * fy * fz) as usize];
440        for x in 0..fx {
441            for y in 0..fy {
442                let col = (x + y * fx) as usize;
443                let base = col * self.occ_words_per_col as usize;
444                let off = self.color_offsets[col] as usize;
445                let mut seen = 0usize;
446                for z in 0..fz {
447                    let w = base + (z >> 5) as usize;
448                    if (self.occupancy[w] >> (z & 31)) & 1 == 1 {
449                        fine[fidx(x, y, z)] = self.colors[off + seen];
450                        fine_dir[fidx(x, y, z)] = self.dirs[off + seen];
451                        solid[fidx(x, y, z)] = true;
452                        seen += 1;
453                    }
454                }
455            }
456        }
457
458        let nx = fx.div_ceil(2).max(1);
459        let ny = fy.div_ceil(2).max(1);
460        let nz = fz.div_ceil(2).max(1);
461        let owpc = nz.div_ceil(32).max(1);
462        let cols = (nx * ny) as usize;
463        let mut occupancy = vec![0u32; cols * owpc as usize];
464        let mut color_offsets = vec![0u32; cols + 1];
465        let mut colors: Vec<u32> = Vec::new();
466        let mut dirs: Vec<u32> = Vec::new();
467
468        // Emit in column-index order (`ccol = cx + cy*nx`), cy outer,
469        // so `color_offsets` is a monotonic prefix sum like build's.
470        for cy in 0..ny {
471            for cx in 0..nx {
472                let ccol = (cx + cy * nx) as usize;
473                color_offsets[ccol] = colors.len() as u32;
474                for cz in 0..nz {
475                    let (mut a, mut r, mut g, mut b, mut n) = (0u32, 0u32, 0u32, 0u32, 0u32);
476                    // Normals don't average meaningfully — keep the first
477                    // solid child's `dir` as the coarse voxel's normal.
478                    let mut rep_dir = 0u32;
479                    for dz in 0..2 {
480                        for dy in 0..2 {
481                            for dx in 0..2 {
482                                let (x, y, z) = (2 * cx + dx, 2 * cy + dy, 2 * cz + dz);
483                                if x < fx && y < fy && z < fz && solid[fidx(x, y, z)] {
484                                    let c = fine[fidx(x, y, z)];
485                                    if n == 0 {
486                                        rep_dir = fine_dir[fidx(x, y, z)];
487                                    }
488                                    a += (c >> 24) & 0xff;
489                                    r += (c >> 16) & 0xff;
490                                    g += (c >> 8) & 0xff;
491                                    b += c & 0xff;
492                                    n += 1;
493                                }
494                            }
495                        }
496                    }
497                    if n > 0 {
498                        let avg = ((a / n) << 24) | ((r / n) << 16) | ((g / n) << 8) | (b / n);
499                        let base = ccol * owpc as usize + (cz >> 5) as usize;
500                        occupancy[base] |= 1u32 << (cz & 31);
501                        colors.push(avg);
502                        dirs.push(rep_dir);
503                    }
504                }
505            }
506        }
507        color_offsets[cols] = colors.len() as u32;
508
509        SpriteModel {
510            dims: [nx, ny, nz],
511            occ_words_per_col: owpc,
512            pivot: [
513                self.pivot[0] * 0.5,
514                self.pivot[1] * 0.5,
515                self.pivot[2] * 0.5,
516            ],
517            occupancy,
518            colors,
519            dirs,
520            color_offsets,
521            voxel_world_size: self.voxel_world_size * 2.0,
522        }
523    }
524}
525
/// World-space view frustum used to cull instances on the CPU. It is
/// rebuilt every frame from the world camera. `half_w` and `half_h`
/// hold the tangent of the horizontal / vertical half-FOV, so in
/// camera space the side planes read `|x| <= half_w * z` (and likewise
/// for `y` with `half_h`).
#[derive(Clone, Copy, Debug)]
pub struct ViewFrustum {
    pub pos: [f32; 3],
    pub right: [f32; 3],
    pub down: [f32; 3],
    pub forward: [f32; 3],
    pub half_w: f32,
    pub half_h: f32,
    pub far: f32,
}
540
541/// CPU cull record: the GPU instance + its world bounding sphere.
542/// Not `Copy` — carries a boxed 256-entry `kv6colmul` table.
543#[derive(Clone)]
544struct CullInstance {
545    /// Instance transform + a placeholder `model_id`; the cull
546    /// overwrites `model_id` with the distance-chosen LOD entry.
547    gpu: SpriteInstanceGpu,
548    /// LOD chain this instance draws (the user-facing `model_id`).
549    chain_id: u32,
550    center: [f32; 3],
551    radius: f32,
552    /// voxlap `kv6colmul[256]` — per-surface-normal colour modulation
553    /// for this instance's pose + lighting. Defaults to identity
554    /// (`0x0100` in every channel lane → unshaded) until the facade sets
555    /// it via [`SpriteRegistryResident::set_instance_colmul`]. Packed
556    /// into the `colmul` GPU buffer (in visible order) each frame.
557    colmul: Box<[u64; 256]>,
558}
559
/// The no-op `kv6colmul` table. Each of the four 16-bit channel lanes
/// of every entry holds `0x0100`, which makes the shader's
/// `(rgb[c] << 8) * 0x0100 >> 16` evaluate to `rgb[c]` — no shading.
fn identity_colmul() -> Box<[u64; 256]> {
    // `0x0100` replicated into lanes 0..4 of a single 64-bit word.
    const IDENTITY_WORD: u64 = 0x0100_0100_0100_0100;
    Box::new([IDENTITY_WORD; 256])
}
567
/// Dot product of two 3-component vectors.
fn dot3(a: [f32; 3], b: [f32; 3]) -> f32 {
    let [ax, ay, az] = a;
    let [bx, by, bz] = b;
    ax * bx + ay * by + az * bz
}
571
572/// Build one CPU cull record from a user [`SpriteInstance`]: pack the
573/// transform, seed the bounding sphere from the chain's finest model, and
574/// start `colmul` at identity. Shared by the full
575/// [`SpriteRegistryResident::upload`] and the incremental
576/// [`SpriteRegistryResident::append_instances`].
577fn make_cull(registry: &SpriteModelRegistry, i: &SpriteInstance) -> CullInstance {
578    CullInstance {
579        gpu: SpriteInstanceGpu {
580            inv_rot0: i.transform.inv_rot[0],
581            inv_rot1: i.transform.inv_rot[1],
582            inv_rot2: i.transform.inv_rot[2],
583            pos: i.transform.pos,
584            model_id: i.model_id, // placeholder; cull rewrites per frame
585        },
586        chain_id: i.model_id,
587        center: i.transform.pos,
588        radius: registry.model(i.model_id).bound_radius(),
589        colmul: identity_colmul(),
590    }
591}
592
593/// Allocate the `instances` capacity buffer (`STORAGE | COPY_DST`) sized
594/// for `cap` records (≥1). Left uninitialised — `cull_bin_upload`
595/// rewrites it (offset 0) each frame, and `append_instances` seeds the
596/// live records after a grow.
597fn instances_buffer(device: &wgpu::Device, cap: u32) -> wgpu::Buffer {
598    device.create_buffer(&wgpu::BufferDescriptor {
599        label: Some("roxlap-gpu sprite_reg.instances"),
600        size: u64::from(cap.max(1)) * std::mem::size_of::<SpriteInstanceGpu>() as u64,
601        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
602        mapped_at_creation: false,
603    })
604}
605
606/// One sprite instance: a model reference + world pose.
607#[derive(Debug, Clone, Copy)]
608pub struct SpriteInstance {
609    pub model_id: u32,
610    pub transform: SpriteInstanceTransform,
611}
612
613/// GPU per-model metadata: where this model's data starts in the
614/// shared registry buffers + its dims/pivot. Mirrors `ModelMeta` in
615/// the shader (std430, 48 bytes).
616#[repr(C)]
617#[derive(Clone, Copy, Pod, Zeroable, Debug)]
618struct SpriteModelMeta {
619    occupancy_offset: u32,
620    colors_offset: u32,
621    color_offsets_offset: u32,
622    occ_words_per_col: u32,
623    dims: [u32; 3],
624    _pad0: u32,
625    pivot: [f32; 3],
626    /// GPU.10.4 — world size of one voxel of this (mip) entry.
627    voxel_world_size: f32,
628}
629
630/// GPU per-instance record. Mirrors `Instance` in the shader (std430,
631/// 64 bytes): inverse rotation columns + position + model id.
632#[repr(C)]
633#[derive(Clone, Copy, Pod, Zeroable, Debug)]
634struct SpriteInstanceGpu {
635    inv_rot0: [f32; 4],
636    inv_rot1: [f32; 4],
637    inv_rot2: [f32; 4],
638    pos: [f32; 3],
639    model_id: u32,
640}
641
/// Inverse of the 3×3 matrix whose columns are `[c0, c1, c2]`; the
/// result is likewise returned as columns. An orthonormal basis simply
/// yields its transpose, while the general formula also handles
/// rotation combined with non-unit scale. A matrix whose determinant
/// has magnitude below `1e-12` is treated as singular and produces the
/// zero matrix.
#[must_use]
fn mat3_inverse(cols: [[f32; 3]; 3]) -> [[f32; 3]; 3] {
    fn cross(u: [f32; 3], v: [f32; 3]) -> [f32; 3] {
        [
            u[1] * v[2] - u[2] * v[1],
            u[2] * v[0] - u[0] * v[2],
            u[0] * v[1] - u[1] * v[0],
        ]
    }

    let [a, b, c] = cols;
    // Unscaled rows of the inverse: b×c, c×a, a×b.
    let rows = [cross(b, c), cross(c, a), cross(a, b)];
    // Determinant as the scalar triple product a · (b × c).
    let det = a[0] * rows[0][0] + a[1] * rows[0][1] + a[2] * rows[0][2];
    let inv_det = if det.abs() < 1e-12 { 0.0 } else { 1.0 / det };

    // Column `j` of the inverse collects element `j` of every row
    // (i.e. the rows transposed), scaled by 1/det.
    let column = |j: usize| {
        [
            rows[0][j] * inv_det,
            rows[1][j] * inv_det,
            rows[2][j] * inv_det,
        ]
    };
    [column(0), column(1), column(2)]
}
669
670/// GPU-resident registry + instances: every model's occupancy /
671/// colours / offsets concatenated into shared storage buffers, a
672/// per-model metadata table, and a capacity-sized instance buffer
673/// rewritten each frame with the frustum-visible subset (GPU.10.2).
674/// One bind group serves all models (same approach as the multi-grid
675/// scene).
676pub struct SpriteRegistryResident {
677    pub occupancy: wgpu::Buffer,
678    pub colors: wgpu::Buffer,
679    /// Per-voxel surface-normal index, concatenated across models in the
680    /// same layout as [`colors`](Self::colors). The shader indexes the
681    /// per-instance `kv6colmul` table by it.
682    pub dirs: wgpu::Buffer,
683    pub color_offsets: wgpu::Buffer,
684    pub model_meta: wgpu::Buffer,
685    /// Holds up to `instance_capacity` instances; the visible subset
686    /// is packed into `[0, count)` each frame by [`Self::cull_bin_upload`].
687    pub instances: wgpu::Buffer,
688    pub instance_capacity: u32,
689    /// Per-visible-instance `kv6colmul[256]` tables, packed in the same
690    /// order as the `instances` buffer each frame (two u32 per u64
691    /// entry: lanes 0|1 then 2|3). Sized `instance_capacity * 256 * 2`
692    /// u32; rewritten by [`Self::cull_bin_upload`].
693    pub colmul: wgpu::Buffer,
694    colmul_cap: u32,
695    /// GPU.10.3 — per-tile `(offset, count)` into `tile_instances`,
696    /// flat `2 * tiles_x * tiles_y` u32s. Grown to fit the screen.
697    pub tile_ranges: wgpu::Buffer,
698    tile_ranges_cap: u32,
699    /// GPU.10.3 — flat list of visible-instance indices grouped by
700    /// tile. Grown to fit the per-frame total.
701    pub tile_instances: wgpu::Buffer,
702    tile_instances_cap: u32,
703    /// CPU cull records (full set), with precomputed bounding spheres.
704    cull: Vec<CullInstance>,
705    /// GPU.10.4 — LOD chains: `chains[chain_id]` = entry ids, finest
706    /// first. The cull picks a level by distance and writes its entry
707    /// id into the packed instance's `model_id`.
708    chains: Vec<Vec<u32>>,
709    /// GPU.12 incremental — CPU mirror of the GPU `model_meta` table, one
710    /// per concrete entry. [`Self::update_model`] reads the fixed
711    /// occupancy/color_offsets bases from here and rewrites the changed
712    /// `colors_offset` on a relocation.
713    meta: Vec<SpriteModelMeta>,
714    /// GPU.12 incremental — per-entry placement of `colors`/`dirs` in the
715    /// shared buffers (drives both; same offsets/ranks). Lets an edit
716    /// re-upload one model's data without touching the others.
717    colors_alloc: ColorsAllocator,
718    /// Per-entry word length of the dims-fixed `occupancy` and
719    /// `color_offsets` arrays, kept so [`Self::update_model`] can assert a
720    /// carve never changed dims (which would invalidate the in-place
721    /// writes — growing dims is out of scope, handled by a full re-upload).
722    occ_lens: Vec<u32>,
723    coloff_lens: Vec<u32>,
724    /// Used / allocated words of the tightly-concatenated `occupancy`
725    /// buffer. `add_model` bump-appends at `occ_used`; when it would pass
726    /// `occ_cap` the buffer is grown (with slack) and rebuilt from the
727    /// registry. (`colors`/`dirs` track theirs in [`ColorsAllocator`].)
728    occ_used: u32,
729    occ_cap: u32,
730    /// Used / allocated words of the tightly-concatenated `color_offsets`
731    /// buffer — same growth scheme as `occ_*`.
732    coloff_used: u32,
733    coloff_cap: u32,
734    /// Allocated record count of the `model_meta` buffer; `add_model`
735    /// grows it (with slack) when the entry count passes it.
736    meta_cap: u32,
737    /// Per-entry tombstone: `true` once its model was removed
738    /// ([`Self::remove_model`]). Dead entries keep their `meta` slot (so
739    /// entry ids — and the caller's `chain_id`s — stay stable) but their
740    /// colours are freed for reuse and they contribute nothing to a
741    /// repack / [`Self::compact`]. Parallel to `meta`.
742    dead: Vec<bool>,
743}
744
/// Selector for one of the two tightly-concatenated registry buffers, used
/// by the helpers that treat both the same way (for example
/// [`SpriteRegistryResident::sync_concat`]).
#[derive(Clone, Copy)]
enum ConcatBuf {
    /// The shared `occupancy` buffer.
    Occupancy,
    /// The shared `color_offsets` buffer.
    ColorOffsets,
}
752
753/// The model's source array for a given [`ConcatBuf`] — a free fn (not a
754/// closure) so the returned borrow keeps `m`'s lifetime.
755fn concat_data(m: &SpriteModel, which: ConcatBuf) -> &[u32] {
756    match which {
757        ConcatBuf::Occupancy => &m.occupancy,
758        ConcatBuf::ColorOffsets => &m.color_offsets,
759    }
760}
761
762impl SpriteRegistryResident {
763    /// Concatenate `registry`'s models into shared buffers and prepare
764    /// `instances` for per-frame culling. Model-relative indices stay
765    /// as built; the shader adds each model's base offset from the
766    /// metadata table.
767    #[must_use]
768    pub fn upload(
769        device: &wgpu::Device,
770        registry: &SpriteModelRegistry,
771        instances: &[SpriteInstance],
772    ) -> Self {
773        // `occupancy` + `color_offsets` are dims-fixed → tightly
774        // concatenated (never grow on a carve). `colors` + `dirs` are
775        // variable → laid out by the suballocator with per-slot slack so
776        // an incremental edit can rewrite one model in place.
777        let entry_lens: Vec<u32> = registry
778            .entries
779            .iter()
780            .map(|m| m.colors.len() as u32)
781            .collect();
782        let colors_alloc = ColorsAllocator::new(&entry_lens);
783        let cap_total = colors_alloc.cap_total();
784
785        let mut all_occ: Vec<u32> = Vec::new();
786        let mut all_offsets: Vec<u32> = Vec::new();
787        let mut all_colors: Vec<u32> = vec![0; cap_total as usize];
788        let mut all_dirs: Vec<u32> = vec![0; cap_total as usize];
789        let mut meta: Vec<SpriteModelMeta> = Vec::with_capacity(registry.entries.len());
790        let mut occ_lens: Vec<u32> = Vec::with_capacity(registry.entries.len());
791        let mut coloff_lens: Vec<u32> = Vec::with_capacity(registry.entries.len());
792
793        // One meta + placed data per concrete (mip-level) entry.
794        for (e, m) in registry.entries.iter().enumerate() {
795            let slot = colors_alloc.slot(e);
796            meta.push(SpriteModelMeta {
797                occupancy_offset: all_occ.len() as u32,
798                colors_offset: slot.off,
799                color_offsets_offset: all_offsets.len() as u32,
800                occ_words_per_col: m.occ_words_per_col,
801                dims: m.dims,
802                _pad0: 0,
803                pivot: m.pivot,
804                voxel_world_size: m.voxel_world_size,
805            });
806            occ_lens.push(m.occupancy.len() as u32);
807            coloff_lens.push(m.color_offsets.len() as u32);
808            all_occ.extend_from_slice(&m.occupancy);
809            all_offsets.extend_from_slice(&m.color_offsets);
810            let off = slot.off as usize;
811            all_colors[off..off + m.colors.len()].copy_from_slice(&m.colors);
812            all_dirs[off..off + m.dirs.len()].copy_from_slice(&m.dirs);
813        }
814
815        // Per-instance cull records: sphere centred at the instance
816        // position, radius from the chain's finest (mip-0) model.
817        // `colmul` starts at identity (unshaded) until the facade sets
818        // per-instance lighting via `set_instance_colmul`.
819        let cull: Vec<CullInstance> = instances.iter().map(|i| make_cull(registry, i)).collect();
820
821        // Capacity buffer (COPY_DST so cull can rewrite it each frame),
822        // seeded with the full set so frame 0 is valid pre-cull.
823        let seed: Vec<SpriteInstanceGpu> = cull.iter().map(|c| c.gpu).collect();
824        let instances_buf = {
825            use wgpu::util::DeviceExt;
826            let one = [SpriteInstanceGpu::zeroed()];
827            let src: &[SpriteInstanceGpu] = if seed.is_empty() { &one } else { &seed };
828            device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
829                label: Some("roxlap-gpu sprite_reg.instances"),
830                contents: bytemuck::cast_slice(src),
831                usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
832            })
833        };
834
835        let tile_ranges = storage_dst_u32(device, "roxlap-gpu sprite_reg.tile_ranges", 1);
836        let tile_instances = storage_dst_u32(device, "roxlap-gpu sprite_reg.tile_instances", 1);
837        // colmul: 256 entries × 2 u32 per visible instance. Sized to the
838        // full instance set (worst case all visible); rewritten per frame.
839        let colmul_cap = (cull.len() as u32).max(1) * 256 * 2;
840        let colmul = storage_dst_u32(device, "roxlap-gpu sprite_reg.colmul", colmul_cap);
841        Self {
842            occupancy: storage_dst_u32_cap(
843                device,
844                "roxlap-gpu sprite_reg.occupancy",
845                &all_occ,
846                all_occ.len() as u32,
847            ),
848            colors: storage_dst_u32_cap(
849                device,
850                "roxlap-gpu sprite_reg.colors",
851                &all_colors,
852                cap_total,
853            ),
854            dirs: storage_dst_u32_cap(device, "roxlap-gpu sprite_reg.dirs", &all_dirs, cap_total),
855            color_offsets: storage_dst_u32_cap(
856                device,
857                "roxlap-gpu sprite_reg.color_offsets",
858                &all_offsets,
859                all_offsets.len() as u32,
860            ),
861            model_meta: storage_dst_pod(device, "roxlap-gpu sprite_reg.model_meta", &meta),
862            instances: instances_buf,
863            instance_capacity: cull.len() as u32,
864            colmul,
865            colmul_cap,
866            tile_ranges,
867            tile_ranges_cap: 1,
868            tile_instances,
869            tile_instances_cap: 1,
870            cull,
871            chains: registry.chains.clone(),
872            occ_used: all_occ.len() as u32,
873            occ_cap: all_occ.len() as u32,
874            coloff_used: all_offsets.len() as u32,
875            coloff_cap: all_offsets.len() as u32,
876            meta_cap: meta.len() as u32,
877            dead: vec![false; meta.len()],
878            meta,
879            colors_alloc,
880            occ_lens,
881            coloff_lens,
882        }
883    }
884
885    /// Number of resident instances (the cull set length).
886    #[must_use]
887    pub fn instance_count(&self) -> usize {
888        self.cull.len()
889    }
890
891    /// Append new instances **without** re-uploading any model volume —
892    /// the incremental counterpart to [`Self::upload`], for streaming
893    /// spawns (asteroids, projectiles, …). Returns the index of the first
894    /// appended instance; the block occupies `[base, base + N)`.
895    ///
896    /// The model volumes are untouched, so every appended instance must
897    /// reference a `model_id` (LOD chain) that was already present in the
898    /// `registry` passed to [`Self::upload`]. Registering a *new* model
899    /// still requires a full [`Self::upload`] (its voxels must be laid
900    /// into the shared buffers). `registry` here is only read for the new
901    /// instances' bound-sphere radii and must be the resident one.
902    ///
903    /// The `instances` GPU buffer is only *grown* here (power-of-two,
904    /// amortised O(1)); its contents are **not** written. [`Self::
905    /// cull_bin_upload`] rewrites the whole visible range from `cull` every
906    /// frame before the sprite pass reads it — exactly as for the static
907    /// instances — so appending only needs to extend `cull` and ensure
908    /// capacity. Writing the buffer here too caused a mid-frame
909    /// write-while-in-flight hazard on some drivers (a stray full-screen
910    /// flash on append). `colmul` likewise grows lazily in
911    /// `cull_bin_upload`. After a removal the capacity is not shrunk.
912    pub fn append_instances(
913        &mut self,
914        device: &wgpu::Device,
915        registry: &SpriteModelRegistry,
916        instances: &[SpriteInstance],
917    ) -> u32 {
918        let base = self.cull.len() as u32;
919        if instances.is_empty() {
920            return base;
921        }
922        for i in instances {
923            debug_assert!(
924                (i.model_id as usize) < self.chains.len(),
925                "append_instances: model_id {} not resident (run upload to register new models)",
926                i.model_id
927            );
928            self.cull.push(make_cull(registry, i));
929        }
930        let need = self.cull.len() as u32;
931        if need > self.instance_capacity {
932            // Grow power-of-two and recreate the buffer (the next frame's
933            // bind group picks up the new handle). No seed write — the
934            // per-frame cull_bin_upload populates it.
935            self.instance_capacity = need.next_power_of_two();
936            self.instances = instances_buffer(device, self.instance_capacity);
937        }
938        base
939    }
940
941    /// Remove the instance at `index` by swap-remove — O(1), no GPU work
942    /// (the next [`Self::cull_bin_upload`] repacks the visible set from
943    /// the shrunk cull list). Capacity is retained for reuse.
944    ///
945    /// Returns `Some(old_last)` when a different instance was moved into
946    /// `index` to fill the hole (its index changed from `old_last` to
947    /// `index` — callers holding instance handles must fix up that one),
948    /// or `None` if `index` was the last element or out of range. Because
949    /// this reorders, any [`Self::set_instance_colmul`] table set by
950    /// position should be re-applied after a removal.
951    pub fn remove_instance(&mut self, index: usize) -> Option<usize> {
952        if index >= self.cull.len() {
953            return None;
954        }
955        let last = self.cull.len() - 1;
956        self.cull.swap_remove(index);
957        (index != last).then_some(last)
958    }
959
960    /// Set the per-instance `kv6colmul[256]` lighting tables (voxlap's
961    /// `update_reflects` output), in the same order/length as the
962    /// instances passed to [`Self::upload`]. The next
963    /// [`Self::cull_bin_upload`] packs the visible subset to the GPU.
964    /// Instances beyond `tables.len()` keep their previous tables.
965    pub fn set_instance_colmul(&mut self, tables: &[[u64; 256]]) {
966        for (ci, t) in self.cull.iter_mut().zip(tables) {
967            ci.colmul.copy_from_slice(t);
968        }
969    }
970
971    /// Refresh instance poses in place from `instances` — for animated
972    /// sprites (e.g. KFA limbs re-posed each frame) — **without** any
973    /// model-volume re-upload. `instances` must match the set passed to
974    /// [`Self::upload`] in length + order; each keeps its `model_id`
975    /// (LOD chain) so only the transform + cull centre change. No GPU
976    /// write happens here: the next [`Self::cull_bin_upload`] re-uploads
977    /// the packed visible subset, as it already does every frame.
978    pub fn update_transforms(&mut self, instances: &[SpriteInstance]) {
979        debug_assert_eq!(
980            instances.len(),
981            self.cull.len(),
982            "update_transforms instance count must match upload"
983        );
984        for (ci, inst) in self.cull.iter_mut().zip(instances) {
985            ci.gpu.inv_rot0 = inst.transform.inv_rot[0];
986            ci.gpu.inv_rot1 = inst.transform.inv_rot[1];
987            ci.gpu.inv_rot2 = inst.transform.inv_rot[2];
988            ci.gpu.pos = inst.transform.pos;
989            // Bounding sphere follows the pivot; radius/chain unchanged.
990            ci.center = inst.transform.pos;
991        }
992    }
993
994    /// GPU.12 incremental — re-upload only the entries of LOD chain
995    /// `chain_id` after an in-place edit (carve / recolour) of its model,
996    /// **without** rebuilding the whole registry. `registry` must be the
997    /// same registry uploaded (same entry ids), with chain `chain_id`'s
998    /// entries already edited (`model_mut` + `rebuild_lod`).
999    ///
1000    /// For each entry: occupancy + color_offsets are dims-fixed, so they
1001    /// are written in place; colors + dirs (variable, parallel) go through
1002    /// the suballocator — written in place when they fit the slack,
1003    /// relocated (with a `model_meta` rewrite) when they outgrow it, and
1004    /// only when the buffer tail overflows are colors/dirs grown + the
1005    /// whole registry repacked. Instances / cull / colmul are untouched
1006    /// (a carve never moves an instance or grows its bounds) — that is the
1007    /// win over [`Self::upload`].
1008    ///
1009    /// # Panics (debug)
1010    /// If an entry's dims changed (occupancy / color_offsets length), which
1011    /// the in-place path can't absorb — growing dims needs a full
1012    /// re-upload via [`Self::upload`].
1013    pub fn update_model(
1014        &mut self,
1015        device: &wgpu::Device,
1016        queue: &wgpu::Queue,
1017        registry: &SpriteModelRegistry,
1018        chain_id: u32,
1019    ) {
1020        let entries = self.chains[chain_id as usize].clone();
1021        let mut grew = false;
1022        for &e in &entries {
1023            let e = e as usize;
1024            let m = &registry.entries[e];
1025
1026            // Dims-fixed arrays: assert unchanged, then write in place.
1027            debug_assert_eq!(
1028                m.occupancy.len() as u32,
1029                self.occ_lens[e],
1030                "update_model: entry {e} occupancy length changed (dims grew?)"
1031            );
1032            debug_assert_eq!(
1033                m.color_offsets.len() as u32,
1034                self.coloff_lens[e],
1035                "update_model: entry {e} color_offsets length changed (dims grew?)"
1036            );
1037            queue.write_buffer(
1038                &self.occupancy,
1039                u64::from(self.meta[e].occupancy_offset) * 4,
1040                bytemuck::cast_slice(&m.occupancy),
1041            );
1042            queue.write_buffer(
1043                &self.color_offsets,
1044                u64::from(self.meta[e].color_offsets_offset) * 4,
1045                bytemuck::cast_slice(&m.color_offsets),
1046            );
1047
1048            // Variable colors/dirs via the suballocator.
1049            let new_len = m.colors.len() as u32;
1050            match self.colors_alloc.place(e, new_len) {
1051                Some(off) => {
1052                    queue.write_buffer(
1053                        &self.colors,
1054                        u64::from(off) * 4,
1055                        bytemuck::cast_slice(&m.colors),
1056                    );
1057                    queue.write_buffer(
1058                        &self.dirs,
1059                        u64::from(off) * 4,
1060                        bytemuck::cast_slice(&m.dirs),
1061                    );
1062                    if self.meta[e].colors_offset != off {
1063                        // Relocated — rewrite this entry's meta record.
1064                        self.meta[e].colors_offset = off;
1065                        queue.write_buffer(
1066                            &self.model_meta,
1067                            (e * std::mem::size_of::<SpriteModelMeta>()) as u64,
1068                            bytemuck::bytes_of(&self.meta[e]),
1069                        );
1070                    }
1071                }
1072                None => grew = true,
1073            }
1074        }
1075
1076        // Buffer overflow on at least one entry → grow colors/dirs and
1077        // repack the WHOLE registry (rare; offsets for every entry move).
1078        if grew {
1079            self.grow_and_repack(device, queue, registry);
1080        }
1081    }
1082
1083    /// Grow the `colors`/`dirs` buffers and repack every entry compactly
1084    /// (with fresh slack) when an [`Self::update_model`] edit overflowed
1085    /// the buffer tail. Recreates both buffers (the next frame's bind
1086    /// group picks up the new handles) and rewrites every `model_meta`
1087    /// `colors_offset`. O(registry) but rare — logged so a growth burst
1088    /// is visible.
1089    fn grow_and_repack(
1090        &mut self,
1091        device: &wgpu::Device,
1092        queue: &wgpu::Queue,
1093        registry: &SpriteModelRegistry,
1094    ) {
1095        self.repack_colors_dirs(device, registry);
1096        // Every entry's colors_offset moved → rewrite the whole meta table.
1097        queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
1098    }
1099
1100    /// Repack `colors`/`dirs` compactly (with fresh slack) from the full
1101    /// `registry`, recreating both buffers and updating every CPU
1102    /// `meta[e].colors_offset`. Does **not** touch the GPU `model_meta`
1103    /// buffer — the caller writes it ([`Self::grow_and_repack`] writes the
1104    /// whole table; [`Self::add_model`] writes it once after all entries
1105    /// are placed). O(registry) but rare — logged so a growth burst is
1106    /// visible.
1107    fn repack_colors_dirs(&mut self, device: &wgpu::Device, registry: &SpriteModelRegistry) {
1108        // Dead (removed) entries collapse to 0 length so they reclaim no
1109        // space; live entries keep their colours.
1110        let new_lens: Vec<u32> = registry
1111            .entries
1112            .iter()
1113            .enumerate()
1114            .map(|(e, m)| {
1115                if self.dead[e] {
1116                    0
1117                } else {
1118                    m.colors.len() as u32
1119                }
1120            })
1121            .collect();
1122        self.colors_alloc.repack(&new_lens);
1123        let cap_total = self.colors_alloc.cap_total();
1124
1125        let mut all_colors = vec![0u32; cap_total as usize];
1126        let mut all_dirs = vec![0u32; cap_total as usize];
1127        for (e, m) in registry.entries.iter().enumerate() {
1128            if self.dead[e] {
1129                self.meta[e].colors_offset = 0;
1130                continue;
1131            }
1132            let off = self.colors_alloc.slot(e).off as usize;
1133            all_colors[off..off + m.colors.len()].copy_from_slice(&m.colors);
1134            all_dirs[off..off + m.dirs.len()].copy_from_slice(&m.dirs);
1135            self.meta[e].colors_offset = off as u32;
1136        }
1137        self.colors = storage_dst_u32_cap(
1138            device,
1139            "roxlap-gpu sprite_reg.colors",
1140            &all_colors,
1141            cap_total,
1142        );
1143        self.dirs = storage_dst_u32_cap(device, "roxlap-gpu sprite_reg.dirs", &all_dirs, cap_total);
1144        eprintln!("roxlap-gpu: sprite registry colors/dirs grew + repacked to {cap_total} words");
1145    }
1146
1147    /// Append a new model (its full LOD chain) to the resident registry
1148    /// **without** re-uploading the existing models' volumes — the
1149    /// incremental counterpart to a full [`Self::upload`], for streaming
1150    /// in new geometry (unique asteroids, generated meshes).
1151    ///
1152    /// Contract (mirrors [`Self::update_model`]): the caller owns the
1153    /// `SpriteModelRegistry`, has just appended this chain to it (e.g. via
1154    /// [`SpriteModelRegistry::add_lod`]), and passes the resulting
1155    /// `chain_id`. The chain's entries must be the registry's newest (ids
1156    /// `>= ` the resident entry count) — entries are append-only.
1157    ///
1158    /// The large `colors`/`dirs`/`occupancy`/`color_offsets` buffers carry
1159    /// slack and bump-append the new entries in place; a buffer that
1160    /// overflows is grown (with slack) and rebuilt once from the registry
1161    /// (amortised O(1) per add). The small `model_meta` table is rewritten
1162    /// each call. After this, [`Self::append_instances`] can reference the
1163    /// new `chain_id`.
1164    pub fn add_model(
1165        &mut self,
1166        device: &wgpu::Device,
1167        queue: &wgpu::Queue,
1168        registry: &SpriteModelRegistry,
1169        chain_id: u32,
1170    ) {
1171        let entries = registry.chains[chain_id as usize].clone();
1172        debug_assert_eq!(
1173            chain_id as usize,
1174            self.chains.len(),
1175            "add_model: chains must be appended in order"
1176        );
1177
1178        // CPU bookkeeping: assign each new entry a tight occ/coloff offset
1179        // and an allocator slot for colors/dirs. `need_colors_grow` marks
1180        // a slot that didn't fit → a colors/dirs repack below.
1181        let mut need_colors_grow = false;
1182        for &e in &entries {
1183            let e = e as usize;
1184            debug_assert_eq!(
1185                e,
1186                self.meta.len(),
1187                "add_model: entries must be appended in order"
1188            );
1189            let m = &registry.entries[e];
1190            let occ_off = self.occ_used;
1191            let coloff_off = self.coloff_used;
1192            self.occ_used += m.occupancy.len() as u32;
1193            self.coloff_used += m.color_offsets.len() as u32;
1194            let colors_off = match self.colors_alloc.push(m.colors.len() as u32) {
1195                Some(off) => off,
1196                None => {
1197                    need_colors_grow = true;
1198                    0 // placeholder; repack assigns the real offset
1199                }
1200            };
1201            self.meta.push(SpriteModelMeta {
1202                occupancy_offset: occ_off,
1203                colors_offset: colors_off,
1204                color_offsets_offset: coloff_off,
1205                occ_words_per_col: m.occ_words_per_col,
1206                dims: m.dims,
1207                _pad0: 0,
1208                pivot: m.pivot,
1209                voxel_world_size: m.voxel_world_size,
1210            });
1211            self.occ_lens.push(m.occupancy.len() as u32);
1212            self.coloff_lens.push(m.color_offsets.len() as u32);
1213            self.dead.push(false);
1214        }
1215        self.chains.push(entries.clone());
1216
1217        // occupancy + color_offsets: grow+rebuild on overflow, else write
1218        // the new tails in place.
1219        self.sync_concat(device, queue, registry, &entries, ConcatBuf::Occupancy);
1220        self.sync_concat(device, queue, registry, &entries, ConcatBuf::ColorOffsets);
1221
1222        // colors/dirs: repack on overflow (rebuilds both + every CPU
1223        // colors_offset), else write the new entries at their slots.
1224        if need_colors_grow {
1225            self.repack_colors_dirs(device, registry);
1226        } else {
1227            for &e in &entries {
1228                let e = e as usize;
1229                let m = &registry.entries[e];
1230                let off = u64::from(self.meta[e].colors_offset) * 4;
1231                queue.write_buffer(&self.colors, off, bytemuck::cast_slice(&m.colors));
1232                queue.write_buffer(&self.dirs, off, bytemuck::cast_slice(&m.dirs));
1233            }
1234        }
1235
1236        // model_meta: grow the record buffer if needed, then rewrite the
1237        // whole (small) table — covers both new records and any
1238        // colors_offset relocations from a repack.
1239        let count = self.meta.len() as u32;
1240        if count > self.meta_cap {
1241            self.meta_cap = grow_records(count);
1242            self.model_meta = storage_dst_pod_cap(
1243                device,
1244                "roxlap-gpu sprite_reg.model_meta",
1245                &self.meta,
1246                self.meta_cap,
1247            );
1248        } else {
1249            queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
1250        }
1251    }
1252
1253    /// Sync one tightly-concatenated buffer (`occupancy` or
1254    /// `color_offsets`) after `add_model` appended `new_entries`: if the
1255    /// used length now exceeds capacity, grow (with slack) and rebuild the
1256    /// whole buffer from the registry; otherwise write just the appended
1257    /// tails at their offsets.
1258    fn sync_concat(
1259        &mut self,
1260        device: &wgpu::Device,
1261        queue: &wgpu::Queue,
1262        registry: &SpriteModelRegistry,
1263        new_entries: &[u32],
1264        which: ConcatBuf,
1265    ) {
1266        let (used, cap) = match which {
1267            ConcatBuf::Occupancy => (self.occ_used, self.occ_cap),
1268            ConcatBuf::ColorOffsets => (self.coloff_used, self.coloff_cap),
1269        };
1270        if used > cap {
1271            let new_cap = grow_words(used);
1272            let all: Vec<u32> = registry
1273                .entries
1274                .iter()
1275                .flat_map(|m| concat_data(m, which).iter().copied())
1276                .collect();
1277            let label = match which {
1278                ConcatBuf::Occupancy => "roxlap-gpu sprite_reg.occupancy",
1279                ConcatBuf::ColorOffsets => "roxlap-gpu sprite_reg.color_offsets",
1280            };
1281            let buf = storage_dst_u32_cap(device, label, &all, new_cap);
1282            match which {
1283                ConcatBuf::Occupancy => {
1284                    self.occupancy = buf;
1285                    self.occ_cap = new_cap;
1286                }
1287                ConcatBuf::ColorOffsets => {
1288                    self.color_offsets = buf;
1289                    self.coloff_cap = new_cap;
1290                }
1291            }
1292        } else {
1293            let target = match which {
1294                ConcatBuf::Occupancy => &self.occupancy,
1295                ConcatBuf::ColorOffsets => &self.color_offsets,
1296            };
1297            for &e in new_entries {
1298                let e = e as usize;
1299                let off = match which {
1300                    ConcatBuf::Occupancy => self.meta[e].occupancy_offset,
1301                    ConcatBuf::ColorOffsets => self.meta[e].color_offsets_offset,
1302                };
1303                queue.write_buffer(
1304                    target,
1305                    u64::from(off) * 4,
1306                    bytemuck::cast_slice(concat_data(&registry.entries[e], which)),
1307                );
1308            }
1309        }
1310    }
1311
1312    /// Number of removed-but-not-yet-compacted models (tombstoned chains).
1313    /// A caller streams `add_model` / `remove_model` and calls
1314    /// [`Self::compact`] once this (relative to [`Self::live_model_count`])
1315    /// crosses a threshold.
1316    #[must_use]
1317    pub fn dead_model_count(&self) -> usize {
1318        self.chains.iter().filter(|c| c.is_empty()).count()
1319    }
1320
1321    /// Number of live (non-removed) models.
1322    #[must_use]
1323    pub fn live_model_count(&self) -> usize {
1324        self.chains.iter().filter(|c| !c.is_empty()).count()
1325    }
1326
1327    /// Remove a model (tombstone its LOD chain) — the counterpart to
1328    /// [`Self::add_model`]. O(chain length): marks the chain's entries
1329    /// dead and frees their `colors`/`dirs` slots for reuse by a later
1330    /// `add_model`. The `occupancy` / `color_offsets` holes are **not**
1331    /// reclaimed until [`Self::compact`]; entry ids (and the caller's other
1332    /// `chain_id`s) stay stable.
1333    ///
1334    /// Instances of the removed chain are **not** dropped here — they
1335    /// linger in the cull set but draw as nothing (skipped in
1336    /// [`Self::cull_bin_upload`]); the caller removes them via
1337    /// [`Self::remove_instance`] when convenient. A no-op if `chain_id` is
1338    /// out of range or already removed.
1339    pub fn remove_model(&mut self, chain_id: u32) {
1340        let Some(entries) = self.chains.get(chain_id as usize).cloned() else {
1341            return;
1342        };
1343        if entries.is_empty() {
1344            return; // already removed
1345        }
1346        for &e in &entries {
1347            let e = e as usize;
1348            self.dead[e] = true;
1349            self.colors_alloc.free(e);
1350        }
1351        self.chains[chain_id as usize] = Vec::new(); // tombstone
1352    }
1353
1354    /// Reclaim the holes left by [`Self::remove_model`]: rebuild the shared
1355    /// volume buffers from the live entries only, dropping every dead
1356    /// entry's data. Entry ids and `chain_id`s are preserved (dead entries
1357    /// keep a zero-length `meta` tombstone), so the caller's handles stay
1358    /// valid and no remap is needed.
1359    ///
1360    /// `registry` must be the resident one (entry ids 1:1, as for
1361    /// [`Self::add_model`] / [`Self::update_model`]). O(live volume) —
1362    /// call it when [`Self::dead_model_count`] is high, not every frame.
1363    pub fn compact(
1364        &mut self,
1365        device: &wgpu::Device,
1366        queue: &wgpu::Queue,
1367        registry: &SpriteModelRegistry,
1368    ) {
1369        // occupancy + color_offsets: re-pack live entries tightly, rewrite
1370        // each live entry's meta offset, zero the dead ones.
1371        self.compact_concat(device, registry, ConcatBuf::Occupancy);
1372        self.compact_concat(device, registry, ConcatBuf::ColorOffsets);
1373        // colors/dirs: the dead-aware repack already drops dead entries.
1374        self.repack_colors_dirs(device, registry);
1375        // model_meta: rewrite the (unchanged-length) table with the new
1376        // offsets. Buffer count didn't change, so no grow needed.
1377        queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
1378    }
1379
1380    /// Rebuild one tightly-concatenated buffer from live entries only
1381    /// (used by [`Self::compact`]): assign each live entry a fresh tight
1382    /// offset, zero dead entries' offset, and recreate the buffer with
1383    /// slack.
1384    fn compact_concat(
1385        &mut self,
1386        device: &wgpu::Device,
1387        registry: &SpriteModelRegistry,
1388        which: ConcatBuf,
1389    ) {
1390        let mut all: Vec<u32> = Vec::new();
1391        for e in 0..self.meta.len() {
1392            if self.dead[e] {
1393                match which {
1394                    ConcatBuf::Occupancy => self.meta[e].occupancy_offset = 0,
1395                    ConcatBuf::ColorOffsets => self.meta[e].color_offsets_offset = 0,
1396                }
1397                continue;
1398            }
1399            let off = all.len() as u32;
1400            match which {
1401                ConcatBuf::Occupancy => self.meta[e].occupancy_offset = off,
1402                ConcatBuf::ColorOffsets => self.meta[e].color_offsets_offset = off,
1403            }
1404            all.extend_from_slice(concat_data(&registry.entries[e], which));
1405        }
1406        let used = all.len() as u32;
1407        let cap = grow_words(used);
1408        let (label, buf) = match which {
1409            ConcatBuf::Occupancy => ("roxlap-gpu sprite_reg.occupancy", &mut self.occupancy),
1410            ConcatBuf::ColorOffsets => (
1411                "roxlap-gpu sprite_reg.color_offsets",
1412                &mut self.color_offsets,
1413            ),
1414        };
1415        *buf = storage_dst_u32_cap(device, label, &all, cap);
1416        match which {
1417            ConcatBuf::Occupancy => {
1418                self.occ_used = used;
1419                self.occ_cap = cap;
1420            }
1421            ConcatBuf::ColorOffsets => {
1422                self.coloff_used = used;
1423                self.coloff_cap = cap;
1424            }
1425        }
1426    }
1427
1428    /// GPU.10.3 — frustum-cull, pack the visible subset into the
1429    /// instance buffer, then bin those instances into screen tiles:
1430    /// project each visible bounding sphere to a screen AABB and append
1431    /// its (visible) index to every overlapped tile. Uploads the
1432    /// instance buffer + `tile_ranges` (per-tile offset/count) +
1433    /// `tile_instances` (flat grouped indices), growing the tile
1434    /// buffers as needed. Returns `(visible_count, tiles_x, tiles_y)`.
1435    #[allow(clippy::too_many_arguments)]
1436    pub fn cull_bin_upload(
1437        &mut self,
1438        device: &wgpu::Device,
1439        queue: &wgpu::Queue,
1440        f: &ViewFrustum,
1441        screen_w: u32,
1442        screen_h: u32,
1443        tile_size: u32,
1444        lod_px: f32,
1445    ) -> (u32, u32, u32) {
1446        let tiles_x = screen_w.div_ceil(tile_size).max(1);
1447        let tiles_y = screen_h.div_ceil(tile_size).max(1);
1448        let n_tiles = (tiles_x * tiles_y) as usize;
1449
1450        let nw = (1.0 + f.half_w * f.half_w).sqrt();
1451        let nh = (1.0 + f.half_h * f.half_h).sqrt();
1452        let cx = screen_w as f32 * 0.5;
1453        let cy = screen_h as f32 * 0.5;
1454        let px_per_world = cx / f.half_w; // isotropic: == cy/half_h
1455        let ts = tile_size as f32;
1456        let tx_max = tiles_x as i32 - 1;
1457        let ty_max = tiles_y as i32 - 1;
1458
1459        let mut visible: Vec<SpriteInstanceGpu> = Vec::with_capacity(self.cull.len());
1460        // Per-visible tile AABB (tx0, tx1, ty0, ty1) for the bin pass.
1461        let mut boxes: Vec<[i32; 4]> = Vec::with_capacity(self.cull.len());
1462        // Per-visible kv6colmul tables, flattened to two u32 per u64
1463        // entry (lanes 0|1, then 2|3), packed in visible order so the
1464        // shader indexes `colmul[inst_idx*512 + dir*2 + {0,1}]`.
1465        let mut visible_colmul: Vec<u32> = Vec::with_capacity(self.cull.len() * 512);
1466        let mut counts = vec![0u32; n_tiles];
1467
1468        for ci in &self.cull {
1469            // Skip instances of a removed model (tombstoned chain) — they
1470            // linger in `cull` until the caller drops them, but draw as
1471            // nothing.
1472            if self.chains[ci.chain_id as usize].is_empty() {
1473                continue;
1474            }
1475            let rel = [
1476                ci.center[0] - f.pos[0],
1477                ci.center[1] - f.pos[1],
1478                ci.center[2] - f.pos[2],
1479            ];
1480            let z = dot3(rel, f.forward);
1481            let r = ci.radius;
1482            if z + r < 0.0 || z - r > f.far {
1483                continue; // behind / beyond far
1484            }
1485            let x = dot3(rel, f.right);
1486            if (x - f.half_w * z) > r * nw || (-x - f.half_w * z) > r * nw {
1487                continue; // right / left
1488            }
1489            let y = dot3(rel, f.down);
1490            if (y - f.half_h * z) > r * nh || (-y - f.half_h * z) > r * nh {
1491                continue; // bottom / top
1492            }
1493
1494            // Visible: project the sphere to a screen AABB → tile range.
1495            let (tx0, tx1, ty0, ty1) = if z > 1e-3 {
1496                let sx = cx + (x / z) * px_per_world;
1497                let sy = cy + (y / z) * px_per_world;
1498                let sr = (r / z) * px_per_world;
1499                (
1500                    (((sx - sr) / ts).floor() as i32).clamp(0, tx_max),
1501                    (((sx + sr) / ts).floor() as i32).clamp(0, tx_max),
1502                    (((sy - sr) / ts).floor() as i32).clamp(0, ty_max),
1503                    (((sy + sr) / ts).floor() as i32).clamp(0, ty_max),
1504                )
1505            } else {
1506                (0, tx_max, 0, ty_max)
1507            };
1508            // GPU.10.4 — pick the LOD level by projected voxel size:
1509            // choose the coarsest level whose voxel still covers at
1510            // least `lod_px` screen pixels, i.e. step up once a mip-0
1511            // voxel would be smaller than that. `lod_px = 1` is the
1512            // natural "don't go sub-pixel" threshold; larger values
1513            // force LOD in closer (tuning/inspection).
1514            let chain = &self.chains[ci.chain_id as usize];
1515            let level = if z > 1e-3 && chain.len() > 1 {
1516                let voxel_px = px_per_world / z; // mip-0 voxel screen size
1517                ((lod_px / voxel_px).log2().ceil().max(0.0) as usize).min(chain.len() - 1)
1518            } else {
1519                0
1520            };
1521            let mut g = ci.gpu;
1522            g.model_id = chain[level];
1523            visible.push(g);
1524            boxes.push([tx0, tx1, ty0, ty1]);
1525            for &w in ci.colmul.iter() {
1526                visible_colmul.push((w & 0xffff_ffff) as u32);
1527                visible_colmul.push((w >> 32) as u32);
1528            }
1529            for ty in ty0..=ty1 {
1530                for tx in tx0..=tx1 {
1531                    counts[(ty * tiles_x as i32 + tx) as usize] += 1;
1532                }
1533            }
1534        }
1535
1536        if visible.is_empty() {
1537            return (0, tiles_x, tiles_y);
1538        }
1539
1540        // Prefix-sum counts → per-tile offsets; build the flat grouped
1541        // index list.
1542        let mut tile_ranges = vec![0u32; n_tiles * 2];
1543        let mut running = 0u32;
1544        for t in 0..n_tiles {
1545            tile_ranges[2 * t] = running; // offset
1546            tile_ranges[2 * t + 1] = counts[t]; // count
1547            running += counts[t];
1548        }
1549        let total = running as usize;
1550        let mut tile_instances = vec![0u32; total.max(1)];
1551        let mut cursor: Vec<u32> = (0..n_tiles).map(|t| tile_ranges[2 * t]).collect();
1552        for (vis_idx, b) in boxes.iter().enumerate() {
1553            for ty in b[2]..=b[3] {
1554                for tx in b[0]..=b[1] {
1555                    let t = (ty * tiles_x as i32 + tx) as usize;
1556                    tile_instances[cursor[t] as usize] = vis_idx as u32;
1557                    cursor[t] += 1;
1558                }
1559            }
1560        }
1561
1562        // Upload: instances + (grown) tile buffers. Grow a tile buffer
1563        // only when this frame needs more than its capacity (wgpu has
1564        // no Clone on Buffer, so we replace the field in place).
1565        queue.write_buffer(&self.instances, 0, bytemuck::cast_slice(&visible));
1566        let need_ranges = tile_ranges.len() as u32;
1567        if need_ranges > self.tile_ranges_cap {
1568            self.tile_ranges_cap = need_ranges.next_power_of_two();
1569            self.tile_ranges = storage_dst_u32(
1570                device,
1571                "roxlap-gpu sprite_reg.tile_ranges",
1572                self.tile_ranges_cap,
1573            );
1574        }
1575        let need_inst = tile_instances.len() as u32;
1576        if need_inst > self.tile_instances_cap {
1577            self.tile_instances_cap = need_inst.next_power_of_two();
1578            self.tile_instances = storage_dst_u32(
1579                device,
1580                "roxlap-gpu sprite_reg.tile_instances",
1581                self.tile_instances_cap,
1582            );
1583        }
1584        queue.write_buffer(&self.tile_ranges, 0, bytemuck::cast_slice(&tile_ranges));
1585        queue.write_buffer(
1586            &self.tile_instances,
1587            0,
1588            bytemuck::cast_slice(&tile_instances),
1589        );
1590        let need_colmul = visible_colmul.len() as u32;
1591        if need_colmul > self.colmul_cap {
1592            self.colmul_cap = need_colmul.next_power_of_two();
1593            self.colmul = storage_dst_u32(device, "roxlap-gpu sprite_reg.colmul", self.colmul_cap);
1594        }
1595        queue.write_buffer(&self.colmul, 0, bytemuck::cast_slice(&visible_colmul));
1596
1597        (visible.len() as u32, tiles_x, tiles_y)
1598    }
1599}
1600
/// GPU.12 incremental — per-entry placement of one model's `colors`
/// (and the parallel `dirs`) within the shared registry buffers: a
/// `[off, off+cap)` word window holding `len` live words. `cap >= len`
/// gives slack so a carve that *grows* the surface-voxel count can be
/// rewritten in place without relocating.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct ColorSlot {
    /// First word of the window.
    off: u32,
    /// Window size in words (0 for a tombstone / empty entry).
    cap: u32,
    /// Live words currently stored in the window.
    len: u32,
}

/// First-fit suballocator over the parallel `colors`/`dirs` buffers
/// (same offsets/ranks → one allocator drives both). Each registry
/// entry owns a [`ColorSlot`]; growth past a slot's `cap` relocates it
/// (freeing the old block) via the free list or a bump tail, and only
/// when the tail would exceed `cap_total` does the caller grow + repack
/// the whole buffer. Pure (no GPU) so it unit-tests on its own.
#[derive(Debug, Default)]
struct ColorsAllocator {
    /// Per-entry slot, indexed by entry id.
    slots: Vec<ColorSlot>,
    /// Freed `(off, cap)` blocks available for first-fit reuse. Never
    /// contains zero-capacity blocks.
    free: Vec<(u32, u32)>,
    /// Next bump-allocation position (words).
    tail: u32,
    /// Total buffer capacity in words.
    cap_total: u32,
}

/// Slack-padded capacity for a `len`-word array: +25% + 16 words, so a
/// few extra surface voxels from a carve fit without relocating.
fn slot_cap(len: u32) -> u32 {
    len + len / 4 + 16
}

/// Slack capacity (words) for a grown concatenated buffer: +50% + 256, so
/// a burst of `add_model` calls bump-appends rather than re-growing every
/// time. Also used by [`ColorsAllocator::repack`] for `cap_total`.
fn grow_words(used: u32) -> u32 {
    used + used / 2 + 256
}

/// Slack capacity (records) for a grown `model_meta` buffer: +50% + 8.
fn grow_records(count: u32) -> u32 {
    count + count / 2 + 8
}

impl ColorsAllocator {
    /// Lay every entry out contiguously (with per-slot slack) and add a
    /// global tail headroom so early growth bump-allocates rather than
    /// repacks.
    fn new(entry_lens: &[u32]) -> Self {
        let mut alloc = Self::default();
        alloc.repack(entry_lens);
        alloc
    }

    /// Current slot of `entry`. Panics if `entry` is out of range.
    fn slot(&self, entry: usize) -> ColorSlot {
        self.slots[entry]
    }

    /// Total buffer capacity in words.
    fn cap_total(&self) -> u32 {
        self.cap_total
    }

    /// Repack ALL entries compactly to fit `new_lens`, resetting the
    /// free list + tail and choosing a fresh `cap_total` with headroom.
    /// Used at initial build and on a buffer grow.
    fn repack(&mut self, new_lens: &[u32]) {
        self.free.clear();
        self.slots.clear();
        self.slots.reserve(new_lens.len());
        let mut off = 0u32;
        for &len in new_lens {
            // A 0-length (dead / removed) entry takes no space — it keeps a
            // tombstone slot so entry ids stay positional.
            let cap = if len == 0 { 0 } else { slot_cap(len) };
            self.slots.push(ColorSlot { off, cap, len });
            off += cap;
        }
        self.tail = off;
        // Global headroom: the same +50% + 256 words as the other
        // concatenated buffers.
        self.cap_total = grow_words(off);
    }

    /// Claim a block able to hold `new_len` words: first-fit from the free
    /// list (the whole freed block is taken), otherwise a fresh
    /// `slot_cap(new_len)` block bumped off the tail. Returns the block's
    /// `(off, cap)`, or `None` when neither fits within `cap_total`; in
    /// that case the allocator is left unchanged.
    fn alloc_block(&mut self, new_len: u32) -> Option<(u32, u32)> {
        if let Some(i) = self.free.iter().position(|&(_, cap)| cap >= new_len) {
            return Some(self.free.remove(i));
        }
        let want = slot_cap(new_len);
        // `checked_add` so an (unrealistically) huge request reports
        // "does not fit" instead of wrapping past the capacity check.
        let end = self
            .tail
            .checked_add(want)
            .filter(|&end| end <= self.cap_total)?;
        let off = self.tail;
        self.tail = end;
        Some((off, want))
    }

    /// Place `new_len` words for `entry`. Returns `Some(off)` with the
    /// (possibly relocated) slot offset, or `None` if the buffer must
    /// grow + repack (the slot is then left untouched). On relocation the
    /// old block is pushed to the free list — unless it had zero capacity,
    /// in which case there is nothing to reuse; an in-place fit returns the
    /// unchanged offset.
    fn place(&mut self, entry: usize, new_len: u32) -> Option<u32> {
        let cur = self.slots[entry];
        if new_len <= cur.cap {
            self.slots[entry].len = new_len;
            return Some(cur.off);
        }
        // Allocate before releasing the old block so the entry can never be
        // handed its own (too small) block back.
        let (off, cap) = self.alloc_block(new_len)?;
        if cur.cap > 0 {
            self.free.push((cur.off, cur.cap));
        }
        self.slots[entry] = ColorSlot {
            off,
            cap,
            len: new_len,
        };
        Some(off)
    }

    /// Append a slot for a brand-new entry of `new_len` words (used by
    /// [`SpriteRegistryResident::add_model`]). Returns `Some(off)` placed
    /// via the free list or the bump tail, or `None` if the buffer must
    /// grow + repack — in which case **no** slot is pushed (the caller's
    /// repack rebuilds every slot from scratch).
    fn push(&mut self, new_len: u32) -> Option<u32> {
        let (off, cap) = self.alloc_block(new_len)?;
        self.slots.push(ColorSlot {
            off,
            cap,
            len: new_len,
        });
        Some(off)
    }

    /// Free `entry`'s slot back to the pool (used by
    /// [`SpriteRegistryResident::remove_model`]). Its `(off, cap)` block
    /// joins the free list for first-fit reuse by a later [`Self::push`];
    /// the slot is zeroed so a repack treats it as a 0-length tombstone.
    /// Freeing an already-empty slot is a no-op.
    fn free(&mut self, entry: usize) {
        let tombstone = ColorSlot {
            off: 0,
            cap: 0,
            len: 0,
        };
        let released = std::mem::replace(&mut self.slots[entry], tombstone);
        if released.cap > 0 {
            self.free.push((released.off, released.cap));
        }
    }
}
1773
1774/// Create a STORAGE buffer of u32s; pads empty input (wgpu rejects
1775/// zero-sized storage bindings).
1776#[allow(dead_code)]
1777fn storage_u32(device: &wgpu::Device, label: &str, data: &[u32]) -> wgpu::Buffer {
1778    use wgpu::util::DeviceExt;
1779    let bytes: &[u8] = if data.is_empty() {
1780        bytemuck::cast_slice(&[0u32])
1781    } else {
1782        bytemuck::cast_slice(data)
1783    };
1784    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
1785        label: Some(label),
1786        contents: bytes,
1787        usage: wgpu::BufferUsages::STORAGE,
1788    })
1789}
1790
1791/// Create an uninitialised `STORAGE | COPY_DST` `u32` buffer of `cap`
1792/// words (≥1). Written each frame via `queue.write_buffer`.
1793fn storage_dst_u32(device: &wgpu::Device, label: &str, cap: u32) -> wgpu::Buffer {
1794    device.create_buffer(&wgpu::BufferDescriptor {
1795        label: Some(label),
1796        size: u64::from(cap.max(1)) * 4,
1797        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
1798        mapped_at_creation: false,
1799    })
1800}
1801
1802/// Create a `STORAGE | COPY_DST` `u32` buffer of `cap` words (≥ data
1803/// length, ≥ 1), initialised with `data` at offset 0 and the tail left
1804/// zeroed. Unlike [`storage_u32`] (STORAGE-only, exact-size) this both
1805/// reserves spare capacity and is `COPY_DST`, so the incremental
1806/// [`SpriteRegistryResident::update_model`] can `write_buffer` a growing
1807/// `colors`/`dirs` array in place. Filled via `mapped_at_creation` so no
1808/// queue is needed at upload time.
1809fn storage_dst_u32_cap(device: &wgpu::Device, label: &str, data: &[u32], cap: u32) -> wgpu::Buffer {
1810    let cap = cap.max(data.len() as u32).max(1);
1811    let buf = device.create_buffer(&wgpu::BufferDescriptor {
1812        label: Some(label),
1813        size: u64::from(cap) * 4,
1814        usage: wgpu::BufferUsages::STORAGE
1815            | wgpu::BufferUsages::COPY_DST
1816            | wgpu::BufferUsages::COPY_SRC,
1817        mapped_at_creation: true,
1818    });
1819    if !data.is_empty() {
1820        buf.slice(..(data.len() as u64 * 4))
1821            .get_mapped_range_mut()
1822            .copy_from_slice(bytemuck::cast_slice(data));
1823    }
1824    buf.unmap();
1825    buf
1826}
1827
1828/// Create a `STORAGE | COPY_DST` buffer of Pod records, exact-size
1829/// (≥ 1, zero-padded), so individual records can be rewritten in place
1830/// by [`SpriteRegistryResident::update_model`] on a relocation. The
1831/// record *count* never changes on an incremental edit (no model is
1832/// added/removed), so no slack is needed here.
1833fn storage_dst_pod<T: Pod + Zeroable>(
1834    device: &wgpu::Device,
1835    label: &str,
1836    data: &[T],
1837) -> wgpu::Buffer {
1838    let one = [T::zeroed()];
1839    let src: &[T] = if data.is_empty() { &one } else { data };
1840    let buf = device.create_buffer(&wgpu::BufferDescriptor {
1841        label: Some(label),
1842        size: std::mem::size_of_val(src) as u64,
1843        usage: wgpu::BufferUsages::STORAGE
1844            | wgpu::BufferUsages::COPY_DST
1845            | wgpu::BufferUsages::COPY_SRC,
1846        mapped_at_creation: true,
1847    });
1848    buf.slice(..)
1849        .get_mapped_range_mut()
1850        .copy_from_slice(bytemuck::cast_slice(src));
1851    buf.unmap();
1852    buf
1853}
1854
1855/// Create a `STORAGE | COPY_DST` Pod buffer holding `cap` records
1856/// (≥ `data.len()`, ≥ 1), initialised with `data` at record 0 and the
1857/// tail zeroed. The slack lets [`SpriteRegistryResident::add_model`] grow
1858/// the `model_meta` table without re-growing on every add.
1859fn storage_dst_pod_cap<T: Pod + Zeroable>(
1860    device: &wgpu::Device,
1861    label: &str,
1862    data: &[T],
1863    cap: u32,
1864) -> wgpu::Buffer {
1865    let rec = std::mem::size_of::<T>() as u64;
1866    let cap = u64::from(cap.max(data.len() as u32).max(1));
1867    let buf = device.create_buffer(&wgpu::BufferDescriptor {
1868        label: Some(label),
1869        size: cap * rec,
1870        usage: wgpu::BufferUsages::STORAGE
1871            | wgpu::BufferUsages::COPY_DST
1872            | wgpu::BufferUsages::COPY_SRC,
1873        mapped_at_creation: true,
1874    });
1875    if !data.is_empty() {
1876        buf.slice(..(data.len() as u64 * rec))
1877            .get_mapped_range_mut()
1878            .copy_from_slice(bytemuck::cast_slice(data));
1879    }
1880    buf.unmap();
1881    buf
1882}
1883
1884/// Create a STORAGE buffer of Pod records; pads empty input with one
1885/// zeroed `T`.
1886#[allow(dead_code)]
1887fn storage_pod<T: Pod + Zeroable>(device: &wgpu::Device, label: &str, data: &[T]) -> wgpu::Buffer {
1888    use wgpu::util::DeviceExt;
1889    let one = [T::zeroed()];
1890    let src: &[T] = if data.is_empty() { &one } else { data };
1891    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
1892        label: Some(label),
1893        contents: bytemuck::cast_slice(src),
1894        usage: wgpu::BufferUsages::STORAGE,
1895    })
1896}
1897
1898#[cfg(test)]
1899mod tests {
1900    use super::*;
1901    use roxlap_formats::kv6::{Kv6, Voxel};
1902
1903    /// 2×1 kv6: column (0,0) has voxels at z=5 (red) and z=1 (green)
1904    /// stored OUT of z-order; column (1,0) has one voxel at z=3.
1905    fn kv6_unsorted() -> Kv6 {
1906        let mk = |z, col| Voxel {
1907            col,
1908            z,
1909            vis: 0,
1910            dir: 0,
1911        };
1912        Kv6 {
1913            xsiz: 2,
1914            ysiz: 1,
1915            zsiz: 8,
1916            xpiv: 0.0,
1917            ypiv: 0.0,
1918            zpiv: 0.0,
1919            voxels: vec![mk(5, 0xAA), mk(1, 0xBB), mk(3, 0xCC)],
1920            xlen: vec![2, 1],
1921            ylen: vec![vec![2], vec![1]],
1922            palette: None,
1923        }
1924    }
1925
1926    #[test]
1927    fn occupancy_bits_set_at_voxel_z() {
1928        let m = build_sprite_model(&kv6_unsorted());
1929        assert_eq!(m.dims, [2, 1, 8]);
1930        assert_eq!(m.occ_words_per_col, 1); // ceil(8/32)
1931                                            // col 0: bits 1 and 5; col 1: bit 3.
1932        assert_eq!(m.occupancy[0], (1 << 1) | (1 << 5));
1933        assert_eq!(m.occupancy[1], 1 << 3);
1934    }
1935
1936    #[test]
1937    fn colors_are_ascending_z_for_rank_lookup() {
1938        let m = build_sprite_model(&kv6_unsorted());
1939        // col 0 sorted ascending z ⇒ z=1 (green 0xBB) before z=5 (0xAA).
1940        assert_eq!(m.color_offsets, vec![0, 2, 3]);
1941        assert_eq!(&m.colors, &[0xBB, 0xAA, 0xCC]);
1942    }
1943
1944    #[test]
1945    fn identity_basis_inverts_to_identity() {
1946        let inv = mat3_inverse([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]);
1947        assert_eq!(inv, [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]);
1948    }
1949
1950    #[test]
1951    fn fork_is_independent_of_parent() {
1952        let mut reg = SpriteModelRegistry::new();
1953        let base = reg.add(build_sprite_model(&kv6_unsorted()));
1954        let forked = reg.fork(base);
1955        assert_ne!(base, forked);
1956        // Recolour only the fork.
1957        reg.model_mut(forked).recolor(|_| 0x11);
1958        // Parent colours untouched; fork fully overwritten.
1959        assert_eq!(&reg.model(base).colors, &[0xBB, 0xAA, 0xCC]);
1960        assert_eq!(&reg.model(forked).colors, &[0x11, 0x11, 0x11]);
1961    }
1962
1963    #[test]
1964    fn remove_frees_chain_data_keeps_ids_stable() {
1965        let mut reg = SpriteModelRegistry::new();
1966        let a = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
1967        let b = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
1968        let len_before = reg.len();
1969        assert!(reg.is_live(a) && reg.is_live(b));
1970
1971        reg.remove(a);
1972        // Chain `a` is tombstoned (its entries are freed to empty models;
1973        // they're unreachable via `model()` now — that's the tombstone).
1974        assert!(!reg.is_live(a));
1975        // `b` is untouched and still live; `len()` (next id) is unchanged.
1976        assert!(reg.is_live(b));
1977        assert_eq!(&reg.model(b).colors, &[0xBB, 0xAA, 0xCC]);
1978        assert_eq!(reg.len(), len_before);
1979
1980        // A later add mints a fresh id past the tombstone (no slot reuse).
1981        let c = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
1982        assert_eq!(c, len_before as u32);
1983        assert!(reg.is_live(c));
1984        // `b`'s id stayed valid across the remove + add round-trip.
1985        assert_eq!(&reg.model(b).colors, &[0xBB, 0xAA, 0xCC]);
1986    }
1987
1988    #[test]
1989    fn remove_is_idempotent_and_bounds_safe() {
1990        let mut reg = SpriteModelRegistry::new();
1991        let a = reg.add(build_sprite_model(&kv6_unsorted()));
1992        reg.remove(a);
1993        reg.remove(a); // already removed → no-op, no panic
1994        reg.remove(999); // out of range → no-op
1995        assert!(!reg.is_live(a));
1996        assert!(!reg.is_live(999));
1997    }
1998
1999    #[test]
2000    fn registry_gpu_structs_have_expected_sizes() {
2001        assert_eq!(std::mem::size_of::<SpriteModelMeta>(), 48);
2002        assert_eq!(std::mem::size_of::<SpriteInstanceGpu>(), 64);
2003    }
2004
2005    #[test]
2006    fn add_lod_builds_halving_mip_chain() {
2007        let mut reg = SpriteModelRegistry::new();
2008        // 8×8×8 single voxel-filled column model would be ideal, but
2009        // kv6_unsorted is 2×1×8 → mips: 2×1×8 → 1×1×4 → 1×1×2 → 1×1×1.
2010        let id = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2011        let m0 = reg.model(id);
2012        assert_eq!(m0.dims, [2, 1, 8]);
2013        assert!((m0.voxel_world_size - 1.0).abs() < 1e-6);
2014    }
2015
2016    /// kv6 from explicit voxels, ordered x-major/y-inner to match
2017    /// `build_sprite_model`'s column walk.
2018    fn kv6_from(xsiz: u32, ysiz: u32, zsiz: u32, voxels: &[(u32, u32, u16, u32)]) -> Kv6 {
2019        let mut ylen = vec![vec![0u16; ysiz as usize]; xsiz as usize];
2020        let mut flat = Vec::new();
2021        for x in 0..xsiz {
2022            for y in 0..ysiz {
2023                let mut col: Vec<(u16, u32)> = voxels
2024                    .iter()
2025                    .filter(|(vx, vy, _, _)| *vx == x && *vy == y)
2026                    .map(|(_, _, z, c)| (*z, *c))
2027                    .collect();
2028                col.sort_by_key(|(z, _)| *z);
2029                ylen[x as usize][y as usize] = col.len() as u16;
2030                for (z, c) in col {
2031                    flat.push(Voxel {
2032                        col: c,
2033                        z,
2034                        vis: 0,
2035                        dir: 0,
2036                    });
2037                }
2038            }
2039        }
2040        let xlen = ylen
2041            .iter()
2042            .map(|c| c.iter().map(|&v| u32::from(v)).sum())
2043            .collect();
2044        Kv6 {
2045            xsiz,
2046            ysiz,
2047            zsiz,
2048            xpiv: 0.0,
2049            ypiv: 0.0,
2050            zpiv: 0.0,
2051            voxels: flat,
2052            xlen,
2053            ylen,
2054            palette: None,
2055        }
2056    }
2057
2058    fn offsets_consistent(m: &SpriteModel) -> bool {
2059        let cols = (m.dims[0] * m.dims[1]) as usize;
2060        if m.color_offsets.len() != cols + 1 {
2061            return false;
2062        }
2063        // Monotonic non-decreasing + last == colors.len + each column's
2064        // span == its solid-voxel count.
2065        for w in m.color_offsets.windows(2) {
2066            if w[1] < w[0] {
2067                return false;
2068            }
2069        }
2070        m.color_offsets[cols] as usize == m.colors.len()
2071    }
2072
2073    #[test]
2074    fn carve_two_layers_keeps_offsets_consistent() {
2075        // Mirror the demo's carve: columns with voxels at varied z,
2076        // some sharing z=0/z=1, some not.
2077        let kv6 = kv6_from(
2078            3,
2079            2,
2080            8,
2081            &[
2082                (0, 0, 0, 0xA0),
2083                (0, 0, 1, 0xA1),
2084                (0, 0, 5, 0xA5),
2085                (1, 0, 1, 0xB1),
2086                (2, 1, 0, 0xC0),
2087                (2, 1, 3, 0xC3),
2088            ],
2089        );
2090        let mut m = build_sprite_model(&kv6);
2091        assert!(offsets_consistent(&m));
2092        for z in 0..2u32 {
2093            for y in 0..m.dims[1] {
2094                for x in 0..m.dims[0] {
2095                    m.set_voxel(x, y, z, None);
2096                }
2097            }
2098            assert!(offsets_consistent(&m), "inconsistent after carving z={z}");
2099            // downsample must not panic on the carved model.
2100            let _ = m.downsample();
2101        }
2102    }
2103
2104    #[test]
2105    fn set_voxel_inserts_replaces_and_clears() {
2106        // col 0 starts with z=1 (0xBB), z=5 (0xAA); col 1 with z=3 (0xCC).
2107        let mut m = build_sprite_model(&kv6_unsorted());
2108
2109        // Insert z=3 into col 0 (between z=1 and z=5) → rank 1.
2110        assert!(m.set_voxel(0, 0, 3, Some(0x55)));
2111        assert_eq!(m.occupancy[0], (1 << 1) | (1 << 3) | (1 << 5));
2112        // col 0 colours ascending z: 0xBB(z1), 0x55(z3), 0xAA(z5).
2113        assert_eq!(m.color_offsets, vec![0, 3, 4]);
2114        assert_eq!(&m.colors, &[0xBB, 0x55, 0xAA, 0xCC]);
2115
2116        // Replace z=3 in place (no offset shift).
2117        assert!(m.set_voxel(0, 0, 3, Some(0x66)));
2118        assert_eq!(&m.colors, &[0xBB, 0x66, 0xAA, 0xCC]);
2119        assert_eq!(m.color_offsets, vec![0, 3, 4]);
2120
2121        // Clear z=1 (rank 0) from col 0.
2122        assert!(m.set_voxel(0, 0, 1, None));
2123        assert_eq!(m.occupancy[0], (1 << 3) | (1 << 5));
2124        assert_eq!(m.color_offsets, vec![0, 2, 3]);
2125        assert_eq!(&m.colors, &[0x66, 0xAA, 0xCC]);
2126
2127        // No-ops: clear an empty voxel, edit out of bounds.
2128        assert!(!m.set_voxel(0, 0, 2, None));
2129        assert!(!m.set_voxel(9, 0, 0, Some(1)));
2130    }
2131
2132    #[test]
2133    fn rebuild_lod_refreshes_coarse_levels_from_mip0() {
2134        let mut reg = SpriteModelRegistry::new();
2135        let id = reg.add_lod(build_sprite_model(&kv6_unsorted()), 3);
2136        // Recolour mip-0 only via model_mut, then rebuild the ladder.
2137        reg.model_mut(id).recolor(|_| 0x0000_2000);
2138        reg.rebuild_lod(id);
2139        // The mip-1 average of all-0x2000 voxels is still 0x2000.
2140        let lvl1_entry = reg.chains[id as usize][1] as usize;
2141        assert!(reg.entries[lvl1_entry]
2142            .colors
2143            .iter()
2144            .all(|&c| c == 0x0000_2000));
2145    }
2146
2147    // ---- GPU.12 incremental: colors/dirs suballocator -----------------
2148
2149    /// Every slot fits its data, has slack, doesn't overlap the next, and
2150    /// the buffer reserves tail headroom past the last slot.
2151    fn alloc_invariants(a: &ColorsAllocator, lens: &[u32]) {
2152        let mut prev_end = 0u32;
2153        for (e, &len) in lens.iter().enumerate() {
2154            let s = a.slot(e);
2155            assert_eq!(s.len, len, "slot {e} len");
2156            assert!(s.cap >= s.len, "slot {e} cap >= len");
2157            // In a freshly repacked layout slots are in entry order.
2158            assert!(s.off >= prev_end, "slot {e} overlaps previous");
2159            assert!(s.off + s.cap <= a.cap_total(), "slot {e} past cap_total");
2160            prev_end = s.off + s.cap;
2161        }
2162        assert!(a.cap_total() >= prev_end, "tail headroom");
2163    }
2164
2165    #[test]
2166    fn allocator_new_lays_out_with_slack_and_headroom() {
2167        let lens = [10u32, 0, 64, 7];
2168        let a = ColorsAllocator::new(&lens);
2169        alloc_invariants(&a, &lens);
2170        // Slack: a 64-word slot has cap > 64 so a small carve-grow fits.
2171        assert!(a.slot(2).cap > 64);
2172        // Headroom past the bump tail for early growth.
2173        assert!(a.cap_total() > a.slot(3).off + a.slot(3).cap);
2174    }
2175
2176    #[test]
2177    fn allocator_place_in_place_when_within_cap() {
2178        let mut a = ColorsAllocator::new(&[10, 20]);
2179        let off0 = a.slot(0).off;
2180        let cap0 = a.slot(0).cap;
2181        // Shrink: still the same slot.
2182        assert_eq!(a.place(0, 5), Some(off0));
2183        assert_eq!(a.slot(0).len, 5);
2184        assert_eq!(a.slot(0).cap, cap0);
2185        // Grow within slack: same offset, no relocation.
2186        assert_eq!(a.place(0, cap0), Some(off0));
2187        assert_eq!(a.slot(0).off, off0);
2188        assert!(a.free.is_empty(), "no relocation should free anything");
2189    }
2190
2191    #[test]
2192    fn allocator_place_relocates_to_tail_and_frees_old() {
2193        let mut a = ColorsAllocator::new(&[10, 20]);
2194        let old0 = (a.slot(0).off, a.slot(0).cap);
2195        let tail_before = a.tail;
2196        // Overgrow entry 0 past its cap → relocate to the bump tail.
2197        let new_len = a.slot(0).cap + 5;
2198        let off = a.place(0, new_len).expect("fits in headroom");
2199        assert_eq!(off, tail_before, "relocated to old tail");
2200        assert_eq!(a.slot(0).off, off);
2201        assert_eq!(a.slot(0).len, new_len);
2202        assert!(a.free.contains(&old0), "old slot freed");
2203    }
2204
2205    #[test]
2206    fn allocator_reuses_freed_block_first_fit() {
2207        // Entry 0 has a large slot; entry 1 a tiny one, so growing 1 must
2208        // relocate (it can't fit in place) and lands in 0's freed block.
2209        let mut a = ColorsAllocator::new(&[10, 2]);
2210        let old0 = (a.slot(0).off, a.slot(0).cap);
2211        // Relocate entry 0 to the tail, freeing its original block.
2212        let _ = a.place(0, a.slot(0).cap + 5).unwrap();
2213        assert!(a.free.contains(&old0));
2214        // Grow entry 1 past its (tiny) cap but ≤ the freed block's cap →
2215        // first-fit reuses that block rather than bumping the tail.
2216        let new1 = a.slot(1).cap + 1;
2217        assert!(new1 <= old0.1, "freed block big enough");
2218        let off = a.place(1, new1).expect("reuses freed block");
2219        assert_eq!(off, old0.0, "first-fit reused the freed slot offset");
2220        assert!(!a.free.contains(&old0), "freed block consumed");
2221    }
2222
2223    #[test]
2224    fn allocator_signals_grow_then_repack_restores() {
2225        let mut a = ColorsAllocator::new(&[8, 8]);
2226        // Force overflow: ask for far more than cap_total.
2227        let huge = a.cap_total() + 100;
2228        assert_eq!(a.place(0, huge), None, "overflow must signal grow");
2229        // Repack with the new lengths compacts + grows the buffer.
2230        a.repack(&[huge, 8]);
2231        alloc_invariants(&a, &[huge, 8]);
2232        assert!(a.cap_total() > huge);
2233        // After repack the entry now fits in place.
2234        assert_eq!(a.place(0, huge), Some(a.slot(0).off));
2235    }
2236
2237    /// Drive the allocator like a real carve loop (mirroring
2238    /// `update_model`): one model's colour count drifts up and down
2239    /// across many edits while two neighbours stay put. Growth is
2240    /// absorbed in place / via the free list / by the bump tail, and on
2241    /// the rare overflow we repack (as `update_model` does). After every
2242    /// edit the live `[off, off+len)` windows must stay disjoint.
2243    #[test]
2244    fn allocator_carve_loop_keeps_live_windows_disjoint() {
2245        let mut a = ColorsAllocator::new(&[40, 12, 40]);
2246        let mut lens = [40u32, 12, 40];
2247        // A deterministic up/down walk of entry 1's length, incl. a jump
2248        // that forces at least one grow+repack.
2249        let walk = [13u32, 30, 60, 18, 9, 80, 80, 25, 200, 7];
2250        let mut grew = false;
2251        for &len in &walk {
2252            lens[1] = len;
2253            // Entry 1 re-placed; on overflow, repack the whole set.
2254            if a.place(1, len).is_none() {
2255                grew = true;
2256                a.repack(&lens);
2257            } else {
2258                // Neighbours fit in place every time.
2259                assert_eq!(a.place(0, 40), Some(a.slot(0).off));
2260                assert_eq!(a.place(2, 40), Some(a.slot(2).off));
2261            }
2262            assert_eq!(a.slot(1).len, len);
2263
2264            // No two entries' live windows overlap.
2265            let mut wins: Vec<(u32, u32)> =
2266                (0..3).map(|e| (a.slot(e).off, a.slot(e).len)).collect();
2267            wins.sort_by_key(|w| w.0);
2268            for pair in wins.windows(2) {
2269                let (o0, l0) = pair[0];
2270                let (o1, _) = pair[1];
2271                assert!(o0 + l0 <= o1, "live windows overlap: {pair:?}");
2272            }
2273        }
2274        assert!(grew, "the 200-word jump should have forced a repack");
2275    }
2276
2277    // --- incremental instance path (device-backed; skips w/o adapter) ---
2278
2279    fn headless() -> Option<crate::HeadlessGpu> {
2280        match crate::HeadlessGpu::new_blocking(crate::GpuRendererSettings::default()) {
2281            Ok(h) => Some(h),
2282            Err(e) => {
2283                eprintln!("[skip] no GPU adapter reachable: {e}");
2284                None
2285            }
2286        }
2287    }
2288
2289    fn one_model_registry() -> (SpriteModelRegistry, u32) {
2290        let mut reg = SpriteModelRegistry::new();
2291        let id = reg.add(build_sprite_model(&kv6_unsorted()));
2292        (reg, id)
2293    }
2294
2295    fn inst(model_id: u32, pos: [f32; 3]) -> SpriteInstance {
2296        use roxlap_formats::sprite::Sprite;
2297        SpriteInstance {
2298            model_id,
2299            transform: SpriteInstanceTransform::from_sprite(&Sprite::axis_aligned(
2300                kv6_unsorted(),
2301                pos,
2302            )),
2303        }
2304    }
2305
2306    #[test]
2307    fn append_grows_count_and_capacity_pow2() {
2308        let Some(h) = headless() else { return };
2309        let (reg, m) = one_model_registry();
2310        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(m, [0.0; 3])]);
2311        assert_eq!(res.instance_count(), 1);
2312        assert_eq!(res.instance_capacity, 1);
2313
2314        // Append 4 → count 5, capacity grows to next_pow2(5) = 8.
2315        let more: Vec<_> = (1..=4).map(|i| inst(m, [i as f32, 0.0, 0.0])).collect();
2316        let base = res.append_instances(&h.device, &reg, &more);
2317        assert_eq!(base, 1, "first appended index follows the seed instance");
2318        assert_eq!(res.instance_count(), 5);
2319        assert_eq!(res.instance_capacity, 8, "power-of-two growth");
2320
2321        // A second append that still fits keeps the same capacity (no realloc).
2322        let base2 = res.append_instances(&h.device, &reg, &[inst(m, [9.0, 0.0, 0.0])]);
2323        assert_eq!(base2, 5);
2324        assert_eq!(res.instance_count(), 6);
2325        assert_eq!(res.instance_capacity, 8, "fits existing capacity, no grow");
2326    }
2327
2328    #[test]
2329    fn append_empty_is_noop() {
2330        let Some(h) = headless() else { return };
2331        let (reg, m) = one_model_registry();
2332        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(m, [0.0; 3])]);
2333        let base = res.append_instances(&h.device, &reg, &[]);
2334        assert_eq!(base, 1);
2335        assert_eq!(res.instance_count(), 1);
2336        assert_eq!(res.instance_capacity, 1);
2337    }
2338
2339    /// Read `words` u32s back from a GPU buffer (needs COPY_SRC).
2340    fn read_u32(h: &crate::HeadlessGpu, buf: &wgpu::Buffer, words: u64) -> Vec<u32> {
2341        let bytes = words * 4;
2342        let staging = h.device.create_buffer(&wgpu::BufferDescriptor {
2343            label: Some("readback"),
2344            size: bytes,
2345            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
2346            mapped_at_creation: false,
2347        });
2348        let mut enc = h
2349            .device
2350            .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
2351        enc.copy_buffer_to_buffer(buf, 0, &staging, 0, bytes);
2352        h.queue.submit(std::iter::once(enc.finish()));
2353        let slice = staging.slice(..);
2354        let (tx, rx) = std::sync::mpsc::channel();
2355        slice.map_async(wgpu::MapMode::Read, move |r| tx.send(r).unwrap());
2356        h.device.poll(wgpu::PollType::wait_indefinitely()).ok();
2357        rx.recv().unwrap().unwrap();
2358        let data = slice.get_mapped_range();
2359        let out = bytemuck::cast_slice::<u8, u32>(&data).to_vec();
2360        drop(data);
2361        staging.unmap();
2362        out
2363    }
2364
2365    /// A second distinct model so add_model has real new geometry to lay
2366    /// down (different dims + colours from `kv6_unsorted`).
2367    fn kv6_other() -> Kv6 {
2368        let mk = |z, col| Voxel {
2369            col,
2370            z,
2371            vis: 0,
2372            dir: 0,
2373        };
2374        Kv6 {
2375            xsiz: 1,
2376            ysiz: 1,
2377            zsiz: 4,
2378            xpiv: 0.0,
2379            ypiv: 0.0,
2380            zpiv: 0.0,
2381            voxels: vec![mk(0, 0x11), mk(2, 0x22)],
2382            xlen: vec![2],
2383            ylen: vec![vec![2]],
2384            palette: None,
2385        }
2386    }
2387
2388    /// add_model lays the new model's volume on the GPU at the offsets its
2389    /// meta record claims — verified by reading the shared buffers back
2390    /// and matching each entry against its source SpriteModel.
2391    #[test]
2392    fn add_model_uploads_new_volume_incrementally() {
2393        let Some(h) = headless() else { return };
2394
2395        // Residency starts with model A only.
2396        let mut reg = SpriteModelRegistry::new();
2397        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2398        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(a, [0.0; 3])]);
2399        assert_eq!(res.chains.len(), 1);
2400        let entries_before = res.meta.len();
2401
2402        // Append model B (single-level) to the registry, then sync it.
2403        let b = reg.add(build_sprite_model(&kv6_other()));
2404        res.add_model(&h.device, &h.queue, &reg, b);
2405        assert_eq!(res.chains.len(), 2);
2406        assert_eq!(res.meta.len(), entries_before + 1, "one new entry");
2407
2408        // Read the shared buffers back and check EVERY entry's data sits
2409        // where its meta record points — both the pre-existing A and the
2410        // newly streamed B.
2411        let occ = read_u32(&h, &res.occupancy, u64::from(res.occ_cap));
2412        let coloff = read_u32(&h, &res.color_offsets, u64::from(res.coloff_cap));
2413        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2414        for (e, m) in reg.entries.iter().enumerate() {
2415            let meta = res.meta[e];
2416            let oo = meta.occupancy_offset as usize;
2417            assert_eq!(
2418                &occ[oo..oo + m.occupancy.len()],
2419                &m.occupancy[..],
2420                "occ entry {e}"
2421            );
2422            let co = meta.color_offsets_offset as usize;
2423            assert_eq!(
2424                &coloff[co..co + m.color_offsets.len()],
2425                &m.color_offsets[..],
2426                "color_offsets entry {e}"
2427            );
2428            let cc = meta.colors_offset as usize;
2429            assert_eq!(
2430                &cols[cc..cc + m.colors.len()],
2431                &m.colors[..],
2432                "colors entry {e}"
2433            );
2434        }
2435
2436        // And an instance of the freshly-added model can now be appended.
2437        let base = res.append_instances(&h.device, &reg, &[inst(b, [5.0, 0.0, 0.0])]);
2438        assert_eq!(base, 1);
2439        assert_eq!(res.instance_count(), 2);
2440    }
2441
2442    /// Adding many small models forces the volume buffers to grow + rebuild
2443    /// at least once; every entry must still read back correctly across the
2444    /// grow boundary.
2445    #[test]
2446    fn add_model_survives_buffer_growth() {
2447        let Some(h) = headless() else { return };
2448        let mut reg = SpriteModelRegistry::new();
2449        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2450        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(a, [0.0; 3])]);
2451        let occ_cap0 = res.occ_cap;
2452
2453        // 40 adds — occupancy starts exact-sized (cap == used), so the very
2454        // first add overflows and grows; later ones ride the slack.
2455        for _ in 0..40 {
2456            let id = reg.add(build_sprite_model(&kv6_other()));
2457            res.add_model(&h.device, &h.queue, &reg, id);
2458        }
2459        assert_eq!(res.chains.len(), 41);
2460        assert!(res.occ_cap > occ_cap0, "occupancy buffer grew");
2461
2462        let occ = read_u32(&h, &res.occupancy, u64::from(res.occ_cap));
2463        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2464        for (e, m) in reg.entries.iter().enumerate() {
2465            let meta = res.meta[e];
2466            let oo = meta.occupancy_offset as usize;
2467            assert_eq!(
2468                &occ[oo..oo + m.occupancy.len()],
2469                &m.occupancy[..],
2470                "occ entry {e}"
2471            );
2472            let cc = meta.colors_offset as usize;
2473            assert_eq!(
2474                &cols[cc..cc + m.colors.len()],
2475                &m.colors[..],
2476                "colors entry {e}"
2477            );
2478        }
2479    }
2480
2481    fn test_frustum() -> ViewFrustum {
2482        ViewFrustum {
2483            pos: [0.0, 0.0, 0.0],
2484            right: [1.0, 0.0, 0.0],
2485            down: [0.0, 1.0, 0.0],
2486            forward: [0.0, 0.0, 1.0],
2487            half_w: 1.0,
2488            half_h: 1.0,
2489            far: 10_000.0,
2490        }
2491    }
2492
2493    #[test]
2494    fn remove_model_tombstones_frees_and_reuses() {
2495        let Some(h) = headless() else { return };
2496        // Residency with models A and B, one instance each.
2497        let mut reg = SpriteModelRegistry::new();
2498        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2499        let b = reg.add(build_sprite_model(&kv6_other()));
2500        let mut res = SpriteRegistryResident::upload(
2501            &h.device,
2502            &reg,
2503            &[inst(a, [0.0; 3]), inst(b, [1.0, 0.0, 0.0])],
2504        );
2505        assert_eq!(res.live_model_count(), 2);
2506        assert_eq!(res.dead_model_count(), 0);
2507
2508        // Remove B → tombstoned, its colours freed into the pool.
2509        res.remove_model(b);
2510        assert_eq!(res.live_model_count(), 1);
2511        assert_eq!(res.dead_model_count(), 1);
2512        assert_eq!(res.dead.iter().filter(|&&d| d).count(), 1, "one entry dead");
2513        assert!(!res.colors_alloc.free.is_empty(), "B's colour slot freed");
2514
2515        // Adding C reuses the freed slot (free-list first-fit).
2516        let c = reg.add(build_sprite_model(&kv6_other()));
2517        res.add_model(&h.device, &h.queue, &reg, c);
2518        assert_eq!(res.live_model_count(), 2);
2519
2520        // A and C read back correctly; B is dead (skipped).
2521        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2522        for e in [a as usize, c as usize] {
2523            let m = &reg.entries[e];
2524            let cc = res.meta[e].colors_offset as usize;
2525            assert_eq!(
2526                &cols[cc..cc + m.colors.len()],
2527                &m.colors[..],
2528                "colors entry {e}"
2529            );
2530        }
2531
2532        // The lingering instance of removed B is skipped without panic.
2533        let f = test_frustum();
2534        let _ = res.cull_bin_upload(&h.device, &h.queue, &f, 64, 64, 16, 1.0);
2535    }
2536
2537    #[test]
2538    fn compact_reclaims_holes_keeps_ids_stable() {
2539        let Some(h) = headless() else { return };
2540        let mut reg = SpriteModelRegistry::new();
2541        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2542        let b = reg.add(build_sprite_model(&kv6_other()));
2543        let c = reg.add(build_sprite_model(&kv6_other()));
2544        let mut res = SpriteRegistryResident::upload(
2545            &h.device,
2546            &reg,
2547            &[inst(a, [0.0; 3]), inst(b, [1.0; 3]), inst(c, [2.0; 3])],
2548        );
2549        let occ_used_full = res.occ_used;
2550
2551        // Remove the middle model, then compact.
2552        res.remove_model(b);
2553        res.compact(&h.device, &h.queue, &reg);
2554
2555        // Holes reclaimed: occupancy now only covers A + C.
2556        let live_occ: u32 = [a, c]
2557            .iter()
2558            .map(|&e| reg.entries[e as usize].occupancy.len() as u32)
2559            .sum();
2560        assert_eq!(res.occ_used, live_occ);
2561        assert!(res.occ_used < occ_used_full, "compaction shrank occupancy");
2562        // Dead entry keeps a zeroed tombstone; ids unchanged.
2563        assert_eq!(res.meta[b as usize].occupancy_offset, 0);
2564        assert_eq!(res.live_model_count(), 2);
2565        assert_eq!(res.dead_model_count(), 1);
2566
2567        // Live entries read back correctly at their new offsets.
2568        let occ = read_u32(&h, &res.occupancy, u64::from(res.occ_cap));
2569        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2570        for &e in &[a as usize, c as usize] {
2571            let m = &reg.entries[e];
2572            let oo = res.meta[e].occupancy_offset as usize;
2573            assert_eq!(
2574                &occ[oo..oo + m.occupancy.len()],
2575                &m.occupancy[..],
2576                "occ {e}"
2577            );
2578            let cc = res.meta[e].colors_offset as usize;
2579            assert_eq!(&cols[cc..cc + m.colors.len()], &m.colors[..], "cols {e}");
2580        }
2581
2582        // Chain ids still valid: C's chain still resolves; B's is empty.
2583        assert!(!res.chains[c as usize].is_empty());
2584        assert!(res.chains[b as usize].is_empty());
2585    }
2586
2587    #[test]
2588    fn remove_swap_semantics_and_capacity_retained() {
2589        let Some(h) = headless() else { return };
2590        let (reg, m) = one_model_registry();
2591        let seed: Vec<_> = (0..4).map(|i| inst(m, [i as f32, 0.0, 0.0])).collect();
2592        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &seed);
2593        assert_eq!(res.instance_count(), 4);
2594        let cap = res.instance_capacity;
2595
2596        // Remove a middle element → the previous last (idx 3) moved into it.
2597        assert_eq!(res.remove_instance(1), Some(3));
2598        assert_eq!(res.instance_count(), 3);
2599
2600        // Remove the current last (idx 2) → nothing moved.
2601        assert_eq!(res.remove_instance(2), None);
2602        assert_eq!(res.instance_count(), 2);
2603
2604        // Out of range → None.
2605        assert_eq!(res.remove_instance(99), None);
2606        assert_eq!(res.instance_count(), 2);
2607
2608        // Capacity is retained for reuse (no shrink).
2609        assert_eq!(res.instance_capacity, cap);
2610    }
2611}