Skip to main content

roxlap_gpu/
sprite_model.rs

1//! GPU.10 — KV6 sprite as a DDA-marchable voxel model.
2//!
3//! Unlike the GPU.9 splatter (one thread per voxel, screen-space
4//! squares, overdraw + atomic contention), a sprite model is a small
5//! voxel volume the precise ray-DDA marches one ray per pixel —
6//! crisp, correct occlusion, no overdraw. This is the GPU.10.0 single
7//! sprite; instancing + tiling + LOD come in later sub-substages.
8//!
9//! The volume reuses the chunk occupancy/colour scheme but sized to
10//! the KV6 bbox: per-column occupancy bitmask (`occ_words_per_col`
11//! u32s, `CHUNK_Z`-style 32-bits-per-word), a flat colour array in
12//! ascending-z order per column, and a `color_offsets` prefix table.
13//! The shader finds a voxel's colour by `offset[col] + popcount(bits
14//! below z)`, so colours MUST be ascending-z (we sort per column).
15
16#![allow(
17    clippy::cast_precision_loss,
18    clippy::cast_possible_truncation,
19    clippy::cast_possible_wrap,
20    clippy::cast_sign_loss,
21    clippy::many_single_char_names,
22    clippy::similar_names
23)]
24
25use bytemuck::{Pod, Zeroable};
26use roxlap_formats::kv6::Kv6;
27use roxlap_formats::sprite::Sprite;
28use roxlap_formats::voxel_clip::{DecodedClip, VoxelFrame};
29
/// CPU-built voxel volume for one KV6 model.
///
/// Columns are indexed `col = x + y * mx`. Within a column the voxel at
/// height `z` is bit `z & 31` of occupancy word `z >> 5`, and its colour /
/// normal live at `color_offsets[col] + popcount(bits below z)`, so
/// `colors` and `dirs` must stay in ascending-z order per column.
#[derive(Debug, Clone)]
pub struct SpriteModel {
    /// Voxel extent `(mx, my, mz)`.
    pub dims: [u32; 3],
    /// `ceil(mz / 32)` (at least 1) — u32 words of occupancy per (x, y)
    /// column.
    pub occ_words_per_col: u32,
    /// KV6 pivot in model-local voxel space (this model's own voxel
    /// units; [`SpriteModel::downsample`] halves it along with `dims`).
    pub pivot: [f32; 3],
    /// Per-column occupancy bitmask, `mx * my * occ_words_per_col` words.
    pub occupancy: Vec<u32>,
    /// Voxel colours, ascending z within each column.
    pub colors: Vec<u32>,
    /// Per-voxel surface-normal index (`Kv6::Voxel::dir`, 0..256),
    /// parallel to [`colors`](Self::colors). The GPU sprite shader uses
    /// it to index the per-instance `kv6colmul` lighting table, matching
    /// the CPU rasteriser's normal-based shading.
    pub dirs: Vec<u32>,
    /// Prefix sums: `color_offsets[col]` is the first colour index of
    /// column `col`; length `mx * my + 1`, with the final element equal
    /// to `colors.len()`.
    pub color_offsets: Vec<u32>,
    /// World-space size of one voxel of this model (GPU.10.4 LOD): 1.0
    /// at mip-0, doubling each [`SpriteModel::downsample`]. The shader
    /// divides the local ray by this so a coarse voxel spans the right
    /// world extent and the march `t` stays in world units.
    pub voxel_world_size: f32,
}
57
58/// Build the DDA volume from a KV6. Columns are packed in
59/// `x + y*mx` order; each column's voxels are sorted ascending by z
60/// so the shader's popcount-rank colour lookup is correct.
61///
62/// # Panics
63/// If the KV6's `ylen` counters disagree with `voxels.len()` (a
64/// malformed model).
65#[must_use]
66pub fn build_sprite_model(kv6: &Kv6) -> SpriteModel {
67    let (mx, my, mz) = (kv6.xsiz, kv6.ysiz, kv6.zsiz);
68    let occ_words_per_col = mz.div_ceil(32).max(1);
69    let cols = (mx * my) as usize;
70
71    let mut occupancy = vec![0u32; cols * occ_words_per_col as usize];
72    let mut color_offsets = vec![0u32; cols + 1];
73    let mut colors: Vec<u32> = Vec::with_capacity(kv6.voxels.len());
74    let mut dirs: Vec<u32> = Vec::with_capacity(kv6.voxels.len());
75
76    // Pass 1 — consume voxels in KV6 storage order (x-outer / y-inner)
77    // into per-column buckets keyed by `col = x + y*mx`. Each entry is
78    // `(z, colour, normal-dir)`.
79    let mut buckets: Vec<Vec<(u16, u32, u8)>> = vec![Vec::new(); cols];
80    let mut voxel_iter = kv6.voxels.iter();
81    for x in 0..mx {
82        for y in 0..my {
83            let col = (x + y * mx) as usize;
84            let count = kv6.ylen[x as usize][y as usize];
85            for _ in 0..count {
86                let v = voxel_iter.next().expect("KV6 ylen / voxels.len mismatch");
87                buckets[col].push((v.z, v.col, v.dir));
88            }
89        }
90    }
91
92    // Pass 2 — emit in COLUMN-INDEX order so `color_offsets` is a true
93    // monotonic prefix sum (the shader indexes by `col` either way, but
94    // structural edits / mip rebuilds rely on monotonic offsets). Each
95    // column's voxels sorted ascending z for the popcount-rank lookup.
96    for (col, bucket) in buckets.iter_mut().enumerate() {
97        color_offsets[col] = colors.len() as u32;
98        bucket.sort_by_key(|(z, _, _)| *z);
99        for &(z, col_rgba, dir) in bucket.iter() {
100            let z = u32::from(z);
101            let base = col * occ_words_per_col as usize + (z >> 5) as usize;
102            occupancy[base] |= 1u32 << (z & 31);
103            colors.push(col_rgba);
104            dirs.push(u32::from(dir));
105        }
106    }
107    color_offsets[cols] = colors.len() as u32;
108
109    SpriteModel {
110        dims: [mx, my, mz],
111        occ_words_per_col,
112        pivot: [kv6.xpiv, kv6.ypiv, kv6.zpiv],
113        occupancy,
114        color_offsets,
115        colors,
116        dirs,
117        voxel_world_size: 1.0,
118    }
119}
120
121/// Build a [`SpriteModel`] directly from a decoded voxel-clip frame
122/// (VCL.2). The [`VoxelFrame`] dense-column layout is byte-for-byte the
123/// [`SpriteModel`] layout that [`build_sprite_model`] produces, so this is
124/// a field move — no per-column bucket-sort. `dirs` is the frame's
125/// surface-normal LUT indices (from [`DecodedClip::dirs`]), parallel to
126/// `frame.colors`.
127///
128/// # Panics
129/// In debug, if `dirs.len() != frame.colors.len()` or the field shapes
130/// don't match `dims` (the same invariants [`build_sprite_model`] upholds).
131#[must_use]
132pub fn sprite_model_from_voxel_frame(
133    frame: &VoxelFrame,
134    dirs: &[u32],
135    dims: [u32; 3],
136    pivot: [f32; 3],
137    voxel_world_size: f32,
138) -> SpriteModel {
139    let occ_words_per_col = dims[2].div_ceil(32).max(1);
140    let cols = (dims[0] * dims[1]) as usize;
141    debug_assert_eq!(frame.occupancy.len(), cols * occ_words_per_col as usize);
142    debug_assert_eq!(frame.color_offsets.len(), cols + 1);
143    debug_assert_eq!(dirs.len(), frame.colors.len());
144    SpriteModel {
145        dims,
146        occ_words_per_col,
147        pivot,
148        occupancy: frame.occupancy.clone(),
149        colors: frame.colors.clone(),
150        dirs: dirs.to_vec(),
151        color_offsets: frame.color_offsets.clone(),
152        voxel_world_size,
153    }
154}
155
156/// Build the [`SpriteModel`] for frame `frame` of a decoded clip — the
157/// per-frame model uploaded into a flipbook chain (VCL.2).
158///
159/// # Panics
160/// If `frame` is out of range, or the frame fails the layout invariants.
161#[must_use]
162pub fn sprite_model_from_clip_frame(clip: &DecodedClip, frame: usize) -> SpriteModel {
163    sprite_model_from_voxel_frame(
164        &clip.frames[frame],
165        &clip.dirs[frame],
166        clip.dims,
167        clip.pivot,
168        clip.voxel_world_size,
169    )
170}
171
/// Per-instance transform consumed by the model-DDA shader: the
/// inverse model→world rotation (so a world ray can be brought into
/// model-local space) plus the instance's world position. Stored as
/// three padded columns for std140/std430 (`mat3x3` 16-byte columns).
///
/// Layout is `repr(C)`: 3 × 16 bytes of columns, 12 bytes of position
/// and 4 bytes of explicit padding — 64 bytes in total.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable, Debug)]
pub struct SpriteInstanceTransform {
    /// Inverse of `[s | h | f]`, column-major, each column padded to
    /// `vec4`. `inv_rot * v = c0*v.x + c1*v.y + c2*v.z`.
    pub inv_rot: [[f32; 4]; 3],
    /// Instance world position (the KV6 pivot maps here).
    pub pos: [f32; 3],
    // Explicit tail padding: rounds `pos` up to 16 bytes.
    _pad: f32,
}
186
187impl SpriteInstanceTransform {
188    /// Build from a sprite pose. `s/h/f` are the model→world basis
189    /// columns; we invert them so the shader can map world→local.
190    #[must_use]
191    pub fn from_sprite(sprite: &Sprite) -> Self {
192        let inv = mat3_inverse([sprite.s, sprite.h, sprite.f]);
193        Self {
194            inv_rot: [
195                [inv[0][0], inv[0][1], inv[0][2], 0.0],
196                [inv[1][0], inv[1][1], inv[1][2], 0.0],
197                [inv[2][0], inv[2][1], inv[2][2], 0.0],
198            ],
199            pos: sprite.p,
200            _pad: 0.0,
201        }
202    }
203}
204
/// A registry of sprite models. Instances reference a model by
/// `model_id`, which is a **LOD chain** id: each chain holds one or
/// more concrete mip levels (finest first; GPU.10.4), and the renderer
/// picks the level per instance by distance. Identical KV6s are added
/// once and shared by many instances. **Copy-on-modify**:
/// [`Self::fork`] deep-copies a chain so edits to the fork leave the
/// parent (and its instances) intact.
///
/// Ids are plain indices: an entry id indexes `entries`, a `model_id`
/// indexes `chains`. Neither vector ever shrinks — [`Self::remove`]
/// tombstones in place — so ids stay stable.
#[derive(Debug, Clone, Default)]
pub struct SpriteModelRegistry {
    /// Concrete mip-level volumes (the GPU buffers concatenate these).
    /// A removed chain's entries are replaced by [`SpriteModel::empty`].
    entries: Vec<SpriteModel>,
    /// `chains[model_id]` = entry ids, finest (mip-0) first. An empty
    /// list marks a removed (tombstoned) chain.
    chains: Vec<Vec<u32>>,
}
219
220impl SpriteModelRegistry {
221    #[must_use]
222    pub fn new() -> Self {
223        Self::default()
224    }
225
226    fn push_entry(&mut self, model: SpriteModel) -> u32 {
227        let id = self.entries.len() as u32;
228        self.entries.push(model);
229        id
230    }
231
232    /// Register a single-level (no-LOD) model; returns its `model_id`.
233    pub fn add(&mut self, model: SpriteModel) -> u32 {
234        let e = self.push_entry(model);
235        let id = self.chains.len() as u32;
236        self.chains.push(vec![e]);
237        id
238    }
239
240    /// Register a model with up to `max_levels` LOD mips (each a 2×
241    /// [`SpriteModel::downsample`] of the previous; stops early once a
242    /// level collapses to 1³). Returns its `model_id`.
243    pub fn add_lod(&mut self, model: SpriteModel, max_levels: u32) -> u32 {
244        let mut levels = vec![self.push_entry(model.clone())];
245        let mut cur = model;
246        for _ in 1..max_levels.max(1) {
247            if cur.dims == [1, 1, 1] {
248                break;
249            }
250            cur = cur.downsample();
251            levels.push(self.push_entry(cur.clone()));
252        }
253        let id = self.chains.len() as u32;
254        self.chains.push(levels);
255        id
256    }
257
258    /// Copy-on-modify: deep-copy every level of chain `parent` into new
259    /// entries + a new chain, and return its `model_id`. The fork owns
260    /// independent voxel data, so mutating it does not affect the
261    /// parent or any instance still pointing at it.
262    ///
263    /// # Panics
264    /// If `parent` is not a registered `model_id`.
265    pub fn fork(&mut self, parent: u32) -> u32 {
266        let src = self.chains[parent as usize].clone();
267        let levels: Vec<u32> = src
268            .iter()
269            .map(|&e| {
270                let copy = self.entries[e as usize].clone();
271                self.push_entry(copy)
272            })
273            .collect();
274        let id = self.chains.len() as u32;
275        self.chains.push(levels);
276        id
277    }
278
279    /// The finest (mip-0) model of chain `id`.
280    #[must_use]
281    pub fn model(&self, id: u32) -> &SpriteModel {
282        &self.entries[self.chains[id as usize][0] as usize]
283    }
284
285    /// Like [`Self::model`] but returns `None` for an out-of-range or
286    /// tombstoned (emptied) chain instead of panicking — the guarded form
287    /// for public primitives handed an arbitrary `chain_id`.
288    #[must_use]
289    pub fn model_checked(&self, id: u32) -> Option<&SpriteModel> {
290        let entry = *self.chains.get(id as usize)?.first()?;
291        self.entries.get(entry as usize)
292    }
293
294    /// Mutable access to the finest (mip-0) model for editing — the
295    /// copy-on-modify entry point (typically on a [`Self::fork`]).
296    /// After a *structural* edit (occupancy/dims), call
297    /// [`Self::rebuild_lod`] so the coarser mips match; a pure recolour
298    /// can use [`Self::recolor_chain`] instead.
299    pub fn model_mut(&mut self, id: u32) -> &mut SpriteModel {
300        let e = self.chains[id as usize][0] as usize;
301        &mut self.entries[e]
302    }
303
304    /// Recolour every LOD level of chain `id` (so a forked tint shows
305    /// at all distances).
306    pub fn recolor_chain(&mut self, id: u32, f: impl Fn(u32) -> u32 + Copy) {
307        for li in 0..self.chains[id as usize].len() {
308            let e = self.chains[id as usize][li] as usize;
309            self.entries[e].recolor(f);
310        }
311    }
312
313    /// Regenerate chain `id`'s coarser mip levels from its (possibly
314    /// just-edited) mip-0. Run after a structural edit via
315    /// [`Self::model_mut`] so the LOD ladder stays consistent. No-op
316    /// for a single-level (no-LOD) chain.
317    pub fn rebuild_lod(&mut self, id: u32) {
318        let levels = self.chains[id as usize].clone();
319        if levels.len() <= 1 {
320            return;
321        }
322        let mut cur = self.entries[levels[0] as usize].clone();
323        for &e in &levels[1..] {
324            cur = cur.downsample();
325            self.entries[e as usize] = cur.clone();
326        }
327    }
328
329    /// Free chain `chain_id`'s voxel data **in place**: replace each of
330    /// its LOD entries with [`SpriteModel::empty`] and clear the chain.
331    /// Entry ids and every other `model_id` are **preserved** (the chain
332    /// becomes empty, its entries become placeholders), so no id remap is
333    /// needed and the resident registry's entry alignment stays intact.
334    ///
335    /// This is safe to pair with the resident side because
336    /// [`SpriteRegistryResident::remove_model`] tombstones the same
337    /// entries (`dead[e]`) and [`compact`](SpriteRegistryResident::compact)
338    /// reads only live entries — so the resident never touches the empty
339    /// placeholders left here. Call `remove_model` (resident) **before**
340    /// this so those tombstones are set. No-op if `chain_id` is out of
341    /// range or already removed.
342    pub fn remove(&mut self, chain_id: u32) {
343        let Some(entries) = self.chains.get(chain_id as usize) else {
344            return;
345        };
346        // Clone the small id list so we can mutate `entries` while iterating.
347        let entries = entries.clone();
348        for e in entries {
349            self.entries[e as usize] = SpriteModel::empty();
350        }
351        self.chains[chain_id as usize] = Vec::new(); // tombstone (slot kept)
352    }
353
354    /// Whether `chain_id` is a live (registered, not [`removed`](Self::remove))
355    /// model. `false` for an out-of-range id or a tombstoned chain.
356    #[must_use]
357    pub fn is_live(&self, chain_id: u32) -> bool {
358        self.chains
359            .get(chain_id as usize)
360            .is_some_and(|c| !c.is_empty())
361    }
362
    /// Number of LOD chains (distinct `model_id`s). Counts tombstoned
    /// (removed) chains too — ids are never reused, so this is also the
    /// next id that [`Self::add`] / [`Self::add_lod`] will mint. Use
    /// [`Self::is_live`] to test whether a particular id is still usable.
    #[must_use]
    pub fn len(&self) -> usize {
        self.chains.len()
    }
370
    /// `true` when no chain has ever been registered. Like
    /// [`Self::len`], this counts tombstoned chains, so a registry whose
    /// chains were all removed is still reported as non-empty.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.chains.is_empty()
    }
375}
376
377impl SpriteModel {
378    /// An empty (zero-voxel, zero-extent) placeholder model. Used by
379    /// [`SpriteModelRegistry::remove`] to free a removed chain's voxel
380    /// data while keeping its entry slot, so ids stay stable. Carries no
381    /// occupancy/colours; `color_offsets` is the single-element prefix
382    /// `[0]` (`cols + 1` with `cols == 0`), keeping the structural
383    /// invariant intact for any code that inspects it.
384    #[must_use]
385    pub fn empty() -> Self {
386        Self {
387            dims: [0, 0, 0],
388            occ_words_per_col: 1,
389            pivot: [0.0, 0.0, 0.0],
390            occupancy: Vec::new(),
391            colors: Vec::new(),
392            dirs: Vec::new(),
393            color_offsets: vec![0],
394            voxel_world_size: 1.0,
395        }
396    }
397
398    /// Recolour every voxel via `f(old_rgba) -> new_rgba`. Structure
399    /// (occupancy / offsets) is untouched, so this is a cheap in-place
400    /// edit — handy on a [`SpriteModelRegistry::fork`] to make a tinted
401    /// variant. For structural edits, mutate the public occupancy /
402    /// colours / dims directly (via `model_mut`) then rebuild the LOD.
403    pub fn recolor(&mut self, f: impl Fn(u32) -> u32) {
404        for c in &mut self.colors {
405            *c = f(*c);
406        }
407    }
408
409    /// GPU.12 — structural edit of a single voxel within the model's
410    /// existing bounds. `Some(rgba)` sets/replaces the voxel at
411    /// `(x, y, z)`; `None` clears it. Maintains the ascending-z colour
412    /// invariant by inserting/removing at the voxel's popcount rank and
413    /// shifting the affected columns' `color_offsets`. Returns `true`
414    /// if the model changed. Out-of-bounds coordinates are ignored
415    /// (returns `false`) — growing `dims` is a separate concern.
416    ///
417    /// After editing, call [`SpriteModelRegistry::rebuild_lod`] to
418    /// refresh coarser mips, then re-upload via `set_sprite_instances`.
419    pub fn set_voxel(&mut self, x: u32, y: u32, z: u32, color: Option<u32>) -> bool {
420        if x >= self.dims[0] || y >= self.dims[1] || z >= self.dims[2] {
421            return false;
422        }
423        let owpc = self.occ_words_per_col as usize;
424        let cols = (self.dims[0] * self.dims[1]) as usize;
425        let col = (x + y * self.dims[0]) as usize;
426        let base = col * owpc;
427        let zw = (z >> 5) as usize;
428        let zb = z & 31;
429
430        // Rank = solid voxels strictly below z in this column.
431        let mut rank = 0usize;
432        for w in 0..zw {
433            rank += self.occupancy[base + w].count_ones() as usize;
434        }
435        let below_mask = if zb > 0 { (1u32 << zb) - 1 } else { 0 };
436        rank += (self.occupancy[base + zw] & below_mask).count_ones() as usize;
437        let idx = self.color_offsets[col] as usize + rank;
438        let was_set = (self.occupancy[base + zw] >> zb) & 1 == 1;
439
440        if let Some(rgba) = color {
441            if was_set {
442                self.colors[idx] = rgba; // replace in place (keeps dir)
443            } else {
444                self.occupancy[base + zw] |= 1u32 << zb;
445                self.colors.insert(idx, rgba);
446                // No normal supplied by this API — default to dir 0 (the
447                // sole caller, the carve hotkey, only ever clears).
448                self.dirs.insert(idx, 0);
449                for c in &mut self.color_offsets[col + 1..=cols] {
450                    *c += 1;
451                }
452            }
453            true
454        } else {
455            if !was_set {
456                return false;
457            }
458            self.occupancy[base + zw] &= !(1u32 << zb);
459            self.colors.remove(idx);
460            self.dirs.remove(idx);
461            for c in &mut self.color_offsets[col + 1..=cols] {
462                *c -= 1;
463            }
464            true
465        }
466    }
467
468    /// Radius of a bounding sphere centred at the instance position
469    /// (the pivot maps there): the farthest bbox corner from the
470    /// pivot. Used for frustum culling. Assumes a unit basis; scaled
471    /// instances would multiply this by their max basis length.
472    #[must_use]
473    pub fn bound_radius(&self) -> f32 {
474        let mut r2 = 0.0_f32;
475        for &cx in &[0.0, self.dims[0] as f32] {
476            for &cy in &[0.0, self.dims[1] as f32] {
477                for &cz in &[0.0, self.dims[2] as f32] {
478                    let d = [cx - self.pivot[0], cy - self.pivot[1], cz - self.pivot[2]];
479                    r2 = r2.max(d[0] * d[0] + d[1] * d[1] + d[2] * d[2]);
480                }
481            }
482        }
483        r2.sqrt()
484    }
485
486    /// GPU.10.4 — 2× voxel downsample for the next LOD level. A coarse
487    /// voxel is solid if any of its 2×2×2 fine voxels is, coloured by
488    /// their per-channel average. Dims/pivot halve and
489    /// `voxel_world_size` doubles, so the coarse model occupies the
490    /// same world box at half the resolution (origin-corner aligned).
491    #[must_use]
492    #[allow(clippy::manual_checked_ops)] // `n > 0` guards 4 divisions, not one checked_div
493    pub fn downsample(&self) -> SpriteModel {
494        let [fx, fy, fz] = self.dims;
495        let fidx = |x: u32, y: u32, z: u32| (x + y * fx + z * fx * fy) as usize;
496
497        // Reconstruct dense fine voxels (solid flag + colour + normal).
498        let mut solid = vec![false; (fx * fy * fz) as usize];
499        let mut fine = vec![0u32; (fx * fy * fz) as usize];
500        let mut fine_dir = vec![0u32; (fx * fy * fz) as usize];
501        for x in 0..fx {
502            for y in 0..fy {
503                let col = (x + y * fx) as usize;
504                let base = col * self.occ_words_per_col as usize;
505                let off = self.color_offsets[col] as usize;
506                let mut seen = 0usize;
507                for z in 0..fz {
508                    let w = base + (z >> 5) as usize;
509                    if (self.occupancy[w] >> (z & 31)) & 1 == 1 {
510                        fine[fidx(x, y, z)] = self.colors[off + seen];
511                        fine_dir[fidx(x, y, z)] = self.dirs[off + seen];
512                        solid[fidx(x, y, z)] = true;
513                        seen += 1;
514                    }
515                }
516            }
517        }
518
519        let nx = fx.div_ceil(2).max(1);
520        let ny = fy.div_ceil(2).max(1);
521        let nz = fz.div_ceil(2).max(1);
522        let owpc = nz.div_ceil(32).max(1);
523        let cols = (nx * ny) as usize;
524        let mut occupancy = vec![0u32; cols * owpc as usize];
525        let mut color_offsets = vec![0u32; cols + 1];
526        let mut colors: Vec<u32> = Vec::new();
527        let mut dirs: Vec<u32> = Vec::new();
528
529        // Emit in column-index order (`ccol = cx + cy*nx`), cy outer,
530        // so `color_offsets` is a monotonic prefix sum like build's.
531        for cy in 0..ny {
532            for cx in 0..nx {
533                let ccol = (cx + cy * nx) as usize;
534                color_offsets[ccol] = colors.len() as u32;
535                for cz in 0..nz {
536                    let (mut a, mut r, mut g, mut b, mut n) = (0u32, 0u32, 0u32, 0u32, 0u32);
537                    // Normals don't average meaningfully — keep the first
538                    // solid child's `dir` as the coarse voxel's normal.
539                    let mut rep_dir = 0u32;
540                    for dz in 0..2 {
541                        for dy in 0..2 {
542                            for dx in 0..2 {
543                                let (x, y, z) = (2 * cx + dx, 2 * cy + dy, 2 * cz + dz);
544                                if x < fx && y < fy && z < fz && solid[fidx(x, y, z)] {
545                                    let c = fine[fidx(x, y, z)];
546                                    if n == 0 {
547                                        rep_dir = fine_dir[fidx(x, y, z)];
548                                    }
549                                    a += (c >> 24) & 0xff;
550                                    r += (c >> 16) & 0xff;
551                                    g += (c >> 8) & 0xff;
552                                    b += c & 0xff;
553                                    n += 1;
554                                }
555                            }
556                        }
557                    }
558                    if n > 0 {
559                        let avg = ((a / n) << 24) | ((r / n) << 16) | ((g / n) << 8) | (b / n);
560                        let base = ccol * owpc as usize + (cz >> 5) as usize;
561                        occupancy[base] |= 1u32 << (cz & 31);
562                        colors.push(avg);
563                        dirs.push(rep_dir);
564                    }
565                }
566            }
567        }
568        color_offsets[cols] = colors.len() as u32;
569
570        SpriteModel {
571            dims: [nx, ny, nz],
572            occ_words_per_col: owpc,
573            pivot: [
574                self.pivot[0] * 0.5,
575                self.pivot[1] * 0.5,
576                self.pivot[2] * 0.5,
577            ],
578            occupancy,
579            colors,
580            dirs,
581            color_offsets,
582            voxel_world_size: self.voxel_world_size * 2.0,
583        }
584    }
585}
586
/// View frustum for CPU instance culling, in world space. Built each
/// frame from the world camera. `half_w`/`half_h` are the tangents of
/// the half-FOV (so the side planes are `|x| <= half_w * z` etc. in
/// camera space).
#[derive(Clone, Copy, Debug)]
pub struct ViewFrustum {
    /// Camera position in world space.
    pub pos: [f32; 3],
    /// Camera-space +x axis expressed in world space.
    /// NOTE(review): presumably unit length — confirm against the cull code.
    pub right: [f32; 3],
    /// Camera-space +y axis (pointing down the screen) in world space.
    pub down: [f32; 3],
    /// Camera-space +z (view) axis in world space.
    pub forward: [f32; 3],
    /// Tangent of the horizontal half-FOV.
    pub half_w: f32,
    /// Tangent of the vertical half-FOV.
    pub half_h: f32,
    /// Far limit of the frustum.
    /// NOTE(review): presumably a distance along `forward` in world units — confirm.
    pub far: f32,
}
601
/// CPU cull record: the GPU instance + its world bounding sphere.
/// Not `Copy` — carries a boxed 256-entry `kv6colmul` table.
#[derive(Clone)]
struct CullInstance {
    /// Instance transform + a placeholder `model_id`; the cull
    /// overwrites `model_id` with the distance-chosen LOD entry.
    gpu: SpriteInstanceGpu,
    /// LOD chain this instance draws (the user-facing `model_id`).
    chain_id: u32,
    /// Bounding-sphere centre: the instance's world position.
    center: [f32; 3],
    /// Bounding-sphere radius, taken from the chain's finest model
    /// ([`SpriteModel::bound_radius`]).
    radius: f32,
    /// voxlap `kv6colmul[256]` — per-surface-normal colour modulation
    /// for this instance's pose + lighting. Defaults to identity
    /// (`0x0100` in every channel lane → unshaded) until the facade sets
    /// it via [`SpriteRegistryResident::set_instance_colmul`]. Packed
    /// into the `colmul` GPU buffer (in visible order) each frame.
    colmul: Box<[u64; 256]>,
}
620
/// Identity `kv6colmul` table: all 256 entries hold `0x0100` in each of
/// the four 16-bit channel lanes, so the shader's
/// `(rgb[c] << 8) * 0x0100 >> 16 == rgb[c]` — i.e. no shading.
fn identity_colmul() -> Box<[u64; 256]> {
    const LANE: u64 = 0x0100;
    // Replicate the lane value into bits 0, 16, 32 and 48.
    let packed = (0..4u32).fold(0u64, |acc, lane| acc | (LANE << (16 * lane)));
    Box::new([packed; 256])
}
628
/// Dot product of two 3-vectors.
fn dot3(a: [f32; 3], b: [f32; 3]) -> f32 {
    let [ax, ay, az] = a;
    let [bx, by, bz] = b;
    ax * bx + ay * by + az * bz
}
632
633/// Build one CPU cull record from a user [`SpriteInstance`]: pack the
634/// transform, seed the bounding sphere from the chain's finest model, and
635/// start `colmul` at identity. Shared by the full
636/// [`SpriteRegistryResident::upload`] and the incremental
637/// [`SpriteRegistryResident::append_instances`].
638fn make_cull(registry: &SpriteModelRegistry, i: &SpriteInstance) -> CullInstance {
639    CullInstance {
640        gpu: SpriteInstanceGpu {
641            inv_rot0: i.transform.inv_rot[0],
642            inv_rot1: i.transform.inv_rot[1],
643            inv_rot2: i.transform.inv_rot[2],
644            pos: i.transform.pos,
645            model_id: i.model_id, // placeholder; cull rewrites per frame
646        },
647        chain_id: i.model_id,
648        center: i.transform.pos,
649        radius: registry.model(i.model_id).bound_radius(),
650        colmul: identity_colmul(),
651    }
652}
653
654/// Allocate the `instances` capacity buffer (`STORAGE | COPY_DST`) sized
655/// for `cap` records (≥1). Left uninitialised — `cull_bin_upload`
656/// rewrites it (offset 0) each frame, and `append_instances` seeds the
657/// live records after a grow.
658fn instances_buffer(device: &wgpu::Device, cap: u32) -> wgpu::Buffer {
659    device.create_buffer(&wgpu::BufferDescriptor {
660        label: Some("roxlap-gpu sprite_reg.instances"),
661        size: u64::from(cap.max(1)) * std::mem::size_of::<SpriteInstanceGpu>() as u64,
662        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
663        mapped_at_creation: false,
664    })
665}
666
/// One sprite instance: a model reference + world pose.
#[derive(Debug, Clone, Copy)]
pub struct SpriteInstance {
    /// LOD-chain id of the model to draw (a [`SpriteModelRegistry`]
    /// `model_id`), not a concrete mip entry.
    pub model_id: u32,
    /// World pose: inverse rotation + position.
    pub transform: SpriteInstanceTransform,
}
673
/// GPU per-model metadata: where this model's data starts in the
/// shared registry buffers + its dims/pivot. Mirrors `ModelMeta` in
/// the shader (std430, 48 bytes).
///
/// `repr(C)` with three 16-byte groups: four `u32` offsets/sizes,
/// `dims` + padding, and `pivot` + `voxel_world_size`.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable, Debug)]
struct SpriteModelMeta {
    /// Start of this entry's occupancy words in the shared buffer.
    /// NOTE(review): presumably counted in u32 words, not bytes — confirm.
    occupancy_offset: u32,
    /// Start of this entry's colours in the shared `colors` buffer.
    /// NOTE(review): looks like `dirs` shares this placement — confirm.
    colors_offset: u32,
    /// Start of this entry's `color_offsets` prefix table in the shared
    /// buffer.
    color_offsets_offset: u32,
    /// Occupancy words per (x, y) column for this entry.
    occ_words_per_col: u32,
    /// Voxel extent `(mx, my, mz)` of this entry.
    dims: [u32; 3],
    // Explicit padding: rounds `dims` up to 16 bytes.
    _pad0: u32,
    /// Pivot in this entry's voxel space.
    pivot: [f32; 3],
    /// GPU.10.4 — world size of one voxel of this (mip) entry.
    voxel_world_size: f32,
}
690
/// GPU per-instance record. Mirrors `Instance` in the shader (std430,
/// 64 bytes): inverse rotation columns + position + model id.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable, Debug)]
struct SpriteInstanceGpu {
    /// Column 0 of the inverse rotation, padded to `vec4`.
    inv_rot0: [f32; 4],
    /// Column 1 of the inverse rotation, padded to `vec4`.
    inv_rot1: [f32; 4],
    /// Column 2 of the inverse rotation, padded to `vec4`.
    inv_rot2: [f32; 4],
    /// Instance world position.
    pos: [f32; 3],
    /// Model id; occupies the fourth lane after `pos`, so the record
    /// needs no extra padding. The CPU cull record stores a placeholder
    /// here that the cull replaces with the chosen LOD entry.
    model_id: u32,
}
702
/// Invert a 3×3 matrix supplied as its basis columns `[c0, c1, c2]`; the
/// result is likewise returned as the columns of the inverse. For an
/// orthonormal basis this is the transpose; the general path covers
/// rotation + non-unit scale.
///
/// A (near-)singular input — `|det| < 1e-12` — yields the all-zero
/// matrix rather than infinities.
#[must_use]
fn mat3_inverse(cols: [[f32; 3]; 3]) -> [[f32; 3]; 3] {
    fn cross(u: [f32; 3], v: [f32; 3]) -> [f32; 3] {
        let [ux, uy, uz] = u;
        let [vx, vy, vz] = v;
        [uy * vz - uz * vy, uz * vx - ux * vz, ux * vy - uy * vx]
    }

    let [a, b, c] = cols;
    // Unscaled rows of the inverse: b×c, c×a, a×b.
    let rows = [cross(b, c), cross(c, a), cross(a, b)];
    // Determinant via the scalar triple product a · (b × c).
    let det = a[0] * rows[0][0] + a[1] * rows[0][1] + a[2] * rows[0][2];
    let scale = if det.abs() < 1e-12 { 0.0 } else { 1.0 / det };
    // Column `j` of the inverse gathers element `j` of each row.
    [0usize, 1, 2].map(|j| [rows[0][j] * scale, rows[1][j] * scale, rows[2][j] * scale])
}
730
/// GPU-resident registry + instances: every model's occupancy /
/// colours / offsets concatenated into shared storage buffers, a
/// per-model metadata table, and a capacity-sized instance buffer
/// rewritten each frame with the frustum-visible subset (GPU.10.2).
/// One bind group serves all models (same approach as the multi-grid
/// scene).
pub struct SpriteRegistryResident {
    /// Every entry's occupancy words, tightly concatenated; an entry's
    /// base word index is its `SpriteModelMeta::occupancy_offset`.
    pub occupancy: wgpu::Buffer,
    /// Every entry's voxel colours, each placed at the slot offset handed
    /// out by `colors_alloc` (recorded as `SpriteModelMeta::colors_offset`).
    pub colors: wgpu::Buffer,
    /// Per-voxel surface-normal index, concatenated across models in the
    /// same layout as [`colors`](Self::colors). The shader indexes the
    /// per-instance `kv6colmul` table by it.
    pub dirs: wgpu::Buffer,
    /// Every entry's `color_offsets` prefix table, tightly concatenated;
    /// base word index is `SpriteModelMeta::color_offsets_offset`.
    pub color_offsets: wgpu::Buffer,
    /// One `SpriteModelMeta` record per concrete (mip-level) entry,
    /// indexed by entry id.
    pub model_meta: wgpu::Buffer,
    /// Holds up to `instance_capacity` instances; the visible subset
    /// is packed into `[0, count)` each frame by [`Self::cull_bin_upload`].
    pub instances: wgpu::Buffer,
    /// Instance-record capacity tracked for `instances`. Starts at the
    /// uploaded instance count and is grown (power-of-two) by
    /// [`Self::append_instances`]; never shrunk on removal.
    pub instance_capacity: u32,
    /// Per-visible-instance `kv6colmul[256]` tables, packed in the same
    /// order as the `instances` buffer each frame (two u32 per u64
    /// entry: lanes 0|1 then 2|3). Sized `instance_capacity * 256 * 2`
    /// u32; rewritten by [`Self::cull_bin_upload`].
    pub colmul: wgpu::Buffer,
    /// Allocated size of `colmul`, in u32 words.
    colmul_cap: u32,
    /// GPU.10.3 — per-tile `(offset, count)` into `tile_instances`,
    /// flat `2 * tiles_x * tiles_y` u32s. Grown to fit the screen.
    pub tile_ranges: wgpu::Buffer,
    /// Allocated size of `tile_ranges`, in u32 words (starts at 1).
    tile_ranges_cap: u32,
    /// GPU.10.3 — flat list of visible-instance indices grouped by
    /// tile. Grown to fit the per-frame total.
    pub tile_instances: wgpu::Buffer,
    /// Allocated size of `tile_instances`, in u32 words (starts at 1).
    tile_instances_cap: u32,
    /// CPU cull records (full set), with precomputed bounding spheres.
    cull: Vec<CullInstance>,
    /// GPU.10.4 — LOD chains: `chains[chain_id]` = entry ids, finest
    /// first. The cull picks a level by distance and writes its entry
    /// id into the packed instance's `model_id`. An empty chain is the
    /// tombstone left by [`Self::remove_model`].
    chains: Vec<Vec<u32>>,
    /// GPU.12 incremental — CPU mirror of the GPU `model_meta` table, one
    /// per concrete entry. [`Self::update_model`] reads the fixed
    /// occupancy/color_offsets bases from here and rewrites the changed
    /// `colors_offset` on a relocation.
    meta: Vec<SpriteModelMeta>,
    /// GPU.12 incremental — per-entry placement of `colors`/`dirs` in the
    /// shared buffers (drives both; same offsets/ranks). Lets an edit
    /// re-upload one model's data without touching the others.
    colors_alloc: ColorsAllocator,
    /// Per-entry word length of the dims-fixed `occupancy` array, kept
    /// so [`Self::update_model`] can assert a carve never changed dims
    /// (which would invalidate the in-place writes — growing dims is out
    /// of scope, handled by a full re-upload).
    occ_lens: Vec<u32>,
    /// Per-entry word length of the dims-fixed `color_offsets` array;
    /// same purpose as `occ_lens`.
    coloff_lens: Vec<u32>,
    /// Used / allocated words of the tightly-concatenated `occupancy`
    /// buffer. `add_model` bump-appends at `occ_used`; when it would pass
    /// `occ_cap` the buffer is grown (with slack) and rebuilt from the
    /// registry. (`colors`/`dirs` track theirs in [`ColorsAllocator`].)
    occ_used: u32,
    occ_cap: u32,
    /// Used / allocated words of the tightly-concatenated `color_offsets`
    /// buffer — same growth scheme as `occ_*`.
    coloff_used: u32,
    coloff_cap: u32,
    /// Allocated record count of the `model_meta` buffer; `add_model`
    /// grows it (with slack) when the entry count passes it.
    meta_cap: u32,
    /// Per-entry tombstone: `true` once its model was removed
    /// ([`Self::remove_model`]). Dead entries keep their `meta` slot (so
    /// entry ids — and the caller's `chain_id`s — stay stable) but their
    /// colours are freed for reuse and they contribute nothing to a
    /// repack / [`Self::compact`]. Parallel to `meta`.
    dead: Vec<bool>,
}
805
/// Which tightly-concatenated registry buffer
/// [`SpriteRegistryResident::sync_concat`] is operating on.
#[derive(Clone, Copy)]
enum ConcatBuf {
    /// The shared `occupancy` buffer (source: `SpriteModel::occupancy`).
    Occupancy,
    /// The shared `color_offsets` buffer (source:
    /// `SpriteModel::color_offsets`).
    ColorOffsets,
}
813
814/// The model's source array for a given [`ConcatBuf`] — a free fn (not a
815/// closure) so the returned borrow keeps `m`'s lifetime.
816fn concat_data(m: &SpriteModel, which: ConcatBuf) -> &[u32] {
817    match which {
818        ConcatBuf::Occupancy => &m.occupancy,
819        ConcatBuf::ColorOffsets => &m.color_offsets,
820    }
821}
822
823impl SpriteRegistryResident {
824    /// Concatenate `registry`'s models into shared buffers and prepare
825    /// `instances` for per-frame culling. Model-relative indices stay
826    /// as built; the shader adds each model's base offset from the
827    /// metadata table.
828    #[must_use]
829    pub fn upload(
830        device: &wgpu::Device,
831        registry: &SpriteModelRegistry,
832        instances: &[SpriteInstance],
833    ) -> Self {
834        // `occupancy` + `color_offsets` are dims-fixed → tightly
835        // concatenated (never grow on a carve). `colors` + `dirs` are
836        // variable → laid out by the suballocator with per-slot slack so
837        // an incremental edit can rewrite one model in place.
838        let entry_lens: Vec<u32> = registry
839            .entries
840            .iter()
841            .map(|m| m.colors.len() as u32)
842            .collect();
843        let colors_alloc = ColorsAllocator::new(&entry_lens);
844        let cap_total = colors_alloc.cap_total();
845
846        let mut all_occ: Vec<u32> = Vec::new();
847        let mut all_offsets: Vec<u32> = Vec::new();
848        let mut all_colors: Vec<u32> = vec![0; cap_total as usize];
849        let mut all_dirs: Vec<u32> = vec![0; cap_total as usize];
850        let mut meta: Vec<SpriteModelMeta> = Vec::with_capacity(registry.entries.len());
851        let mut occ_lens: Vec<u32> = Vec::with_capacity(registry.entries.len());
852        let mut coloff_lens: Vec<u32> = Vec::with_capacity(registry.entries.len());
853
854        // One meta + placed data per concrete (mip-level) entry.
855        for (e, m) in registry.entries.iter().enumerate() {
856            let slot = colors_alloc.slot(e);
857            meta.push(SpriteModelMeta {
858                occupancy_offset: all_occ.len() as u32,
859                colors_offset: slot.off,
860                color_offsets_offset: all_offsets.len() as u32,
861                occ_words_per_col: m.occ_words_per_col,
862                dims: m.dims,
863                _pad0: 0,
864                pivot: m.pivot,
865                voxel_world_size: m.voxel_world_size,
866            });
867            occ_lens.push(m.occupancy.len() as u32);
868            coloff_lens.push(m.color_offsets.len() as u32);
869            all_occ.extend_from_slice(&m.occupancy);
870            all_offsets.extend_from_slice(&m.color_offsets);
871            let off = slot.off as usize;
872            all_colors[off..off + m.colors.len()].copy_from_slice(&m.colors);
873            all_dirs[off..off + m.dirs.len()].copy_from_slice(&m.dirs);
874        }
875
876        // Per-instance cull records: sphere centred at the instance
877        // position, radius from the chain's finest (mip-0) model.
878        // `colmul` starts at identity (unshaded) until the facade sets
879        // per-instance lighting via `set_instance_colmul`.
880        let cull: Vec<CullInstance> = instances.iter().map(|i| make_cull(registry, i)).collect();
881
882        // Capacity buffer (COPY_DST so cull can rewrite it each frame),
883        // seeded with the full set so frame 0 is valid pre-cull.
884        let seed: Vec<SpriteInstanceGpu> = cull.iter().map(|c| c.gpu).collect();
885        let instances_buf = {
886            use wgpu::util::DeviceExt;
887            let one = [SpriteInstanceGpu::zeroed()];
888            let src: &[SpriteInstanceGpu] = if seed.is_empty() { &one } else { &seed };
889            device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
890                label: Some("roxlap-gpu sprite_reg.instances"),
891                contents: bytemuck::cast_slice(src),
892                usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
893            })
894        };
895
896        let tile_ranges = storage_dst_u32(device, "roxlap-gpu sprite_reg.tile_ranges", 1);
897        let tile_instances = storage_dst_u32(device, "roxlap-gpu sprite_reg.tile_instances", 1);
898        // colmul: 256 entries × 2 u32 per visible instance. Sized to the
899        // full instance set (worst case all visible); rewritten per frame.
900        let colmul_cap = (cull.len() as u32).max(1) * 256 * 2;
901        let colmul = storage_dst_u32(device, "roxlap-gpu sprite_reg.colmul", colmul_cap);
902        Self {
903            occupancy: storage_dst_u32_cap(
904                device,
905                "roxlap-gpu sprite_reg.occupancy",
906                &all_occ,
907                all_occ.len() as u32,
908            ),
909            colors: storage_dst_u32_cap(
910                device,
911                "roxlap-gpu sprite_reg.colors",
912                &all_colors,
913                cap_total,
914            ),
915            dirs: storage_dst_u32_cap(device, "roxlap-gpu sprite_reg.dirs", &all_dirs, cap_total),
916            color_offsets: storage_dst_u32_cap(
917                device,
918                "roxlap-gpu sprite_reg.color_offsets",
919                &all_offsets,
920                all_offsets.len() as u32,
921            ),
922            model_meta: storage_dst_pod(device, "roxlap-gpu sprite_reg.model_meta", &meta),
923            instances: instances_buf,
924            instance_capacity: cull.len() as u32,
925            colmul,
926            colmul_cap,
927            tile_ranges,
928            tile_ranges_cap: 1,
929            tile_instances,
930            tile_instances_cap: 1,
931            cull,
932            chains: registry.chains.clone(),
933            occ_used: all_occ.len() as u32,
934            occ_cap: all_occ.len() as u32,
935            coloff_used: all_offsets.len() as u32,
936            coloff_cap: all_offsets.len() as u32,
937            meta_cap: meta.len() as u32,
938            dead: vec![false; meta.len()],
939            meta,
940            colors_alloc,
941            occ_lens,
942            coloff_lens,
943        }
944    }
945
946    /// Number of resident instances (the cull set length).
947    #[must_use]
948    pub fn instance_count(&self) -> usize {
949        self.cull.len()
950    }
951
952    /// Append new instances **without** re-uploading any model volume —
953    /// the incremental counterpart to [`Self::upload`], for streaming
954    /// spawns (asteroids, projectiles, …). Returns the index of the first
955    /// appended instance; the block occupies `[base, base + N)`.
956    ///
957    /// The model volumes are untouched, so every appended instance must
958    /// reference a `model_id` (LOD chain) that was already present in the
959    /// `registry` passed to [`Self::upload`]. Registering a *new* model
960    /// still requires a full [`Self::upload`] (its voxels must be laid
961    /// into the shared buffers). `registry` here is only read for the new
962    /// instances' bound-sphere radii and must be the resident one.
963    ///
964    /// The `instances` GPU buffer is only *grown* here (power-of-two,
965    /// amortised O(1)); its contents are **not** written. [`Self::
966    /// cull_bin_upload`] rewrites the whole visible range from `cull` every
967    /// frame before the sprite pass reads it — exactly as for the static
968    /// instances — so appending only needs to extend `cull` and ensure
969    /// capacity. Writing the buffer here too caused a mid-frame
970    /// write-while-in-flight hazard on some drivers (a stray full-screen
971    /// flash on append). `colmul` likewise grows lazily in
972    /// `cull_bin_upload`. After a removal the capacity is not shrunk.
973    pub fn append_instances(
974        &mut self,
975        device: &wgpu::Device,
976        registry: &SpriteModelRegistry,
977        instances: &[SpriteInstance],
978    ) -> u32 {
979        let base = self.cull.len() as u32;
980        if instances.is_empty() {
981            return base;
982        }
983        for i in instances {
984            debug_assert!(
985                (i.model_id as usize) < self.chains.len(),
986                "append_instances: model_id {} not resident (run upload to register new models)",
987                i.model_id
988            );
989            self.cull.push(make_cull(registry, i));
990        }
991        let need = self.cull.len() as u32;
992        if need > self.instance_capacity {
993            // Grow power-of-two and recreate the buffer (the next frame's
994            // bind group picks up the new handle). No seed write — the
995            // per-frame cull_bin_upload populates it.
996            self.instance_capacity = need.next_power_of_two();
997            self.instances = instances_buffer(device, self.instance_capacity);
998        }
999        base
1000    }
1001
1002    /// Remove the instance at `index` by swap-remove — O(1), no GPU work
1003    /// (the next [`Self::cull_bin_upload`] repacks the visible set from
1004    /// the shrunk cull list). Capacity is retained for reuse.
1005    ///
1006    /// Returns `Some(old_last)` when a different instance was moved into
1007    /// `index` to fill the hole (its index changed from `old_last` to
1008    /// `index` — callers holding instance handles must fix up that one),
1009    /// or `None` if `index` was the last element or out of range. Because
1010    /// this reorders, any [`Self::set_instance_colmul`] table set by
1011    /// position should be re-applied after a removal.
1012    pub fn remove_instance(&mut self, index: usize) -> Option<usize> {
1013        if index >= self.cull.len() {
1014            return None;
1015        }
1016        let last = self.cull.len() - 1;
1017        self.cull.swap_remove(index);
1018        (index != last).then_some(last)
1019    }
1020
1021    /// Set the per-instance `kv6colmul[256]` lighting tables (voxlap's
1022    /// `update_reflects` output), in the same order/length as the
1023    /// instances passed to [`Self::upload`]. The next
1024    /// [`Self::cull_bin_upload`] packs the visible subset to the GPU.
1025    /// Instances beyond `tables.len()` keep their previous tables.
1026    pub fn set_instance_colmul(&mut self, tables: &[[u64; 256]]) {
1027        for (ci, t) in self.cull.iter_mut().zip(tables) {
1028            ci.colmul.copy_from_slice(t);
1029        }
1030    }
1031
1032    /// Refresh instance poses in place from `instances` — for animated
1033    /// sprites (e.g. KFA limbs re-posed each frame) — **without** any
1034    /// model-volume re-upload. `instances` must match the set passed to
1035    /// [`Self::upload`] in length + order; each keeps its `model_id`
1036    /// (LOD chain) so only the transform + cull centre change. No GPU
1037    /// write happens here: the next [`Self::cull_bin_upload`] re-uploads
1038    /// the packed visible subset, as it already does every frame.
1039    pub fn update_transforms(&mut self, instances: &[SpriteInstance]) {
1040        debug_assert_eq!(
1041            instances.len(),
1042            self.cull.len(),
1043            "update_transforms instance count must match upload"
1044        );
1045        for (ci, inst) in self.cull.iter_mut().zip(instances) {
1046            ci.gpu.inv_rot0 = inst.transform.inv_rot[0];
1047            ci.gpu.inv_rot1 = inst.transform.inv_rot[1];
1048            ci.gpu.inv_rot2 = inst.transform.inv_rot[2];
1049            ci.gpu.pos = inst.transform.pos;
1050            // Bounding sphere follows the pivot; radius/chain unchanged.
1051            ci.center = inst.transform.pos;
1052        }
1053    }
1054
1055    /// Repoint instance `idx` at a different LOD chain — the per-frame
1056    /// **flipbook** step for animated voxel clips (VCL.2). The instance's
1057    /// transform / colmul are untouched; only which model's volume it
1058    /// draws changes. The new chain's volume must already be resident
1059    /// (uploaded via [`Self::add_model`] / [`Self::upload`]); `registry`
1060    /// is the one those uploads used (so the bounding radius reseeds from
1061    /// the new model). Like [`Self::update_transforms`], this is a CPU-side
1062    /// rewrite — the next [`Self::cull_bin_upload`] re-uploads the packed
1063    /// visible subset, so it costs nothing extra on the GPU. No-op if `idx`
1064    /// is out of range.
1065    ///
1066    /// All frames of a clip share the same `dims`, so a flipbook swap
1067    /// leaves the bounding radius unchanged; reseeding it anyway keeps the
1068    /// method correct for arbitrary chain swaps.
1069    pub fn set_instance_model(
1070        &mut self,
1071        registry: &SpriteModelRegistry,
1072        idx: usize,
1073        chain_id: u32,
1074    ) {
1075        // Guard `chain_id` (the `cull.get_mut` below only covers `idx`): a
1076        // public caller could pass an out-of-range / tombstoned chain, which
1077        // `registry.model` would index-panic on.
1078        let Some(radius) = registry
1079            .model_checked(chain_id)
1080            .map(SpriteModel::bound_radius)
1081        else {
1082            return;
1083        };
1084        let Some(ci) = self.cull.get_mut(idx) else {
1085            return;
1086        };
1087        ci.chain_id = chain_id;
1088        ci.gpu.model_id = chain_id; // placeholder; cull rewrites to the LOD entry
1089        ci.radius = radius;
1090    }
1091
1092    /// GPU.12 incremental — re-upload only the entries of LOD chain
1093    /// `chain_id` after an in-place edit (carve / recolour) of its model,
1094    /// **without** rebuilding the whole registry. `registry` must be the
1095    /// same registry uploaded (same entry ids), with chain `chain_id`'s
1096    /// entries already edited (`model_mut` + `rebuild_lod`).
1097    ///
1098    /// For each entry: occupancy + color_offsets are dims-fixed, so they
1099    /// are written in place; colors + dirs (variable, parallel) go through
1100    /// the suballocator — written in place when they fit the slack,
1101    /// relocated (with a `model_meta` rewrite) when they outgrow it, and
1102    /// only when the buffer tail overflows are colors/dirs grown + the
1103    /// whole registry repacked. Instances / cull / colmul are untouched
1104    /// (a carve never moves an instance or grows its bounds) — that is the
1105    /// win over [`Self::upload`].
1106    ///
1107    /// # Panics (debug)
1108    /// If an entry's dims changed (occupancy / color_offsets length), which
1109    /// the in-place path can't absorb — growing dims needs a full
1110    /// re-upload via [`Self::upload`].
1111    pub fn update_model(
1112        &mut self,
1113        device: &wgpu::Device,
1114        queue: &wgpu::Queue,
1115        registry: &SpriteModelRegistry,
1116        chain_id: u32,
1117    ) {
1118        let entries = self.chains[chain_id as usize].clone();
1119        let mut grew = false;
1120        for &e in &entries {
1121            let e = e as usize;
1122            let m = &registry.entries[e];
1123
1124            // Dims-fixed arrays: assert unchanged, then write in place.
1125            debug_assert_eq!(
1126                m.occupancy.len() as u32,
1127                self.occ_lens[e],
1128                "update_model: entry {e} occupancy length changed (dims grew?)"
1129            );
1130            debug_assert_eq!(
1131                m.color_offsets.len() as u32,
1132                self.coloff_lens[e],
1133                "update_model: entry {e} color_offsets length changed (dims grew?)"
1134            );
1135            queue.write_buffer(
1136                &self.occupancy,
1137                u64::from(self.meta[e].occupancy_offset) * 4,
1138                bytemuck::cast_slice(&m.occupancy),
1139            );
1140            queue.write_buffer(
1141                &self.color_offsets,
1142                u64::from(self.meta[e].color_offsets_offset) * 4,
1143                bytemuck::cast_slice(&m.color_offsets),
1144            );
1145
1146            // Variable colors/dirs via the suballocator.
1147            let new_len = m.colors.len() as u32;
1148            match self.colors_alloc.place(e, new_len) {
1149                Some(off) => {
1150                    queue.write_buffer(
1151                        &self.colors,
1152                        u64::from(off) * 4,
1153                        bytemuck::cast_slice(&m.colors),
1154                    );
1155                    queue.write_buffer(
1156                        &self.dirs,
1157                        u64::from(off) * 4,
1158                        bytemuck::cast_slice(&m.dirs),
1159                    );
1160                    if self.meta[e].colors_offset != off {
1161                        // Relocated — rewrite this entry's meta record.
1162                        self.meta[e].colors_offset = off;
1163                        queue.write_buffer(
1164                            &self.model_meta,
1165                            (e * std::mem::size_of::<SpriteModelMeta>()) as u64,
1166                            bytemuck::bytes_of(&self.meta[e]),
1167                        );
1168                    }
1169                }
1170                None => grew = true,
1171            }
1172        }
1173
1174        // Buffer overflow on at least one entry → grow colors/dirs and
1175        // repack the WHOLE registry (rare; offsets for every entry move).
1176        if grew {
1177            self.grow_and_repack(device, queue, registry);
1178        }
1179    }
1180
1181    /// Grow the `colors`/`dirs` buffers and repack every entry compactly
1182    /// (with fresh slack) when an [`Self::update_model`] edit overflowed
1183    /// the buffer tail. Recreates both buffers (the next frame's bind
1184    /// group picks up the new handles) and rewrites every `model_meta`
1185    /// `colors_offset`. O(registry) but rare — logged so a growth burst
1186    /// is visible.
1187    fn grow_and_repack(
1188        &mut self,
1189        device: &wgpu::Device,
1190        queue: &wgpu::Queue,
1191        registry: &SpriteModelRegistry,
1192    ) {
1193        self.repack_colors_dirs(device, registry);
1194        // Every entry's colors_offset moved → rewrite the whole meta table.
1195        queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
1196    }
1197
1198    /// Repack `colors`/`dirs` compactly (with fresh slack) from the full
1199    /// `registry`, recreating both buffers and updating every CPU
1200    /// `meta[e].colors_offset`. Does **not** touch the GPU `model_meta`
1201    /// buffer — the caller writes it ([`Self::grow_and_repack`] writes the
1202    /// whole table; [`Self::add_model`] writes it once after all entries
1203    /// are placed). O(registry) but rare — logged so a growth burst is
1204    /// visible.
1205    fn repack_colors_dirs(&mut self, device: &wgpu::Device, registry: &SpriteModelRegistry) {
1206        // Dead (removed) entries collapse to 0 length so they reclaim no
1207        // space; live entries keep their colours.
1208        let new_lens: Vec<u32> = registry
1209            .entries
1210            .iter()
1211            .enumerate()
1212            .map(|(e, m)| {
1213                if self.dead[e] {
1214                    0
1215                } else {
1216                    m.colors.len() as u32
1217                }
1218            })
1219            .collect();
1220        self.colors_alloc.repack(&new_lens);
1221        let cap_total = self.colors_alloc.cap_total();
1222
1223        let mut all_colors = vec![0u32; cap_total as usize];
1224        let mut all_dirs = vec![0u32; cap_total as usize];
1225        for (e, m) in registry.entries.iter().enumerate() {
1226            if self.dead[e] {
1227                self.meta[e].colors_offset = 0;
1228                continue;
1229            }
1230            let off = self.colors_alloc.slot(e).off as usize;
1231            all_colors[off..off + m.colors.len()].copy_from_slice(&m.colors);
1232            all_dirs[off..off + m.dirs.len()].copy_from_slice(&m.dirs);
1233            self.meta[e].colors_offset = off as u32;
1234        }
1235        self.colors = storage_dst_u32_cap(
1236            device,
1237            "roxlap-gpu sprite_reg.colors",
1238            &all_colors,
1239            cap_total,
1240        );
1241        self.dirs = storage_dst_u32_cap(device, "roxlap-gpu sprite_reg.dirs", &all_dirs, cap_total);
1242        eprintln!("roxlap-gpu: sprite registry colors/dirs grew + repacked to {cap_total} words");
1243    }
1244
1245    /// Append a new model (its full LOD chain) to the resident registry
1246    /// **without** re-uploading the existing models' volumes — the
1247    /// incremental counterpart to a full [`Self::upload`], for streaming
1248    /// in new geometry (unique asteroids, generated meshes).
1249    ///
1250    /// Contract (mirrors [`Self::update_model`]): the caller owns the
1251    /// `SpriteModelRegistry`, has just appended this chain to it (e.g. via
1252    /// [`SpriteModelRegistry::add_lod`]), and passes the resulting
1253    /// `chain_id`. The chain's entries must be the registry's newest (ids
1254    /// `>= ` the resident entry count) — entries are append-only.
1255    ///
1256    /// The large `colors`/`dirs`/`occupancy`/`color_offsets` buffers carry
1257    /// slack and bump-append the new entries in place; a buffer that
1258    /// overflows is grown (with slack) and rebuilt once from the registry
1259    /// (amortised O(1) per add). The small `model_meta` table is rewritten
1260    /// each call. After this, [`Self::append_instances`] can reference the
1261    /// new `chain_id`.
1262    pub fn add_model(
1263        &mut self,
1264        device: &wgpu::Device,
1265        queue: &wgpu::Queue,
1266        registry: &SpriteModelRegistry,
1267        chain_id: u32,
1268    ) {
1269        let entries = registry.chains[chain_id as usize].clone();
1270        debug_assert_eq!(
1271            chain_id as usize,
1272            self.chains.len(),
1273            "add_model: chains must be appended in order"
1274        );
1275
1276        // CPU bookkeeping: assign each new entry a tight occ/coloff offset
1277        // and an allocator slot for colors/dirs. `need_colors_grow` marks
1278        // a slot that didn't fit → a colors/dirs repack below.
1279        let mut need_colors_grow = false;
1280        for &e in &entries {
1281            let e = e as usize;
1282            debug_assert_eq!(
1283                e,
1284                self.meta.len(),
1285                "add_model: entries must be appended in order"
1286            );
1287            let m = &registry.entries[e];
1288            let occ_off = self.occ_used;
1289            let coloff_off = self.coloff_used;
1290            self.occ_used += m.occupancy.len() as u32;
1291            self.coloff_used += m.color_offsets.len() as u32;
1292            let colors_off = match self.colors_alloc.push(m.colors.len() as u32) {
1293                Some(off) => off,
1294                None => {
1295                    need_colors_grow = true;
1296                    0 // placeholder; repack assigns the real offset
1297                }
1298            };
1299            self.meta.push(SpriteModelMeta {
1300                occupancy_offset: occ_off,
1301                colors_offset: colors_off,
1302                color_offsets_offset: coloff_off,
1303                occ_words_per_col: m.occ_words_per_col,
1304                dims: m.dims,
1305                _pad0: 0,
1306                pivot: m.pivot,
1307                voxel_world_size: m.voxel_world_size,
1308            });
1309            self.occ_lens.push(m.occupancy.len() as u32);
1310            self.coloff_lens.push(m.color_offsets.len() as u32);
1311            self.dead.push(false);
1312        }
1313        self.chains.push(entries.clone());
1314
1315        // occupancy + color_offsets: grow+rebuild on overflow, else write
1316        // the new tails in place.
1317        self.sync_concat(device, queue, registry, &entries, ConcatBuf::Occupancy);
1318        self.sync_concat(device, queue, registry, &entries, ConcatBuf::ColorOffsets);
1319
1320        // colors/dirs: repack on overflow (rebuilds both + every CPU
1321        // colors_offset), else write the new entries at their slots.
1322        if need_colors_grow {
1323            self.repack_colors_dirs(device, registry);
1324        } else {
1325            for &e in &entries {
1326                let e = e as usize;
1327                let m = &registry.entries[e];
1328                let off = u64::from(self.meta[e].colors_offset) * 4;
1329                queue.write_buffer(&self.colors, off, bytemuck::cast_slice(&m.colors));
1330                queue.write_buffer(&self.dirs, off, bytemuck::cast_slice(&m.dirs));
1331            }
1332        }
1333
1334        // model_meta: grow the record buffer if needed, then rewrite the
1335        // whole (small) table — covers both new records and any
1336        // colors_offset relocations from a repack.
1337        let count = self.meta.len() as u32;
1338        if count > self.meta_cap {
1339            self.meta_cap = grow_records(count);
1340            self.model_meta = storage_dst_pod_cap(
1341                device,
1342                "roxlap-gpu sprite_reg.model_meta",
1343                &self.meta,
1344                self.meta_cap,
1345            );
1346        } else {
1347            queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
1348        }
1349    }
1350
1351    /// Sync one tightly-concatenated buffer (`occupancy` or
1352    /// `color_offsets`) after `add_model` appended `new_entries`: if the
1353    /// used length now exceeds capacity, grow (with slack) and rebuild the
1354    /// whole buffer from the registry; otherwise write just the appended
1355    /// tails at their offsets.
1356    fn sync_concat(
1357        &mut self,
1358        device: &wgpu::Device,
1359        queue: &wgpu::Queue,
1360        registry: &SpriteModelRegistry,
1361        new_entries: &[u32],
1362        which: ConcatBuf,
1363    ) {
1364        let (used, cap) = match which {
1365            ConcatBuf::Occupancy => (self.occ_used, self.occ_cap),
1366            ConcatBuf::ColorOffsets => (self.coloff_used, self.coloff_cap),
1367        };
1368        if used > cap {
1369            let new_cap = grow_words(used);
1370            let all: Vec<u32> = registry
1371                .entries
1372                .iter()
1373                .flat_map(|m| concat_data(m, which).iter().copied())
1374                .collect();
1375            let label = match which {
1376                ConcatBuf::Occupancy => "roxlap-gpu sprite_reg.occupancy",
1377                ConcatBuf::ColorOffsets => "roxlap-gpu sprite_reg.color_offsets",
1378            };
1379            let buf = storage_dst_u32_cap(device, label, &all, new_cap);
1380            match which {
1381                ConcatBuf::Occupancy => {
1382                    self.occupancy = buf;
1383                    self.occ_cap = new_cap;
1384                }
1385                ConcatBuf::ColorOffsets => {
1386                    self.color_offsets = buf;
1387                    self.coloff_cap = new_cap;
1388                }
1389            }
1390        } else {
1391            let target = match which {
1392                ConcatBuf::Occupancy => &self.occupancy,
1393                ConcatBuf::ColorOffsets => &self.color_offsets,
1394            };
1395            for &e in new_entries {
1396                let e = e as usize;
1397                let off = match which {
1398                    ConcatBuf::Occupancy => self.meta[e].occupancy_offset,
1399                    ConcatBuf::ColorOffsets => self.meta[e].color_offsets_offset,
1400                };
1401                queue.write_buffer(
1402                    target,
1403                    u64::from(off) * 4,
1404                    bytemuck::cast_slice(concat_data(&registry.entries[e], which)),
1405                );
1406            }
1407        }
1408    }
1409
1410    /// Number of removed-but-not-yet-compacted models (tombstoned chains).
1411    /// A caller streams `add_model` / `remove_model` and calls
1412    /// [`Self::compact`] once this (relative to [`Self::live_model_count`])
1413    /// crosses a threshold.
1414    #[must_use]
1415    pub fn dead_model_count(&self) -> usize {
1416        self.chains.iter().filter(|c| c.is_empty()).count()
1417    }
1418
1419    /// Number of live (non-removed) models.
1420    #[must_use]
1421    pub fn live_model_count(&self) -> usize {
1422        self.chains.iter().filter(|c| !c.is_empty()).count()
1423    }
1424
1425    /// Remove a model (tombstone its LOD chain) — the counterpart to
1426    /// [`Self::add_model`]. O(chain length): marks the chain's entries
1427    /// dead and frees their `colors`/`dirs` slots for reuse by a later
1428    /// `add_model`. The `occupancy` / `color_offsets` holes are **not**
1429    /// reclaimed until [`Self::compact`]; entry ids (and the caller's other
1430    /// `chain_id`s) stay stable.
1431    ///
1432    /// Instances of the removed chain are **not** dropped here — they
1433    /// linger in the cull set but draw as nothing (skipped in
1434    /// [`Self::cull_bin_upload`]); the caller removes them via
1435    /// [`Self::remove_instance`] when convenient. A no-op if `chain_id` is
1436    /// out of range or already removed.
1437    pub fn remove_model(&mut self, chain_id: u32) {
1438        let Some(entries) = self.chains.get(chain_id as usize).cloned() else {
1439            return;
1440        };
1441        if entries.is_empty() {
1442            return; // already removed
1443        }
1444        for &e in &entries {
1445            let e = e as usize;
1446            self.dead[e] = true;
1447            self.colors_alloc.free(e);
1448        }
1449        self.chains[chain_id as usize] = Vec::new(); // tombstone
1450    }
1451
1452    /// Reclaim the holes left by [`Self::remove_model`]: rebuild the shared
1453    /// volume buffers from the live entries only, dropping every dead
1454    /// entry's data. Entry ids and `chain_id`s are preserved (dead entries
1455    /// keep a zero-length `meta` tombstone), so the caller's handles stay
1456    /// valid and no remap is needed.
1457    ///
1458    /// `registry` must be the resident one (entry ids 1:1, as for
1459    /// [`Self::add_model`] / [`Self::update_model`]). O(live volume) —
1460    /// call it when [`Self::dead_model_count`] is high, not every frame.
1461    pub fn compact(
1462        &mut self,
1463        device: &wgpu::Device,
1464        queue: &wgpu::Queue,
1465        registry: &SpriteModelRegistry,
1466    ) {
1467        // occupancy + color_offsets: re-pack live entries tightly, rewrite
1468        // each live entry's meta offset, zero the dead ones.
1469        self.compact_concat(device, registry, ConcatBuf::Occupancy);
1470        self.compact_concat(device, registry, ConcatBuf::ColorOffsets);
1471        // colors/dirs: the dead-aware repack already drops dead entries.
1472        self.repack_colors_dirs(device, registry);
1473        // model_meta: rewrite the (unchanged-length) table with the new
1474        // offsets. Buffer count didn't change, so no grow needed.
1475        queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
1476    }
1477
1478    /// Rebuild one tightly-concatenated buffer from live entries only
1479    /// (used by [`Self::compact`]): assign each live entry a fresh tight
1480    /// offset, zero dead entries' offset, and recreate the buffer with
1481    /// slack.
1482    fn compact_concat(
1483        &mut self,
1484        device: &wgpu::Device,
1485        registry: &SpriteModelRegistry,
1486        which: ConcatBuf,
1487    ) {
1488        let mut all: Vec<u32> = Vec::new();
1489        for e in 0..self.meta.len() {
1490            if self.dead[e] {
1491                match which {
1492                    ConcatBuf::Occupancy => self.meta[e].occupancy_offset = 0,
1493                    ConcatBuf::ColorOffsets => self.meta[e].color_offsets_offset = 0,
1494                }
1495                continue;
1496            }
1497            let off = all.len() as u32;
1498            match which {
1499                ConcatBuf::Occupancy => self.meta[e].occupancy_offset = off,
1500                ConcatBuf::ColorOffsets => self.meta[e].color_offsets_offset = off,
1501            }
1502            all.extend_from_slice(concat_data(&registry.entries[e], which));
1503        }
1504        let used = all.len() as u32;
1505        let cap = grow_words(used);
1506        let (label, buf) = match which {
1507            ConcatBuf::Occupancy => ("roxlap-gpu sprite_reg.occupancy", &mut self.occupancy),
1508            ConcatBuf::ColorOffsets => (
1509                "roxlap-gpu sprite_reg.color_offsets",
1510                &mut self.color_offsets,
1511            ),
1512        };
1513        *buf = storage_dst_u32_cap(device, label, &all, cap);
1514        match which {
1515            ConcatBuf::Occupancy => {
1516                self.occ_used = used;
1517                self.occ_cap = cap;
1518            }
1519            ConcatBuf::ColorOffsets => {
1520                self.coloff_used = used;
1521                self.coloff_cap = cap;
1522            }
1523        }
1524    }
1525
1526    /// GPU.10.3 — frustum-cull, pack the visible subset into the
1527    /// instance buffer, then bin those instances into screen tiles:
1528    /// project each visible bounding sphere to a screen AABB and append
1529    /// its (visible) index to every overlapped tile. Uploads the
1530    /// instance buffer + `tile_ranges` (per-tile offset/count) +
1531    /// `tile_instances` (flat grouped indices), growing the tile
1532    /// buffers as needed. Returns `(visible_count, tiles_x, tiles_y)`.
1533    #[allow(clippy::too_many_arguments)]
1534    pub fn cull_bin_upload(
1535        &mut self,
1536        device: &wgpu::Device,
1537        queue: &wgpu::Queue,
1538        f: &ViewFrustum,
1539        screen_w: u32,
1540        screen_h: u32,
1541        tile_size: u32,
1542        lod_px: f32,
1543    ) -> (u32, u32, u32) {
1544        let tiles_x = screen_w.div_ceil(tile_size).max(1);
1545        let tiles_y = screen_h.div_ceil(tile_size).max(1);
1546        let n_tiles = (tiles_x * tiles_y) as usize;
1547
1548        let nw = (1.0 + f.half_w * f.half_w).sqrt();
1549        let nh = (1.0 + f.half_h * f.half_h).sqrt();
1550        let cx = screen_w as f32 * 0.5;
1551        let cy = screen_h as f32 * 0.5;
1552        let px_per_world = cx / f.half_w; // isotropic: == cy/half_h
1553        let ts = tile_size as f32;
1554        let tx_max = tiles_x as i32 - 1;
1555        let ty_max = tiles_y as i32 - 1;
1556
1557        let mut visible: Vec<SpriteInstanceGpu> = Vec::with_capacity(self.cull.len());
1558        // Per-visible tile AABB (tx0, tx1, ty0, ty1) for the bin pass.
1559        let mut boxes: Vec<[i32; 4]> = Vec::with_capacity(self.cull.len());
1560        // Per-visible kv6colmul tables, flattened to two u32 per u64
1561        // entry (lanes 0|1, then 2|3), packed in visible order so the
1562        // shader indexes `colmul[inst_idx*512 + dir*2 + {0,1}]`.
1563        let mut visible_colmul: Vec<u32> = Vec::with_capacity(self.cull.len() * 512);
1564        let mut counts = vec![0u32; n_tiles];
1565
1566        for ci in &self.cull {
1567            // Skip instances of a removed model (tombstoned chain) — they
1568            // linger in `cull` until the caller drops them, but draw as
1569            // nothing.
1570            if self.chains[ci.chain_id as usize].is_empty() {
1571                continue;
1572            }
1573            let rel = [
1574                ci.center[0] - f.pos[0],
1575                ci.center[1] - f.pos[1],
1576                ci.center[2] - f.pos[2],
1577            ];
1578            let z = dot3(rel, f.forward);
1579            let r = ci.radius;
1580            if z + r < 0.0 || z - r > f.far {
1581                continue; // behind / beyond far
1582            }
1583            let x = dot3(rel, f.right);
1584            if (x - f.half_w * z) > r * nw || (-x - f.half_w * z) > r * nw {
1585                continue; // right / left
1586            }
1587            let y = dot3(rel, f.down);
1588            if (y - f.half_h * z) > r * nh || (-y - f.half_h * z) > r * nh {
1589                continue; // bottom / top
1590            }
1591
1592            // Visible: project the sphere to a screen AABB → tile range.
1593            let (tx0, tx1, ty0, ty1) = if z > 1e-3 {
1594                let sx = cx + (x / z) * px_per_world;
1595                let sy = cy + (y / z) * px_per_world;
1596                let sr = (r / z) * px_per_world;
1597                (
1598                    (((sx - sr) / ts).floor() as i32).clamp(0, tx_max),
1599                    (((sx + sr) / ts).floor() as i32).clamp(0, tx_max),
1600                    (((sy - sr) / ts).floor() as i32).clamp(0, ty_max),
1601                    (((sy + sr) / ts).floor() as i32).clamp(0, ty_max),
1602                )
1603            } else {
1604                (0, tx_max, 0, ty_max)
1605            };
1606            // GPU.10.4 — pick the LOD level by projected voxel size:
1607            // choose the coarsest level whose voxel still covers at
1608            // least `lod_px` screen pixels, i.e. step up once a mip-0
1609            // voxel would be smaller than that. `lod_px = 1` is the
1610            // natural "don't go sub-pixel" threshold; larger values
1611            // force LOD in closer (tuning/inspection).
1612            let chain = &self.chains[ci.chain_id as usize];
1613            let level = if z > 1e-3 && chain.len() > 1 {
1614                let voxel_px = px_per_world / z; // mip-0 voxel screen size
1615                ((lod_px / voxel_px).log2().ceil().max(0.0) as usize).min(chain.len() - 1)
1616            } else {
1617                0
1618            };
1619            let mut g = ci.gpu;
1620            g.model_id = chain[level];
1621            visible.push(g);
1622            boxes.push([tx0, tx1, ty0, ty1]);
1623            for &w in ci.colmul.iter() {
1624                visible_colmul.push((w & 0xffff_ffff) as u32);
1625                visible_colmul.push((w >> 32) as u32);
1626            }
1627            for ty in ty0..=ty1 {
1628                for tx in tx0..=tx1 {
1629                    counts[(ty * tiles_x as i32 + tx) as usize] += 1;
1630                }
1631            }
1632        }
1633
1634        if visible.is_empty() {
1635            return (0, tiles_x, tiles_y);
1636        }
1637
1638        // Prefix-sum counts → per-tile offsets; build the flat grouped
1639        // index list.
1640        let mut tile_ranges = vec![0u32; n_tiles * 2];
1641        let mut running = 0u32;
1642        for t in 0..n_tiles {
1643            tile_ranges[2 * t] = running; // offset
1644            tile_ranges[2 * t + 1] = counts[t]; // count
1645            running += counts[t];
1646        }
1647        let total = running as usize;
1648        let mut tile_instances = vec![0u32; total.max(1)];
1649        let mut cursor: Vec<u32> = (0..n_tiles).map(|t| tile_ranges[2 * t]).collect();
1650        for (vis_idx, b) in boxes.iter().enumerate() {
1651            for ty in b[2]..=b[3] {
1652                for tx in b[0]..=b[1] {
1653                    let t = (ty * tiles_x as i32 + tx) as usize;
1654                    tile_instances[cursor[t] as usize] = vis_idx as u32;
1655                    cursor[t] += 1;
1656                }
1657            }
1658        }
1659
1660        // Upload: instances + (grown) tile buffers. Grow a tile buffer
1661        // only when this frame needs more than its capacity (wgpu has
1662        // no Clone on Buffer, so we replace the field in place).
1663        queue.write_buffer(&self.instances, 0, bytemuck::cast_slice(&visible));
1664        let need_ranges = tile_ranges.len() as u32;
1665        if need_ranges > self.tile_ranges_cap {
1666            self.tile_ranges_cap = need_ranges.next_power_of_two();
1667            self.tile_ranges = storage_dst_u32(
1668                device,
1669                "roxlap-gpu sprite_reg.tile_ranges",
1670                self.tile_ranges_cap,
1671            );
1672        }
1673        let need_inst = tile_instances.len() as u32;
1674        if need_inst > self.tile_instances_cap {
1675            self.tile_instances_cap = need_inst.next_power_of_two();
1676            self.tile_instances = storage_dst_u32(
1677                device,
1678                "roxlap-gpu sprite_reg.tile_instances",
1679                self.tile_instances_cap,
1680            );
1681        }
1682        queue.write_buffer(&self.tile_ranges, 0, bytemuck::cast_slice(&tile_ranges));
1683        queue.write_buffer(
1684            &self.tile_instances,
1685            0,
1686            bytemuck::cast_slice(&tile_instances),
1687        );
1688        let need_colmul = visible_colmul.len() as u32;
1689        if need_colmul > self.colmul_cap {
1690            self.colmul_cap = need_colmul.next_power_of_two();
1691            self.colmul = storage_dst_u32(device, "roxlap-gpu sprite_reg.colmul", self.colmul_cap);
1692        }
1693        queue.write_buffer(&self.colmul, 0, bytemuck::cast_slice(&visible_colmul));
1694
1695        (visible.len() as u32, tiles_x, tiles_y)
1696    }
1697}
1698
/// GPU.12 incremental — where one registry entry's `colors` words (and
/// the parallel `dirs` words) sit inside the shared registry buffers.
///
/// The slot is the word window `[off, off + cap)`, of which `len` words
/// are live. Keeping `cap >= len` leaves slack, so a carve that *grows*
/// the surface-voxel count can be rewritten in place instead of
/// relocating the entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ColorSlot {
    /// First word of the window.
    off: u32,
    /// Size of the window in words.
    cap: u32,
    /// Number of live words held in the window.
    len: u32,
}
1710
1711/// First-fit suballocator over the parallel `colors`/`dirs` buffers
1712/// (same offsets/ranks → one allocator drives both). Each registry
1713/// entry owns a [`ColorSlot`]; growth past a slot's `cap` relocates it
1714/// (freeing the old block) via the free list or a bump tail, and only
1715/// when the tail would exceed `cap_total` does the caller grow + repack
1716/// the whole buffer. Pure (no GPU) so it unit-tests on its own.
1717#[derive(Debug, Default)]
1718struct ColorsAllocator {
1719    /// Per-entry slot, indexed by entry id.
1720    slots: Vec<ColorSlot>,
1721    /// Freed `(off, cap)` blocks available for first-fit reuse.
1722    free: Vec<(u32, u32)>,
1723    /// Next bump-allocation position (words).
1724    tail: u32,
1725    /// Total buffer capacity in words.
1726    cap_total: u32,
1727}
1728
/// Capacity (in words) to reserve for an array of `len` words: the array
/// itself plus 25% and a flat 16 words of slack, so a carve that exposes
/// a few extra surface voxels still fits without relocating the slot.
fn slot_cap(len: u32) -> u32 {
    let slack = len / 4 + 16;
    len + slack
}
1734
/// Capacity (in words) for a concatenated buffer that currently needs
/// `used` words: `used` plus 50% and a flat 256 words of headroom, so a
/// burst of `add_model` calls can bump-append instead of re-growing the
/// buffer on every call. Same headroom formula as [`ColorsAllocator`]'s
/// `cap_total`.
fn grow_words(used: u32) -> u32 {
    let headroom = used / 2 + 256;
    used + headroom
}
1741
/// Capacity (in records) for a grown `model_meta` buffer holding `count`
/// records: `count` plus 50% and a flat 8 records of headroom.
fn grow_records(count: u32) -> u32 {
    let headroom = count / 2 + 8;
    count + headroom
}
1746
1747impl ColorsAllocator {
1748    /// Lay every entry out contiguously (with per-slot slack) and add a
1749    /// global tail headroom so early growth bump-allocates rather than
1750    /// repacks.
1751    fn new(entry_lens: &[u32]) -> Self {
1752        let mut a = Self::default();
1753        a.repack(entry_lens);
1754        a
1755    }
1756
1757    fn slot(&self, entry: usize) -> ColorSlot {
1758        self.slots[entry]
1759    }
1760
1761    fn cap_total(&self) -> u32 {
1762        self.cap_total
1763    }
1764
1765    /// Repack ALL entries compactly to fit `new_lens`, resetting the
1766    /// free list + tail and choosing a fresh `cap_total` with headroom.
1767    /// Used at initial build and on a buffer grow.
1768    fn repack(&mut self, new_lens: &[u32]) {
1769        self.free.clear();
1770        let mut off = 0u32;
1771        let mut slots = Vec::with_capacity(new_lens.len());
1772        for &len in new_lens {
1773            // A 0-length (dead / removed) entry takes no space — keeps a
1774            // tombstone slot so entry ids stay positional.
1775            let cap = if len == 0 { 0 } else { slot_cap(len) };
1776            slots.push(ColorSlot { off, cap, len });
1777            off += cap;
1778        }
1779        self.slots = slots;
1780        self.tail = off;
1781        // Global headroom: +50% + 256 words.
1782        self.cap_total = off + off / 2 + 256;
1783    }
1784
1785    /// Place `new_len` words for `entry`. Returns `Some(off)` with the
1786    /// (possibly relocated) slot offset, or `None` if the buffer must
1787    /// grow + repack. On relocation the old block is pushed to the free
1788    /// list; an in-place fit returns the unchanged offset.
1789    fn place(&mut self, entry: usize, new_len: u32) -> Option<u32> {
1790        let cur = self.slots[entry];
1791        if new_len <= cur.cap {
1792            self.slots[entry] = ColorSlot {
1793                len: new_len,
1794                ..cur
1795            };
1796            return Some(cur.off);
1797        }
1798        let old = (cur.off, cur.cap);
1799        // First-fit a freed block big enough for the live data.
1800        if let Some(i) = self.free.iter().position(|&(_, c)| c >= new_len) {
1801            let (off, cap) = self.free.remove(i);
1802            self.free.push(old);
1803            self.slots[entry] = ColorSlot {
1804                off,
1805                cap,
1806                len: new_len,
1807            };
1808            return Some(off);
1809        }
1810        // Bump the tail if there's room.
1811        let want = slot_cap(new_len);
1812        if self.tail + want <= self.cap_total {
1813            let off = self.tail;
1814            self.tail += want;
1815            self.free.push(old);
1816            self.slots[entry] = ColorSlot {
1817                off,
1818                cap: want,
1819                len: new_len,
1820            };
1821            return Some(off);
1822        }
1823        None
1824    }
1825
1826    /// Append a slot for a brand-new entry of `new_len` words (used by
1827    /// [`SpriteRegistryResident::add_model`]). Returns `Some(off)` placed
1828    /// via the free list or the bump tail, or `None` if the buffer must
1829    /// grow + repack — in which case **no** slot is pushed (the caller's
1830    /// repack rebuilds every slot from scratch).
1831    fn push(&mut self, new_len: u32) -> Option<u32> {
1832        if let Some(i) = self.free.iter().position(|&(_, c)| c >= new_len) {
1833            let (off, cap) = self.free.remove(i);
1834            self.slots.push(ColorSlot {
1835                off,
1836                cap,
1837                len: new_len,
1838            });
1839            return Some(off);
1840        }
1841        let want = slot_cap(new_len);
1842        if self.tail + want <= self.cap_total {
1843            let off = self.tail;
1844            self.tail += want;
1845            self.slots.push(ColorSlot {
1846                off,
1847                cap: want,
1848                len: new_len,
1849            });
1850            return Some(off);
1851        }
1852        None
1853    }
1854
1855    /// Free `entry`'s slot back to the pool ([`SpriteRegistryResident::
1856    /// remove_model`]). Its `(off, cap)` block joins the free list for
1857    /// first-fit reuse by a later [`Self::push`]; the slot is zeroed so a
1858    /// repack treats it as a 0-length tombstone.
1859    fn free(&mut self, entry: usize) {
1860        let s = self.slots[entry];
1861        if s.cap > 0 {
1862            self.free.push((s.off, s.cap));
1863        }
1864        self.slots[entry] = ColorSlot {
1865            off: 0,
1866            cap: 0,
1867            len: 0,
1868        };
1869    }
1870}
1871
1872/// Create a STORAGE buffer of u32s; pads empty input (wgpu rejects
1873/// zero-sized storage bindings).
1874#[allow(dead_code)]
1875fn storage_u32(device: &wgpu::Device, label: &str, data: &[u32]) -> wgpu::Buffer {
1876    use wgpu::util::DeviceExt;
1877    let bytes: &[u8] = if data.is_empty() {
1878        bytemuck::cast_slice(&[0u32])
1879    } else {
1880        bytemuck::cast_slice(data)
1881    };
1882    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
1883        label: Some(label),
1884        contents: bytes,
1885        usage: wgpu::BufferUsages::STORAGE,
1886    })
1887}
1888
1889/// Create an uninitialised `STORAGE | COPY_DST` `u32` buffer of `cap`
1890/// words (≥1). Written each frame via `queue.write_buffer`.
1891fn storage_dst_u32(device: &wgpu::Device, label: &str, cap: u32) -> wgpu::Buffer {
1892    device.create_buffer(&wgpu::BufferDescriptor {
1893        label: Some(label),
1894        size: u64::from(cap.max(1)) * 4,
1895        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
1896        mapped_at_creation: false,
1897    })
1898}
1899
1900/// Create a `STORAGE | COPY_DST` `u32` buffer of `cap` words (≥ data
1901/// length, ≥ 1), initialised with `data` at offset 0 and the tail left
1902/// zeroed. Unlike [`storage_u32`] (STORAGE-only, exact-size) this both
1903/// reserves spare capacity and is `COPY_DST`, so the incremental
1904/// [`SpriteRegistryResident::update_model`] can `write_buffer` a growing
1905/// `colors`/`dirs` array in place. Filled via `mapped_at_creation` so no
1906/// queue is needed at upload time.
1907fn storage_dst_u32_cap(device: &wgpu::Device, label: &str, data: &[u32], cap: u32) -> wgpu::Buffer {
1908    let cap = cap.max(data.len() as u32).max(1);
1909    let buf = device.create_buffer(&wgpu::BufferDescriptor {
1910        label: Some(label),
1911        size: u64::from(cap) * 4,
1912        usage: wgpu::BufferUsages::STORAGE
1913            | wgpu::BufferUsages::COPY_DST
1914            | wgpu::BufferUsages::COPY_SRC,
1915        mapped_at_creation: true,
1916    });
1917    if !data.is_empty() {
1918        buf.slice(..(data.len() as u64 * 4))
1919            .get_mapped_range_mut()
1920            .copy_from_slice(bytemuck::cast_slice(data));
1921    }
1922    buf.unmap();
1923    buf
1924}
1925
1926/// Create a `STORAGE | COPY_DST` buffer of Pod records, exact-size
1927/// (≥ 1, zero-padded), so individual records can be rewritten in place
1928/// by [`SpriteRegistryResident::update_model`] on a relocation. The
1929/// record *count* never changes on an incremental edit (no model is
1930/// added/removed), so no slack is needed here.
1931fn storage_dst_pod<T: Pod + Zeroable>(
1932    device: &wgpu::Device,
1933    label: &str,
1934    data: &[T],
1935) -> wgpu::Buffer {
1936    let one = [T::zeroed()];
1937    let src: &[T] = if data.is_empty() { &one } else { data };
1938    let buf = device.create_buffer(&wgpu::BufferDescriptor {
1939        label: Some(label),
1940        size: std::mem::size_of_val(src) as u64,
1941        usage: wgpu::BufferUsages::STORAGE
1942            | wgpu::BufferUsages::COPY_DST
1943            | wgpu::BufferUsages::COPY_SRC,
1944        mapped_at_creation: true,
1945    });
1946    buf.slice(..)
1947        .get_mapped_range_mut()
1948        .copy_from_slice(bytemuck::cast_slice(src));
1949    buf.unmap();
1950    buf
1951}
1952
1953/// Create a `STORAGE | COPY_DST` Pod buffer holding `cap` records
1954/// (≥ `data.len()`, ≥ 1), initialised with `data` at record 0 and the
1955/// tail zeroed. The slack lets [`SpriteRegistryResident::add_model`] grow
1956/// the `model_meta` table without re-growing on every add.
1957fn storage_dst_pod_cap<T: Pod + Zeroable>(
1958    device: &wgpu::Device,
1959    label: &str,
1960    data: &[T],
1961    cap: u32,
1962) -> wgpu::Buffer {
1963    let rec = std::mem::size_of::<T>() as u64;
1964    let cap = u64::from(cap.max(data.len() as u32).max(1));
1965    let buf = device.create_buffer(&wgpu::BufferDescriptor {
1966        label: Some(label),
1967        size: cap * rec,
1968        usage: wgpu::BufferUsages::STORAGE
1969            | wgpu::BufferUsages::COPY_DST
1970            | wgpu::BufferUsages::COPY_SRC,
1971        mapped_at_creation: true,
1972    });
1973    if !data.is_empty() {
1974        buf.slice(..(data.len() as u64 * rec))
1975            .get_mapped_range_mut()
1976            .copy_from_slice(bytemuck::cast_slice(data));
1977    }
1978    buf.unmap();
1979    buf
1980}
1981
1982/// Create a STORAGE buffer of Pod records; pads empty input with one
1983/// zeroed `T`.
1984#[allow(dead_code)]
1985fn storage_pod<T: Pod + Zeroable>(device: &wgpu::Device, label: &str, data: &[T]) -> wgpu::Buffer {
1986    use wgpu::util::DeviceExt;
1987    let one = [T::zeroed()];
1988    let src: &[T] = if data.is_empty() { &one } else { data };
1989    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
1990        label: Some(label),
1991        contents: bytemuck::cast_slice(src),
1992        usage: wgpu::BufferUsages::STORAGE,
1993    })
1994}
1995
1996#[cfg(test)]
1997mod tests {
1998    use super::*;
1999    use roxlap_formats::kv6::{Kv6, Voxel};
2000
2001    /// 2×1 kv6: column (0,0) has voxels at z=5 (red) and z=1 (green)
2002    /// stored OUT of z-order; column (1,0) has one voxel at z=3.
2003    fn kv6_unsorted() -> Kv6 {
2004        let mk = |z, col| Voxel {
2005            col,
2006            z,
2007            vis: 0,
2008            dir: 0,
2009        };
2010        Kv6 {
2011            xsiz: 2,
2012            ysiz: 1,
2013            zsiz: 8,
2014            xpiv: 0.0,
2015            ypiv: 0.0,
2016            zpiv: 0.0,
2017            voxels: vec![mk(5, 0xAA), mk(1, 0xBB), mk(3, 0xCC)],
2018            xlen: vec![2, 1],
2019            ylen: vec![vec![2], vec![1]],
2020            palette: None,
2021        }
2022    }
2023
2024    #[test]
2025    fn occupancy_bits_set_at_voxel_z() {
2026        let m = build_sprite_model(&kv6_unsorted());
2027        assert_eq!(m.dims, [2, 1, 8]);
2028        assert_eq!(m.occ_words_per_col, 1); // ceil(8/32)
2029                                            // col 0: bits 1 and 5; col 1: bit 3.
2030        assert_eq!(m.occupancy[0], (1 << 1) | (1 << 5));
2031        assert_eq!(m.occupancy[1], 1 << 3);
2032    }
2033
2034    #[test]
2035    fn colors_are_ascending_z_for_rank_lookup() {
2036        let m = build_sprite_model(&kv6_unsorted());
2037        // col 0 sorted ascending z ⇒ z=1 (green 0xBB) before z=5 (0xAA).
2038        assert_eq!(m.color_offsets, vec![0, 2, 3]);
2039        assert_eq!(&m.colors, &[0xBB, 0xAA, 0xCC]);
2040    }
2041
2042    #[test]
2043    fn identity_basis_inverts_to_identity() {
2044        let inv = mat3_inverse([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]);
2045        assert_eq!(inv, [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]);
2046    }
2047
2048    #[test]
2049    fn fork_is_independent_of_parent() {
2050        let mut reg = SpriteModelRegistry::new();
2051        let base = reg.add(build_sprite_model(&kv6_unsorted()));
2052        let forked = reg.fork(base);
2053        assert_ne!(base, forked);
2054        // Recolour only the fork.
2055        reg.model_mut(forked).recolor(|_| 0x11);
2056        // Parent colours untouched; fork fully overwritten.
2057        assert_eq!(&reg.model(base).colors, &[0xBB, 0xAA, 0xCC]);
2058        assert_eq!(&reg.model(forked).colors, &[0x11, 0x11, 0x11]);
2059    }
2060
2061    #[test]
2062    fn remove_frees_chain_data_keeps_ids_stable() {
2063        let mut reg = SpriteModelRegistry::new();
2064        let a = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2065        let b = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2066        let len_before = reg.len();
2067        assert!(reg.is_live(a) && reg.is_live(b));
2068
2069        reg.remove(a);
2070        // Chain `a` is tombstoned (its entries are freed to empty models;
2071        // they're unreachable via `model()` now — that's the tombstone).
2072        assert!(!reg.is_live(a));
2073        // `b` is untouched and still live; `len()` (next id) is unchanged.
2074        assert!(reg.is_live(b));
2075        assert_eq!(&reg.model(b).colors, &[0xBB, 0xAA, 0xCC]);
2076        assert_eq!(reg.len(), len_before);
2077
2078        // A later add mints a fresh id past the tombstone (no slot reuse).
2079        let c = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2080        assert_eq!(c, len_before as u32);
2081        assert!(reg.is_live(c));
2082        // `b`'s id stayed valid across the remove + add round-trip.
2083        assert_eq!(&reg.model(b).colors, &[0xBB, 0xAA, 0xCC]);
2084    }
2085
2086    #[test]
2087    fn model_checked_guards_out_of_range_and_tombstoned() {
2088        // The guard `set_instance_model` relies on: `model()` would
2089        // index-panic on these, `model_checked` returns `None`.
2090        let mut reg = SpriteModelRegistry::new();
2091        let a = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2092        assert!(reg.model_checked(a).is_some());
2093        assert!(reg.model_checked(9999).is_none(), "out of range → None");
2094        reg.remove(a);
2095        assert!(reg.model_checked(a).is_none(), "tombstoned chain → None");
2096    }
2097
2098    #[test]
2099    fn remove_is_idempotent_and_bounds_safe() {
2100        let mut reg = SpriteModelRegistry::new();
2101        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2102        reg.remove(a);
2103        reg.remove(a); // already removed → no-op, no panic
2104        reg.remove(999); // out of range → no-op
2105        assert!(!reg.is_live(a));
2106        assert!(!reg.is_live(999));
2107    }
2108
2109    #[test]
2110    fn registry_gpu_structs_have_expected_sizes() {
2111        assert_eq!(std::mem::size_of::<SpriteModelMeta>(), 48);
2112        assert_eq!(std::mem::size_of::<SpriteInstanceGpu>(), 64);
2113    }
2114
2115    #[test]
2116    fn add_lod_builds_halving_mip_chain() {
2117        let mut reg = SpriteModelRegistry::new();
2118        // 8×8×8 single voxel-filled column model would be ideal, but
2119        // kv6_unsorted is 2×1×8 → mips: 2×1×8 → 1×1×4 → 1×1×2 → 1×1×1.
2120        let id = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2121        let m0 = reg.model(id);
2122        assert_eq!(m0.dims, [2, 1, 8]);
2123        assert!((m0.voxel_world_size - 1.0).abs() < 1e-6);
2124    }
2125
2126    /// kv6 from explicit voxels, ordered x-major/y-inner to match
2127    /// `build_sprite_model`'s column walk.
2128    fn kv6_from(xsiz: u32, ysiz: u32, zsiz: u32, voxels: &[(u32, u32, u16, u32)]) -> Kv6 {
2129        let mut ylen = vec![vec![0u16; ysiz as usize]; xsiz as usize];
2130        let mut flat = Vec::new();
2131        for x in 0..xsiz {
2132            for y in 0..ysiz {
2133                let mut col: Vec<(u16, u32)> = voxels
2134                    .iter()
2135                    .filter(|(vx, vy, _, _)| *vx == x && *vy == y)
2136                    .map(|(_, _, z, c)| (*z, *c))
2137                    .collect();
2138                col.sort_by_key(|(z, _)| *z);
2139                ylen[x as usize][y as usize] = col.len() as u16;
2140                for (z, c) in col {
2141                    flat.push(Voxel {
2142                        col: c,
2143                        z,
2144                        vis: 0,
2145                        dir: 0,
2146                    });
2147                }
2148            }
2149        }
2150        let xlen = ylen
2151            .iter()
2152            .map(|c| c.iter().map(|&v| u32::from(v)).sum())
2153            .collect();
2154        Kv6 {
2155            xsiz,
2156            ysiz,
2157            zsiz,
2158            xpiv: 0.0,
2159            ypiv: 0.0,
2160            zpiv: 0.0,
2161            voxels: flat,
2162            xlen,
2163            ylen,
2164            palette: None,
2165        }
2166    }
2167
2168    fn offsets_consistent(m: &SpriteModel) -> bool {
2169        let cols = (m.dims[0] * m.dims[1]) as usize;
2170        if m.color_offsets.len() != cols + 1 {
2171            return false;
2172        }
2173        // Monotonic non-decreasing + last == colors.len + each column's
2174        // span == its solid-voxel count.
2175        for w in m.color_offsets.windows(2) {
2176            if w[1] < w[0] {
2177                return false;
2178            }
2179        }
2180        m.color_offsets[cols] as usize == m.colors.len()
2181    }
2182
2183    #[test]
2184    fn carve_two_layers_keeps_offsets_consistent() {
2185        // Mirror the demo's carve: columns with voxels at varied z,
2186        // some sharing z=0/z=1, some not.
2187        let kv6 = kv6_from(
2188            3,
2189            2,
2190            8,
2191            &[
2192                (0, 0, 0, 0xA0),
2193                (0, 0, 1, 0xA1),
2194                (0, 0, 5, 0xA5),
2195                (1, 0, 1, 0xB1),
2196                (2, 1, 0, 0xC0),
2197                (2, 1, 3, 0xC3),
2198            ],
2199        );
2200        let mut m = build_sprite_model(&kv6);
2201        assert!(offsets_consistent(&m));
2202        for z in 0..2u32 {
2203            for y in 0..m.dims[1] {
2204                for x in 0..m.dims[0] {
2205                    m.set_voxel(x, y, z, None);
2206                }
2207            }
2208            assert!(offsets_consistent(&m), "inconsistent after carving z={z}");
2209            // downsample must not panic on the carved model.
2210            let _ = m.downsample();
2211        }
2212    }
2213
2214    #[test]
2215    fn set_voxel_inserts_replaces_and_clears() {
2216        // col 0 starts with z=1 (0xBB), z=5 (0xAA); col 1 with z=3 (0xCC).
2217        let mut m = build_sprite_model(&kv6_unsorted());
2218
2219        // Insert z=3 into col 0 (between z=1 and z=5) → rank 1.
2220        assert!(m.set_voxel(0, 0, 3, Some(0x55)));
2221        assert_eq!(m.occupancy[0], (1 << 1) | (1 << 3) | (1 << 5));
2222        // col 0 colours ascending z: 0xBB(z1), 0x55(z3), 0xAA(z5).
2223        assert_eq!(m.color_offsets, vec![0, 3, 4]);
2224        assert_eq!(&m.colors, &[0xBB, 0x55, 0xAA, 0xCC]);
2225
2226        // Replace z=3 in place (no offset shift).
2227        assert!(m.set_voxel(0, 0, 3, Some(0x66)));
2228        assert_eq!(&m.colors, &[0xBB, 0x66, 0xAA, 0xCC]);
2229        assert_eq!(m.color_offsets, vec![0, 3, 4]);
2230
2231        // Clear z=1 (rank 0) from col 0.
2232        assert!(m.set_voxel(0, 0, 1, None));
2233        assert_eq!(m.occupancy[0], (1 << 3) | (1 << 5));
2234        assert_eq!(m.color_offsets, vec![0, 2, 3]);
2235        assert_eq!(&m.colors, &[0x66, 0xAA, 0xCC]);
2236
2237        // No-ops: clear an empty voxel, edit out of bounds.
2238        assert!(!m.set_voxel(0, 0, 2, None));
2239        assert!(!m.set_voxel(9, 0, 0, Some(1)));
2240    }
2241
2242    #[test]
2243    fn rebuild_lod_refreshes_coarse_levels_from_mip0() {
2244        let mut reg = SpriteModelRegistry::new();
2245        let id = reg.add_lod(build_sprite_model(&kv6_unsorted()), 3);
2246        // Recolour mip-0 only via model_mut, then rebuild the ladder.
2247        reg.model_mut(id).recolor(|_| 0x0000_2000);
2248        reg.rebuild_lod(id);
2249        // The mip-1 average of all-0x2000 voxels is still 0x2000.
2250        let lvl1_entry = reg.chains[id as usize][1] as usize;
2251        assert!(reg.entries[lvl1_entry]
2252            .colors
2253            .iter()
2254            .all(|&c| c == 0x0000_2000));
2255    }
2256
2257    // ---- GPU.12 incremental: colors/dirs suballocator -----------------
2258
2259    /// Every slot fits its data, has slack, doesn't overlap the next, and
2260    /// the buffer reserves tail headroom past the last slot.
2261    fn alloc_invariants(a: &ColorsAllocator, lens: &[u32]) {
2262        let mut prev_end = 0u32;
2263        for (e, &len) in lens.iter().enumerate() {
2264            let s = a.slot(e);
2265            assert_eq!(s.len, len, "slot {e} len");
2266            assert!(s.cap >= s.len, "slot {e} cap >= len");
2267            // In a freshly repacked layout slots are in entry order.
2268            assert!(s.off >= prev_end, "slot {e} overlaps previous");
2269            assert!(s.off + s.cap <= a.cap_total(), "slot {e} past cap_total");
2270            prev_end = s.off + s.cap;
2271        }
2272        assert!(a.cap_total() >= prev_end, "tail headroom");
2273    }
2274
2275    #[test]
2276    fn allocator_new_lays_out_with_slack_and_headroom() {
2277        let lens = [10u32, 0, 64, 7];
2278        let a = ColorsAllocator::new(&lens);
2279        alloc_invariants(&a, &lens);
2280        // Slack: a 64-word slot has cap > 64 so a small carve-grow fits.
2281        assert!(a.slot(2).cap > 64);
2282        // Headroom past the bump tail for early growth.
2283        assert!(a.cap_total() > a.slot(3).off + a.slot(3).cap);
2284    }
2285
2286    #[test]
2287    fn allocator_place_in_place_when_within_cap() {
2288        let mut a = ColorsAllocator::new(&[10, 20]);
2289        let off0 = a.slot(0).off;
2290        let cap0 = a.slot(0).cap;
2291        // Shrink: still the same slot.
2292        assert_eq!(a.place(0, 5), Some(off0));
2293        assert_eq!(a.slot(0).len, 5);
2294        assert_eq!(a.slot(0).cap, cap0);
2295        // Grow within slack: same offset, no relocation.
2296        assert_eq!(a.place(0, cap0), Some(off0));
2297        assert_eq!(a.slot(0).off, off0);
2298        assert!(a.free.is_empty(), "no relocation should free anything");
2299    }
2300
2301    #[test]
2302    fn allocator_place_relocates_to_tail_and_frees_old() {
2303        let mut a = ColorsAllocator::new(&[10, 20]);
2304        let old0 = (a.slot(0).off, a.slot(0).cap);
2305        let tail_before = a.tail;
2306        // Overgrow entry 0 past its cap → relocate to the bump tail.
2307        let new_len = a.slot(0).cap + 5;
2308        let off = a.place(0, new_len).expect("fits in headroom");
2309        assert_eq!(off, tail_before, "relocated to old tail");
2310        assert_eq!(a.slot(0).off, off);
2311        assert_eq!(a.slot(0).len, new_len);
2312        assert!(a.free.contains(&old0), "old slot freed");
2313    }
2314
2315    #[test]
2316    fn allocator_reuses_freed_block_first_fit() {
2317        // Entry 0 has a large slot; entry 1 a tiny one, so growing 1 must
2318        // relocate (it can't fit in place) and lands in 0's freed block.
2319        let mut a = ColorsAllocator::new(&[10, 2]);
2320        let old0 = (a.slot(0).off, a.slot(0).cap);
2321        // Relocate entry 0 to the tail, freeing its original block.
2322        let _ = a.place(0, a.slot(0).cap + 5).unwrap();
2323        assert!(a.free.contains(&old0));
2324        // Grow entry 1 past its (tiny) cap but ≤ the freed block's cap →
2325        // first-fit reuses that block rather than bumping the tail.
2326        let new1 = a.slot(1).cap + 1;
2327        assert!(new1 <= old0.1, "freed block big enough");
2328        let off = a.place(1, new1).expect("reuses freed block");
2329        assert_eq!(off, old0.0, "first-fit reused the freed slot offset");
2330        assert!(!a.free.contains(&old0), "freed block consumed");
2331    }
2332
2333    #[test]
2334    fn allocator_signals_grow_then_repack_restores() {
2335        let mut a = ColorsAllocator::new(&[8, 8]);
2336        // Force overflow: ask for far more than cap_total.
2337        let huge = a.cap_total() + 100;
2338        assert_eq!(a.place(0, huge), None, "overflow must signal grow");
2339        // Repack with the new lengths compacts + grows the buffer.
2340        a.repack(&[huge, 8]);
2341        alloc_invariants(&a, &[huge, 8]);
2342        assert!(a.cap_total() > huge);
2343        // After repack the entry now fits in place.
2344        assert_eq!(a.place(0, huge), Some(a.slot(0).off));
2345    }
2346
2347    /// Drive the allocator like a real carve loop (mirroring
2348    /// `update_model`): one model's colour count drifts up and down
2349    /// across many edits while two neighbours stay put. Growth is
2350    /// absorbed in place / via the free list / by the bump tail, and on
2351    /// the rare overflow we repack (as `update_model` does). After every
2352    /// edit the live `[off, off+len)` windows must stay disjoint.
2353    #[test]
2354    fn allocator_carve_loop_keeps_live_windows_disjoint() {
2355        let mut a = ColorsAllocator::new(&[40, 12, 40]);
2356        let mut lens = [40u32, 12, 40];
2357        // A deterministic up/down walk of entry 1's length, incl. a jump
2358        // that forces at least one grow+repack.
2359        let walk = [13u32, 30, 60, 18, 9, 80, 80, 25, 200, 7];
2360        let mut grew = false;
2361        for &len in &walk {
2362            lens[1] = len;
2363            // Entry 1 re-placed; on overflow, repack the whole set.
2364            if a.place(1, len).is_none() {
2365                grew = true;
2366                a.repack(&lens);
2367            } else {
2368                // Neighbours fit in place every time.
2369                assert_eq!(a.place(0, 40), Some(a.slot(0).off));
2370                assert_eq!(a.place(2, 40), Some(a.slot(2).off));
2371            }
2372            assert_eq!(a.slot(1).len, len);
2373
2374            // No two entries' live windows overlap.
2375            let mut wins: Vec<(u32, u32)> =
2376                (0..3).map(|e| (a.slot(e).off, a.slot(e).len)).collect();
2377            wins.sort_by_key(|w| w.0);
2378            for pair in wins.windows(2) {
2379                let (o0, l0) = pair[0];
2380                let (o1, _) = pair[1];
2381                assert!(o0 + l0 <= o1, "live windows overlap: {pair:?}");
2382            }
2383        }
2384        assert!(grew, "the 200-word jump should have forced a repack");
2385    }
2386
2387    // --- incremental instance path (device-backed; skips w/o adapter) ---
2388
2389    fn headless() -> Option<crate::HeadlessGpu> {
2390        match crate::HeadlessGpu::new_blocking(crate::GpuRendererSettings::default()) {
2391            Ok(h) => Some(h),
2392            Err(e) => {
2393                eprintln!("[skip] no GPU adapter reachable: {e}");
2394                None
2395            }
2396        }
2397    }
2398
2399    fn one_model_registry() -> (SpriteModelRegistry, u32) {
2400        let mut reg = SpriteModelRegistry::new();
2401        let id = reg.add(build_sprite_model(&kv6_unsorted()));
2402        (reg, id)
2403    }
2404
2405    fn inst(model_id: u32, pos: [f32; 3]) -> SpriteInstance {
2406        use roxlap_formats::sprite::Sprite;
2407        SpriteInstance {
2408            model_id,
2409            transform: SpriteInstanceTransform::from_sprite(&Sprite::axis_aligned(
2410                kv6_unsorted(),
2411                pos,
2412            )),
2413        }
2414    }
2415
2416    #[test]
2417    fn append_grows_count_and_capacity_pow2() {
2418        let Some(h) = headless() else { return };
2419        let (reg, m) = one_model_registry();
2420        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(m, [0.0; 3])]);
2421        assert_eq!(res.instance_count(), 1);
2422        assert_eq!(res.instance_capacity, 1);
2423
2424        // Append 4 → count 5, capacity grows to next_pow2(5) = 8.
2425        let more: Vec<_> = (1..=4).map(|i| inst(m, [i as f32, 0.0, 0.0])).collect();
2426        let base = res.append_instances(&h.device, &reg, &more);
2427        assert_eq!(base, 1, "first appended index follows the seed instance");
2428        assert_eq!(res.instance_count(), 5);
2429        assert_eq!(res.instance_capacity, 8, "power-of-two growth");
2430
2431        // A second append that still fits keeps the same capacity (no realloc).
2432        let base2 = res.append_instances(&h.device, &reg, &[inst(m, [9.0, 0.0, 0.0])]);
2433        assert_eq!(base2, 5);
2434        assert_eq!(res.instance_count(), 6);
2435        assert_eq!(res.instance_capacity, 8, "fits existing capacity, no grow");
2436    }
2437
2438    #[test]
2439    fn append_empty_is_noop() {
2440        let Some(h) = headless() else { return };
2441        let (reg, m) = one_model_registry();
2442        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(m, [0.0; 3])]);
2443        let base = res.append_instances(&h.device, &reg, &[]);
2444        assert_eq!(base, 1);
2445        assert_eq!(res.instance_count(), 1);
2446        assert_eq!(res.instance_capacity, 1);
2447    }
2448
2449    /// Read `words` u32s back from a GPU buffer (needs COPY_SRC).
2450    fn read_u32(h: &crate::HeadlessGpu, buf: &wgpu::Buffer, words: u64) -> Vec<u32> {
2451        let bytes = words * 4;
2452        let staging = h.device.create_buffer(&wgpu::BufferDescriptor {
2453            label: Some("readback"),
2454            size: bytes,
2455            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
2456            mapped_at_creation: false,
2457        });
2458        let mut enc = h
2459            .device
2460            .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
2461        enc.copy_buffer_to_buffer(buf, 0, &staging, 0, bytes);
2462        h.queue.submit(std::iter::once(enc.finish()));
2463        let slice = staging.slice(..);
2464        let (tx, rx) = std::sync::mpsc::channel();
2465        slice.map_async(wgpu::MapMode::Read, move |r| tx.send(r).unwrap());
2466        h.device.poll(wgpu::PollType::wait_indefinitely()).ok();
2467        rx.recv().unwrap().unwrap();
2468        let data = slice.get_mapped_range();
2469        let out = bytemuck::cast_slice::<u8, u32>(&data).to_vec();
2470        drop(data);
2471        staging.unmap();
2472        out
2473    }
2474
2475    /// A second distinct model so add_model has real new geometry to lay
2476    /// down (different dims + colours from `kv6_unsorted`).
2477    fn kv6_other() -> Kv6 {
2478        let mk = |z, col| Voxel {
2479            col,
2480            z,
2481            vis: 0,
2482            dir: 0,
2483        };
2484        Kv6 {
2485            xsiz: 1,
2486            ysiz: 1,
2487            zsiz: 4,
2488            xpiv: 0.0,
2489            ypiv: 0.0,
2490            zpiv: 0.0,
2491            voxels: vec![mk(0, 0x11), mk(2, 0x22)],
2492            xlen: vec![2],
2493            ylen: vec![vec![2]],
2494            palette: None,
2495        }
2496    }
2497
2498    /// add_model lays the new model's volume on the GPU at the offsets its
2499    /// meta record claims — verified by reading the shared buffers back
2500    /// and matching each entry against its source SpriteModel.
2501    #[test]
2502    fn add_model_uploads_new_volume_incrementally() {
2503        let Some(h) = headless() else { return };
2504
2505        // Residency starts with model A only.
2506        let mut reg = SpriteModelRegistry::new();
2507        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2508        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(a, [0.0; 3])]);
2509        assert_eq!(res.chains.len(), 1);
2510        let entries_before = res.meta.len();
2511
2512        // Append model B (single-level) to the registry, then sync it.
2513        let b = reg.add(build_sprite_model(&kv6_other()));
2514        res.add_model(&h.device, &h.queue, &reg, b);
2515        assert_eq!(res.chains.len(), 2);
2516        assert_eq!(res.meta.len(), entries_before + 1, "one new entry");
2517
2518        // Read the shared buffers back and check EVERY entry's data sits
2519        // where its meta record points — both the pre-existing A and the
2520        // newly streamed B.
2521        let occ = read_u32(&h, &res.occupancy, u64::from(res.occ_cap));
2522        let coloff = read_u32(&h, &res.color_offsets, u64::from(res.coloff_cap));
2523        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2524        for (e, m) in reg.entries.iter().enumerate() {
2525            let meta = res.meta[e];
2526            let oo = meta.occupancy_offset as usize;
2527            assert_eq!(
2528                &occ[oo..oo + m.occupancy.len()],
2529                &m.occupancy[..],
2530                "occ entry {e}"
2531            );
2532            let co = meta.color_offsets_offset as usize;
2533            assert_eq!(
2534                &coloff[co..co + m.color_offsets.len()],
2535                &m.color_offsets[..],
2536                "color_offsets entry {e}"
2537            );
2538            let cc = meta.colors_offset as usize;
2539            assert_eq!(
2540                &cols[cc..cc + m.colors.len()],
2541                &m.colors[..],
2542                "colors entry {e}"
2543            );
2544        }
2545
2546        // And an instance of the freshly-added model can now be appended.
2547        let base = res.append_instances(&h.device, &reg, &[inst(b, [5.0, 0.0, 0.0])]);
2548        assert_eq!(base, 1);
2549        assert_eq!(res.instance_count(), 2);
2550    }
2551
2552    /// Adding many small models forces the volume buffers to grow + rebuild
2553    /// at least once; every entry must still read back correctly across the
2554    /// grow boundary.
2555    #[test]
2556    fn add_model_survives_buffer_growth() {
2557        let Some(h) = headless() else { return };
2558        let mut reg = SpriteModelRegistry::new();
2559        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2560        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(a, [0.0; 3])]);
2561        let occ_cap0 = res.occ_cap;
2562
2563        // 40 adds — occupancy starts exact-sized (cap == used), so the very
2564        // first add overflows and grows; later ones ride the slack.
2565        for _ in 0..40 {
2566            let id = reg.add(build_sprite_model(&kv6_other()));
2567            res.add_model(&h.device, &h.queue, &reg, id);
2568        }
2569        assert_eq!(res.chains.len(), 41);
2570        assert!(res.occ_cap > occ_cap0, "occupancy buffer grew");
2571
2572        let occ = read_u32(&h, &res.occupancy, u64::from(res.occ_cap));
2573        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2574        for (e, m) in reg.entries.iter().enumerate() {
2575            let meta = res.meta[e];
2576            let oo = meta.occupancy_offset as usize;
2577            assert_eq!(
2578                &occ[oo..oo + m.occupancy.len()],
2579                &m.occupancy[..],
2580                "occ entry {e}"
2581            );
2582            let cc = meta.colors_offset as usize;
2583            assert_eq!(
2584                &cols[cc..cc + m.colors.len()],
2585                &m.colors[..],
2586                "colors entry {e}"
2587            );
2588        }
2589    }
2590
2591    /// VCL.2 — a decoded voxel clip's frames register as a flipbook of LOD
2592    /// chains, and `set_instance_model` flips which frame an instance
2593    /// draws. The cull state it updates is exactly what
2594    /// `cull_bin_upload` packs into the GPU instance buffer each frame, so
2595    /// flipping `chain_id` redirects the rendered instance to the new
2596    /// frame's resident volume.
2597    #[test]
2598    fn voxel_clip_flipbook_set_instance_model() {
2599        use roxlap_formats::voxel_clip::{LoopMode, VoxelClip, VoxelFrame};
2600        let Some(h) = headless() else { return };
2601
2602        // Two distinct frames of a 1×1×4 clip: frame 0 has a voxel at z=0;
2603        // frame 1 adds z=1 — different occupancy + a longer colour run.
2604        let dims = [1u32, 1, 4];
2605        let owpc = dims[2].div_ceil(32).max(1) as usize; // 1
2606        let mk_frame = |zs: &[u32], cols: &[u32]| -> VoxelFrame {
2607            let mut occ = vec![0u32; owpc];
2608            for &z in zs {
2609                occ[(z >> 5) as usize] |= 1u32 << (z & 31);
2610            }
2611            VoxelFrame {
2612                occupancy: occ,
2613                colors: cols.to_vec(),
2614                color_offsets: vec![0, cols.len() as u32],
2615            }
2616        };
2617        let f0 = mk_frame(&[0], &[0x8011_2233]);
2618        let f1 = mk_frame(&[0, 1], &[0x8011_2233, 0x80AA_BBCC]);
2619        let clip = VoxelClip::from_frames(
2620            dims,
2621            [0.5, 0.5, 2.0],
2622            1.0,
2623            LoopMode::Loop,
2624            &[f0, f1],
2625            &[],
2626            33,
2627            0,
2628        );
2629        let decoded = clip.decode().expect("decode");
2630
2631        // Each frame → a single-level chain; both volumes resident + distinct.
2632        let mut reg = SpriteModelRegistry::new();
2633        let c0 = reg.add(sprite_model_from_clip_frame(&decoded, 0));
2634        let c1 = reg.add(sprite_model_from_clip_frame(&decoded, 1));
2635        assert_eq!(reg.model(c0).colors.len(), 1);
2636        assert_eq!(reg.model(c1).colors.len(), 2);
2637
2638        // One instance, in front of the test frustum, drawing frame 0.
2639        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(c0, [0.0, 0.0, 5.0])]);
2640        assert_eq!(res.cull[0].chain_id, c0);
2641
2642        // Flip to frame 1: the cull now draws chain c1 (radius reseeded).
2643        res.set_instance_model(&reg, 0, c1);
2644        assert_eq!(res.cull[0].chain_id, c1);
2645        assert_eq!(res.cull[0].radius, reg.model(c1).bound_radius());
2646
2647        // The next cull packs the new chain into the GPU instance buffer
2648        // (visible, no panic).
2649        let f = test_frustum();
2650        let (visible, _, _) = res.cull_bin_upload(&h.device, &h.queue, &f, 64, 64, 16, 1.0);
2651        assert_eq!(visible, 1);
2652
2653        // …and back to frame 0.
2654        res.set_instance_model(&reg, 0, c0);
2655        assert_eq!(res.cull[0].chain_id, c0);
2656
2657        // Out-of-range index is a safe no-op.
2658        res.set_instance_model(&reg, 99, c1);
2659        assert_eq!(res.cull[0].chain_id, c0);
2660    }
2661
2662    fn test_frustum() -> ViewFrustum {
2663        ViewFrustum {
2664            pos: [0.0, 0.0, 0.0],
2665            right: [1.0, 0.0, 0.0],
2666            down: [0.0, 1.0, 0.0],
2667            forward: [0.0, 0.0, 1.0],
2668            half_w: 1.0,
2669            half_h: 1.0,
2670            far: 10_000.0,
2671        }
2672    }
2673
2674    #[test]
2675    fn remove_model_tombstones_frees_and_reuses() {
2676        let Some(h) = headless() else { return };
2677        // Residency with models A and B, one instance each.
2678        let mut reg = SpriteModelRegistry::new();
2679        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2680        let b = reg.add(build_sprite_model(&kv6_other()));
2681        let mut res = SpriteRegistryResident::upload(
2682            &h.device,
2683            &reg,
2684            &[inst(a, [0.0; 3]), inst(b, [1.0, 0.0, 0.0])],
2685        );
2686        assert_eq!(res.live_model_count(), 2);
2687        assert_eq!(res.dead_model_count(), 0);
2688
2689        // Remove B → tombstoned, its colours freed into the pool.
2690        res.remove_model(b);
2691        assert_eq!(res.live_model_count(), 1);
2692        assert_eq!(res.dead_model_count(), 1);
2693        assert_eq!(res.dead.iter().filter(|&&d| d).count(), 1, "one entry dead");
2694        assert!(!res.colors_alloc.free.is_empty(), "B's colour slot freed");
2695
2696        // Adding C reuses the freed slot (free-list first-fit).
2697        let c = reg.add(build_sprite_model(&kv6_other()));
2698        res.add_model(&h.device, &h.queue, &reg, c);
2699        assert_eq!(res.live_model_count(), 2);
2700
2701        // A and C read back correctly; B is dead (skipped).
2702        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2703        for e in [a as usize, c as usize] {
2704            let m = &reg.entries[e];
2705            let cc = res.meta[e].colors_offset as usize;
2706            assert_eq!(
2707                &cols[cc..cc + m.colors.len()],
2708                &m.colors[..],
2709                "colors entry {e}"
2710            );
2711        }
2712
2713        // The lingering instance of removed B is skipped without panic.
2714        let f = test_frustum();
2715        let _ = res.cull_bin_upload(&h.device, &h.queue, &f, 64, 64, 16, 1.0);
2716    }
2717
2718    #[test]
2719    fn compact_reclaims_holes_keeps_ids_stable() {
2720        let Some(h) = headless() else { return };
2721        let mut reg = SpriteModelRegistry::new();
2722        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2723        let b = reg.add(build_sprite_model(&kv6_other()));
2724        let c = reg.add(build_sprite_model(&kv6_other()));
2725        let mut res = SpriteRegistryResident::upload(
2726            &h.device,
2727            &reg,
2728            &[inst(a, [0.0; 3]), inst(b, [1.0; 3]), inst(c, [2.0; 3])],
2729        );
2730        let occ_used_full = res.occ_used;
2731
2732        // Remove the middle model, then compact.
2733        res.remove_model(b);
2734        res.compact(&h.device, &h.queue, &reg);
2735
2736        // Holes reclaimed: occupancy now only covers A + C.
2737        let live_occ: u32 = [a, c]
2738            .iter()
2739            .map(|&e| reg.entries[e as usize].occupancy.len() as u32)
2740            .sum();
2741        assert_eq!(res.occ_used, live_occ);
2742        assert!(res.occ_used < occ_used_full, "compaction shrank occupancy");
2743        // Dead entry keeps a zeroed tombstone; ids unchanged.
2744        assert_eq!(res.meta[b as usize].occupancy_offset, 0);
2745        assert_eq!(res.live_model_count(), 2);
2746        assert_eq!(res.dead_model_count(), 1);
2747
2748        // Live entries read back correctly at their new offsets.
2749        let occ = read_u32(&h, &res.occupancy, u64::from(res.occ_cap));
2750        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2751        for &e in &[a as usize, c as usize] {
2752            let m = &reg.entries[e];
2753            let oo = res.meta[e].occupancy_offset as usize;
2754            assert_eq!(
2755                &occ[oo..oo + m.occupancy.len()],
2756                &m.occupancy[..],
2757                "occ {e}"
2758            );
2759            let cc = res.meta[e].colors_offset as usize;
2760            assert_eq!(&cols[cc..cc + m.colors.len()], &m.colors[..], "cols {e}");
2761        }
2762
2763        // Chain ids still valid: C's chain still resolves; B's is empty.
2764        assert!(!res.chains[c as usize].is_empty());
2765        assert!(res.chains[b as usize].is_empty());
2766    }
2767
2768    #[test]
2769    fn remove_swap_semantics_and_capacity_retained() {
2770        let Some(h) = headless() else { return };
2771        let (reg, m) = one_model_registry();
2772        let seed: Vec<_> = (0..4).map(|i| inst(m, [i as f32, 0.0, 0.0])).collect();
2773        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &seed);
2774        assert_eq!(res.instance_count(), 4);
2775        let cap = res.instance_capacity;
2776
2777        // Remove a middle element → the previous last (idx 3) moved into it.
2778        assert_eq!(res.remove_instance(1), Some(3));
2779        assert_eq!(res.instance_count(), 3);
2780
2781        // Remove the current last (idx 2) → nothing moved.
2782        assert_eq!(res.remove_instance(2), None);
2783        assert_eq!(res.instance_count(), 2);
2784
2785        // Out of range → None.
2786        assert_eq!(res.remove_instance(99), None);
2787        assert_eq!(res.instance_count(), 2);
2788
2789        // Capacity is retained for reuse (no shrink).
2790        assert_eq!(res.instance_capacity, cap);
2791    }
2792}