Skip to main content

roxlap_gpu/
sprite_model.rs

//! GPU.10 — KV6 sprite as a DDA-marchable voxel model.
//!
//! Unlike the GPU.9 splatter (one thread per voxel, screen-space
//! squares, overdraw + atomic contention), a sprite model is a small
//! voxel volume that the precise ray-DDA marches with one ray per
//! pixel — crisp, correct occlusion, no overdraw. This is the GPU.10.0
//! single sprite; instancing + tiling + LOD come in later sub-substages.
//!
//! The volume reuses the chunk occupancy/colour scheme but sized to
//! the KV6 bbox: per-column occupancy bitmask (`occ_words_per_col`
//! u32s, `CHUNK_Z`-style 32-bits-per-word), a flat colour array in
//! ascending-z order per column, and a `color_offsets` prefix table.
//! The shader finds a voxel's colour by `offset[col] + popcount(bits
//! below z)`, so colours MUST be ascending-z (we sort per column).
15
16#![allow(
17    clippy::cast_precision_loss,
18    clippy::cast_possible_truncation,
19    clippy::cast_possible_wrap,
20    clippy::cast_sign_loss,
21    clippy::many_single_char_names,
22    clippy::similar_names
23)]
24
25use bytemuck::{Pod, Zeroable};
26use roxlap_formats::kv6::Kv6;
27use roxlap_formats::material::material_for_color;
28use roxlap_formats::sprite::Sprite;
29use roxlap_formats::voxel_clip::{DecodedClip, VoxelFrame};
30
/// CPU-built voxel volume for one KV6 model.
///
/// Layout shared by every builder in this file: columns are indexed
/// `col = x + y * mx`; voxel `z` of a column is bit `z & 31` of that
/// column's occupancy word `z >> 5`; and `colors` / `dirs` /
/// `materials` hold one entry per solid voxel, column by column.
#[derive(Debug, Clone)]
pub struct SpriteModel {
    /// Voxel extent `(mx, my, mz)`.
    pub dims: [u32; 3],
    /// `max(1, ceil(mz / 32))` — u32 words of occupancy per (x, y)
    /// column. The builders clamp this to at least 1, so it is never 0
    /// even for a zero-height model.
    pub occ_words_per_col: u32,
    /// KV6 pivot in model-local voxel space.
    pub pivot: [f32; 3],
    /// Per-column occupancy bitmask, `mx * my * occ_words_per_col`.
    pub occupancy: Vec<u32>,
    /// Voxel colours, ascending z within each column.
    pub colors: Vec<u32>,
    /// Per-voxel surface-normal index (`Kv6::Voxel::dir`, 0..256),
    /// parallel to [`colors`](Self::colors). The GPU sprite shader uses
    /// it to index the per-instance `kv6colmul` lighting table, matching
    /// the CPU rasteriser's normal-based shading.
    pub dirs: Vec<u32>,
    /// Prefix sums: `color_offsets[col]` is the first colour index of
    /// column `col`; length `mx * my + 1`, with the final element equal
    /// to the total voxel count (`colors.len()`).
    pub color_offsets: Vec<u32>,
    /// Per-voxel material id (TV.3), parallel to [`colors`](Self::colors).
    /// **Empty** means the model has no per-voxel materials — every voxel
    /// uses the instance's uniform material (the TV.1/TV.2 path). A non-empty
    /// array gives mixed-material models (opaque frame + glass). Built by
    /// [`build_sprite_model_with_materials`].
    pub materials: Vec<u8>,
    /// World-space size of one voxel of this model (GPU.10.4 LOD): 1.0
    /// at mip-0, doubling each [`SpriteModel::downsample`]. The shader
    /// divides the local ray by this so a coarse voxel spans the right
    /// world extent and the march `t` stays in world units.
    pub voxel_world_size: f32,
}
64
65/// Build the DDA volume from a KV6. Columns are packed in
66/// `x + y*mx` order; each column's voxels are sorted ascending by z
67/// so the shader's popcount-rank colour lookup is correct.
68///
69/// # Panics
70/// If the KV6's `ylen` counters disagree with `voxels.len()` (a
71/// malformed model).
72#[must_use]
73pub fn build_sprite_model(kv6: &Kv6) -> SpriteModel {
74    build_sprite_model_inner(kv6, &[])
75}
76
77/// Build the DDA volume from a KV6, classifying each voxel into a per-voxel
78/// **material id** by colour (TV.3 mixed models) via `material_map`
79/// (`(rgb, material_id)` pairs; see
80/// [`material_for_color`](roxlap_formats::material::material_for_color)).
81/// An empty map produces a model with no per-voxel materials (identical to
82/// [`build_sprite_model`]).
83///
84/// # Panics
85/// As [`build_sprite_model`].
86#[must_use]
87pub fn build_sprite_model_with_materials(kv6: &Kv6, material_map: &[(u32, u8)]) -> SpriteModel {
88    build_sprite_model_inner(kv6, material_map)
89}
90
91fn build_sprite_model_inner(kv6: &Kv6, material_map: &[(u32, u8)]) -> SpriteModel {
92    let (mx, my, mz) = (kv6.xsiz, kv6.ysiz, kv6.zsiz);
93    let occ_words_per_col = mz.div_ceil(32).max(1);
94    let cols = (mx * my) as usize;
95    let want_mats = !material_map.is_empty();
96
97    let mut occupancy = vec![0u32; cols * occ_words_per_col as usize];
98    let mut color_offsets = vec![0u32; cols + 1];
99    let mut colors: Vec<u32> = Vec::with_capacity(kv6.voxels.len());
100    let mut dirs: Vec<u32> = Vec::with_capacity(kv6.voxels.len());
101    let mut materials: Vec<u8> = if want_mats {
102        Vec::with_capacity(kv6.voxels.len())
103    } else {
104        Vec::new()
105    };
106
107    // Pass 1 — consume voxels in KV6 storage order (x-outer / y-inner)
108    // into per-column buckets keyed by `col = x + y*mx`. Each entry is
109    // `(z, colour, normal-dir)`.
110    let mut buckets: Vec<Vec<(u16, u32, u8)>> = vec![Vec::new(); cols];
111    let mut voxel_iter = kv6.voxels.iter();
112    for x in 0..mx {
113        for y in 0..my {
114            let col = (x + y * mx) as usize;
115            let count = kv6.ylen[x as usize][y as usize];
116            for _ in 0..count {
117                let v = voxel_iter.next().expect("KV6 ylen / voxels.len mismatch");
118                buckets[col].push((v.z, v.col, v.dir));
119            }
120        }
121    }
122
123    // Pass 2 — emit in COLUMN-INDEX order so `color_offsets` is a true
124    // monotonic prefix sum (the shader indexes by `col` either way, but
125    // structural edits / mip rebuilds rely on monotonic offsets). Each
126    // column's voxels sorted ascending z for the popcount-rank lookup.
127    for (col, bucket) in buckets.iter_mut().enumerate() {
128        color_offsets[col] = colors.len() as u32;
129        bucket.sort_by_key(|(z, _, _)| *z);
130        for &(z, col_rgba, dir) in bucket.iter() {
131            let z = u32::from(z);
132            let base = col * occ_words_per_col as usize + (z >> 5) as usize;
133            occupancy[base] |= 1u32 << (z & 31);
134            colors.push(col_rgba);
135            dirs.push(u32::from(dir));
136            if want_mats {
137                materials.push(material_for_color(material_map, col_rgba));
138            }
139        }
140    }
141    color_offsets[cols] = colors.len() as u32;
142
143    SpriteModel {
144        dims: [mx, my, mz],
145        occ_words_per_col,
146        pivot: [kv6.xpiv, kv6.ypiv, kv6.zpiv],
147        occupancy,
148        color_offsets,
149        colors,
150        dirs,
151        materials,
152        voxel_world_size: 1.0,
153    }
154}
155
156/// Build a [`SpriteModel`] directly from a decoded voxel-clip frame
157/// (VCL.2). The [`VoxelFrame`] dense-column layout is byte-for-byte the
158/// [`SpriteModel`] layout that [`build_sprite_model`] produces, so this is
159/// a field move — no per-column bucket-sort. `dirs` is the frame's
160/// surface-normal LUT indices (from [`DecodedClip::dirs`]), parallel to
161/// `frame.colors`.
162///
163/// # Panics
164/// In debug, if `dirs.len() != frame.colors.len()` or the field shapes
165/// don't match `dims` (the same invariants [`build_sprite_model`] upholds).
166#[must_use]
167pub fn sprite_model_from_voxel_frame(
168    frame: &VoxelFrame,
169    dirs: &[u32],
170    dims: [u32; 3],
171    pivot: [f32; 3],
172    voxel_world_size: f32,
173) -> SpriteModel {
174    let occ_words_per_col = dims[2].div_ceil(32).max(1);
175    let cols = (dims[0] * dims[1]) as usize;
176    debug_assert_eq!(frame.occupancy.len(), cols * occ_words_per_col as usize);
177    debug_assert_eq!(frame.color_offsets.len(), cols + 1);
178    debug_assert_eq!(dirs.len(), frame.colors.len());
179    SpriteModel {
180        dims,
181        occ_words_per_col,
182        pivot,
183        occupancy: frame.occupancy.clone(),
184        colors: frame.colors.clone(),
185        dirs: dirs.to_vec(),
186        color_offsets: frame.color_offsets.clone(),
187        // Voxel clips have no per-voxel materials yet (per-instance only).
188        materials: Vec::new(),
189        voxel_world_size,
190    }
191}
192
193/// Build the [`SpriteModel`] for frame `frame` of a decoded clip — the
194/// per-frame model uploaded into a flipbook chain (VCL.2).
195///
196/// # Panics
197/// If `frame` is out of range, or the frame fails the layout invariants.
198#[must_use]
199pub fn sprite_model_from_clip_frame(clip: &DecodedClip, frame: usize) -> SpriteModel {
200    sprite_model_from_voxel_frame(
201        &clip.frames[frame],
202        &clip.dirs[frame],
203        clip.dims,
204        clip.pivot,
205        clip.voxel_world_size,
206    )
207}
208
/// Per-instance transform consumed by the model-DDA shader: the
/// inverse model→world rotation (so a world ray can be brought into
/// model-local space) plus the instance's world position. Stored as
/// three padded columns for std140/std430 (`mat3x3` 16-byte columns).
///
/// `#[repr(C)]` with only `f32` fields: 3 × 16 bytes of rotation, 12
/// bytes of position and 4 bytes of explicit padding — 64 bytes total.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable, Debug)]
pub struct SpriteInstanceTransform {
    /// Inverse of `[s | h | f]`, column-major, each column padded to
    /// `vec4`. `inv_rot * v = c0*v.x + c1*v.y + c2*v.z`.
    pub inv_rot: [[f32; 4]; 3],
    /// Instance world position (the KV6 pivot maps here).
    pub pos: [f32; 3],
    /// Explicit tail padding after `pos`; `from_sprite` writes it as
    /// `0.0`.
    _pad: f32,
}
223
224impl SpriteInstanceTransform {
225    /// Build from a sprite pose. `s/h/f` are the model→world basis
226    /// columns; we invert them so the shader can map world→local.
227    #[must_use]
228    pub fn from_sprite(sprite: &Sprite) -> Self {
229        let inv = mat3_inverse([sprite.s, sprite.h, sprite.f]);
230        Self {
231            inv_rot: [
232                [inv[0][0], inv[0][1], inv[0][2], 0.0],
233                [inv[1][0], inv[1][1], inv[1][2], 0.0],
234                [inv[2][0], inv[2][1], inv[2][2], 0.0],
235            ],
236            pos: sprite.p,
237            _pad: 0.0,
238        }
239    }
240}
241
/// A registry of sprite models. Instances reference a model by
/// `model_id`, which is a **LOD chain** id: each chain holds one or
/// more concrete mip levels (finest first; GPU.10.4), and the renderer
/// picks the level per instance by distance. Identical KV6s are added
/// once and shared by many instances. **Copy-on-modify**:
/// [`Self::fork`] deep-copies a chain so edits to the fork leave the
/// parent (and its instances) intact.
///
/// Both vectors only ever grow: [`Self::remove`] blanks a chain's
/// entries and empties its id list in place rather than deleting
/// slots, so entry ids and `model_id`s stay stable.
#[derive(Debug, Clone, Default)]
pub struct SpriteModelRegistry {
    /// Concrete mip-level volumes (the GPU buffers concatenate these).
    /// Indexed by entry id; a removed chain's entries are replaced by
    /// [`SpriteModel::empty`] placeholders.
    entries: Vec<SpriteModel>,
    /// `chains[model_id]` = entry ids, finest (mip-0) first. An empty
    /// list marks a removed (tombstoned) chain.
    chains: Vec<Vec<u32>>,
}
256
257impl SpriteModelRegistry {
258    #[must_use]
259    pub fn new() -> Self {
260        Self::default()
261    }
262
263    fn push_entry(&mut self, model: SpriteModel) -> u32 {
264        let id = self.entries.len() as u32;
265        self.entries.push(model);
266        id
267    }
268
269    /// Register a single-level (no-LOD) model; returns its `model_id`.
270    pub fn add(&mut self, model: SpriteModel) -> u32 {
271        let e = self.push_entry(model);
272        let id = self.chains.len() as u32;
273        self.chains.push(vec![e]);
274        id
275    }
276
277    /// Register a model with up to `max_levels` LOD mips (each a 2×
278    /// [`SpriteModel::downsample`] of the previous; stops early once a
279    /// level collapses to 1³). Returns its `model_id`.
280    pub fn add_lod(&mut self, model: SpriteModel, max_levels: u32) -> u32 {
281        let mut levels = vec![self.push_entry(model.clone())];
282        let mut cur = model;
283        for _ in 1..max_levels.max(1) {
284            if cur.dims == [1, 1, 1] {
285                break;
286            }
287            cur = cur.downsample();
288            levels.push(self.push_entry(cur.clone()));
289        }
290        let id = self.chains.len() as u32;
291        self.chains.push(levels);
292        id
293    }
294
295    /// Copy-on-modify: deep-copy every level of chain `parent` into new
296    /// entries + a new chain, and return its `model_id`. The fork owns
297    /// independent voxel data, so mutating it does not affect the
298    /// parent or any instance still pointing at it.
299    ///
300    /// # Panics
301    /// If `parent` is not a registered `model_id`.
302    pub fn fork(&mut self, parent: u32) -> u32 {
303        let src = self.chains[parent as usize].clone();
304        let levels: Vec<u32> = src
305            .iter()
306            .map(|&e| {
307                let copy = self.entries[e as usize].clone();
308                self.push_entry(copy)
309            })
310            .collect();
311        let id = self.chains.len() as u32;
312        self.chains.push(levels);
313        id
314    }
315
316    /// The finest (mip-0) model of chain `id`.
317    #[must_use]
318    pub fn model(&self, id: u32) -> &SpriteModel {
319        &self.entries[self.chains[id as usize][0] as usize]
320    }
321
322    /// Like [`Self::model`] but returns `None` for an out-of-range or
323    /// tombstoned (emptied) chain instead of panicking — the guarded form
324    /// for public primitives handed an arbitrary `chain_id`.
325    #[must_use]
326    pub fn model_checked(&self, id: u32) -> Option<&SpriteModel> {
327        let entry = *self.chains.get(id as usize)?.first()?;
328        self.entries.get(entry as usize)
329    }
330
331    /// Mutable access to the finest (mip-0) model for editing — the
332    /// copy-on-modify entry point (typically on a [`Self::fork`]).
333    /// After a *structural* edit (occupancy/dims), call
334    /// [`Self::rebuild_lod`] so the coarser mips match; a pure recolour
335    /// can use [`Self::recolor_chain`] instead.
336    pub fn model_mut(&mut self, id: u32) -> &mut SpriteModel {
337        let e = self.chains[id as usize][0] as usize;
338        &mut self.entries[e]
339    }
340
341    /// Recolour every LOD level of chain `id` (so a forked tint shows
342    /// at all distances).
343    pub fn recolor_chain(&mut self, id: u32, f: impl Fn(u32) -> u32 + Copy) {
344        for li in 0..self.chains[id as usize].len() {
345            let e = self.chains[id as usize][li] as usize;
346            self.entries[e].recolor(f);
347        }
348    }
349
350    /// Regenerate chain `id`'s coarser mip levels from its (possibly
351    /// just-edited) mip-0. Run after a structural edit via
352    /// [`Self::model_mut`] so the LOD ladder stays consistent. No-op
353    /// for a single-level (no-LOD) chain.
354    pub fn rebuild_lod(&mut self, id: u32) {
355        let levels = self.chains[id as usize].clone();
356        if levels.len() <= 1 {
357            return;
358        }
359        let mut cur = self.entries[levels[0] as usize].clone();
360        for &e in &levels[1..] {
361            cur = cur.downsample();
362            self.entries[e as usize] = cur.clone();
363        }
364    }
365
366    /// Free chain `chain_id`'s voxel data **in place**: replace each of
367    /// its LOD entries with [`SpriteModel::empty`] and clear the chain.
368    /// Entry ids and every other `model_id` are **preserved** (the chain
369    /// becomes empty, its entries become placeholders), so no id remap is
370    /// needed and the resident registry's entry alignment stays intact.
371    ///
372    /// This is safe to pair with the resident side because
373    /// [`SpriteRegistryResident::remove_model`] tombstones the same
374    /// entries (`dead[e]`) and [`compact`](SpriteRegistryResident::compact)
375    /// reads only live entries — so the resident never touches the empty
376    /// placeholders left here. Call `remove_model` (resident) **before**
377    /// this so those tombstones are set. No-op if `chain_id` is out of
378    /// range or already removed.
379    pub fn remove(&mut self, chain_id: u32) {
380        let Some(entries) = self.chains.get(chain_id as usize) else {
381            return;
382        };
383        // Clone the small id list so we can mutate `entries` while iterating.
384        let entries = entries.clone();
385        for e in entries {
386            self.entries[e as usize] = SpriteModel::empty();
387        }
388        self.chains[chain_id as usize] = Vec::new(); // tombstone (slot kept)
389    }
390
391    /// Whether `chain_id` is a live (registered, not [`removed`](Self::remove))
392    /// model. `false` for an out-of-range id or a tombstoned chain.
393    #[must_use]
394    pub fn is_live(&self, chain_id: u32) -> bool {
395        self.chains
396            .get(chain_id as usize)
397            .is_some_and(|c| !c.is_empty())
398    }
399
400    /// Number of LOD chains (distinct `model_id`s). Counts tombstoned
401    /// (removed) chains too — ids are never reused, so this is also the
402    /// next id that [`Self::add`] / [`Self::add_lod`] will mint.
403    #[must_use]
404    pub fn len(&self) -> usize {
405        self.chains.len()
406    }
407
408    #[must_use]
409    pub fn is_empty(&self) -> bool {
410        self.chains.is_empty()
411    }
412}
413
414impl SpriteModel {
415    /// An empty (zero-voxel, zero-extent) placeholder model. Used by
416    /// [`SpriteModelRegistry::remove`] to free a removed chain's voxel
417    /// data while keeping its entry slot, so ids stay stable. Carries no
418    /// occupancy/colours; `color_offsets` is the single-element prefix
419    /// `[0]` (`cols + 1` with `cols == 0`), keeping the structural
420    /// invariant intact for any code that inspects it.
421    #[must_use]
422    pub fn empty() -> Self {
423        Self {
424            dims: [0, 0, 0],
425            occ_words_per_col: 1,
426            pivot: [0.0, 0.0, 0.0],
427            occupancy: Vec::new(),
428            colors: Vec::new(),
429            dirs: Vec::new(),
430            color_offsets: vec![0],
431            materials: Vec::new(),
432            voxel_world_size: 1.0,
433        }
434    }
435
436    /// Recolour every voxel via `f(old_rgba) -> new_rgba`. Structure
437    /// (occupancy / offsets) is untouched, so this is a cheap in-place
438    /// edit — handy on a [`SpriteModelRegistry::fork`] to make a tinted
439    /// variant. For structural edits, mutate the public occupancy /
440    /// colours / dims directly (via `model_mut`) then rebuild the LOD.
441    pub fn recolor(&mut self, f: impl Fn(u32) -> u32) {
442        for c in &mut self.colors {
443            *c = f(*c);
444        }
445    }
446
447    /// GPU.12 — structural edit of a single voxel within the model's
448    /// existing bounds. `Some(rgba)` sets/replaces the voxel at
449    /// `(x, y, z)`; `None` clears it. Maintains the ascending-z colour
450    /// invariant by inserting/removing at the voxel's popcount rank and
451    /// shifting the affected columns' `color_offsets`. Returns `true`
452    /// if the model changed. Out-of-bounds coordinates are ignored
453    /// (returns `false`) — growing `dims` is a separate concern.
454    ///
455    /// After editing, call [`SpriteModelRegistry::rebuild_lod`] to
456    /// refresh coarser mips, then re-upload via `set_sprite_instances`.
457    pub fn set_voxel(&mut self, x: u32, y: u32, z: u32, color: Option<u32>) -> bool {
458        if x >= self.dims[0] || y >= self.dims[1] || z >= self.dims[2] {
459            return false;
460        }
461        let owpc = self.occ_words_per_col as usize;
462        let cols = (self.dims[0] * self.dims[1]) as usize;
463        let col = (x + y * self.dims[0]) as usize;
464        let base = col * owpc;
465        let zw = (z >> 5) as usize;
466        let zb = z & 31;
467
468        // Rank = solid voxels strictly below z in this column.
469        let mut rank = 0usize;
470        for w in 0..zw {
471            rank += self.occupancy[base + w].count_ones() as usize;
472        }
473        let below_mask = if zb > 0 { (1u32 << zb) - 1 } else { 0 };
474        rank += (self.occupancy[base + zw] & below_mask).count_ones() as usize;
475        let idx = self.color_offsets[col] as usize + rank;
476        let was_set = (self.occupancy[base + zw] >> zb) & 1 == 1;
477
478        if let Some(rgba) = color {
479            if was_set {
480                self.colors[idx] = rgba; // replace in place (keeps dir)
481            } else {
482                self.occupancy[base + zw] |= 1u32 << zb;
483                self.colors.insert(idx, rgba);
484                // No normal supplied by this API — default to dir 0 (the
485                // sole caller, the carve hotkey, only ever clears).
486                self.dirs.insert(idx, 0);
487                if !self.materials.is_empty() {
488                    self.materials.insert(idx, 0); // new voxel → opaque material
489                }
490                for c in &mut self.color_offsets[col + 1..=cols] {
491                    *c += 1;
492                }
493            }
494            true
495        } else {
496            if !was_set {
497                return false;
498            }
499            self.occupancy[base + zw] &= !(1u32 << zb);
500            self.colors.remove(idx);
501            self.dirs.remove(idx);
502            if !self.materials.is_empty() {
503                self.materials.remove(idx);
504            }
505            for c in &mut self.color_offsets[col + 1..=cols] {
506                *c -= 1;
507            }
508            true
509        }
510    }
511
512    /// Radius of a bounding sphere centred at the instance position
513    /// (the pivot maps there): the farthest bbox corner from the
514    /// pivot. Used for frustum culling. Assumes a unit basis; scaled
515    /// instances would multiply this by their max basis length.
516    #[must_use]
517    pub fn bound_radius(&self) -> f32 {
518        let mut r2 = 0.0_f32;
519        for &cx in &[0.0, self.dims[0] as f32] {
520            for &cy in &[0.0, self.dims[1] as f32] {
521                for &cz in &[0.0, self.dims[2] as f32] {
522                    let d = [cx - self.pivot[0], cy - self.pivot[1], cz - self.pivot[2]];
523                    r2 = r2.max(d[0] * d[0] + d[1] * d[1] + d[2] * d[2]);
524                }
525            }
526        }
527        r2.sqrt()
528    }
529
530    /// GPU.10.4 — 2× voxel downsample for the next LOD level. A coarse
531    /// voxel is solid if any of its 2×2×2 fine voxels is, coloured by
532    /// their per-channel average. Dims/pivot halve and
533    /// `voxel_world_size` doubles, so the coarse model occupies the
534    /// same world box at half the resolution (origin-corner aligned).
535    #[must_use]
536    #[allow(clippy::manual_checked_ops)] // `n > 0` guards 4 divisions, not one checked_div
537    pub fn downsample(&self) -> SpriteModel {
538        let [fx, fy, fz] = self.dims;
539        let fidx = |x: u32, y: u32, z: u32| (x + y * fx + z * fx * fy) as usize;
540
541        // Reconstruct dense fine voxels (solid flag + colour + normal + TV
542        // material).
543        let has_mats = !self.materials.is_empty();
544        let mut solid = vec![false; (fx * fy * fz) as usize];
545        let mut fine = vec![0u32; (fx * fy * fz) as usize];
546        let mut fine_dir = vec![0u32; (fx * fy * fz) as usize];
547        let mut fine_mat = vec![0u8; (fx * fy * fz) as usize];
548        for x in 0..fx {
549            for y in 0..fy {
550                let col = (x + y * fx) as usize;
551                let base = col * self.occ_words_per_col as usize;
552                let off = self.color_offsets[col] as usize;
553                let mut seen = 0usize;
554                for z in 0..fz {
555                    let w = base + (z >> 5) as usize;
556                    if (self.occupancy[w] >> (z & 31)) & 1 == 1 {
557                        fine[fidx(x, y, z)] = self.colors[off + seen];
558                        fine_dir[fidx(x, y, z)] = self.dirs[off + seen];
559                        if has_mats {
560                            fine_mat[fidx(x, y, z)] = self.materials[off + seen];
561                        }
562                        solid[fidx(x, y, z)] = true;
563                        seen += 1;
564                    }
565                }
566            }
567        }
568
569        let nx = fx.div_ceil(2).max(1);
570        let ny = fy.div_ceil(2).max(1);
571        let nz = fz.div_ceil(2).max(1);
572        let owpc = nz.div_ceil(32).max(1);
573        let cols = (nx * ny) as usize;
574        let mut occupancy = vec![0u32; cols * owpc as usize];
575        let mut color_offsets = vec![0u32; cols + 1];
576        let mut colors: Vec<u32> = Vec::new();
577        let mut dirs: Vec<u32> = Vec::new();
578        let mut materials: Vec<u8> = Vec::new();
579
580        // Emit in column-index order (`ccol = cx + cy*nx`), cy outer,
581        // so `color_offsets` is a monotonic prefix sum like build's.
582        for cy in 0..ny {
583            for cx in 0..nx {
584                let ccol = (cx + cy * nx) as usize;
585                color_offsets[ccol] = colors.len() as u32;
586                for cz in 0..nz {
587                    let (mut a, mut r, mut g, mut b, mut n) = (0u32, 0u32, 0u32, 0u32, 0u32);
588                    // Normals + materials don't average meaningfully — keep
589                    // the first solid child's `dir` / material for the coarse
590                    // voxel.
591                    let mut rep_dir = 0u32;
592                    let mut rep_mat = 0u8;
593                    for dz in 0..2 {
594                        for dy in 0..2 {
595                            for dx in 0..2 {
596                                let (x, y, z) = (2 * cx + dx, 2 * cy + dy, 2 * cz + dz);
597                                if x < fx && y < fy && z < fz && solid[fidx(x, y, z)] {
598                                    let c = fine[fidx(x, y, z)];
599                                    if n == 0 {
600                                        rep_dir = fine_dir[fidx(x, y, z)];
601                                        rep_mat = fine_mat[fidx(x, y, z)];
602                                    }
603                                    a += (c >> 24) & 0xff;
604                                    r += (c >> 16) & 0xff;
605                                    g += (c >> 8) & 0xff;
606                                    b += c & 0xff;
607                                    n += 1;
608                                }
609                            }
610                        }
611                    }
612                    if n > 0 {
613                        let avg = ((a / n) << 24) | ((r / n) << 16) | ((g / n) << 8) | (b / n);
614                        let base = ccol * owpc as usize + (cz >> 5) as usize;
615                        occupancy[base] |= 1u32 << (cz & 31);
616                        colors.push(avg);
617                        dirs.push(rep_dir);
618                        if has_mats {
619                            materials.push(rep_mat);
620                        }
621                    }
622                }
623            }
624        }
625        color_offsets[cols] = colors.len() as u32;
626
627        SpriteModel {
628            dims: [nx, ny, nz],
629            occ_words_per_col: owpc,
630            pivot: [
631                self.pivot[0] * 0.5,
632                self.pivot[1] * 0.5,
633                self.pivot[2] * 0.5,
634            ],
635            occupancy,
636            colors,
637            dirs,
638            color_offsets,
639            materials,
640            voxel_world_size: self.voxel_world_size * 2.0,
641        }
642    }
643}
644
/// View frustum for CPU instance culling, in world space. Built each
/// frame from the world camera. `half_w`/`half_h` are the tangents of
/// the half-FOV (so the side planes are `|x| <= half_w * z` etc. in
/// camera space).
#[derive(Clone, Copy, Debug)]
pub struct ViewFrustum {
    /// Camera position (apex of the frustum).
    pub pos: [f32; 3],
    /// Camera-space x axis expressed in world space.
    /// NOTE(review): presumably unit length and orthogonal to
    /// `down` / `forward` — confirm against the code that builds this.
    pub right: [f32; 3],
    /// Camera-space y axis expressed in world space (pointing down).
    pub down: [f32; 3],
    /// Camera-space z axis (view direction) expressed in world space.
    pub forward: [f32; 3],
    /// Tangent of the horizontal half-FOV.
    pub half_w: f32,
    /// Tangent of the vertical half-FOV.
    pub half_h: f32,
    /// NOTE(review): presumably the far cull distance along `forward`,
    /// in world units — confirm against the cull code.
    pub far: f32,
}
659
/// CPU cull record: the GPU instance + its world bounding sphere.
/// Not `Copy` — carries a boxed 256-entry `kv6colmul` table.
#[derive(Clone)]
struct CullInstance {
    /// Instance transform + a placeholder `model_id`; the cull
    /// overwrites `model_id` with the distance-chosen LOD entry.
    gpu: SpriteInstanceGpu,
    /// LOD chain this instance draws (the user-facing `model_id`).
    chain_id: u32,
    /// Bounding-sphere centre; `make_cull` seeds it with the instance
    /// position.
    center: [f32; 3],
    /// Bounding-sphere radius; `make_cull` seeds it with the
    /// `bound_radius` of the chain's finest model.
    radius: f32,
    /// voxlap `kv6colmul[256]` — per-surface-normal colour modulation
    /// for this instance's pose + lighting. Defaults to identity
    /// (`0x0100` in every channel lane → unshaded) until the facade sets
    /// it via [`SpriteRegistryResident::set_instance_colmul`]. Packed
    /// into the `colmul` GPU buffer (in visible order) each frame.
    colmul: Box<[u64; 256]>,
}
678
/// The no-op `kv6colmul` table: all 256 entries carry `0x0100` in each
/// of their four 16-bit channel lanes, so the shader's
/// `(rgb[c] << 8) * 0x0100 >> 16 == rgb[c]` — i.e. no shading.
fn identity_colmul() -> Box<[u64; 256]> {
    // `0x0100` replicated into the lanes at bits 0, 16, 32 and 48.
    const IDENTITY_WORD: u64 = 0x0100_0100_0100_0100;
    Box::new([IDENTITY_WORD; 256])
}
686
/// Dot product of two 3-component vectors.
fn dot3(a: [f32; 3], b: [f32; 3]) -> f32 {
    let [ax, ay, az] = a;
    let [bx, by, bz] = b;
    ax * bx + ay * by + az * bz
}
690
691/// Build one CPU cull record from a user [`SpriteInstance`]: pack the
692/// transform, seed the bounding sphere from the chain's finest model, and
693/// start `colmul` at identity. Shared by the full
694/// [`SpriteRegistryResident::upload`] and the incremental
695/// [`SpriteRegistryResident::append_instances`].
696fn make_cull(registry: &SpriteModelRegistry, i: &SpriteInstance) -> CullInstance {
697    CullInstance {
698        gpu: SpriteInstanceGpu {
699            inv_rot0: i.transform.inv_rot[0],
700            inv_rot1: i.transform.inv_rot[1],
701            inv_rot2: i.transform.inv_rot[2],
702            pos: i.transform.pos,
703            model_id: i.model_id, // placeholder; cull rewrites per frame
704            material: u32::from(i.material),
705            alpha_mul: f32::from(i.alpha_mul) / 255.0,
706            _pad0: 0,
707            _pad1: 0,
708        },
709        chain_id: i.model_id,
710        center: i.transform.pos,
711        radius: registry.model(i.model_id).bound_radius(),
712        colmul: identity_colmul(),
713    }
714}
715
716/// Allocate the `instances` capacity buffer (`STORAGE | COPY_DST`) sized
717/// for `cap` records (≥1). Left uninitialised — `cull_bin_upload`
718/// rewrites it (offset 0) each frame, and `append_instances` seeds the
719/// live records after a grow.
720fn instances_buffer(device: &wgpu::Device, cap: u32) -> wgpu::Buffer {
721    device.create_buffer(&wgpu::BufferDescriptor {
722        label: Some("roxlap-gpu sprite_reg.instances"),
723        size: u64::from(cap.max(1)) * std::mem::size_of::<SpriteInstanceGpu>() as u64,
724        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
725        mapped_at_creation: false,
726    })
727}
728
/// One sprite instance: a model reference + world pose.
#[derive(Debug, Clone, Copy)]
pub struct SpriteInstance {
    /// Which model to draw: a LOD-chain id of the model registry (the
    /// cull record keeps it as `chain_id`).
    pub model_id: u32,
    /// World pose in the form the shader consumes (inverse rotation
    /// columns + world position).
    pub transform: SpriteInstanceTransform,
    /// Voxel-material id (TV stage): indexes the renderer's global material
    /// palette for this instance's opacity + blend mode. `0` (the default)
    /// is opaque, so an unset instance renders unchanged.
    pub material: u8,
    /// Per-instance alpha multiplier (TV stage), `0..=255` (`255` =
    /// unscaled, the default). Normalised to `0..=1` (divided by 255)
    /// when packed for the GPU.
    pub alpha_mul: u8,
}
742
743impl SpriteInstance {
744    /// A model reference + pose with the default opaque material
745    /// (`material = 0`, `alpha_mul = 255`).
746    #[must_use]
747    pub fn new(model_id: u32, transform: SpriteInstanceTransform) -> Self {
748        Self {
749            model_id,
750            transform,
751            material: 0,
752            alpha_mul: 255,
753        }
754    }
755}
756
/// GPU per-model metadata: where this model's data starts in the
/// shared registry buffers + its dims/pivot. Mirrors `ModelMeta` in
/// the shader (std430, 48 bytes).
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable, Debug)]
struct SpriteModelMeta {
    /// First word of this entry's data in the shared `occupancy` buffer.
    occupancy_offset: u32,
    /// First word of this entry's slot in the shared `colors` buffer; the
    /// `dirs` and `materials_vox` data is written at the same offset.
    colors_offset: u32,
    /// First word of this entry's prefix table in the shared
    /// `color_offsets` buffer.
    color_offsets_offset: u32,
    /// u32 words of occupancy per (x, y) column.
    occ_words_per_col: u32,
    /// Voxel extent `(mx, my, mz)`.
    dims: [u32; 3],
    /// TV.3 — 1 if this model has per-voxel materials (`materials_vox` is
    /// populated for it); 0 ⇒ use the instance's uniform material.
    has_vox_materials: u32,
    /// Model pivot in model-local voxel space.
    pivot: [f32; 3],
    /// GPU.10.4 — world size of one voxel of this (mip) entry.
    voxel_world_size: f32,
}
775
/// GPU per-instance record. Mirrors `Instance` in the shader (std430,
/// 80 bytes): inverse rotation columns + position + model id + the TV
/// material id and per-instance alpha multiplier.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable, Debug)]
struct SpriteInstanceGpu {
    /// Inverse-rotation column 0 (`transform.inv_rot[0]`).
    inv_rot0: [f32; 4],
    /// Inverse-rotation column 1 (`transform.inv_rot[1]`).
    inv_rot1: [f32; 4],
    /// Inverse-rotation column 2 (`transform.inv_rot[2]`).
    inv_rot2: [f32; 4],
    /// Instance position (`transform.pos`).
    pos: [f32; 3],
    /// Seeded with the instance's chain id as a placeholder; the cull
    /// rewrites it per frame with the chosen LOD entry id.
    model_id: u32,
    /// TV: material id into the global palette (binding 12).
    material: u32,
    /// TV: per-instance alpha multiplier, normalised to `0..=1`.
    alpha_mul: f32,
    /// Explicit padding (with `_pad1`) up to the 80-byte record size.
    _pad0: u32,
    /// Explicit padding; always written as 0.
    _pad1: u32,
}
794
/// Inverse of the 3×3 matrix whose basis columns are
/// `cols = [c0, c1, c2]`, returned in the same column layout.
///
/// Uses the cross-product (adjugate) form, so it handles rotation with
/// non-unit scale; for an orthonormal basis the result is simply the
/// transpose. A (near-)singular input (`|det| < 1e-12`) yields the
/// all-zero matrix instead of dividing by zero.
#[must_use]
fn mat3_inverse(cols: [[f32; 3]; 3]) -> [[f32; 3]; 3] {
    fn cross(u: [f32; 3], v: [f32; 3]) -> [f32; 3] {
        [
            u[1] * v[2] - u[2] * v[1],
            u[2] * v[0] - u[0] * v[2],
            u[0] * v[1] - u[1] * v[0],
        ]
    }

    let [a, b, c] = cols;
    // Unscaled rows of the inverse: b×c, c×a, a×b.
    let rows = [cross(b, c), cross(c, a), cross(a, b)];
    // Determinant as the scalar triple product a · (b × c).
    let det = a[0] * rows[0][0] + a[1] * rows[0][1] + a[2] * rows[0][2];
    let inv_det = if det.abs() < 1e-12 { 0.0 } else { 1.0 / det };

    // Column `j` of the inverse gathers element `j` of every row
    // (i.e. the transpose of `rows`), scaled by 1/det.
    let column = |j: usize| rows.map(|row| row[j] * inv_det);
    [column(0), column(1), column(2)]
}
822
/// GPU-resident registry + instances: every model's occupancy /
/// colours / offsets concatenated into shared storage buffers, a
/// per-model metadata table, and a capacity-sized instance buffer
/// rewritten each frame with the frustum-visible subset (GPU.10.2).
/// One bind group serves all models (same approach as the multi-grid
/// scene).
pub struct SpriteRegistryResident {
    /// Every entry's per-column occupancy words, concatenated back to
    /// back; each entry's base is its `occupancy_offset` in `model_meta`.
    pub occupancy: wgpu::Buffer,
    /// Every entry's voxel colours, each entry written at its
    /// `colors_offset` (the slot assigned by the colours allocator).
    pub colors: wgpu::Buffer,
    /// Per-voxel surface-normal index, concatenated across models in the
    /// same layout as [`colors`](Self::colors). The shader indexes the
    /// per-instance `kv6colmul` table by it.
    pub dirs: wgpu::Buffer,
    /// Per-voxel material id (TV.3), same layout as [`colors`](Self::colors)
    /// (one u32 per voxel). `0` for models without per-voxel materials; the
    /// per-model `has_vox_materials` flag in `model_meta` says whether to use
    /// it (else the shader falls back to the instance's uniform material).
    pub materials_vox: wgpu::Buffer,
    /// Every entry's `color_offsets` prefix table, concatenated back to
    /// back; each entry's base is its `color_offsets_offset`.
    pub color_offsets: wgpu::Buffer,
    /// One `SpriteModelMeta` record per concrete entry, indexed by entry id.
    pub model_meta: wgpu::Buffer,
    /// Holds up to `instance_capacity` instances; the visible subset
    /// is packed into `[0, count)` each frame by [`Self::cull_bin_upload`].
    pub instances: wgpu::Buffer,
    /// Instance-record capacity tracked for `instances`. Grown (to a
    /// power of two) by [`Self::append_instances`]; never shrunk on removal.
    pub instance_capacity: u32,
    /// Per-visible-instance `kv6colmul[256]` tables, packed in the same
    /// order as the `instances` buffer each frame (two u32 per u64
    /// entry: lanes 0|1 then 2|3). Sized `instance_capacity * 256 * 2`
    /// u32; rewritten by [`Self::cull_bin_upload`].
    pub colmul: wgpu::Buffer,
    /// Allocated size of `colmul`, counted in u32s.
    colmul_cap: u32,
    /// GPU.10.3 — per-tile `(offset, count)` into `tile_instances`,
    /// flat `2 * tiles_x * tiles_y` u32s. Grown to fit the screen.
    pub tile_ranges: wgpu::Buffer,
    /// Allocated size of `tile_ranges` (starts at 1 on upload).
    tile_ranges_cap: u32,
    /// GPU.10.3 — flat list of visible-instance indices grouped by
    /// tile. Grown to fit the per-frame total.
    pub tile_instances: wgpu::Buffer,
    /// Allocated size of `tile_instances` (starts at 1 on upload).
    tile_instances_cap: u32,
    /// CPU cull records (full set), with precomputed bounding spheres.
    cull: Vec<CullInstance>,
    /// GPU.10.4 — LOD chains: `chains[chain_id]` = entry ids, finest
    /// first. The cull picks a level by distance and writes its entry
    /// id into the packed instance's `model_id`.
    chains: Vec<Vec<u32>>,
    /// GPU.12 incremental — CPU mirror of the GPU `model_meta` table, one
    /// per concrete entry. [`Self::update_model`] reads the fixed
    /// occupancy/color_offsets bases from here and rewrites the changed
    /// `colors_offset` on a relocation.
    meta: Vec<SpriteModelMeta>,
    /// GPU.12 incremental — per-entry placement of `colors`/`dirs` in the
    /// shared buffers (drives both; same offsets/ranks). `materials_vox`
    /// is written at the same slot offsets. Lets an edit re-upload one
    /// model's data without touching the others.
    colors_alloc: ColorsAllocator,
    /// Per-entry word length of the dims-fixed `occupancy` and
    /// `color_offsets` arrays, kept so [`Self::update_model`] can assert a
    /// carve never changed dims (which would invalidate the in-place
    /// writes — growing dims is out of scope, handled by a full re-upload).
    occ_lens: Vec<u32>,
    /// `color_offsets` counterpart of `occ_lens` (same purpose).
    coloff_lens: Vec<u32>,
    /// Used / allocated words of the tightly-concatenated `occupancy`
    /// buffer. `add_model` bump-appends at `occ_used`; when it would pass
    /// `occ_cap` the buffer is grown (with slack) and rebuilt from the
    /// registry. (`colors`/`dirs` track theirs in [`ColorsAllocator`].)
    occ_used: u32,
    /// Allocated words of `occupancy` (see `occ_used`).
    occ_cap: u32,
    /// Used / allocated words of the tightly-concatenated `color_offsets`
    /// buffer — same growth scheme as `occ_*`.
    coloff_used: u32,
    /// Allocated words of `color_offsets` (see `coloff_used`).
    coloff_cap: u32,
    /// Allocated record count of the `model_meta` buffer; `add_model`
    /// grows it (with slack) when the entry count passes it.
    meta_cap: u32,
    /// Per-entry tombstone: `true` once its model was removed
    /// ([`Self::remove_model`]). Dead entries keep their `meta` slot (so
    /// entry ids — and the caller's `chain_id`s — stay stable) but their
    /// colours are freed for reuse and they contribute nothing to a
    /// repack / [`Self::compact`]. Parallel to `meta`.
    dead: Vec<bool>,
}
902
/// Which tightly-concatenated registry buffer
/// [`SpriteRegistryResident::sync_concat`] is operating on.
#[derive(Clone, Copy)]
enum ConcatBuf {
    /// The shared `occupancy` buffer (source: a model's `occupancy`).
    Occupancy,
    /// The shared `color_offsets` buffer (source: a model's `color_offsets`).
    ColorOffsets,
}
910
911/// The model's source array for a given [`ConcatBuf`] — a free fn (not a
912/// closure) so the returned borrow keeps `m`'s lifetime.
913fn concat_data(m: &SpriteModel, which: ConcatBuf) -> &[u32] {
914    match which {
915        ConcatBuf::Occupancy => &m.occupancy,
916        ConcatBuf::ColorOffsets => &m.color_offsets,
917    }
918}
919
920impl SpriteRegistryResident {
921    /// Concatenate `registry`'s models into shared buffers and prepare
922    /// `instances` for per-frame culling. Model-relative indices stay
923    /// as built; the shader adds each model's base offset from the
924    /// metadata table.
925    #[must_use]
926    pub fn upload(
927        device: &wgpu::Device,
928        registry: &SpriteModelRegistry,
929        instances: &[SpriteInstance],
930    ) -> Self {
931        // `occupancy` + `color_offsets` are dims-fixed → tightly
932        // concatenated (never grow on a carve). `colors` + `dirs` are
933        // variable → laid out by the suballocator with per-slot slack so
934        // an incremental edit can rewrite one model in place.
935        let entry_lens: Vec<u32> = registry
936            .entries
937            .iter()
938            .map(|m| m.colors.len() as u32)
939            .collect();
940        let colors_alloc = ColorsAllocator::new(&entry_lens);
941        let cap_total = colors_alloc.cap_total();
942
943        let mut all_occ: Vec<u32> = Vec::new();
944        let mut all_offsets: Vec<u32> = Vec::new();
945        let mut all_colors: Vec<u32> = vec![0; cap_total as usize];
946        let mut all_dirs: Vec<u32> = vec![0; cap_total as usize];
947        let mut all_materials: Vec<u32> = vec![0; cap_total as usize];
948        let mut meta: Vec<SpriteModelMeta> = Vec::with_capacity(registry.entries.len());
949        let mut occ_lens: Vec<u32> = Vec::with_capacity(registry.entries.len());
950        let mut coloff_lens: Vec<u32> = Vec::with_capacity(registry.entries.len());
951
952        // One meta + placed data per concrete (mip-level) entry.
953        for (e, m) in registry.entries.iter().enumerate() {
954            let slot = colors_alloc.slot(e);
955            meta.push(SpriteModelMeta {
956                occupancy_offset: all_occ.len() as u32,
957                colors_offset: slot.off,
958                color_offsets_offset: all_offsets.len() as u32,
959                occ_words_per_col: m.occ_words_per_col,
960                dims: m.dims,
961                has_vox_materials: u32::from(!m.materials.is_empty()),
962                pivot: m.pivot,
963                voxel_world_size: m.voxel_world_size,
964            });
965            occ_lens.push(m.occupancy.len() as u32);
966            coloff_lens.push(m.color_offsets.len() as u32);
967            all_occ.extend_from_slice(&m.occupancy);
968            all_offsets.extend_from_slice(&m.color_offsets);
969            let off = slot.off as usize;
970            all_colors[off..off + m.colors.len()].copy_from_slice(&m.colors);
971            all_dirs[off..off + m.dirs.len()].copy_from_slice(&m.dirs);
972            for (i, &mat) in m.materials.iter().enumerate() {
973                all_materials[off + i] = u32::from(mat);
974            }
975        }
976
977        // Per-instance cull records: sphere centred at the instance
978        // position, radius from the chain's finest (mip-0) model.
979        // `colmul` starts at identity (unshaded) until the facade sets
980        // per-instance lighting via `set_instance_colmul`.
981        let cull: Vec<CullInstance> = instances.iter().map(|i| make_cull(registry, i)).collect();
982
983        // Capacity buffer (COPY_DST so cull can rewrite it each frame),
984        // seeded with the full set so frame 0 is valid pre-cull.
985        let seed: Vec<SpriteInstanceGpu> = cull.iter().map(|c| c.gpu).collect();
986        let instances_buf = {
987            use wgpu::util::DeviceExt;
988            let one = [SpriteInstanceGpu::zeroed()];
989            let src: &[SpriteInstanceGpu] = if seed.is_empty() { &one } else { &seed };
990            device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
991                label: Some("roxlap-gpu sprite_reg.instances"),
992                contents: bytemuck::cast_slice(src),
993                usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
994            })
995        };
996
997        let tile_ranges = storage_dst_u32(device, "roxlap-gpu sprite_reg.tile_ranges", 1);
998        let tile_instances = storage_dst_u32(device, "roxlap-gpu sprite_reg.tile_instances", 1);
999        // colmul: 256 entries × 2 u32 per visible instance. Sized to the
1000        // full instance set (worst case all visible); rewritten per frame.
1001        let colmul_cap = (cull.len() as u32).max(1) * 256 * 2;
1002        let colmul = storage_dst_u32(device, "roxlap-gpu sprite_reg.colmul", colmul_cap);
1003        Self {
1004            occupancy: storage_dst_u32_cap(
1005                device,
1006                "roxlap-gpu sprite_reg.occupancy",
1007                &all_occ,
1008                all_occ.len() as u32,
1009            ),
1010            colors: storage_dst_u32_cap(
1011                device,
1012                "roxlap-gpu sprite_reg.colors",
1013                &all_colors,
1014                cap_total,
1015            ),
1016            dirs: storage_dst_u32_cap(device, "roxlap-gpu sprite_reg.dirs", &all_dirs, cap_total),
1017            materials_vox: storage_dst_u32_cap(
1018                device,
1019                "roxlap-gpu sprite_reg.materials_vox",
1020                &all_materials,
1021                cap_total,
1022            ),
1023            color_offsets: storage_dst_u32_cap(
1024                device,
1025                "roxlap-gpu sprite_reg.color_offsets",
1026                &all_offsets,
1027                all_offsets.len() as u32,
1028            ),
1029            model_meta: storage_dst_pod(device, "roxlap-gpu sprite_reg.model_meta", &meta),
1030            instances: instances_buf,
1031            instance_capacity: cull.len() as u32,
1032            colmul,
1033            colmul_cap,
1034            tile_ranges,
1035            tile_ranges_cap: 1,
1036            tile_instances,
1037            tile_instances_cap: 1,
1038            cull,
1039            chains: registry.chains.clone(),
1040            occ_used: all_occ.len() as u32,
1041            occ_cap: all_occ.len() as u32,
1042            coloff_used: all_offsets.len() as u32,
1043            coloff_cap: all_offsets.len() as u32,
1044            meta_cap: meta.len() as u32,
1045            dead: vec![false; meta.len()],
1046            meta,
1047            colors_alloc,
1048            occ_lens,
1049            coloff_lens,
1050        }
1051    }
1052
1053    /// Number of resident instances (the cull set length).
1054    #[must_use]
1055    pub fn instance_count(&self) -> usize {
1056        self.cull.len()
1057    }
1058
1059    /// Append new instances **without** re-uploading any model volume —
1060    /// the incremental counterpart to [`Self::upload`], for streaming
1061    /// spawns (asteroids, projectiles, …). Returns the index of the first
1062    /// appended instance; the block occupies `[base, base + N)`.
1063    ///
1064    /// The model volumes are untouched, so every appended instance must
1065    /// reference a `model_id` (LOD chain) that was already present in the
1066    /// `registry` passed to [`Self::upload`]. Registering a *new* model
1067    /// still requires a full [`Self::upload`] (its voxels must be laid
1068    /// into the shared buffers). `registry` here is only read for the new
1069    /// instances' bound-sphere radii and must be the resident one.
1070    ///
1071    /// The `instances` GPU buffer is only *grown* here (power-of-two,
1072    /// amortised O(1)); its contents are **not** written. [`Self::
1073    /// cull_bin_upload`] rewrites the whole visible range from `cull` every
1074    /// frame before the sprite pass reads it — exactly as for the static
1075    /// instances — so appending only needs to extend `cull` and ensure
1076    /// capacity. Writing the buffer here too caused a mid-frame
1077    /// write-while-in-flight hazard on some drivers (a stray full-screen
1078    /// flash on append). `colmul` likewise grows lazily in
1079    /// `cull_bin_upload`. After a removal the capacity is not shrunk.
1080    pub fn append_instances(
1081        &mut self,
1082        device: &wgpu::Device,
1083        registry: &SpriteModelRegistry,
1084        instances: &[SpriteInstance],
1085    ) -> u32 {
1086        let base = self.cull.len() as u32;
1087        if instances.is_empty() {
1088            return base;
1089        }
1090        for i in instances {
1091            debug_assert!(
1092                (i.model_id as usize) < self.chains.len(),
1093                "append_instances: model_id {} not resident (run upload to register new models)",
1094                i.model_id
1095            );
1096            self.cull.push(make_cull(registry, i));
1097        }
1098        let need = self.cull.len() as u32;
1099        if need > self.instance_capacity {
1100            // Grow power-of-two and recreate the buffer (the next frame's
1101            // bind group picks up the new handle). No seed write — the
1102            // per-frame cull_bin_upload populates it.
1103            self.instance_capacity = need.next_power_of_two();
1104            self.instances = instances_buffer(device, self.instance_capacity);
1105        }
1106        base
1107    }
1108
1109    /// Remove the instance at `index` by swap-remove — O(1), no GPU work
1110    /// (the next [`Self::cull_bin_upload`] repacks the visible set from
1111    /// the shrunk cull list). Capacity is retained for reuse.
1112    ///
1113    /// Returns `Some(old_last)` when a different instance was moved into
1114    /// `index` to fill the hole (its index changed from `old_last` to
1115    /// `index` — callers holding instance handles must fix up that one),
1116    /// or `None` if `index` was the last element or out of range. Because
1117    /// this reorders, any [`Self::set_instance_colmul`] table set by
1118    /// position should be re-applied after a removal.
1119    pub fn remove_instance(&mut self, index: usize) -> Option<usize> {
1120        if index >= self.cull.len() {
1121            return None;
1122        }
1123        let last = self.cull.len() - 1;
1124        self.cull.swap_remove(index);
1125        (index != last).then_some(last)
1126    }
1127
1128    /// Set the per-instance `kv6colmul[256]` lighting tables (voxlap's
1129    /// `update_reflects` output), in the same order/length as the
1130    /// instances passed to [`Self::upload`]. The next
1131    /// [`Self::cull_bin_upload`] packs the visible subset to the GPU.
1132    /// Instances beyond `tables.len()` keep their previous tables.
1133    pub fn set_instance_colmul(&mut self, tables: &[[u64; 256]]) {
1134        for (ci, t) in self.cull.iter_mut().zip(tables) {
1135            ci.colmul.copy_from_slice(t);
1136        }
1137    }
1138
1139    /// Refresh instance poses in place from `instances` — for animated
1140    /// sprites (e.g. KFA limbs re-posed each frame) — **without** any
1141    /// model-volume re-upload. `instances` must match the set passed to
1142    /// [`Self::upload`] in length + order; each keeps its `model_id`
1143    /// (LOD chain) so only the transform + cull centre change. No GPU
1144    /// write happens here: the next [`Self::cull_bin_upload`] re-uploads
1145    /// the packed visible subset, as it already does every frame.
1146    pub fn update_transforms(&mut self, instances: &[SpriteInstance]) {
1147        debug_assert_eq!(
1148            instances.len(),
1149            self.cull.len(),
1150            "update_transforms instance count must match upload"
1151        );
1152        for (ci, inst) in self.cull.iter_mut().zip(instances) {
1153            ci.gpu.inv_rot0 = inst.transform.inv_rot[0];
1154            ci.gpu.inv_rot1 = inst.transform.inv_rot[1];
1155            ci.gpu.inv_rot2 = inst.transform.inv_rot[2];
1156            ci.gpu.pos = inst.transform.pos;
1157            // TV: material id + alpha multiplier ride the same coalesced
1158            // update as the pose (set via the facade's per-instance setters).
1159            ci.gpu.material = u32::from(inst.material);
1160            ci.gpu.alpha_mul = f32::from(inst.alpha_mul) / 255.0;
1161            // Bounding sphere follows the pivot; radius/chain unchanged.
1162            ci.center = inst.transform.pos;
1163        }
1164    }
1165
1166    /// Repoint instance `idx` at a different LOD chain — the per-frame
1167    /// **flipbook** step for animated voxel clips (VCL.2). The instance's
1168    /// transform / colmul are untouched; only which model's volume it
1169    /// draws changes. The new chain's volume must already be resident
1170    /// (uploaded via [`Self::add_model`] / [`Self::upload`]); `registry`
1171    /// is the one those uploads used (so the bounding radius reseeds from
1172    /// the new model). Like [`Self::update_transforms`], this is a CPU-side
1173    /// rewrite — the next [`Self::cull_bin_upload`] re-uploads the packed
1174    /// visible subset, so it costs nothing extra on the GPU. No-op if `idx`
1175    /// is out of range.
1176    ///
1177    /// All frames of a clip share the same `dims`, so a flipbook swap
1178    /// leaves the bounding radius unchanged; reseeding it anyway keeps the
1179    /// method correct for arbitrary chain swaps.
1180    pub fn set_instance_model(
1181        &mut self,
1182        registry: &SpriteModelRegistry,
1183        idx: usize,
1184        chain_id: u32,
1185    ) {
1186        // Guard `chain_id` (the `cull.get_mut` below only covers `idx`): a
1187        // public caller could pass an out-of-range / tombstoned chain, which
1188        // `registry.model` would index-panic on.
1189        let Some(radius) = registry
1190            .model_checked(chain_id)
1191            .map(SpriteModel::bound_radius)
1192        else {
1193            return;
1194        };
1195        let Some(ci) = self.cull.get_mut(idx) else {
1196            return;
1197        };
1198        ci.chain_id = chain_id;
1199        ci.gpu.model_id = chain_id; // placeholder; cull rewrites to the LOD entry
1200        ci.radius = radius;
1201    }
1202
1203    /// GPU.12 incremental — re-upload only the entries of LOD chain
1204    /// `chain_id` after an in-place edit (carve / recolour) of its model,
1205    /// **without** rebuilding the whole registry. `registry` must be the
1206    /// same registry uploaded (same entry ids), with chain `chain_id`'s
1207    /// entries already edited (`model_mut` + `rebuild_lod`).
1208    ///
1209    /// For each entry: occupancy + color_offsets are dims-fixed, so they
1210    /// are written in place; colors + dirs (variable, parallel) go through
1211    /// the suballocator — written in place when they fit the slack,
1212    /// relocated (with a `model_meta` rewrite) when they outgrow it, and
1213    /// only when the buffer tail overflows are colors/dirs grown + the
1214    /// whole registry repacked. Instances / cull / colmul are untouched
1215    /// (a carve never moves an instance or grows its bounds) — that is the
1216    /// win over [`Self::upload`].
1217    ///
1218    /// # Panics (debug)
1219    /// If an entry's dims changed (occupancy / color_offsets length), which
1220    /// the in-place path can't absorb — growing dims needs a full
1221    /// re-upload via [`Self::upload`].
1222    pub fn update_model(
1223        &mut self,
1224        device: &wgpu::Device,
1225        queue: &wgpu::Queue,
1226        registry: &SpriteModelRegistry,
1227        chain_id: u32,
1228    ) {
1229        let entries = self.chains[chain_id as usize].clone();
1230        let mut grew = false;
1231        for &e in &entries {
1232            let e = e as usize;
1233            let m = &registry.entries[e];
1234
1235            // Dims-fixed arrays: assert unchanged, then write in place.
1236            debug_assert_eq!(
1237                m.occupancy.len() as u32,
1238                self.occ_lens[e],
1239                "update_model: entry {e} occupancy length changed (dims grew?)"
1240            );
1241            debug_assert_eq!(
1242                m.color_offsets.len() as u32,
1243                self.coloff_lens[e],
1244                "update_model: entry {e} color_offsets length changed (dims grew?)"
1245            );
1246            queue.write_buffer(
1247                &self.occupancy,
1248                u64::from(self.meta[e].occupancy_offset) * 4,
1249                bytemuck::cast_slice(&m.occupancy),
1250            );
1251            queue.write_buffer(
1252                &self.color_offsets,
1253                u64::from(self.meta[e].color_offsets_offset) * 4,
1254                bytemuck::cast_slice(&m.color_offsets),
1255            );
1256
1257            // Variable colors/dirs via the suballocator.
1258            let new_len = m.colors.len() as u32;
1259            match self.colors_alloc.place(e, new_len) {
1260                Some(off) => {
1261                    queue.write_buffer(
1262                        &self.colors,
1263                        u64::from(off) * 4,
1264                        bytemuck::cast_slice(&m.colors),
1265                    );
1266                    queue.write_buffer(
1267                        &self.dirs,
1268                        u64::from(off) * 4,
1269                        bytemuck::cast_slice(&m.dirs),
1270                    );
1271                    let mats: Vec<u32> = m.materials.iter().map(|&x| u32::from(x)).collect();
1272                    queue.write_buffer(
1273                        &self.materials_vox,
1274                        u64::from(off) * 4,
1275                        bytemuck::cast_slice(&mats),
1276                    );
1277                    if self.meta[e].colors_offset != off {
1278                        // Relocated — rewrite this entry's meta record.
1279                        self.meta[e].colors_offset = off;
1280                        queue.write_buffer(
1281                            &self.model_meta,
1282                            (e * std::mem::size_of::<SpriteModelMeta>()) as u64,
1283                            bytemuck::bytes_of(&self.meta[e]),
1284                        );
1285                    }
1286                }
1287                None => grew = true,
1288            }
1289        }
1290
1291        // Buffer overflow on at least one entry → grow colors/dirs and
1292        // repack the WHOLE registry (rare; offsets for every entry move).
1293        if grew {
1294            self.grow_and_repack(device, queue, registry);
1295        }
1296    }
1297
1298    /// Grow the `colors`/`dirs` buffers and repack every entry compactly
1299    /// (with fresh slack) when an [`Self::update_model`] edit overflowed
1300    /// the buffer tail. Recreates both buffers (the next frame's bind
1301    /// group picks up the new handles) and rewrites every `model_meta`
1302    /// `colors_offset`. O(registry) but rare — logged so a growth burst
1303    /// is visible.
1304    fn grow_and_repack(
1305        &mut self,
1306        device: &wgpu::Device,
1307        queue: &wgpu::Queue,
1308        registry: &SpriteModelRegistry,
1309    ) {
1310        self.repack_colors_dirs(device, registry);
1311        // Every entry's colors_offset moved → rewrite the whole meta table.
1312        queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
1313    }
1314
1315    /// Repack `colors`/`dirs` compactly (with fresh slack) from the full
1316    /// `registry`, recreating both buffers and updating every CPU
1317    /// `meta[e].colors_offset`. Does **not** touch the GPU `model_meta`
1318    /// buffer — the caller writes it ([`Self::grow_and_repack`] writes the
1319    /// whole table; [`Self::add_model`] writes it once after all entries
1320    /// are placed). O(registry) but rare — logged so a growth burst is
1321    /// visible.
1322    fn repack_colors_dirs(&mut self, device: &wgpu::Device, registry: &SpriteModelRegistry) {
1323        // Dead (removed) entries collapse to 0 length so they reclaim no
1324        // space; live entries keep their colours.
1325        let new_lens: Vec<u32> = registry
1326            .entries
1327            .iter()
1328            .enumerate()
1329            .map(|(e, m)| {
1330                if self.dead[e] {
1331                    0
1332                } else {
1333                    m.colors.len() as u32
1334                }
1335            })
1336            .collect();
1337        self.colors_alloc.repack(&new_lens);
1338        let cap_total = self.colors_alloc.cap_total();
1339
1340        let mut all_colors = vec![0u32; cap_total as usize];
1341        let mut all_dirs = vec![0u32; cap_total as usize];
1342        let mut all_materials = vec![0u32; cap_total as usize];
1343        for (e, m) in registry.entries.iter().enumerate() {
1344            if self.dead[e] {
1345                self.meta[e].colors_offset = 0;
1346                continue;
1347            }
1348            let off = self.colors_alloc.slot(e).off as usize;
1349            all_colors[off..off + m.colors.len()].copy_from_slice(&m.colors);
1350            all_dirs[off..off + m.dirs.len()].copy_from_slice(&m.dirs);
1351            for (i, &mat) in m.materials.iter().enumerate() {
1352                all_materials[off + i] = u32::from(mat);
1353            }
1354            self.meta[e].colors_offset = off as u32;
1355        }
1356        self.colors = storage_dst_u32_cap(
1357            device,
1358            "roxlap-gpu sprite_reg.colors",
1359            &all_colors,
1360            cap_total,
1361        );
1362        self.dirs = storage_dst_u32_cap(device, "roxlap-gpu sprite_reg.dirs", &all_dirs, cap_total);
1363        self.materials_vox = storage_dst_u32_cap(
1364            device,
1365            "roxlap-gpu sprite_reg.materials_vox",
1366            &all_materials,
1367            cap_total,
1368        );
1369        eprintln!(
1370            "roxlap-gpu: sprite registry colors/dirs/materials grew + repacked to {cap_total} words"
1371        );
1372    }
1373
1374    /// Append a new model (its full LOD chain) to the resident registry
1375    /// **without** re-uploading the existing models' volumes — the
1376    /// incremental counterpart to a full [`Self::upload`], for streaming
1377    /// in new geometry (unique asteroids, generated meshes).
1378    ///
1379    /// Contract (mirrors [`Self::update_model`]): the caller owns the
1380    /// `SpriteModelRegistry`, has just appended this chain to it (e.g. via
1381    /// [`SpriteModelRegistry::add_lod`]), and passes the resulting
1382    /// `chain_id`. The chain's entries must be the registry's newest (ids
1383    /// `>= ` the resident entry count) — entries are append-only.
1384    ///
1385    /// The large `colors`/`dirs`/`occupancy`/`color_offsets` buffers carry
1386    /// slack and bump-append the new entries in place; a buffer that
1387    /// overflows is grown (with slack) and rebuilt once from the registry
1388    /// (amortised O(1) per add). The small `model_meta` table is rewritten
1389    /// each call. After this, [`Self::append_instances`] can reference the
1390    /// new `chain_id`.
1391    pub fn add_model(
1392        &mut self,
1393        device: &wgpu::Device,
1394        queue: &wgpu::Queue,
1395        registry: &SpriteModelRegistry,
1396        chain_id: u32,
1397    ) {
1398        let entries = registry.chains[chain_id as usize].clone();
1399        debug_assert_eq!(
1400            chain_id as usize,
1401            self.chains.len(),
1402            "add_model: chains must be appended in order"
1403        );
1404
1405        // CPU bookkeeping: assign each new entry a tight occ/coloff offset
1406        // and an allocator slot for colors/dirs. `need_colors_grow` marks
1407        // a slot that didn't fit → a colors/dirs repack below.
1408        let mut need_colors_grow = false;
1409        for &e in &entries {
1410            let e = e as usize;
1411            debug_assert_eq!(
1412                e,
1413                self.meta.len(),
1414                "add_model: entries must be appended in order"
1415            );
1416            let m = &registry.entries[e];
1417            let occ_off = self.occ_used;
1418            let coloff_off = self.coloff_used;
1419            self.occ_used += m.occupancy.len() as u32;
1420            self.coloff_used += m.color_offsets.len() as u32;
1421            let colors_off = match self.colors_alloc.push(m.colors.len() as u32) {
1422                Some(off) => off,
1423                None => {
1424                    need_colors_grow = true;
1425                    0 // placeholder; repack assigns the real offset
1426                }
1427            };
1428            self.meta.push(SpriteModelMeta {
1429                occupancy_offset: occ_off,
1430                colors_offset: colors_off,
1431                color_offsets_offset: coloff_off,
1432                occ_words_per_col: m.occ_words_per_col,
1433                dims: m.dims,
1434                has_vox_materials: u32::from(!m.materials.is_empty()),
1435                pivot: m.pivot,
1436                voxel_world_size: m.voxel_world_size,
1437            });
1438            self.occ_lens.push(m.occupancy.len() as u32);
1439            self.coloff_lens.push(m.color_offsets.len() as u32);
1440            self.dead.push(false);
1441        }
1442        self.chains.push(entries.clone());
1443
1444        // occupancy + color_offsets: grow+rebuild on overflow, else write
1445        // the new tails in place.
1446        self.sync_concat(device, queue, registry, &entries, ConcatBuf::Occupancy);
1447        self.sync_concat(device, queue, registry, &entries, ConcatBuf::ColorOffsets);
1448
1449        // colors/dirs: repack on overflow (rebuilds both + every CPU
1450        // colors_offset), else write the new entries at their slots.
1451        if need_colors_grow {
1452            self.repack_colors_dirs(device, registry);
1453        } else {
1454            for &e in &entries {
1455                let e = e as usize;
1456                let m = &registry.entries[e];
1457                let off = u64::from(self.meta[e].colors_offset) * 4;
1458                queue.write_buffer(&self.colors, off, bytemuck::cast_slice(&m.colors));
1459                queue.write_buffer(&self.dirs, off, bytemuck::cast_slice(&m.dirs));
1460                let mats: Vec<u32> = m.materials.iter().map(|&x| u32::from(x)).collect();
1461                queue.write_buffer(&self.materials_vox, off, bytemuck::cast_slice(&mats));
1462            }
1463        }
1464
1465        // model_meta: grow the record buffer if needed, then rewrite the
1466        // whole (small) table — covers both new records and any
1467        // colors_offset relocations from a repack.
1468        let count = self.meta.len() as u32;
1469        if count > self.meta_cap {
1470            self.meta_cap = grow_records(count);
1471            self.model_meta = storage_dst_pod_cap(
1472                device,
1473                "roxlap-gpu sprite_reg.model_meta",
1474                &self.meta,
1475                self.meta_cap,
1476            );
1477        } else {
1478            queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
1479        }
1480    }
1481
1482    /// Sync one tightly-concatenated buffer (`occupancy` or
1483    /// `color_offsets`) after `add_model` appended `new_entries`: if the
1484    /// used length now exceeds capacity, grow (with slack) and rebuild the
1485    /// whole buffer from the registry; otherwise write just the appended
1486    /// tails at their offsets.
1487    fn sync_concat(
1488        &mut self,
1489        device: &wgpu::Device,
1490        queue: &wgpu::Queue,
1491        registry: &SpriteModelRegistry,
1492        new_entries: &[u32],
1493        which: ConcatBuf,
1494    ) {
1495        let (used, cap) = match which {
1496            ConcatBuf::Occupancy => (self.occ_used, self.occ_cap),
1497            ConcatBuf::ColorOffsets => (self.coloff_used, self.coloff_cap),
1498        };
1499        if used > cap {
1500            let new_cap = grow_words(used);
1501            let all: Vec<u32> = registry
1502                .entries
1503                .iter()
1504                .flat_map(|m| concat_data(m, which).iter().copied())
1505                .collect();
1506            let label = match which {
1507                ConcatBuf::Occupancy => "roxlap-gpu sprite_reg.occupancy",
1508                ConcatBuf::ColorOffsets => "roxlap-gpu sprite_reg.color_offsets",
1509            };
1510            let buf = storage_dst_u32_cap(device, label, &all, new_cap);
1511            match which {
1512                ConcatBuf::Occupancy => {
1513                    self.occupancy = buf;
1514                    self.occ_cap = new_cap;
1515                }
1516                ConcatBuf::ColorOffsets => {
1517                    self.color_offsets = buf;
1518                    self.coloff_cap = new_cap;
1519                }
1520            }
1521        } else {
1522            let target = match which {
1523                ConcatBuf::Occupancy => &self.occupancy,
1524                ConcatBuf::ColorOffsets => &self.color_offsets,
1525            };
1526            for &e in new_entries {
1527                let e = e as usize;
1528                let off = match which {
1529                    ConcatBuf::Occupancy => self.meta[e].occupancy_offset,
1530                    ConcatBuf::ColorOffsets => self.meta[e].color_offsets_offset,
1531                };
1532                queue.write_buffer(
1533                    target,
1534                    u64::from(off) * 4,
1535                    bytemuck::cast_slice(concat_data(&registry.entries[e], which)),
1536                );
1537            }
1538        }
1539    }
1540
1541    /// Number of removed-but-not-yet-compacted models (tombstoned chains).
1542    /// A caller streams `add_model` / `remove_model` and calls
1543    /// [`Self::compact`] once this (relative to [`Self::live_model_count`])
1544    /// crosses a threshold.
1545    #[must_use]
1546    pub fn dead_model_count(&self) -> usize {
1547        self.chains.iter().filter(|c| c.is_empty()).count()
1548    }
1549
1550    /// Number of live (non-removed) models.
1551    #[must_use]
1552    pub fn live_model_count(&self) -> usize {
1553        self.chains.iter().filter(|c| !c.is_empty()).count()
1554    }
1555
1556    /// Remove a model (tombstone its LOD chain) — the counterpart to
1557    /// [`Self::add_model`]. O(chain length): marks the chain's entries
1558    /// dead and frees their `colors`/`dirs` slots for reuse by a later
1559    /// `add_model`. The `occupancy` / `color_offsets` holes are **not**
1560    /// reclaimed until [`Self::compact`]; entry ids (and the caller's other
1561    /// `chain_id`s) stay stable.
1562    ///
1563    /// Instances of the removed chain are **not** dropped here — they
1564    /// linger in the cull set but draw as nothing (skipped in
1565    /// [`Self::cull_bin_upload`]); the caller removes them via
1566    /// [`Self::remove_instance`] when convenient. A no-op if `chain_id` is
1567    /// out of range or already removed.
1568    pub fn remove_model(&mut self, chain_id: u32) {
1569        let Some(entries) = self.chains.get(chain_id as usize).cloned() else {
1570            return;
1571        };
1572        if entries.is_empty() {
1573            return; // already removed
1574        }
1575        for &e in &entries {
1576            let e = e as usize;
1577            self.dead[e] = true;
1578            self.colors_alloc.free(e);
1579        }
1580        self.chains[chain_id as usize] = Vec::new(); // tombstone
1581    }
1582
1583    /// Reclaim the holes left by [`Self::remove_model`]: rebuild the shared
1584    /// volume buffers from the live entries only, dropping every dead
1585    /// entry's data. Entry ids and `chain_id`s are preserved (dead entries
1586    /// keep a zero-length `meta` tombstone), so the caller's handles stay
1587    /// valid and no remap is needed.
1588    ///
1589    /// `registry` must be the resident one (entry ids 1:1, as for
1590    /// [`Self::add_model`] / [`Self::update_model`]). O(live volume) —
1591    /// call it when [`Self::dead_model_count`] is high, not every frame.
1592    pub fn compact(
1593        &mut self,
1594        device: &wgpu::Device,
1595        queue: &wgpu::Queue,
1596        registry: &SpriteModelRegistry,
1597    ) {
1598        // occupancy + color_offsets: re-pack live entries tightly, rewrite
1599        // each live entry's meta offset, zero the dead ones.
1600        self.compact_concat(device, registry, ConcatBuf::Occupancy);
1601        self.compact_concat(device, registry, ConcatBuf::ColorOffsets);
1602        // colors/dirs: the dead-aware repack already drops dead entries.
1603        self.repack_colors_dirs(device, registry);
1604        // model_meta: rewrite the (unchanged-length) table with the new
1605        // offsets. Buffer count didn't change, so no grow needed.
1606        queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
1607    }
1608
1609    /// Rebuild one tightly-concatenated buffer from live entries only
1610    /// (used by [`Self::compact`]): assign each live entry a fresh tight
1611    /// offset, zero dead entries' offset, and recreate the buffer with
1612    /// slack.
1613    fn compact_concat(
1614        &mut self,
1615        device: &wgpu::Device,
1616        registry: &SpriteModelRegistry,
1617        which: ConcatBuf,
1618    ) {
1619        let mut all: Vec<u32> = Vec::new();
1620        for e in 0..self.meta.len() {
1621            if self.dead[e] {
1622                match which {
1623                    ConcatBuf::Occupancy => self.meta[e].occupancy_offset = 0,
1624                    ConcatBuf::ColorOffsets => self.meta[e].color_offsets_offset = 0,
1625                }
1626                continue;
1627            }
1628            let off = all.len() as u32;
1629            match which {
1630                ConcatBuf::Occupancy => self.meta[e].occupancy_offset = off,
1631                ConcatBuf::ColorOffsets => self.meta[e].color_offsets_offset = off,
1632            }
1633            all.extend_from_slice(concat_data(&registry.entries[e], which));
1634        }
1635        let used = all.len() as u32;
1636        let cap = grow_words(used);
1637        let (label, buf) = match which {
1638            ConcatBuf::Occupancy => ("roxlap-gpu sprite_reg.occupancy", &mut self.occupancy),
1639            ConcatBuf::ColorOffsets => (
1640                "roxlap-gpu sprite_reg.color_offsets",
1641                &mut self.color_offsets,
1642            ),
1643        };
1644        *buf = storage_dst_u32_cap(device, label, &all, cap);
1645        match which {
1646            ConcatBuf::Occupancy => {
1647                self.occ_used = used;
1648                self.occ_cap = cap;
1649            }
1650            ConcatBuf::ColorOffsets => {
1651                self.coloff_used = used;
1652                self.coloff_cap = cap;
1653            }
1654        }
1655    }
1656
1657    /// GPU.10.3 — frustum-cull, pack the visible subset into the
1658    /// instance buffer, then bin those instances into screen tiles:
1659    /// project each visible bounding sphere to a screen AABB and append
1660    /// its (visible) index to every overlapped tile. Uploads the
1661    /// instance buffer + `tile_ranges` (per-tile offset/count) +
1662    /// `tile_instances` (flat grouped indices), growing the tile
1663    /// buffers as needed. Returns `(visible_count, tiles_x, tiles_y)`.
1664    #[allow(clippy::too_many_arguments)]
1665    pub fn cull_bin_upload(
1666        &mut self,
1667        device: &wgpu::Device,
1668        queue: &wgpu::Queue,
1669        f: &ViewFrustum,
1670        screen_w: u32,
1671        screen_h: u32,
1672        tile_size: u32,
1673        lod_px: f32,
1674    ) -> (u32, u32, u32) {
1675        let tiles_x = screen_w.div_ceil(tile_size).max(1);
1676        let tiles_y = screen_h.div_ceil(tile_size).max(1);
1677        let n_tiles = (tiles_x * tiles_y) as usize;
1678
1679        let nw = (1.0 + f.half_w * f.half_w).sqrt();
1680        let nh = (1.0 + f.half_h * f.half_h).sqrt();
1681        let cx = screen_w as f32 * 0.5;
1682        let cy = screen_h as f32 * 0.5;
1683        let px_per_world = cx / f.half_w; // isotropic: == cy/half_h
1684        let ts = tile_size as f32;
1685        let tx_max = tiles_x as i32 - 1;
1686        let ty_max = tiles_y as i32 - 1;
1687
1688        let mut visible: Vec<SpriteInstanceGpu> = Vec::with_capacity(self.cull.len());
1689        // Per-visible tile AABB (tx0, tx1, ty0, ty1) for the bin pass.
1690        let mut boxes: Vec<[i32; 4]> = Vec::with_capacity(self.cull.len());
1691        // Per-visible kv6colmul tables, flattened to two u32 per u64
1692        // entry (lanes 0|1, then 2|3), packed in visible order so the
1693        // shader indexes `colmul[inst_idx*512 + dir*2 + {0,1}]`.
1694        let mut visible_colmul: Vec<u32> = Vec::with_capacity(self.cull.len() * 512);
1695        let mut counts = vec![0u32; n_tiles];
1696
1697        for ci in &self.cull {
1698            // Skip instances of a removed model (tombstoned chain) — they
1699            // linger in `cull` until the caller drops them, but draw as
1700            // nothing.
1701            if self.chains[ci.chain_id as usize].is_empty() {
1702                continue;
1703            }
1704            let rel = [
1705                ci.center[0] - f.pos[0],
1706                ci.center[1] - f.pos[1],
1707                ci.center[2] - f.pos[2],
1708            ];
1709            let z = dot3(rel, f.forward);
1710            let r = ci.radius;
1711            if z + r < 0.0 || z - r > f.far {
1712                continue; // behind / beyond far
1713            }
1714            let x = dot3(rel, f.right);
1715            if (x - f.half_w * z) > r * nw || (-x - f.half_w * z) > r * nw {
1716                continue; // right / left
1717            }
1718            let y = dot3(rel, f.down);
1719            if (y - f.half_h * z) > r * nh || (-y - f.half_h * z) > r * nh {
1720                continue; // bottom / top
1721            }
1722
1723            // Visible: project the sphere to a screen AABB → tile range.
1724            let (tx0, tx1, ty0, ty1) = if z > 1e-3 {
1725                let sx = cx + (x / z) * px_per_world;
1726                let sy = cy + (y / z) * px_per_world;
1727                let sr = (r / z) * px_per_world;
1728                (
1729                    (((sx - sr) / ts).floor() as i32).clamp(0, tx_max),
1730                    (((sx + sr) / ts).floor() as i32).clamp(0, tx_max),
1731                    (((sy - sr) / ts).floor() as i32).clamp(0, ty_max),
1732                    (((sy + sr) / ts).floor() as i32).clamp(0, ty_max),
1733                )
1734            } else {
1735                (0, tx_max, 0, ty_max)
1736            };
1737            // GPU.10.4 — pick the LOD level by projected voxel size:
1738            // choose the coarsest level whose voxel still covers at
1739            // least `lod_px` screen pixels, i.e. step up once a mip-0
1740            // voxel would be smaller than that. `lod_px = 1` is the
1741            // natural "don't go sub-pixel" threshold; larger values
1742            // force LOD in closer (tuning/inspection).
1743            let chain = &self.chains[ci.chain_id as usize];
1744            let level = if z > 1e-3 && chain.len() > 1 {
1745                let voxel_px = px_per_world / z; // mip-0 voxel screen size
1746                ((lod_px / voxel_px).log2().ceil().max(0.0) as usize).min(chain.len() - 1)
1747            } else {
1748                0
1749            };
1750            let mut g = ci.gpu;
1751            g.model_id = chain[level];
1752            visible.push(g);
1753            boxes.push([tx0, tx1, ty0, ty1]);
1754            for &w in ci.colmul.iter() {
1755                visible_colmul.push((w & 0xffff_ffff) as u32);
1756                visible_colmul.push((w >> 32) as u32);
1757            }
1758            for ty in ty0..=ty1 {
1759                for tx in tx0..=tx1 {
1760                    counts[(ty * tiles_x as i32 + tx) as usize] += 1;
1761                }
1762            }
1763        }
1764
1765        if visible.is_empty() {
1766            return (0, tiles_x, tiles_y);
1767        }
1768
1769        // Prefix-sum counts → per-tile offsets; build the flat grouped
1770        // index list.
1771        let mut tile_ranges = vec![0u32; n_tiles * 2];
1772        let mut running = 0u32;
1773        for t in 0..n_tiles {
1774            tile_ranges[2 * t] = running; // offset
1775            tile_ranges[2 * t + 1] = counts[t]; // count
1776            running += counts[t];
1777        }
1778        let total = running as usize;
1779        let mut tile_instances = vec![0u32; total.max(1)];
1780        let mut cursor: Vec<u32> = (0..n_tiles).map(|t| tile_ranges[2 * t]).collect();
1781        for (vis_idx, b) in boxes.iter().enumerate() {
1782            for ty in b[2]..=b[3] {
1783                for tx in b[0]..=b[1] {
1784                    let t = (ty * tiles_x as i32 + tx) as usize;
1785                    tile_instances[cursor[t] as usize] = vis_idx as u32;
1786                    cursor[t] += 1;
1787                }
1788            }
1789        }
1790
1791        // Upload: instances + (grown) tile buffers. Grow a tile buffer
1792        // only when this frame needs more than its capacity (wgpu has
1793        // no Clone on Buffer, so we replace the field in place).
1794        queue.write_buffer(&self.instances, 0, bytemuck::cast_slice(&visible));
1795        let need_ranges = tile_ranges.len() as u32;
1796        if need_ranges > self.tile_ranges_cap {
1797            self.tile_ranges_cap = need_ranges.next_power_of_two();
1798            self.tile_ranges = storage_dst_u32(
1799                device,
1800                "roxlap-gpu sprite_reg.tile_ranges",
1801                self.tile_ranges_cap,
1802            );
1803        }
1804        let need_inst = tile_instances.len() as u32;
1805        if need_inst > self.tile_instances_cap {
1806            self.tile_instances_cap = need_inst.next_power_of_two();
1807            self.tile_instances = storage_dst_u32(
1808                device,
1809                "roxlap-gpu sprite_reg.tile_instances",
1810                self.tile_instances_cap,
1811            );
1812        }
1813        queue.write_buffer(&self.tile_ranges, 0, bytemuck::cast_slice(&tile_ranges));
1814        queue.write_buffer(
1815            &self.tile_instances,
1816            0,
1817            bytemuck::cast_slice(&tile_instances),
1818        );
1819        let need_colmul = visible_colmul.len() as u32;
1820        if need_colmul > self.colmul_cap {
1821            self.colmul_cap = need_colmul.next_power_of_two();
1822            self.colmul = storage_dst_u32(device, "roxlap-gpu sprite_reg.colmul", self.colmul_cap);
1823        }
1824        queue.write_buffer(&self.colmul, 0, bytemuck::cast_slice(&visible_colmul));
1825
1826        (visible.len() as u32, tiles_x, tiles_y)
1827    }
1828}
1829
/// GPU.12 incremental — per-entry placement of one model's `colors`
/// (and the parallel `dirs`) within the shared registry buffers: a
/// `[off, off+cap)` word window holding `len` live words. `cap >= len`
/// gives slack so a carve that *grows* the surface-voxel count can be
/// rewritten in place without relocating.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct ColorSlot {
    /// First word of the window.
    off: u32,
    /// Window size in words; `0` marks a tombstone / empty slot.
    cap: u32,
    /// Live words currently stored in the window.
    len: u32,
}

/// First-fit suballocator over the parallel `colors`/`dirs` buffers
/// (same offsets/ranks → one allocator drives both). Each registry
/// entry owns a [`ColorSlot`]; growth past a slot's `cap` relocates it
/// (freeing the old block) via the free list or a bump tail, and only
/// when the tail would exceed `cap_total` does the caller grow + repack
/// the whole buffer. Pure (no GPU) so it unit-tests on its own.
#[derive(Debug, Default)]
struct ColorsAllocator {
    /// Per-entry slot, indexed by entry id.
    slots: Vec<ColorSlot>,
    /// Freed `(off, cap)` blocks available for first-fit reuse. Never
    /// holds a zero-capacity block.
    free: Vec<(u32, u32)>,
    /// Next bump-allocation position (words).
    tail: u32,
    /// Total buffer capacity in words.
    cap_total: u32,
}

/// Slack-padded capacity for a `len`-word array: +25% + 16 words, so a
/// few extra surface voxels from a carve fit without relocating.
fn slot_cap(len: u32) -> u32 {
    len + len / 4 + 16
}

/// Slack capacity (words) for a grown concatenated buffer: +50% + 256, so
/// a burst of `add_model` calls bump-appends rather than re-growing every
/// time. Also used for [`ColorsAllocator`]'s `cap_total` headroom.
fn grow_words(used: u32) -> u32 {
    used + used / 2 + 256
}

/// Slack capacity (records) for a grown `model_meta` buffer: +50% + 8.
fn grow_records(count: u32) -> u32 {
    count + count / 2 + 8
}

impl ColorsAllocator {
    /// Lay every entry out contiguously (with per-slot slack) and add a
    /// global tail headroom so early growth bump-allocates rather than
    /// repacks.
    fn new(entry_lens: &[u32]) -> Self {
        let mut a = Self::default();
        a.repack(entry_lens);
        a
    }

    /// Current slot of `entry`. Panics if `entry` has no slot.
    fn slot(&self, entry: usize) -> ColorSlot {
        self.slots[entry]
    }

    /// Total buffer capacity in words.
    fn cap_total(&self) -> u32 {
        self.cap_total
    }

    /// Repack ALL entries compactly to fit `new_lens`, resetting the
    /// free list + tail and choosing a fresh `cap_total` with headroom.
    /// Used at initial build and on a buffer grow.
    fn repack(&mut self, new_lens: &[u32]) {
        self.free.clear();
        let mut off = 0u32;
        self.slots = new_lens
            .iter()
            .map(|&len| {
                // A 0-length (dead / removed) entry takes no space — it
                // keeps a tombstone slot so entry ids stay positional.
                let cap = if len == 0 { 0 } else { slot_cap(len) };
                let slot = ColorSlot { off, cap, len };
                off += cap;
                slot
            })
            .collect();
        self.tail = off;
        // Global headroom: the same +50% + 256 words the concatenated
        // buffers use.
        self.cap_total = grow_words(off);
    }

    /// Reserve a block able to hold `len` live words. Tries the free
    /// list first (first fit; the whole freed block is handed out), then
    /// bumps the tail by a slack-padded `slot_cap(len)`. Returns the
    /// block as `(off, cap)`, or `None` when neither option fits within
    /// `cap_total`.
    fn alloc(&mut self, len: u32) -> Option<(u32, u32)> {
        if let Some(i) = self.free.iter().position(|&(_, cap)| cap >= len) {
            // `remove` (not `swap_remove`) keeps the list order, and with
            // it the first-fit choice, stable.
            return Some(self.free.remove(i));
        }
        let want = slot_cap(len);
        if self.tail + want > self.cap_total {
            return None;
        }
        let off = self.tail;
        self.tail += want;
        Some((off, want))
    }

    /// Return a block to the free list. Zero-capacity blocks (tombstones
    /// and empty entries) are dropped: they can never satisfy a non-empty
    /// request and would only lengthen every first-fit scan.
    fn release(&mut self, off: u32, cap: u32) {
        if cap > 0 {
            self.free.push((off, cap));
        }
    }

    /// Place `new_len` words for `entry`. Returns `Some(off)` with the
    /// (possibly relocated) slot offset, or `None` if the buffer must
    /// grow + repack (the slot is then left untouched). On relocation
    /// the old block is pushed to the free list; an in-place fit returns
    /// the unchanged offset.
    fn place(&mut self, entry: usize, new_len: u32) -> Option<u32> {
        let cur = self.slots[entry];
        if new_len <= cur.cap {
            self.slots[entry].len = new_len;
            return Some(cur.off);
        }
        // Reserve the new block before releasing the old one, so the old
        // block only becomes reusable once the move is certain.
        let (off, cap) = self.alloc(new_len)?;
        self.release(cur.off, cur.cap);
        self.slots[entry] = ColorSlot {
            off,
            cap,
            len: new_len,
        };
        Some(off)
    }

    /// Append a slot for a brand-new entry of `new_len` words (used by
    /// [`SpriteRegistryResident::add_model`]). Returns `Some(off)` placed
    /// via the free list or the bump tail, or `None` if the buffer must
    /// grow + repack — in which case **no** slot is pushed (the caller's
    /// repack rebuilds every slot from scratch).
    fn push(&mut self, new_len: u32) -> Option<u32> {
        let (off, cap) = self.alloc(new_len)?;
        self.slots.push(ColorSlot {
            off,
            cap,
            len: new_len,
        });
        Some(off)
    }

    /// Free `entry`'s slot back to the pool (used by
    /// [`SpriteRegistryResident::remove_model`]). Its `(off, cap)` block
    /// joins the free list for first-fit reuse by a later [`Self::push`];
    /// the slot is zeroed so a repack treats it as a 0-length tombstone.
    fn free(&mut self, entry: usize) {
        let tombstone = ColorSlot {
            off: 0,
            cap: 0,
            len: 0,
        };
        let old = std::mem::replace(&mut self.slots[entry], tombstone);
        self.release(old.off, old.cap);
    }
}
2002
2003/// Create a STORAGE buffer of u32s; pads empty input (wgpu rejects
2004/// zero-sized storage bindings).
2005#[allow(dead_code)]
2006fn storage_u32(device: &wgpu::Device, label: &str, data: &[u32]) -> wgpu::Buffer {
2007    use wgpu::util::DeviceExt;
2008    let bytes: &[u8] = if data.is_empty() {
2009        bytemuck::cast_slice(&[0u32])
2010    } else {
2011        bytemuck::cast_slice(data)
2012    };
2013    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
2014        label: Some(label),
2015        contents: bytes,
2016        usage: wgpu::BufferUsages::STORAGE,
2017    })
2018}
2019
2020/// Create an uninitialised `STORAGE | COPY_DST` `u32` buffer of `cap`
2021/// words (≥1). Written each frame via `queue.write_buffer`.
2022fn storage_dst_u32(device: &wgpu::Device, label: &str, cap: u32) -> wgpu::Buffer {
2023    device.create_buffer(&wgpu::BufferDescriptor {
2024        label: Some(label),
2025        size: u64::from(cap.max(1)) * 4,
2026        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
2027        mapped_at_creation: false,
2028    })
2029}
2030
2031/// Create a `STORAGE | COPY_DST` `u32` buffer of `cap` words (≥ data
2032/// length, ≥ 1), initialised with `data` at offset 0 and the tail left
2033/// zeroed. Unlike [`storage_u32`] (STORAGE-only, exact-size) this both
2034/// reserves spare capacity and is `COPY_DST`, so the incremental
2035/// [`SpriteRegistryResident::update_model`] can `write_buffer` a growing
2036/// `colors`/`dirs` array in place. Filled via `mapped_at_creation` so no
2037/// queue is needed at upload time.
2038fn storage_dst_u32_cap(device: &wgpu::Device, label: &str, data: &[u32], cap: u32) -> wgpu::Buffer {
2039    let cap = cap.max(data.len() as u32).max(1);
2040    let buf = device.create_buffer(&wgpu::BufferDescriptor {
2041        label: Some(label),
2042        size: u64::from(cap) * 4,
2043        usage: wgpu::BufferUsages::STORAGE
2044            | wgpu::BufferUsages::COPY_DST
2045            | wgpu::BufferUsages::COPY_SRC,
2046        mapped_at_creation: true,
2047    });
2048    if !data.is_empty() {
2049        buf.slice(..(data.len() as u64 * 4))
2050            .get_mapped_range_mut()
2051            .copy_from_slice(bytemuck::cast_slice(data));
2052    }
2053    buf.unmap();
2054    buf
2055}
2056
2057/// Create a `STORAGE | COPY_DST` buffer of Pod records, exact-size
2058/// (≥ 1, zero-padded), so individual records can be rewritten in place
2059/// by [`SpriteRegistryResident::update_model`] on a relocation. The
2060/// record *count* never changes on an incremental edit (no model is
2061/// added/removed), so no slack is needed here.
2062fn storage_dst_pod<T: Pod + Zeroable>(
2063    device: &wgpu::Device,
2064    label: &str,
2065    data: &[T],
2066) -> wgpu::Buffer {
2067    let one = [T::zeroed()];
2068    let src: &[T] = if data.is_empty() { &one } else { data };
2069    let buf = device.create_buffer(&wgpu::BufferDescriptor {
2070        label: Some(label),
2071        size: std::mem::size_of_val(src) as u64,
2072        usage: wgpu::BufferUsages::STORAGE
2073            | wgpu::BufferUsages::COPY_DST
2074            | wgpu::BufferUsages::COPY_SRC,
2075        mapped_at_creation: true,
2076    });
2077    buf.slice(..)
2078        .get_mapped_range_mut()
2079        .copy_from_slice(bytemuck::cast_slice(src));
2080    buf.unmap();
2081    buf
2082}
2083
2084/// Create a `STORAGE | COPY_DST` Pod buffer holding `cap` records
2085/// (≥ `data.len()`, ≥ 1), initialised with `data` at record 0 and the
2086/// tail zeroed. The slack lets [`SpriteRegistryResident::add_model`] grow
2087/// the `model_meta` table without re-growing on every add.
2088fn storage_dst_pod_cap<T: Pod + Zeroable>(
2089    device: &wgpu::Device,
2090    label: &str,
2091    data: &[T],
2092    cap: u32,
2093) -> wgpu::Buffer {
2094    let rec = std::mem::size_of::<T>() as u64;
2095    let cap = u64::from(cap.max(data.len() as u32).max(1));
2096    let buf = device.create_buffer(&wgpu::BufferDescriptor {
2097        label: Some(label),
2098        size: cap * rec,
2099        usage: wgpu::BufferUsages::STORAGE
2100            | wgpu::BufferUsages::COPY_DST
2101            | wgpu::BufferUsages::COPY_SRC,
2102        mapped_at_creation: true,
2103    });
2104    if !data.is_empty() {
2105        buf.slice(..(data.len() as u64 * rec))
2106            .get_mapped_range_mut()
2107            .copy_from_slice(bytemuck::cast_slice(data));
2108    }
2109    buf.unmap();
2110    buf
2111}
2112
2113/// Create a STORAGE buffer of Pod records; pads empty input with one
2114/// zeroed `T`.
2115#[allow(dead_code)]
2116fn storage_pod<T: Pod + Zeroable>(device: &wgpu::Device, label: &str, data: &[T]) -> wgpu::Buffer {
2117    use wgpu::util::DeviceExt;
2118    let one = [T::zeroed()];
2119    let src: &[T] = if data.is_empty() { &one } else { data };
2120    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
2121        label: Some(label),
2122        contents: bytemuck::cast_slice(src),
2123        usage: wgpu::BufferUsages::STORAGE,
2124    })
2125}
2126
2127#[cfg(test)]
2128mod tests {
2129    use super::*;
2130    use roxlap_formats::kv6::{Kv6, Voxel};
2131
2132    /// 2×1 kv6: column (0,0) has voxels at z=5 (red) and z=1 (green)
2133    /// stored OUT of z-order; column (1,0) has one voxel at z=3.
2134    fn kv6_unsorted() -> Kv6 {
2135        let mk = |z, col| Voxel {
2136            col,
2137            z,
2138            vis: 0,
2139            dir: 0,
2140        };
2141        Kv6 {
2142            xsiz: 2,
2143            ysiz: 1,
2144            zsiz: 8,
2145            xpiv: 0.0,
2146            ypiv: 0.0,
2147            zpiv: 0.0,
2148            voxels: vec![mk(5, 0xAA), mk(1, 0xBB), mk(3, 0xCC)],
2149            xlen: vec![2, 1],
2150            ylen: vec![vec![2], vec![1]],
2151            palette: None,
2152        }
2153    }
2154
2155    #[test]
2156    fn occupancy_bits_set_at_voxel_z() {
2157        let m = build_sprite_model(&kv6_unsorted());
2158        assert_eq!(m.dims, [2, 1, 8]);
2159        assert_eq!(m.occ_words_per_col, 1); // ceil(8/32)
2160                                            // col 0: bits 1 and 5; col 1: bit 3.
2161        assert_eq!(m.occupancy[0], (1 << 1) | (1 << 5));
2162        assert_eq!(m.occupancy[1], 1 << 3);
2163    }
2164
2165    #[test]
2166    fn colors_are_ascending_z_for_rank_lookup() {
2167        let m = build_sprite_model(&kv6_unsorted());
2168        // col 0 sorted ascending z ⇒ z=1 (green 0xBB) before z=5 (0xAA).
2169        assert_eq!(m.color_offsets, vec![0, 2, 3]);
2170        assert_eq!(&m.colors, &[0xBB, 0xAA, 0xCC]);
2171    }
2172
2173    #[test]
2174    fn identity_basis_inverts_to_identity() {
2175        let inv = mat3_inverse([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]);
2176        assert_eq!(inv, [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]);
2177    }
2178
2179    #[test]
2180    fn fork_is_independent_of_parent() {
2181        let mut reg = SpriteModelRegistry::new();
2182        let base = reg.add(build_sprite_model(&kv6_unsorted()));
2183        let forked = reg.fork(base);
2184        assert_ne!(base, forked);
2185        // Recolour only the fork.
2186        reg.model_mut(forked).recolor(|_| 0x11);
2187        // Parent colours untouched; fork fully overwritten.
2188        assert_eq!(&reg.model(base).colors, &[0xBB, 0xAA, 0xCC]);
2189        assert_eq!(&reg.model(forked).colors, &[0x11, 0x11, 0x11]);
2190    }
2191
2192    #[test]
2193    fn remove_frees_chain_data_keeps_ids_stable() {
2194        let mut reg = SpriteModelRegistry::new();
2195        let a = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2196        let b = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2197        let len_before = reg.len();
2198        assert!(reg.is_live(a) && reg.is_live(b));
2199
2200        reg.remove(a);
2201        // Chain `a` is tombstoned (its entries are freed to empty models;
2202        // they're unreachable via `model()` now — that's the tombstone).
2203        assert!(!reg.is_live(a));
2204        // `b` is untouched and still live; `len()` (next id) is unchanged.
2205        assert!(reg.is_live(b));
2206        assert_eq!(&reg.model(b).colors, &[0xBB, 0xAA, 0xCC]);
2207        assert_eq!(reg.len(), len_before);
2208
2209        // A later add mints a fresh id past the tombstone (no slot reuse).
2210        let c = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2211        assert_eq!(c, len_before as u32);
2212        assert!(reg.is_live(c));
2213        // `b`'s id stayed valid across the remove + add round-trip.
2214        assert_eq!(&reg.model(b).colors, &[0xBB, 0xAA, 0xCC]);
2215    }
2216
2217    #[test]
2218    fn model_checked_guards_out_of_range_and_tombstoned() {
2219        // The guard `set_instance_model` relies on: `model()` would
2220        // index-panic on these, `model_checked` returns `None`.
2221        let mut reg = SpriteModelRegistry::new();
2222        let a = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2223        assert!(reg.model_checked(a).is_some());
2224        assert!(reg.model_checked(9999).is_none(), "out of range → None");
2225        reg.remove(a);
2226        assert!(reg.model_checked(a).is_none(), "tombstoned chain → None");
2227    }
2228
2229    #[test]
2230    fn remove_is_idempotent_and_bounds_safe() {
2231        let mut reg = SpriteModelRegistry::new();
2232        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2233        reg.remove(a);
2234        reg.remove(a); // already removed → no-op, no panic
2235        reg.remove(999); // out of range → no-op
2236        assert!(!reg.is_live(a));
2237        assert!(!reg.is_live(999));
2238    }
2239
2240    #[test]
2241    fn registry_gpu_structs_have_expected_sizes() {
2242        assert_eq!(std::mem::size_of::<SpriteModelMeta>(), 48);
2243        // TV — grew 64 → 80 with the per-instance material id + alpha_mul
2244        // (+ 8 bytes pad to keep the 16-byte std430 stride).
2245        assert_eq!(std::mem::size_of::<SpriteInstanceGpu>(), 80);
2246    }
2247
2248    #[test]
2249    fn add_lod_builds_halving_mip_chain() {
2250        let mut reg = SpriteModelRegistry::new();
2251        // 8×8×8 single voxel-filled column model would be ideal, but
2252        // kv6_unsorted is 2×1×8 → mips: 2×1×8 → 1×1×4 → 1×1×2 → 1×1×1.
2253        let id = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2254        let m0 = reg.model(id);
2255        assert_eq!(m0.dims, [2, 1, 8]);
2256        assert!((m0.voxel_world_size - 1.0).abs() < 1e-6);
2257    }
2258
2259    /// kv6 from explicit voxels, ordered x-major/y-inner to match
2260    /// `build_sprite_model`'s column walk.
2261    fn kv6_from(xsiz: u32, ysiz: u32, zsiz: u32, voxels: &[(u32, u32, u16, u32)]) -> Kv6 {
2262        let mut ylen = vec![vec![0u16; ysiz as usize]; xsiz as usize];
2263        let mut flat = Vec::new();
2264        for x in 0..xsiz {
2265            for y in 0..ysiz {
2266                let mut col: Vec<(u16, u32)> = voxels
2267                    .iter()
2268                    .filter(|(vx, vy, _, _)| *vx == x && *vy == y)
2269                    .map(|(_, _, z, c)| (*z, *c))
2270                    .collect();
2271                col.sort_by_key(|(z, _)| *z);
2272                ylen[x as usize][y as usize] = col.len() as u16;
2273                for (z, c) in col {
2274                    flat.push(Voxel {
2275                        col: c,
2276                        z,
2277                        vis: 0,
2278                        dir: 0,
2279                    });
2280                }
2281            }
2282        }
2283        let xlen = ylen
2284            .iter()
2285            .map(|c| c.iter().map(|&v| u32::from(v)).sum())
2286            .collect();
2287        Kv6 {
2288            xsiz,
2289            ysiz,
2290            zsiz,
2291            xpiv: 0.0,
2292            ypiv: 0.0,
2293            zpiv: 0.0,
2294            voxels: flat,
2295            xlen,
2296            ylen,
2297            palette: None,
2298        }
2299    }
2300
2301    fn offsets_consistent(m: &SpriteModel) -> bool {
2302        let cols = (m.dims[0] * m.dims[1]) as usize;
2303        if m.color_offsets.len() != cols + 1 {
2304            return false;
2305        }
2306        // Monotonic non-decreasing + last == colors.len + each column's
2307        // span == its solid-voxel count.
2308        for w in m.color_offsets.windows(2) {
2309            if w[1] < w[0] {
2310                return false;
2311            }
2312        }
2313        m.color_offsets[cols] as usize == m.colors.len()
2314    }
2315
2316    #[test]
2317    fn carve_two_layers_keeps_offsets_consistent() {
2318        // Mirror the demo's carve: columns with voxels at varied z,
2319        // some sharing z=0/z=1, some not.
2320        let kv6 = kv6_from(
2321            3,
2322            2,
2323            8,
2324            &[
2325                (0, 0, 0, 0xA0),
2326                (0, 0, 1, 0xA1),
2327                (0, 0, 5, 0xA5),
2328                (1, 0, 1, 0xB1),
2329                (2, 1, 0, 0xC0),
2330                (2, 1, 3, 0xC3),
2331            ],
2332        );
2333        let mut m = build_sprite_model(&kv6);
2334        assert!(offsets_consistent(&m));
2335        for z in 0..2u32 {
2336            for y in 0..m.dims[1] {
2337                for x in 0..m.dims[0] {
2338                    m.set_voxel(x, y, z, None);
2339                }
2340            }
2341            assert!(offsets_consistent(&m), "inconsistent after carving z={z}");
2342            // downsample must not panic on the carved model.
2343            let _ = m.downsample();
2344        }
2345    }
2346
2347    #[test]
2348    fn set_voxel_inserts_replaces_and_clears() {
2349        // col 0 starts with z=1 (0xBB), z=5 (0xAA); col 1 with z=3 (0xCC).
2350        let mut m = build_sprite_model(&kv6_unsorted());
2351
2352        // Insert z=3 into col 0 (between z=1 and z=5) → rank 1.
2353        assert!(m.set_voxel(0, 0, 3, Some(0x55)));
2354        assert_eq!(m.occupancy[0], (1 << 1) | (1 << 3) | (1 << 5));
2355        // col 0 colours ascending z: 0xBB(z1), 0x55(z3), 0xAA(z5).
2356        assert_eq!(m.color_offsets, vec![0, 3, 4]);
2357        assert_eq!(&m.colors, &[0xBB, 0x55, 0xAA, 0xCC]);
2358
2359        // Replace z=3 in place (no offset shift).
2360        assert!(m.set_voxel(0, 0, 3, Some(0x66)));
2361        assert_eq!(&m.colors, &[0xBB, 0x66, 0xAA, 0xCC]);
2362        assert_eq!(m.color_offsets, vec![0, 3, 4]);
2363
2364        // Clear z=1 (rank 0) from col 0.
2365        assert!(m.set_voxel(0, 0, 1, None));
2366        assert_eq!(m.occupancy[0], (1 << 3) | (1 << 5));
2367        assert_eq!(m.color_offsets, vec![0, 2, 3]);
2368        assert_eq!(&m.colors, &[0x66, 0xAA, 0xCC]);
2369
2370        // No-ops: clear an empty voxel, edit out of bounds.
2371        assert!(!m.set_voxel(0, 0, 2, None));
2372        assert!(!m.set_voxel(9, 0, 0, Some(1)));
2373    }
2374
2375    #[test]
2376    fn rebuild_lod_refreshes_coarse_levels_from_mip0() {
2377        let mut reg = SpriteModelRegistry::new();
2378        let id = reg.add_lod(build_sprite_model(&kv6_unsorted()), 3);
2379        // Recolour mip-0 only via model_mut, then rebuild the ladder.
2380        reg.model_mut(id).recolor(|_| 0x0000_2000);
2381        reg.rebuild_lod(id);
2382        // The mip-1 average of all-0x2000 voxels is still 0x2000.
2383        let lvl1_entry = reg.chains[id as usize][1] as usize;
2384        assert!(reg.entries[lvl1_entry]
2385            .colors
2386            .iter()
2387            .all(|&c| c == 0x0000_2000));
2388    }
2389
2390    // ---- GPU.12 incremental: colors/dirs suballocator -----------------
2391
2392    /// Every slot fits its data, has slack, doesn't overlap the next, and
2393    /// the buffer reserves tail headroom past the last slot.
2394    fn alloc_invariants(a: &ColorsAllocator, lens: &[u32]) {
2395        let mut prev_end = 0u32;
2396        for (e, &len) in lens.iter().enumerate() {
2397            let s = a.slot(e);
2398            assert_eq!(s.len, len, "slot {e} len");
2399            assert!(s.cap >= s.len, "slot {e} cap >= len");
2400            // In a freshly repacked layout slots are in entry order.
2401            assert!(s.off >= prev_end, "slot {e} overlaps previous");
2402            assert!(s.off + s.cap <= a.cap_total(), "slot {e} past cap_total");
2403            prev_end = s.off + s.cap;
2404        }
2405        assert!(a.cap_total() >= prev_end, "tail headroom");
2406    }
2407
2408    #[test]
2409    fn allocator_new_lays_out_with_slack_and_headroom() {
2410        let lens = [10u32, 0, 64, 7];
2411        let a = ColorsAllocator::new(&lens);
2412        alloc_invariants(&a, &lens);
2413        // Slack: a 64-word slot has cap > 64 so a small carve-grow fits.
2414        assert!(a.slot(2).cap > 64);
2415        // Headroom past the bump tail for early growth.
2416        assert!(a.cap_total() > a.slot(3).off + a.slot(3).cap);
2417    }
2418
2419    #[test]
2420    fn allocator_place_in_place_when_within_cap() {
2421        let mut a = ColorsAllocator::new(&[10, 20]);
2422        let off0 = a.slot(0).off;
2423        let cap0 = a.slot(0).cap;
2424        // Shrink: still the same slot.
2425        assert_eq!(a.place(0, 5), Some(off0));
2426        assert_eq!(a.slot(0).len, 5);
2427        assert_eq!(a.slot(0).cap, cap0);
2428        // Grow within slack: same offset, no relocation.
2429        assert_eq!(a.place(0, cap0), Some(off0));
2430        assert_eq!(a.slot(0).off, off0);
2431        assert!(a.free.is_empty(), "no relocation should free anything");
2432    }
2433
2434    #[test]
2435    fn allocator_place_relocates_to_tail_and_frees_old() {
2436        let mut a = ColorsAllocator::new(&[10, 20]);
2437        let old0 = (a.slot(0).off, a.slot(0).cap);
2438        let tail_before = a.tail;
2439        // Overgrow entry 0 past its cap → relocate to the bump tail.
2440        let new_len = a.slot(0).cap + 5;
2441        let off = a.place(0, new_len).expect("fits in headroom");
2442        assert_eq!(off, tail_before, "relocated to old tail");
2443        assert_eq!(a.slot(0).off, off);
2444        assert_eq!(a.slot(0).len, new_len);
2445        assert!(a.free.contains(&old0), "old slot freed");
2446    }
2447
2448    #[test]
2449    fn allocator_reuses_freed_block_first_fit() {
2450        // Entry 0 has a large slot; entry 1 a tiny one, so growing 1 must
2451        // relocate (it can't fit in place) and lands in 0's freed block.
2452        let mut a = ColorsAllocator::new(&[10, 2]);
2453        let old0 = (a.slot(0).off, a.slot(0).cap);
2454        // Relocate entry 0 to the tail, freeing its original block.
2455        let _ = a.place(0, a.slot(0).cap + 5).unwrap();
2456        assert!(a.free.contains(&old0));
2457        // Grow entry 1 past its (tiny) cap but ≤ the freed block's cap →
2458        // first-fit reuses that block rather than bumping the tail.
2459        let new1 = a.slot(1).cap + 1;
2460        assert!(new1 <= old0.1, "freed block big enough");
2461        let off = a.place(1, new1).expect("reuses freed block");
2462        assert_eq!(off, old0.0, "first-fit reused the freed slot offset");
2463        assert!(!a.free.contains(&old0), "freed block consumed");
2464    }
2465
2466    #[test]
2467    fn allocator_signals_grow_then_repack_restores() {
2468        let mut a = ColorsAllocator::new(&[8, 8]);
2469        // Force overflow: ask for far more than cap_total.
2470        let huge = a.cap_total() + 100;
2471        assert_eq!(a.place(0, huge), None, "overflow must signal grow");
2472        // Repack with the new lengths compacts + grows the buffer.
2473        a.repack(&[huge, 8]);
2474        alloc_invariants(&a, &[huge, 8]);
2475        assert!(a.cap_total() > huge);
2476        // After repack the entry now fits in place.
2477        assert_eq!(a.place(0, huge), Some(a.slot(0).off));
2478    }
2479
2480    /// Drive the allocator like a real carve loop (mirroring
2481    /// `update_model`): one model's colour count drifts up and down
2482    /// across many edits while two neighbours stay put. Growth is
2483    /// absorbed in place / via the free list / by the bump tail, and on
2484    /// the rare overflow we repack (as `update_model` does). After every
2485    /// edit the live `[off, off+len)` windows must stay disjoint.
2486    #[test]
2487    fn allocator_carve_loop_keeps_live_windows_disjoint() {
2488        let mut a = ColorsAllocator::new(&[40, 12, 40]);
2489        let mut lens = [40u32, 12, 40];
2490        // A deterministic up/down walk of entry 1's length, incl. a jump
2491        // that forces at least one grow+repack.
2492        let walk = [13u32, 30, 60, 18, 9, 80, 80, 25, 200, 7];
2493        let mut grew = false;
2494        for &len in &walk {
2495            lens[1] = len;
2496            // Entry 1 re-placed; on overflow, repack the whole set.
2497            if a.place(1, len).is_none() {
2498                grew = true;
2499                a.repack(&lens);
2500            } else {
2501                // Neighbours fit in place every time.
2502                assert_eq!(a.place(0, 40), Some(a.slot(0).off));
2503                assert_eq!(a.place(2, 40), Some(a.slot(2).off));
2504            }
2505            assert_eq!(a.slot(1).len, len);
2506
2507            // No two entries' live windows overlap.
2508            let mut wins: Vec<(u32, u32)> =
2509                (0..3).map(|e| (a.slot(e).off, a.slot(e).len)).collect();
2510            wins.sort_by_key(|w| w.0);
2511            for pair in wins.windows(2) {
2512                let (o0, l0) = pair[0];
2513                let (o1, _) = pair[1];
2514                assert!(o0 + l0 <= o1, "live windows overlap: {pair:?}");
2515            }
2516        }
2517        assert!(grew, "the 200-word jump should have forced a repack");
2518    }
2519
2520    // --- incremental instance path (device-backed; skips w/o adapter) ---
2521
2522    fn headless() -> Option<crate::HeadlessGpu> {
2523        match crate::HeadlessGpu::new_blocking(crate::GpuRendererSettings::default()) {
2524            Ok(h) => Some(h),
2525            Err(e) => {
2526                eprintln!("[skip] no GPU adapter reachable: {e}");
2527                None
2528            }
2529        }
2530    }
2531
2532    fn one_model_registry() -> (SpriteModelRegistry, u32) {
2533        let mut reg = SpriteModelRegistry::new();
2534        let id = reg.add(build_sprite_model(&kv6_unsorted()));
2535        (reg, id)
2536    }
2537
2538    fn inst(model_id: u32, pos: [f32; 3]) -> SpriteInstance {
2539        use roxlap_formats::sprite::Sprite;
2540        SpriteInstance::new(
2541            model_id,
2542            SpriteInstanceTransform::from_sprite(&Sprite::axis_aligned(kv6_unsorted(), pos)),
2543        )
2544    }
2545
2546    #[test]
2547    fn append_grows_count_and_capacity_pow2() {
2548        let Some(h) = headless() else { return };
2549        let (reg, m) = one_model_registry();
2550        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(m, [0.0; 3])]);
2551        assert_eq!(res.instance_count(), 1);
2552        assert_eq!(res.instance_capacity, 1);
2553
2554        // Append 4 → count 5, capacity grows to next_pow2(5) = 8.
2555        let more: Vec<_> = (1..=4).map(|i| inst(m, [i as f32, 0.0, 0.0])).collect();
2556        let base = res.append_instances(&h.device, &reg, &more);
2557        assert_eq!(base, 1, "first appended index follows the seed instance");
2558        assert_eq!(res.instance_count(), 5);
2559        assert_eq!(res.instance_capacity, 8, "power-of-two growth");
2560
2561        // A second append that still fits keeps the same capacity (no realloc).
2562        let base2 = res.append_instances(&h.device, &reg, &[inst(m, [9.0, 0.0, 0.0])]);
2563        assert_eq!(base2, 5);
2564        assert_eq!(res.instance_count(), 6);
2565        assert_eq!(res.instance_capacity, 8, "fits existing capacity, no grow");
2566    }
2567
2568    #[test]
2569    fn append_empty_is_noop() {
2570        let Some(h) = headless() else { return };
2571        let (reg, m) = one_model_registry();
2572        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(m, [0.0; 3])]);
2573        let base = res.append_instances(&h.device, &reg, &[]);
2574        assert_eq!(base, 1);
2575        assert_eq!(res.instance_count(), 1);
2576        assert_eq!(res.instance_capacity, 1);
2577    }
2578
2579    /// Read `words` u32s back from a GPU buffer (needs COPY_SRC).
2580    fn read_u32(h: &crate::HeadlessGpu, buf: &wgpu::Buffer, words: u64) -> Vec<u32> {
2581        let bytes = words * 4;
2582        let staging = h.device.create_buffer(&wgpu::BufferDescriptor {
2583            label: Some("readback"),
2584            size: bytes,
2585            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
2586            mapped_at_creation: false,
2587        });
2588        let mut enc = h
2589            .device
2590            .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
2591        enc.copy_buffer_to_buffer(buf, 0, &staging, 0, bytes);
2592        h.queue.submit(std::iter::once(enc.finish()));
2593        let slice = staging.slice(..);
2594        let (tx, rx) = std::sync::mpsc::channel();
2595        slice.map_async(wgpu::MapMode::Read, move |r| tx.send(r).unwrap());
2596        h.device.poll(wgpu::PollType::wait_indefinitely()).ok();
2597        rx.recv().unwrap().unwrap();
2598        let data = slice.get_mapped_range();
2599        let out = bytemuck::cast_slice::<u8, u32>(&data).to_vec();
2600        drop(data);
2601        staging.unmap();
2602        out
2603    }
2604
2605    /// A second distinct model so add_model has real new geometry to lay
2606    /// down (different dims + colours from `kv6_unsorted`).
2607    fn kv6_other() -> Kv6 {
2608        let mk = |z, col| Voxel {
2609            col,
2610            z,
2611            vis: 0,
2612            dir: 0,
2613        };
2614        Kv6 {
2615            xsiz: 1,
2616            ysiz: 1,
2617            zsiz: 4,
2618            xpiv: 0.0,
2619            ypiv: 0.0,
2620            zpiv: 0.0,
2621            voxels: vec![mk(0, 0x11), mk(2, 0x22)],
2622            xlen: vec![2],
2623            ylen: vec![vec![2]],
2624            palette: None,
2625        }
2626    }
2627
2628    /// add_model lays the new model's volume on the GPU at the offsets its
2629    /// meta record claims — verified by reading the shared buffers back
2630    /// and matching each entry against its source SpriteModel.
2631    #[test]
2632    fn add_model_uploads_new_volume_incrementally() {
2633        let Some(h) = headless() else { return };
2634
2635        // Residency starts with model A only.
2636        let mut reg = SpriteModelRegistry::new();
2637        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2638        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(a, [0.0; 3])]);
2639        assert_eq!(res.chains.len(), 1);
2640        let entries_before = res.meta.len();
2641
2642        // Append model B (single-level) to the registry, then sync it.
2643        let b = reg.add(build_sprite_model(&kv6_other()));
2644        res.add_model(&h.device, &h.queue, &reg, b);
2645        assert_eq!(res.chains.len(), 2);
2646        assert_eq!(res.meta.len(), entries_before + 1, "one new entry");
2647
2648        // Read the shared buffers back and check EVERY entry's data sits
2649        // where its meta record points — both the pre-existing A and the
2650        // newly streamed B.
2651        let occ = read_u32(&h, &res.occupancy, u64::from(res.occ_cap));
2652        let coloff = read_u32(&h, &res.color_offsets, u64::from(res.coloff_cap));
2653        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2654        for (e, m) in reg.entries.iter().enumerate() {
2655            let meta = res.meta[e];
2656            let oo = meta.occupancy_offset as usize;
2657            assert_eq!(
2658                &occ[oo..oo + m.occupancy.len()],
2659                &m.occupancy[..],
2660                "occ entry {e}"
2661            );
2662            let co = meta.color_offsets_offset as usize;
2663            assert_eq!(
2664                &coloff[co..co + m.color_offsets.len()],
2665                &m.color_offsets[..],
2666                "color_offsets entry {e}"
2667            );
2668            let cc = meta.colors_offset as usize;
2669            assert_eq!(
2670                &cols[cc..cc + m.colors.len()],
2671                &m.colors[..],
2672                "colors entry {e}"
2673            );
2674        }
2675
2676        // And an instance of the freshly-added model can now be appended.
2677        let base = res.append_instances(&h.device, &reg, &[inst(b, [5.0, 0.0, 0.0])]);
2678        assert_eq!(base, 1);
2679        assert_eq!(res.instance_count(), 2);
2680    }
2681
2682    /// Adding many small models forces the volume buffers to grow + rebuild
2683    /// at least once; every entry must still read back correctly across the
2684    /// grow boundary.
2685    #[test]
2686    fn add_model_survives_buffer_growth() {
2687        let Some(h) = headless() else { return };
2688        let mut reg = SpriteModelRegistry::new();
2689        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2690        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(a, [0.0; 3])]);
2691        let occ_cap0 = res.occ_cap;
2692
2693        // 40 adds — occupancy starts exact-sized (cap == used), so the very
2694        // first add overflows and grows; later ones ride the slack.
2695        for _ in 0..40 {
2696            let id = reg.add(build_sprite_model(&kv6_other()));
2697            res.add_model(&h.device, &h.queue, &reg, id);
2698        }
2699        assert_eq!(res.chains.len(), 41);
2700        assert!(res.occ_cap > occ_cap0, "occupancy buffer grew");
2701
2702        let occ = read_u32(&h, &res.occupancy, u64::from(res.occ_cap));
2703        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2704        for (e, m) in reg.entries.iter().enumerate() {
2705            let meta = res.meta[e];
2706            let oo = meta.occupancy_offset as usize;
2707            assert_eq!(
2708                &occ[oo..oo + m.occupancy.len()],
2709                &m.occupancy[..],
2710                "occ entry {e}"
2711            );
2712            let cc = meta.colors_offset as usize;
2713            assert_eq!(
2714                &cols[cc..cc + m.colors.len()],
2715                &m.colors[..],
2716                "colors entry {e}"
2717            );
2718        }
2719    }
2720
2721    /// VCL.2 — a decoded voxel clip's frames register as a flipbook of LOD
2722    /// chains, and `set_instance_model` flips which frame an instance
2723    /// draws. The cull state it updates is exactly what
2724    /// `cull_bin_upload` packs into the GPU instance buffer each frame, so
2725    /// flipping `chain_id` redirects the rendered instance to the new
2726    /// frame's resident volume.
2727    #[test]
2728    fn voxel_clip_flipbook_set_instance_model() {
2729        use roxlap_formats::voxel_clip::{LoopMode, VoxelClip, VoxelFrame};
2730        let Some(h) = headless() else { return };
2731
2732        // Two distinct frames of a 1×1×4 clip: frame 0 has a voxel at z=0;
2733        // frame 1 adds z=1 — different occupancy + a longer colour run.
2734        let dims = [1u32, 1, 4];
2735        let owpc = dims[2].div_ceil(32).max(1) as usize; // 1
2736        let mk_frame = |zs: &[u32], cols: &[u32]| -> VoxelFrame {
2737            let mut occ = vec![0u32; owpc];
2738            for &z in zs {
2739                occ[(z >> 5) as usize] |= 1u32 << (z & 31);
2740            }
2741            VoxelFrame {
2742                occupancy: occ,
2743                colors: cols.to_vec(),
2744                color_offsets: vec![0, cols.len() as u32],
2745            }
2746        };
2747        let f0 = mk_frame(&[0], &[0x8011_2233]);
2748        let f1 = mk_frame(&[0, 1], &[0x8011_2233, 0x80AA_BBCC]);
2749        let clip = VoxelClip::from_frames(
2750            dims,
2751            [0.5, 0.5, 2.0],
2752            1.0,
2753            LoopMode::Loop,
2754            &[f0, f1],
2755            &[],
2756            33,
2757            0,
2758        );
2759        let decoded = clip.decode().expect("decode");
2760
2761        // Each frame → a single-level chain; both volumes resident + distinct.
2762        let mut reg = SpriteModelRegistry::new();
2763        let c0 = reg.add(sprite_model_from_clip_frame(&decoded, 0));
2764        let c1 = reg.add(sprite_model_from_clip_frame(&decoded, 1));
2765        assert_eq!(reg.model(c0).colors.len(), 1);
2766        assert_eq!(reg.model(c1).colors.len(), 2);
2767
2768        // One instance, in front of the test frustum, drawing frame 0.
2769        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(c0, [0.0, 0.0, 5.0])]);
2770        assert_eq!(res.cull[0].chain_id, c0);
2771
2772        // Flip to frame 1: the cull now draws chain c1 (radius reseeded).
2773        res.set_instance_model(&reg, 0, c1);
2774        assert_eq!(res.cull[0].chain_id, c1);
2775        assert_eq!(res.cull[0].radius, reg.model(c1).bound_radius());
2776
2777        // The next cull packs the new chain into the GPU instance buffer
2778        // (visible, no panic).
2779        let f = test_frustum();
2780        let (visible, _, _) = res.cull_bin_upload(&h.device, &h.queue, &f, 64, 64, 16, 1.0);
2781        assert_eq!(visible, 1);
2782
2783        // …and back to frame 0.
2784        res.set_instance_model(&reg, 0, c0);
2785        assert_eq!(res.cull[0].chain_id, c0);
2786
2787        // Out-of-range index is a safe no-op.
2788        res.set_instance_model(&reg, 99, c1);
2789        assert_eq!(res.cull[0].chain_id, c0);
2790    }
2791
2792    fn test_frustum() -> ViewFrustum {
2793        ViewFrustum {
2794            pos: [0.0, 0.0, 0.0],
2795            right: [1.0, 0.0, 0.0],
2796            down: [0.0, 1.0, 0.0],
2797            forward: [0.0, 0.0, 1.0],
2798            half_w: 1.0,
2799            half_h: 1.0,
2800            far: 10_000.0,
2801        }
2802    }
2803
2804    #[test]
2805    fn remove_model_tombstones_frees_and_reuses() {
2806        let Some(h) = headless() else { return };
2807        // Residency with models A and B, one instance each.
2808        let mut reg = SpriteModelRegistry::new();
2809        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2810        let b = reg.add(build_sprite_model(&kv6_other()));
2811        let mut res = SpriteRegistryResident::upload(
2812            &h.device,
2813            &reg,
2814            &[inst(a, [0.0; 3]), inst(b, [1.0, 0.0, 0.0])],
2815        );
2816        assert_eq!(res.live_model_count(), 2);
2817        assert_eq!(res.dead_model_count(), 0);
2818
2819        // Remove B → tombstoned, its colours freed into the pool.
2820        res.remove_model(b);
2821        assert_eq!(res.live_model_count(), 1);
2822        assert_eq!(res.dead_model_count(), 1);
2823        assert_eq!(res.dead.iter().filter(|&&d| d).count(), 1, "one entry dead");
2824        assert!(!res.colors_alloc.free.is_empty(), "B's colour slot freed");
2825
2826        // Adding C reuses the freed slot (free-list first-fit).
2827        let c = reg.add(build_sprite_model(&kv6_other()));
2828        res.add_model(&h.device, &h.queue, &reg, c);
2829        assert_eq!(res.live_model_count(), 2);
2830
2831        // A and C read back correctly; B is dead (skipped).
2832        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2833        for e in [a as usize, c as usize] {
2834            let m = &reg.entries[e];
2835            let cc = res.meta[e].colors_offset as usize;
2836            assert_eq!(
2837                &cols[cc..cc + m.colors.len()],
2838                &m.colors[..],
2839                "colors entry {e}"
2840            );
2841        }
2842
2843        // The lingering instance of removed B is skipped without panic.
2844        let f = test_frustum();
2845        let _ = res.cull_bin_upload(&h.device, &h.queue, &f, 64, 64, 16, 1.0);
2846    }
2847
2848    #[test]
2849    fn compact_reclaims_holes_keeps_ids_stable() {
2850        let Some(h) = headless() else { return };
2851        let mut reg = SpriteModelRegistry::new();
2852        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2853        let b = reg.add(build_sprite_model(&kv6_other()));
2854        let c = reg.add(build_sprite_model(&kv6_other()));
2855        let mut res = SpriteRegistryResident::upload(
2856            &h.device,
2857            &reg,
2858            &[inst(a, [0.0; 3]), inst(b, [1.0; 3]), inst(c, [2.0; 3])],
2859        );
2860        let occ_used_full = res.occ_used;
2861
2862        // Remove the middle model, then compact.
2863        res.remove_model(b);
2864        res.compact(&h.device, &h.queue, &reg);
2865
2866        // Holes reclaimed: occupancy now only covers A + C.
2867        let live_occ: u32 = [a, c]
2868            .iter()
2869            .map(|&e| reg.entries[e as usize].occupancy.len() as u32)
2870            .sum();
2871        assert_eq!(res.occ_used, live_occ);
2872        assert!(res.occ_used < occ_used_full, "compaction shrank occupancy");
2873        // Dead entry keeps a zeroed tombstone; ids unchanged.
2874        assert_eq!(res.meta[b as usize].occupancy_offset, 0);
2875        assert_eq!(res.live_model_count(), 2);
2876        assert_eq!(res.dead_model_count(), 1);
2877
2878        // Live entries read back correctly at their new offsets.
2879        let occ = read_u32(&h, &res.occupancy, u64::from(res.occ_cap));
2880        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2881        for &e in &[a as usize, c as usize] {
2882            let m = &reg.entries[e];
2883            let oo = res.meta[e].occupancy_offset as usize;
2884            assert_eq!(
2885                &occ[oo..oo + m.occupancy.len()],
2886                &m.occupancy[..],
2887                "occ {e}"
2888            );
2889            let cc = res.meta[e].colors_offset as usize;
2890            assert_eq!(&cols[cc..cc + m.colors.len()], &m.colors[..], "cols {e}");
2891        }
2892
2893        // Chain ids still valid: C's chain still resolves; B's is empty.
2894        assert!(!res.chains[c as usize].is_empty());
2895        assert!(res.chains[b as usize].is_empty());
2896    }
2897
2898    #[test]
2899    fn remove_swap_semantics_and_capacity_retained() {
2900        let Some(h) = headless() else { return };
2901        let (reg, m) = one_model_registry();
2902        let seed: Vec<_> = (0..4).map(|i| inst(m, [i as f32, 0.0, 0.0])).collect();
2903        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &seed);
2904        assert_eq!(res.instance_count(), 4);
2905        let cap = res.instance_capacity;
2906
2907        // Remove a middle element → the previous last (idx 3) moved into it.
2908        assert_eq!(res.remove_instance(1), Some(3));
2909        assert_eq!(res.instance_count(), 3);
2910
2911        // Remove the current last (idx 2) → nothing moved.
2912        assert_eq!(res.remove_instance(2), None);
2913        assert_eq!(res.instance_count(), 2);
2914
2915        // Out of range → None.
2916        assert_eq!(res.remove_instance(99), None);
2917        assert_eq!(res.instance_count(), 2);
2918
2919        // Capacity is retained for reuse (no shrink).
2920        assert_eq!(res.instance_capacity, cap);
2921    }
2922}