Skip to main content

roxlap_gpu/
sprite_model.rs

1//! GPU.10 — KV6 sprite as a DDA-marchable voxel model.
2//!
3//! Unlike the GPU.9 splatter (one thread per voxel, screen-space
4//! squares, overdraw + atomic contention), a sprite model is a small
5//! voxel volume the precise ray-DDA marches one ray per pixel —
6//! crisp, correct occlusion, no overdraw. This is the GPU.10.0 single
7//! sprite; instancing + tiling + LOD come in later sub-substages.
8//!
9//! The volume reuses the chunk occupancy/colour scheme but sized to
10//! the KV6 bbox: per-column occupancy bitmask (`occ_words_per_col`
11//! u32s, `CHUNK_Z`-style 32-bits-per-word), a flat colour array in
12//! ascending-z order per column, and a `color_offsets` prefix table.
13//! The shader finds a voxel's colour by `offset[col] + popcount(bits
14//! below z)`, so colours MUST be ascending-z (we sort per column).
15
16#![allow(
17    clippy::cast_precision_loss,
18    clippy::cast_possible_truncation,
19    clippy::cast_possible_wrap,
20    clippy::cast_sign_loss,
21    clippy::many_single_char_names,
22    clippy::similar_names
23)]
24
25use bytemuck::{Pod, Zeroable};
26use roxlap_formats::kv6::Kv6;
27use roxlap_formats::material::material_for_color;
28use roxlap_formats::sprite::Sprite;
29use roxlap_formats::voxel_clip::{DecodedClip, VoxelFrame};
30
/// CPU-built voxel volume for one KV6 model.
///
/// Columns are indexed `col = x + y * mx`. The per-voxel arrays
/// ([`colors`](Self::colors), [`dirs`](Self::dirs) and, when present,
/// [`materials`](Self::materials)) share one ordering: column after
/// column, ascending z inside each column.
#[derive(Debug, Clone)]
pub struct SpriteModel {
    /// Voxel extent `(mx, my, mz)`.
    pub dims: [u32; 3],
    /// `max(1, ceil(mz / 32))` — u32 words of occupancy per (x, y) column.
    pub occ_words_per_col: u32,
    /// KV6 pivot in model-local voxel space.
    pub pivot: [f32; 3],
    /// Per-column occupancy bitmask, `mx * my * occ_words_per_col` words.
    /// Voxel `z` of column `col` is bit `z & 31` of word
    /// `col * occ_words_per_col + (z >> 5)`.
    pub occupancy: Vec<u32>,
    /// Voxel colours, ascending z within each column.
    pub colors: Vec<u32>,
    /// Per-voxel surface-normal index (`Kv6::Voxel::dir`, 0..256),
    /// parallel to [`colors`](Self::colors). The GPU sprite shader uses
    /// it to index the per-instance `kv6colmul` lighting table, matching
    /// the CPU rasteriser's normal-based shading.
    pub dirs: Vec<u32>,
    /// Prefix sums: `color_offsets[col]` is the first colour index of
    /// column `col`; length `mx * my + 1`, the last entry being the total
    /// voxel count.
    pub color_offsets: Vec<u32>,
    /// Per-voxel material id (TV.3), parallel to [`colors`](Self::colors).
    /// **Empty** means the model has no per-voxel materials — every voxel
    /// uses the instance's uniform material (the TV.1/TV.2 path). A non-empty
    /// array gives mixed-material models (opaque frame + glass). Built by
    /// [`build_sprite_model_with_materials`].
    pub materials: Vec<u8>,
    /// World-space size of one voxel of this model (GPU.10.4 LOD): 1.0
    /// at mip-0, doubling each [`SpriteModel::downsample`]. The shader
    /// divides the local ray by this so a coarse voxel spans the right
    /// world extent and the march `t` stays in world units.
    pub voxel_world_size: f32,
}
64
65/// Build the DDA volume from a KV6. Columns are packed in
66/// `x + y*mx` order; each column's voxels are sorted ascending by z
67/// so the shader's popcount-rank colour lookup is correct.
68///
69/// # Panics
70/// If the KV6's `ylen` counters disagree with `voxels.len()` (a
71/// malformed model).
72#[must_use]
73pub fn build_sprite_model(kv6: &Kv6) -> SpriteModel {
74    build_sprite_model_inner(kv6, &[])
75}
76
/// Build the DDA volume from a KV6, classifying each voxel into a per-voxel
/// **material id** by colour (TV.3 mixed models) via `material_map`
/// (`(rgb, material_id)` pairs; see
/// [`material_for_color`](roxlap_formats::material::material_for_color)).
/// An empty map produces a model with no per-voxel materials (identical to
/// [`build_sprite_model`]).
///
/// The resulting `materials` array is parallel to `colors`: one id per
/// voxel, in the same column-major / ascending-z order.
///
/// # Panics
/// As [`build_sprite_model`].
#[must_use]
pub fn build_sprite_model_with_materials(kv6: &Kv6, material_map: &[(u32, u8)]) -> SpriteModel {
    // Thin public entry point; all the work happens in the shared builder.
    build_sprite_model_inner(kv6, material_map)
}
90
91fn build_sprite_model_inner(kv6: &Kv6, material_map: &[(u32, u8)]) -> SpriteModel {
92    let (mx, my, mz) = (kv6.xsiz, kv6.ysiz, kv6.zsiz);
93    let occ_words_per_col = mz.div_ceil(32).max(1);
94    let cols = (mx * my) as usize;
95    let want_mats = !material_map.is_empty();
96
97    let mut occupancy = vec![0u32; cols * occ_words_per_col as usize];
98    let mut color_offsets = vec![0u32; cols + 1];
99    let mut colors: Vec<u32> = Vec::with_capacity(kv6.voxels.len());
100    let mut dirs: Vec<u32> = Vec::with_capacity(kv6.voxels.len());
101    let mut materials: Vec<u8> = if want_mats {
102        Vec::with_capacity(kv6.voxels.len())
103    } else {
104        Vec::new()
105    };
106
107    // Pass 1 — consume voxels in KV6 storage order (x-outer / y-inner)
108    // into per-column buckets keyed by `col = x + y*mx`. Each entry is
109    // `(z, colour, normal-dir)`.
110    let mut buckets: Vec<Vec<(u16, u32, u8)>> = vec![Vec::new(); cols];
111    let mut voxel_iter = kv6.voxels.iter();
112    for x in 0..mx {
113        for y in 0..my {
114            let col = (x + y * mx) as usize;
115            let count = kv6.ylen[x as usize][y as usize];
116            for _ in 0..count {
117                let v = voxel_iter.next().expect("KV6 ylen / voxels.len mismatch");
118                buckets[col].push((v.z, v.col, v.dir));
119            }
120        }
121    }
122
123    // Pass 2 — emit in COLUMN-INDEX order so `color_offsets` is a true
124    // monotonic prefix sum (the shader indexes by `col` either way, but
125    // structural edits / mip rebuilds rely on monotonic offsets). Each
126    // column's voxels sorted ascending z for the popcount-rank lookup.
127    for (col, bucket) in buckets.iter_mut().enumerate() {
128        color_offsets[col] = colors.len() as u32;
129        bucket.sort_by_key(|(z, _, _)| *z);
130        for &(z, col_rgba, dir) in bucket.iter() {
131            let z = u32::from(z);
132            let base = col * occ_words_per_col as usize + (z >> 5) as usize;
133            occupancy[base] |= 1u32 << (z & 31);
134            colors.push(col_rgba);
135            dirs.push(u32::from(dir));
136            if want_mats {
137                materials.push(material_for_color(material_map, col_rgba));
138            }
139        }
140    }
141    color_offsets[cols] = colors.len() as u32;
142
143    SpriteModel {
144        dims: [mx, my, mz],
145        occ_words_per_col,
146        pivot: [kv6.xpiv, kv6.ypiv, kv6.zpiv],
147        occupancy,
148        color_offsets,
149        colors,
150        dirs,
151        materials,
152        voxel_world_size: 1.0,
153    }
154}
155
156/// Build a [`SpriteModel`] directly from a decoded voxel-clip frame
157/// (VCL.2). The [`VoxelFrame`] dense-column layout is byte-for-byte the
158/// [`SpriteModel`] layout that [`build_sprite_model`] produces, so this is
159/// a field move — no per-column bucket-sort. `dirs` is the frame's
160/// surface-normal LUT indices (from [`DecodedClip::dirs`]), parallel to
161/// `frame.colors`.
162///
163/// # Panics
164/// In debug, if `dirs.len() != frame.colors.len()` or the field shapes
165/// don't match `dims` (the same invariants [`build_sprite_model`] upholds).
166#[must_use]
167pub fn sprite_model_from_voxel_frame(
168    frame: &VoxelFrame,
169    dirs: &[u32],
170    dims: [u32; 3],
171    pivot: [f32; 3],
172    voxel_world_size: f32,
173) -> SpriteModel {
174    sprite_model_from_voxel_frame_with_materials(frame, dirs, dims, pivot, voxel_world_size, &[])
175}
176
177/// Like [`sprite_model_from_voxel_frame`] but classifies each voxel into a
178/// per-voxel **material id** by colour (TV.3 mixed models) via `material_map`
179/// (`(rgb, material_id)` pairs). An empty map produces a model with no
180/// per-voxel materials (identical to [`sprite_model_from_voxel_frame`]).
181///
182/// # Panics
183/// As [`sprite_model_from_voxel_frame`].
184#[must_use]
185pub fn sprite_model_from_voxel_frame_with_materials(
186    frame: &VoxelFrame,
187    dirs: &[u32],
188    dims: [u32; 3],
189    pivot: [f32; 3],
190    voxel_world_size: f32,
191    material_map: &[(u32, u8)],
192) -> SpriteModel {
193    let occ_words_per_col = dims[2].div_ceil(32).max(1);
194    let cols = (dims[0] * dims[1]) as usize;
195    debug_assert_eq!(frame.occupancy.len(), cols * occ_words_per_col as usize);
196    debug_assert_eq!(frame.color_offsets.len(), cols + 1);
197    debug_assert_eq!(dirs.len(), frame.colors.len());
198    // Per-voxel materials are parallel to `colors` (popcount-rank order), so
199    // classify the frame's colour run directly — no re-index needed.
200    let materials: Vec<u8> = if material_map.is_empty() {
201        Vec::new()
202    } else {
203        frame
204            .colors
205            .iter()
206            .map(|&c| material_for_color(material_map, c))
207            .collect()
208    };
209    SpriteModel {
210        dims,
211        occ_words_per_col,
212        pivot,
213        occupancy: frame.occupancy.clone(),
214        colors: frame.colors.clone(),
215        dirs: dirs.to_vec(),
216        color_offsets: frame.color_offsets.clone(),
217        materials,
218        voxel_world_size,
219    }
220}
221
222/// Build the [`SpriteModel`] for frame `frame` of a decoded clip — the
223/// per-frame model uploaded into a flipbook chain (VCL.2).
224///
225/// # Panics
226/// If `frame` is out of range, or the frame fails the layout invariants.
227#[must_use]
228pub fn sprite_model_from_clip_frame(clip: &DecodedClip, frame: usize) -> SpriteModel {
229    sprite_model_from_clip_frame_with_materials(clip, frame, &[])
230}
231
232/// Like [`sprite_model_from_clip_frame`] but classifies the frame's voxels
233/// into per-voxel material ids by colour (TV.3 mixed models) via
234/// `material_map`. An empty map is identical to [`sprite_model_from_clip_frame`].
235///
236/// # Panics
237/// If `frame` is out of range, or the frame fails the layout invariants.
238#[must_use]
239pub fn sprite_model_from_clip_frame_with_materials(
240    clip: &DecodedClip,
241    frame: usize,
242    material_map: &[(u32, u8)],
243) -> SpriteModel {
244    sprite_model_from_voxel_frame_with_materials(
245        &clip.frames[frame],
246        &clip.dirs[frame],
247        clip.dims,
248        clip.pivot,
249        clip.voxel_world_size,
250        material_map,
251    )
252}
253
/// Per-instance transform consumed by the model-DDA shader: the
/// inverse model→world rotation (so a world ray can be brought into
/// model-local space) plus the instance's world position. Stored as
/// three padded columns for std140/std430 (`mat3x3` 16-byte columns).
///
/// `#[repr(C)]` + `Pod` so the struct can be handed to the GPU as raw
/// bytes; the explicit padding fields keep it free of implicit padding.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable, Debug)]
pub struct SpriteInstanceTransform {
    /// Inverse of `[s | h | f]`, column-major, each column padded to
    /// `vec4`. `inv_rot * v = c0*v.x + c1*v.y + c2*v.z`.
    pub inv_rot: [[f32; 4]; 3],
    /// Instance world position (the KV6 pivot maps here).
    pub pos: [f32; 3],
    // Explicit tail padding so `pos` also occupies a full 16-byte slot.
    _pad: f32,
}
268
269impl SpriteInstanceTransform {
270    /// Build from a sprite pose. `s/h/f` are the model→world basis
271    /// columns; we invert them so the shader can map world→local.
272    #[must_use]
273    pub fn from_sprite(sprite: &Sprite) -> Self {
274        let inv = mat3_inverse([sprite.s, sprite.h, sprite.f]);
275        Self {
276            inv_rot: [
277                [inv[0][0], inv[0][1], inv[0][2], 0.0],
278                [inv[1][0], inv[1][1], inv[1][2], 0.0],
279                [inv[2][0], inv[2][1], inv[2][2], 0.0],
280            ],
281            pos: sprite.p,
282            _pad: 0.0,
283        }
284    }
285}
286
/// A registry of sprite models. Instances reference a model by
/// `model_id`, which is a **LOD chain** id: each chain holds one or
/// more concrete mip levels (finest first; GPU.10.4), and the renderer
/// picks the level per instance by distance. Identical KV6s are added
/// once and shared by many instances. **Copy-on-modify**:
/// [`Self::fork`] deep-copies a chain so edits to the fork leave the
/// parent (and its instances) intact.
///
/// Ids are never reused: removing a chain leaves an empty chain slot and
/// placeholder entries behind (see [`Self::remove`]).
#[derive(Debug, Clone, Default)]
pub struct SpriteModelRegistry {
    /// Concrete mip-level volumes (the GPU buffers concatenate these).
    /// Indexed by entry id; only ever appended to or overwritten in place.
    entries: Vec<SpriteModel>,
    /// `chains[model_id]` = entry ids, finest (mip-0) first. An empty
    /// list marks a removed (tombstoned) chain.
    chains: Vec<Vec<u32>>,
}
301
302impl SpriteModelRegistry {
303    #[must_use]
304    pub fn new() -> Self {
305        Self::default()
306    }
307
308    fn push_entry(&mut self, model: SpriteModel) -> u32 {
309        let id = self.entries.len() as u32;
310        self.entries.push(model);
311        id
312    }
313
314    /// Register a single-level (no-LOD) model; returns its `model_id`.
315    pub fn add(&mut self, model: SpriteModel) -> u32 {
316        let e = self.push_entry(model);
317        let id = self.chains.len() as u32;
318        self.chains.push(vec![e]);
319        id
320    }
321
322    /// Register a model with up to `max_levels` LOD mips (each a 2×
323    /// [`SpriteModel::downsample`] of the previous; stops early once a
324    /// level collapses to 1³). Returns its `model_id`.
325    pub fn add_lod(&mut self, model: SpriteModel, max_levels: u32) -> u32 {
326        let mut levels = vec![self.push_entry(model.clone())];
327        let mut cur = model;
328        for _ in 1..max_levels.max(1) {
329            if cur.dims == [1, 1, 1] {
330                break;
331            }
332            cur = cur.downsample();
333            levels.push(self.push_entry(cur.clone()));
334        }
335        let id = self.chains.len() as u32;
336        self.chains.push(levels);
337        id
338    }
339
340    /// Copy-on-modify: deep-copy every level of chain `parent` into new
341    /// entries + a new chain, and return its `model_id`. The fork owns
342    /// independent voxel data, so mutating it does not affect the
343    /// parent or any instance still pointing at it.
344    ///
345    /// # Panics
346    /// If `parent` is not a registered `model_id`.
347    pub fn fork(&mut self, parent: u32) -> u32 {
348        let src = self.chains[parent as usize].clone();
349        let levels: Vec<u32> = src
350            .iter()
351            .map(|&e| {
352                let copy = self.entries[e as usize].clone();
353                self.push_entry(copy)
354            })
355            .collect();
356        let id = self.chains.len() as u32;
357        self.chains.push(levels);
358        id
359    }
360
361    /// The finest (mip-0) model of chain `id`.
362    #[must_use]
363    pub fn model(&self, id: u32) -> &SpriteModel {
364        &self.entries[self.chains[id as usize][0] as usize]
365    }
366
367    /// Like [`Self::model`] but returns `None` for an out-of-range or
368    /// tombstoned (emptied) chain instead of panicking — the guarded form
369    /// for public primitives handed an arbitrary `chain_id`.
370    #[must_use]
371    pub fn model_checked(&self, id: u32) -> Option<&SpriteModel> {
372        let entry = *self.chains.get(id as usize)?.first()?;
373        self.entries.get(entry as usize)
374    }
375
376    /// Mutable access to the finest (mip-0) model for editing — the
377    /// copy-on-modify entry point (typically on a [`Self::fork`]).
378    /// After a *structural* edit (occupancy/dims), call
379    /// [`Self::rebuild_lod`] so the coarser mips match; a pure recolour
380    /// can use [`Self::recolor_chain`] instead.
381    pub fn model_mut(&mut self, id: u32) -> &mut SpriteModel {
382        let e = self.chains[id as usize][0] as usize;
383        &mut self.entries[e]
384    }
385
386    /// Recolour every LOD level of chain `id` (so a forked tint shows
387    /// at all distances).
388    pub fn recolor_chain(&mut self, id: u32, f: impl Fn(u32) -> u32 + Copy) {
389        for li in 0..self.chains[id as usize].len() {
390            let e = self.chains[id as usize][li] as usize;
391            self.entries[e].recolor(f);
392        }
393    }
394
395    /// Regenerate chain `id`'s coarser mip levels from its (possibly
396    /// just-edited) mip-0. Run after a structural edit via
397    /// [`Self::model_mut`] so the LOD ladder stays consistent. No-op
398    /// for a single-level (no-LOD) chain.
399    pub fn rebuild_lod(&mut self, id: u32) {
400        let levels = self.chains[id as usize].clone();
401        if levels.len() <= 1 {
402            return;
403        }
404        let mut cur = self.entries[levels[0] as usize].clone();
405        for &e in &levels[1..] {
406            cur = cur.downsample();
407            self.entries[e as usize] = cur.clone();
408        }
409    }
410
411    /// Free chain `chain_id`'s voxel data **in place**: replace each of
412    /// its LOD entries with [`SpriteModel::empty`] and clear the chain.
413    /// Entry ids and every other `model_id` are **preserved** (the chain
414    /// becomes empty, its entries become placeholders), so no id remap is
415    /// needed and the resident registry's entry alignment stays intact.
416    ///
417    /// This is safe to pair with the resident side because
418    /// [`SpriteRegistryResident::remove_model`] tombstones the same
419    /// entries (`dead[e]`) and [`compact`](SpriteRegistryResident::compact)
420    /// reads only live entries — so the resident never touches the empty
421    /// placeholders left here. Call `remove_model` (resident) **before**
422    /// this so those tombstones are set. No-op if `chain_id` is out of
423    /// range or already removed.
424    pub fn remove(&mut self, chain_id: u32) {
425        let Some(entries) = self.chains.get(chain_id as usize) else {
426            return;
427        };
428        // Clone the small id list so we can mutate `entries` while iterating.
429        let entries = entries.clone();
430        for e in entries {
431            self.entries[e as usize] = SpriteModel::empty();
432        }
433        self.chains[chain_id as usize] = Vec::new(); // tombstone (slot kept)
434    }
435
436    /// Whether `chain_id` is a live (registered, not [`removed`](Self::remove))
437    /// model. `false` for an out-of-range id or a tombstoned chain.
438    #[must_use]
439    pub fn is_live(&self, chain_id: u32) -> bool {
440        self.chains
441            .get(chain_id as usize)
442            .is_some_and(|c| !c.is_empty())
443    }
444
    /// Number of LOD chains (distinct `model_id`s). Counts tombstoned
    /// (removed) chains too — ids are never reused, so this is also the
    /// next id that [`Self::add`] / [`Self::add_lod`] will mint.
    ///
    /// Use [`Self::is_live`] to tell live chains from removed ones.
    #[must_use]
    pub fn len(&self) -> usize {
        self.chains.len()
    }
452
    /// `true` when no chain has ever been registered. Removed chains keep
    /// their slot, so a registry whose chains were all removed is still
    /// not "empty" by this measure.
    #[must_use]
    pub fn is_empty(&self) -> bool {
        self.chains.is_empty()
    }
457}
458
459impl SpriteModel {
460    /// An empty (zero-voxel, zero-extent) placeholder model. Used by
461    /// [`SpriteModelRegistry::remove`] to free a removed chain's voxel
462    /// data while keeping its entry slot, so ids stay stable. Carries no
463    /// occupancy/colours; `color_offsets` is the single-element prefix
464    /// `[0]` (`cols + 1` with `cols == 0`), keeping the structural
465    /// invariant intact for any code that inspects it.
466    #[must_use]
467    pub fn empty() -> Self {
468        Self {
469            dims: [0, 0, 0],
470            occ_words_per_col: 1,
471            pivot: [0.0, 0.0, 0.0],
472            occupancy: Vec::new(),
473            colors: Vec::new(),
474            dirs: Vec::new(),
475            color_offsets: vec![0],
476            materials: Vec::new(),
477            voxel_world_size: 1.0,
478        }
479    }
480
481    /// Recolour every voxel via `f(old_rgba) -> new_rgba`. Structure
482    /// (occupancy / offsets) is untouched, so this is a cheap in-place
483    /// edit — handy on a [`SpriteModelRegistry::fork`] to make a tinted
484    /// variant. For structural edits, mutate the public occupancy /
485    /// colours / dims directly (via `model_mut`) then rebuild the LOD.
486    pub fn recolor(&mut self, f: impl Fn(u32) -> u32) {
487        for c in &mut self.colors {
488            *c = f(*c);
489        }
490    }
491
492    /// GPU.12 — structural edit of a single voxel within the model's
493    /// existing bounds. `Some(rgba)` sets/replaces the voxel at
494    /// `(x, y, z)`; `None` clears it. Maintains the ascending-z colour
495    /// invariant by inserting/removing at the voxel's popcount rank and
496    /// shifting the affected columns' `color_offsets`. Returns `true`
497    /// if the model changed. Out-of-bounds coordinates are ignored
498    /// (returns `false`) — growing `dims` is a separate concern.
499    ///
500    /// After editing, call [`SpriteModelRegistry::rebuild_lod`] to
501    /// refresh coarser mips, then re-upload via `set_sprite_instances`.
502    pub fn set_voxel(&mut self, x: u32, y: u32, z: u32, color: Option<u32>) -> bool {
503        if x >= self.dims[0] || y >= self.dims[1] || z >= self.dims[2] {
504            return false;
505        }
506        let owpc = self.occ_words_per_col as usize;
507        let cols = (self.dims[0] * self.dims[1]) as usize;
508        let col = (x + y * self.dims[0]) as usize;
509        let base = col * owpc;
510        let zw = (z >> 5) as usize;
511        let zb = z & 31;
512
513        // Rank = solid voxels strictly below z in this column.
514        let mut rank = 0usize;
515        for w in 0..zw {
516            rank += self.occupancy[base + w].count_ones() as usize;
517        }
518        let below_mask = if zb > 0 { (1u32 << zb) - 1 } else { 0 };
519        rank += (self.occupancy[base + zw] & below_mask).count_ones() as usize;
520        let idx = self.color_offsets[col] as usize + rank;
521        let was_set = (self.occupancy[base + zw] >> zb) & 1 == 1;
522
523        if let Some(rgba) = color {
524            if was_set {
525                self.colors[idx] = rgba; // replace in place (keeps dir)
526            } else {
527                self.occupancy[base + zw] |= 1u32 << zb;
528                self.colors.insert(idx, rgba);
529                // No normal supplied by this API — default to dir 0 (the
530                // sole caller, the carve hotkey, only ever clears).
531                self.dirs.insert(idx, 0);
532                if !self.materials.is_empty() {
533                    self.materials.insert(idx, 0); // new voxel → opaque material
534                }
535                for c in &mut self.color_offsets[col + 1..=cols] {
536                    *c += 1;
537                }
538            }
539            true
540        } else {
541            if !was_set {
542                return false;
543            }
544            self.occupancy[base + zw] &= !(1u32 << zb);
545            self.colors.remove(idx);
546            self.dirs.remove(idx);
547            if !self.materials.is_empty() {
548                self.materials.remove(idx);
549            }
550            for c in &mut self.color_offsets[col + 1..=cols] {
551                *c -= 1;
552            }
553            true
554        }
555    }
556
557    /// Radius of a bounding sphere centred at the instance position
558    /// (the pivot maps there): the farthest bbox corner from the
559    /// pivot. Used for frustum culling. Assumes a unit basis; scaled
560    /// instances would multiply this by their max basis length.
561    #[must_use]
562    pub fn bound_radius(&self) -> f32 {
563        let mut r2 = 0.0_f32;
564        for &cx in &[0.0, self.dims[0] as f32] {
565            for &cy in &[0.0, self.dims[1] as f32] {
566                for &cz in &[0.0, self.dims[2] as f32] {
567                    let d = [cx - self.pivot[0], cy - self.pivot[1], cz - self.pivot[2]];
568                    r2 = r2.max(d[0] * d[0] + d[1] * d[1] + d[2] * d[2]);
569                }
570            }
571        }
572        r2.sqrt()
573    }
574
575    /// GPU.10.4 — 2× voxel downsample for the next LOD level. A coarse
576    /// voxel is solid if any of its 2×2×2 fine voxels is, coloured by
577    /// their per-channel average. Dims/pivot halve and
578    /// `voxel_world_size` doubles, so the coarse model occupies the
579    /// same world box at half the resolution (origin-corner aligned).
580    #[must_use]
581    #[allow(clippy::manual_checked_ops)] // `n > 0` guards 4 divisions, not one checked_div
582    pub fn downsample(&self) -> SpriteModel {
583        let [fx, fy, fz] = self.dims;
584        let fidx = |x: u32, y: u32, z: u32| (x + y * fx + z * fx * fy) as usize;
585
586        // Reconstruct dense fine voxels (solid flag + colour + normal + TV
587        // material).
588        let has_mats = !self.materials.is_empty();
589        let mut solid = vec![false; (fx * fy * fz) as usize];
590        let mut fine = vec![0u32; (fx * fy * fz) as usize];
591        let mut fine_dir = vec![0u32; (fx * fy * fz) as usize];
592        let mut fine_mat = vec![0u8; (fx * fy * fz) as usize];
593        for x in 0..fx {
594            for y in 0..fy {
595                let col = (x + y * fx) as usize;
596                let base = col * self.occ_words_per_col as usize;
597                let off = self.color_offsets[col] as usize;
598                let mut seen = 0usize;
599                for z in 0..fz {
600                    let w = base + (z >> 5) as usize;
601                    if (self.occupancy[w] >> (z & 31)) & 1 == 1 {
602                        fine[fidx(x, y, z)] = self.colors[off + seen];
603                        fine_dir[fidx(x, y, z)] = self.dirs[off + seen];
604                        if has_mats {
605                            fine_mat[fidx(x, y, z)] = self.materials[off + seen];
606                        }
607                        solid[fidx(x, y, z)] = true;
608                        seen += 1;
609                    }
610                }
611            }
612        }
613
614        let nx = fx.div_ceil(2).max(1);
615        let ny = fy.div_ceil(2).max(1);
616        let nz = fz.div_ceil(2).max(1);
617        let owpc = nz.div_ceil(32).max(1);
618        let cols = (nx * ny) as usize;
619        let mut occupancy = vec![0u32; cols * owpc as usize];
620        let mut color_offsets = vec![0u32; cols + 1];
621        let mut colors: Vec<u32> = Vec::new();
622        let mut dirs: Vec<u32> = Vec::new();
623        let mut materials: Vec<u8> = Vec::new();
624
625        // Emit in column-index order (`ccol = cx + cy*nx`), cy outer,
626        // so `color_offsets` is a monotonic prefix sum like build's.
627        for cy in 0..ny {
628            for cx in 0..nx {
629                let ccol = (cx + cy * nx) as usize;
630                color_offsets[ccol] = colors.len() as u32;
631                for cz in 0..nz {
632                    let (mut a, mut r, mut g, mut b, mut n) = (0u32, 0u32, 0u32, 0u32, 0u32);
633                    // Normals + materials don't average meaningfully — keep
634                    // the first solid child's `dir` / material for the coarse
635                    // voxel.
636                    let mut rep_dir = 0u32;
637                    let mut rep_mat = 0u8;
638                    for dz in 0..2 {
639                        for dy in 0..2 {
640                            for dx in 0..2 {
641                                let (x, y, z) = (2 * cx + dx, 2 * cy + dy, 2 * cz + dz);
642                                if x < fx && y < fy && z < fz && solid[fidx(x, y, z)] {
643                                    let c = fine[fidx(x, y, z)];
644                                    if n == 0 {
645                                        rep_dir = fine_dir[fidx(x, y, z)];
646                                        rep_mat = fine_mat[fidx(x, y, z)];
647                                    }
648                                    a += (c >> 24) & 0xff;
649                                    r += (c >> 16) & 0xff;
650                                    g += (c >> 8) & 0xff;
651                                    b += c & 0xff;
652                                    n += 1;
653                                }
654                            }
655                        }
656                    }
657                    if n > 0 {
658                        let avg = ((a / n) << 24) | ((r / n) << 16) | ((g / n) << 8) | (b / n);
659                        let base = ccol * owpc as usize + (cz >> 5) as usize;
660                        occupancy[base] |= 1u32 << (cz & 31);
661                        colors.push(avg);
662                        dirs.push(rep_dir);
663                        if has_mats {
664                            materials.push(rep_mat);
665                        }
666                    }
667                }
668            }
669        }
670        color_offsets[cols] = colors.len() as u32;
671
672        SpriteModel {
673            dims: [nx, ny, nz],
674            occ_words_per_col: owpc,
675            pivot: [
676                self.pivot[0] * 0.5,
677                self.pivot[1] * 0.5,
678                self.pivot[2] * 0.5,
679            ],
680            occupancy,
681            colors,
682            dirs,
683            color_offsets,
684            materials,
685            voxel_world_size: self.voxel_world_size * 2.0,
686        }
687    }
688}
689
/// View frustum for CPU instance culling, in world space. Built each
/// frame from the world camera. `half_w`/`half_h` are the tangents of
/// the half-FOV (so the side planes are `|x| <= half_w * z` etc. in
/// camera space).
#[derive(Clone, Copy, Debug)]
pub struct ViewFrustum {
    /// Camera position.
    pub pos: [f32; 3],
    /// Camera-space x axis expressed in world space — presumably unit
    /// length; TODO confirm against the code that builds the frustum.
    pub right: [f32; 3],
    /// Camera-space y axis (pointing down the screen) in world space —
    /// same unit-length assumption as `right`.
    pub down: [f32; 3],
    /// Camera-space z axis (view direction) in world space — same
    /// unit-length assumption as `right`.
    pub forward: [f32; 3],
    /// Tangent of the horizontal half-FOV.
    pub half_w: f32,
    /// Tangent of the vertical half-FOV.
    pub half_h: f32,
    /// Far limit of the frustum — presumably a distance along `forward`;
    /// verify against the cull code.
    pub far: f32,
}
704
/// CPU cull record: the GPU instance + its world bounding sphere.
/// Not `Copy` — carries a boxed 256-entry `kv6colmul` table.
#[derive(Clone)]
struct CullInstance {
    /// Instance transform + a placeholder `model_id`; the cull
    /// overwrites `model_id` with the distance-chosen LOD entry.
    gpu: SpriteInstanceGpu,
    /// LOD chain this instance draws (the user-facing `model_id`).
    chain_id: u32,
    /// Bounding-sphere centre; `make_cull` seeds it with the instance's
    /// world position.
    center: [f32; 3],
    /// Bounding-sphere radius; `make_cull` seeds it with the
    /// `bound_radius()` of the chain's finest model.
    radius: f32,
    /// voxlap `kv6colmul[256]` — per-surface-normal colour modulation
    /// for this instance's pose + lighting. Defaults to identity
    /// (`0x0100` in every channel lane → unshaded) until the facade sets
    /// it via [`SpriteRegistryResident::set_instance_colmul`]. Packed
    /// into the `colmul` GPU buffer (in visible order) each frame.
    colmul: Box<[u64; 256]>,
}
723
/// Build the identity `kv6colmul` table: 256 copies of a word whose four
/// 16-bit lanes all hold `0x0100`. With that multiplier the shader's
/// `(rgb[c] << 8) * 0x0100 >> 16` gives back `rgb[c]`, i.e. no shading.
fn identity_colmul() -> Box<[u64; 256]> {
    // OR the lane value into each of the four 16-bit lanes of a u64.
    let word = (0..4u32).fold(0u64, |packed, lane| packed | (0x0100u64 << (16 * lane)));
    Box::new([word; 256])
}
731
/// Dot product of two 3-vectors.
fn dot3(a: [f32; 3], b: [f32; 3]) -> f32 {
    let [ax, ay, az] = a;
    let [bx, by, bz] = b;
    ax * bx + ay * by + az * bz
}
735
736/// Build one CPU cull record from a user [`SpriteInstance`]: pack the
737/// transform, seed the bounding sphere from the chain's finest model, and
738/// start `colmul` at identity. Shared by the full
739/// [`SpriteRegistryResident::upload`] and the incremental
740/// [`SpriteRegistryResident::append_instances`].
741fn make_cull(registry: &SpriteModelRegistry, i: &SpriteInstance) -> CullInstance {
742    CullInstance {
743        gpu: SpriteInstanceGpu {
744            inv_rot0: i.transform.inv_rot[0],
745            inv_rot1: i.transform.inv_rot[1],
746            inv_rot2: i.transform.inv_rot[2],
747            pos: i.transform.pos,
748            model_id: i.model_id, // placeholder; cull rewrites per frame
749            material: u32::from(i.material),
750            alpha_mul: f32::from(i.alpha_mul) / 255.0,
751            _pad0: 0,
752            _pad1: 0,
753        },
754        chain_id: i.model_id,
755        center: i.transform.pos,
756        radius: registry.model(i.model_id).bound_radius(),
757        colmul: identity_colmul(),
758    }
759}
760
761/// Allocate the `instances` capacity buffer (`STORAGE | COPY_DST`) sized
762/// for `cap` records (≥1). Left uninitialised — `cull_bin_upload`
763/// rewrites it (offset 0) each frame, and `append_instances` seeds the
764/// live records after a grow.
765fn instances_buffer(device: &wgpu::Device, cap: u32) -> wgpu::Buffer {
766    device.create_buffer(&wgpu::BufferDescriptor {
767        label: Some("roxlap-gpu sprite_reg.instances"),
768        size: u64::from(cap.max(1)) * std::mem::size_of::<SpriteInstanceGpu>() as u64,
769        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
770        mapped_at_creation: false,
771    })
772}
773
/// One sprite instance: a model reference + world pose.
///
/// This is the user-facing description; `make_cull` converts it into the
/// internal cull record / GPU layout.
#[derive(Debug, Clone, Copy)]
pub struct SpriteInstance {
    /// Which model to draw. Internally this is treated as the id of the
    /// model's LOD chain; the per-frame cull picks the concrete entry.
    pub model_id: u32,
    /// World pose: supplies the inverse-rotation vectors (`inv_rot`) and
    /// the position (`pos`), which also centres the cull bounding sphere.
    pub transform: SpriteInstanceTransform,
    /// Voxel-material id (TV stage): indexes the renderer's global material
    /// palette for this instance's opacity + blend mode. `0` (the default)
    /// is opaque, so an unset instance renders unchanged.
    pub material: u8,
    /// Per-instance alpha multiplier (TV stage), `0..=255` (`255` =
    /// unscaled, the default). Normalised to `0..=1` for the GPU.
    pub alpha_mul: u8,
}
787
788impl SpriteInstance {
789    /// A model reference + pose with the default opaque material
790    /// (`material = 0`, `alpha_mul = 255`).
791    #[must_use]
792    pub fn new(model_id: u32, transform: SpriteInstanceTransform) -> Self {
793        Self {
794            model_id,
795            transform,
796            material: 0,
797            alpha_mul: 255,
798        }
799    }
800}
801
/// GPU per-model metadata: where this model's data starts in the
/// shared registry buffers + its dims/pivot. Mirrors `ModelMeta` in
/// the shader (std430, 48 bytes).
///
/// Field order and types are the layout contract with the shader — do
/// not reorder. All `*_offset` fields are in `u32` words, not bytes
/// (writers multiply by 4 to get a byte offset).
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable, Debug)]
struct SpriteModelMeta {
    /// Word offset of this entry's occupancy bitmask in the shared
    /// `occupancy` buffer.
    occupancy_offset: u32,
    /// Word offset of this entry's slot in the shared `colors` buffer;
    /// the same offset addresses its `dirs` and `materials_vox` data.
    /// Changes when the suballocator relocates or repacks the entry.
    colors_offset: u32,
    /// Word offset of this entry's per-column prefix table in the shared
    /// `color_offsets` buffer.
    color_offsets_offset: u32,
    /// Occupancy `u32` words per (x, y) column, copied from the model.
    occ_words_per_col: u32,
    /// Voxel extent of this entry, copied from the model.
    dims: [u32; 3],
    /// TV.3 — 1 if this model has per-voxel materials (`materials_vox` is
    /// populated for it); 0 ⇒ use the instance's uniform material.
    has_vox_materials: u32,
    /// Model pivot, copied from the model.
    pivot: [f32; 3],
    /// GPU.10.4 — world size of one voxel of this (mip) entry.
    voxel_world_size: f32,
}
820
/// GPU per-instance record. Mirrors `Instance` in the shader (std430,
/// 80 bytes): inverse rotation columns + position + model id + the TV
/// material id and per-instance alpha multiplier.
///
/// Field order and types are the layout contract with the shader — do
/// not reorder.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable, Debug)]
struct SpriteInstanceGpu {
    /// First inverse-rotation vector (`transform.inv_rot[0]`).
    inv_rot0: [f32; 4],
    /// Second inverse-rotation vector (`transform.inv_rot[1]`).
    inv_rot1: [f32; 4],
    /// Third inverse-rotation vector (`transform.inv_rot[2]`).
    inv_rot2: [f32; 4],
    /// Instance world position (`transform.pos`).
    pos: [f32; 3],
    /// Concrete model entry to draw. Stored as the LOD chain id on the
    /// CPU side as a placeholder; the per-frame cull overwrites it with
    /// the distance-chosen LOD entry before upload.
    model_id: u32,
    /// TV: material id into the global palette (binding 12).
    material: u32,
    /// TV: per-instance alpha multiplier, normalised to `0..=1`.
    alpha_mul: f32,
    /// Padding (always written as 0) that rounds the record up to 80 bytes.
    _pad0: u32,
    /// Padding (always written as 0); see `_pad0`.
    _pad1: u32,
}
839
/// Invert the 3×3 matrix whose basis columns are `cols = [c0, c1, c2]`
/// and return the inverse, again as columns.
///
/// Uses the cross-product (adjugate) form, so it handles rotation combined
/// with non-unit scale; for an orthonormal basis the result is simply the
/// transpose. A (near-)singular input — `|det| < 1e-12` — yields the
/// all-zero matrix instead of dividing by a vanishing determinant.
#[must_use]
fn mat3_inverse(cols: [[f32; 3]; 3]) -> [[f32; 3]; 3] {
    /// Right-handed cross product `u × v`.
    fn cross(u: [f32; 3], v: [f32; 3]) -> [f32; 3] {
        [
            u[1] * v[2] - u[2] * v[1],
            u[2] * v[0] - u[0] * v[2],
            u[0] * v[1] - u[1] * v[0],
        ]
    }

    let [c0, c1, c2] = cols;

    // Unscaled rows of the inverse: (c1×c2, c2×c0, c0×c1).
    let row0 = cross(c1, c2);
    let row1 = cross(c2, c0);
    let row2 = cross(c0, c1);

    // Determinant as the scalar triple product c0 · (c1 × c2).
    let det = c0[0] * row0[0] + c0[1] * row0[1] + c0[2] * row0[2];
    let inv_det = match det {
        d if d.abs() < 1e-12 => 0.0,
        d => 1.0 / d,
    };

    // Column k of the inverse gathers component k of every scaled row
    // (i.e. the transpose of the row form above).
    std::array::from_fn(|k| [row0[k] * inv_det, row1[k] * inv_det, row2[k] * inv_det])
}
867
/// GPU-resident registry + instances: every model's occupancy /
/// colours / offsets concatenated into shared storage buffers, a
/// per-model metadata table, and a capacity-sized instance buffer
/// rewritten each frame with the frustum-visible subset (GPU.10.2).
/// One bind group serves all models (same approach as the multi-grid
/// scene).
///
/// All `*_cap` / `*_used` / `*_lens` counters are in `u32` words unless
/// stated otherwise.
pub struct SpriteRegistryResident {
    /// Every entry's per-column occupancy bitmask, tightly concatenated;
    /// each entry's start is its `model_meta` `occupancy_offset`.
    pub occupancy: wgpu::Buffer,
    /// Every entry's voxel colours, placed by the colours suballocator
    /// (with per-slot slack); each entry's start is its `model_meta`
    /// `colors_offset`.
    pub colors: wgpu::Buffer,
    /// Per-voxel surface-normal index, concatenated across models in the
    /// same layout as [`colors`](Self::colors). The shader indexes the
    /// per-instance `kv6colmul` table by it.
    pub dirs: wgpu::Buffer,
    /// Per-voxel material id (TV.3), same layout as [`colors`](Self::colors)
    /// (one u32 per voxel). `0` for models without per-voxel materials; the
    /// per-model `has_vox_materials` flag in `model_meta` says whether to use
    /// it (else the shader falls back to the instance's uniform material).
    pub materials_vox: wgpu::Buffer,
    /// Every entry's per-column colour prefix table, tightly concatenated;
    /// each entry's start is its `model_meta` `color_offsets_offset`.
    pub color_offsets: wgpu::Buffer,
    /// The per-entry metadata table (one record per concrete entry),
    /// mirrored on the CPU in `meta`.
    pub model_meta: wgpu::Buffer,
    /// Holds up to `instance_capacity` instances; the visible subset
    /// is packed into `[0, count)` each frame by [`Self::cull_bin_upload`].
    pub instances: wgpu::Buffer,
    /// Record count `instances` is considered sized for. Set to the
    /// instance count by [`Self::upload`] and grown to a power of two by
    /// [`Self::append_instances`]; never shrunk on removal.
    pub instance_capacity: u32,
    /// Per-visible-instance `kv6colmul[256]` tables, packed in the same
    /// order as the `instances` buffer each frame (two u32 per u64
    /// entry: lanes 0|1 then 2|3). Sized `instance_capacity * 256 * 2`
    /// u32; rewritten by [`Self::cull_bin_upload`].
    pub colmul: wgpu::Buffer,
    /// Allocated size of `colmul` in u32 words.
    colmul_cap: u32,
    /// GPU.10.3 — per-tile `(offset, count)` into `tile_instances`,
    /// flat `2 * tiles_x * tiles_y` u32s. Grown to fit the screen.
    pub tile_ranges: wgpu::Buffer,
    /// Allocated size of `tile_ranges` (starts at 1 after `upload`).
    tile_ranges_cap: u32,
    /// GPU.10.3 — flat list of visible-instance indices grouped by
    /// tile. Grown to fit the per-frame total.
    pub tile_instances: wgpu::Buffer,
    /// Allocated size of `tile_instances` (starts at 1 after `upload`).
    tile_instances_cap: u32,
    /// CPU cull records (full set), with precomputed bounding spheres.
    cull: Vec<CullInstance>,
    /// GPU.10.4 — LOD chains: `chains[chain_id]` = entry ids, finest
    /// first. The cull picks a level by distance and writes its entry
    /// id into the packed instance's `model_id`.
    chains: Vec<Vec<u32>>,
    /// GPU.12 incremental — CPU mirror of the GPU `model_meta` table, one
    /// per concrete entry. [`Self::update_model`] reads the fixed
    /// occupancy/color_offsets bases from here and rewrites the changed
    /// `colors_offset` on a relocation.
    meta: Vec<SpriteModelMeta>,
    /// GPU.12 incremental — per-entry placement of `colors`/`dirs` in the
    /// shared buffers (drives both; same offsets/ranks). Lets an edit
    /// re-upload one model's data without touching the others.
    colors_alloc: ColorsAllocator,
    /// Per-entry word length of the dims-fixed `occupancy` and
    /// `color_offsets` arrays, kept so [`Self::update_model`] can assert a
    /// carve never changed dims (which would invalidate the in-place
    /// writes — growing dims is out of scope, handled by a full re-upload).
    occ_lens: Vec<u32>,
    /// Per-entry word length of the `color_offsets` array; the
    /// `color_offsets` counterpart of `occ_lens`.
    coloff_lens: Vec<u32>,
    /// Used / allocated words of the tightly-concatenated `occupancy`
    /// buffer. `add_model` bump-appends at `occ_used`; when it would pass
    /// `occ_cap` the buffer is grown (with slack) and rebuilt from the
    /// registry. (`colors`/`dirs` track theirs in [`ColorsAllocator`].)
    occ_used: u32,
    /// Allocated words of `occupancy`; see `occ_used`.
    occ_cap: u32,
    /// Used / allocated words of the tightly-concatenated `color_offsets`
    /// buffer — same growth scheme as `occ_*`.
    coloff_used: u32,
    /// Allocated words of `color_offsets`; see `coloff_used`.
    coloff_cap: u32,
    /// Allocated record count of the `model_meta` buffer; `add_model`
    /// grows it (with slack) when the entry count passes it.
    meta_cap: u32,
    /// Per-entry tombstone: `true` once its model was removed
    /// ([`Self::remove_model`]). Dead entries keep their `meta` slot (so
    /// entry ids — and the caller's `chain_id`s — stay stable) but their
    /// colours are freed for reuse and they contribute nothing to a
    /// repack / [`Self::compact`]. Parallel to `meta`.
    dead: Vec<bool>,
}
947
/// Selects which tightly-concatenated registry buffer
/// `SpriteRegistryResident::sync_concat` is operating on; also the
/// selector taken by [`concat_data`].
#[derive(Clone, Copy)]
enum ConcatBuf {
    /// The shared `occupancy` buffer (source: `SpriteModel::occupancy`).
    Occupancy,
    /// The shared `color_offsets` buffer (source:
    /// `SpriteModel::color_offsets`).
    ColorOffsets,
}
955
956/// The model's source array for a given [`ConcatBuf`] — a free fn (not a
957/// closure) so the returned borrow keeps `m`'s lifetime.
958fn concat_data(m: &SpriteModel, which: ConcatBuf) -> &[u32] {
959    match which {
960        ConcatBuf::Occupancy => &m.occupancy,
961        ConcatBuf::ColorOffsets => &m.color_offsets,
962    }
963}
964
965impl SpriteRegistryResident {
    /// Concatenate `registry`'s models into shared buffers and prepare
    /// `instances` for per-frame culling. Model-relative indices stay
    /// as built; the shader adds each model's base offset from the
    /// metadata table.
    ///
    /// This is the full (non-incremental) build: every GPU buffer is
    /// created here and all CPU bookkeeping (`meta`, `chains`, allocator
    /// state, per-entry lengths, tombstones) is initialised from scratch.
    /// No entry starts out dead.
    ///
    /// # Panics
    /// Indirectly, if an instance's `model_id` is not valid for `registry`
    /// (the bounding radius is looked up via `registry.model`).
    #[must_use]
    pub fn upload(
        device: &wgpu::Device,
        registry: &SpriteModelRegistry,
        instances: &[SpriteInstance],
    ) -> Self {
        // `occupancy` + `color_offsets` are dims-fixed → tightly
        // concatenated (never grow on a carve). `colors` + `dirs` are
        // variable → laid out by the suballocator with per-slot slack so
        // an incremental edit can rewrite one model in place.
        let entry_lens: Vec<u32> = registry
            .entries
            .iter()
            .map(|m| m.colors.len() as u32)
            .collect();
        let colors_alloc = ColorsAllocator::new(&entry_lens);
        // Total words the allocator wants for colours (slack included);
        // `dirs` and `materials_vox` share this size and layout.
        let cap_total = colors_alloc.cap_total();

        let mut all_occ: Vec<u32> = Vec::new();
        let mut all_offsets: Vec<u32> = Vec::new();
        let mut all_colors: Vec<u32> = vec![0; cap_total as usize];
        let mut all_dirs: Vec<u32> = vec![0; cap_total as usize];
        let mut all_materials: Vec<u32> = vec![0; cap_total as usize];
        let mut meta: Vec<SpriteModelMeta> = Vec::with_capacity(registry.entries.len());
        let mut occ_lens: Vec<u32> = Vec::with_capacity(registry.entries.len());
        let mut coloff_lens: Vec<u32> = Vec::with_capacity(registry.entries.len());

        // One meta + placed data per concrete (mip-level) entry.
        for (e, m) in registry.entries.iter().enumerate() {
            let slot = colors_alloc.slot(e);
            // The tight buffers' offsets are simply "words appended so
            // far", so the meta record must be pushed before extending.
            meta.push(SpriteModelMeta {
                occupancy_offset: all_occ.len() as u32,
                colors_offset: slot.off,
                color_offsets_offset: all_offsets.len() as u32,
                occ_words_per_col: m.occ_words_per_col,
                dims: m.dims,
                has_vox_materials: u32::from(!m.materials.is_empty()),
                pivot: m.pivot,
                voxel_world_size: m.voxel_world_size,
            });
            occ_lens.push(m.occupancy.len() as u32);
            coloff_lens.push(m.color_offsets.len() as u32);
            all_occ.extend_from_slice(&m.occupancy);
            all_offsets.extend_from_slice(&m.color_offsets);
            let off = slot.off as usize;
            all_colors[off..off + m.colors.len()].copy_from_slice(&m.colors);
            all_dirs[off..off + m.dirs.len()].copy_from_slice(&m.dirs);
            // Materials are widened to one u32 per voxel; a model without
            // per-voxel materials leaves its slot zeroed.
            for (i, &mat) in m.materials.iter().enumerate() {
                all_materials[off + i] = u32::from(mat);
            }
        }

        // Per-instance cull records: sphere centred at the instance
        // position, radius from the chain's finest (mip-0) model.
        // `colmul` starts at identity (unshaded) until the facade sets
        // per-instance lighting via `set_instance_colmul`.
        let cull: Vec<CullInstance> = instances.iter().map(|i| make_cull(registry, i)).collect();

        // Capacity buffer (COPY_DST so cull can rewrite it each frame),
        // seeded with the full set so frame 0 is valid pre-cull.
        let seed: Vec<SpriteInstanceGpu> = cull.iter().map(|c| c.gpu).collect();
        let instances_buf = {
            use wgpu::util::DeviceExt;
            // With no instances, seed a single zeroed record so the
            // buffer is never empty.
            let one = [SpriteInstanceGpu::zeroed()];
            let src: &[SpriteInstanceGpu] = if seed.is_empty() { &one } else { &seed };
            device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
                label: Some("roxlap-gpu sprite_reg.instances"),
                contents: bytemuck::cast_slice(src),
                usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
            })
        };

        // Tile buffers start at the minimum size (cap 1, recorded below).
        let tile_ranges = storage_dst_u32(device, "roxlap-gpu sprite_reg.tile_ranges", 1);
        let tile_instances = storage_dst_u32(device, "roxlap-gpu sprite_reg.tile_instances", 1);
        // colmul: 256 entries × 2 u32 per visible instance. Sized to the
        // full instance set (worst case all visible); rewritten per frame.
        let colmul_cap = (cull.len() as u32).max(1) * 256 * 2;
        let colmul = storage_dst_u32(device, "roxlap-gpu sprite_reg.colmul", colmul_cap);
        Self {
            // Tight buffers: capacity == used, no slack at upload time.
            occupancy: storage_dst_u32_cap(
                device,
                "roxlap-gpu sprite_reg.occupancy",
                &all_occ,
                all_occ.len() as u32,
            ),
            colors: storage_dst_u32_cap(
                device,
                "roxlap-gpu sprite_reg.colors",
                &all_colors,
                cap_total,
            ),
            dirs: storage_dst_u32_cap(device, "roxlap-gpu sprite_reg.dirs", &all_dirs, cap_total),
            materials_vox: storage_dst_u32_cap(
                device,
                "roxlap-gpu sprite_reg.materials_vox",
                &all_materials,
                cap_total,
            ),
            color_offsets: storage_dst_u32_cap(
                device,
                "roxlap-gpu sprite_reg.color_offsets",
                &all_offsets,
                all_offsets.len() as u32,
            ),
            model_meta: storage_dst_pod(device, "roxlap-gpu sprite_reg.model_meta", &meta),
            instances: instances_buf,
            // NOTE: 0 for an empty instance set, even though the buffer
            // itself then holds one zeroed record.
            instance_capacity: cull.len() as u32,
            colmul,
            colmul_cap,
            tile_ranges,
            tile_ranges_cap: 1,
            tile_instances,
            tile_instances_cap: 1,
            cull,
            chains: registry.chains.clone(),
            occ_used: all_occ.len() as u32,
            occ_cap: all_occ.len() as u32,
            coloff_used: all_offsets.len() as u32,
            coloff_cap: all_offsets.len() as u32,
            meta_cap: meta.len() as u32,
            // `dead` must be built before `meta` is moved into the struct.
            dead: vec![false; meta.len()],
            meta,
            colors_alloc,
            occ_lens,
            coloff_lens,
        }
    }
1097
1098    /// Number of resident instances (the cull set length).
1099    #[must_use]
1100    pub fn instance_count(&self) -> usize {
1101        self.cull.len()
1102    }
1103
1104    /// Append new instances **without** re-uploading any model volume —
1105    /// the incremental counterpart to [`Self::upload`], for streaming
1106    /// spawns (asteroids, projectiles, …). Returns the index of the first
1107    /// appended instance; the block occupies `[base, base + N)`.
1108    ///
1109    /// The model volumes are untouched, so every appended instance must
1110    /// reference a `model_id` (LOD chain) that was already present in the
1111    /// `registry` passed to [`Self::upload`]. Registering a *new* model
1112    /// still requires a full [`Self::upload`] (its voxels must be laid
1113    /// into the shared buffers). `registry` here is only read for the new
1114    /// instances' bound-sphere radii and must be the resident one.
1115    ///
1116    /// The `instances` GPU buffer is only *grown* here (power-of-two,
1117    /// amortised O(1)); its contents are **not** written. [`Self::
1118    /// cull_bin_upload`] rewrites the whole visible range from `cull` every
1119    /// frame before the sprite pass reads it — exactly as for the static
1120    /// instances — so appending only needs to extend `cull` and ensure
1121    /// capacity. Writing the buffer here too caused a mid-frame
1122    /// write-while-in-flight hazard on some drivers (a stray full-screen
1123    /// flash on append). `colmul` likewise grows lazily in
1124    /// `cull_bin_upload`. After a removal the capacity is not shrunk.
1125    pub fn append_instances(
1126        &mut self,
1127        device: &wgpu::Device,
1128        registry: &SpriteModelRegistry,
1129        instances: &[SpriteInstance],
1130    ) -> u32 {
1131        let base = self.cull.len() as u32;
1132        if instances.is_empty() {
1133            return base;
1134        }
1135        for i in instances {
1136            debug_assert!(
1137                (i.model_id as usize) < self.chains.len(),
1138                "append_instances: model_id {} not resident (run upload to register new models)",
1139                i.model_id
1140            );
1141            self.cull.push(make_cull(registry, i));
1142        }
1143        let need = self.cull.len() as u32;
1144        if need > self.instance_capacity {
1145            // Grow power-of-two and recreate the buffer (the next frame's
1146            // bind group picks up the new handle). No seed write — the
1147            // per-frame cull_bin_upload populates it.
1148            self.instance_capacity = need.next_power_of_two();
1149            self.instances = instances_buffer(device, self.instance_capacity);
1150        }
1151        base
1152    }
1153
1154    /// Remove the instance at `index` by swap-remove — O(1), no GPU work
1155    /// (the next [`Self::cull_bin_upload`] repacks the visible set from
1156    /// the shrunk cull list). Capacity is retained for reuse.
1157    ///
1158    /// Returns `Some(old_last)` when a different instance was moved into
1159    /// `index` to fill the hole (its index changed from `old_last` to
1160    /// `index` — callers holding instance handles must fix up that one),
1161    /// or `None` if `index` was the last element or out of range. Because
1162    /// this reorders, any [`Self::set_instance_colmul`] table set by
1163    /// position should be re-applied after a removal.
1164    pub fn remove_instance(&mut self, index: usize) -> Option<usize> {
1165        if index >= self.cull.len() {
1166            return None;
1167        }
1168        let last = self.cull.len() - 1;
1169        self.cull.swap_remove(index);
1170        (index != last).then_some(last)
1171    }
1172
1173    /// Set the per-instance `kv6colmul[256]` lighting tables (voxlap's
1174    /// `update_reflects` output), in the same order/length as the
1175    /// instances passed to [`Self::upload`]. The next
1176    /// [`Self::cull_bin_upload`] packs the visible subset to the GPU.
1177    /// Instances beyond `tables.len()` keep their previous tables.
1178    pub fn set_instance_colmul(&mut self, tables: &[[u64; 256]]) {
1179        for (ci, t) in self.cull.iter_mut().zip(tables) {
1180            ci.colmul.copy_from_slice(t);
1181        }
1182    }
1183
1184    /// Refresh instance poses in place from `instances` — for animated
1185    /// sprites (e.g. KFA limbs re-posed each frame) — **without** any
1186    /// model-volume re-upload. `instances` must match the set passed to
1187    /// [`Self::upload`] in length + order; each keeps its `model_id`
1188    /// (LOD chain) so only the transform + cull centre change. No GPU
1189    /// write happens here: the next [`Self::cull_bin_upload`] re-uploads
1190    /// the packed visible subset, as it already does every frame.
1191    pub fn update_transforms(&mut self, instances: &[SpriteInstance]) {
1192        debug_assert_eq!(
1193            instances.len(),
1194            self.cull.len(),
1195            "update_transforms instance count must match upload"
1196        );
1197        for (ci, inst) in self.cull.iter_mut().zip(instances) {
1198            ci.gpu.inv_rot0 = inst.transform.inv_rot[0];
1199            ci.gpu.inv_rot1 = inst.transform.inv_rot[1];
1200            ci.gpu.inv_rot2 = inst.transform.inv_rot[2];
1201            ci.gpu.pos = inst.transform.pos;
1202            // TV: material id + alpha multiplier ride the same coalesced
1203            // update as the pose (set via the facade's per-instance setters).
1204            ci.gpu.material = u32::from(inst.material);
1205            ci.gpu.alpha_mul = f32::from(inst.alpha_mul) / 255.0;
1206            // Bounding sphere follows the pivot; radius/chain unchanged.
1207            ci.center = inst.transform.pos;
1208        }
1209    }
1210
1211    /// Repoint instance `idx` at a different LOD chain — the per-frame
1212    /// **flipbook** step for animated voxel clips (VCL.2). The instance's
1213    /// transform / colmul are untouched; only which model's volume it
1214    /// draws changes. The new chain's volume must already be resident
1215    /// (uploaded via [`Self::add_model`] / [`Self::upload`]); `registry`
1216    /// is the one those uploads used (so the bounding radius reseeds from
1217    /// the new model). Like [`Self::update_transforms`], this is a CPU-side
1218    /// rewrite — the next [`Self::cull_bin_upload`] re-uploads the packed
1219    /// visible subset, so it costs nothing extra on the GPU. No-op if `idx`
1220    /// is out of range.
1221    ///
1222    /// All frames of a clip share the same `dims`, so a flipbook swap
1223    /// leaves the bounding radius unchanged; reseeding it anyway keeps the
1224    /// method correct for arbitrary chain swaps.
1225    pub fn set_instance_model(
1226        &mut self,
1227        registry: &SpriteModelRegistry,
1228        idx: usize,
1229        chain_id: u32,
1230    ) {
1231        // Guard `chain_id` (the `cull.get_mut` below only covers `idx`): a
1232        // public caller could pass an out-of-range / tombstoned chain, which
1233        // `registry.model` would index-panic on.
1234        let Some(radius) = registry
1235            .model_checked(chain_id)
1236            .map(SpriteModel::bound_radius)
1237        else {
1238            return;
1239        };
1240        let Some(ci) = self.cull.get_mut(idx) else {
1241            return;
1242        };
1243        ci.chain_id = chain_id;
1244        ci.gpu.model_id = chain_id; // placeholder; cull rewrites to the LOD entry
1245        ci.radius = radius;
1246    }
1247
    /// GPU.12 incremental — re-upload only the entries of LOD chain
    /// `chain_id` after an in-place edit (carve / recolour) of its model,
    /// **without** rebuilding the whole registry. `registry` must be the
    /// same registry uploaded (same entry ids), with chain `chain_id`'s
    /// entries already edited (`model_mut` + `rebuild_lod`).
    ///
    /// For each entry: occupancy + color_offsets are dims-fixed, so they
    /// are written in place; colors + dirs (variable, parallel) go through
    /// the suballocator — written in place when they fit the slack,
    /// relocated (with a `model_meta` rewrite) when they outgrow it, and
    /// only when the buffer tail overflows are colors/dirs grown + the
    /// whole registry repacked. Per-voxel materials follow the colours
    /// (same offset). Instances / cull / colmul are untouched
    /// (a carve never moves an instance or grows its bounds) — that is the
    /// win over [`Self::upload`].
    ///
    /// Only `colors_offset` is refreshed in the entry's meta record; the
    /// other meta fields (dims, pivot, `has_vox_materials`, …) are assumed
    /// unchanged by the edit.
    ///
    /// # Panics
    /// If `chain_id` is not a resident chain, or one of its entry ids is
    /// out of range for `registry` (plain indexing).
    ///
    /// # Panics (debug)
    /// If an entry's dims changed (occupancy / color_offsets length), which
    /// the in-place path can't absorb — growing dims needs a full
    /// re-upload via [`Self::upload`].
    pub fn update_model(
        &mut self,
        device: &wgpu::Device,
        queue: &wgpu::Queue,
        registry: &SpriteModelRegistry,
        chain_id: u32,
    ) {
        // Snapshot the chain's entry ids so the loop can mutate `self`.
        let entries = self.chains[chain_id as usize].clone();
        // Set when any entry no longer fits the colours buffer at all.
        let mut grew = false;
        for &e in &entries {
            let e = e as usize;
            let m = &registry.entries[e];

            // Dims-fixed arrays: assert unchanged, then write in place.
            debug_assert_eq!(
                m.occupancy.len() as u32,
                self.occ_lens[e],
                "update_model: entry {e} occupancy length changed (dims grew?)"
            );
            debug_assert_eq!(
                m.color_offsets.len() as u32,
                self.coloff_lens[e],
                "update_model: entry {e} color_offsets length changed (dims grew?)"
            );
            // Meta offsets are in u32 words; `* 4` converts to bytes.
            queue.write_buffer(
                &self.occupancy,
                u64::from(self.meta[e].occupancy_offset) * 4,
                bytemuck::cast_slice(&m.occupancy),
            );
            queue.write_buffer(
                &self.color_offsets,
                u64::from(self.meta[e].color_offsets_offset) * 4,
                bytemuck::cast_slice(&m.color_offsets),
            );

            // Variable colors/dirs via the suballocator.
            let new_len = m.colors.len() as u32;
            match self.colors_alloc.place(e, new_len) {
                // `off` is the (possibly new) word offset of this entry's
                // slot; colours, dirs and materials all live at it.
                Some(off) => {
                    queue.write_buffer(
                        &self.colors,
                        u64::from(off) * 4,
                        bytemuck::cast_slice(&m.colors),
                    );
                    queue.write_buffer(
                        &self.dirs,
                        u64::from(off) * 4,
                        bytemuck::cast_slice(&m.dirs),
                    );
                    // Widen materials to one u32 per voxel (empty for a
                    // model without per-voxel materials).
                    let mats: Vec<u32> = m.materials.iter().map(|&x| u32::from(x)).collect();
                    queue.write_buffer(
                        &self.materials_vox,
                        u64::from(off) * 4,
                        bytemuck::cast_slice(&mats),
                    );
                    if self.meta[e].colors_offset != off {
                        // Relocated — rewrite this entry's meta record.
                        self.meta[e].colors_offset = off;
                        queue.write_buffer(
                            &self.model_meta,
                            (e * std::mem::size_of::<SpriteModelMeta>()) as u64,
                            bytemuck::bytes_of(&self.meta[e]),
                        );
                    }
                }
                // No room anywhere: defer to the full repack below, which
                // re-reads every entry from `registry`.
                None => grew = true,
            }
        }

        // Buffer overflow on at least one entry → grow colors/dirs and
        // repack the WHOLE registry (rare; offsets for every entry move).
        if grew {
            self.grow_and_repack(device, queue, registry);
        }
    }
1342
1343    /// Grow the `colors`/`dirs` buffers and repack every entry compactly
1344    /// (with fresh slack) when an [`Self::update_model`] edit overflowed
1345    /// the buffer tail. Recreates both buffers (the next frame's bind
1346    /// group picks up the new handles) and rewrites every `model_meta`
1347    /// `colors_offset`. O(registry) but rare — logged so a growth burst
1348    /// is visible.
1349    fn grow_and_repack(
1350        &mut self,
1351        device: &wgpu::Device,
1352        queue: &wgpu::Queue,
1353        registry: &SpriteModelRegistry,
1354    ) {
1355        self.repack_colors_dirs(device, registry);
1356        // Every entry's colors_offset moved → rewrite the whole meta table.
1357        queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
1358    }
1359
1360    /// Repack `colors`/`dirs` compactly (with fresh slack) from the full
1361    /// `registry`, recreating both buffers and updating every CPU
1362    /// `meta[e].colors_offset`. Does **not** touch the GPU `model_meta`
1363    /// buffer — the caller writes it ([`Self::grow_and_repack`] writes the
1364    /// whole table; [`Self::add_model`] writes it once after all entries
1365    /// are placed). O(registry) but rare — logged so a growth burst is
1366    /// visible.
1367    fn repack_colors_dirs(&mut self, device: &wgpu::Device, registry: &SpriteModelRegistry) {
1368        // Dead (removed) entries collapse to 0 length so they reclaim no
1369        // space; live entries keep their colours.
1370        let new_lens: Vec<u32> = registry
1371            .entries
1372            .iter()
1373            .enumerate()
1374            .map(|(e, m)| {
1375                if self.dead[e] {
1376                    0
1377                } else {
1378                    m.colors.len() as u32
1379                }
1380            })
1381            .collect();
1382        self.colors_alloc.repack(&new_lens);
1383        let cap_total = self.colors_alloc.cap_total();
1384
1385        let mut all_colors = vec![0u32; cap_total as usize];
1386        let mut all_dirs = vec![0u32; cap_total as usize];
1387        let mut all_materials = vec![0u32; cap_total as usize];
1388        for (e, m) in registry.entries.iter().enumerate() {
1389            if self.dead[e] {
1390                self.meta[e].colors_offset = 0;
1391                continue;
1392            }
1393            let off = self.colors_alloc.slot(e).off as usize;
1394            all_colors[off..off + m.colors.len()].copy_from_slice(&m.colors);
1395            all_dirs[off..off + m.dirs.len()].copy_from_slice(&m.dirs);
1396            for (i, &mat) in m.materials.iter().enumerate() {
1397                all_materials[off + i] = u32::from(mat);
1398            }
1399            self.meta[e].colors_offset = off as u32;
1400        }
1401        self.colors = storage_dst_u32_cap(
1402            device,
1403            "roxlap-gpu sprite_reg.colors",
1404            &all_colors,
1405            cap_total,
1406        );
1407        self.dirs = storage_dst_u32_cap(device, "roxlap-gpu sprite_reg.dirs", &all_dirs, cap_total);
1408        self.materials_vox = storage_dst_u32_cap(
1409            device,
1410            "roxlap-gpu sprite_reg.materials_vox",
1411            &all_materials,
1412            cap_total,
1413        );
1414        eprintln!(
1415            "roxlap-gpu: sprite registry colors/dirs/materials grew + repacked to {cap_total} words"
1416        );
1417    }
1418
1419    /// Append a new model (its full LOD chain) to the resident registry
1420    /// **without** re-uploading the existing models' volumes — the
1421    /// incremental counterpart to a full [`Self::upload`], for streaming
1422    /// in new geometry (unique asteroids, generated meshes).
1423    ///
1424    /// Contract (mirrors [`Self::update_model`]): the caller owns the
1425    /// `SpriteModelRegistry`, has just appended this chain to it (e.g. via
1426    /// [`SpriteModelRegistry::add_lod`]), and passes the resulting
1427    /// `chain_id`. The chain's entries must be the registry's newest (ids
1428    /// `>= ` the resident entry count) — entries are append-only.
1429    ///
1430    /// The large `colors`/`dirs`/`occupancy`/`color_offsets` buffers carry
1431    /// slack and bump-append the new entries in place; a buffer that
1432    /// overflows is grown (with slack) and rebuilt once from the registry
1433    /// (amortised O(1) per add). The small `model_meta` table is rewritten
1434    /// each call. After this, [`Self::append_instances`] can reference the
1435    /// new `chain_id`.
1436    pub fn add_model(
1437        &mut self,
1438        device: &wgpu::Device,
1439        queue: &wgpu::Queue,
1440        registry: &SpriteModelRegistry,
1441        chain_id: u32,
1442    ) {
1443        let entries = registry.chains[chain_id as usize].clone();
1444        debug_assert_eq!(
1445            chain_id as usize,
1446            self.chains.len(),
1447            "add_model: chains must be appended in order"
1448        );
1449
1450        // CPU bookkeeping: assign each new entry a tight occ/coloff offset
1451        // and an allocator slot for colors/dirs. `need_colors_grow` marks
1452        // a slot that didn't fit → a colors/dirs repack below.
1453        let mut need_colors_grow = false;
1454        for &e in &entries {
1455            let e = e as usize;
1456            debug_assert_eq!(
1457                e,
1458                self.meta.len(),
1459                "add_model: entries must be appended in order"
1460            );
1461            let m = &registry.entries[e];
1462            let occ_off = self.occ_used;
1463            let coloff_off = self.coloff_used;
1464            self.occ_used += m.occupancy.len() as u32;
1465            self.coloff_used += m.color_offsets.len() as u32;
1466            let colors_off = match self.colors_alloc.push(m.colors.len() as u32) {
1467                Some(off) => off,
1468                None => {
1469                    need_colors_grow = true;
1470                    0 // placeholder; repack assigns the real offset
1471                }
1472            };
1473            self.meta.push(SpriteModelMeta {
1474                occupancy_offset: occ_off,
1475                colors_offset: colors_off,
1476                color_offsets_offset: coloff_off,
1477                occ_words_per_col: m.occ_words_per_col,
1478                dims: m.dims,
1479                has_vox_materials: u32::from(!m.materials.is_empty()),
1480                pivot: m.pivot,
1481                voxel_world_size: m.voxel_world_size,
1482            });
1483            self.occ_lens.push(m.occupancy.len() as u32);
1484            self.coloff_lens.push(m.color_offsets.len() as u32);
1485            self.dead.push(false);
1486        }
1487        self.chains.push(entries.clone());
1488
1489        // occupancy + color_offsets: grow+rebuild on overflow, else write
1490        // the new tails in place.
1491        self.sync_concat(device, queue, registry, &entries, ConcatBuf::Occupancy);
1492        self.sync_concat(device, queue, registry, &entries, ConcatBuf::ColorOffsets);
1493
1494        // colors/dirs: repack on overflow (rebuilds both + every CPU
1495        // colors_offset), else write the new entries at their slots.
1496        if need_colors_grow {
1497            self.repack_colors_dirs(device, registry);
1498        } else {
1499            for &e in &entries {
1500                let e = e as usize;
1501                let m = &registry.entries[e];
1502                let off = u64::from(self.meta[e].colors_offset) * 4;
1503                queue.write_buffer(&self.colors, off, bytemuck::cast_slice(&m.colors));
1504                queue.write_buffer(&self.dirs, off, bytemuck::cast_slice(&m.dirs));
1505                let mats: Vec<u32> = m.materials.iter().map(|&x| u32::from(x)).collect();
1506                queue.write_buffer(&self.materials_vox, off, bytemuck::cast_slice(&mats));
1507            }
1508        }
1509
1510        // model_meta: grow the record buffer if needed, then rewrite the
1511        // whole (small) table — covers both new records and any
1512        // colors_offset relocations from a repack.
1513        let count = self.meta.len() as u32;
1514        if count > self.meta_cap {
1515            self.meta_cap = grow_records(count);
1516            self.model_meta = storage_dst_pod_cap(
1517                device,
1518                "roxlap-gpu sprite_reg.model_meta",
1519                &self.meta,
1520                self.meta_cap,
1521            );
1522        } else {
1523            queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
1524        }
1525    }
1526
1527    /// Sync one tightly-concatenated buffer (`occupancy` or
1528    /// `color_offsets`) after `add_model` appended `new_entries`: if the
1529    /// used length now exceeds capacity, grow (with slack) and rebuild the
1530    /// whole buffer from the registry; otherwise write just the appended
1531    /// tails at their offsets.
1532    fn sync_concat(
1533        &mut self,
1534        device: &wgpu::Device,
1535        queue: &wgpu::Queue,
1536        registry: &SpriteModelRegistry,
1537        new_entries: &[u32],
1538        which: ConcatBuf,
1539    ) {
1540        let (used, cap) = match which {
1541            ConcatBuf::Occupancy => (self.occ_used, self.occ_cap),
1542            ConcatBuf::ColorOffsets => (self.coloff_used, self.coloff_cap),
1543        };
1544        if used > cap {
1545            let new_cap = grow_words(used);
1546            let all: Vec<u32> = registry
1547                .entries
1548                .iter()
1549                .flat_map(|m| concat_data(m, which).iter().copied())
1550                .collect();
1551            let label = match which {
1552                ConcatBuf::Occupancy => "roxlap-gpu sprite_reg.occupancy",
1553                ConcatBuf::ColorOffsets => "roxlap-gpu sprite_reg.color_offsets",
1554            };
1555            let buf = storage_dst_u32_cap(device, label, &all, new_cap);
1556            match which {
1557                ConcatBuf::Occupancy => {
1558                    self.occupancy = buf;
1559                    self.occ_cap = new_cap;
1560                }
1561                ConcatBuf::ColorOffsets => {
1562                    self.color_offsets = buf;
1563                    self.coloff_cap = new_cap;
1564                }
1565            }
1566        } else {
1567            let target = match which {
1568                ConcatBuf::Occupancy => &self.occupancy,
1569                ConcatBuf::ColorOffsets => &self.color_offsets,
1570            };
1571            for &e in new_entries {
1572                let e = e as usize;
1573                let off = match which {
1574                    ConcatBuf::Occupancy => self.meta[e].occupancy_offset,
1575                    ConcatBuf::ColorOffsets => self.meta[e].color_offsets_offset,
1576                };
1577                queue.write_buffer(
1578                    target,
1579                    u64::from(off) * 4,
1580                    bytemuck::cast_slice(concat_data(&registry.entries[e], which)),
1581                );
1582            }
1583        }
1584    }
1585
1586    /// Number of removed-but-not-yet-compacted models (tombstoned chains).
1587    /// A caller streams `add_model` / `remove_model` and calls
1588    /// [`Self::compact`] once this (relative to [`Self::live_model_count`])
1589    /// crosses a threshold.
1590    #[must_use]
1591    pub fn dead_model_count(&self) -> usize {
1592        self.chains.iter().filter(|c| c.is_empty()).count()
1593    }
1594
1595    /// Number of live (non-removed) models.
1596    #[must_use]
1597    pub fn live_model_count(&self) -> usize {
1598        self.chains.iter().filter(|c| !c.is_empty()).count()
1599    }
1600
1601    /// Remove a model (tombstone its LOD chain) — the counterpart to
1602    /// [`Self::add_model`]. O(chain length): marks the chain's entries
1603    /// dead and frees their `colors`/`dirs` slots for reuse by a later
1604    /// `add_model`. The `occupancy` / `color_offsets` holes are **not**
1605    /// reclaimed until [`Self::compact`]; entry ids (and the caller's other
1606    /// `chain_id`s) stay stable.
1607    ///
1608    /// Instances of the removed chain are **not** dropped here — they
1609    /// linger in the cull set but draw as nothing (skipped in
1610    /// [`Self::cull_bin_upload`]); the caller removes them via
1611    /// [`Self::remove_instance`] when convenient. A no-op if `chain_id` is
1612    /// out of range or already removed.
1613    pub fn remove_model(&mut self, chain_id: u32) {
1614        let Some(entries) = self.chains.get(chain_id as usize).cloned() else {
1615            return;
1616        };
1617        if entries.is_empty() {
1618            return; // already removed
1619        }
1620        for &e in &entries {
1621            let e = e as usize;
1622            self.dead[e] = true;
1623            self.colors_alloc.free(e);
1624        }
1625        self.chains[chain_id as usize] = Vec::new(); // tombstone
1626    }
1627
1628    /// Reclaim the holes left by [`Self::remove_model`]: rebuild the shared
1629    /// volume buffers from the live entries only, dropping every dead
1630    /// entry's data. Entry ids and `chain_id`s are preserved (dead entries
1631    /// keep a zero-length `meta` tombstone), so the caller's handles stay
1632    /// valid and no remap is needed.
1633    ///
1634    /// `registry` must be the resident one (entry ids 1:1, as for
1635    /// [`Self::add_model`] / [`Self::update_model`]). O(live volume) —
1636    /// call it when [`Self::dead_model_count`] is high, not every frame.
1637    pub fn compact(
1638        &mut self,
1639        device: &wgpu::Device,
1640        queue: &wgpu::Queue,
1641        registry: &SpriteModelRegistry,
1642    ) {
1643        // occupancy + color_offsets: re-pack live entries tightly, rewrite
1644        // each live entry's meta offset, zero the dead ones.
1645        self.compact_concat(device, registry, ConcatBuf::Occupancy);
1646        self.compact_concat(device, registry, ConcatBuf::ColorOffsets);
1647        // colors/dirs: the dead-aware repack already drops dead entries.
1648        self.repack_colors_dirs(device, registry);
1649        // model_meta: rewrite the (unchanged-length) table with the new
1650        // offsets. Buffer count didn't change, so no grow needed.
1651        queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
1652    }
1653
1654    /// Rebuild one tightly-concatenated buffer from live entries only
1655    /// (used by [`Self::compact`]): assign each live entry a fresh tight
1656    /// offset, zero dead entries' offset, and recreate the buffer with
1657    /// slack.
1658    fn compact_concat(
1659        &mut self,
1660        device: &wgpu::Device,
1661        registry: &SpriteModelRegistry,
1662        which: ConcatBuf,
1663    ) {
1664        let mut all: Vec<u32> = Vec::new();
1665        for e in 0..self.meta.len() {
1666            if self.dead[e] {
1667                match which {
1668                    ConcatBuf::Occupancy => self.meta[e].occupancy_offset = 0,
1669                    ConcatBuf::ColorOffsets => self.meta[e].color_offsets_offset = 0,
1670                }
1671                continue;
1672            }
1673            let off = all.len() as u32;
1674            match which {
1675                ConcatBuf::Occupancy => self.meta[e].occupancy_offset = off,
1676                ConcatBuf::ColorOffsets => self.meta[e].color_offsets_offset = off,
1677            }
1678            all.extend_from_slice(concat_data(&registry.entries[e], which));
1679        }
1680        let used = all.len() as u32;
1681        let cap = grow_words(used);
1682        let (label, buf) = match which {
1683            ConcatBuf::Occupancy => ("roxlap-gpu sprite_reg.occupancy", &mut self.occupancy),
1684            ConcatBuf::ColorOffsets => (
1685                "roxlap-gpu sprite_reg.color_offsets",
1686                &mut self.color_offsets,
1687            ),
1688        };
1689        *buf = storage_dst_u32_cap(device, label, &all, cap);
1690        match which {
1691            ConcatBuf::Occupancy => {
1692                self.occ_used = used;
1693                self.occ_cap = cap;
1694            }
1695            ConcatBuf::ColorOffsets => {
1696                self.coloff_used = used;
1697                self.coloff_cap = cap;
1698            }
1699        }
1700    }
1701
1702    /// GPU.10.3 — frustum-cull, pack the visible subset into the
1703    /// instance buffer, then bin those instances into screen tiles:
1704    /// project each visible bounding sphere to a screen AABB and append
1705    /// its (visible) index to every overlapped tile. Uploads the
1706    /// instance buffer + `tile_ranges` (per-tile offset/count) +
1707    /// `tile_instances` (flat grouped indices), growing the tile
1708    /// buffers as needed. Returns `(visible_count, tiles_x, tiles_y)`.
1709    #[allow(clippy::too_many_arguments)]
1710    pub fn cull_bin_upload(
1711        &mut self,
1712        device: &wgpu::Device,
1713        queue: &wgpu::Queue,
1714        f: &ViewFrustum,
1715        screen_w: u32,
1716        screen_h: u32,
1717        tile_size: u32,
1718        lod_px: f32,
1719    ) -> (u32, u32, u32) {
1720        let tiles_x = screen_w.div_ceil(tile_size).max(1);
1721        let tiles_y = screen_h.div_ceil(tile_size).max(1);
1722        let n_tiles = (tiles_x * tiles_y) as usize;
1723
1724        let nw = (1.0 + f.half_w * f.half_w).sqrt();
1725        let nh = (1.0 + f.half_h * f.half_h).sqrt();
1726        let cx = screen_w as f32 * 0.5;
1727        let cy = screen_h as f32 * 0.5;
1728        let px_per_world = cx / f.half_w; // isotropic: == cy/half_h
1729        let ts = tile_size as f32;
1730        let tx_max = tiles_x as i32 - 1;
1731        let ty_max = tiles_y as i32 - 1;
1732
1733        let mut visible: Vec<SpriteInstanceGpu> = Vec::with_capacity(self.cull.len());
1734        // Per-visible tile AABB (tx0, tx1, ty0, ty1) for the bin pass.
1735        let mut boxes: Vec<[i32; 4]> = Vec::with_capacity(self.cull.len());
1736        // Per-visible kv6colmul tables, flattened to two u32 per u64
1737        // entry (lanes 0|1, then 2|3), packed in visible order so the
1738        // shader indexes `colmul[inst_idx*512 + dir*2 + {0,1}]`.
1739        let mut visible_colmul: Vec<u32> = Vec::with_capacity(self.cull.len() * 512);
1740        let mut counts = vec![0u32; n_tiles];
1741
1742        for ci in &self.cull {
1743            // Skip instances of a removed model (tombstoned chain) — they
1744            // linger in `cull` until the caller drops them, but draw as
1745            // nothing.
1746            if self.chains[ci.chain_id as usize].is_empty() {
1747                continue;
1748            }
1749            let rel = [
1750                ci.center[0] - f.pos[0],
1751                ci.center[1] - f.pos[1],
1752                ci.center[2] - f.pos[2],
1753            ];
1754            let z = dot3(rel, f.forward);
1755            let r = ci.radius;
1756            if z + r < 0.0 || z - r > f.far {
1757                continue; // behind / beyond far
1758            }
1759            let x = dot3(rel, f.right);
1760            if (x - f.half_w * z) > r * nw || (-x - f.half_w * z) > r * nw {
1761                continue; // right / left
1762            }
1763            let y = dot3(rel, f.down);
1764            if (y - f.half_h * z) > r * nh || (-y - f.half_h * z) > r * nh {
1765                continue; // bottom / top
1766            }
1767
1768            // Visible: project the sphere to a screen AABB → tile range.
1769            let (tx0, tx1, ty0, ty1) = if z > 1e-3 {
1770                let sx = cx + (x / z) * px_per_world;
1771                let sy = cy + (y / z) * px_per_world;
1772                let sr = (r / z) * px_per_world;
1773                (
1774                    (((sx - sr) / ts).floor() as i32).clamp(0, tx_max),
1775                    (((sx + sr) / ts).floor() as i32).clamp(0, tx_max),
1776                    (((sy - sr) / ts).floor() as i32).clamp(0, ty_max),
1777                    (((sy + sr) / ts).floor() as i32).clamp(0, ty_max),
1778                )
1779            } else {
1780                (0, tx_max, 0, ty_max)
1781            };
1782            // GPU.10.4 — pick the LOD level by projected voxel size:
1783            // choose the coarsest level whose voxel still covers at
1784            // least `lod_px` screen pixels, i.e. step up once a mip-0
1785            // voxel would be smaller than that. `lod_px = 1` is the
1786            // natural "don't go sub-pixel" threshold; larger values
1787            // force LOD in closer (tuning/inspection).
1788            let chain = &self.chains[ci.chain_id as usize];
1789            let level = if z > 1e-3 && chain.len() > 1 {
1790                let voxel_px = px_per_world / z; // mip-0 voxel screen size
1791                ((lod_px / voxel_px).log2().ceil().max(0.0) as usize).min(chain.len() - 1)
1792            } else {
1793                0
1794            };
1795            let mut g = ci.gpu;
1796            g.model_id = chain[level];
1797            visible.push(g);
1798            boxes.push([tx0, tx1, ty0, ty1]);
1799            for &w in ci.colmul.iter() {
1800                visible_colmul.push((w & 0xffff_ffff) as u32);
1801                visible_colmul.push((w >> 32) as u32);
1802            }
1803            for ty in ty0..=ty1 {
1804                for tx in tx0..=tx1 {
1805                    counts[(ty * tiles_x as i32 + tx) as usize] += 1;
1806                }
1807            }
1808        }
1809
1810        if visible.is_empty() {
1811            return (0, tiles_x, tiles_y);
1812        }
1813
1814        // Prefix-sum counts → per-tile offsets; build the flat grouped
1815        // index list.
1816        let mut tile_ranges = vec![0u32; n_tiles * 2];
1817        let mut running = 0u32;
1818        for t in 0..n_tiles {
1819            tile_ranges[2 * t] = running; // offset
1820            tile_ranges[2 * t + 1] = counts[t]; // count
1821            running += counts[t];
1822        }
1823        let total = running as usize;
1824        let mut tile_instances = vec![0u32; total.max(1)];
1825        let mut cursor: Vec<u32> = (0..n_tiles).map(|t| tile_ranges[2 * t]).collect();
1826        for (vis_idx, b) in boxes.iter().enumerate() {
1827            for ty in b[2]..=b[3] {
1828                for tx in b[0]..=b[1] {
1829                    let t = (ty * tiles_x as i32 + tx) as usize;
1830                    tile_instances[cursor[t] as usize] = vis_idx as u32;
1831                    cursor[t] += 1;
1832                }
1833            }
1834        }
1835
1836        // Upload: instances + (grown) tile buffers. Grow a tile buffer
1837        // only when this frame needs more than its capacity (wgpu has
1838        // no Clone on Buffer, so we replace the field in place).
1839        queue.write_buffer(&self.instances, 0, bytemuck::cast_slice(&visible));
1840        let need_ranges = tile_ranges.len() as u32;
1841        if need_ranges > self.tile_ranges_cap {
1842            self.tile_ranges_cap = need_ranges.next_power_of_two();
1843            self.tile_ranges = storage_dst_u32(
1844                device,
1845                "roxlap-gpu sprite_reg.tile_ranges",
1846                self.tile_ranges_cap,
1847            );
1848        }
1849        let need_inst = tile_instances.len() as u32;
1850        if need_inst > self.tile_instances_cap {
1851            self.tile_instances_cap = need_inst.next_power_of_two();
1852            self.tile_instances = storage_dst_u32(
1853                device,
1854                "roxlap-gpu sprite_reg.tile_instances",
1855                self.tile_instances_cap,
1856            );
1857        }
1858        queue.write_buffer(&self.tile_ranges, 0, bytemuck::cast_slice(&tile_ranges));
1859        queue.write_buffer(
1860            &self.tile_instances,
1861            0,
1862            bytemuck::cast_slice(&tile_instances),
1863        );
1864        let need_colmul = visible_colmul.len() as u32;
1865        if need_colmul > self.colmul_cap {
1866            self.colmul_cap = need_colmul.next_power_of_two();
1867            self.colmul = storage_dst_u32(device, "roxlap-gpu sprite_reg.colmul", self.colmul_cap);
1868        }
1869        queue.write_buffer(&self.colmul, 0, bytemuck::cast_slice(&visible_colmul));
1870
1871        (visible.len() as u32, tiles_x, tiles_y)
1872    }
1873}
1874
/// GPU.12 incremental — where one registry entry's `colors` (and the
/// parallel `dirs`) live inside the shared registry buffers.
///
/// A slot is the word window `[off, off + cap)`, of which the first `len`
/// words are live. Keeping `cap >= len` leaves slack, so a carve that
/// *grows* the surface-voxel count can be rewritten in place without
/// relocating the entry.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
struct ColorSlot {
    /// First word of the window.
    off: u32,
    /// Window size in words (live data plus slack).
    cap: u32,
    /// Number of live words at the start of the window.
    len: u32,
}
1886
1887/// First-fit suballocator over the parallel `colors`/`dirs` buffers
1888/// (same offsets/ranks → one allocator drives both). Each registry
1889/// entry owns a [`ColorSlot`]; growth past a slot's `cap` relocates it
1890/// (freeing the old block) via the free list or a bump tail, and only
1891/// when the tail would exceed `cap_total` does the caller grow + repack
1892/// the whole buffer. Pure (no GPU) so it unit-tests on its own.
1893#[derive(Debug, Default)]
1894struct ColorsAllocator {
1895    /// Per-entry slot, indexed by entry id.
1896    slots: Vec<ColorSlot>,
1897    /// Freed `(off, cap)` blocks available for first-fit reuse.
1898    free: Vec<(u32, u32)>,
1899    /// Next bump-allocation position (words).
1900    tail: u32,
1901    /// Total buffer capacity in words.
1902    cap_total: u32,
1903}
1904
/// Capacity, in words, to reserve for an array of `len` live words:
/// `len` plus a quarter of `len` plus a fixed 16 words. The slack lets a
/// carve add a few surface voxels without forcing a relocation.
fn slot_cap(len: u32) -> u32 {
    const FIXED_SLACK: u32 = 16;
    let proportional_slack = len / 4;
    len + proportional_slack + FIXED_SLACK
}
1910
/// Capacity, in words, for a concatenated buffer that is being grown to
/// hold `used` words: `used` plus half of `used` plus a fixed 256. The
/// headroom lets a burst of `add_model` calls bump-append instead of
/// re-growing on every call; it is the same formula
/// [`ColorsAllocator`] uses for its `cap_total`.
fn grow_words(used: u32) -> u32 {
    const FIXED_HEADROOM: u32 = 256;
    let half = used / 2;
    used + half + FIXED_HEADROOM
}
1917
/// Capacity, in records, for a `model_meta` buffer that is being grown
/// to hold `count` records: `count` plus half of `count` plus a fixed 8.
fn grow_records(count: u32) -> u32 {
    const FIXED_HEADROOM: u32 = 8;
    let half = count / 2;
    count + half + FIXED_HEADROOM
}
1922
1923impl ColorsAllocator {
1924    /// Lay every entry out contiguously (with per-slot slack) and add a
1925    /// global tail headroom so early growth bump-allocates rather than
1926    /// repacks.
1927    fn new(entry_lens: &[u32]) -> Self {
1928        let mut a = Self::default();
1929        a.repack(entry_lens);
1930        a
1931    }
1932
1933    fn slot(&self, entry: usize) -> ColorSlot {
1934        self.slots[entry]
1935    }
1936
1937    fn cap_total(&self) -> u32 {
1938        self.cap_total
1939    }
1940
1941    /// Repack ALL entries compactly to fit `new_lens`, resetting the
1942    /// free list + tail and choosing a fresh `cap_total` with headroom.
1943    /// Used at initial build and on a buffer grow.
1944    fn repack(&mut self, new_lens: &[u32]) {
1945        self.free.clear();
1946        let mut off = 0u32;
1947        let mut slots = Vec::with_capacity(new_lens.len());
1948        for &len in new_lens {
1949            // A 0-length (dead / removed) entry takes no space — keeps a
1950            // tombstone slot so entry ids stay positional.
1951            let cap = if len == 0 { 0 } else { slot_cap(len) };
1952            slots.push(ColorSlot { off, cap, len });
1953            off += cap;
1954        }
1955        self.slots = slots;
1956        self.tail = off;
1957        // Global headroom: +50% + 256 words.
1958        self.cap_total = off + off / 2 + 256;
1959    }
1960
1961    /// Place `new_len` words for `entry`. Returns `Some(off)` with the
1962    /// (possibly relocated) slot offset, or `None` if the buffer must
1963    /// grow + repack. On relocation the old block is pushed to the free
1964    /// list; an in-place fit returns the unchanged offset.
1965    fn place(&mut self, entry: usize, new_len: u32) -> Option<u32> {
1966        let cur = self.slots[entry];
1967        if new_len <= cur.cap {
1968            self.slots[entry] = ColorSlot {
1969                len: new_len,
1970                ..cur
1971            };
1972            return Some(cur.off);
1973        }
1974        let old = (cur.off, cur.cap);
1975        // First-fit a freed block big enough for the live data.
1976        if let Some(i) = self.free.iter().position(|&(_, c)| c >= new_len) {
1977            let (off, cap) = self.free.remove(i);
1978            self.free.push(old);
1979            self.slots[entry] = ColorSlot {
1980                off,
1981                cap,
1982                len: new_len,
1983            };
1984            return Some(off);
1985        }
1986        // Bump the tail if there's room.
1987        let want = slot_cap(new_len);
1988        if self.tail + want <= self.cap_total {
1989            let off = self.tail;
1990            self.tail += want;
1991            self.free.push(old);
1992            self.slots[entry] = ColorSlot {
1993                off,
1994                cap: want,
1995                len: new_len,
1996            };
1997            return Some(off);
1998        }
1999        None
2000    }
2001
2002    /// Append a slot for a brand-new entry of `new_len` words (used by
2003    /// [`SpriteRegistryResident::add_model`]). Returns `Some(off)` placed
2004    /// via the free list or the bump tail, or `None` if the buffer must
2005    /// grow + repack — in which case **no** slot is pushed (the caller's
2006    /// repack rebuilds every slot from scratch).
2007    fn push(&mut self, new_len: u32) -> Option<u32> {
2008        if let Some(i) = self.free.iter().position(|&(_, c)| c >= new_len) {
2009            let (off, cap) = self.free.remove(i);
2010            self.slots.push(ColorSlot {
2011                off,
2012                cap,
2013                len: new_len,
2014            });
2015            return Some(off);
2016        }
2017        let want = slot_cap(new_len);
2018        if self.tail + want <= self.cap_total {
2019            let off = self.tail;
2020            self.tail += want;
2021            self.slots.push(ColorSlot {
2022                off,
2023                cap: want,
2024                len: new_len,
2025            });
2026            return Some(off);
2027        }
2028        None
2029    }
2030
2031    /// Free `entry`'s slot back to the pool ([`SpriteRegistryResident::
2032    /// remove_model`]). Its `(off, cap)` block joins the free list for
2033    /// first-fit reuse by a later [`Self::push`]; the slot is zeroed so a
2034    /// repack treats it as a 0-length tombstone.
2035    fn free(&mut self, entry: usize) {
2036        let s = self.slots[entry];
2037        if s.cap > 0 {
2038            self.free.push((s.off, s.cap));
2039        }
2040        self.slots[entry] = ColorSlot {
2041            off: 0,
2042            cap: 0,
2043            len: 0,
2044        };
2045    }
2046}
2047
2048/// Create a STORAGE buffer of u32s; pads empty input (wgpu rejects
2049/// zero-sized storage bindings).
2050#[allow(dead_code)]
2051fn storage_u32(device: &wgpu::Device, label: &str, data: &[u32]) -> wgpu::Buffer {
2052    use wgpu::util::DeviceExt;
2053    let bytes: &[u8] = if data.is_empty() {
2054        bytemuck::cast_slice(&[0u32])
2055    } else {
2056        bytemuck::cast_slice(data)
2057    };
2058    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
2059        label: Some(label),
2060        contents: bytes,
2061        usage: wgpu::BufferUsages::STORAGE,
2062    })
2063}
2064
2065/// Create an uninitialised `STORAGE | COPY_DST` `u32` buffer of `cap`
2066/// words (≥1). Written each frame via `queue.write_buffer`.
2067fn storage_dst_u32(device: &wgpu::Device, label: &str, cap: u32) -> wgpu::Buffer {
2068    device.create_buffer(&wgpu::BufferDescriptor {
2069        label: Some(label),
2070        size: u64::from(cap.max(1)) * 4,
2071        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
2072        mapped_at_creation: false,
2073    })
2074}
2075
2076/// Create a `STORAGE | COPY_DST` `u32` buffer of `cap` words (≥ data
2077/// length, ≥ 1), initialised with `data` at offset 0 and the tail left
2078/// zeroed. Unlike [`storage_u32`] (STORAGE-only, exact-size) this both
2079/// reserves spare capacity and is `COPY_DST`, so the incremental
2080/// [`SpriteRegistryResident::update_model`] can `write_buffer` a growing
2081/// `colors`/`dirs` array in place. Filled via `mapped_at_creation` so no
2082/// queue is needed at upload time.
2083fn storage_dst_u32_cap(device: &wgpu::Device, label: &str, data: &[u32], cap: u32) -> wgpu::Buffer {
2084    let cap = cap.max(data.len() as u32).max(1);
2085    let buf = device.create_buffer(&wgpu::BufferDescriptor {
2086        label: Some(label),
2087        size: u64::from(cap) * 4,
2088        usage: wgpu::BufferUsages::STORAGE
2089            | wgpu::BufferUsages::COPY_DST
2090            | wgpu::BufferUsages::COPY_SRC,
2091        mapped_at_creation: true,
2092    });
2093    if !data.is_empty() {
2094        buf.slice(..(data.len() as u64 * 4))
2095            .get_mapped_range_mut()
2096            .copy_from_slice(bytemuck::cast_slice(data));
2097    }
2098    buf.unmap();
2099    buf
2100}
2101
2102/// Create a `STORAGE | COPY_DST` buffer of Pod records, exact-size
2103/// (≥ 1, zero-padded), so individual records can be rewritten in place
2104/// by [`SpriteRegistryResident::update_model`] on a relocation. The
2105/// record *count* never changes on an incremental edit (no model is
2106/// added/removed), so no slack is needed here.
2107fn storage_dst_pod<T: Pod + Zeroable>(
2108    device: &wgpu::Device,
2109    label: &str,
2110    data: &[T],
2111) -> wgpu::Buffer {
2112    let one = [T::zeroed()];
2113    let src: &[T] = if data.is_empty() { &one } else { data };
2114    let buf = device.create_buffer(&wgpu::BufferDescriptor {
2115        label: Some(label),
2116        size: std::mem::size_of_val(src) as u64,
2117        usage: wgpu::BufferUsages::STORAGE
2118            | wgpu::BufferUsages::COPY_DST
2119            | wgpu::BufferUsages::COPY_SRC,
2120        mapped_at_creation: true,
2121    });
2122    buf.slice(..)
2123        .get_mapped_range_mut()
2124        .copy_from_slice(bytemuck::cast_slice(src));
2125    buf.unmap();
2126    buf
2127}
2128
2129/// Create a `STORAGE | COPY_DST` Pod buffer holding `cap` records
2130/// (≥ `data.len()`, ≥ 1), initialised with `data` at record 0 and the
2131/// tail zeroed. The slack lets [`SpriteRegistryResident::add_model`] grow
2132/// the `model_meta` table without re-growing on every add.
2133fn storage_dst_pod_cap<T: Pod + Zeroable>(
2134    device: &wgpu::Device,
2135    label: &str,
2136    data: &[T],
2137    cap: u32,
2138) -> wgpu::Buffer {
2139    let rec = std::mem::size_of::<T>() as u64;
2140    let cap = u64::from(cap.max(data.len() as u32).max(1));
2141    let buf = device.create_buffer(&wgpu::BufferDescriptor {
2142        label: Some(label),
2143        size: cap * rec,
2144        usage: wgpu::BufferUsages::STORAGE
2145            | wgpu::BufferUsages::COPY_DST
2146            | wgpu::BufferUsages::COPY_SRC,
2147        mapped_at_creation: true,
2148    });
2149    if !data.is_empty() {
2150        buf.slice(..(data.len() as u64 * rec))
2151            .get_mapped_range_mut()
2152            .copy_from_slice(bytemuck::cast_slice(data));
2153    }
2154    buf.unmap();
2155    buf
2156}
2157
2158/// Create a STORAGE buffer of Pod records; pads empty input with one
2159/// zeroed `T`.
2160#[allow(dead_code)]
2161fn storage_pod<T: Pod + Zeroable>(device: &wgpu::Device, label: &str, data: &[T]) -> wgpu::Buffer {
2162    use wgpu::util::DeviceExt;
2163    let one = [T::zeroed()];
2164    let src: &[T] = if data.is_empty() { &one } else { data };
2165    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
2166        label: Some(label),
2167        contents: bytemuck::cast_slice(src),
2168        usage: wgpu::BufferUsages::STORAGE,
2169    })
2170}
2171
2172#[cfg(test)]
2173mod tests {
2174    use super::*;
2175    use roxlap_formats::kv6::{Kv6, Voxel};
2176
2177    /// 2×1 kv6: column (0,0) has voxels at z=5 (red) and z=1 (green)
2178    /// stored OUT of z-order; column (1,0) has one voxel at z=3.
2179    fn kv6_unsorted() -> Kv6 {
2180        let mk = |z, col| Voxel {
2181            col,
2182            z,
2183            vis: 0,
2184            dir: 0,
2185        };
2186        Kv6 {
2187            xsiz: 2,
2188            ysiz: 1,
2189            zsiz: 8,
2190            xpiv: 0.0,
2191            ypiv: 0.0,
2192            zpiv: 0.0,
2193            voxels: vec![mk(5, 0xAA), mk(1, 0xBB), mk(3, 0xCC)],
2194            xlen: vec![2, 1],
2195            ylen: vec![vec![2], vec![1]],
2196            palette: None,
2197        }
2198    }
2199
2200    #[test]
2201    fn occupancy_bits_set_at_voxel_z() {
2202        let m = build_sprite_model(&kv6_unsorted());
2203        assert_eq!(m.dims, [2, 1, 8]);
2204        assert_eq!(m.occ_words_per_col, 1); // ceil(8/32)
2205                                            // col 0: bits 1 and 5; col 1: bit 3.
2206        assert_eq!(m.occupancy[0], (1 << 1) | (1 << 5));
2207        assert_eq!(m.occupancy[1], 1 << 3);
2208    }
2209
2210    #[test]
2211    fn colors_are_ascending_z_for_rank_lookup() {
2212        let m = build_sprite_model(&kv6_unsorted());
2213        // col 0 sorted ascending z ⇒ z=1 (green 0xBB) before z=5 (0xAA).
2214        assert_eq!(m.color_offsets, vec![0, 2, 3]);
2215        assert_eq!(&m.colors, &[0xBB, 0xAA, 0xCC]);
2216    }
2217
2218    #[test]
2219    fn identity_basis_inverts_to_identity() {
2220        let inv = mat3_inverse([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]);
2221        assert_eq!(inv, [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]);
2222    }
2223
2224    #[test]
2225    fn fork_is_independent_of_parent() {
2226        let mut reg = SpriteModelRegistry::new();
2227        let base = reg.add(build_sprite_model(&kv6_unsorted()));
2228        let forked = reg.fork(base);
2229        assert_ne!(base, forked);
2230        // Recolour only the fork.
2231        reg.model_mut(forked).recolor(|_| 0x11);
2232        // Parent colours untouched; fork fully overwritten.
2233        assert_eq!(&reg.model(base).colors, &[0xBB, 0xAA, 0xCC]);
2234        assert_eq!(&reg.model(forked).colors, &[0x11, 0x11, 0x11]);
2235    }
2236
2237    #[test]
2238    fn remove_frees_chain_data_keeps_ids_stable() {
2239        let mut reg = SpriteModelRegistry::new();
2240        let a = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2241        let b = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2242        let len_before = reg.len();
2243        assert!(reg.is_live(a) && reg.is_live(b));
2244
2245        reg.remove(a);
2246        // Chain `a` is tombstoned (its entries are freed to empty models;
2247        // they're unreachable via `model()` now — that's the tombstone).
2248        assert!(!reg.is_live(a));
2249        // `b` is untouched and still live; `len()` (next id) is unchanged.
2250        assert!(reg.is_live(b));
2251        assert_eq!(&reg.model(b).colors, &[0xBB, 0xAA, 0xCC]);
2252        assert_eq!(reg.len(), len_before);
2253
2254        // A later add mints a fresh id past the tombstone (no slot reuse).
2255        let c = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2256        assert_eq!(c, len_before as u32);
2257        assert!(reg.is_live(c));
2258        // `b`'s id stayed valid across the remove + add round-trip.
2259        assert_eq!(&reg.model(b).colors, &[0xBB, 0xAA, 0xCC]);
2260    }
2261
2262    #[test]
2263    fn model_checked_guards_out_of_range_and_tombstoned() {
2264        // The guard `set_instance_model` relies on: `model()` would
2265        // index-panic on these, `model_checked` returns `None`.
2266        let mut reg = SpriteModelRegistry::new();
2267        let a = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2268        assert!(reg.model_checked(a).is_some());
2269        assert!(reg.model_checked(9999).is_none(), "out of range → None");
2270        reg.remove(a);
2271        assert!(reg.model_checked(a).is_none(), "tombstoned chain → None");
2272    }
2273
2274    #[test]
2275    fn remove_is_idempotent_and_bounds_safe() {
2276        let mut reg = SpriteModelRegistry::new();
2277        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2278        reg.remove(a);
2279        reg.remove(a); // already removed → no-op, no panic
2280        reg.remove(999); // out of range → no-op
2281        assert!(!reg.is_live(a));
2282        assert!(!reg.is_live(999));
2283    }
2284
2285    #[test]
2286    fn registry_gpu_structs_have_expected_sizes() {
2287        assert_eq!(std::mem::size_of::<SpriteModelMeta>(), 48);
2288        // TV — grew 64 → 80 with the per-instance material id + alpha_mul
2289        // (+ 8 bytes pad to keep the 16-byte std430 stride).
2290        assert_eq!(std::mem::size_of::<SpriteInstanceGpu>(), 80);
2291    }
2292
2293    #[test]
2294    fn add_lod_builds_halving_mip_chain() {
2295        let mut reg = SpriteModelRegistry::new();
2296        // 8×8×8 single voxel-filled column model would be ideal, but
2297        // kv6_unsorted is 2×1×8 → mips: 2×1×8 → 1×1×4 → 1×1×2 → 1×1×1.
2298        let id = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2299        let m0 = reg.model(id);
2300        assert_eq!(m0.dims, [2, 1, 8]);
2301        assert!((m0.voxel_world_size - 1.0).abs() < 1e-6);
2302    }
2303
2304    /// kv6 from explicit voxels, ordered x-major/y-inner to match
2305    /// `build_sprite_model`'s column walk.
2306    fn kv6_from(xsiz: u32, ysiz: u32, zsiz: u32, voxels: &[(u32, u32, u16, u32)]) -> Kv6 {
2307        let mut ylen = vec![vec![0u16; ysiz as usize]; xsiz as usize];
2308        let mut flat = Vec::new();
2309        for x in 0..xsiz {
2310            for y in 0..ysiz {
2311                let mut col: Vec<(u16, u32)> = voxels
2312                    .iter()
2313                    .filter(|(vx, vy, _, _)| *vx == x && *vy == y)
2314                    .map(|(_, _, z, c)| (*z, *c))
2315                    .collect();
2316                col.sort_by_key(|(z, _)| *z);
2317                ylen[x as usize][y as usize] = col.len() as u16;
2318                for (z, c) in col {
2319                    flat.push(Voxel {
2320                        col: c,
2321                        z,
2322                        vis: 0,
2323                        dir: 0,
2324                    });
2325                }
2326            }
2327        }
2328        let xlen = ylen
2329            .iter()
2330            .map(|c| c.iter().map(|&v| u32::from(v)).sum())
2331            .collect();
2332        Kv6 {
2333            xsiz,
2334            ysiz,
2335            zsiz,
2336            xpiv: 0.0,
2337            ypiv: 0.0,
2338            zpiv: 0.0,
2339            voxels: flat,
2340            xlen,
2341            ylen,
2342            palette: None,
2343        }
2344    }
2345
2346    fn offsets_consistent(m: &SpriteModel) -> bool {
2347        let cols = (m.dims[0] * m.dims[1]) as usize;
2348        if m.color_offsets.len() != cols + 1 {
2349            return false;
2350        }
2351        // Monotonic non-decreasing + last == colors.len + each column's
2352        // span == its solid-voxel count.
2353        for w in m.color_offsets.windows(2) {
2354            if w[1] < w[0] {
2355                return false;
2356            }
2357        }
2358        m.color_offsets[cols] as usize == m.colors.len()
2359    }
2360
2361    #[test]
2362    fn carve_two_layers_keeps_offsets_consistent() {
2363        // Mirror the demo's carve: columns with voxels at varied z,
2364        // some sharing z=0/z=1, some not.
2365        let kv6 = kv6_from(
2366            3,
2367            2,
2368            8,
2369            &[
2370                (0, 0, 0, 0xA0),
2371                (0, 0, 1, 0xA1),
2372                (0, 0, 5, 0xA5),
2373                (1, 0, 1, 0xB1),
2374                (2, 1, 0, 0xC0),
2375                (2, 1, 3, 0xC3),
2376            ],
2377        );
2378        let mut m = build_sprite_model(&kv6);
2379        assert!(offsets_consistent(&m));
2380        for z in 0..2u32 {
2381            for y in 0..m.dims[1] {
2382                for x in 0..m.dims[0] {
2383                    m.set_voxel(x, y, z, None);
2384                }
2385            }
2386            assert!(offsets_consistent(&m), "inconsistent after carving z={z}");
2387            // downsample must not panic on the carved model.
2388            let _ = m.downsample();
2389        }
2390    }
2391
2392    #[test]
2393    fn set_voxel_inserts_replaces_and_clears() {
2394        // col 0 starts with z=1 (0xBB), z=5 (0xAA); col 1 with z=3 (0xCC).
2395        let mut m = build_sprite_model(&kv6_unsorted());
2396
2397        // Insert z=3 into col 0 (between z=1 and z=5) → rank 1.
2398        assert!(m.set_voxel(0, 0, 3, Some(0x55)));
2399        assert_eq!(m.occupancy[0], (1 << 1) | (1 << 3) | (1 << 5));
2400        // col 0 colours ascending z: 0xBB(z1), 0x55(z3), 0xAA(z5).
2401        assert_eq!(m.color_offsets, vec![0, 3, 4]);
2402        assert_eq!(&m.colors, &[0xBB, 0x55, 0xAA, 0xCC]);
2403
2404        // Replace z=3 in place (no offset shift).
2405        assert!(m.set_voxel(0, 0, 3, Some(0x66)));
2406        assert_eq!(&m.colors, &[0xBB, 0x66, 0xAA, 0xCC]);
2407        assert_eq!(m.color_offsets, vec![0, 3, 4]);
2408
2409        // Clear z=1 (rank 0) from col 0.
2410        assert!(m.set_voxel(0, 0, 1, None));
2411        assert_eq!(m.occupancy[0], (1 << 3) | (1 << 5));
2412        assert_eq!(m.color_offsets, vec![0, 2, 3]);
2413        assert_eq!(&m.colors, &[0x66, 0xAA, 0xCC]);
2414
2415        // No-ops: clear an empty voxel, edit out of bounds.
2416        assert!(!m.set_voxel(0, 0, 2, None));
2417        assert!(!m.set_voxel(9, 0, 0, Some(1)));
2418    }
2419
2420    #[test]
2421    fn rebuild_lod_refreshes_coarse_levels_from_mip0() {
2422        let mut reg = SpriteModelRegistry::new();
2423        let id = reg.add_lod(build_sprite_model(&kv6_unsorted()), 3);
2424        // Recolour mip-0 only via model_mut, then rebuild the ladder.
2425        reg.model_mut(id).recolor(|_| 0x0000_2000);
2426        reg.rebuild_lod(id);
2427        // The mip-1 average of all-0x2000 voxels is still 0x2000.
2428        let lvl1_entry = reg.chains[id as usize][1] as usize;
2429        assert!(reg.entries[lvl1_entry]
2430            .colors
2431            .iter()
2432            .all(|&c| c == 0x0000_2000));
2433    }
2434
2435    // ---- GPU.12 incremental: colors/dirs suballocator -----------------
2436
2437    /// Every slot fits its data, has slack, doesn't overlap the next, and
2438    /// the buffer reserves tail headroom past the last slot.
2439    fn alloc_invariants(a: &ColorsAllocator, lens: &[u32]) {
2440        let mut prev_end = 0u32;
2441        for (e, &len) in lens.iter().enumerate() {
2442            let s = a.slot(e);
2443            assert_eq!(s.len, len, "slot {e} len");
2444            assert!(s.cap >= s.len, "slot {e} cap >= len");
2445            // In a freshly repacked layout slots are in entry order.
2446            assert!(s.off >= prev_end, "slot {e} overlaps previous");
2447            assert!(s.off + s.cap <= a.cap_total(), "slot {e} past cap_total");
2448            prev_end = s.off + s.cap;
2449        }
2450        assert!(a.cap_total() >= prev_end, "tail headroom");
2451    }
2452
2453    #[test]
2454    fn allocator_new_lays_out_with_slack_and_headroom() {
2455        let lens = [10u32, 0, 64, 7];
2456        let a = ColorsAllocator::new(&lens);
2457        alloc_invariants(&a, &lens);
2458        // Slack: a 64-word slot has cap > 64 so a small carve-grow fits.
2459        assert!(a.slot(2).cap > 64);
2460        // Headroom past the bump tail for early growth.
2461        assert!(a.cap_total() > a.slot(3).off + a.slot(3).cap);
2462    }
2463
2464    #[test]
2465    fn allocator_place_in_place_when_within_cap() {
2466        let mut a = ColorsAllocator::new(&[10, 20]);
2467        let off0 = a.slot(0).off;
2468        let cap0 = a.slot(0).cap;
2469        // Shrink: still the same slot.
2470        assert_eq!(a.place(0, 5), Some(off0));
2471        assert_eq!(a.slot(0).len, 5);
2472        assert_eq!(a.slot(0).cap, cap0);
2473        // Grow within slack: same offset, no relocation.
2474        assert_eq!(a.place(0, cap0), Some(off0));
2475        assert_eq!(a.slot(0).off, off0);
2476        assert!(a.free.is_empty(), "no relocation should free anything");
2477    }
2478
2479    #[test]
2480    fn allocator_place_relocates_to_tail_and_frees_old() {
2481        let mut a = ColorsAllocator::new(&[10, 20]);
2482        let old0 = (a.slot(0).off, a.slot(0).cap);
2483        let tail_before = a.tail;
2484        // Overgrow entry 0 past its cap → relocate to the bump tail.
2485        let new_len = a.slot(0).cap + 5;
2486        let off = a.place(0, new_len).expect("fits in headroom");
2487        assert_eq!(off, tail_before, "relocated to old tail");
2488        assert_eq!(a.slot(0).off, off);
2489        assert_eq!(a.slot(0).len, new_len);
2490        assert!(a.free.contains(&old0), "old slot freed");
2491    }
2492
2493    #[test]
2494    fn allocator_reuses_freed_block_first_fit() {
2495        // Entry 0 has a large slot; entry 1 a tiny one, so growing 1 must
2496        // relocate (it can't fit in place) and lands in 0's freed block.
2497        let mut a = ColorsAllocator::new(&[10, 2]);
2498        let old0 = (a.slot(0).off, a.slot(0).cap);
2499        // Relocate entry 0 to the tail, freeing its original block.
2500        let _ = a.place(0, a.slot(0).cap + 5).unwrap();
2501        assert!(a.free.contains(&old0));
2502        // Grow entry 1 past its (tiny) cap but ≤ the freed block's cap →
2503        // first-fit reuses that block rather than bumping the tail.
2504        let new1 = a.slot(1).cap + 1;
2505        assert!(new1 <= old0.1, "freed block big enough");
2506        let off = a.place(1, new1).expect("reuses freed block");
2507        assert_eq!(off, old0.0, "first-fit reused the freed slot offset");
2508        assert!(!a.free.contains(&old0), "freed block consumed");
2509    }
2510
2511    #[test]
2512    fn allocator_signals_grow_then_repack_restores() {
2513        let mut a = ColorsAllocator::new(&[8, 8]);
2514        // Force overflow: ask for far more than cap_total.
2515        let huge = a.cap_total() + 100;
2516        assert_eq!(a.place(0, huge), None, "overflow must signal grow");
2517        // Repack with the new lengths compacts + grows the buffer.
2518        a.repack(&[huge, 8]);
2519        alloc_invariants(&a, &[huge, 8]);
2520        assert!(a.cap_total() > huge);
2521        // After repack the entry now fits in place.
2522        assert_eq!(a.place(0, huge), Some(a.slot(0).off));
2523    }
2524
2525    /// Drive the allocator like a real carve loop (mirroring
2526    /// `update_model`): one model's colour count drifts up and down
2527    /// across many edits while two neighbours stay put. Growth is
2528    /// absorbed in place / via the free list / by the bump tail, and on
2529    /// the rare overflow we repack (as `update_model` does). After every
2530    /// edit the live `[off, off+len)` windows must stay disjoint.
2531    #[test]
2532    fn allocator_carve_loop_keeps_live_windows_disjoint() {
2533        let mut a = ColorsAllocator::new(&[40, 12, 40]);
2534        let mut lens = [40u32, 12, 40];
2535        // A deterministic up/down walk of entry 1's length, incl. a jump
2536        // that forces at least one grow+repack.
2537        let walk = [13u32, 30, 60, 18, 9, 80, 80, 25, 200, 7];
2538        let mut grew = false;
2539        for &len in &walk {
2540            lens[1] = len;
2541            // Entry 1 re-placed; on overflow, repack the whole set.
2542            if a.place(1, len).is_none() {
2543                grew = true;
2544                a.repack(&lens);
2545            } else {
2546                // Neighbours fit in place every time.
2547                assert_eq!(a.place(0, 40), Some(a.slot(0).off));
2548                assert_eq!(a.place(2, 40), Some(a.slot(2).off));
2549            }
2550            assert_eq!(a.slot(1).len, len);
2551
2552            // No two entries' live windows overlap.
2553            let mut wins: Vec<(u32, u32)> =
2554                (0..3).map(|e| (a.slot(e).off, a.slot(e).len)).collect();
2555            wins.sort_by_key(|w| w.0);
2556            for pair in wins.windows(2) {
2557                let (o0, l0) = pair[0];
2558                let (o1, _) = pair[1];
2559                assert!(o0 + l0 <= o1, "live windows overlap: {pair:?}");
2560            }
2561        }
2562        assert!(grew, "the 200-word jump should have forced a repack");
2563    }
2564
2565    // --- incremental instance path (device-backed; skips w/o adapter) ---
2566
2567    fn headless() -> Option<crate::HeadlessGpu> {
2568        match crate::HeadlessGpu::new_blocking(crate::GpuRendererSettings::default()) {
2569            Ok(h) => Some(h),
2570            Err(e) => {
2571                eprintln!("[skip] no GPU adapter reachable: {e}");
2572                None
2573            }
2574        }
2575    }
2576
2577    fn one_model_registry() -> (SpriteModelRegistry, u32) {
2578        let mut reg = SpriteModelRegistry::new();
2579        let id = reg.add(build_sprite_model(&kv6_unsorted()));
2580        (reg, id)
2581    }
2582
2583    fn inst(model_id: u32, pos: [f32; 3]) -> SpriteInstance {
2584        use roxlap_formats::sprite::Sprite;
2585        SpriteInstance::new(
2586            model_id,
2587            SpriteInstanceTransform::from_sprite(&Sprite::axis_aligned(kv6_unsorted(), pos)),
2588        )
2589    }
2590
2591    #[test]
2592    fn append_grows_count_and_capacity_pow2() {
2593        let Some(h) = headless() else { return };
2594        let (reg, m) = one_model_registry();
2595        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(m, [0.0; 3])]);
2596        assert_eq!(res.instance_count(), 1);
2597        assert_eq!(res.instance_capacity, 1);
2598
2599        // Append 4 → count 5, capacity grows to next_pow2(5) = 8.
2600        let more: Vec<_> = (1..=4).map(|i| inst(m, [i as f32, 0.0, 0.0])).collect();
2601        let base = res.append_instances(&h.device, &reg, &more);
2602        assert_eq!(base, 1, "first appended index follows the seed instance");
2603        assert_eq!(res.instance_count(), 5);
2604        assert_eq!(res.instance_capacity, 8, "power-of-two growth");
2605
2606        // A second append that still fits keeps the same capacity (no realloc).
2607        let base2 = res.append_instances(&h.device, &reg, &[inst(m, [9.0, 0.0, 0.0])]);
2608        assert_eq!(base2, 5);
2609        assert_eq!(res.instance_count(), 6);
2610        assert_eq!(res.instance_capacity, 8, "fits existing capacity, no grow");
2611    }
2612
2613    #[test]
2614    fn append_empty_is_noop() {
2615        let Some(h) = headless() else { return };
2616        let (reg, m) = one_model_registry();
2617        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(m, [0.0; 3])]);
2618        let base = res.append_instances(&h.device, &reg, &[]);
2619        assert_eq!(base, 1);
2620        assert_eq!(res.instance_count(), 1);
2621        assert_eq!(res.instance_capacity, 1);
2622    }
2623
2624    /// Read `words` u32s back from a GPU buffer (needs COPY_SRC).
2625    fn read_u32(h: &crate::HeadlessGpu, buf: &wgpu::Buffer, words: u64) -> Vec<u32> {
2626        let bytes = words * 4;
2627        let staging = h.device.create_buffer(&wgpu::BufferDescriptor {
2628            label: Some("readback"),
2629            size: bytes,
2630            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
2631            mapped_at_creation: false,
2632        });
2633        let mut enc = h
2634            .device
2635            .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
2636        enc.copy_buffer_to_buffer(buf, 0, &staging, 0, bytes);
2637        h.queue.submit(std::iter::once(enc.finish()));
2638        let slice = staging.slice(..);
2639        let (tx, rx) = std::sync::mpsc::channel();
2640        slice.map_async(wgpu::MapMode::Read, move |r| tx.send(r).unwrap());
2641        h.device.poll(wgpu::PollType::wait_indefinitely()).ok();
2642        rx.recv().unwrap().unwrap();
2643        let data = slice.get_mapped_range();
2644        let out = bytemuck::cast_slice::<u8, u32>(&data).to_vec();
2645        drop(data);
2646        staging.unmap();
2647        out
2648    }
2649
2650    /// A second distinct model so add_model has real new geometry to lay
2651    /// down (different dims + colours from `kv6_unsorted`).
2652    fn kv6_other() -> Kv6 {
2653        let mk = |z, col| Voxel {
2654            col,
2655            z,
2656            vis: 0,
2657            dir: 0,
2658        };
2659        Kv6 {
2660            xsiz: 1,
2661            ysiz: 1,
2662            zsiz: 4,
2663            xpiv: 0.0,
2664            ypiv: 0.0,
2665            zpiv: 0.0,
2666            voxels: vec![mk(0, 0x11), mk(2, 0x22)],
2667            xlen: vec![2],
2668            ylen: vec![vec![2]],
2669            palette: None,
2670        }
2671    }
2672
2673    /// add_model lays the new model's volume on the GPU at the offsets its
2674    /// meta record claims — verified by reading the shared buffers back
2675    /// and matching each entry against its source SpriteModel.
2676    #[test]
2677    fn add_model_uploads_new_volume_incrementally() {
2678        let Some(h) = headless() else { return };
2679
2680        // Residency starts with model A only.
2681        let mut reg = SpriteModelRegistry::new();
2682        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2683        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(a, [0.0; 3])]);
2684        assert_eq!(res.chains.len(), 1);
2685        let entries_before = res.meta.len();
2686
2687        // Append model B (single-level) to the registry, then sync it.
2688        let b = reg.add(build_sprite_model(&kv6_other()));
2689        res.add_model(&h.device, &h.queue, &reg, b);
2690        assert_eq!(res.chains.len(), 2);
2691        assert_eq!(res.meta.len(), entries_before + 1, "one new entry");
2692
2693        // Read the shared buffers back and check EVERY entry's data sits
2694        // where its meta record points — both the pre-existing A and the
2695        // newly streamed B.
2696        let occ = read_u32(&h, &res.occupancy, u64::from(res.occ_cap));
2697        let coloff = read_u32(&h, &res.color_offsets, u64::from(res.coloff_cap));
2698        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2699        for (e, m) in reg.entries.iter().enumerate() {
2700            let meta = res.meta[e];
2701            let oo = meta.occupancy_offset as usize;
2702            assert_eq!(
2703                &occ[oo..oo + m.occupancy.len()],
2704                &m.occupancy[..],
2705                "occ entry {e}"
2706            );
2707            let co = meta.color_offsets_offset as usize;
2708            assert_eq!(
2709                &coloff[co..co + m.color_offsets.len()],
2710                &m.color_offsets[..],
2711                "color_offsets entry {e}"
2712            );
2713            let cc = meta.colors_offset as usize;
2714            assert_eq!(
2715                &cols[cc..cc + m.colors.len()],
2716                &m.colors[..],
2717                "colors entry {e}"
2718            );
2719        }
2720
2721        // And an instance of the freshly-added model can now be appended.
2722        let base = res.append_instances(&h.device, &reg, &[inst(b, [5.0, 0.0, 0.0])]);
2723        assert_eq!(base, 1);
2724        assert_eq!(res.instance_count(), 2);
2725    }
2726
2727    /// Adding many small models forces the volume buffers to grow + rebuild
2728    /// at least once; every entry must still read back correctly across the
2729    /// grow boundary.
2730    #[test]
2731    fn add_model_survives_buffer_growth() {
2732        let Some(h) = headless() else { return };
2733        let mut reg = SpriteModelRegistry::new();
2734        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2735        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(a, [0.0; 3])]);
2736        let occ_cap0 = res.occ_cap;
2737
2738        // 40 adds — occupancy starts exact-sized (cap == used), so the very
2739        // first add overflows and grows; later ones ride the slack.
2740        for _ in 0..40 {
2741            let id = reg.add(build_sprite_model(&kv6_other()));
2742            res.add_model(&h.device, &h.queue, &reg, id);
2743        }
2744        assert_eq!(res.chains.len(), 41);
2745        assert!(res.occ_cap > occ_cap0, "occupancy buffer grew");
2746
2747        let occ = read_u32(&h, &res.occupancy, u64::from(res.occ_cap));
2748        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2749        for (e, m) in reg.entries.iter().enumerate() {
2750            let meta = res.meta[e];
2751            let oo = meta.occupancy_offset as usize;
2752            assert_eq!(
2753                &occ[oo..oo + m.occupancy.len()],
2754                &m.occupancy[..],
2755                "occ entry {e}"
2756            );
2757            let cc = meta.colors_offset as usize;
2758            assert_eq!(
2759                &cols[cc..cc + m.colors.len()],
2760                &m.colors[..],
2761                "colors entry {e}"
2762            );
2763        }
2764    }
2765
    /// TV.3 (clip wiring): `sprite_model_from_clip_frame_with_materials`
    /// classifies a clip frame's voxels into a per-voxel `materials` array
    /// (parallel to `colors`) by colour; an empty map leaves it empty (the
    /// all-opaque clip), identical to `sprite_model_from_clip_frame`.
    #[test]
    fn clip_frame_with_materials_classifies_by_color() {
        use roxlap_formats::voxel_clip::{LoopMode, VoxelClip, VoxelFrame};

        let dims = [1u32, 1, 4];
        let owpc = dims[2].div_ceil(32).max(1) as usize; // 1
        let glass = 0x80AA_BBCC;
        let stone = 0x8011_2233;
        let frame = VoxelFrame {
            occupancy: {
                let mut occ = vec![0u32; owpc];
                occ[0] |= (1 << 0) | (1 << 1);
                occ
            },
            colors: vec![stone, glass], // ascending z: z=0 stone, z=1 glass
            color_offsets: vec![0, 2],
        };
        let clip = VoxelClip::from_frames(
            dims,
            [0.5, 0.5, 2.0],
            1.0,
            LoopMode::Loop,
            &[frame],
            &[],
            33,
            0,
        );
        let decoded = clip.decode().expect("decode");

        // Map only the glass colour → material 2; stone stays opaque (0).
        let m = sprite_model_from_clip_frame_with_materials(&decoded, 0, &[(0x00AA_BBCC, 2)]);
        assert_eq!(
            m.materials.len(),
            m.colors.len(),
            "materials parallel to colors"
        );
        // `colors` is in popcount-rank (ascending z) order: stone then glass.
        assert_eq!(
            m.materials,
            vec![0u8, 2u8],
            "stone opaque, glass material 2"
        );

        // Empty map ⇒ no per-voxel materials, identical to the plain builder.
        let plain = sprite_model_from_clip_frame(&decoded, 0);
        let plain_mat = sprite_model_from_clip_frame_with_materials(&decoded, 0, &[]);
        assert!(plain.materials.is_empty());
        assert!(plain_mat.materials.is_empty());
        assert_eq!(plain.colors, plain_mat.colors);
    }

    /// TV.3 (streaming-clip refresh path): `build_sprite_model_with_materials`
    /// — the builder behind `GpuBackend::update_sprite_model_with_materials`,
    /// which a streaming clip re-runs each frame — classifies a kv6's voxels
    /// into a per-voxel `materials` array (popcount-rank order) by colour.
    #[test]
    fn build_with_materials_classifies_by_color() {
        let glass = 0x80AA_BBCC;
        let stone = 0x8011_2233;
        // One column (x=0,y=0), two voxels: z=0 stone, z=1 glass.
        let kv6 = kv6_from(1, 1, 4, &[(0, 0, 0, stone), (0, 0, 1, glass)]);

        let m = build_sprite_model_with_materials(&kv6, &[(0x00AA_BBCC, 2)]);
        assert_eq!(
            m.materials.len(),
            m.colors.len(),
            "materials parallel to colors"
        );
        assert_eq!(
            m.materials,
            vec![0u8, 2u8],
            "stone opaque, glass material 2"
        );

        // Empty map ⇒ no per-voxel materials, identical to `build_sprite_model`.
        let plain = build_sprite_model(&kv6);
        let plain_mat = build_sprite_model_with_materials(&kv6, &[]);
        assert!(plain.materials.is_empty());
        assert!(plain_mat.materials.is_empty());
        assert_eq!(plain.colors, plain_mat.colors);
    }

    /// VCL.2 — a decoded voxel clip's frames register as a flipbook of LOD
    /// chains, and `set_instance_model` flips which frame an instance
    /// draws. The cull state it updates is exactly what
    /// `cull_bin_upload` packs into the GPU instance buffer each frame, so
    /// flipping `chain_id` redirects the rendered instance to the new
    /// frame's resident volume.
    #[test]
    fn voxel_clip_flipbook_set_instance_model() {
        use roxlap_formats::voxel_clip::{LoopMode, VoxelClip, VoxelFrame};
        let Some(h) = headless() else { return };

        // Two distinct frames of a 1×1×4 clip: frame 0 has a voxel at z=0;
        // frame 1 adds z=1 — different occupancy + a longer colour run.
        let dims = [1u32, 1, 4];
        let owpc = dims[2].div_ceil(32).max(1) as usize; // 1
        let mk_frame = |zs: &[u32], cols: &[u32]| -> VoxelFrame {
            let mut occ = vec![0u32; owpc];
            for &z in zs {
                occ[(z >> 5) as usize] |= 1u32 << (z & 31);
            }
            VoxelFrame {
                occupancy: occ,
                colors: cols.to_vec(),
                color_offsets: vec![0, cols.len() as u32],
            }
        };
        let f0 = mk_frame(&[0], &[0x8011_2233]);
        let f1 = mk_frame(&[0, 1], &[0x8011_2233, 0x80AA_BBCC]);
        let clip = VoxelClip::from_frames(
            dims,
            [0.5, 0.5, 2.0],
            1.0,
            LoopMode::Loop,
            &[f0, f1],
            &[],
            33,
            0,
        );
        let decoded = clip.decode().expect("decode");

        // Each frame → a single-level chain; both volumes resident + distinct.
        let mut reg = SpriteModelRegistry::new();
        let c0 = reg.add(sprite_model_from_clip_frame(&decoded, 0));
        let c1 = reg.add(sprite_model_from_clip_frame(&decoded, 1));
        assert_eq!(reg.model(c0).colors.len(), 1);
        assert_eq!(reg.model(c1).colors.len(), 2);

        // One instance, in front of the test frustum, drawing frame 0.
        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(c0, [0.0, 0.0, 5.0])]);
        assert_eq!(res.cull[0].chain_id, c0);

        // Flip to frame 1: the cull now draws chain c1 (radius reseeded).
        res.set_instance_model(&reg, 0, c1);
        assert_eq!(res.cull[0].chain_id, c1);
        assert_eq!(res.cull[0].radius, reg.model(c1).bound_radius());

        // The next cull packs the new chain into the GPU instance buffer
        // (visible, no panic).
        let f = test_frustum();
        let (visible, _, _) = res.cull_bin_upload(&h.device, &h.queue, &f, 64, 64, 16, 1.0);
        assert_eq!(visible, 1);

        // …and back to frame 0.
        res.set_instance_model(&reg, 0, c0);
        assert_eq!(res.cull[0].chain_id, c0);

        // Out-of-range index is a safe no-op.
        res.set_instance_model(&reg, 99, c1);
        assert_eq!(res.cull[0].chain_id, c0);
    }
2922
2923    fn test_frustum() -> ViewFrustum {
2924        ViewFrustum {
2925            pos: [0.0, 0.0, 0.0],
2926            right: [1.0, 0.0, 0.0],
2927            down: [0.0, 1.0, 0.0],
2928            forward: [0.0, 0.0, 1.0],
2929            half_w: 1.0,
2930            half_h: 1.0,
2931            far: 10_000.0,
2932        }
2933    }
2934
2935    #[test]
2936    fn remove_model_tombstones_frees_and_reuses() {
2937        let Some(h) = headless() else { return };
2938        // Residency with models A and B, one instance each.
2939        let mut reg = SpriteModelRegistry::new();
2940        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2941        let b = reg.add(build_sprite_model(&kv6_other()));
2942        let mut res = SpriteRegistryResident::upload(
2943            &h.device,
2944            &reg,
2945            &[inst(a, [0.0; 3]), inst(b, [1.0, 0.0, 0.0])],
2946        );
2947        assert_eq!(res.live_model_count(), 2);
2948        assert_eq!(res.dead_model_count(), 0);
2949
2950        // Remove B → tombstoned, its colours freed into the pool.
2951        res.remove_model(b);
2952        assert_eq!(res.live_model_count(), 1);
2953        assert_eq!(res.dead_model_count(), 1);
2954        assert_eq!(res.dead.iter().filter(|&&d| d).count(), 1, "one entry dead");
2955        assert!(!res.colors_alloc.free.is_empty(), "B's colour slot freed");
2956
2957        // Adding C reuses the freed slot (free-list first-fit).
2958        let c = reg.add(build_sprite_model(&kv6_other()));
2959        res.add_model(&h.device, &h.queue, &reg, c);
2960        assert_eq!(res.live_model_count(), 2);
2961
2962        // A and C read back correctly; B is dead (skipped).
2963        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2964        for e in [a as usize, c as usize] {
2965            let m = &reg.entries[e];
2966            let cc = res.meta[e].colors_offset as usize;
2967            assert_eq!(
2968                &cols[cc..cc + m.colors.len()],
2969                &m.colors[..],
2970                "colors entry {e}"
2971            );
2972        }
2973
2974        // The lingering instance of removed B is skipped without panic.
2975        let f = test_frustum();
2976        let _ = res.cull_bin_upload(&h.device, &h.queue, &f, 64, 64, 16, 1.0);
2977    }
2978
2979    #[test]
2980    fn compact_reclaims_holes_keeps_ids_stable() {
2981        let Some(h) = headless() else { return };
2982        let mut reg = SpriteModelRegistry::new();
2983        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2984        let b = reg.add(build_sprite_model(&kv6_other()));
2985        let c = reg.add(build_sprite_model(&kv6_other()));
2986        let mut res = SpriteRegistryResident::upload(
2987            &h.device,
2988            &reg,
2989            &[inst(a, [0.0; 3]), inst(b, [1.0; 3]), inst(c, [2.0; 3])],
2990        );
2991        let occ_used_full = res.occ_used;
2992
2993        // Remove the middle model, then compact.
2994        res.remove_model(b);
2995        res.compact(&h.device, &h.queue, &reg);
2996
2997        // Holes reclaimed: occupancy now only covers A + C.
2998        let live_occ: u32 = [a, c]
2999            .iter()
3000            .map(|&e| reg.entries[e as usize].occupancy.len() as u32)
3001            .sum();
3002        assert_eq!(res.occ_used, live_occ);
3003        assert!(res.occ_used < occ_used_full, "compaction shrank occupancy");
3004        // Dead entry keeps a zeroed tombstone; ids unchanged.
3005        assert_eq!(res.meta[b as usize].occupancy_offset, 0);
3006        assert_eq!(res.live_model_count(), 2);
3007        assert_eq!(res.dead_model_count(), 1);
3008
3009        // Live entries read back correctly at their new offsets.
3010        let occ = read_u32(&h, &res.occupancy, u64::from(res.occ_cap));
3011        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
3012        for &e in &[a as usize, c as usize] {
3013            let m = &reg.entries[e];
3014            let oo = res.meta[e].occupancy_offset as usize;
3015            assert_eq!(
3016                &occ[oo..oo + m.occupancy.len()],
3017                &m.occupancy[..],
3018                "occ {e}"
3019            );
3020            let cc = res.meta[e].colors_offset as usize;
3021            assert_eq!(&cols[cc..cc + m.colors.len()], &m.colors[..], "cols {e}");
3022        }
3023
3024        // Chain ids still valid: C's chain still resolves; B's is empty.
3025        assert!(!res.chains[c as usize].is_empty());
3026        assert!(res.chains[b as usize].is_empty());
3027    }
3028
3029    #[test]
3030    fn remove_swap_semantics_and_capacity_retained() {
3031        let Some(h) = headless() else { return };
3032        let (reg, m) = one_model_registry();
3033        let seed: Vec<_> = (0..4).map(|i| inst(m, [i as f32, 0.0, 0.0])).collect();
3034        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &seed);
3035        assert_eq!(res.instance_count(), 4);
3036        let cap = res.instance_capacity;
3037
3038        // Remove a middle element → the previous last (idx 3) moved into it.
3039        assert_eq!(res.remove_instance(1), Some(3));
3040        assert_eq!(res.instance_count(), 3);
3041
3042        // Remove the current last (idx 2) → nothing moved.
3043        assert_eq!(res.remove_instance(2), None);
3044        assert_eq!(res.instance_count(), 2);
3045
3046        // Out of range → None.
3047        assert_eq!(res.remove_instance(99), None);
3048        assert_eq!(res.instance_count(), 2);
3049
3050        // Capacity is retained for reuse (no shrink).
3051        assert_eq!(res.instance_capacity, cap);
3052    }
3053}