Skip to main content

roxlap_gpu/
sprite_model.rs

1//! GPU.10 — KV6 sprite as a DDA-marchable voxel model.
2//!
3//! Unlike the GPU.9 splatter (one thread per voxel, screen-space
4//! squares, overdraw + atomic contention), a sprite model is a small
5//! voxel volume the precise ray-DDA marches one ray per pixel —
6//! crisp, correct occlusion, no overdraw. This is the GPU.10.0 single
7//! sprite; instancing + tiling + LOD come in later sub-substages.
8//!
9//! The volume reuses the chunk occupancy/colour scheme but sized to
10//! the KV6 bbox: per-column occupancy bitmask (`occ_words_per_col`
11//! u32s, `CHUNK_Z`-style 32-bits-per-word), a flat colour array in
12//! ascending-z order per column, and a `color_offsets` prefix table.
13//! The shader finds a voxel's colour by `offset[col] + popcount(bits
14//! below z)`, so colours MUST be ascending-z (we sort per column).
15
16#![allow(
17    clippy::cast_precision_loss,
18    clippy::cast_possible_truncation,
19    clippy::cast_possible_wrap,
20    clippy::cast_sign_loss,
21    clippy::many_single_char_names,
22    clippy::similar_names
23)]
24
25use bytemuck::{Pod, Zeroable};
26use roxlap_formats::kv6::Kv6;
27use roxlap_formats::material::material_for_color;
28use roxlap_formats::sprite::Sprite;
29use roxlap_formats::voxel_clip::{DecodedClip, VoxelFrame};
30
/// Voxel volume for a single KV6 model, assembled on the CPU.
///
/// Columns are addressed as `col = x + y * mx`. Every column owns
/// `occ_words_per_col` occupancy words and a contiguous run of entries
/// in `colors` / `dirs` (and `materials`, when present) starting at
/// `color_offsets[col]`.
#[derive(Debug, Clone)]
pub struct SpriteModel {
    /// Model size in voxels, as `[mx, my, mz]`.
    pub dims: [u32; 3],
    /// How many `u32` occupancy words each (x, y) column uses:
    /// `ceil(mz / 32)`, one bit per z level.
    pub occ_words_per_col: u32,
    /// The KV6 pivot, expressed in model-local voxel coordinates.
    pub pivot: [f32; 3],
    /// Occupancy bitmask, `mx * my * occ_words_per_col` words laid out
    /// column after column.
    pub occupancy: Vec<u32>,
    /// Colour of every solid voxel; within a column the entries are in
    /// ascending-z order.
    pub colors: Vec<u32>,
    /// Surface-normal index of every solid voxel (`Kv6::Voxel::dir`,
    /// 0..256), stored parallel to [`colors`](Self::colors). The GPU
    /// sprite shader uses it to index the per-instance `kv6colmul`
    /// lighting table, matching the CPU rasteriser's normal-based
    /// shading.
    pub dirs: Vec<u32>,
    /// Prefix-sum table of length `mx * my + 1`: `color_offsets[col]`
    /// is the index of column `col`'s first colour.
    pub color_offsets: Vec<u32>,
    /// Material id of every solid voxel (TV.3), stored parallel to
    /// [`colors`](Self::colors). An **empty** vector means the model
    /// carries no per-voxel materials, so every voxel uses the
    /// instance's uniform material (the TV.1/TV.2 path). A non-empty
    /// vector describes a mixed-material model (opaque frame + glass);
    /// see [`build_sprite_model_with_materials`].
    pub materials: Vec<u8>,
    /// World-space edge length of one voxel of this model (GPU.10.4
    /// LOD): 1.0 at mip-0, doubled by every [`SpriteModel::downsample`].
    /// The shader divides the local ray by this so a coarse voxel spans
    /// the right world extent and the march `t` stays in world units.
    pub voxel_world_size: f32,
}
64
65/// Build the DDA volume from a KV6. Columns are packed in
66/// `x + y*mx` order; each column's voxels are sorted ascending by z
67/// so the shader's popcount-rank colour lookup is correct.
68///
69/// # Panics
70/// If the KV6's `ylen` counters disagree with `voxels.len()` (a
71/// malformed model).
72#[must_use]
73pub fn build_sprite_model(kv6: &Kv6) -> SpriteModel {
74    build_sprite_model_inner(kv6, &[])
75}
76
77/// Build the DDA volume from a KV6, classifying each voxel into a per-voxel
78/// **material id** by colour (TV.3 mixed models) via `material_map`
79/// (`(rgb, material_id)` pairs; see
80/// [`material_for_color`](roxlap_formats::material::material_for_color)).
81/// An empty map produces a model with no per-voxel materials (identical to
82/// [`build_sprite_model`]).
83///
84/// # Panics
85/// As [`build_sprite_model`].
86#[must_use]
87pub fn build_sprite_model_with_materials(kv6: &Kv6, material_map: &[(u32, u8)]) -> SpriteModel {
88    build_sprite_model_inner(kv6, material_map)
89}
90
91fn build_sprite_model_inner(kv6: &Kv6, material_map: &[(u32, u8)]) -> SpriteModel {
92    let (mx, my, mz) = (kv6.xsiz, kv6.ysiz, kv6.zsiz);
93    let occ_words_per_col = mz.div_ceil(32).max(1);
94    let cols = (mx * my) as usize;
95    let want_mats = !material_map.is_empty();
96
97    let mut occupancy = vec![0u32; cols * occ_words_per_col as usize];
98    let mut color_offsets = vec![0u32; cols + 1];
99    let mut colors: Vec<u32> = Vec::with_capacity(kv6.voxels.len());
100    let mut dirs: Vec<u32> = Vec::with_capacity(kv6.voxels.len());
101    let mut materials: Vec<u8> = if want_mats {
102        Vec::with_capacity(kv6.voxels.len())
103    } else {
104        Vec::new()
105    };
106
107    // Pass 1 — consume voxels in KV6 storage order (x-outer / y-inner)
108    // into per-column buckets keyed by `col = x + y*mx`. Each entry is
109    // `(z, colour, normal-dir)`.
110    let mut buckets: Vec<Vec<(u16, u32, u8)>> = vec![Vec::new(); cols];
111    let mut voxel_iter = kv6.voxels.iter();
112    for x in 0..mx {
113        for y in 0..my {
114            let col = (x + y * mx) as usize;
115            let count = kv6.ylen[x as usize][y as usize];
116            for _ in 0..count {
117                let v = voxel_iter.next().expect("KV6 ylen / voxels.len mismatch");
118                buckets[col].push((v.z, v.col, v.dir));
119            }
120        }
121    }
122
123    // Pass 2 — emit in COLUMN-INDEX order so `color_offsets` is a true
124    // monotonic prefix sum (the shader indexes by `col` either way, but
125    // structural edits / mip rebuilds rely on monotonic offsets). Each
126    // column's voxels sorted ascending z for the popcount-rank lookup.
127    for (col, bucket) in buckets.iter_mut().enumerate() {
128        color_offsets[col] = colors.len() as u32;
129        bucket.sort_by_key(|(z, _, _)| *z);
130        for &(z, col_rgba, dir) in bucket.iter() {
131            let z = u32::from(z);
132            let base = col * occ_words_per_col as usize + (z >> 5) as usize;
133            occupancy[base] |= 1u32 << (z & 31);
134            colors.push(col_rgba);
135            dirs.push(u32::from(dir));
136            if want_mats {
137                materials.push(material_for_color(material_map, col_rgba));
138            }
139        }
140    }
141    color_offsets[cols] = colors.len() as u32;
142
143    SpriteModel {
144        dims: [mx, my, mz],
145        occ_words_per_col,
146        pivot: [kv6.xpiv, kv6.ypiv, kv6.zpiv],
147        occupancy,
148        color_offsets,
149        colors,
150        dirs,
151        materials,
152        voxel_world_size: 1.0,
153    }
154}
155
156/// Build a [`SpriteModel`] directly from a decoded voxel-clip frame
157/// (VCL.2). The [`VoxelFrame`] dense-column layout is byte-for-byte the
158/// [`SpriteModel`] layout that [`build_sprite_model`] produces, so this is
159/// a field move — no per-column bucket-sort. `dirs` is the frame's
160/// surface-normal LUT indices (from [`DecodedClip::dirs`]), parallel to
161/// `frame.colors`.
162///
163/// # Panics
164/// In debug, if `dirs.len() != frame.colors.len()` or the field shapes
165/// don't match `dims` (the same invariants [`build_sprite_model`] upholds).
166#[must_use]
167pub fn sprite_model_from_voxel_frame(
168    frame: &VoxelFrame,
169    dirs: &[u32],
170    dims: [u32; 3],
171    pivot: [f32; 3],
172    voxel_world_size: f32,
173) -> SpriteModel {
174    sprite_model_from_voxel_frame_with_materials(frame, dirs, dims, pivot, voxel_world_size, &[])
175}
176
177/// Like [`sprite_model_from_voxel_frame`] but classifies each voxel into a
178/// per-voxel **material id** by colour (TV.3 mixed models) via `material_map`
179/// (`(rgb, material_id)` pairs). An empty map produces a model with no
180/// per-voxel materials (identical to [`sprite_model_from_voxel_frame`]).
181///
182/// # Panics
183/// As [`sprite_model_from_voxel_frame`].
184#[must_use]
185pub fn sprite_model_from_voxel_frame_with_materials(
186    frame: &VoxelFrame,
187    dirs: &[u32],
188    dims: [u32; 3],
189    pivot: [f32; 3],
190    voxel_world_size: f32,
191    material_map: &[(u32, u8)],
192) -> SpriteModel {
193    let occ_words_per_col = dims[2].div_ceil(32).max(1);
194    let cols = (dims[0] * dims[1]) as usize;
195    debug_assert_eq!(frame.occupancy.len(), cols * occ_words_per_col as usize);
196    debug_assert_eq!(frame.color_offsets.len(), cols + 1);
197    debug_assert_eq!(dirs.len(), frame.colors.len());
198    // Per-voxel materials are parallel to `colors` (popcount-rank order), so
199    // classify the frame's colour run directly — no re-index needed.
200    let materials: Vec<u8> = if material_map.is_empty() {
201        Vec::new()
202    } else {
203        frame
204            .colors
205            .iter()
206            .map(|&c| material_for_color(material_map, c))
207            .collect()
208    };
209    SpriteModel {
210        dims,
211        occ_words_per_col,
212        pivot,
213        occupancy: frame.occupancy.clone(),
214        colors: frame.colors.clone(),
215        dirs: dirs.to_vec(),
216        color_offsets: frame.color_offsets.clone(),
217        materials,
218        voxel_world_size,
219    }
220}
221
222/// Build the [`SpriteModel`] for frame `frame` of a decoded clip — the
223/// per-frame model uploaded into a flipbook chain (VCL.2).
224///
225/// # Panics
226/// If `frame` is out of range, or the frame fails the layout invariants.
227#[must_use]
228pub fn sprite_model_from_clip_frame(clip: &DecodedClip, frame: usize) -> SpriteModel {
229    sprite_model_from_clip_frame_with_materials(clip, frame, &[])
230}
231
232/// Like [`sprite_model_from_clip_frame`] but classifies the frame's voxels
233/// into per-voxel material ids by colour (TV.3 mixed models) via
234/// `material_map`. An empty map is identical to [`sprite_model_from_clip_frame`].
235///
236/// # Panics
237/// If `frame` is out of range, or the frame fails the layout invariants.
238#[must_use]
239pub fn sprite_model_from_clip_frame_with_materials(
240    clip: &DecodedClip,
241    frame: usize,
242    material_map: &[(u32, u8)],
243) -> SpriteModel {
244    sprite_model_from_voxel_frame_with_materials(
245        &clip.frames[frame],
246        &clip.dirs[frame],
247        clip.dims,
248        clip.pivot,
249        clip.voxel_world_size,
250        material_map,
251    )
252}
253
254/// Per-instance transform consumed by the model-DDA shader: the
255/// inverse model→world rotation (so a world ray can be brought into
256/// model-local space) plus the instance's world position. Stored as
257/// three padded columns for std140/std430 (`mat3x3` 16-byte columns).
258#[repr(C)]
259#[derive(Clone, Copy, Pod, Zeroable, Debug)]
260pub struct SpriteInstanceTransform {
261    /// Inverse of `[s | h | f]`, column-major, each column padded to
262    /// `vec4`. `inv_rot * v = c0*v.x + c1*v.y + c2*v.z`.
263    pub inv_rot: [[f32; 4]; 3],
264    /// Instance world position (the KV6 pivot maps here).
265    pub pos: [f32; 3],
266    _pad: f32,
267}
268
269impl SpriteInstanceTransform {
270    /// Build from a sprite pose. `s/h/f` are the model→world basis
271    /// columns; we invert them so the shader can map world→local.
272    #[must_use]
273    pub fn from_sprite(sprite: &Sprite) -> Self {
274        let inv = mat3_inverse([sprite.s, sprite.h, sprite.f]);
275        Self {
276            inv_rot: [
277                [inv[0][0], inv[0][1], inv[0][2], 0.0],
278                [inv[1][0], inv[1][1], inv[1][2], 0.0],
279                [inv[2][0], inv[2][1], inv[2][2], 0.0],
280            ],
281            pos: sprite.p,
282            _pad: 0.0,
283        }
284    }
285}
286
287/// A registry of sprite models. Instances reference a model by
288/// `model_id`, which is a **LOD chain** id: each chain holds one or
289/// more concrete mip levels (finest first; GPU.10.4), and the renderer
290/// picks the level per instance by distance. Identical KV6s are added
291/// once and shared by many instances. **Copy-on-modify**:
292/// [`Self::fork`] deep-copies a chain so edits to the fork leave the
293/// parent (and its instances) intact.
294#[derive(Debug, Clone, Default)]
295pub struct SpriteModelRegistry {
296    /// Concrete mip-level volumes (the GPU buffers concatenate these).
297    entries: Vec<SpriteModel>,
298    /// `chains[model_id]` = entry ids, finest (mip-0) first.
299    chains: Vec<Vec<u32>>,
300}
301
302impl SpriteModelRegistry {
303    #[must_use]
304    pub fn new() -> Self {
305        Self::default()
306    }
307
308    fn push_entry(&mut self, model: SpriteModel) -> u32 {
309        let id = self.entries.len() as u32;
310        self.entries.push(model);
311        id
312    }
313
314    /// Register a single-level (no-LOD) model; returns its `model_id`.
315    pub fn add(&mut self, model: SpriteModel) -> u32 {
316        let e = self.push_entry(model);
317        let id = self.chains.len() as u32;
318        self.chains.push(vec![e]);
319        id
320    }
321
322    /// Register a model with up to `max_levels` LOD mips (each a 2×
323    /// [`SpriteModel::downsample`] of the previous; stops early once a
324    /// level collapses to 1³). Returns its `model_id`.
325    pub fn add_lod(&mut self, model: SpriteModel, max_levels: u32) -> u32 {
326        let mut levels = vec![self.push_entry(model.clone())];
327        let mut cur = model;
328        for _ in 1..max_levels.max(1) {
329            if cur.dims == [1, 1, 1] {
330                break;
331            }
332            cur = cur.downsample();
333            levels.push(self.push_entry(cur.clone()));
334        }
335        let id = self.chains.len() as u32;
336        self.chains.push(levels);
337        id
338    }
339
340    /// Copy-on-modify: deep-copy every level of chain `parent` into new
341    /// entries + a new chain, and return its `model_id`. The fork owns
342    /// independent voxel data, so mutating it does not affect the
343    /// parent or any instance still pointing at it.
344    ///
345    /// # Panics
346    /// If `parent` is not a registered `model_id`.
347    pub fn fork(&mut self, parent: u32) -> u32 {
348        let src = self.chains[parent as usize].clone();
349        let levels: Vec<u32> = src
350            .iter()
351            .map(|&e| {
352                let copy = self.entries[e as usize].clone();
353                self.push_entry(copy)
354            })
355            .collect();
356        let id = self.chains.len() as u32;
357        self.chains.push(levels);
358        id
359    }
360
361    /// The finest (mip-0) model of chain `id`.
362    #[must_use]
363    pub fn model(&self, id: u32) -> &SpriteModel {
364        &self.entries[self.chains[id as usize][0] as usize]
365    }
366
367    /// Like [`Self::model`] but returns `None` for an out-of-range or
368    /// tombstoned (emptied) chain instead of panicking — the guarded form
369    /// for public primitives handed an arbitrary `chain_id`.
370    #[must_use]
371    pub fn model_checked(&self, id: u32) -> Option<&SpriteModel> {
372        let entry = *self.chains.get(id as usize)?.first()?;
373        self.entries.get(entry as usize)
374    }
375
376    /// Mutable access to the finest (mip-0) model for editing — the
377    /// copy-on-modify entry point (typically on a [`Self::fork`]).
378    /// After a *structural* edit (occupancy/dims), call
379    /// [`Self::rebuild_lod`] so the coarser mips match; a pure recolour
380    /// can use [`Self::recolor_chain`] instead.
381    pub fn model_mut(&mut self, id: u32) -> &mut SpriteModel {
382        let e = self.chains[id as usize][0] as usize;
383        &mut self.entries[e]
384    }
385
386    /// Recolour every LOD level of chain `id` (so a forked tint shows
387    /// at all distances).
388    pub fn recolor_chain(&mut self, id: u32, f: impl Fn(u32) -> u32 + Copy) {
389        for li in 0..self.chains[id as usize].len() {
390            let e = self.chains[id as usize][li] as usize;
391            self.entries[e].recolor(f);
392        }
393    }
394
395    /// Regenerate chain `id`'s coarser mip levels from its (possibly
396    /// just-edited) mip-0. Run after a structural edit via
397    /// [`Self::model_mut`] so the LOD ladder stays consistent. No-op
398    /// for a single-level (no-LOD) chain.
399    pub fn rebuild_lod(&mut self, id: u32) {
400        let levels = self.chains[id as usize].clone();
401        if levels.len() <= 1 {
402            return;
403        }
404        let mut cur = self.entries[levels[0] as usize].clone();
405        for &e in &levels[1..] {
406            cur = cur.downsample();
407            self.entries[e as usize] = cur.clone();
408        }
409    }
410
411    /// Free chain `chain_id`'s voxel data **in place**: replace each of
412    /// its LOD entries with [`SpriteModel::empty`] and clear the chain.
413    /// Entry ids and every other `model_id` are **preserved** (the chain
414    /// becomes empty, its entries become placeholders), so no id remap is
415    /// needed and the resident registry's entry alignment stays intact.
416    ///
417    /// This is safe to pair with the resident side because
418    /// [`SpriteRegistryResident::remove_model`] tombstones the same
419    /// entries (`dead[e]`) and [`compact`](SpriteRegistryResident::compact)
420    /// reads only live entries — so the resident never touches the empty
421    /// placeholders left here. Call `remove_model` (resident) **before**
422    /// this so those tombstones are set. No-op if `chain_id` is out of
423    /// range or already removed.
424    pub fn remove(&mut self, chain_id: u32) {
425        let Some(entries) = self.chains.get(chain_id as usize) else {
426            return;
427        };
428        // Clone the small id list so we can mutate `entries` while iterating.
429        let entries = entries.clone();
430        for e in entries {
431            self.entries[e as usize] = SpriteModel::empty();
432        }
433        self.chains[chain_id as usize] = Vec::new(); // tombstone (slot kept)
434    }
435
436    /// Whether `chain_id` is a live (registered, not [`removed`](Self::remove))
437    /// model. `false` for an out-of-range id or a tombstoned chain.
438    #[must_use]
439    pub fn is_live(&self, chain_id: u32) -> bool {
440        self.chains
441            .get(chain_id as usize)
442            .is_some_and(|c| !c.is_empty())
443    }
444
445    /// Number of LOD chains (distinct `model_id`s). Counts tombstoned
446    /// (removed) chains too — ids are never reused, so this is also the
447    /// next id that [`Self::add`] / [`Self::add_lod`] will mint.
448    #[must_use]
449    pub fn len(&self) -> usize {
450        self.chains.len()
451    }
452
453    #[must_use]
454    pub fn is_empty(&self) -> bool {
455        self.chains.is_empty()
456    }
457}
458
459impl SpriteModel {
460    /// An empty (zero-voxel, zero-extent) placeholder model. Used by
461    /// [`SpriteModelRegistry::remove`] to free a removed chain's voxel
462    /// data while keeping its entry slot, so ids stay stable. Carries no
463    /// occupancy/colours; `color_offsets` is the single-element prefix
464    /// `[0]` (`cols + 1` with `cols == 0`), keeping the structural
465    /// invariant intact for any code that inspects it.
466    #[must_use]
467    pub fn empty() -> Self {
468        Self {
469            dims: [0, 0, 0],
470            occ_words_per_col: 1,
471            pivot: [0.0, 0.0, 0.0],
472            occupancy: Vec::new(),
473            colors: Vec::new(),
474            dirs: Vec::new(),
475            color_offsets: vec![0],
476            materials: Vec::new(),
477            voxel_world_size: 1.0,
478        }
479    }
480
481    /// Recolour every voxel via `f(old_rgba) -> new_rgba`. Structure
482    /// (occupancy / offsets) is untouched, so this is a cheap in-place
483    /// edit — handy on a [`SpriteModelRegistry::fork`] to make a tinted
484    /// variant. For structural edits, mutate the public occupancy /
485    /// colours / dims directly (via `model_mut`) then rebuild the LOD.
486    pub fn recolor(&mut self, f: impl Fn(u32) -> u32) {
487        for c in &mut self.colors {
488            *c = f(*c);
489        }
490    }
491
492    /// GPU.12 — structural edit of a single voxel within the model's
493    /// existing bounds. `Some(rgba)` sets/replaces the voxel at
494    /// `(x, y, z)`; `None` clears it. Maintains the ascending-z colour
495    /// invariant by inserting/removing at the voxel's popcount rank and
496    /// shifting the affected columns' `color_offsets`. Returns `true`
497    /// if the model changed. Out-of-bounds coordinates are ignored
498    /// (returns `false`) — growing `dims` is a separate concern.
499    ///
500    /// After editing, call [`SpriteModelRegistry::rebuild_lod`] to
501    /// refresh coarser mips, then re-upload via `set_sprite_instances`.
502    pub fn set_voxel(&mut self, x: u32, y: u32, z: u32, color: Option<u32>) -> bool {
503        if x >= self.dims[0] || y >= self.dims[1] || z >= self.dims[2] {
504            return false;
505        }
506        let owpc = self.occ_words_per_col as usize;
507        let cols = (self.dims[0] * self.dims[1]) as usize;
508        let col = (x + y * self.dims[0]) as usize;
509        let base = col * owpc;
510        let zw = (z >> 5) as usize;
511        let zb = z & 31;
512
513        // Rank = solid voxels strictly below z in this column.
514        let mut rank = 0usize;
515        for w in 0..zw {
516            rank += self.occupancy[base + w].count_ones() as usize;
517        }
518        let below_mask = if zb > 0 { (1u32 << zb) - 1 } else { 0 };
519        rank += (self.occupancy[base + zw] & below_mask).count_ones() as usize;
520        let idx = self.color_offsets[col] as usize + rank;
521        let was_set = (self.occupancy[base + zw] >> zb) & 1 == 1;
522
523        if let Some(rgba) = color {
524            if was_set {
525                self.colors[idx] = rgba; // replace in place (keeps dir)
526            } else {
527                self.occupancy[base + zw] |= 1u32 << zb;
528                self.colors.insert(idx, rgba);
529                // No normal supplied by this API — default to dir 0 (the
530                // sole caller, the carve hotkey, only ever clears).
531                self.dirs.insert(idx, 0);
532                if !self.materials.is_empty() {
533                    self.materials.insert(idx, 0); // new voxel → opaque material
534                }
535                for c in &mut self.color_offsets[col + 1..=cols] {
536                    *c += 1;
537                }
538            }
539            true
540        } else {
541            if !was_set {
542                return false;
543            }
544            self.occupancy[base + zw] &= !(1u32 << zb);
545            self.colors.remove(idx);
546            self.dirs.remove(idx);
547            if !self.materials.is_empty() {
548                self.materials.remove(idx);
549            }
550            for c in &mut self.color_offsets[col + 1..=cols] {
551                *c -= 1;
552            }
553            true
554        }
555    }
556
557    /// Radius of a bounding sphere centred at the instance position
558    /// (the pivot maps there): the farthest bbox corner from the
559    /// pivot. Used for frustum culling. Assumes a unit basis; scaled
560    /// instances would multiply this by their max basis length.
561    #[must_use]
562    pub fn bound_radius(&self) -> f32 {
563        let mut r2 = 0.0_f32;
564        for &cx in &[0.0, self.dims[0] as f32] {
565            for &cy in &[0.0, self.dims[1] as f32] {
566                for &cz in &[0.0, self.dims[2] as f32] {
567                    let d = [cx - self.pivot[0], cy - self.pivot[1], cz - self.pivot[2]];
568                    r2 = r2.max(d[0] * d[0] + d[1] * d[1] + d[2] * d[2]);
569                }
570            }
571        }
572        r2.sqrt()
573    }
574
575    /// GPU.10.4 — 2× voxel downsample for the next LOD level. A coarse
576    /// voxel is solid if any of its 2×2×2 fine voxels is, coloured by
577    /// their per-channel average. Dims/pivot halve and
578    /// `voxel_world_size` doubles, so the coarse model occupies the
579    /// same world box at half the resolution (origin-corner aligned).
580    #[must_use]
581    #[allow(clippy::manual_checked_ops)] // `n > 0` guards 4 divisions, not one checked_div
582    pub fn downsample(&self) -> SpriteModel {
583        let [fx, fy, fz] = self.dims;
584        let fidx = |x: u32, y: u32, z: u32| (x + y * fx + z * fx * fy) as usize;
585
586        // Reconstruct dense fine voxels (solid flag + colour + normal + TV
587        // material).
588        let has_mats = !self.materials.is_empty();
589        let mut solid = vec![false; (fx * fy * fz) as usize];
590        let mut fine = vec![0u32; (fx * fy * fz) as usize];
591        let mut fine_dir = vec![0u32; (fx * fy * fz) as usize];
592        let mut fine_mat = vec![0u8; (fx * fy * fz) as usize];
593        for x in 0..fx {
594            for y in 0..fy {
595                let col = (x + y * fx) as usize;
596                let base = col * self.occ_words_per_col as usize;
597                let off = self.color_offsets[col] as usize;
598                let mut seen = 0usize;
599                for z in 0..fz {
600                    let w = base + (z >> 5) as usize;
601                    if (self.occupancy[w] >> (z & 31)) & 1 == 1 {
602                        fine[fidx(x, y, z)] = self.colors[off + seen];
603                        fine_dir[fidx(x, y, z)] = self.dirs[off + seen];
604                        if has_mats {
605                            fine_mat[fidx(x, y, z)] = self.materials[off + seen];
606                        }
607                        solid[fidx(x, y, z)] = true;
608                        seen += 1;
609                    }
610                }
611            }
612        }
613
614        let nx = fx.div_ceil(2).max(1);
615        let ny = fy.div_ceil(2).max(1);
616        let nz = fz.div_ceil(2).max(1);
617        let owpc = nz.div_ceil(32).max(1);
618        let cols = (nx * ny) as usize;
619        let mut occupancy = vec![0u32; cols * owpc as usize];
620        let mut color_offsets = vec![0u32; cols + 1];
621        let mut colors: Vec<u32> = Vec::new();
622        let mut dirs: Vec<u32> = Vec::new();
623        let mut materials: Vec<u8> = Vec::new();
624
625        // Emit in column-index order (`ccol = cx + cy*nx`), cy outer,
626        // so `color_offsets` is a monotonic prefix sum like build's.
627        for cy in 0..ny {
628            for cx in 0..nx {
629                let ccol = (cx + cy * nx) as usize;
630                color_offsets[ccol] = colors.len() as u32;
631                for cz in 0..nz {
632                    let (mut a, mut r, mut g, mut b, mut n) = (0u32, 0u32, 0u32, 0u32, 0u32);
633                    // Normals + materials don't average meaningfully — keep
634                    // the first solid child's `dir` / material for the coarse
635                    // voxel.
636                    let mut rep_dir = 0u32;
637                    let mut rep_mat = 0u8;
638                    for dz in 0..2 {
639                        for dy in 0..2 {
640                            for dx in 0..2 {
641                                let (x, y, z) = (2 * cx + dx, 2 * cy + dy, 2 * cz + dz);
642                                if x < fx && y < fy && z < fz && solid[fidx(x, y, z)] {
643                                    let c = fine[fidx(x, y, z)];
644                                    if n == 0 {
645                                        rep_dir = fine_dir[fidx(x, y, z)];
646                                        rep_mat = fine_mat[fidx(x, y, z)];
647                                    }
648                                    a += (c >> 24) & 0xff;
649                                    r += (c >> 16) & 0xff;
650                                    g += (c >> 8) & 0xff;
651                                    b += c & 0xff;
652                                    n += 1;
653                                }
654                            }
655                        }
656                    }
657                    if n > 0 {
658                        let avg = ((a / n) << 24) | ((r / n) << 16) | ((g / n) << 8) | (b / n);
659                        let base = ccol * owpc as usize + (cz >> 5) as usize;
660                        occupancy[base] |= 1u32 << (cz & 31);
661                        colors.push(avg);
662                        dirs.push(rep_dir);
663                        if has_mats {
664                            materials.push(rep_mat);
665                        }
666                    }
667                }
668            }
669        }
670        color_offsets[cols] = colors.len() as u32;
671
672        SpriteModel {
673            dims: [nx, ny, nz],
674            occ_words_per_col: owpc,
675            pivot: [
676                self.pivot[0] * 0.5,
677                self.pivot[1] * 0.5,
678                self.pivot[2] * 0.5,
679            ],
680            occupancy,
681            colors,
682            dirs,
683            color_offsets,
684            materials,
685            voxel_world_size: self.voxel_world_size * 2.0,
686        }
687    }
688}
689
/// World-space view frustum used to cull sprite instances on the CPU.
///
/// It is rebuilt every frame from the world camera. `half_w` and
/// `half_h` hold the tangents of the half field of view, which makes the
/// side planes `|x| <= half_w * z` (and likewise for `y`) in camera
/// space.
#[derive(Clone, Copy, Debug)]
pub struct ViewFrustum {
    /// Camera position.
    pub pos: [f32; 3],
    /// Camera "right" axis.
    pub right: [f32; 3],
    /// Camera "down" axis.
    pub down: [f32; 3],
    /// Camera "forward" axis.
    pub forward: [f32; 3],
    /// Tangent of the horizontal half-FOV.
    pub half_w: f32,
    /// Tangent of the vertical half-FOV.
    pub half_h: f32,
    /// Far limit of the frustum.
    pub far: f32,
}
704
705/// CPU cull record: the GPU instance + its world bounding sphere.
706/// Not `Copy` — carries a boxed 256-entry `kv6colmul` table.
707#[derive(Clone)]
708struct CullInstance {
709    /// Instance transform + a placeholder `model_id`; the cull
710    /// overwrites `model_id` with the distance-chosen LOD entry.
711    gpu: SpriteInstanceGpu,
712    /// LOD chain this instance draws (the user-facing `model_id`).
713    chain_id: u32,
714    center: [f32; 3],
715    radius: f32,
716    /// voxlap `kv6colmul[256]` — per-surface-normal colour modulation
717    /// for this instance's pose + lighting. Defaults to identity
718    /// (`0x0100` in every channel lane → unshaded) until the facade sets
719    /// it via [`SpriteRegistryResident::set_instance_colmul`]. Packed
720    /// into the `colmul` GPU buffer (in visible order) each frame.
721    colmul: Box<[u64; 256]>,
722}
723
/// The neutral `kv6colmul` table. Each of the four 16-bit channel lanes
/// holds `0x0100`, so the shader's `(rgb[c] << 8) * 0x0100 >> 16` gives
/// back `rgb[c]` unchanged — no shading is applied.
fn identity_colmul() -> Box<[u64; 256]> {
    const LANE: u64 = 0x0100;
    // Replicate the lane value into the four 16-bit slots of one word.
    let packed = (0..4).fold(0u64, |acc, slot| acc | (LANE << (16 * slot)));
    Box::new([packed; 256])
}
731
/// Dot product of two 3-component vectors.
fn dot3(a: [f32; 3], b: [f32; 3]) -> f32 {
    let [ax, ay, az] = a;
    let [bx, by, bz] = b;
    ax * bx + ay * by + az * bz
}
735
736/// Build one CPU cull record from a user [`SpriteInstance`]: pack the
737/// transform, seed the bounding sphere from the chain's finest model, and
738/// start `colmul` at identity. Shared by the full
739/// [`SpriteRegistryResident::upload`] and the incremental
740/// [`SpriteRegistryResident::append_instances`].
741fn make_cull(registry: &SpriteModelRegistry, i: &SpriteInstance) -> CullInstance {
742    CullInstance {
743        gpu: SpriteInstanceGpu {
744            inv_rot0: i.transform.inv_rot[0],
745            inv_rot1: i.transform.inv_rot[1],
746            inv_rot2: i.transform.inv_rot[2],
747            pos: i.transform.pos,
748            model_id: i.model_id, // placeholder; cull rewrites per frame
749            material: u32::from(i.material),
750            alpha_mul: f32::from(i.alpha_mul) / 255.0,
751            flags: i.flags,
752            _pad1: 0,
753        },
754        chain_id: i.model_id,
755        center: i.transform.pos,
756        radius: registry.model(i.model_id).bound_radius(),
757        colmul: identity_colmul(),
758    }
759}
760
761/// Allocate the `instances` capacity buffer (`STORAGE | COPY_DST`) sized
762/// for `cap` records (≥1). Left uninitialised — `cull_bin_upload`
763/// rewrites it (offset 0) each frame, and `append_instances` seeds the
764/// live records after a grow.
765fn instances_buffer(device: &wgpu::Device, cap: u32) -> wgpu::Buffer {
766    device.create_buffer(&wgpu::BufferDescriptor {
767        label: Some("roxlap-gpu sprite_reg.instances"),
768        size: u64::from(cap.max(1)) * std::mem::size_of::<SpriteInstanceGpu>() as u64,
769        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
770        mapped_at_creation: false,
771    })
772}
773
774/// One sprite instance: a model reference + world pose.
775#[derive(Debug, Clone, Copy)]
776pub struct SpriteInstance {
777    pub model_id: u32,
778    pub transform: SpriteInstanceTransform,
779    /// Voxel-material id (TV stage): indexes the renderer's global material
780    /// palette for this instance's opacity + blend mode. `0` (the default)
781    /// is opaque, so an unset instance renders unchanged.
782    pub material: u8,
783    /// Per-instance alpha multiplier (TV stage), `0..=255` (`255` =
784    /// unscaled, the default).
785    pub alpha_mul: u8,
786    /// XS.4 — sprite shadow flags (`roxlap_formats::sprite` bits 4/5:
787    /// `NO_SHADOW_CAST` / `NO_SHADOW_RECEIVE`). `0` (default) ⇒ casts +
788    /// receives. Only honoured when the device is sprite-shadow capable.
789    pub flags: u32,
790}
791
792impl SpriteInstance {
793    /// A model reference + pose with the default opaque material
794    /// (`material = 0`, `alpha_mul = 255`) and shadows on (`flags = 0`).
795    #[must_use]
796    pub fn new(model_id: u32, transform: SpriteInstanceTransform) -> Self {
797        Self {
798            model_id,
799            transform,
800            material: 0,
801            alpha_mul: 255,
802            flags: 0,
803        }
804    }
805}
806
807/// GPU per-model metadata: where this model's data starts in the
808/// shared registry buffers + its dims/pivot. Mirrors `ModelMeta` in
809/// the shader (std430, 48 bytes).
810#[repr(C)]
811#[derive(Clone, Copy, Pod, Zeroable, Debug)]
812struct SpriteModelMeta {
813    occupancy_offset: u32,
814    colors_offset: u32,
815    color_offsets_offset: u32,
816    occ_words_per_col: u32,
817    dims: [u32; 3],
818    /// TV.3 — 1 if this model has per-voxel materials (`materials_vox` is
819    /// populated for it); 0 ⇒ use the instance's uniform material.
820    has_vox_materials: u32,
821    pivot: [f32; 3],
822    /// GPU.10.4 — world size of one voxel of this (mip) entry.
823    voxel_world_size: f32,
824}
825
826/// GPU per-instance record. Mirrors `Instance` in the shader (std430,
827/// 80 bytes): inverse rotation columns + position + model id + the TV
828/// material id and per-instance alpha multiplier.
829#[repr(C)]
830#[derive(Clone, Copy, Pod, Zeroable, Debug)]
831struct SpriteInstanceGpu {
832    inv_rot0: [f32; 4],
833    inv_rot1: [f32; 4],
834    inv_rot2: [f32; 4],
835    pos: [f32; 3],
836    model_id: u32,
837    /// TV: material id into the global palette (binding 12).
838    material: u32,
839    /// TV: per-instance alpha multiplier, normalised to `0..=1`.
840    alpha_mul: f32,
841    /// XS.4 — sprite shadow flags (mirror of `roxlap_formats::sprite` bits 4/5):
842    /// bit4 = NO_SHADOW_CAST, bit5 = NO_SHADOW_RECEIVE. `0` ⇒ casts + receives.
843    flags: u32,
844    _pad1: u32,
845}
846
/// Invert a 3×3 matrix supplied as its basis columns `[c0, c1, c2]` and
/// return the inverse, again as columns.
///
/// Uses the adjugate / scalar-triple-product form, so it handles rotation
/// combined with non-unit scale (for an orthonormal basis the result is
/// simply the transpose). When `|det| < 1e-12` the matrix is treated as
/// singular and every cofactor is scaled by `0.0` instead of `1 / det`.
#[must_use]
fn mat3_inverse(cols: [[f32; 3]; 3]) -> [[f32; 3]; 3] {
    /// Cross product `u × v`.
    fn cross(u: [f32; 3], v: [f32; 3]) -> [f32; 3] {
        [
            u[1] * v[2] - u[2] * v[1],
            u[2] * v[0] - u[0] * v[2],
            u[0] * v[1] - u[1] * v[0],
        ]
    }

    let [a, b, c] = cols;

    // Unscaled rows of the inverse.
    let row0 = cross(b, c);
    let row1 = cross(c, a);
    let row2 = cross(a, b);

    // Determinant as the scalar triple product a · (b × c).
    let det = a[0] * row0[0] + a[1] * row0[1] + a[2] * row0[2];
    let inv_det = if det.abs() < 1e-12 { 0.0 } else { 1.0 / det };

    // Column j of the inverse gathers element j of each row.
    std::array::from_fn(|j| [row0[j] * inv_det, row1[j] * inv_det, row2[j] * inv_det])
}
874
875/// GPU-resident registry + instances: every model's occupancy /
876/// colours / offsets concatenated into shared storage buffers, a
877/// per-model metadata table, and a capacity-sized instance buffer
878/// rewritten each frame with the frustum-visible subset (GPU.10.2).
879/// One bind group serves all models (same approach as the multi-grid
880/// scene).
881pub struct SpriteRegistryResident {
882    pub occupancy: wgpu::Buffer,
883    pub colors: wgpu::Buffer,
884    /// Per-voxel surface-normal index, concatenated across models in the
885    /// same layout as [`colors`](Self::colors). The shader indexes the
886    /// per-instance `kv6colmul` table by it.
887    pub dirs: wgpu::Buffer,
888    /// Per-voxel material id (TV.3), same layout as [`colors`](Self::colors)
889    /// (one u32 per voxel). `0` for models without per-voxel materials; the
890    /// per-model `has_vox_materials` flag in `model_meta` says whether to use
891    /// it (else the shader falls back to the instance's uniform material).
892    pub materials_vox: wgpu::Buffer,
893    pub color_offsets: wgpu::Buffer,
894    pub model_meta: wgpu::Buffer,
895    /// Holds up to `instance_capacity` instances; the visible subset
896    /// is packed into `[0, count)` each frame by [`Self::cull_bin_upload`].
897    pub instances: wgpu::Buffer,
898    pub instance_capacity: u32,
899    /// Per-visible-instance `kv6colmul[256]` tables, packed in the same
900    /// order as the `instances` buffer each frame (two u32 per u64
901    /// entry: lanes 0|1 then 2|3). Sized `instance_capacity * 256 * 2`
902    /// u32; rewritten by [`Self::cull_bin_upload`].
903    pub colmul: wgpu::Buffer,
904    colmul_cap: u32,
905    /// GPU.10.3 — per-tile `(offset, count)` into `tile_instances`,
906    /// flat `2 * tiles_x * tiles_y` u32s. Grown to fit the screen.
907    pub tile_ranges: wgpu::Buffer,
908    tile_ranges_cap: u32,
909    /// GPU.10.3 — flat list of visible-instance indices grouped by
910    /// tile. Grown to fit the per-frame total.
911    pub tile_instances: wgpu::Buffer,
912    tile_instances_cap: u32,
913    /// CPU cull records (full set), with precomputed bounding spheres.
914    cull: Vec<CullInstance>,
915    /// GPU.10.4 — LOD chains: `chains[chain_id]` = entry ids, finest
916    /// first. The cull picks a level by distance and writes its entry
917    /// id into the packed instance's `model_id`.
918    chains: Vec<Vec<u32>>,
919    /// GPU.12 incremental — CPU mirror of the GPU `model_meta` table, one
920    /// per concrete entry. [`Self::update_model`] reads the fixed
921    /// occupancy/color_offsets bases from here and rewrites the changed
922    /// `colors_offset` on a relocation.
923    meta: Vec<SpriteModelMeta>,
924    /// GPU.12 incremental — per-entry placement of `colors`/`dirs` in the
925    /// shared buffers (drives both; same offsets/ranks). Lets an edit
926    /// re-upload one model's data without touching the others.
927    colors_alloc: ColorsAllocator,
928    /// Per-entry word length of the dims-fixed `occupancy` and
929    /// `color_offsets` arrays, kept so [`Self::update_model`] can assert a
930    /// carve never changed dims (which would invalidate the in-place
931    /// writes — growing dims is out of scope, handled by a full re-upload).
932    occ_lens: Vec<u32>,
933    coloff_lens: Vec<u32>,
934    /// Used / allocated words of the tightly-concatenated `occupancy`
935    /// buffer. `add_model` bump-appends at `occ_used`; when it would pass
936    /// `occ_cap` the buffer is grown (with slack) and rebuilt from the
937    /// registry. (`colors`/`dirs` track theirs in [`ColorsAllocator`].)
938    occ_used: u32,
939    occ_cap: u32,
940    /// Used / allocated words of the tightly-concatenated `color_offsets`
941    /// buffer — same growth scheme as `occ_*`.
942    coloff_used: u32,
943    coloff_cap: u32,
944    /// Allocated record count of the `model_meta` buffer; `add_model`
945    /// grows it (with slack) when the entry count passes it.
946    meta_cap: u32,
947    /// Per-entry tombstone: `true` once its model was removed
948    /// ([`Self::remove_model`]). Dead entries keep their `meta` slot (so
949    /// entry ids — and the caller's `chain_id`s — stay stable) but their
950    /// colours are freed for reuse and they contribute nothing to a
951    /// repack / [`Self::compact`]. Parallel to `meta`.
952    dead: Vec<bool>,
953}
954
/// Selects which of the two tightly-concatenated registry buffers
/// `SpriteRegistryResident::sync_concat` operates on.
#[derive(Clone, Copy)]
enum ConcatBuf {
    /// The shared `occupancy` buffer.
    Occupancy,
    /// The shared `color_offsets` buffer.
    ColorOffsets,
}
962
963/// The model's source array for a given [`ConcatBuf`] — a free fn (not a
964/// closure) so the returned borrow keeps `m`'s lifetime.
965fn concat_data(m: &SpriteModel, which: ConcatBuf) -> &[u32] {
966    match which {
967        ConcatBuf::Occupancy => &m.occupancy,
968        ConcatBuf::ColorOffsets => &m.color_offsets,
969    }
970}
971
972impl SpriteRegistryResident {
973    /// Concatenate `registry`'s models into shared buffers and prepare
974    /// `instances` for per-frame culling. Model-relative indices stay
975    /// as built; the shader adds each model's base offset from the
976    /// metadata table.
977    #[must_use]
978    pub fn upload(
979        device: &wgpu::Device,
980        registry: &SpriteModelRegistry,
981        instances: &[SpriteInstance],
982    ) -> Self {
983        // `occupancy` + `color_offsets` are dims-fixed → tightly
984        // concatenated (never grow on a carve). `colors` + `dirs` are
985        // variable → laid out by the suballocator with per-slot slack so
986        // an incremental edit can rewrite one model in place.
987        let entry_lens: Vec<u32> = registry
988            .entries
989            .iter()
990            .map(|m| m.colors.len() as u32)
991            .collect();
992        let colors_alloc = ColorsAllocator::new(&entry_lens);
993        let cap_total = colors_alloc.cap_total();
994
995        let mut all_occ: Vec<u32> = Vec::new();
996        let mut all_offsets: Vec<u32> = Vec::new();
997        let mut all_colors: Vec<u32> = vec![0; cap_total as usize];
998        let mut all_dirs: Vec<u32> = vec![0; cap_total as usize];
999        let mut all_materials: Vec<u32> = vec![0; cap_total as usize];
1000        let mut meta: Vec<SpriteModelMeta> = Vec::with_capacity(registry.entries.len());
1001        let mut occ_lens: Vec<u32> = Vec::with_capacity(registry.entries.len());
1002        let mut coloff_lens: Vec<u32> = Vec::with_capacity(registry.entries.len());
1003
1004        // One meta + placed data per concrete (mip-level) entry.
1005        for (e, m) in registry.entries.iter().enumerate() {
1006            let slot = colors_alloc.slot(e);
1007            meta.push(SpriteModelMeta {
1008                occupancy_offset: all_occ.len() as u32,
1009                colors_offset: slot.off,
1010                color_offsets_offset: all_offsets.len() as u32,
1011                occ_words_per_col: m.occ_words_per_col,
1012                dims: m.dims,
1013                has_vox_materials: u32::from(!m.materials.is_empty()),
1014                pivot: m.pivot,
1015                voxel_world_size: m.voxel_world_size,
1016            });
1017            occ_lens.push(m.occupancy.len() as u32);
1018            coloff_lens.push(m.color_offsets.len() as u32);
1019            all_occ.extend_from_slice(&m.occupancy);
1020            all_offsets.extend_from_slice(&m.color_offsets);
1021            let off = slot.off as usize;
1022            all_colors[off..off + m.colors.len()].copy_from_slice(&m.colors);
1023            all_dirs[off..off + m.dirs.len()].copy_from_slice(&m.dirs);
1024            for (i, &mat) in m.materials.iter().enumerate() {
1025                all_materials[off + i] = u32::from(mat);
1026            }
1027        }
1028
1029        // Per-instance cull records: sphere centred at the instance
1030        // position, radius from the chain's finest (mip-0) model.
1031        // `colmul` starts at identity (unshaded) until the facade sets
1032        // per-instance lighting via `set_instance_colmul`.
1033        let cull: Vec<CullInstance> = instances.iter().map(|i| make_cull(registry, i)).collect();
1034
1035        // Capacity buffer (COPY_DST so cull can rewrite it each frame),
1036        // seeded with the full set so frame 0 is valid pre-cull.
1037        let seed: Vec<SpriteInstanceGpu> = cull.iter().map(|c| c.gpu).collect();
1038        let instances_buf = {
1039            use wgpu::util::DeviceExt;
1040            let one = [SpriteInstanceGpu::zeroed()];
1041            let src: &[SpriteInstanceGpu] = if seed.is_empty() { &one } else { &seed };
1042            device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
1043                label: Some("roxlap-gpu sprite_reg.instances"),
1044                contents: bytemuck::cast_slice(src),
1045                usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
1046            })
1047        };
1048
1049        let tile_ranges = storage_dst_u32(device, "roxlap-gpu sprite_reg.tile_ranges", 1);
1050        let tile_instances = storage_dst_u32(device, "roxlap-gpu sprite_reg.tile_instances", 1);
1051        // colmul: 256 entries × 2 u32 per visible instance. Sized to the
1052        // full instance set (worst case all visible); rewritten per frame.
1053        let colmul_cap = (cull.len() as u32).max(1) * 256 * 2;
1054        let colmul = storage_dst_u32(device, "roxlap-gpu sprite_reg.colmul", colmul_cap);
1055        Self {
1056            occupancy: storage_dst_u32_cap(
1057                device,
1058                "roxlap-gpu sprite_reg.occupancy",
1059                &all_occ,
1060                all_occ.len() as u32,
1061            ),
1062            colors: storage_dst_u32_cap(
1063                device,
1064                "roxlap-gpu sprite_reg.colors",
1065                &all_colors,
1066                cap_total,
1067            ),
1068            dirs: storage_dst_u32_cap(device, "roxlap-gpu sprite_reg.dirs", &all_dirs, cap_total),
1069            materials_vox: storage_dst_u32_cap(
1070                device,
1071                "roxlap-gpu sprite_reg.materials_vox",
1072                &all_materials,
1073                cap_total,
1074            ),
1075            color_offsets: storage_dst_u32_cap(
1076                device,
1077                "roxlap-gpu sprite_reg.color_offsets",
1078                &all_offsets,
1079                all_offsets.len() as u32,
1080            ),
1081            model_meta: storage_dst_pod(device, "roxlap-gpu sprite_reg.model_meta", &meta),
1082            instances: instances_buf,
1083            instance_capacity: cull.len() as u32,
1084            colmul,
1085            colmul_cap,
1086            tile_ranges,
1087            tile_ranges_cap: 1,
1088            tile_instances,
1089            tile_instances_cap: 1,
1090            cull,
1091            chains: registry.chains.clone(),
1092            occ_used: all_occ.len() as u32,
1093            occ_cap: all_occ.len() as u32,
1094            coloff_used: all_offsets.len() as u32,
1095            coloff_cap: all_offsets.len() as u32,
1096            meta_cap: meta.len() as u32,
1097            dead: vec![false; meta.len()],
1098            meta,
1099            colors_alloc,
1100            occ_lens,
1101            coloff_lens,
1102        }
1103    }
1104
1105    /// Number of resident instances (the cull set length).
1106    #[must_use]
1107    pub fn instance_count(&self) -> usize {
1108        self.cull.len()
1109    }
1110
1111    /// Append new instances **without** re-uploading any model volume —
1112    /// the incremental counterpart to [`Self::upload`], for streaming
1113    /// spawns (asteroids, projectiles, …). Returns the index of the first
1114    /// appended instance; the block occupies `[base, base + N)`.
1115    ///
1116    /// The model volumes are untouched, so every appended instance must
1117    /// reference a `model_id` (LOD chain) that was already present in the
1118    /// `registry` passed to [`Self::upload`]. Registering a *new* model
1119    /// still requires a full [`Self::upload`] (its voxels must be laid
1120    /// into the shared buffers). `registry` here is only read for the new
1121    /// instances' bound-sphere radii and must be the resident one.
1122    ///
1123    /// The `instances` GPU buffer is only *grown* here (power-of-two,
1124    /// amortised O(1)); its contents are **not** written. [`Self::
1125    /// cull_bin_upload`] rewrites the whole visible range from `cull` every
1126    /// frame before the sprite pass reads it — exactly as for the static
1127    /// instances — so appending only needs to extend `cull` and ensure
1128    /// capacity. Writing the buffer here too caused a mid-frame
1129    /// write-while-in-flight hazard on some drivers (a stray full-screen
1130    /// flash on append). `colmul` likewise grows lazily in
1131    /// `cull_bin_upload`. After a removal the capacity is not shrunk.
1132    pub fn append_instances(
1133        &mut self,
1134        device: &wgpu::Device,
1135        registry: &SpriteModelRegistry,
1136        instances: &[SpriteInstance],
1137    ) -> u32 {
1138        let base = self.cull.len() as u32;
1139        if instances.is_empty() {
1140            return base;
1141        }
1142        for i in instances {
1143            debug_assert!(
1144                (i.model_id as usize) < self.chains.len(),
1145                "append_instances: model_id {} not resident (run upload to register new models)",
1146                i.model_id
1147            );
1148            self.cull.push(make_cull(registry, i));
1149        }
1150        let need = self.cull.len() as u32;
1151        if need > self.instance_capacity {
1152            // Grow power-of-two and recreate the buffer (the next frame's
1153            // bind group picks up the new handle). No seed write — the
1154            // per-frame cull_bin_upload populates it.
1155            self.instance_capacity = need.next_power_of_two();
1156            self.instances = instances_buffer(device, self.instance_capacity);
1157        }
1158        base
1159    }
1160
1161    /// Remove the instance at `index` by swap-remove — O(1), no GPU work
1162    /// (the next [`Self::cull_bin_upload`] repacks the visible set from
1163    /// the shrunk cull list). Capacity is retained for reuse.
1164    ///
1165    /// Returns `Some(old_last)` when a different instance was moved into
1166    /// `index` to fill the hole (its index changed from `old_last` to
1167    /// `index` — callers holding instance handles must fix up that one),
1168    /// or `None` if `index` was the last element or out of range. Because
1169    /// this reorders, any [`Self::set_instance_colmul`] table set by
1170    /// position should be re-applied after a removal.
1171    pub fn remove_instance(&mut self, index: usize) -> Option<usize> {
1172        if index >= self.cull.len() {
1173            return None;
1174        }
1175        let last = self.cull.len() - 1;
1176        self.cull.swap_remove(index);
1177        (index != last).then_some(last)
1178    }
1179
1180    /// Set the per-instance `kv6colmul[256]` lighting tables (voxlap's
1181    /// `update_reflects` output), in the same order/length as the
1182    /// instances passed to [`Self::upload`]. The next
1183    /// [`Self::cull_bin_upload`] packs the visible subset to the GPU.
1184    /// Instances beyond `tables.len()` keep their previous tables.
1185    pub fn set_instance_colmul(&mut self, tables: &[[u64; 256]]) {
1186        for (ci, t) in self.cull.iter_mut().zip(tables) {
1187            ci.colmul.copy_from_slice(t);
1188        }
1189    }
1190
1191    /// Refresh instance poses in place from `instances` — for animated
1192    /// sprites (e.g. KFA limbs re-posed each frame) — **without** any
1193    /// model-volume re-upload. `instances` must match the set passed to
1194    /// [`Self::upload`] in length + order; each keeps its `model_id`
1195    /// (LOD chain) so only the transform + cull centre change. No GPU
1196    /// write happens here: the next [`Self::cull_bin_upload`] re-uploads
1197    /// the packed visible subset, as it already does every frame.
1198    pub fn update_transforms(&mut self, instances: &[SpriteInstance]) {
1199        debug_assert_eq!(
1200            instances.len(),
1201            self.cull.len(),
1202            "update_transforms instance count must match upload"
1203        );
1204        for (ci, inst) in self.cull.iter_mut().zip(instances) {
1205            ci.gpu.inv_rot0 = inst.transform.inv_rot[0];
1206            ci.gpu.inv_rot1 = inst.transform.inv_rot[1];
1207            ci.gpu.inv_rot2 = inst.transform.inv_rot[2];
1208            ci.gpu.pos = inst.transform.pos;
1209            // TV: material id + alpha multiplier ride the same coalesced
1210            // update as the pose (set via the facade's per-instance setters).
1211            ci.gpu.material = u32::from(inst.material);
1212            ci.gpu.alpha_mul = f32::from(inst.alpha_mul) / 255.0;
1213            // Bounding sphere follows the pivot; radius/chain unchanged.
1214            ci.center = inst.transform.pos;
1215        }
1216    }
1217
1218    /// Repoint instance `idx` at a different LOD chain — the per-frame
1219    /// **flipbook** step for animated voxel clips (VCL.2). The instance's
1220    /// transform / colmul are untouched; only which model's volume it
1221    /// draws changes. The new chain's volume must already be resident
1222    /// (uploaded via [`Self::add_model`] / [`Self::upload`]); `registry`
1223    /// is the one those uploads used (so the bounding radius reseeds from
1224    /// the new model). Like [`Self::update_transforms`], this is a CPU-side
1225    /// rewrite — the next [`Self::cull_bin_upload`] re-uploads the packed
1226    /// visible subset, so it costs nothing extra on the GPU. No-op if `idx`
1227    /// is out of range.
1228    ///
1229    /// All frames of a clip share the same `dims`, so a flipbook swap
1230    /// leaves the bounding radius unchanged; reseeding it anyway keeps the
1231    /// method correct for arbitrary chain swaps.
1232    pub fn set_instance_model(
1233        &mut self,
1234        registry: &SpriteModelRegistry,
1235        idx: usize,
1236        chain_id: u32,
1237    ) {
1238        // Guard `chain_id` (the `cull.get_mut` below only covers `idx`): a
1239        // public caller could pass an out-of-range / tombstoned chain, which
1240        // `registry.model` would index-panic on.
1241        let Some(radius) = registry
1242            .model_checked(chain_id)
1243            .map(SpriteModel::bound_radius)
1244        else {
1245            return;
1246        };
1247        let Some(ci) = self.cull.get_mut(idx) else {
1248            return;
1249        };
1250        ci.chain_id = chain_id;
1251        ci.gpu.model_id = chain_id; // placeholder; cull rewrites to the LOD entry
1252        ci.radius = radius;
1253    }
1254
1255    /// GPU.12 incremental — re-upload only the entries of LOD chain
1256    /// `chain_id` after an in-place edit (carve / recolour) of its model,
1257    /// **without** rebuilding the whole registry. `registry` must be the
1258    /// same registry uploaded (same entry ids), with chain `chain_id`'s
1259    /// entries already edited (`model_mut` + `rebuild_lod`).
1260    ///
1261    /// For each entry: occupancy + color_offsets are dims-fixed, so they
1262    /// are written in place; colors + dirs (variable, parallel) go through
1263    /// the suballocator — written in place when they fit the slack,
1264    /// relocated (with a `model_meta` rewrite) when they outgrow it, and
1265    /// only when the buffer tail overflows are colors/dirs grown + the
1266    /// whole registry repacked. Instances / cull / colmul are untouched
1267    /// (a carve never moves an instance or grows its bounds) — that is the
1268    /// win over [`Self::upload`].
1269    ///
1270    /// # Panics (debug)
1271    /// If an entry's dims changed (occupancy / color_offsets length), which
1272    /// the in-place path can't absorb — growing dims needs a full
1273    /// re-upload via [`Self::upload`].
1274    pub fn update_model(
1275        &mut self,
1276        device: &wgpu::Device,
1277        queue: &wgpu::Queue,
1278        registry: &SpriteModelRegistry,
1279        chain_id: u32,
1280    ) {
1281        let entries = self.chains[chain_id as usize].clone();
1282        let mut grew = false;
1283        for &e in &entries {
1284            let e = e as usize;
1285            let m = &registry.entries[e];
1286
1287            // Dims-fixed arrays: assert unchanged, then write in place.
1288            debug_assert_eq!(
1289                m.occupancy.len() as u32,
1290                self.occ_lens[e],
1291                "update_model: entry {e} occupancy length changed (dims grew?)"
1292            );
1293            debug_assert_eq!(
1294                m.color_offsets.len() as u32,
1295                self.coloff_lens[e],
1296                "update_model: entry {e} color_offsets length changed (dims grew?)"
1297            );
1298            queue.write_buffer(
1299                &self.occupancy,
1300                u64::from(self.meta[e].occupancy_offset) * 4,
1301                bytemuck::cast_slice(&m.occupancy),
1302            );
1303            queue.write_buffer(
1304                &self.color_offsets,
1305                u64::from(self.meta[e].color_offsets_offset) * 4,
1306                bytemuck::cast_slice(&m.color_offsets),
1307            );
1308
1309            // Variable colors/dirs via the suballocator.
1310            let new_len = m.colors.len() as u32;
1311            match self.colors_alloc.place(e, new_len) {
1312                Some(off) => {
1313                    queue.write_buffer(
1314                        &self.colors,
1315                        u64::from(off) * 4,
1316                        bytemuck::cast_slice(&m.colors),
1317                    );
1318                    queue.write_buffer(
1319                        &self.dirs,
1320                        u64::from(off) * 4,
1321                        bytemuck::cast_slice(&m.dirs),
1322                    );
1323                    let mats: Vec<u32> = m.materials.iter().map(|&x| u32::from(x)).collect();
1324                    queue.write_buffer(
1325                        &self.materials_vox,
1326                        u64::from(off) * 4,
1327                        bytemuck::cast_slice(&mats),
1328                    );
1329                    if self.meta[e].colors_offset != off {
1330                        // Relocated — rewrite this entry's meta record.
1331                        self.meta[e].colors_offset = off;
1332                        queue.write_buffer(
1333                            &self.model_meta,
1334                            (e * std::mem::size_of::<SpriteModelMeta>()) as u64,
1335                            bytemuck::bytes_of(&self.meta[e]),
1336                        );
1337                    }
1338                }
1339                None => grew = true,
1340            }
1341        }
1342
1343        // Buffer overflow on at least one entry → grow colors/dirs and
1344        // repack the WHOLE registry (rare; offsets for every entry move).
1345        if grew {
1346            self.grow_and_repack(device, queue, registry);
1347        }
1348    }
1349
1350    /// Grow the `colors`/`dirs` buffers and repack every entry compactly
1351    /// (with fresh slack) when an [`Self::update_model`] edit overflowed
1352    /// the buffer tail. Recreates both buffers (the next frame's bind
1353    /// group picks up the new handles) and rewrites every `model_meta`
1354    /// `colors_offset`. O(registry) but rare — logged so a growth burst
1355    /// is visible.
1356    fn grow_and_repack(
1357        &mut self,
1358        device: &wgpu::Device,
1359        queue: &wgpu::Queue,
1360        registry: &SpriteModelRegistry,
1361    ) {
1362        self.repack_colors_dirs(device, registry);
1363        // Every entry's colors_offset moved → rewrite the whole meta table.
1364        queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
1365    }
1366
1367    /// Repack `colors`/`dirs` compactly (with fresh slack) from the full
1368    /// `registry`, recreating both buffers and updating every CPU
1369    /// `meta[e].colors_offset`. Does **not** touch the GPU `model_meta`
1370    /// buffer — the caller writes it ([`Self::grow_and_repack`] writes the
1371    /// whole table; [`Self::add_model`] writes it once after all entries
1372    /// are placed). O(registry) but rare — logged so a growth burst is
1373    /// visible.
1374    fn repack_colors_dirs(&mut self, device: &wgpu::Device, registry: &SpriteModelRegistry) {
1375        // Dead (removed) entries collapse to 0 length so they reclaim no
1376        // space; live entries keep their colours.
1377        let new_lens: Vec<u32> = registry
1378            .entries
1379            .iter()
1380            .enumerate()
1381            .map(|(e, m)| {
1382                if self.dead[e] {
1383                    0
1384                } else {
1385                    m.colors.len() as u32
1386                }
1387            })
1388            .collect();
1389        self.colors_alloc.repack(&new_lens);
1390        let cap_total = self.colors_alloc.cap_total();
1391
1392        let mut all_colors = vec![0u32; cap_total as usize];
1393        let mut all_dirs = vec![0u32; cap_total as usize];
1394        let mut all_materials = vec![0u32; cap_total as usize];
1395        for (e, m) in registry.entries.iter().enumerate() {
1396            if self.dead[e] {
1397                self.meta[e].colors_offset = 0;
1398                continue;
1399            }
1400            let off = self.colors_alloc.slot(e).off as usize;
1401            all_colors[off..off + m.colors.len()].copy_from_slice(&m.colors);
1402            all_dirs[off..off + m.dirs.len()].copy_from_slice(&m.dirs);
1403            for (i, &mat) in m.materials.iter().enumerate() {
1404                all_materials[off + i] = u32::from(mat);
1405            }
1406            self.meta[e].colors_offset = off as u32;
1407        }
1408        self.colors = storage_dst_u32_cap(
1409            device,
1410            "roxlap-gpu sprite_reg.colors",
1411            &all_colors,
1412            cap_total,
1413        );
1414        self.dirs = storage_dst_u32_cap(device, "roxlap-gpu sprite_reg.dirs", &all_dirs, cap_total);
1415        self.materials_vox = storage_dst_u32_cap(
1416            device,
1417            "roxlap-gpu sprite_reg.materials_vox",
1418            &all_materials,
1419            cap_total,
1420        );
1421        eprintln!(
1422            "roxlap-gpu: sprite registry colors/dirs/materials grew + repacked to {cap_total} words"
1423        );
1424    }
1425
1426    /// Append a new model (its full LOD chain) to the resident registry
1427    /// **without** re-uploading the existing models' volumes — the
1428    /// incremental counterpart to a full [`Self::upload`], for streaming
1429    /// in new geometry (unique asteroids, generated meshes).
1430    ///
1431    /// Contract (mirrors [`Self::update_model`]): the caller owns the
1432    /// `SpriteModelRegistry`, has just appended this chain to it (e.g. via
1433    /// [`SpriteModelRegistry::add_lod`]), and passes the resulting
1434    /// `chain_id`. The chain's entries must be the registry's newest (ids
1435    /// `>= ` the resident entry count) — entries are append-only.
1436    ///
1437    /// The large `colors`/`dirs`/`occupancy`/`color_offsets` buffers carry
1438    /// slack and bump-append the new entries in place; a buffer that
1439    /// overflows is grown (with slack) and rebuilt once from the registry
1440    /// (amortised O(1) per add). The small `model_meta` table is rewritten
1441    /// each call. After this, [`Self::append_instances`] can reference the
1442    /// new `chain_id`.
1443    pub fn add_model(
1444        &mut self,
1445        device: &wgpu::Device,
1446        queue: &wgpu::Queue,
1447        registry: &SpriteModelRegistry,
1448        chain_id: u32,
1449    ) {
1450        let entries = registry.chains[chain_id as usize].clone();
1451        debug_assert_eq!(
1452            chain_id as usize,
1453            self.chains.len(),
1454            "add_model: chains must be appended in order"
1455        );
1456
1457        // CPU bookkeeping: assign each new entry a tight occ/coloff offset
1458        // and an allocator slot for colors/dirs. `need_colors_grow` marks
1459        // a slot that didn't fit → a colors/dirs repack below.
1460        let mut need_colors_grow = false;
1461        for &e in &entries {
1462            let e = e as usize;
1463            debug_assert_eq!(
1464                e,
1465                self.meta.len(),
1466                "add_model: entries must be appended in order"
1467            );
1468            let m = &registry.entries[e];
1469            let occ_off = self.occ_used;
1470            let coloff_off = self.coloff_used;
1471            self.occ_used += m.occupancy.len() as u32;
1472            self.coloff_used += m.color_offsets.len() as u32;
1473            let colors_off = match self.colors_alloc.push(m.colors.len() as u32) {
1474                Some(off) => off,
1475                None => {
1476                    need_colors_grow = true;
1477                    0 // placeholder; repack assigns the real offset
1478                }
1479            };
1480            self.meta.push(SpriteModelMeta {
1481                occupancy_offset: occ_off,
1482                colors_offset: colors_off,
1483                color_offsets_offset: coloff_off,
1484                occ_words_per_col: m.occ_words_per_col,
1485                dims: m.dims,
1486                has_vox_materials: u32::from(!m.materials.is_empty()),
1487                pivot: m.pivot,
1488                voxel_world_size: m.voxel_world_size,
1489            });
1490            self.occ_lens.push(m.occupancy.len() as u32);
1491            self.coloff_lens.push(m.color_offsets.len() as u32);
1492            self.dead.push(false);
1493        }
1494        self.chains.push(entries.clone());
1495
1496        // occupancy + color_offsets: grow+rebuild on overflow, else write
1497        // the new tails in place.
1498        self.sync_concat(device, queue, registry, &entries, ConcatBuf::Occupancy);
1499        self.sync_concat(device, queue, registry, &entries, ConcatBuf::ColorOffsets);
1500
1501        // colors/dirs: repack on overflow (rebuilds both + every CPU
1502        // colors_offset), else write the new entries at their slots.
1503        if need_colors_grow {
1504            self.repack_colors_dirs(device, registry);
1505        } else {
1506            for &e in &entries {
1507                let e = e as usize;
1508                let m = &registry.entries[e];
1509                let off = u64::from(self.meta[e].colors_offset) * 4;
1510                queue.write_buffer(&self.colors, off, bytemuck::cast_slice(&m.colors));
1511                queue.write_buffer(&self.dirs, off, bytemuck::cast_slice(&m.dirs));
1512                let mats: Vec<u32> = m.materials.iter().map(|&x| u32::from(x)).collect();
1513                queue.write_buffer(&self.materials_vox, off, bytemuck::cast_slice(&mats));
1514            }
1515        }
1516
1517        // model_meta: grow the record buffer if needed, then rewrite the
1518        // whole (small) table — covers both new records and any
1519        // colors_offset relocations from a repack.
1520        let count = self.meta.len() as u32;
1521        if count > self.meta_cap {
1522            self.meta_cap = grow_records(count);
1523            self.model_meta = storage_dst_pod_cap(
1524                device,
1525                "roxlap-gpu sprite_reg.model_meta",
1526                &self.meta,
1527                self.meta_cap,
1528            );
1529        } else {
1530            queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
1531        }
1532    }
1533
1534    /// Sync one tightly-concatenated buffer (`occupancy` or
1535    /// `color_offsets`) after `add_model` appended `new_entries`: if the
1536    /// used length now exceeds capacity, grow (with slack) and rebuild the
1537    /// whole buffer from the registry; otherwise write just the appended
1538    /// tails at their offsets.
1539    fn sync_concat(
1540        &mut self,
1541        device: &wgpu::Device,
1542        queue: &wgpu::Queue,
1543        registry: &SpriteModelRegistry,
1544        new_entries: &[u32],
1545        which: ConcatBuf,
1546    ) {
1547        let (used, cap) = match which {
1548            ConcatBuf::Occupancy => (self.occ_used, self.occ_cap),
1549            ConcatBuf::ColorOffsets => (self.coloff_used, self.coloff_cap),
1550        };
1551        if used > cap {
1552            let new_cap = grow_words(used);
1553            let all: Vec<u32> = registry
1554                .entries
1555                .iter()
1556                .flat_map(|m| concat_data(m, which).iter().copied())
1557                .collect();
1558            let label = match which {
1559                ConcatBuf::Occupancy => "roxlap-gpu sprite_reg.occupancy",
1560                ConcatBuf::ColorOffsets => "roxlap-gpu sprite_reg.color_offsets",
1561            };
1562            let buf = storage_dst_u32_cap(device, label, &all, new_cap);
1563            match which {
1564                ConcatBuf::Occupancy => {
1565                    self.occupancy = buf;
1566                    self.occ_cap = new_cap;
1567                }
1568                ConcatBuf::ColorOffsets => {
1569                    self.color_offsets = buf;
1570                    self.coloff_cap = new_cap;
1571                }
1572            }
1573        } else {
1574            let target = match which {
1575                ConcatBuf::Occupancy => &self.occupancy,
1576                ConcatBuf::ColorOffsets => &self.color_offsets,
1577            };
1578            for &e in new_entries {
1579                let e = e as usize;
1580                let off = match which {
1581                    ConcatBuf::Occupancy => self.meta[e].occupancy_offset,
1582                    ConcatBuf::ColorOffsets => self.meta[e].color_offsets_offset,
1583                };
1584                queue.write_buffer(
1585                    target,
1586                    u64::from(off) * 4,
1587                    bytemuck::cast_slice(concat_data(&registry.entries[e], which)),
1588                );
1589            }
1590        }
1591    }
1592
1593    /// Number of removed-but-not-yet-compacted models (tombstoned chains).
1594    /// A caller streams `add_model` / `remove_model` and calls
1595    /// [`Self::compact`] once this (relative to [`Self::live_model_count`])
1596    /// crosses a threshold.
1597    #[must_use]
1598    pub fn dead_model_count(&self) -> usize {
1599        self.chains.iter().filter(|c| c.is_empty()).count()
1600    }
1601
1602    /// Number of live (non-removed) models.
1603    #[must_use]
1604    pub fn live_model_count(&self) -> usize {
1605        self.chains.iter().filter(|c| !c.is_empty()).count()
1606    }
1607
1608    /// Remove a model (tombstone its LOD chain) — the counterpart to
1609    /// [`Self::add_model`]. O(chain length): marks the chain's entries
1610    /// dead and frees their `colors`/`dirs` slots for reuse by a later
1611    /// `add_model`. The `occupancy` / `color_offsets` holes are **not**
1612    /// reclaimed until [`Self::compact`]; entry ids (and the caller's other
1613    /// `chain_id`s) stay stable.
1614    ///
1615    /// Instances of the removed chain are **not** dropped here — they
1616    /// linger in the cull set but draw as nothing (skipped in
1617    /// [`Self::cull_bin_upload`]); the caller removes them via
1618    /// [`Self::remove_instance`] when convenient. A no-op if `chain_id` is
1619    /// out of range or already removed.
1620    pub fn remove_model(&mut self, chain_id: u32) {
1621        let Some(entries) = self.chains.get(chain_id as usize).cloned() else {
1622            return;
1623        };
1624        if entries.is_empty() {
1625            return; // already removed
1626        }
1627        for &e in &entries {
1628            let e = e as usize;
1629            self.dead[e] = true;
1630            self.colors_alloc.free(e);
1631        }
1632        self.chains[chain_id as usize] = Vec::new(); // tombstone
1633    }
1634
1635    /// Reclaim the holes left by [`Self::remove_model`]: rebuild the shared
1636    /// volume buffers from the live entries only, dropping every dead
1637    /// entry's data. Entry ids and `chain_id`s are preserved (dead entries
1638    /// keep a zero-length `meta` tombstone), so the caller's handles stay
1639    /// valid and no remap is needed.
1640    ///
1641    /// `registry` must be the resident one (entry ids 1:1, as for
1642    /// [`Self::add_model`] / [`Self::update_model`]). O(live volume) —
1643    /// call it when [`Self::dead_model_count`] is high, not every frame.
1644    pub fn compact(
1645        &mut self,
1646        device: &wgpu::Device,
1647        queue: &wgpu::Queue,
1648        registry: &SpriteModelRegistry,
1649    ) {
1650        // occupancy + color_offsets: re-pack live entries tightly, rewrite
1651        // each live entry's meta offset, zero the dead ones.
1652        self.compact_concat(device, registry, ConcatBuf::Occupancy);
1653        self.compact_concat(device, registry, ConcatBuf::ColorOffsets);
1654        // colors/dirs: the dead-aware repack already drops dead entries.
1655        self.repack_colors_dirs(device, registry);
1656        // model_meta: rewrite the (unchanged-length) table with the new
1657        // offsets. Buffer count didn't change, so no grow needed.
1658        queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
1659    }
1660
1661    /// Rebuild one tightly-concatenated buffer from live entries only
1662    /// (used by [`Self::compact`]): assign each live entry a fresh tight
1663    /// offset, zero dead entries' offset, and recreate the buffer with
1664    /// slack.
1665    fn compact_concat(
1666        &mut self,
1667        device: &wgpu::Device,
1668        registry: &SpriteModelRegistry,
1669        which: ConcatBuf,
1670    ) {
1671        let mut all: Vec<u32> = Vec::new();
1672        for e in 0..self.meta.len() {
1673            if self.dead[e] {
1674                match which {
1675                    ConcatBuf::Occupancy => self.meta[e].occupancy_offset = 0,
1676                    ConcatBuf::ColorOffsets => self.meta[e].color_offsets_offset = 0,
1677                }
1678                continue;
1679            }
1680            let off = all.len() as u32;
1681            match which {
1682                ConcatBuf::Occupancy => self.meta[e].occupancy_offset = off,
1683                ConcatBuf::ColorOffsets => self.meta[e].color_offsets_offset = off,
1684            }
1685            all.extend_from_slice(concat_data(&registry.entries[e], which));
1686        }
1687        let used = all.len() as u32;
1688        let cap = grow_words(used);
1689        let (label, buf) = match which {
1690            ConcatBuf::Occupancy => ("roxlap-gpu sprite_reg.occupancy", &mut self.occupancy),
1691            ConcatBuf::ColorOffsets => (
1692                "roxlap-gpu sprite_reg.color_offsets",
1693                &mut self.color_offsets,
1694            ),
1695        };
1696        *buf = storage_dst_u32_cap(device, label, &all, cap);
1697        match which {
1698            ConcatBuf::Occupancy => {
1699                self.occ_used = used;
1700                self.occ_cap = cap;
1701            }
1702            ConcatBuf::ColorOffsets => {
1703                self.coloff_used = used;
1704                self.coloff_cap = cap;
1705            }
1706        }
1707    }
1708
1709    /// GPU.10.3 — frustum-cull, pack the visible subset into the
1710    /// instance buffer, then bin those instances into screen tiles:
1711    /// project each visible bounding sphere to a screen AABB and append
1712    /// its (visible) index to every overlapped tile. Uploads the
1713    /// instance buffer + `tile_ranges` (per-tile offset/count) +
1714    /// `tile_instances` (flat grouped indices), growing the tile
1715    /// buffers as needed. Returns `(visible_count, tiles_x, tiles_y)`.
1716    #[allow(clippy::too_many_arguments)]
1717    pub fn cull_bin_upload(
1718        &mut self,
1719        device: &wgpu::Device,
1720        queue: &wgpu::Queue,
1721        f: &ViewFrustum,
1722        screen_w: u32,
1723        screen_h: u32,
1724        tile_size: u32,
1725        lod_px: f32,
1726    ) -> (u32, u32, u32) {
1727        let tiles_x = screen_w.div_ceil(tile_size).max(1);
1728        let tiles_y = screen_h.div_ceil(tile_size).max(1);
1729        let n_tiles = (tiles_x * tiles_y) as usize;
1730
1731        let nw = (1.0 + f.half_w * f.half_w).sqrt();
1732        let nh = (1.0 + f.half_h * f.half_h).sqrt();
1733        let cx = screen_w as f32 * 0.5;
1734        let cy = screen_h as f32 * 0.5;
1735        let px_per_world = cx / f.half_w; // isotropic: == cy/half_h
1736        let ts = tile_size as f32;
1737        let tx_max = tiles_x as i32 - 1;
1738        let ty_max = tiles_y as i32 - 1;
1739
1740        let mut visible: Vec<SpriteInstanceGpu> = Vec::with_capacity(self.cull.len());
1741        // Per-visible tile AABB (tx0, tx1, ty0, ty1) for the bin pass.
1742        let mut boxes: Vec<[i32; 4]> = Vec::with_capacity(self.cull.len());
1743        // Per-visible kv6colmul tables, flattened to two u32 per u64
1744        // entry (lanes 0|1, then 2|3), packed in visible order so the
1745        // shader indexes `colmul[inst_idx*512 + dir*2 + {0,1}]`.
1746        let mut visible_colmul: Vec<u32> = Vec::with_capacity(self.cull.len() * 512);
1747        let mut counts = vec![0u32; n_tiles];
1748
1749        for ci in &self.cull {
1750            // Skip instances of a removed model (tombstoned chain) — they
1751            // linger in `cull` until the caller drops them, but draw as
1752            // nothing.
1753            if self.chains[ci.chain_id as usize].is_empty() {
1754                continue;
1755            }
1756            let rel = [
1757                ci.center[0] - f.pos[0],
1758                ci.center[1] - f.pos[1],
1759                ci.center[2] - f.pos[2],
1760            ];
1761            let z = dot3(rel, f.forward);
1762            let r = ci.radius;
1763            if z + r < 0.0 || z - r > f.far {
1764                continue; // behind / beyond far
1765            }
1766            let x = dot3(rel, f.right);
1767            if (x - f.half_w * z) > r * nw || (-x - f.half_w * z) > r * nw {
1768                continue; // right / left
1769            }
1770            let y = dot3(rel, f.down);
1771            if (y - f.half_h * z) > r * nh || (-y - f.half_h * z) > r * nh {
1772                continue; // bottom / top
1773            }
1774
1775            // Visible: project the sphere to a screen AABB → tile range.
1776            let (tx0, tx1, ty0, ty1) = if z > 1e-3 {
1777                let sx = cx + (x / z) * px_per_world;
1778                let sy = cy + (y / z) * px_per_world;
1779                let sr = (r / z) * px_per_world;
1780                (
1781                    (((sx - sr) / ts).floor() as i32).clamp(0, tx_max),
1782                    (((sx + sr) / ts).floor() as i32).clamp(0, tx_max),
1783                    (((sy - sr) / ts).floor() as i32).clamp(0, ty_max),
1784                    (((sy + sr) / ts).floor() as i32).clamp(0, ty_max),
1785                )
1786            } else {
1787                (0, tx_max, 0, ty_max)
1788            };
1789            // GPU.10.4 — pick the LOD level by projected voxel size:
1790            // choose the coarsest level whose voxel still covers at
1791            // least `lod_px` screen pixels, i.e. step up once a mip-0
1792            // voxel would be smaller than that. `lod_px = 1` is the
1793            // natural "don't go sub-pixel" threshold; larger values
1794            // force LOD in closer (tuning/inspection).
1795            let chain = &self.chains[ci.chain_id as usize];
1796            let level = if z > 1e-3 && chain.len() > 1 {
1797                let voxel_px = px_per_world / z; // mip-0 voxel screen size
1798                ((lod_px / voxel_px).log2().ceil().max(0.0) as usize).min(chain.len() - 1)
1799            } else {
1800                0
1801            };
1802            let mut g = ci.gpu;
1803            g.model_id = chain[level];
1804            visible.push(g);
1805            boxes.push([tx0, tx1, ty0, ty1]);
1806            for &w in ci.colmul.iter() {
1807                visible_colmul.push((w & 0xffff_ffff) as u32);
1808                visible_colmul.push((w >> 32) as u32);
1809            }
1810            for ty in ty0..=ty1 {
1811                for tx in tx0..=tx1 {
1812                    counts[(ty * tiles_x as i32 + tx) as usize] += 1;
1813                }
1814            }
1815        }
1816
1817        if visible.is_empty() {
1818            return (0, tiles_x, tiles_y);
1819        }
1820
1821        // Prefix-sum counts → per-tile offsets; build the flat grouped
1822        // index list.
1823        let mut tile_ranges = vec![0u32; n_tiles * 2];
1824        let mut running = 0u32;
1825        for t in 0..n_tiles {
1826            tile_ranges[2 * t] = running; // offset
1827            tile_ranges[2 * t + 1] = counts[t]; // count
1828            running += counts[t];
1829        }
1830        let total = running as usize;
1831        let mut tile_instances = vec![0u32; total.max(1)];
1832        let mut cursor: Vec<u32> = (0..n_tiles).map(|t| tile_ranges[2 * t]).collect();
1833        for (vis_idx, b) in boxes.iter().enumerate() {
1834            for ty in b[2]..=b[3] {
1835                for tx in b[0]..=b[1] {
1836                    let t = (ty * tiles_x as i32 + tx) as usize;
1837                    tile_instances[cursor[t] as usize] = vis_idx as u32;
1838                    cursor[t] += 1;
1839                }
1840            }
1841        }
1842
1843        // Upload: instances + (grown) tile buffers. Grow a tile buffer
1844        // only when this frame needs more than its capacity (wgpu has
1845        // no Clone on Buffer, so we replace the field in place).
1846        queue.write_buffer(&self.instances, 0, bytemuck::cast_slice(&visible));
1847        let need_ranges = tile_ranges.len() as u32;
1848        if need_ranges > self.tile_ranges_cap {
1849            self.tile_ranges_cap = need_ranges.next_power_of_two();
1850            self.tile_ranges = storage_dst_u32(
1851                device,
1852                "roxlap-gpu sprite_reg.tile_ranges",
1853                self.tile_ranges_cap,
1854            );
1855        }
1856        let need_inst = tile_instances.len() as u32;
1857        if need_inst > self.tile_instances_cap {
1858            self.tile_instances_cap = need_inst.next_power_of_two();
1859            self.tile_instances = storage_dst_u32(
1860                device,
1861                "roxlap-gpu sprite_reg.tile_instances",
1862                self.tile_instances_cap,
1863            );
1864        }
1865        queue.write_buffer(&self.tile_ranges, 0, bytemuck::cast_slice(&tile_ranges));
1866        queue.write_buffer(
1867            &self.tile_instances,
1868            0,
1869            bytemuck::cast_slice(&tile_instances),
1870        );
1871        let need_colmul = visible_colmul.len() as u32;
1872        if need_colmul > self.colmul_cap {
1873            self.colmul_cap = need_colmul.next_power_of_two();
1874            self.colmul = storage_dst_u32(device, "roxlap-gpu sprite_reg.colmul", self.colmul_cap);
1875        }
1876        queue.write_buffer(&self.colmul, 0, bytemuck::cast_slice(&visible_colmul));
1877
1878        (visible.len() as u32, tiles_x, tiles_y)
1879    }
1880}
1881
/// GPU.12 incremental — per-entry placement of one model's `colors`
/// (and the parallel `dirs`) within the shared registry buffers: a
/// `[off, off+cap)` word window holding `len` live words. `cap >= len`
/// gives slack so a carve that *grows* the surface-voxel count can be
/// rewritten in place without relocating. A removed entry is the all-zero
/// slot.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
struct ColorSlot {
    /// First word of the window.
    off: u32,
    /// Window size in words (0 for a tombstone).
    cap: u32,
    /// Words currently in use (`<= cap`).
    len: u32,
}

/// First-fit suballocator over the parallel `colors`/`dirs` buffers
/// (same offsets/ranks → one allocator drives both). Each registry
/// entry owns a [`ColorSlot`]; growth past a slot's `cap` relocates it
/// (freeing the old block) via the free list or a bump tail, and only
/// when the tail would exceed `cap_total` does the caller grow + repack
/// the whole buffer. Pure (no GPU) so it unit-tests on its own.
///
/// Free blocks are reused whole (no splitting or coalescing), and the
/// free list never contains zero-capacity blocks.
#[derive(Debug, Default)]
struct ColorsAllocator {
    /// Per-entry slot, indexed by entry id.
    slots: Vec<ColorSlot>,
    /// Freed `(off, cap)` blocks available for first-fit reuse.
    free: Vec<(u32, u32)>,
    /// Next bump-allocation position (words).
    tail: u32,
    /// Total buffer capacity in words.
    cap_total: u32,
}

/// Slack-padded capacity for a `len`-word array: +25% + 16 words, so a
/// few extra surface voxels from a carve fit without relocating.
/// Saturates at `u32::MAX` instead of overflowing.
fn slot_cap(len: u32) -> u32 {
    len.saturating_add(len / 4).saturating_add(16)
}

/// Slack capacity (words) for a grown concatenated buffer: +50% + 256, so
/// a burst of `add_model` calls bump-appends rather than re-growing every
/// time. Also used for [`ColorsAllocator`]'s `cap_total` headroom.
/// Saturates at `u32::MAX` instead of overflowing.
fn grow_words(used: u32) -> u32 {
    used.saturating_add(used / 2).saturating_add(256)
}

/// Slack capacity (records) for a grown `model_meta` buffer: +50% + 8.
/// Saturates at `u32::MAX` instead of overflowing.
fn grow_records(count: u32) -> u32 {
    count.saturating_add(count / 2).saturating_add(8)
}

impl ColorsAllocator {
    /// Lay every entry out contiguously (with per-slot slack) and add a
    /// global tail headroom so early growth bump-allocates rather than
    /// repacks.
    fn new(entry_lens: &[u32]) -> Self {
        let mut a = Self::default();
        a.repack(entry_lens);
        a
    }

    /// Copy of `entry`'s current slot. Panics if `entry` is out of range.
    fn slot(&self, entry: usize) -> ColorSlot {
        self.slots[entry]
    }

    /// Total buffer capacity in words.
    fn cap_total(&self) -> u32 {
        self.cap_total
    }

    /// Repack ALL entries compactly to fit `new_lens`, resetting the
    /// free list + tail and choosing a fresh `cap_total` with headroom.
    /// Used at initial build and on a buffer grow.
    ///
    /// # Panics
    /// If the padded slots together exceed `u32` word addressing; wrapping
    /// there would silently produce overlapping slots.
    fn repack(&mut self, new_lens: &[u32]) {
        self.free.clear();
        let mut off = 0u32;
        let mut slots = Vec::with_capacity(new_lens.len());
        for &len in new_lens {
            // A 0-length (dead / removed) entry takes no space — keeps a
            // tombstone slot so entry ids stay positional.
            let cap = if len == 0 { 0 } else { slot_cap(len) };
            slots.push(ColorSlot { off, cap, len });
            off = off
                .checked_add(cap)
                .expect("ColorsAllocator::repack: layout exceeds u32 word addressing");
        }
        self.slots = slots;
        self.tail = off;
        // Global headroom: +50% + 256 words.
        self.cap_total = grow_words(off);
    }

    /// Place `new_len` words for `entry`. Returns `Some(off)` with the
    /// (possibly relocated) slot offset, or `None` if the buffer must
    /// grow + repack (the slot is then left untouched). On relocation the
    /// old block is pushed to the free list; an in-place fit returns the
    /// unchanged offset.
    fn place(&mut self, entry: usize, new_len: u32) -> Option<u32> {
        let cur = self.slots[entry];
        if new_len <= cur.cap {
            self.slots[entry].len = new_len;
            return Some(cur.off);
        }
        // Secure the new block first; only then give the old one back, so
        // the entry can never be handed its own too-small block.
        let (off, cap) = self.take_block(new_len)?;
        self.release_block(cur.off, cur.cap);
        self.slots[entry] = ColorSlot {
            off,
            cap,
            len: new_len,
        };
        Some(off)
    }

    /// Append a slot for a brand-new entry of `new_len` words (used by
    /// [`SpriteRegistryResident::add_model`]). Returns `Some(off)` placed
    /// via the free list or the bump tail, or `None` if the buffer must
    /// grow + repack — in which case **no** slot is pushed (the caller's
    /// repack rebuilds every slot from scratch).
    fn push(&mut self, new_len: u32) -> Option<u32> {
        let (off, cap) = self.take_block(new_len)?;
        self.slots.push(ColorSlot {
            off,
            cap,
            len: new_len,
        });
        Some(off)
    }

    /// Free `entry`'s slot back to the pool ([`SpriteRegistryResident::
    /// remove_model`]). Its `(off, cap)` block joins the free list for
    /// first-fit reuse by a later [`Self::push`]; the slot is zeroed so a
    /// repack treats it as a 0-length tombstone. Freeing a tombstone is a
    /// no-op.
    fn free(&mut self, entry: usize) {
        let tombstone = ColorSlot {
            off: 0,
            cap: 0,
            len: 0,
        };
        let old = std::mem::replace(&mut self.slots[entry], tombstone);
        self.release_block(old.off, old.cap);
    }

    /// Obtain an `(off, cap)` block able to hold `new_len` words: the
    /// first free block that is large enough, otherwise a fresh
    /// slack-padded block bumped off the tail. Returns `None` — leaving
    /// the allocator unchanged — when neither is possible, including when
    /// the tail position would overflow `u32`.
    fn take_block(&mut self, new_len: u32) -> Option<(u32, u32)> {
        if let Some(i) = self.free.iter().position(|&(_, cap)| cap >= new_len) {
            return Some(self.free.remove(i));
        }
        let want = slot_cap(new_len);
        let end = self.tail.checked_add(want)?;
        if end > self.cap_total {
            return None;
        }
        let off = self.tail;
        self.tail = end;
        Some((off, want))
    }

    /// Return a block to the free list. Zero-capacity blocks (tombstones)
    /// are dropped: they hold no space and would only clutter the list.
    fn release_block(&mut self, off: u32, cap: u32) {
        if cap > 0 {
            self.free.push((off, cap));
        }
    }
}
2054
2055/// Create a STORAGE buffer of u32s; pads empty input (wgpu rejects
2056/// zero-sized storage bindings).
2057#[allow(dead_code)]
2058fn storage_u32(device: &wgpu::Device, label: &str, data: &[u32]) -> wgpu::Buffer {
2059    use wgpu::util::DeviceExt;
2060    let bytes: &[u8] = if data.is_empty() {
2061        bytemuck::cast_slice(&[0u32])
2062    } else {
2063        bytemuck::cast_slice(data)
2064    };
2065    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
2066        label: Some(label),
2067        contents: bytes,
2068        usage: wgpu::BufferUsages::STORAGE,
2069    })
2070}
2071
2072/// Create an uninitialised `STORAGE | COPY_DST` `u32` buffer of `cap`
2073/// words (≥1). Written each frame via `queue.write_buffer`.
2074fn storage_dst_u32(device: &wgpu::Device, label: &str, cap: u32) -> wgpu::Buffer {
2075    device.create_buffer(&wgpu::BufferDescriptor {
2076        label: Some(label),
2077        size: u64::from(cap.max(1)) * 4,
2078        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
2079        mapped_at_creation: false,
2080    })
2081}
2082
2083/// Create a `STORAGE | COPY_DST` `u32` buffer of `cap` words (≥ data
2084/// length, ≥ 1), initialised with `data` at offset 0 and the tail left
2085/// zeroed. Unlike [`storage_u32`] (STORAGE-only, exact-size) this both
2086/// reserves spare capacity and is `COPY_DST`, so the incremental
2087/// [`SpriteRegistryResident::update_model`] can `write_buffer` a growing
2088/// `colors`/`dirs` array in place. Filled via `mapped_at_creation` so no
2089/// queue is needed at upload time.
2090fn storage_dst_u32_cap(device: &wgpu::Device, label: &str, data: &[u32], cap: u32) -> wgpu::Buffer {
2091    let cap = cap.max(data.len() as u32).max(1);
2092    let buf = device.create_buffer(&wgpu::BufferDescriptor {
2093        label: Some(label),
2094        size: u64::from(cap) * 4,
2095        usage: wgpu::BufferUsages::STORAGE
2096            | wgpu::BufferUsages::COPY_DST
2097            | wgpu::BufferUsages::COPY_SRC,
2098        mapped_at_creation: true,
2099    });
2100    if !data.is_empty() {
2101        buf.slice(..(data.len() as u64 * 4))
2102            .get_mapped_range_mut()
2103            .copy_from_slice(bytemuck::cast_slice(data));
2104    }
2105    buf.unmap();
2106    buf
2107}
2108
2109/// Create a `STORAGE | COPY_DST` buffer of Pod records, exact-size
2110/// (≥ 1, zero-padded), so individual records can be rewritten in place
2111/// by [`SpriteRegistryResident::update_model`] on a relocation. The
2112/// record *count* never changes on an incremental edit (no model is
2113/// added/removed), so no slack is needed here.
2114fn storage_dst_pod<T: Pod + Zeroable>(
2115    device: &wgpu::Device,
2116    label: &str,
2117    data: &[T],
2118) -> wgpu::Buffer {
2119    let one = [T::zeroed()];
2120    let src: &[T] = if data.is_empty() { &one } else { data };
2121    let buf = device.create_buffer(&wgpu::BufferDescriptor {
2122        label: Some(label),
2123        size: std::mem::size_of_val(src) as u64,
2124        usage: wgpu::BufferUsages::STORAGE
2125            | wgpu::BufferUsages::COPY_DST
2126            | wgpu::BufferUsages::COPY_SRC,
2127        mapped_at_creation: true,
2128    });
2129    buf.slice(..)
2130        .get_mapped_range_mut()
2131        .copy_from_slice(bytemuck::cast_slice(src));
2132    buf.unmap();
2133    buf
2134}
2135
2136/// Create a `STORAGE | COPY_DST` Pod buffer holding `cap` records
2137/// (≥ `data.len()`, ≥ 1), initialised with `data` at record 0 and the
2138/// tail zeroed. The slack lets [`SpriteRegistryResident::add_model`] grow
2139/// the `model_meta` table without re-growing on every add.
2140fn storage_dst_pod_cap<T: Pod + Zeroable>(
2141    device: &wgpu::Device,
2142    label: &str,
2143    data: &[T],
2144    cap: u32,
2145) -> wgpu::Buffer {
2146    let rec = std::mem::size_of::<T>() as u64;
2147    let cap = u64::from(cap.max(data.len() as u32).max(1));
2148    let buf = device.create_buffer(&wgpu::BufferDescriptor {
2149        label: Some(label),
2150        size: cap * rec,
2151        usage: wgpu::BufferUsages::STORAGE
2152            | wgpu::BufferUsages::COPY_DST
2153            | wgpu::BufferUsages::COPY_SRC,
2154        mapped_at_creation: true,
2155    });
2156    if !data.is_empty() {
2157        buf.slice(..(data.len() as u64 * rec))
2158            .get_mapped_range_mut()
2159            .copy_from_slice(bytemuck::cast_slice(data));
2160    }
2161    buf.unmap();
2162    buf
2163}
2164
2165/// Create a STORAGE buffer of Pod records; pads empty input with one
2166/// zeroed `T`.
2167#[allow(dead_code)]
2168fn storage_pod<T: Pod + Zeroable>(device: &wgpu::Device, label: &str, data: &[T]) -> wgpu::Buffer {
2169    use wgpu::util::DeviceExt;
2170    let one = [T::zeroed()];
2171    let src: &[T] = if data.is_empty() { &one } else { data };
2172    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
2173        label: Some(label),
2174        contents: bytemuck::cast_slice(src),
2175        usage: wgpu::BufferUsages::STORAGE,
2176    })
2177}
2178
2179#[cfg(test)]
2180mod tests {
2181    use super::*;
2182    use roxlap_formats::kv6::{Kv6, Voxel};
2183
2184    /// 2×1 kv6: column (0,0) has voxels at z=5 (red) and z=1 (green)
2185    /// stored OUT of z-order; column (1,0) has one voxel at z=3.
2186    fn kv6_unsorted() -> Kv6 {
2187        let mk = |z, col| Voxel {
2188            col,
2189            z,
2190            vis: 0,
2191            dir: 0,
2192        };
2193        Kv6 {
2194            xsiz: 2,
2195            ysiz: 1,
2196            zsiz: 8,
2197            xpiv: 0.0,
2198            ypiv: 0.0,
2199            zpiv: 0.0,
2200            voxels: vec![mk(5, 0xAA), mk(1, 0xBB), mk(3, 0xCC)],
2201            xlen: vec![2, 1],
2202            ylen: vec![vec![2], vec![1]],
2203            palette: None,
2204        }
2205    }
2206
2207    #[test]
2208    fn occupancy_bits_set_at_voxel_z() {
2209        let m = build_sprite_model(&kv6_unsorted());
2210        assert_eq!(m.dims, [2, 1, 8]);
2211        assert_eq!(m.occ_words_per_col, 1); // ceil(8/32)
2212                                            // col 0: bits 1 and 5; col 1: bit 3.
2213        assert_eq!(m.occupancy[0], (1 << 1) | (1 << 5));
2214        assert_eq!(m.occupancy[1], 1 << 3);
2215    }
2216
2217    #[test]
2218    fn colors_are_ascending_z_for_rank_lookup() {
2219        let m = build_sprite_model(&kv6_unsorted());
2220        // col 0 sorted ascending z ⇒ z=1 (green 0xBB) before z=5 (0xAA).
2221        assert_eq!(m.color_offsets, vec![0, 2, 3]);
2222        assert_eq!(&m.colors, &[0xBB, 0xAA, 0xCC]);
2223    }
2224
2225    #[test]
2226    fn identity_basis_inverts_to_identity() {
2227        let inv = mat3_inverse([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]);
2228        assert_eq!(inv, [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]);
2229    }
2230
2231    #[test]
2232    fn fork_is_independent_of_parent() {
2233        let mut reg = SpriteModelRegistry::new();
2234        let base = reg.add(build_sprite_model(&kv6_unsorted()));
2235        let forked = reg.fork(base);
2236        assert_ne!(base, forked);
2237        // Recolour only the fork.
2238        reg.model_mut(forked).recolor(|_| 0x11);
2239        // Parent colours untouched; fork fully overwritten.
2240        assert_eq!(&reg.model(base).colors, &[0xBB, 0xAA, 0xCC]);
2241        assert_eq!(&reg.model(forked).colors, &[0x11, 0x11, 0x11]);
2242    }
2243
2244    #[test]
2245    fn remove_frees_chain_data_keeps_ids_stable() {
2246        let mut reg = SpriteModelRegistry::new();
2247        let a = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2248        let b = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2249        let len_before = reg.len();
2250        assert!(reg.is_live(a) && reg.is_live(b));
2251
2252        reg.remove(a);
2253        // Chain `a` is tombstoned (its entries are freed to empty models;
2254        // they're unreachable via `model()` now — that's the tombstone).
2255        assert!(!reg.is_live(a));
2256        // `b` is untouched and still live; `len()` (next id) is unchanged.
2257        assert!(reg.is_live(b));
2258        assert_eq!(&reg.model(b).colors, &[0xBB, 0xAA, 0xCC]);
2259        assert_eq!(reg.len(), len_before);
2260
2261        // A later add mints a fresh id past the tombstone (no slot reuse).
2262        let c = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2263        assert_eq!(c, len_before as u32);
2264        assert!(reg.is_live(c));
2265        // `b`'s id stayed valid across the remove + add round-trip.
2266        assert_eq!(&reg.model(b).colors, &[0xBB, 0xAA, 0xCC]);
2267    }
2268
2269    #[test]
2270    fn model_checked_guards_out_of_range_and_tombstoned() {
2271        // The guard `set_instance_model` relies on: `model()` would
2272        // index-panic on these, `model_checked` returns `None`.
2273        let mut reg = SpriteModelRegistry::new();
2274        let a = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2275        assert!(reg.model_checked(a).is_some());
2276        assert!(reg.model_checked(9999).is_none(), "out of range → None");
2277        reg.remove(a);
2278        assert!(reg.model_checked(a).is_none(), "tombstoned chain → None");
2279    }
2280
2281    #[test]
2282    fn remove_is_idempotent_and_bounds_safe() {
2283        let mut reg = SpriteModelRegistry::new();
2284        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2285        reg.remove(a);
2286        reg.remove(a); // already removed → no-op, no panic
2287        reg.remove(999); // out of range → no-op
2288        assert!(!reg.is_live(a));
2289        assert!(!reg.is_live(999));
2290    }
2291
2292    #[test]
2293    fn registry_gpu_structs_have_expected_sizes() {
2294        assert_eq!(std::mem::size_of::<SpriteModelMeta>(), 48);
2295        // TV — grew 64 → 80 with the per-instance material id + alpha_mul
2296        // (+ 8 bytes pad to keep the 16-byte std430 stride).
2297        assert_eq!(std::mem::size_of::<SpriteInstanceGpu>(), 80);
2298    }
2299
2300    #[test]
2301    fn add_lod_builds_halving_mip_chain() {
2302        let mut reg = SpriteModelRegistry::new();
2303        // 8×8×8 single voxel-filled column model would be ideal, but
2304        // kv6_unsorted is 2×1×8 → mips: 2×1×8 → 1×1×4 → 1×1×2 → 1×1×1.
2305        let id = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
2306        let m0 = reg.model(id);
2307        assert_eq!(m0.dims, [2, 1, 8]);
2308        assert!((m0.voxel_world_size - 1.0).abs() < 1e-6);
2309    }
2310
2311    /// kv6 from explicit voxels, ordered x-major/y-inner to match
2312    /// `build_sprite_model`'s column walk.
2313    fn kv6_from(xsiz: u32, ysiz: u32, zsiz: u32, voxels: &[(u32, u32, u16, u32)]) -> Kv6 {
2314        let mut ylen = vec![vec![0u16; ysiz as usize]; xsiz as usize];
2315        let mut flat = Vec::new();
2316        for x in 0..xsiz {
2317            for y in 0..ysiz {
2318                let mut col: Vec<(u16, u32)> = voxels
2319                    .iter()
2320                    .filter(|(vx, vy, _, _)| *vx == x && *vy == y)
2321                    .map(|(_, _, z, c)| (*z, *c))
2322                    .collect();
2323                col.sort_by_key(|(z, _)| *z);
2324                ylen[x as usize][y as usize] = col.len() as u16;
2325                for (z, c) in col {
2326                    flat.push(Voxel {
2327                        col: c,
2328                        z,
2329                        vis: 0,
2330                        dir: 0,
2331                    });
2332                }
2333            }
2334        }
2335        let xlen = ylen
2336            .iter()
2337            .map(|c| c.iter().map(|&v| u32::from(v)).sum())
2338            .collect();
2339        Kv6 {
2340            xsiz,
2341            ysiz,
2342            zsiz,
2343            xpiv: 0.0,
2344            ypiv: 0.0,
2345            zpiv: 0.0,
2346            voxels: flat,
2347            xlen,
2348            ylen,
2349            palette: None,
2350        }
2351    }
2352
2353    fn offsets_consistent(m: &SpriteModel) -> bool {
2354        let cols = (m.dims[0] * m.dims[1]) as usize;
2355        if m.color_offsets.len() != cols + 1 {
2356            return false;
2357        }
2358        // Monotonic non-decreasing + last == colors.len + each column's
2359        // span == its solid-voxel count.
2360        for w in m.color_offsets.windows(2) {
2361            if w[1] < w[0] {
2362                return false;
2363            }
2364        }
2365        m.color_offsets[cols] as usize == m.colors.len()
2366    }
2367
2368    #[test]
2369    fn carve_two_layers_keeps_offsets_consistent() {
2370        // Mirror the demo's carve: columns with voxels at varied z,
2371        // some sharing z=0/z=1, some not.
2372        let kv6 = kv6_from(
2373            3,
2374            2,
2375            8,
2376            &[
2377                (0, 0, 0, 0xA0),
2378                (0, 0, 1, 0xA1),
2379                (0, 0, 5, 0xA5),
2380                (1, 0, 1, 0xB1),
2381                (2, 1, 0, 0xC0),
2382                (2, 1, 3, 0xC3),
2383            ],
2384        );
2385        let mut m = build_sprite_model(&kv6);
2386        assert!(offsets_consistent(&m));
2387        for z in 0..2u32 {
2388            for y in 0..m.dims[1] {
2389                for x in 0..m.dims[0] {
2390                    m.set_voxel(x, y, z, None);
2391                }
2392            }
2393            assert!(offsets_consistent(&m), "inconsistent after carving z={z}");
2394            // downsample must not panic on the carved model.
2395            let _ = m.downsample();
2396        }
2397    }
2398
2399    #[test]
2400    fn set_voxel_inserts_replaces_and_clears() {
2401        // col 0 starts with z=1 (0xBB), z=5 (0xAA); col 1 with z=3 (0xCC).
2402        let mut m = build_sprite_model(&kv6_unsorted());
2403
2404        // Insert z=3 into col 0 (between z=1 and z=5) → rank 1.
2405        assert!(m.set_voxel(0, 0, 3, Some(0x55)));
2406        assert_eq!(m.occupancy[0], (1 << 1) | (1 << 3) | (1 << 5));
2407        // col 0 colours ascending z: 0xBB(z1), 0x55(z3), 0xAA(z5).
2408        assert_eq!(m.color_offsets, vec![0, 3, 4]);
2409        assert_eq!(&m.colors, &[0xBB, 0x55, 0xAA, 0xCC]);
2410
2411        // Replace z=3 in place (no offset shift).
2412        assert!(m.set_voxel(0, 0, 3, Some(0x66)));
2413        assert_eq!(&m.colors, &[0xBB, 0x66, 0xAA, 0xCC]);
2414        assert_eq!(m.color_offsets, vec![0, 3, 4]);
2415
2416        // Clear z=1 (rank 0) from col 0.
2417        assert!(m.set_voxel(0, 0, 1, None));
2418        assert_eq!(m.occupancy[0], (1 << 3) | (1 << 5));
2419        assert_eq!(m.color_offsets, vec![0, 2, 3]);
2420        assert_eq!(&m.colors, &[0x66, 0xAA, 0xCC]);
2421
2422        // No-ops: clear an empty voxel, edit out of bounds.
2423        assert!(!m.set_voxel(0, 0, 2, None));
2424        assert!(!m.set_voxel(9, 0, 0, Some(1)));
2425    }
2426
2427    #[test]
2428    fn rebuild_lod_refreshes_coarse_levels_from_mip0() {
2429        let mut reg = SpriteModelRegistry::new();
2430        let id = reg.add_lod(build_sprite_model(&kv6_unsorted()), 3);
2431        // Recolour mip-0 only via model_mut, then rebuild the ladder.
2432        reg.model_mut(id).recolor(|_| 0x0000_2000);
2433        reg.rebuild_lod(id);
2434        // The mip-1 average of all-0x2000 voxels is still 0x2000.
2435        let lvl1_entry = reg.chains[id as usize][1] as usize;
2436        assert!(reg.entries[lvl1_entry]
2437            .colors
2438            .iter()
2439            .all(|&c| c == 0x0000_2000));
2440    }
2441
2442    // ---- GPU.12 incremental: colors/dirs suballocator -----------------
2443
2444    /// Every slot fits its data, has slack, doesn't overlap the next, and
2445    /// the buffer reserves tail headroom past the last slot.
2446    fn alloc_invariants(a: &ColorsAllocator, lens: &[u32]) {
2447        let mut prev_end = 0u32;
2448        for (e, &len) in lens.iter().enumerate() {
2449            let s = a.slot(e);
2450            assert_eq!(s.len, len, "slot {e} len");
2451            assert!(s.cap >= s.len, "slot {e} cap >= len");
2452            // In a freshly repacked layout slots are in entry order.
2453            assert!(s.off >= prev_end, "slot {e} overlaps previous");
2454            assert!(s.off + s.cap <= a.cap_total(), "slot {e} past cap_total");
2455            prev_end = s.off + s.cap;
2456        }
2457        assert!(a.cap_total() >= prev_end, "tail headroom");
2458    }
2459
2460    #[test]
2461    fn allocator_new_lays_out_with_slack_and_headroom() {
2462        let lens = [10u32, 0, 64, 7];
2463        let a = ColorsAllocator::new(&lens);
2464        alloc_invariants(&a, &lens);
2465        // Slack: a 64-word slot has cap > 64 so a small carve-grow fits.
2466        assert!(a.slot(2).cap > 64);
2467        // Headroom past the bump tail for early growth.
2468        assert!(a.cap_total() > a.slot(3).off + a.slot(3).cap);
2469    }
2470
2471    #[test]
2472    fn allocator_place_in_place_when_within_cap() {
2473        let mut a = ColorsAllocator::new(&[10, 20]);
2474        let off0 = a.slot(0).off;
2475        let cap0 = a.slot(0).cap;
2476        // Shrink: still the same slot.
2477        assert_eq!(a.place(0, 5), Some(off0));
2478        assert_eq!(a.slot(0).len, 5);
2479        assert_eq!(a.slot(0).cap, cap0);
2480        // Grow within slack: same offset, no relocation.
2481        assert_eq!(a.place(0, cap0), Some(off0));
2482        assert_eq!(a.slot(0).off, off0);
2483        assert!(a.free.is_empty(), "no relocation should free anything");
2484    }
2485
2486    #[test]
2487    fn allocator_place_relocates_to_tail_and_frees_old() {
2488        let mut a = ColorsAllocator::new(&[10, 20]);
2489        let old0 = (a.slot(0).off, a.slot(0).cap);
2490        let tail_before = a.tail;
2491        // Overgrow entry 0 past its cap → relocate to the bump tail.
2492        let new_len = a.slot(0).cap + 5;
2493        let off = a.place(0, new_len).expect("fits in headroom");
2494        assert_eq!(off, tail_before, "relocated to old tail");
2495        assert_eq!(a.slot(0).off, off);
2496        assert_eq!(a.slot(0).len, new_len);
2497        assert!(a.free.contains(&old0), "old slot freed");
2498    }
2499
2500    #[test]
2501    fn allocator_reuses_freed_block_first_fit() {
2502        // Entry 0 has a large slot; entry 1 a tiny one, so growing 1 must
2503        // relocate (it can't fit in place) and lands in 0's freed block.
2504        let mut a = ColorsAllocator::new(&[10, 2]);
2505        let old0 = (a.slot(0).off, a.slot(0).cap);
2506        // Relocate entry 0 to the tail, freeing its original block.
2507        let _ = a.place(0, a.slot(0).cap + 5).unwrap();
2508        assert!(a.free.contains(&old0));
2509        // Grow entry 1 past its (tiny) cap but ≤ the freed block's cap →
2510        // first-fit reuses that block rather than bumping the tail.
2511        let new1 = a.slot(1).cap + 1;
2512        assert!(new1 <= old0.1, "freed block big enough");
2513        let off = a.place(1, new1).expect("reuses freed block");
2514        assert_eq!(off, old0.0, "first-fit reused the freed slot offset");
2515        assert!(!a.free.contains(&old0), "freed block consumed");
2516    }
2517
2518    #[test]
2519    fn allocator_signals_grow_then_repack_restores() {
2520        let mut a = ColorsAllocator::new(&[8, 8]);
2521        // Force overflow: ask for far more than cap_total.
2522        let huge = a.cap_total() + 100;
2523        assert_eq!(a.place(0, huge), None, "overflow must signal grow");
2524        // Repack with the new lengths compacts + grows the buffer.
2525        a.repack(&[huge, 8]);
2526        alloc_invariants(&a, &[huge, 8]);
2527        assert!(a.cap_total() > huge);
2528        // After repack the entry now fits in place.
2529        assert_eq!(a.place(0, huge), Some(a.slot(0).off));
2530    }
2531
2532    /// Drive the allocator like a real carve loop (mirroring
2533    /// `update_model`): one model's colour count drifts up and down
2534    /// across many edits while two neighbours stay put. Growth is
2535    /// absorbed in place / via the free list / by the bump tail, and on
2536    /// the rare overflow we repack (as `update_model` does). After every
2537    /// edit the live `[off, off+len)` windows must stay disjoint.
2538    #[test]
2539    fn allocator_carve_loop_keeps_live_windows_disjoint() {
2540        let mut a = ColorsAllocator::new(&[40, 12, 40]);
2541        let mut lens = [40u32, 12, 40];
2542        // A deterministic up/down walk of entry 1's length, incl. a jump
2543        // that forces at least one grow+repack.
2544        let walk = [13u32, 30, 60, 18, 9, 80, 80, 25, 200, 7];
2545        let mut grew = false;
2546        for &len in &walk {
2547            lens[1] = len;
2548            // Entry 1 re-placed; on overflow, repack the whole set.
2549            if a.place(1, len).is_none() {
2550                grew = true;
2551                a.repack(&lens);
2552            } else {
2553                // Neighbours fit in place every time.
2554                assert_eq!(a.place(0, 40), Some(a.slot(0).off));
2555                assert_eq!(a.place(2, 40), Some(a.slot(2).off));
2556            }
2557            assert_eq!(a.slot(1).len, len);
2558
2559            // No two entries' live windows overlap.
2560            let mut wins: Vec<(u32, u32)> =
2561                (0..3).map(|e| (a.slot(e).off, a.slot(e).len)).collect();
2562            wins.sort_by_key(|w| w.0);
2563            for pair in wins.windows(2) {
2564                let (o0, l0) = pair[0];
2565                let (o1, _) = pair[1];
2566                assert!(o0 + l0 <= o1, "live windows overlap: {pair:?}");
2567            }
2568        }
2569        assert!(grew, "the 200-word jump should have forced a repack");
2570    }
2571
2572    // --- incremental instance path (device-backed; skips w/o adapter) ---
2573
2574    fn headless() -> Option<crate::HeadlessGpu> {
2575        match crate::HeadlessGpu::new_blocking(crate::GpuRendererSettings::default()) {
2576            Ok(h) => Some(h),
2577            Err(e) => {
2578                eprintln!("[skip] no GPU adapter reachable: {e}");
2579                None
2580            }
2581        }
2582    }
2583
2584    fn one_model_registry() -> (SpriteModelRegistry, u32) {
2585        let mut reg = SpriteModelRegistry::new();
2586        let id = reg.add(build_sprite_model(&kv6_unsorted()));
2587        (reg, id)
2588    }
2589
2590    fn inst(model_id: u32, pos: [f32; 3]) -> SpriteInstance {
2591        use roxlap_formats::sprite::Sprite;
2592        SpriteInstance::new(
2593            model_id,
2594            SpriteInstanceTransform::from_sprite(&Sprite::axis_aligned(kv6_unsorted(), pos)),
2595        )
2596    }
2597
2598    #[test]
2599    fn append_grows_count_and_capacity_pow2() {
2600        let Some(h) = headless() else { return };
2601        let (reg, m) = one_model_registry();
2602        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(m, [0.0; 3])]);
2603        assert_eq!(res.instance_count(), 1);
2604        assert_eq!(res.instance_capacity, 1);
2605
2606        // Append 4 → count 5, capacity grows to next_pow2(5) = 8.
2607        let more: Vec<_> = (1..=4).map(|i| inst(m, [i as f32, 0.0, 0.0])).collect();
2608        let base = res.append_instances(&h.device, &reg, &more);
2609        assert_eq!(base, 1, "first appended index follows the seed instance");
2610        assert_eq!(res.instance_count(), 5);
2611        assert_eq!(res.instance_capacity, 8, "power-of-two growth");
2612
2613        // A second append that still fits keeps the same capacity (no realloc).
2614        let base2 = res.append_instances(&h.device, &reg, &[inst(m, [9.0, 0.0, 0.0])]);
2615        assert_eq!(base2, 5);
2616        assert_eq!(res.instance_count(), 6);
2617        assert_eq!(res.instance_capacity, 8, "fits existing capacity, no grow");
2618    }
2619
2620    #[test]
2621    fn append_empty_is_noop() {
2622        let Some(h) = headless() else { return };
2623        let (reg, m) = one_model_registry();
2624        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(m, [0.0; 3])]);
2625        let base = res.append_instances(&h.device, &reg, &[]);
2626        assert_eq!(base, 1);
2627        assert_eq!(res.instance_count(), 1);
2628        assert_eq!(res.instance_capacity, 1);
2629    }
2630
2631    /// Read `words` u32s back from a GPU buffer (needs COPY_SRC).
2632    fn read_u32(h: &crate::HeadlessGpu, buf: &wgpu::Buffer, words: u64) -> Vec<u32> {
2633        let bytes = words * 4;
2634        let staging = h.device.create_buffer(&wgpu::BufferDescriptor {
2635            label: Some("readback"),
2636            size: bytes,
2637            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
2638            mapped_at_creation: false,
2639        });
2640        let mut enc = h
2641            .device
2642            .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
2643        enc.copy_buffer_to_buffer(buf, 0, &staging, 0, bytes);
2644        h.queue.submit(std::iter::once(enc.finish()));
2645        let slice = staging.slice(..);
2646        let (tx, rx) = std::sync::mpsc::channel();
2647        slice.map_async(wgpu::MapMode::Read, move |r| tx.send(r).unwrap());
2648        h.device.poll(wgpu::PollType::wait_indefinitely()).ok();
2649        rx.recv().unwrap().unwrap();
2650        let data = slice.get_mapped_range();
2651        let out = bytemuck::cast_slice::<u8, u32>(&data).to_vec();
2652        drop(data);
2653        staging.unmap();
2654        out
2655    }
2656
2657    /// A second distinct model so add_model has real new geometry to lay
2658    /// down (different dims + colours from `kv6_unsorted`).
2659    fn kv6_other() -> Kv6 {
2660        let mk = |z, col| Voxel {
2661            col,
2662            z,
2663            vis: 0,
2664            dir: 0,
2665        };
2666        Kv6 {
2667            xsiz: 1,
2668            ysiz: 1,
2669            zsiz: 4,
2670            xpiv: 0.0,
2671            ypiv: 0.0,
2672            zpiv: 0.0,
2673            voxels: vec![mk(0, 0x11), mk(2, 0x22)],
2674            xlen: vec![2],
2675            ylen: vec![vec![2]],
2676            palette: None,
2677        }
2678    }
2679
2680    /// add_model lays the new model's volume on the GPU at the offsets its
2681    /// meta record claims — verified by reading the shared buffers back
2682    /// and matching each entry against its source SpriteModel.
2683    #[test]
2684    fn add_model_uploads_new_volume_incrementally() {
2685        let Some(h) = headless() else { return };
2686
2687        // Residency starts with model A only.
2688        let mut reg = SpriteModelRegistry::new();
2689        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2690        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(a, [0.0; 3])]);
2691        assert_eq!(res.chains.len(), 1);
2692        let entries_before = res.meta.len();
2693
2694        // Append model B (single-level) to the registry, then sync it.
2695        let b = reg.add(build_sprite_model(&kv6_other()));
2696        res.add_model(&h.device, &h.queue, &reg, b);
2697        assert_eq!(res.chains.len(), 2);
2698        assert_eq!(res.meta.len(), entries_before + 1, "one new entry");
2699
2700        // Read the shared buffers back and check EVERY entry's data sits
2701        // where its meta record points — both the pre-existing A and the
2702        // newly streamed B.
2703        let occ = read_u32(&h, &res.occupancy, u64::from(res.occ_cap));
2704        let coloff = read_u32(&h, &res.color_offsets, u64::from(res.coloff_cap));
2705        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2706        for (e, m) in reg.entries.iter().enumerate() {
2707            let meta = res.meta[e];
2708            let oo = meta.occupancy_offset as usize;
2709            assert_eq!(
2710                &occ[oo..oo + m.occupancy.len()],
2711                &m.occupancy[..],
2712                "occ entry {e}"
2713            );
2714            let co = meta.color_offsets_offset as usize;
2715            assert_eq!(
2716                &coloff[co..co + m.color_offsets.len()],
2717                &m.color_offsets[..],
2718                "color_offsets entry {e}"
2719            );
2720            let cc = meta.colors_offset as usize;
2721            assert_eq!(
2722                &cols[cc..cc + m.colors.len()],
2723                &m.colors[..],
2724                "colors entry {e}"
2725            );
2726        }
2727
2728        // And an instance of the freshly-added model can now be appended.
2729        let base = res.append_instances(&h.device, &reg, &[inst(b, [5.0, 0.0, 0.0])]);
2730        assert_eq!(base, 1);
2731        assert_eq!(res.instance_count(), 2);
2732    }
2733
2734    /// Adding many small models forces the volume buffers to grow + rebuild
2735    /// at least once; every entry must still read back correctly across the
2736    /// grow boundary.
2737    #[test]
2738    fn add_model_survives_buffer_growth() {
2739        let Some(h) = headless() else { return };
2740        let mut reg = SpriteModelRegistry::new();
2741        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2742        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(a, [0.0; 3])]);
2743        let occ_cap0 = res.occ_cap;
2744
2745        // 40 adds — occupancy starts exact-sized (cap == used), so the very
2746        // first add overflows and grows; later ones ride the slack.
2747        for _ in 0..40 {
2748            let id = reg.add(build_sprite_model(&kv6_other()));
2749            res.add_model(&h.device, &h.queue, &reg, id);
2750        }
2751        assert_eq!(res.chains.len(), 41);
2752        assert!(res.occ_cap > occ_cap0, "occupancy buffer grew");
2753
2754        let occ = read_u32(&h, &res.occupancy, u64::from(res.occ_cap));
2755        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2756        for (e, m) in reg.entries.iter().enumerate() {
2757            let meta = res.meta[e];
2758            let oo = meta.occupancy_offset as usize;
2759            assert_eq!(
2760                &occ[oo..oo + m.occupancy.len()],
2761                &m.occupancy[..],
2762                "occ entry {e}"
2763            );
2764            let cc = meta.colors_offset as usize;
2765            assert_eq!(
2766                &cols[cc..cc + m.colors.len()],
2767                &m.colors[..],
2768                "colors entry {e}"
2769            );
2770        }
2771    }
2772
    /// TV.3 (clip wiring): `sprite_model_from_clip_frame_with_materials`
    /// classifies a clip frame's voxels into a per-voxel `materials` array
    /// (parallel to `colors`) by colour; an empty map leaves it empty (the
    /// all-opaque clip), identical to `sprite_model_from_clip_frame`.
    #[test]
    fn clip_frame_with_materials_classifies_by_color() {
        use roxlap_formats::voxel_clip::{LoopMode, VoxelClip, VoxelFrame};

        let dims = [1u32, 1, 4];
        let owpc = dims[2].div_ceil(32).max(1) as usize; // 1
        let glass = 0x80AA_BBCC;
        let stone = 0x8011_2233;
        let frame = VoxelFrame {
            occupancy: {
                let mut occ = vec![0u32; owpc];
                occ[0] |= (1 << 0) | (1 << 1);
                occ
            },
            colors: vec![stone, glass], // ascending z: z=0 stone, z=1 glass
            color_offsets: vec![0, 2],
        };
        let clip = VoxelClip::from_frames(
            dims,
            [0.5, 0.5, 2.0],
            1.0,
            LoopMode::Loop,
            &[frame],
            &[],
            33,
            0,
        );
        let decoded = clip.decode().expect("decode");

        // Map only the glass colour → material 2; stone stays opaque (0).
        let m = sprite_model_from_clip_frame_with_materials(&decoded, 0, &[(0x00AA_BBCC, 2)]);
        assert_eq!(
            m.materials.len(),
            m.colors.len(),
            "materials parallel to colors"
        );
        // `colors` is in popcount-rank (ascending z) order: stone then glass.
        assert_eq!(
            m.materials,
            vec![0u8, 2u8],
            "stone opaque, glass material 2"
        );

        // Empty map ⇒ no per-voxel materials, identical to the plain builder.
        let plain = sprite_model_from_clip_frame(&decoded, 0);
        let plain_mat = sprite_model_from_clip_frame_with_materials(&decoded, 0, &[]);
        assert!(plain.materials.is_empty());
        assert!(plain_mat.materials.is_empty());
        assert_eq!(plain.colors, plain_mat.colors);
    }

    /// TV.3 (streaming-clip refresh path): `build_sprite_model_with_materials`
    /// — the builder behind `GpuBackend::update_sprite_model_with_materials`,
    /// which a streaming clip re-runs each frame — classifies a kv6's voxels
    /// into a per-voxel `materials` array (popcount-rank order) by colour.
    #[test]
    fn build_with_materials_classifies_by_color() {
        let glass = 0x80AA_BBCC;
        let stone = 0x8011_2233;
        // One column (x=0,y=0), two voxels: z=0 stone, z=1 glass.
        let kv6 = kv6_from(1, 1, 4, &[(0, 0, 0, stone), (0, 0, 1, glass)]);

        let m = build_sprite_model_with_materials(&kv6, &[(0x00AA_BBCC, 2)]);
        assert_eq!(
            m.materials.len(),
            m.colors.len(),
            "materials parallel to colors"
        );
        assert_eq!(
            m.materials,
            vec![0u8, 2u8],
            "stone opaque, glass material 2"
        );

        // Empty map ⇒ no per-voxel materials, identical to `build_sprite_model`.
        let plain = build_sprite_model(&kv6);
        let plain_mat = build_sprite_model_with_materials(&kv6, &[]);
        assert!(plain.materials.is_empty());
        assert!(plain_mat.materials.is_empty());
        assert_eq!(plain.colors, plain_mat.colors);
    }

    /// VCL.2 — a decoded voxel clip's frames register as a flipbook of LOD
    /// chains, and `set_instance_model` flips which frame an instance
    /// draws. The cull state it updates is exactly what
    /// `cull_bin_upload` packs into the GPU instance buffer each frame, so
    /// flipping `chain_id` redirects the rendered instance to the new
    /// frame's resident volume.
    #[test]
    fn voxel_clip_flipbook_set_instance_model() {
        use roxlap_formats::voxel_clip::{LoopMode, VoxelClip, VoxelFrame};
        let Some(h) = headless() else { return };

        // Two distinct frames of a 1×1×4 clip: frame 0 has a voxel at z=0;
        // frame 1 adds z=1 — different occupancy + a longer colour run.
        let dims = [1u32, 1, 4];
        let owpc = dims[2].div_ceil(32).max(1) as usize; // 1
        let mk_frame = |zs: &[u32], cols: &[u32]| -> VoxelFrame {
            let mut occ = vec![0u32; owpc];
            for &z in zs {
                occ[(z >> 5) as usize] |= 1u32 << (z & 31);
            }
            VoxelFrame {
                occupancy: occ,
                colors: cols.to_vec(),
                color_offsets: vec![0, cols.len() as u32],
            }
        };
        let f0 = mk_frame(&[0], &[0x8011_2233]);
        let f1 = mk_frame(&[0, 1], &[0x8011_2233, 0x80AA_BBCC]);
        let clip = VoxelClip::from_frames(
            dims,
            [0.5, 0.5, 2.0],
            1.0,
            LoopMode::Loop,
            &[f0, f1],
            &[],
            33,
            0,
        );
        let decoded = clip.decode().expect("decode");

        // Each frame → a single-level chain; both volumes resident + distinct.
        let mut reg = SpriteModelRegistry::new();
        let c0 = reg.add(sprite_model_from_clip_frame(&decoded, 0));
        let c1 = reg.add(sprite_model_from_clip_frame(&decoded, 1));
        assert_eq!(reg.model(c0).colors.len(), 1);
        assert_eq!(reg.model(c1).colors.len(), 2);

        // One instance, in front of the test frustum, drawing frame 0.
        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(c0, [0.0, 0.0, 5.0])]);
        assert_eq!(res.cull[0].chain_id, c0);

        // Flip to frame 1: the cull now draws chain c1 (radius reseeded).
        res.set_instance_model(&reg, 0, c1);
        assert_eq!(res.cull[0].chain_id, c1);
        assert_eq!(res.cull[0].radius, reg.model(c1).bound_radius());

        // The next cull packs the new chain into the GPU instance buffer
        // (visible, no panic).
        let f = test_frustum();
        let (visible, _, _) = res.cull_bin_upload(&h.device, &h.queue, &f, 64, 64, 16, 1.0);
        assert_eq!(visible, 1);

        // …and back to frame 0.
        res.set_instance_model(&reg, 0, c0);
        assert_eq!(res.cull[0].chain_id, c0);

        // Out-of-range index is a safe no-op.
        res.set_instance_model(&reg, 99, c1);
        assert_eq!(res.cull[0].chain_id, c0);
    }
2929
2930    fn test_frustum() -> ViewFrustum {
2931        ViewFrustum {
2932            pos: [0.0, 0.0, 0.0],
2933            right: [1.0, 0.0, 0.0],
2934            down: [0.0, 1.0, 0.0],
2935            forward: [0.0, 0.0, 1.0],
2936            half_w: 1.0,
2937            half_h: 1.0,
2938            far: 10_000.0,
2939        }
2940    }
2941
2942    #[test]
2943    fn remove_model_tombstones_frees_and_reuses() {
2944        let Some(h) = headless() else { return };
2945        // Residency with models A and B, one instance each.
2946        let mut reg = SpriteModelRegistry::new();
2947        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2948        let b = reg.add(build_sprite_model(&kv6_other()));
2949        let mut res = SpriteRegistryResident::upload(
2950            &h.device,
2951            &reg,
2952            &[inst(a, [0.0; 3]), inst(b, [1.0, 0.0, 0.0])],
2953        );
2954        assert_eq!(res.live_model_count(), 2);
2955        assert_eq!(res.dead_model_count(), 0);
2956
2957        // Remove B → tombstoned, its colours freed into the pool.
2958        res.remove_model(b);
2959        assert_eq!(res.live_model_count(), 1);
2960        assert_eq!(res.dead_model_count(), 1);
2961        assert_eq!(res.dead.iter().filter(|&&d| d).count(), 1, "one entry dead");
2962        assert!(!res.colors_alloc.free.is_empty(), "B's colour slot freed");
2963
2964        // Adding C reuses the freed slot (free-list first-fit).
2965        let c = reg.add(build_sprite_model(&kv6_other()));
2966        res.add_model(&h.device, &h.queue, &reg, c);
2967        assert_eq!(res.live_model_count(), 2);
2968
2969        // A and C read back correctly; B is dead (skipped).
2970        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2971        for e in [a as usize, c as usize] {
2972            let m = &reg.entries[e];
2973            let cc = res.meta[e].colors_offset as usize;
2974            assert_eq!(
2975                &cols[cc..cc + m.colors.len()],
2976                &m.colors[..],
2977                "colors entry {e}"
2978            );
2979        }
2980
2981        // The lingering instance of removed B is skipped without panic.
2982        let f = test_frustum();
2983        let _ = res.cull_bin_upload(&h.device, &h.queue, &f, 64, 64, 16, 1.0);
2984    }
2985
2986    #[test]
2987    fn compact_reclaims_holes_keeps_ids_stable() {
2988        let Some(h) = headless() else { return };
2989        let mut reg = SpriteModelRegistry::new();
2990        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2991        let b = reg.add(build_sprite_model(&kv6_other()));
2992        let c = reg.add(build_sprite_model(&kv6_other()));
2993        let mut res = SpriteRegistryResident::upload(
2994            &h.device,
2995            &reg,
2996            &[inst(a, [0.0; 3]), inst(b, [1.0; 3]), inst(c, [2.0; 3])],
2997        );
2998        let occ_used_full = res.occ_used;
2999
3000        // Remove the middle model, then compact.
3001        res.remove_model(b);
3002        res.compact(&h.device, &h.queue, &reg);
3003
3004        // Holes reclaimed: occupancy now only covers A + C.
3005        let live_occ: u32 = [a, c]
3006            .iter()
3007            .map(|&e| reg.entries[e as usize].occupancy.len() as u32)
3008            .sum();
3009        assert_eq!(res.occ_used, live_occ);
3010        assert!(res.occ_used < occ_used_full, "compaction shrank occupancy");
3011        // Dead entry keeps a zeroed tombstone; ids unchanged.
3012        assert_eq!(res.meta[b as usize].occupancy_offset, 0);
3013        assert_eq!(res.live_model_count(), 2);
3014        assert_eq!(res.dead_model_count(), 1);
3015
3016        // Live entries read back correctly at their new offsets.
3017        let occ = read_u32(&h, &res.occupancy, u64::from(res.occ_cap));
3018        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
3019        for &e in &[a as usize, c as usize] {
3020            let m = &reg.entries[e];
3021            let oo = res.meta[e].occupancy_offset as usize;
3022            assert_eq!(
3023                &occ[oo..oo + m.occupancy.len()],
3024                &m.occupancy[..],
3025                "occ {e}"
3026            );
3027            let cc = res.meta[e].colors_offset as usize;
3028            assert_eq!(&cols[cc..cc + m.colors.len()], &m.colors[..], "cols {e}");
3029        }
3030
3031        // Chain ids still valid: C's chain still resolves; B's is empty.
3032        assert!(!res.chains[c as usize].is_empty());
3033        assert!(res.chains[b as usize].is_empty());
3034    }
3035
3036    #[test]
3037    fn remove_swap_semantics_and_capacity_retained() {
3038        let Some(h) = headless() else { return };
3039        let (reg, m) = one_model_registry();
3040        let seed: Vec<_> = (0..4).map(|i| inst(m, [i as f32, 0.0, 0.0])).collect();
3041        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &seed);
3042        assert_eq!(res.instance_count(), 4);
3043        let cap = res.instance_capacity;
3044
3045        // Remove a middle element → the previous last (idx 3) moved into it.
3046        assert_eq!(res.remove_instance(1), Some(3));
3047        assert_eq!(res.instance_count(), 3);
3048
3049        // Remove the current last (idx 2) → nothing moved.
3050        assert_eq!(res.remove_instance(2), None);
3051        assert_eq!(res.instance_count(), 2);
3052
3053        // Out of range → None.
3054        assert_eq!(res.remove_instance(99), None);
3055        assert_eq!(res.instance_count(), 2);
3056
3057        // Capacity is retained for reuse (no shrink).
3058        assert_eq!(res.instance_capacity, cap);
3059    }
3060}