Skip to main content

roxlap_gpu/
sprite_model.rs

1//! GPU.10 — KV6 sprite as a DDA-marchable voxel model.
2//!
3//! Unlike the GPU.9 splatter (one thread per voxel, screen-space
4//! squares, overdraw + atomic contention), a sprite model is a small
5//! voxel volume the precise ray-DDA marches one ray per pixel —
6//! crisp, correct occlusion, no overdraw. This is the GPU.10.0 single
7//! sprite; instancing + tiling + LOD come in later sub-substages.
8//!
9//! The volume reuses the chunk occupancy/colour scheme but sized to
10//! the KV6 bbox: per-column occupancy bitmask (`occ_words_per_col`
11//! u32s, `CHUNK_Z`-style 32-bits-per-word), a flat colour array in
12//! ascending-z order per column, and a `color_offsets` prefix table.
13//! The shader finds a voxel's colour by `offset[col] + popcount(bits
14//! below z)`, so colours MUST be ascending-z (we sort per column).
15
16#![allow(
17    clippy::cast_precision_loss,
18    clippy::cast_possible_truncation,
19    clippy::cast_possible_wrap,
20    clippy::cast_sign_loss,
21    clippy::many_single_char_names,
22    clippy::similar_names
23)]
24
25use bytemuck::{Pod, Zeroable};
26use roxlap_formats::kv6::Kv6;
27use roxlap_formats::sprite::Sprite;
28
/// CPU-built voxel volume for one KV6 model.
///
/// Columns are indexed `col = x + y * mx`. A solid voxel's colour and
/// normal live at `color_offsets[col] + rank`, where `rank` is the number
/// of occupancy bits set below its `z` within that column.
#[derive(Debug, Clone)]
pub struct SpriteModel {
    /// Voxel extent `(mx, my, mz)`.
    pub dims: [u32; 3],
    /// `max(ceil(mz / 32), 1)` — u32 words of occupancy per (x, y) column.
    pub occ_words_per_col: u32,
    /// KV6 pivot in model-local voxel space.
    pub pivot: [f32; 3],
    /// Per-column occupancy bitmask, `mx * my * occ_words_per_col` words;
    /// bit `z & 31` of word `z >> 5` marks voxel `z` of the column solid.
    pub occupancy: Vec<u32>,
    /// Voxel colours, ascending z within each column.
    pub colors: Vec<u32>,
    /// Per-voxel surface-normal index (`Kv6::Voxel::dir`, 0..256),
    /// parallel to [`colors`](Self::colors). The GPU sprite shader uses
    /// it to index the per-instance `kv6colmul` lighting table, matching
    /// the CPU rasteriser's normal-based shading.
    pub dirs: Vec<u32>,
    /// Prefix sums: `color_offsets[col]` is the first colour index of
    /// column `col`; length `mx * my + 1`, the last entry being the
    /// total voxel count.
    pub color_offsets: Vec<u32>,
    /// World-space size of one voxel of this model (GPU.10.4 LOD): 1.0
    /// at mip-0, doubling each [`SpriteModel::downsample`]. The shader
    /// divides the local ray by this so a coarse voxel spans the right
    /// world extent and the march `t` stays in world units.
    pub voxel_world_size: f32,
}
56
57/// Build the DDA volume from a KV6. Columns are packed in
58/// `x + y*mx` order; each column's voxels are sorted ascending by z
59/// so the shader's popcount-rank colour lookup is correct.
60///
61/// # Panics
62/// If the KV6's `ylen` counters disagree with `voxels.len()` (a
63/// malformed model).
64#[must_use]
65pub fn build_sprite_model(kv6: &Kv6) -> SpriteModel {
66    let (mx, my, mz) = (kv6.xsiz, kv6.ysiz, kv6.zsiz);
67    let occ_words_per_col = mz.div_ceil(32).max(1);
68    let cols = (mx * my) as usize;
69
70    let mut occupancy = vec![0u32; cols * occ_words_per_col as usize];
71    let mut color_offsets = vec![0u32; cols + 1];
72    let mut colors: Vec<u32> = Vec::with_capacity(kv6.voxels.len());
73    let mut dirs: Vec<u32> = Vec::with_capacity(kv6.voxels.len());
74
75    // Pass 1 — consume voxels in KV6 storage order (x-outer / y-inner)
76    // into per-column buckets keyed by `col = x + y*mx`. Each entry is
77    // `(z, colour, normal-dir)`.
78    let mut buckets: Vec<Vec<(u16, u32, u8)>> = vec![Vec::new(); cols];
79    let mut voxel_iter = kv6.voxels.iter();
80    for x in 0..mx {
81        for y in 0..my {
82            let col = (x + y * mx) as usize;
83            let count = kv6.ylen[x as usize][y as usize];
84            for _ in 0..count {
85                let v = voxel_iter.next().expect("KV6 ylen / voxels.len mismatch");
86                buckets[col].push((v.z, v.col, v.dir));
87            }
88        }
89    }
90
91    // Pass 2 — emit in COLUMN-INDEX order so `color_offsets` is a true
92    // monotonic prefix sum (the shader indexes by `col` either way, but
93    // structural edits / mip rebuilds rely on monotonic offsets). Each
94    // column's voxels sorted ascending z for the popcount-rank lookup.
95    for (col, bucket) in buckets.iter_mut().enumerate() {
96        color_offsets[col] = colors.len() as u32;
97        bucket.sort_by_key(|(z, _, _)| *z);
98        for &(z, col_rgba, dir) in bucket.iter() {
99            let z = u32::from(z);
100            let base = col * occ_words_per_col as usize + (z >> 5) as usize;
101            occupancy[base] |= 1u32 << (z & 31);
102            colors.push(col_rgba);
103            dirs.push(u32::from(dir));
104        }
105    }
106    color_offsets[cols] = colors.len() as u32;
107
108    SpriteModel {
109        dims: [mx, my, mz],
110        occ_words_per_col,
111        pivot: [kv6.xpiv, kv6.ypiv, kv6.zpiv],
112        occupancy,
113        color_offsets,
114        colors,
115        dirs,
116        voxel_world_size: 1.0,
117    }
118}
119
/// Per-instance transform consumed by the model-DDA shader: the
/// inverse model→world rotation (so a world ray can be brought into
/// model-local space) plus the instance's world position. Stored as
/// three padded columns for std140/std430 (`mat3x3` 16-byte columns).
///
/// `#[repr(C)]` + `Pod`: the field order and the explicit padding are
/// part of the byte layout and must not be rearranged.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable, Debug)]
pub struct SpriteInstanceTransform {
    /// Inverse of `[s | h | f]`, column-major, each column padded to
    /// `vec4`. `inv_rot * v = c0*v.x + c1*v.y + c2*v.z`.
    pub inv_rot: [[f32; 4]; 3],
    /// Instance world position (the KV6 pivot maps here).
    pub pos: [f32; 3],
    /// Explicit tail padding: rounds `pos` up to 16 bytes.
    _pad: f32,
}
134
135impl SpriteInstanceTransform {
136    /// Build from a sprite pose. `s/h/f` are the model→world basis
137    /// columns; we invert them so the shader can map world→local.
138    #[must_use]
139    pub fn from_sprite(sprite: &Sprite) -> Self {
140        let inv = mat3_inverse([sprite.s, sprite.h, sprite.f]);
141        Self {
142            inv_rot: [
143                [inv[0][0], inv[0][1], inv[0][2], 0.0],
144                [inv[1][0], inv[1][1], inv[1][2], 0.0],
145                [inv[2][0], inv[2][1], inv[2][2], 0.0],
146            ],
147            pos: sprite.p,
148            _pad: 0.0,
149        }
150    }
151}
152
/// A registry of sprite models. Instances reference a model by
/// `model_id`, which is a **LOD chain** id: each chain holds one or
/// more concrete mip levels (finest first; GPU.10.4), and the renderer
/// picks the level per instance by distance. Identical KV6s are added
/// once and shared by many instances. **Copy-on-modify**:
/// [`Self::fork`] deep-copies a chain so edits to the fork leave the
/// parent (and its instances) intact.
///
/// Two id spaces are in play: a *model id* indexes `chains`, an *entry
/// id* indexes `entries`. Entries are only ever appended by the methods
/// visible in this file, so both kinds of id stay stable.
#[derive(Debug, Clone, Default)]
pub struct SpriteModelRegistry {
    /// Concrete mip-level volumes (the GPU buffers concatenate these).
    entries: Vec<SpriteModel>,
    /// `chains[model_id]` = entry ids, finest (mip-0) first.
    chains: Vec<Vec<u32>>,
}
167
168impl SpriteModelRegistry {
169    #[must_use]
170    pub fn new() -> Self {
171        Self::default()
172    }
173
174    fn push_entry(&mut self, model: SpriteModel) -> u32 {
175        let id = self.entries.len() as u32;
176        self.entries.push(model);
177        id
178    }
179
180    /// Register a single-level (no-LOD) model; returns its `model_id`.
181    pub fn add(&mut self, model: SpriteModel) -> u32 {
182        let e = self.push_entry(model);
183        let id = self.chains.len() as u32;
184        self.chains.push(vec![e]);
185        id
186    }
187
188    /// Register a model with up to `max_levels` LOD mips (each a 2×
189    /// [`SpriteModel::downsample`] of the previous; stops early once a
190    /// level collapses to 1³). Returns its `model_id`.
191    pub fn add_lod(&mut self, model: SpriteModel, max_levels: u32) -> u32 {
192        let mut levels = vec![self.push_entry(model.clone())];
193        let mut cur = model;
194        for _ in 1..max_levels.max(1) {
195            if cur.dims == [1, 1, 1] {
196                break;
197            }
198            cur = cur.downsample();
199            levels.push(self.push_entry(cur.clone()));
200        }
201        let id = self.chains.len() as u32;
202        self.chains.push(levels);
203        id
204    }
205
206    /// Copy-on-modify: deep-copy every level of chain `parent` into new
207    /// entries + a new chain, and return its `model_id`. The fork owns
208    /// independent voxel data, so mutating it does not affect the
209    /// parent or any instance still pointing at it.
210    ///
211    /// # Panics
212    /// If `parent` is not a registered `model_id`.
213    pub fn fork(&mut self, parent: u32) -> u32 {
214        let src = self.chains[parent as usize].clone();
215        let levels: Vec<u32> = src
216            .iter()
217            .map(|&e| {
218                let copy = self.entries[e as usize].clone();
219                self.push_entry(copy)
220            })
221            .collect();
222        let id = self.chains.len() as u32;
223        self.chains.push(levels);
224        id
225    }
226
227    /// The finest (mip-0) model of chain `id`.
228    #[must_use]
229    pub fn model(&self, id: u32) -> &SpriteModel {
230        &self.entries[self.chains[id as usize][0] as usize]
231    }
232
233    /// Mutable access to the finest (mip-0) model for editing — the
234    /// copy-on-modify entry point (typically on a [`Self::fork`]).
235    /// After a *structural* edit (occupancy/dims), call
236    /// [`Self::rebuild_lod`] so the coarser mips match; a pure recolour
237    /// can use [`Self::recolor_chain`] instead.
238    pub fn model_mut(&mut self, id: u32) -> &mut SpriteModel {
239        let e = self.chains[id as usize][0] as usize;
240        &mut self.entries[e]
241    }
242
243    /// Recolour every LOD level of chain `id` (so a forked tint shows
244    /// at all distances).
245    pub fn recolor_chain(&mut self, id: u32, f: impl Fn(u32) -> u32 + Copy) {
246        for li in 0..self.chains[id as usize].len() {
247            let e = self.chains[id as usize][li] as usize;
248            self.entries[e].recolor(f);
249        }
250    }
251
252    /// Regenerate chain `id`'s coarser mip levels from its (possibly
253    /// just-edited) mip-0. Run after a structural edit via
254    /// [`Self::model_mut`] so the LOD ladder stays consistent. No-op
255    /// for a single-level (no-LOD) chain.
256    pub fn rebuild_lod(&mut self, id: u32) {
257        let levels = self.chains[id as usize].clone();
258        if levels.len() <= 1 {
259            return;
260        }
261        let mut cur = self.entries[levels[0] as usize].clone();
262        for &e in &levels[1..] {
263            cur = cur.downsample();
264            self.entries[e as usize] = cur.clone();
265        }
266    }
267
268    /// Number of LOD chains (distinct `model_id`s).
269    #[must_use]
270    pub fn len(&self) -> usize {
271        self.chains.len()
272    }
273
274    #[must_use]
275    pub fn is_empty(&self) -> bool {
276        self.chains.is_empty()
277    }
278}
279
280impl SpriteModel {
281    /// Recolour every voxel via `f(old_rgba) -> new_rgba`. Structure
282    /// (occupancy / offsets) is untouched, so this is a cheap in-place
283    /// edit — handy on a [`SpriteModelRegistry::fork`] to make a tinted
284    /// variant. For structural edits, mutate the public occupancy /
285    /// colours / dims directly (via `model_mut`) then rebuild the LOD.
286    pub fn recolor(&mut self, f: impl Fn(u32) -> u32) {
287        for c in &mut self.colors {
288            *c = f(*c);
289        }
290    }
291
292    /// GPU.12 — structural edit of a single voxel within the model's
293    /// existing bounds. `Some(rgba)` sets/replaces the voxel at
294    /// `(x, y, z)`; `None` clears it. Maintains the ascending-z colour
295    /// invariant by inserting/removing at the voxel's popcount rank and
296    /// shifting the affected columns' `color_offsets`. Returns `true`
297    /// if the model changed. Out-of-bounds coordinates are ignored
298    /// (returns `false`) — growing `dims` is a separate concern.
299    ///
300    /// After editing, call [`SpriteModelRegistry::rebuild_lod`] to
301    /// refresh coarser mips, then re-upload via `set_sprite_instances`.
302    pub fn set_voxel(&mut self, x: u32, y: u32, z: u32, color: Option<u32>) -> bool {
303        if x >= self.dims[0] || y >= self.dims[1] || z >= self.dims[2] {
304            return false;
305        }
306        let owpc = self.occ_words_per_col as usize;
307        let cols = (self.dims[0] * self.dims[1]) as usize;
308        let col = (x + y * self.dims[0]) as usize;
309        let base = col * owpc;
310        let zw = (z >> 5) as usize;
311        let zb = z & 31;
312
313        // Rank = solid voxels strictly below z in this column.
314        let mut rank = 0usize;
315        for w in 0..zw {
316            rank += self.occupancy[base + w].count_ones() as usize;
317        }
318        let below_mask = if zb > 0 { (1u32 << zb) - 1 } else { 0 };
319        rank += (self.occupancy[base + zw] & below_mask).count_ones() as usize;
320        let idx = self.color_offsets[col] as usize + rank;
321        let was_set = (self.occupancy[base + zw] >> zb) & 1 == 1;
322
323        if let Some(rgba) = color {
324            if was_set {
325                self.colors[idx] = rgba; // replace in place (keeps dir)
326            } else {
327                self.occupancy[base + zw] |= 1u32 << zb;
328                self.colors.insert(idx, rgba);
329                // No normal supplied by this API — default to dir 0 (the
330                // sole caller, the carve hotkey, only ever clears).
331                self.dirs.insert(idx, 0);
332                for c in &mut self.color_offsets[col + 1..=cols] {
333                    *c += 1;
334                }
335            }
336            true
337        } else {
338            if !was_set {
339                return false;
340            }
341            self.occupancy[base + zw] &= !(1u32 << zb);
342            self.colors.remove(idx);
343            self.dirs.remove(idx);
344            for c in &mut self.color_offsets[col + 1..=cols] {
345                *c -= 1;
346            }
347            true
348        }
349    }
350
351    /// Radius of a bounding sphere centred at the instance position
352    /// (the pivot maps there): the farthest bbox corner from the
353    /// pivot. Used for frustum culling. Assumes a unit basis; scaled
354    /// instances would multiply this by their max basis length.
355    #[must_use]
356    pub fn bound_radius(&self) -> f32 {
357        let mut r2 = 0.0_f32;
358        for &cx in &[0.0, self.dims[0] as f32] {
359            for &cy in &[0.0, self.dims[1] as f32] {
360                for &cz in &[0.0, self.dims[2] as f32] {
361                    let d = [cx - self.pivot[0], cy - self.pivot[1], cz - self.pivot[2]];
362                    r2 = r2.max(d[0] * d[0] + d[1] * d[1] + d[2] * d[2]);
363                }
364            }
365        }
366        r2.sqrt()
367    }
368
369    /// GPU.10.4 — 2× voxel downsample for the next LOD level. A coarse
370    /// voxel is solid if any of its 2×2×2 fine voxels is, coloured by
371    /// their per-channel average. Dims/pivot halve and
372    /// `voxel_world_size` doubles, so the coarse model occupies the
373    /// same world box at half the resolution (origin-corner aligned).
374    #[must_use]
375    #[allow(clippy::manual_checked_ops)] // `n > 0` guards 4 divisions, not one checked_div
376    pub fn downsample(&self) -> SpriteModel {
377        let [fx, fy, fz] = self.dims;
378        let fidx = |x: u32, y: u32, z: u32| (x + y * fx + z * fx * fy) as usize;
379
380        // Reconstruct dense fine voxels (solid flag + colour + normal).
381        let mut solid = vec![false; (fx * fy * fz) as usize];
382        let mut fine = vec![0u32; (fx * fy * fz) as usize];
383        let mut fine_dir = vec![0u32; (fx * fy * fz) as usize];
384        for x in 0..fx {
385            for y in 0..fy {
386                let col = (x + y * fx) as usize;
387                let base = col * self.occ_words_per_col as usize;
388                let off = self.color_offsets[col] as usize;
389                let mut seen = 0usize;
390                for z in 0..fz {
391                    let w = base + (z >> 5) as usize;
392                    if (self.occupancy[w] >> (z & 31)) & 1 == 1 {
393                        fine[fidx(x, y, z)] = self.colors[off + seen];
394                        fine_dir[fidx(x, y, z)] = self.dirs[off + seen];
395                        solid[fidx(x, y, z)] = true;
396                        seen += 1;
397                    }
398                }
399            }
400        }
401
402        let nx = fx.div_ceil(2).max(1);
403        let ny = fy.div_ceil(2).max(1);
404        let nz = fz.div_ceil(2).max(1);
405        let owpc = nz.div_ceil(32).max(1);
406        let cols = (nx * ny) as usize;
407        let mut occupancy = vec![0u32; cols * owpc as usize];
408        let mut color_offsets = vec![0u32; cols + 1];
409        let mut colors: Vec<u32> = Vec::new();
410        let mut dirs: Vec<u32> = Vec::new();
411
412        // Emit in column-index order (`ccol = cx + cy*nx`), cy outer,
413        // so `color_offsets` is a monotonic prefix sum like build's.
414        for cy in 0..ny {
415            for cx in 0..nx {
416                let ccol = (cx + cy * nx) as usize;
417                color_offsets[ccol] = colors.len() as u32;
418                for cz in 0..nz {
419                    let (mut a, mut r, mut g, mut b, mut n) = (0u32, 0u32, 0u32, 0u32, 0u32);
420                    // Normals don't average meaningfully — keep the first
421                    // solid child's `dir` as the coarse voxel's normal.
422                    let mut rep_dir = 0u32;
423                    for dz in 0..2 {
424                        for dy in 0..2 {
425                            for dx in 0..2 {
426                                let (x, y, z) = (2 * cx + dx, 2 * cy + dy, 2 * cz + dz);
427                                if x < fx && y < fy && z < fz && solid[fidx(x, y, z)] {
428                                    let c = fine[fidx(x, y, z)];
429                                    if n == 0 {
430                                        rep_dir = fine_dir[fidx(x, y, z)];
431                                    }
432                                    a += (c >> 24) & 0xff;
433                                    r += (c >> 16) & 0xff;
434                                    g += (c >> 8) & 0xff;
435                                    b += c & 0xff;
436                                    n += 1;
437                                }
438                            }
439                        }
440                    }
441                    if n > 0 {
442                        let avg = ((a / n) << 24) | ((r / n) << 16) | ((g / n) << 8) | (b / n);
443                        let base = ccol * owpc as usize + (cz >> 5) as usize;
444                        occupancy[base] |= 1u32 << (cz & 31);
445                        colors.push(avg);
446                        dirs.push(rep_dir);
447                    }
448                }
449            }
450        }
451        color_offsets[cols] = colors.len() as u32;
452
453        SpriteModel {
454            dims: [nx, ny, nz],
455            occ_words_per_col: owpc,
456            pivot: [
457                self.pivot[0] * 0.5,
458                self.pivot[1] * 0.5,
459                self.pivot[2] * 0.5,
460            ],
461            occupancy,
462            colors,
463            dirs,
464            color_offsets,
465            voxel_world_size: self.voxel_world_size * 2.0,
466        }
467    }
468}
469
/// View frustum for CPU instance culling, in world space. Built each
/// frame from the world camera. `half_w`/`half_h` are the tangents of
/// the half-FOV (so the side planes are `|x| <= half_w * z` etc. in
/// camera space).
#[derive(Clone, Copy, Debug)]
pub struct ViewFrustum {
    /// Camera position in world space.
    pub pos: [f32; 3],
    /// Camera-space +x axis expressed in world space.
    /// NOTE(review): presumably unit length — confirm against the builder.
    pub right: [f32; 3],
    /// Camera-space +y axis (pointing down the screen) in world space.
    /// NOTE(review): presumably unit length — confirm against the builder.
    pub down: [f32; 3],
    /// Camera-space +z (view direction) in world space.
    /// NOTE(review): presumably unit length — confirm against the builder.
    pub forward: [f32; 3],
    /// Tangent of the horizontal half-FOV.
    pub half_w: f32,
    /// Tangent of the vertical half-FOV.
    pub half_h: f32,
    /// Far distance. NOTE(review): presumably the cull cutoff along
    /// `forward`; the culling code is outside this view — confirm.
    pub far: f32,
}
484
/// CPU cull record: the GPU instance + its world bounding sphere.
/// Not `Copy` — carries a boxed 256-entry `kv6colmul` table.
#[derive(Clone)]
struct CullInstance {
    /// Instance transform + a placeholder `model_id`; the cull
    /// overwrites `model_id` with the distance-chosen LOD entry.
    gpu: SpriteInstanceGpu,
    /// LOD chain this instance draws (the user-facing `model_id`).
    chain_id: u32,
    /// Bounding-sphere centre in world space; `make_cull` seeds it with
    /// the instance position.
    center: [f32; 3],
    /// Bounding-sphere radius; `make_cull` seeds it with the
    /// `bound_radius` of the chain's finest (mip-0) model.
    radius: f32,
    /// voxlap `kv6colmul[256]` — per-surface-normal colour modulation
    /// for this instance's pose + lighting. Defaults to identity
    /// (`0x0100` in every channel lane → unshaded) until the facade sets
    /// it via [`SpriteRegistryResident::set_instance_colmul`]. Packed
    /// into the `colmul` GPU buffer (in visible order) each frame.
    colmul: Box<[u64; 256]>,
}
503
/// Build the neutral `kv6colmul` table: 256 identical words whose four
/// 16-bit lanes each hold `0x0100`. With that multiplier the shader's
/// `(rgb[c] << 8) * 0x0100 >> 16` yields `rgb[c]` again, i.e. the
/// instance is drawn unshaded.
fn identity_colmul() -> Box<[u64; 256]> {
    // Four 16-bit lanes, each 0x0100.
    const IDENTITY_WORD: u64 = 0x0100_0100_0100_0100;
    Box::new([IDENTITY_WORD; 256])
}
511
/// Dot product of two 3-vectors, summed in x, y, z order.
fn dot3(a: [f32; 3], b: [f32; 3]) -> f32 {
    let [ax, ay, az] = a;
    let [bx, by, bz] = b;
    ax * bx + ay * by + az * bz
}
515
516/// Build one CPU cull record from a user [`SpriteInstance`]: pack the
517/// transform, seed the bounding sphere from the chain's finest model, and
518/// start `colmul` at identity. Shared by the full
519/// [`SpriteRegistryResident::upload`] and the incremental
520/// [`SpriteRegistryResident::append_instances`].
521fn make_cull(registry: &SpriteModelRegistry, i: &SpriteInstance) -> CullInstance {
522    CullInstance {
523        gpu: SpriteInstanceGpu {
524            inv_rot0: i.transform.inv_rot[0],
525            inv_rot1: i.transform.inv_rot[1],
526            inv_rot2: i.transform.inv_rot[2],
527            pos: i.transform.pos,
528            model_id: i.model_id, // placeholder; cull rewrites per frame
529        },
530        chain_id: i.model_id,
531        center: i.transform.pos,
532        radius: registry.model(i.model_id).bound_radius(),
533        colmul: identity_colmul(),
534    }
535}
536
537/// Allocate the `instances` capacity buffer (`STORAGE | COPY_DST`) sized
538/// for `cap` records (≥1). Left uninitialised — `cull_bin_upload`
539/// rewrites it (offset 0) each frame, and `append_instances` seeds the
540/// live records after a grow.
541fn instances_buffer(device: &wgpu::Device, cap: u32) -> wgpu::Buffer {
542    device.create_buffer(&wgpu::BufferDescriptor {
543        label: Some("roxlap-gpu sprite_reg.instances"),
544        size: u64::from(cap.max(1)) * std::mem::size_of::<SpriteInstanceGpu>() as u64,
545        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
546        mapped_at_creation: false,
547    })
548}
549
/// One sprite instance: a model reference + world pose.
#[derive(Debug, Clone, Copy)]
pub struct SpriteInstance {
    /// LOD-chain id of the model to draw, as returned by the registry's
    /// `add` / `add_lod` / `fork`.
    pub model_id: u32,
    /// World pose in the shader-ready (inverse-rotation + position) form.
    pub transform: SpriteInstanceTransform,
}
556
/// GPU per-model metadata: where this model's data starts in the
/// shared registry buffers + its dims/pivot. Mirrors `ModelMeta` in
/// the shader (std430, 48 bytes).
///
/// `#[repr(C)]` + `Pod`: field order and padding are part of the byte
/// layout shared with the shader and must not be rearranged.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable, Debug)]
struct SpriteModelMeta {
    /// First u32 word of this entry in the shared `occupancy` buffer.
    occupancy_offset: u32,
    /// First element of this entry in the shared `colors` buffer (the
    /// suballocator slot offset).
    colors_offset: u32,
    /// First u32 word of this entry in the shared `color_offsets` buffer.
    color_offsets_offset: u32,
    /// Occupancy words per (x, y) column of this entry.
    occ_words_per_col: u32,
    /// Voxel extent `(mx, my, mz)` of this entry.
    dims: [u32; 3],
    /// Explicit padding: rounds `dims` up to 16 bytes.
    _pad0: u32,
    /// Pivot in model-local voxel space.
    pivot: [f32; 3],
    /// GPU.10.4 — world size of one voxel of this (mip) entry.
    voxel_world_size: f32,
}
573
/// GPU per-instance record. Mirrors `Instance` in the shader (std430,
/// 64 bytes): inverse rotation columns + position + model id.
///
/// `#[repr(C)]` + `Pod`: field order is part of the byte layout shared
/// with the shader and must not be rearranged.
#[repr(C)]
#[derive(Clone, Copy, Pod, Zeroable, Debug)]
struct SpriteInstanceGpu {
    /// Column 0 of the inverse rotation, padded to `vec4`.
    inv_rot0: [f32; 4],
    /// Column 1 of the inverse rotation, padded to `vec4`.
    inv_rot1: [f32; 4],
    /// Column 2 of the inverse rotation, padded to `vec4`.
    inv_rot2: [f32; 4],
    /// Instance world position.
    pos: [f32; 3],
    /// Concrete entry id to draw; shares the final 16-byte slot with
    /// `pos`, so no separate padding field is needed.
    model_id: u32,
}
585
/// Invert the 3×3 matrix whose columns are `cols = [c0, c1, c2]` and
/// return the inverse, again as columns. An orthonormal basis simply
/// comes back transposed; the general cofactor path also handles
/// rotation combined with non-unit scale. A (near-)singular input
/// (`|det| < 1e-12`) yields the all-zero matrix instead of dividing by
/// zero.
#[must_use]
fn mat3_inverse(cols: [[f32; 3]; 3]) -> [[f32; 3]; 3] {
    fn cross(u: [f32; 3], v: [f32; 3]) -> [f32; 3] {
        [
            u[1] * v[2] - u[2] * v[1],
            u[2] * v[0] - u[0] * v[2],
            u[0] * v[1] - u[1] * v[0],
        ]
    }

    let [a, b, c] = cols;
    // Unscaled rows of the inverse: b×c, c×a, a×b.
    let rows = [cross(b, c), cross(c, a), cross(a, b)];
    // Determinant via the scalar triple product a · (b × c).
    let det = a[0] * rows[0][0] + a[1] * rows[0][1] + a[2] * rows[0][2];
    let inv_det = if det.abs() < 1e-12 { 0.0 } else { 1.0 / det };
    // Column j of the inverse gathers element j of every row.
    std::array::from_fn(|j| {
        [
            rows[0][j] * inv_det,
            rows[1][j] * inv_det,
            rows[2][j] * inv_det,
        ]
    })
}
613
/// GPU-resident registry + instances: every model's occupancy /
/// colours / offsets concatenated into shared storage buffers, a
/// per-model metadata table, and a capacity-sized instance buffer
/// rewritten each frame with the frustum-visible subset (GPU.10.2).
/// One bind group serves all models (same approach as the multi-grid
/// scene).
pub struct SpriteRegistryResident {
    /// Occupancy bitmasks of every entry, tightly concatenated.
    pub occupancy: wgpu::Buffer,
    /// Voxel colours of every entry, placed by the colour suballocator.
    pub colors: wgpu::Buffer,
    /// Per-voxel surface-normal index, concatenated across models in the
    /// same layout as [`colors`](Self::colors). The shader indexes the
    /// per-instance `kv6colmul` table by it.
    pub dirs: wgpu::Buffer,
    /// Per-column colour prefix tables of every entry, tightly
    /// concatenated.
    pub color_offsets: wgpu::Buffer,
    /// Per-entry metadata table (one `SpriteModelMeta` record per entry).
    pub model_meta: wgpu::Buffer,
    /// Holds up to `instance_capacity` instances; the visible subset
    /// is packed into `[0, count)` each frame by [`Self::cull_bin_upload`].
    pub instances: wgpu::Buffer,
    /// Number of instance records `instances` has room for.
    pub instance_capacity: u32,
    /// Per-visible-instance `kv6colmul[256]` tables, packed in the same
    /// order as the `instances` buffer each frame (two u32 per u64
    /// entry: lanes 0|1 then 2|3). Sized `instance_capacity * 256 * 2`
    /// u32; rewritten by [`Self::cull_bin_upload`].
    pub colmul: wgpu::Buffer,
    /// Allocated capacity of `colmul`.
    /// NOTE(review): unit (instances vs. u32 words) is not visible in
    /// this part of the file — confirm against the grow logic.
    colmul_cap: u32,
    /// GPU.10.3 — per-tile `(offset, count)` into `tile_instances`,
    /// flat `2 * tiles_x * tiles_y` u32s. Grown to fit the screen.
    pub tile_ranges: wgpu::Buffer,
    /// Allocated capacity of `tile_ranges`.
    /// NOTE(review): presumably in u32 words — confirm against the grow
    /// logic.
    tile_ranges_cap: u32,
    /// GPU.10.3 — flat list of visible-instance indices grouped by
    /// tile. Grown to fit the per-frame total.
    pub tile_instances: wgpu::Buffer,
    /// Allocated capacity of `tile_instances`.
    /// NOTE(review): presumably in u32 words — confirm against the grow
    /// logic.
    tile_instances_cap: u32,
    /// CPU cull records (full set), with precomputed bounding spheres.
    cull: Vec<CullInstance>,
    /// GPU.10.4 — LOD chains: `chains[chain_id]` = entry ids, finest
    /// first. The cull picks a level by distance and writes its entry
    /// id into the packed instance's `model_id`.
    chains: Vec<Vec<u32>>,
    /// GPU.12 incremental — CPU mirror of the GPU `model_meta` table, one
    /// per concrete entry. [`Self::update_model`] reads the fixed
    /// occupancy/color_offsets bases from here and rewrites the changed
    /// `colors_offset` on a relocation.
    meta: Vec<SpriteModelMeta>,
    /// GPU.12 incremental — per-entry placement of `colors`/`dirs` in the
    /// shared buffers (drives both; same offsets/ranks). Lets an edit
    /// re-upload one model's data without touching the others.
    colors_alloc: ColorsAllocator,
    /// Per-entry word length of the dims-fixed `occupancy` array, kept
    /// (together with `coloff_lens`) so [`Self::update_model`] can assert
    /// a carve never changed dims (which would invalidate the in-place
    /// writes — growing dims is out of scope, handled by a full
    /// re-upload).
    occ_lens: Vec<u32>,
    /// Per-entry word length of the dims-fixed `color_offsets` array;
    /// same purpose as `occ_lens`.
    coloff_lens: Vec<u32>,
    /// Used / allocated words of the tightly-concatenated `occupancy`
    /// buffer. `add_model` bump-appends at `occ_used`; when it would pass
    /// `occ_cap` the buffer is grown (with slack) and rebuilt from the
    /// registry. (`colors`/`dirs` track theirs in [`ColorsAllocator`].)
    occ_used: u32,
    /// Allocated words of the `occupancy` buffer (see `occ_used`).
    occ_cap: u32,
    /// Used / allocated words of the tightly-concatenated `color_offsets`
    /// buffer — same growth scheme as `occ_*`.
    coloff_used: u32,
    /// Allocated words of the `color_offsets` buffer (see `coloff_used`).
    coloff_cap: u32,
    /// Allocated record count of the `model_meta` buffer; `add_model`
    /// grows it (with slack) when the entry count passes it.
    meta_cap: u32,
    /// Per-entry tombstone: `true` once its model was removed
    /// ([`Self::remove_model`]). Dead entries keep their `meta` slot (so
    /// entry ids — and the caller's `chain_id`s — stay stable) but their
    /// colours are freed for reuse and they contribute nothing to a
    /// repack / [`Self::compact`]. Parallel to `meta`.
    dead: Vec<bool>,
}
688
/// Which tightly-concatenated registry buffer
/// [`SpriteRegistryResident::sync_concat`] is operating on.
#[derive(Clone, Copy)]
enum ConcatBuf {
    /// The shared `occupancy` buffer (sourced from `SpriteModel::occupancy`).
    Occupancy,
    /// The shared `color_offsets` buffer (sourced from
    /// `SpriteModel::color_offsets`).
    ColorOffsets,
}
696
697/// The model's source array for a given [`ConcatBuf`] — a free fn (not a
698/// closure) so the returned borrow keeps `m`'s lifetime.
699fn concat_data(m: &SpriteModel, which: ConcatBuf) -> &[u32] {
700    match which {
701        ConcatBuf::Occupancy => &m.occupancy,
702        ConcatBuf::ColorOffsets => &m.color_offsets,
703    }
704}
705
706impl SpriteRegistryResident {
707    /// Concatenate `registry`'s models into shared buffers and prepare
708    /// `instances` for per-frame culling. Model-relative indices stay
709    /// as built; the shader adds each model's base offset from the
710    /// metadata table.
711    #[must_use]
712    pub fn upload(
713        device: &wgpu::Device,
714        registry: &SpriteModelRegistry,
715        instances: &[SpriteInstance],
716    ) -> Self {
717        // `occupancy` + `color_offsets` are dims-fixed → tightly
718        // concatenated (never grow on a carve). `colors` + `dirs` are
719        // variable → laid out by the suballocator with per-slot slack so
720        // an incremental edit can rewrite one model in place.
721        let entry_lens: Vec<u32> = registry
722            .entries
723            .iter()
724            .map(|m| m.colors.len() as u32)
725            .collect();
726        let colors_alloc = ColorsAllocator::new(&entry_lens);
727        let cap_total = colors_alloc.cap_total();
728
729        let mut all_occ: Vec<u32> = Vec::new();
730        let mut all_offsets: Vec<u32> = Vec::new();
731        let mut all_colors: Vec<u32> = vec![0; cap_total as usize];
732        let mut all_dirs: Vec<u32> = vec![0; cap_total as usize];
733        let mut meta: Vec<SpriteModelMeta> = Vec::with_capacity(registry.entries.len());
734        let mut occ_lens: Vec<u32> = Vec::with_capacity(registry.entries.len());
735        let mut coloff_lens: Vec<u32> = Vec::with_capacity(registry.entries.len());
736
737        // One meta + placed data per concrete (mip-level) entry.
738        for (e, m) in registry.entries.iter().enumerate() {
739            let slot = colors_alloc.slot(e);
740            meta.push(SpriteModelMeta {
741                occupancy_offset: all_occ.len() as u32,
742                colors_offset: slot.off,
743                color_offsets_offset: all_offsets.len() as u32,
744                occ_words_per_col: m.occ_words_per_col,
745                dims: m.dims,
746                _pad0: 0,
747                pivot: m.pivot,
748                voxel_world_size: m.voxel_world_size,
749            });
750            occ_lens.push(m.occupancy.len() as u32);
751            coloff_lens.push(m.color_offsets.len() as u32);
752            all_occ.extend_from_slice(&m.occupancy);
753            all_offsets.extend_from_slice(&m.color_offsets);
754            let off = slot.off as usize;
755            all_colors[off..off + m.colors.len()].copy_from_slice(&m.colors);
756            all_dirs[off..off + m.dirs.len()].copy_from_slice(&m.dirs);
757        }
758
759        // Per-instance cull records: sphere centred at the instance
760        // position, radius from the chain's finest (mip-0) model.
761        // `colmul` starts at identity (unshaded) until the facade sets
762        // per-instance lighting via `set_instance_colmul`.
763        let cull: Vec<CullInstance> = instances.iter().map(|i| make_cull(registry, i)).collect();
764
765        // Capacity buffer (COPY_DST so cull can rewrite it each frame),
766        // seeded with the full set so frame 0 is valid pre-cull.
767        let seed: Vec<SpriteInstanceGpu> = cull.iter().map(|c| c.gpu).collect();
768        let instances_buf = {
769            use wgpu::util::DeviceExt;
770            let one = [SpriteInstanceGpu::zeroed()];
771            let src: &[SpriteInstanceGpu] = if seed.is_empty() { &one } else { &seed };
772            device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
773                label: Some("roxlap-gpu sprite_reg.instances"),
774                contents: bytemuck::cast_slice(src),
775                usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
776            })
777        };
778
779        let tile_ranges = storage_dst_u32(device, "roxlap-gpu sprite_reg.tile_ranges", 1);
780        let tile_instances = storage_dst_u32(device, "roxlap-gpu sprite_reg.tile_instances", 1);
781        // colmul: 256 entries × 2 u32 per visible instance. Sized to the
782        // full instance set (worst case all visible); rewritten per frame.
783        let colmul_cap = (cull.len() as u32).max(1) * 256 * 2;
784        let colmul = storage_dst_u32(device, "roxlap-gpu sprite_reg.colmul", colmul_cap);
785        Self {
786            occupancy: storage_dst_u32_cap(
787                device,
788                "roxlap-gpu sprite_reg.occupancy",
789                &all_occ,
790                all_occ.len() as u32,
791            ),
792            colors: storage_dst_u32_cap(
793                device,
794                "roxlap-gpu sprite_reg.colors",
795                &all_colors,
796                cap_total,
797            ),
798            dirs: storage_dst_u32_cap(device, "roxlap-gpu sprite_reg.dirs", &all_dirs, cap_total),
799            color_offsets: storage_dst_u32_cap(
800                device,
801                "roxlap-gpu sprite_reg.color_offsets",
802                &all_offsets,
803                all_offsets.len() as u32,
804            ),
805            model_meta: storage_dst_pod(device, "roxlap-gpu sprite_reg.model_meta", &meta),
806            instances: instances_buf,
807            instance_capacity: cull.len() as u32,
808            colmul,
809            colmul_cap,
810            tile_ranges,
811            tile_ranges_cap: 1,
812            tile_instances,
813            tile_instances_cap: 1,
814            cull,
815            chains: registry.chains.clone(),
816            occ_used: all_occ.len() as u32,
817            occ_cap: all_occ.len() as u32,
818            coloff_used: all_offsets.len() as u32,
819            coloff_cap: all_offsets.len() as u32,
820            meta_cap: meta.len() as u32,
821            dead: vec![false; meta.len()],
822            meta,
823            colors_alloc,
824            occ_lens,
825            coloff_lens,
826        }
827    }
828
829    /// Number of resident instances (the cull set length).
830    #[must_use]
831    pub fn instance_count(&self) -> usize {
832        self.cull.len()
833    }
834
835    /// Append new instances **without** re-uploading any model volume —
836    /// the incremental counterpart to [`Self::upload`], for streaming
837    /// spawns (asteroids, projectiles, …). Returns the index of the first
838    /// appended instance; the block occupies `[base, base + N)`.
839    ///
840    /// The model volumes are untouched, so every appended instance must
841    /// reference a `model_id` (LOD chain) that was already present in the
842    /// `registry` passed to [`Self::upload`]. Registering a *new* model
843    /// still requires a full [`Self::upload`] (its voxels must be laid
844    /// into the shared buffers). `registry` here is only read for the new
845    /// instances' bound-sphere radii and must be the resident one.
846    ///
847    /// The `instances` GPU buffer is only *grown* here (power-of-two,
848    /// amortised O(1)); its contents are **not** written. [`Self::
849    /// cull_bin_upload`] rewrites the whole visible range from `cull` every
850    /// frame before the sprite pass reads it — exactly as for the static
851    /// instances — so appending only needs to extend `cull` and ensure
852    /// capacity. Writing the buffer here too caused a mid-frame
853    /// write-while-in-flight hazard on some drivers (a stray full-screen
854    /// flash on append). `colmul` likewise grows lazily in
855    /// `cull_bin_upload`. After a removal the capacity is not shrunk.
856    pub fn append_instances(
857        &mut self,
858        device: &wgpu::Device,
859        registry: &SpriteModelRegistry,
860        instances: &[SpriteInstance],
861    ) -> u32 {
862        let base = self.cull.len() as u32;
863        if instances.is_empty() {
864            return base;
865        }
866        for i in instances {
867            debug_assert!(
868                (i.model_id as usize) < self.chains.len(),
869                "append_instances: model_id {} not resident (run upload to register new models)",
870                i.model_id
871            );
872            self.cull.push(make_cull(registry, i));
873        }
874        let need = self.cull.len() as u32;
875        if need > self.instance_capacity {
876            // Grow power-of-two and recreate the buffer (the next frame's
877            // bind group picks up the new handle). No seed write — the
878            // per-frame cull_bin_upload populates it.
879            self.instance_capacity = need.next_power_of_two();
880            self.instances = instances_buffer(device, self.instance_capacity);
881        }
882        base
883    }
884
885    /// Remove the instance at `index` by swap-remove — O(1), no GPU work
886    /// (the next [`Self::cull_bin_upload`] repacks the visible set from
887    /// the shrunk cull list). Capacity is retained for reuse.
888    ///
889    /// Returns `Some(old_last)` when a different instance was moved into
890    /// `index` to fill the hole (its index changed from `old_last` to
891    /// `index` — callers holding instance handles must fix up that one),
892    /// or `None` if `index` was the last element or out of range. Because
893    /// this reorders, any [`Self::set_instance_colmul`] table set by
894    /// position should be re-applied after a removal.
895    pub fn remove_instance(&mut self, index: usize) -> Option<usize> {
896        if index >= self.cull.len() {
897            return None;
898        }
899        let last = self.cull.len() - 1;
900        self.cull.swap_remove(index);
901        (index != last).then_some(last)
902    }
903
904    /// Set the per-instance `kv6colmul[256]` lighting tables (voxlap's
905    /// `update_reflects` output), in the same order/length as the
906    /// instances passed to [`Self::upload`]. The next
907    /// [`Self::cull_bin_upload`] packs the visible subset to the GPU.
908    /// Instances beyond `tables.len()` keep their previous tables.
909    pub fn set_instance_colmul(&mut self, tables: &[[u64; 256]]) {
910        for (ci, t) in self.cull.iter_mut().zip(tables) {
911            ci.colmul.copy_from_slice(t);
912        }
913    }
914
915    /// Refresh instance poses in place from `instances` — for animated
916    /// sprites (e.g. KFA limbs re-posed each frame) — **without** any
917    /// model-volume re-upload. `instances` must match the set passed to
918    /// [`Self::upload`] in length + order; each keeps its `model_id`
919    /// (LOD chain) so only the transform + cull centre change. No GPU
920    /// write happens here: the next [`Self::cull_bin_upload`] re-uploads
921    /// the packed visible subset, as it already does every frame.
922    pub fn update_transforms(&mut self, instances: &[SpriteInstance]) {
923        debug_assert_eq!(
924            instances.len(),
925            self.cull.len(),
926            "update_transforms instance count must match upload"
927        );
928        for (ci, inst) in self.cull.iter_mut().zip(instances) {
929            ci.gpu.inv_rot0 = inst.transform.inv_rot[0];
930            ci.gpu.inv_rot1 = inst.transform.inv_rot[1];
931            ci.gpu.inv_rot2 = inst.transform.inv_rot[2];
932            ci.gpu.pos = inst.transform.pos;
933            // Bounding sphere follows the pivot; radius/chain unchanged.
934            ci.center = inst.transform.pos;
935        }
936    }
937
938    /// GPU.12 incremental — re-upload only the entries of LOD chain
939    /// `chain_id` after an in-place edit (carve / recolour) of its model,
940    /// **without** rebuilding the whole registry. `registry` must be the
941    /// same registry uploaded (same entry ids), with chain `chain_id`'s
942    /// entries already edited (`model_mut` + `rebuild_lod`).
943    ///
944    /// For each entry: occupancy + color_offsets are dims-fixed, so they
945    /// are written in place; colors + dirs (variable, parallel) go through
946    /// the suballocator — written in place when they fit the slack,
947    /// relocated (with a `model_meta` rewrite) when they outgrow it, and
948    /// only when the buffer tail overflows are colors/dirs grown + the
949    /// whole registry repacked. Instances / cull / colmul are untouched
950    /// (a carve never moves an instance or grows its bounds) — that is the
951    /// win over [`Self::upload`].
952    ///
953    /// # Panics (debug)
954    /// If an entry's dims changed (occupancy / color_offsets length), which
955    /// the in-place path can't absorb — growing dims needs a full
956    /// re-upload via [`Self::upload`].
957    pub fn update_model(
958        &mut self,
959        device: &wgpu::Device,
960        queue: &wgpu::Queue,
961        registry: &SpriteModelRegistry,
962        chain_id: u32,
963    ) {
964        let entries = self.chains[chain_id as usize].clone();
965        let mut grew = false;
966        for &e in &entries {
967            let e = e as usize;
968            let m = &registry.entries[e];
969
970            // Dims-fixed arrays: assert unchanged, then write in place.
971            debug_assert_eq!(
972                m.occupancy.len() as u32,
973                self.occ_lens[e],
974                "update_model: entry {e} occupancy length changed (dims grew?)"
975            );
976            debug_assert_eq!(
977                m.color_offsets.len() as u32,
978                self.coloff_lens[e],
979                "update_model: entry {e} color_offsets length changed (dims grew?)"
980            );
981            queue.write_buffer(
982                &self.occupancy,
983                u64::from(self.meta[e].occupancy_offset) * 4,
984                bytemuck::cast_slice(&m.occupancy),
985            );
986            queue.write_buffer(
987                &self.color_offsets,
988                u64::from(self.meta[e].color_offsets_offset) * 4,
989                bytemuck::cast_slice(&m.color_offsets),
990            );
991
992            // Variable colors/dirs via the suballocator.
993            let new_len = m.colors.len() as u32;
994            match self.colors_alloc.place(e, new_len) {
995                Some(off) => {
996                    queue.write_buffer(
997                        &self.colors,
998                        u64::from(off) * 4,
999                        bytemuck::cast_slice(&m.colors),
1000                    );
1001                    queue.write_buffer(
1002                        &self.dirs,
1003                        u64::from(off) * 4,
1004                        bytemuck::cast_slice(&m.dirs),
1005                    );
1006                    if self.meta[e].colors_offset != off {
1007                        // Relocated — rewrite this entry's meta record.
1008                        self.meta[e].colors_offset = off;
1009                        queue.write_buffer(
1010                            &self.model_meta,
1011                            (e * std::mem::size_of::<SpriteModelMeta>()) as u64,
1012                            bytemuck::bytes_of(&self.meta[e]),
1013                        );
1014                    }
1015                }
1016                None => grew = true,
1017            }
1018        }
1019
1020        // Buffer overflow on at least one entry → grow colors/dirs and
1021        // repack the WHOLE registry (rare; offsets for every entry move).
1022        if grew {
1023            self.grow_and_repack(device, queue, registry);
1024        }
1025    }
1026
1027    /// Grow the `colors`/`dirs` buffers and repack every entry compactly
1028    /// (with fresh slack) when an [`Self::update_model`] edit overflowed
1029    /// the buffer tail. Recreates both buffers (the next frame's bind
1030    /// group picks up the new handles) and rewrites every `model_meta`
1031    /// `colors_offset`. O(registry) but rare — logged so a growth burst
1032    /// is visible.
1033    fn grow_and_repack(
1034        &mut self,
1035        device: &wgpu::Device,
1036        queue: &wgpu::Queue,
1037        registry: &SpriteModelRegistry,
1038    ) {
1039        self.repack_colors_dirs(device, registry);
1040        // Every entry's colors_offset moved → rewrite the whole meta table.
1041        queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
1042    }
1043
1044    /// Repack `colors`/`dirs` compactly (with fresh slack) from the full
1045    /// `registry`, recreating both buffers and updating every CPU
1046    /// `meta[e].colors_offset`. Does **not** touch the GPU `model_meta`
1047    /// buffer — the caller writes it ([`Self::grow_and_repack`] writes the
1048    /// whole table; [`Self::add_model`] writes it once after all entries
1049    /// are placed). O(registry) but rare — logged so a growth burst is
1050    /// visible.
1051    fn repack_colors_dirs(&mut self, device: &wgpu::Device, registry: &SpriteModelRegistry) {
1052        // Dead (removed) entries collapse to 0 length so they reclaim no
1053        // space; live entries keep their colours.
1054        let new_lens: Vec<u32> = registry
1055            .entries
1056            .iter()
1057            .enumerate()
1058            .map(|(e, m)| {
1059                if self.dead[e] {
1060                    0
1061                } else {
1062                    m.colors.len() as u32
1063                }
1064            })
1065            .collect();
1066        self.colors_alloc.repack(&new_lens);
1067        let cap_total = self.colors_alloc.cap_total();
1068
1069        let mut all_colors = vec![0u32; cap_total as usize];
1070        let mut all_dirs = vec![0u32; cap_total as usize];
1071        for (e, m) in registry.entries.iter().enumerate() {
1072            if self.dead[e] {
1073                self.meta[e].colors_offset = 0;
1074                continue;
1075            }
1076            let off = self.colors_alloc.slot(e).off as usize;
1077            all_colors[off..off + m.colors.len()].copy_from_slice(&m.colors);
1078            all_dirs[off..off + m.dirs.len()].copy_from_slice(&m.dirs);
1079            self.meta[e].colors_offset = off as u32;
1080        }
1081        self.colors = storage_dst_u32_cap(
1082            device,
1083            "roxlap-gpu sprite_reg.colors",
1084            &all_colors,
1085            cap_total,
1086        );
1087        self.dirs = storage_dst_u32_cap(device, "roxlap-gpu sprite_reg.dirs", &all_dirs, cap_total);
1088        eprintln!("roxlap-gpu: sprite registry colors/dirs grew + repacked to {cap_total} words");
1089    }
1090
1091    /// Append a new model (its full LOD chain) to the resident registry
1092    /// **without** re-uploading the existing models' volumes — the
1093    /// incremental counterpart to a full [`Self::upload`], for streaming
1094    /// in new geometry (unique asteroids, generated meshes).
1095    ///
1096    /// Contract (mirrors [`Self::update_model`]): the caller owns the
1097    /// `SpriteModelRegistry`, has just appended this chain to it (e.g. via
1098    /// [`SpriteModelRegistry::add_lod`]), and passes the resulting
1099    /// `chain_id`. The chain's entries must be the registry's newest (ids
1100    /// `>= ` the resident entry count) — entries are append-only.
1101    ///
1102    /// The large `colors`/`dirs`/`occupancy`/`color_offsets` buffers carry
1103    /// slack and bump-append the new entries in place; a buffer that
1104    /// overflows is grown (with slack) and rebuilt once from the registry
1105    /// (amortised O(1) per add). The small `model_meta` table is rewritten
1106    /// each call. After this, [`Self::append_instances`] can reference the
1107    /// new `chain_id`.
1108    pub fn add_model(
1109        &mut self,
1110        device: &wgpu::Device,
1111        queue: &wgpu::Queue,
1112        registry: &SpriteModelRegistry,
1113        chain_id: u32,
1114    ) {
1115        let entries = registry.chains[chain_id as usize].clone();
1116        debug_assert_eq!(
1117            chain_id as usize,
1118            self.chains.len(),
1119            "add_model: chains must be appended in order"
1120        );
1121
1122        // CPU bookkeeping: assign each new entry a tight occ/coloff offset
1123        // and an allocator slot for colors/dirs. `need_colors_grow` marks
1124        // a slot that didn't fit → a colors/dirs repack below.
1125        let mut need_colors_grow = false;
1126        for &e in &entries {
1127            let e = e as usize;
1128            debug_assert_eq!(
1129                e,
1130                self.meta.len(),
1131                "add_model: entries must be appended in order"
1132            );
1133            let m = &registry.entries[e];
1134            let occ_off = self.occ_used;
1135            let coloff_off = self.coloff_used;
1136            self.occ_used += m.occupancy.len() as u32;
1137            self.coloff_used += m.color_offsets.len() as u32;
1138            let colors_off = match self.colors_alloc.push(m.colors.len() as u32) {
1139                Some(off) => off,
1140                None => {
1141                    need_colors_grow = true;
1142                    0 // placeholder; repack assigns the real offset
1143                }
1144            };
1145            self.meta.push(SpriteModelMeta {
1146                occupancy_offset: occ_off,
1147                colors_offset: colors_off,
1148                color_offsets_offset: coloff_off,
1149                occ_words_per_col: m.occ_words_per_col,
1150                dims: m.dims,
1151                _pad0: 0,
1152                pivot: m.pivot,
1153                voxel_world_size: m.voxel_world_size,
1154            });
1155            self.occ_lens.push(m.occupancy.len() as u32);
1156            self.coloff_lens.push(m.color_offsets.len() as u32);
1157            self.dead.push(false);
1158        }
1159        self.chains.push(entries.clone());
1160
1161        // occupancy + color_offsets: grow+rebuild on overflow, else write
1162        // the new tails in place.
1163        self.sync_concat(device, queue, registry, &entries, ConcatBuf::Occupancy);
1164        self.sync_concat(device, queue, registry, &entries, ConcatBuf::ColorOffsets);
1165
1166        // colors/dirs: repack on overflow (rebuilds both + every CPU
1167        // colors_offset), else write the new entries at their slots.
1168        if need_colors_grow {
1169            self.repack_colors_dirs(device, registry);
1170        } else {
1171            for &e in &entries {
1172                let e = e as usize;
1173                let m = &registry.entries[e];
1174                let off = u64::from(self.meta[e].colors_offset) * 4;
1175                queue.write_buffer(&self.colors, off, bytemuck::cast_slice(&m.colors));
1176                queue.write_buffer(&self.dirs, off, bytemuck::cast_slice(&m.dirs));
1177            }
1178        }
1179
1180        // model_meta: grow the record buffer if needed, then rewrite the
1181        // whole (small) table — covers both new records and any
1182        // colors_offset relocations from a repack.
1183        let count = self.meta.len() as u32;
1184        if count > self.meta_cap {
1185            self.meta_cap = grow_records(count);
1186            self.model_meta = storage_dst_pod_cap(
1187                device,
1188                "roxlap-gpu sprite_reg.model_meta",
1189                &self.meta,
1190                self.meta_cap,
1191            );
1192        } else {
1193            queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
1194        }
1195    }
1196
1197    /// Sync one tightly-concatenated buffer (`occupancy` or
1198    /// `color_offsets`) after `add_model` appended `new_entries`: if the
1199    /// used length now exceeds capacity, grow (with slack) and rebuild the
1200    /// whole buffer from the registry; otherwise write just the appended
1201    /// tails at their offsets.
1202    fn sync_concat(
1203        &mut self,
1204        device: &wgpu::Device,
1205        queue: &wgpu::Queue,
1206        registry: &SpriteModelRegistry,
1207        new_entries: &[u32],
1208        which: ConcatBuf,
1209    ) {
1210        let (used, cap) = match which {
1211            ConcatBuf::Occupancy => (self.occ_used, self.occ_cap),
1212            ConcatBuf::ColorOffsets => (self.coloff_used, self.coloff_cap),
1213        };
1214        if used > cap {
1215            let new_cap = grow_words(used);
1216            let all: Vec<u32> = registry
1217                .entries
1218                .iter()
1219                .flat_map(|m| concat_data(m, which).iter().copied())
1220                .collect();
1221            let label = match which {
1222                ConcatBuf::Occupancy => "roxlap-gpu sprite_reg.occupancy",
1223                ConcatBuf::ColorOffsets => "roxlap-gpu sprite_reg.color_offsets",
1224            };
1225            let buf = storage_dst_u32_cap(device, label, &all, new_cap);
1226            match which {
1227                ConcatBuf::Occupancy => {
1228                    self.occupancy = buf;
1229                    self.occ_cap = new_cap;
1230                }
1231                ConcatBuf::ColorOffsets => {
1232                    self.color_offsets = buf;
1233                    self.coloff_cap = new_cap;
1234                }
1235            }
1236        } else {
1237            let target = match which {
1238                ConcatBuf::Occupancy => &self.occupancy,
1239                ConcatBuf::ColorOffsets => &self.color_offsets,
1240            };
1241            for &e in new_entries {
1242                let e = e as usize;
1243                let off = match which {
1244                    ConcatBuf::Occupancy => self.meta[e].occupancy_offset,
1245                    ConcatBuf::ColorOffsets => self.meta[e].color_offsets_offset,
1246                };
1247                queue.write_buffer(
1248                    target,
1249                    u64::from(off) * 4,
1250                    bytemuck::cast_slice(concat_data(&registry.entries[e], which)),
1251                );
1252            }
1253        }
1254    }
1255
1256    /// Number of removed-but-not-yet-compacted models (tombstoned chains).
1257    /// A caller streams `add_model` / `remove_model` and calls
1258    /// [`Self::compact`] once this (relative to [`Self::live_model_count`])
1259    /// crosses a threshold.
1260    #[must_use]
1261    pub fn dead_model_count(&self) -> usize {
1262        self.chains.iter().filter(|c| c.is_empty()).count()
1263    }
1264
1265    /// Number of live (non-removed) models.
1266    #[must_use]
1267    pub fn live_model_count(&self) -> usize {
1268        self.chains.iter().filter(|c| !c.is_empty()).count()
1269    }
1270
1271    /// Remove a model (tombstone its LOD chain) — the counterpart to
1272    /// [`Self::add_model`]. O(chain length): marks the chain's entries
1273    /// dead and frees their `colors`/`dirs` slots for reuse by a later
1274    /// `add_model`. The `occupancy` / `color_offsets` holes are **not**
1275    /// reclaimed until [`Self::compact`]; entry ids (and the caller's other
1276    /// `chain_id`s) stay stable.
1277    ///
1278    /// Instances of the removed chain are **not** dropped here — they
1279    /// linger in the cull set but draw as nothing (skipped in
1280    /// [`Self::cull_bin_upload`]); the caller removes them via
1281    /// [`Self::remove_instance`] when convenient. A no-op if `chain_id` is
1282    /// out of range or already removed.
1283    pub fn remove_model(&mut self, chain_id: u32) {
1284        let Some(entries) = self.chains.get(chain_id as usize).cloned() else {
1285            return;
1286        };
1287        if entries.is_empty() {
1288            return; // already removed
1289        }
1290        for &e in &entries {
1291            let e = e as usize;
1292            self.dead[e] = true;
1293            self.colors_alloc.free(e);
1294        }
1295        self.chains[chain_id as usize] = Vec::new(); // tombstone
1296    }
1297
1298    /// Reclaim the holes left by [`Self::remove_model`]: rebuild the shared
1299    /// volume buffers from the live entries only, dropping every dead
1300    /// entry's data. Entry ids and `chain_id`s are preserved (dead entries
1301    /// keep a zero-length `meta` tombstone), so the caller's handles stay
1302    /// valid and no remap is needed.
1303    ///
1304    /// `registry` must be the resident one (entry ids 1:1, as for
1305    /// [`Self::add_model`] / [`Self::update_model`]). O(live volume) —
1306    /// call it when [`Self::dead_model_count`] is high, not every frame.
1307    pub fn compact(
1308        &mut self,
1309        device: &wgpu::Device,
1310        queue: &wgpu::Queue,
1311        registry: &SpriteModelRegistry,
1312    ) {
1313        // occupancy + color_offsets: re-pack live entries tightly, rewrite
1314        // each live entry's meta offset, zero the dead ones.
1315        self.compact_concat(device, registry, ConcatBuf::Occupancy);
1316        self.compact_concat(device, registry, ConcatBuf::ColorOffsets);
1317        // colors/dirs: the dead-aware repack already drops dead entries.
1318        self.repack_colors_dirs(device, registry);
1319        // model_meta: rewrite the (unchanged-length) table with the new
1320        // offsets. Buffer count didn't change, so no grow needed.
1321        queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
1322    }
1323
1324    /// Rebuild one tightly-concatenated buffer from live entries only
1325    /// (used by [`Self::compact`]): assign each live entry a fresh tight
1326    /// offset, zero dead entries' offset, and recreate the buffer with
1327    /// slack.
1328    fn compact_concat(
1329        &mut self,
1330        device: &wgpu::Device,
1331        registry: &SpriteModelRegistry,
1332        which: ConcatBuf,
1333    ) {
1334        let mut all: Vec<u32> = Vec::new();
1335        for e in 0..self.meta.len() {
1336            if self.dead[e] {
1337                match which {
1338                    ConcatBuf::Occupancy => self.meta[e].occupancy_offset = 0,
1339                    ConcatBuf::ColorOffsets => self.meta[e].color_offsets_offset = 0,
1340                }
1341                continue;
1342            }
1343            let off = all.len() as u32;
1344            match which {
1345                ConcatBuf::Occupancy => self.meta[e].occupancy_offset = off,
1346                ConcatBuf::ColorOffsets => self.meta[e].color_offsets_offset = off,
1347            }
1348            all.extend_from_slice(concat_data(&registry.entries[e], which));
1349        }
1350        let used = all.len() as u32;
1351        let cap = grow_words(used);
1352        let (label, buf) = match which {
1353            ConcatBuf::Occupancy => ("roxlap-gpu sprite_reg.occupancy", &mut self.occupancy),
1354            ConcatBuf::ColorOffsets => (
1355                "roxlap-gpu sprite_reg.color_offsets",
1356                &mut self.color_offsets,
1357            ),
1358        };
1359        *buf = storage_dst_u32_cap(device, label, &all, cap);
1360        match which {
1361            ConcatBuf::Occupancy => {
1362                self.occ_used = used;
1363                self.occ_cap = cap;
1364            }
1365            ConcatBuf::ColorOffsets => {
1366                self.coloff_used = used;
1367                self.coloff_cap = cap;
1368            }
1369        }
1370    }
1371
1372    /// GPU.10.3 — frustum-cull, pack the visible subset into the
1373    /// instance buffer, then bin those instances into screen tiles:
1374    /// project each visible bounding sphere to a screen AABB and append
1375    /// its (visible) index to every overlapped tile. Uploads the
1376    /// instance buffer + `tile_ranges` (per-tile offset/count) +
1377    /// `tile_instances` (flat grouped indices), growing the tile
1378    /// buffers as needed. Returns `(visible_count, tiles_x, tiles_y)`.
1379    #[allow(clippy::too_many_arguments)]
1380    pub fn cull_bin_upload(
1381        &mut self,
1382        device: &wgpu::Device,
1383        queue: &wgpu::Queue,
1384        f: &ViewFrustum,
1385        screen_w: u32,
1386        screen_h: u32,
1387        tile_size: u32,
1388        lod_px: f32,
1389    ) -> (u32, u32, u32) {
1390        let tiles_x = screen_w.div_ceil(tile_size).max(1);
1391        let tiles_y = screen_h.div_ceil(tile_size).max(1);
1392        let n_tiles = (tiles_x * tiles_y) as usize;
1393
1394        let nw = (1.0 + f.half_w * f.half_w).sqrt();
1395        let nh = (1.0 + f.half_h * f.half_h).sqrt();
1396        let cx = screen_w as f32 * 0.5;
1397        let cy = screen_h as f32 * 0.5;
1398        let px_per_world = cx / f.half_w; // isotropic: == cy/half_h
1399        let ts = tile_size as f32;
1400        let tx_max = tiles_x as i32 - 1;
1401        let ty_max = tiles_y as i32 - 1;
1402
1403        let mut visible: Vec<SpriteInstanceGpu> = Vec::with_capacity(self.cull.len());
1404        // Per-visible tile AABB (tx0, tx1, ty0, ty1) for the bin pass.
1405        let mut boxes: Vec<[i32; 4]> = Vec::with_capacity(self.cull.len());
1406        // Per-visible kv6colmul tables, flattened to two u32 per u64
1407        // entry (lanes 0|1, then 2|3), packed in visible order so the
1408        // shader indexes `colmul[inst_idx*512 + dir*2 + {0,1}]`.
1409        let mut visible_colmul: Vec<u32> = Vec::with_capacity(self.cull.len() * 512);
1410        let mut counts = vec![0u32; n_tiles];
1411
1412        for ci in &self.cull {
1413            // Skip instances of a removed model (tombstoned chain) — they
1414            // linger in `cull` until the caller drops them, but draw as
1415            // nothing.
1416            if self.chains[ci.chain_id as usize].is_empty() {
1417                continue;
1418            }
1419            let rel = [
1420                ci.center[0] - f.pos[0],
1421                ci.center[1] - f.pos[1],
1422                ci.center[2] - f.pos[2],
1423            ];
1424            let z = dot3(rel, f.forward);
1425            let r = ci.radius;
1426            if z + r < 0.0 || z - r > f.far {
1427                continue; // behind / beyond far
1428            }
1429            let x = dot3(rel, f.right);
1430            if (x - f.half_w * z) > r * nw || (-x - f.half_w * z) > r * nw {
1431                continue; // right / left
1432            }
1433            let y = dot3(rel, f.down);
1434            if (y - f.half_h * z) > r * nh || (-y - f.half_h * z) > r * nh {
1435                continue; // bottom / top
1436            }
1437
1438            // Visible: project the sphere to a screen AABB → tile range.
1439            let (tx0, tx1, ty0, ty1) = if z > 1e-3 {
1440                let sx = cx + (x / z) * px_per_world;
1441                let sy = cy + (y / z) * px_per_world;
1442                let sr = (r / z) * px_per_world;
1443                (
1444                    (((sx - sr) / ts).floor() as i32).clamp(0, tx_max),
1445                    (((sx + sr) / ts).floor() as i32).clamp(0, tx_max),
1446                    (((sy - sr) / ts).floor() as i32).clamp(0, ty_max),
1447                    (((sy + sr) / ts).floor() as i32).clamp(0, ty_max),
1448                )
1449            } else {
1450                (0, tx_max, 0, ty_max)
1451            };
1452            // GPU.10.4 — pick the LOD level by projected voxel size:
1453            // choose the coarsest level whose voxel still covers at
1454            // least `lod_px` screen pixels, i.e. step up once a mip-0
1455            // voxel would be smaller than that. `lod_px = 1` is the
1456            // natural "don't go sub-pixel" threshold; larger values
1457            // force LOD in closer (tuning/inspection).
1458            let chain = &self.chains[ci.chain_id as usize];
1459            let level = if z > 1e-3 && chain.len() > 1 {
1460                let voxel_px = px_per_world / z; // mip-0 voxel screen size
1461                ((lod_px / voxel_px).log2().ceil().max(0.0) as usize).min(chain.len() - 1)
1462            } else {
1463                0
1464            };
1465            let mut g = ci.gpu;
1466            g.model_id = chain[level];
1467            visible.push(g);
1468            boxes.push([tx0, tx1, ty0, ty1]);
1469            for &w in ci.colmul.iter() {
1470                visible_colmul.push((w & 0xffff_ffff) as u32);
1471                visible_colmul.push((w >> 32) as u32);
1472            }
1473            for ty in ty0..=ty1 {
1474                for tx in tx0..=tx1 {
1475                    counts[(ty * tiles_x as i32 + tx) as usize] += 1;
1476                }
1477            }
1478        }
1479
1480        if visible.is_empty() {
1481            return (0, tiles_x, tiles_y);
1482        }
1483
1484        // Prefix-sum counts → per-tile offsets; build the flat grouped
1485        // index list.
1486        let mut tile_ranges = vec![0u32; n_tiles * 2];
1487        let mut running = 0u32;
1488        for t in 0..n_tiles {
1489            tile_ranges[2 * t] = running; // offset
1490            tile_ranges[2 * t + 1] = counts[t]; // count
1491            running += counts[t];
1492        }
1493        let total = running as usize;
1494        let mut tile_instances = vec![0u32; total.max(1)];
1495        let mut cursor: Vec<u32> = (0..n_tiles).map(|t| tile_ranges[2 * t]).collect();
1496        for (vis_idx, b) in boxes.iter().enumerate() {
1497            for ty in b[2]..=b[3] {
1498                for tx in b[0]..=b[1] {
1499                    let t = (ty * tiles_x as i32 + tx) as usize;
1500                    tile_instances[cursor[t] as usize] = vis_idx as u32;
1501                    cursor[t] += 1;
1502                }
1503            }
1504        }
1505
1506        // Upload: instances + (grown) tile buffers. Grow a tile buffer
1507        // only when this frame needs more than its capacity (wgpu has
1508        // no Clone on Buffer, so we replace the field in place).
1509        queue.write_buffer(&self.instances, 0, bytemuck::cast_slice(&visible));
1510        let need_ranges = tile_ranges.len() as u32;
1511        if need_ranges > self.tile_ranges_cap {
1512            self.tile_ranges_cap = need_ranges.next_power_of_two();
1513            self.tile_ranges = storage_dst_u32(
1514                device,
1515                "roxlap-gpu sprite_reg.tile_ranges",
1516                self.tile_ranges_cap,
1517            );
1518        }
1519        let need_inst = tile_instances.len() as u32;
1520        if need_inst > self.tile_instances_cap {
1521            self.tile_instances_cap = need_inst.next_power_of_two();
1522            self.tile_instances = storage_dst_u32(
1523                device,
1524                "roxlap-gpu sprite_reg.tile_instances",
1525                self.tile_instances_cap,
1526            );
1527        }
1528        queue.write_buffer(&self.tile_ranges, 0, bytemuck::cast_slice(&tile_ranges));
1529        queue.write_buffer(
1530            &self.tile_instances,
1531            0,
1532            bytemuck::cast_slice(&tile_instances),
1533        );
1534        let need_colmul = visible_colmul.len() as u32;
1535        if need_colmul > self.colmul_cap {
1536            self.colmul_cap = need_colmul.next_power_of_two();
1537            self.colmul = storage_dst_u32(device, "roxlap-gpu sprite_reg.colmul", self.colmul_cap);
1538        }
1539        queue.write_buffer(&self.colmul, 0, bytemuck::cast_slice(&visible_colmul));
1540
1541        (visible.len() as u32, tiles_x, tiles_y)
1542    }
1543}
1544
1545/// GPU.12 incremental — per-entry placement of one model's `colors`
1546/// (and the parallel `dirs`) within the shared registry buffers: a
1547/// `[off, off+cap)` word window holding `len` live words. `cap >= len`
1548/// gives slack so a carve that *grows* the surface-voxel count can be
1549/// rewritten in place without relocating.
1550#[derive(Clone, Copy, Debug, PartialEq, Eq)]
1551struct ColorSlot {
1552    off: u32,
1553    cap: u32,
1554    len: u32,
1555}
1556
1557/// First-fit suballocator over the parallel `colors`/`dirs` buffers
1558/// (same offsets/ranks → one allocator drives both). Each registry
1559/// entry owns a [`ColorSlot`]; growth past a slot's `cap` relocates it
1560/// (freeing the old block) via the free list or a bump tail, and only
1561/// when the tail would exceed `cap_total` does the caller grow + repack
1562/// the whole buffer. Pure (no GPU) so it unit-tests on its own.
1563#[derive(Debug, Default)]
1564struct ColorsAllocator {
1565    /// Per-entry slot, indexed by entry id.
1566    slots: Vec<ColorSlot>,
1567    /// Freed `(off, cap)` blocks available for first-fit reuse.
1568    free: Vec<(u32, u32)>,
1569    /// Next bump-allocation position (words).
1570    tail: u32,
1571    /// Total buffer capacity in words.
1572    cap_total: u32,
1573}
1574
1575/// Slack-padded capacity for a `len`-word array: +25% + 16 words, so a
1576/// few extra surface voxels from a carve fit without relocating.
1577fn slot_cap(len: u32) -> u32 {
1578    len + len / 4 + 16
1579}
1580
1581/// Slack capacity (words) for a grown concatenated buffer: +50% + 256, so
1582/// a burst of `add_model` calls bump-appends rather than re-growing every
1583/// time. Matches [`ColorsAllocator`]'s `cap_total` headroom.
1584fn grow_words(used: u32) -> u32 {
1585    used + used / 2 + 256
1586}
1587
1588/// Slack capacity (records) for a grown `model_meta` buffer: +50% + 8.
1589fn grow_records(count: u32) -> u32 {
1590    count + count / 2 + 8
1591}
1592
1593impl ColorsAllocator {
1594    /// Lay every entry out contiguously (with per-slot slack) and add a
1595    /// global tail headroom so early growth bump-allocates rather than
1596    /// repacks.
1597    fn new(entry_lens: &[u32]) -> Self {
1598        let mut a = Self::default();
1599        a.repack(entry_lens);
1600        a
1601    }
1602
1603    fn slot(&self, entry: usize) -> ColorSlot {
1604        self.slots[entry]
1605    }
1606
1607    fn cap_total(&self) -> u32 {
1608        self.cap_total
1609    }
1610
1611    /// Repack ALL entries compactly to fit `new_lens`, resetting the
1612    /// free list + tail and choosing a fresh `cap_total` with headroom.
1613    /// Used at initial build and on a buffer grow.
1614    fn repack(&mut self, new_lens: &[u32]) {
1615        self.free.clear();
1616        let mut off = 0u32;
1617        let mut slots = Vec::with_capacity(new_lens.len());
1618        for &len in new_lens {
1619            // A 0-length (dead / removed) entry takes no space — keeps a
1620            // tombstone slot so entry ids stay positional.
1621            let cap = if len == 0 { 0 } else { slot_cap(len) };
1622            slots.push(ColorSlot { off, cap, len });
1623            off += cap;
1624        }
1625        self.slots = slots;
1626        self.tail = off;
1627        // Global headroom: +50% + 256 words.
1628        self.cap_total = off + off / 2 + 256;
1629    }
1630
1631    /// Place `new_len` words for `entry`. Returns `Some(off)` with the
1632    /// (possibly relocated) slot offset, or `None` if the buffer must
1633    /// grow + repack. On relocation the old block is pushed to the free
1634    /// list; an in-place fit returns the unchanged offset.
1635    fn place(&mut self, entry: usize, new_len: u32) -> Option<u32> {
1636        let cur = self.slots[entry];
1637        if new_len <= cur.cap {
1638            self.slots[entry] = ColorSlot {
1639                len: new_len,
1640                ..cur
1641            };
1642            return Some(cur.off);
1643        }
1644        let old = (cur.off, cur.cap);
1645        // First-fit a freed block big enough for the live data.
1646        if let Some(i) = self.free.iter().position(|&(_, c)| c >= new_len) {
1647            let (off, cap) = self.free.remove(i);
1648            self.free.push(old);
1649            self.slots[entry] = ColorSlot {
1650                off,
1651                cap,
1652                len: new_len,
1653            };
1654            return Some(off);
1655        }
1656        // Bump the tail if there's room.
1657        let want = slot_cap(new_len);
1658        if self.tail + want <= self.cap_total {
1659            let off = self.tail;
1660            self.tail += want;
1661            self.free.push(old);
1662            self.slots[entry] = ColorSlot {
1663                off,
1664                cap: want,
1665                len: new_len,
1666            };
1667            return Some(off);
1668        }
1669        None
1670    }
1671
1672    /// Append a slot for a brand-new entry of `new_len` words (used by
1673    /// [`SpriteRegistryResident::add_model`]). Returns `Some(off)` placed
1674    /// via the free list or the bump tail, or `None` if the buffer must
1675    /// grow + repack — in which case **no** slot is pushed (the caller's
1676    /// repack rebuilds every slot from scratch).
1677    fn push(&mut self, new_len: u32) -> Option<u32> {
1678        if let Some(i) = self.free.iter().position(|&(_, c)| c >= new_len) {
1679            let (off, cap) = self.free.remove(i);
1680            self.slots.push(ColorSlot {
1681                off,
1682                cap,
1683                len: new_len,
1684            });
1685            return Some(off);
1686        }
1687        let want = slot_cap(new_len);
1688        if self.tail + want <= self.cap_total {
1689            let off = self.tail;
1690            self.tail += want;
1691            self.slots.push(ColorSlot {
1692                off,
1693                cap: want,
1694                len: new_len,
1695            });
1696            return Some(off);
1697        }
1698        None
1699    }
1700
1701    /// Free `entry`'s slot back to the pool ([`SpriteRegistryResident::
1702    /// remove_model`]). Its `(off, cap)` block joins the free list for
1703    /// first-fit reuse by a later [`Self::push`]; the slot is zeroed so a
1704    /// repack treats it as a 0-length tombstone.
1705    fn free(&mut self, entry: usize) {
1706        let s = self.slots[entry];
1707        if s.cap > 0 {
1708            self.free.push((s.off, s.cap));
1709        }
1710        self.slots[entry] = ColorSlot {
1711            off: 0,
1712            cap: 0,
1713            len: 0,
1714        };
1715    }
1716}
1717
1718/// Create a STORAGE buffer of u32s; pads empty input (wgpu rejects
1719/// zero-sized storage bindings).
1720#[allow(dead_code)]
1721fn storage_u32(device: &wgpu::Device, label: &str, data: &[u32]) -> wgpu::Buffer {
1722    use wgpu::util::DeviceExt;
1723    let bytes: &[u8] = if data.is_empty() {
1724        bytemuck::cast_slice(&[0u32])
1725    } else {
1726        bytemuck::cast_slice(data)
1727    };
1728    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
1729        label: Some(label),
1730        contents: bytes,
1731        usage: wgpu::BufferUsages::STORAGE,
1732    })
1733}
1734
1735/// Create an uninitialised `STORAGE | COPY_DST` `u32` buffer of `cap`
1736/// words (≥1). Written each frame via `queue.write_buffer`.
1737fn storage_dst_u32(device: &wgpu::Device, label: &str, cap: u32) -> wgpu::Buffer {
1738    device.create_buffer(&wgpu::BufferDescriptor {
1739        label: Some(label),
1740        size: u64::from(cap.max(1)) * 4,
1741        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
1742        mapped_at_creation: false,
1743    })
1744}
1745
1746/// Create a `STORAGE | COPY_DST` `u32` buffer of `cap` words (≥ data
1747/// length, ≥ 1), initialised with `data` at offset 0 and the tail left
1748/// zeroed. Unlike [`storage_u32`] (STORAGE-only, exact-size) this both
1749/// reserves spare capacity and is `COPY_DST`, so the incremental
1750/// [`SpriteRegistryResident::update_model`] can `write_buffer` a growing
1751/// `colors`/`dirs` array in place. Filled via `mapped_at_creation` so no
1752/// queue is needed at upload time.
1753fn storage_dst_u32_cap(device: &wgpu::Device, label: &str, data: &[u32], cap: u32) -> wgpu::Buffer {
1754    let cap = cap.max(data.len() as u32).max(1);
1755    let buf = device.create_buffer(&wgpu::BufferDescriptor {
1756        label: Some(label),
1757        size: u64::from(cap) * 4,
1758        usage: wgpu::BufferUsages::STORAGE
1759            | wgpu::BufferUsages::COPY_DST
1760            | wgpu::BufferUsages::COPY_SRC,
1761        mapped_at_creation: true,
1762    });
1763    if !data.is_empty() {
1764        buf.slice(..(data.len() as u64 * 4))
1765            .get_mapped_range_mut()
1766            .copy_from_slice(bytemuck::cast_slice(data));
1767    }
1768    buf.unmap();
1769    buf
1770}
1771
1772/// Create a `STORAGE | COPY_DST` buffer of Pod records, exact-size
1773/// (≥ 1, zero-padded), so individual records can be rewritten in place
1774/// by [`SpriteRegistryResident::update_model`] on a relocation. The
1775/// record *count* never changes on an incremental edit (no model is
1776/// added/removed), so no slack is needed here.
1777fn storage_dst_pod<T: Pod + Zeroable>(
1778    device: &wgpu::Device,
1779    label: &str,
1780    data: &[T],
1781) -> wgpu::Buffer {
1782    let one = [T::zeroed()];
1783    let src: &[T] = if data.is_empty() { &one } else { data };
1784    let buf = device.create_buffer(&wgpu::BufferDescriptor {
1785        label: Some(label),
1786        size: std::mem::size_of_val(src) as u64,
1787        usage: wgpu::BufferUsages::STORAGE
1788            | wgpu::BufferUsages::COPY_DST
1789            | wgpu::BufferUsages::COPY_SRC,
1790        mapped_at_creation: true,
1791    });
1792    buf.slice(..)
1793        .get_mapped_range_mut()
1794        .copy_from_slice(bytemuck::cast_slice(src));
1795    buf.unmap();
1796    buf
1797}
1798
1799/// Create a `STORAGE | COPY_DST` Pod buffer holding `cap` records
1800/// (≥ `data.len()`, ≥ 1), initialised with `data` at record 0 and the
1801/// tail zeroed. The slack lets [`SpriteRegistryResident::add_model`] grow
1802/// the `model_meta` table without re-growing on every add.
1803fn storage_dst_pod_cap<T: Pod + Zeroable>(
1804    device: &wgpu::Device,
1805    label: &str,
1806    data: &[T],
1807    cap: u32,
1808) -> wgpu::Buffer {
1809    let rec = std::mem::size_of::<T>() as u64;
1810    let cap = u64::from(cap.max(data.len() as u32).max(1));
1811    let buf = device.create_buffer(&wgpu::BufferDescriptor {
1812        label: Some(label),
1813        size: cap * rec,
1814        usage: wgpu::BufferUsages::STORAGE
1815            | wgpu::BufferUsages::COPY_DST
1816            | wgpu::BufferUsages::COPY_SRC,
1817        mapped_at_creation: true,
1818    });
1819    if !data.is_empty() {
1820        buf.slice(..(data.len() as u64 * rec))
1821            .get_mapped_range_mut()
1822            .copy_from_slice(bytemuck::cast_slice(data));
1823    }
1824    buf.unmap();
1825    buf
1826}
1827
1828/// Create a STORAGE buffer of Pod records; pads empty input with one
1829/// zeroed `T`.
1830#[allow(dead_code)]
1831fn storage_pod<T: Pod + Zeroable>(device: &wgpu::Device, label: &str, data: &[T]) -> wgpu::Buffer {
1832    use wgpu::util::DeviceExt;
1833    let one = [T::zeroed()];
1834    let src: &[T] = if data.is_empty() { &one } else { data };
1835    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
1836        label: Some(label),
1837        contents: bytemuck::cast_slice(src),
1838        usage: wgpu::BufferUsages::STORAGE,
1839    })
1840}
1841
1842#[cfg(test)]
1843mod tests {
1844    use super::*;
1845    use roxlap_formats::kv6::{Kv6, Voxel};
1846
1847    /// 2×1 kv6: column (0,0) has voxels at z=5 (red) and z=1 (green)
1848    /// stored OUT of z-order; column (1,0) has one voxel at z=3.
1849    fn kv6_unsorted() -> Kv6 {
1850        let mk = |z, col| Voxel {
1851            col,
1852            z,
1853            vis: 0,
1854            dir: 0,
1855        };
1856        Kv6 {
1857            xsiz: 2,
1858            ysiz: 1,
1859            zsiz: 8,
1860            xpiv: 0.0,
1861            ypiv: 0.0,
1862            zpiv: 0.0,
1863            voxels: vec![mk(5, 0xAA), mk(1, 0xBB), mk(3, 0xCC)],
1864            xlen: vec![2, 1],
1865            ylen: vec![vec![2], vec![1]],
1866            palette: None,
1867        }
1868    }
1869
#[test]
fn occupancy_bits_set_at_voxel_z() {
    let model = build_sprite_model(&kv6_unsorted());
    assert_eq!(model.dims, [2, 1, 8]);
    // zsiz = 8 fits in one 32-bit word: ceil(8 / 32) == 1.
    assert_eq!(model.occ_words_per_col, 1);
    // Column 0 holds z = 1 and z = 5; column 1 holds z = 3.
    let col0_bits: u32 = (1 << 1) | (1 << 5);
    let col1_bits: u32 = 1 << 3;
    assert_eq!(model.occupancy[0], col0_bits);
    assert_eq!(model.occupancy[1], col1_bits);
}
1879
#[test]
fn colors_are_ascending_z_for_rank_lookup() {
    let model = build_sprite_model(&kv6_unsorted());
    // Prefix table: column 0 owns two colours, column 1 owns one.
    assert_eq!(model.color_offsets, [0, 2, 3]);
    // Column 0 comes out sorted by z, so z=1 (0xBB) precedes z=5 (0xAA)
    // even though the fixture stores them the other way round.
    assert_eq!(model.colors, [0xBB, 0xAA, 0xCC]);
}
1887
#[test]
fn identity_basis_inverts_to_identity() {
    // The identity matrix is its own inverse.
    let identity = [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]];
    let inverse = mat3_inverse(identity);
    assert_eq!(inverse, identity);
}
1893
#[test]
fn fork_is_independent_of_parent() {
    let mut registry = SpriteModelRegistry::new();
    let parent = registry.add(build_sprite_model(&kv6_unsorted()));
    let child = registry.fork(parent);
    assert_ne!(parent, child);

    // Overwrite every colour of the fork only.
    registry.model_mut(child).recolor(|_| 0x11);

    // The parent keeps its original colours; the fork is fully recoloured.
    assert_eq!(&registry.model(parent).colors, &[0xBB, 0xAA, 0xCC]);
    assert_eq!(&registry.model(child).colors, &[0x11, 0x11, 0x11]);
}
1906
#[test]
fn registry_gpu_structs_have_expected_sizes() {
    // Pin the byte sizes of the two GPU-visible records.
    let meta_bytes = std::mem::size_of::<SpriteModelMeta>();
    let instance_bytes = std::mem::size_of::<SpriteInstanceGpu>();
    assert_eq!(meta_bytes, 48);
    assert_eq!(instance_bytes, 64);
}
1912
#[test]
fn add_lod_builds_halving_mip_chain() {
    let mut registry = SpriteModelRegistry::new();
    // The 2×1×8 fixture is expected to mip as
    // 2×1×8 → 1×1×4 → 1×1×2 → 1×1×1; only mip 0 is asserted here.
    let base = build_sprite_model(&kv6_unsorted());
    let id = registry.add_lod(base, 4);
    let mip0 = registry.model(id);
    assert_eq!(mip0.dims, [2, 1, 8]);
    // Mip 0 keeps a voxel world size of 1.
    assert!((mip0.voxel_world_size - 1.0).abs() < 1e-6);
}
1923
1924    /// kv6 from explicit voxels, ordered x-major/y-inner to match
1925    /// `build_sprite_model`'s column walk.
1926    fn kv6_from(xsiz: u32, ysiz: u32, zsiz: u32, voxels: &[(u32, u32, u16, u32)]) -> Kv6 {
1927        let mut ylen = vec![vec![0u16; ysiz as usize]; xsiz as usize];
1928        let mut flat = Vec::new();
1929        for x in 0..xsiz {
1930            for y in 0..ysiz {
1931                let mut col: Vec<(u16, u32)> = voxels
1932                    .iter()
1933                    .filter(|(vx, vy, _, _)| *vx == x && *vy == y)
1934                    .map(|(_, _, z, c)| (*z, *c))
1935                    .collect();
1936                col.sort_by_key(|(z, _)| *z);
1937                ylen[x as usize][y as usize] = col.len() as u16;
1938                for (z, c) in col {
1939                    flat.push(Voxel {
1940                        col: c,
1941                        z,
1942                        vis: 0,
1943                        dir: 0,
1944                    });
1945                }
1946            }
1947        }
1948        let xlen = ylen
1949            .iter()
1950            .map(|c| c.iter().map(|&v| u32::from(v)).sum())
1951            .collect();
1952        Kv6 {
1953            xsiz,
1954            ysiz,
1955            zsiz,
1956            xpiv: 0.0,
1957            ypiv: 0.0,
1958            zpiv: 0.0,
1959            voxels: flat,
1960            xlen,
1961            ylen,
1962            palette: None,
1963        }
1964    }
1965
1966    fn offsets_consistent(m: &SpriteModel) -> bool {
1967        let cols = (m.dims[0] * m.dims[1]) as usize;
1968        if m.color_offsets.len() != cols + 1 {
1969            return false;
1970        }
1971        // Monotonic non-decreasing + last == colors.len + each column's
1972        // span == its solid-voxel count.
1973        for w in m.color_offsets.windows(2) {
1974            if w[1] < w[0] {
1975                return false;
1976            }
1977        }
1978        m.color_offsets[cols] as usize == m.colors.len()
1979    }
1980
#[test]
fn carve_two_layers_keeps_offsets_consistent() {
    // Mirrors the demo's carve. `(x, y, z, colour)` voxels at varied
    // heights: some columns own z=0 and/or z=1, others neither.
    let solid = [
        (0, 0, 0, 0xA0),
        (0, 0, 1, 0xA1),
        (0, 0, 5, 0xA5),
        (1, 0, 1, 0xB1),
        (2, 1, 0, 0xC0),
        (2, 1, 3, 0xC3),
    ];
    let mut model = build_sprite_model(&kv6_from(3, 2, 8, &solid));
    assert!(offsets_consistent(&model));

    let [mx, my, _] = model.dims;
    for z in 0..2u32 {
        // Clear the whole z layer, column by column.
        for y in 0..my {
            for x in 0..mx {
                model.set_voxel(x, y, z, None);
            }
        }
        assert!(offsets_consistent(&model), "inconsistent after carving z={z}");
        // Downsampling the carved model must not panic.
        let _ = model.downsample();
    }
}
2011
#[test]
fn set_voxel_inserts_replaces_and_clears() {
    // Start: column 0 = {z1: 0xBB, z5: 0xAA}, column 1 = {z3: 0xCC}.
    let mut model = build_sprite_model(&kv6_unsorted());
    let (z1, z3, z5): (u32, u32, u32) = (1 << 1, 1 << 3, 1 << 5);

    // Insert: z=3 lands between z=1 and z=5, i.e. at rank 1 of column 0,
    // and every later offset shifts up by one.
    let inserted = model.set_voxel(0, 0, 3, Some(0x55));
    assert!(inserted);
    assert_eq!(model.occupancy[0], z1 | z3 | z5);
    assert_eq!(model.color_offsets, vec![0, 3, 4]);
    assert_eq!(&model.colors, &[0xBB, 0x55, 0xAA, 0xCC]);

    // Replace: same voxel, new colour; offsets stay put.
    let replaced = model.set_voxel(0, 0, 3, Some(0x66));
    assert!(replaced);
    assert_eq!(&model.colors, &[0xBB, 0x66, 0xAA, 0xCC]);
    assert_eq!(model.color_offsets, vec![0, 3, 4]);

    // Clear: dropping z=1 (rank 0) shrinks column 0 back to two colours.
    let cleared = model.set_voxel(0, 0, 1, None);
    assert!(cleared);
    assert_eq!(model.occupancy[0], z3 | z5);
    assert_eq!(model.color_offsets, vec![0, 2, 3]);
    assert_eq!(&model.colors, &[0x66, 0xAA, 0xCC]);

    // No-ops report `false`: clearing an already-empty voxel, and an
    // edit outside the model bounds.
    assert!(!model.set_voxel(0, 0, 2, None));
    assert!(!model.set_voxel(9, 0, 0, Some(1)));
}
2039
#[test]
fn rebuild_lod_refreshes_coarse_levels_from_mip0() {
    const FLAT: u32 = 0x0000_2000;
    let mut registry = SpriteModelRegistry::new();
    let id = registry.add_lod(build_sprite_model(&kv6_unsorted()), 3);

    // Recolour mip 0 alone, then regenerate the coarser levels from it.
    registry.model_mut(id).recolor(|_| FLAT);
    registry.rebuild_lod(id);

    // Averaging voxels that all share one colour must give that colour
    // back, so every mip-1 colour equals FLAT.
    let mip1_entry = registry.chains[id as usize][1] as usize;
    let mip1_colors = &registry.entries[mip1_entry].colors;
    assert!(mip1_colors.iter().all(|&c| c == FLAT));
}
2054
2055    // ---- GPU.12 incremental: colors/dirs suballocator -----------------
2056
2057    /// Every slot fits its data, has slack, doesn't overlap the next, and
2058    /// the buffer reserves tail headroom past the last slot.
2059    fn alloc_invariants(a: &ColorsAllocator, lens: &[u32]) {
2060        let mut prev_end = 0u32;
2061        for (e, &len) in lens.iter().enumerate() {
2062            let s = a.slot(e);
2063            assert_eq!(s.len, len, "slot {e} len");
2064            assert!(s.cap >= s.len, "slot {e} cap >= len");
2065            // In a freshly repacked layout slots are in entry order.
2066            assert!(s.off >= prev_end, "slot {e} overlaps previous");
2067            assert!(s.off + s.cap <= a.cap_total(), "slot {e} past cap_total");
2068            prev_end = s.off + s.cap;
2069        }
2070        assert!(a.cap_total() >= prev_end, "tail headroom");
2071    }
2072
#[test]
fn allocator_new_lays_out_with_slack_and_headroom() {
    let lens = [10u32, 0, 64, 7];
    let alloc = ColorsAllocator::new(&lens);
    alloc_invariants(&alloc, &lens);

    // Per-slot slack: the 64-word entry reserves more than 64 words, so
    // a small carve-grow fits without relocating.
    let big = alloc.slot(2);
    assert!(big.cap > 64);

    // Global headroom: capacity extends past the end of the last slot.
    let last = alloc.slot(3);
    assert!(alloc.cap_total() > last.off + last.cap);
}
2083
#[test]
fn allocator_place_in_place_when_within_cap() {
    let mut alloc = ColorsAllocator::new(&[10, 20]);
    let ColorSlot { off, cap, .. } = alloc.slot(0);

    // Shrinking keeps the slot where it is, with its capacity intact.
    assert_eq!(alloc.place(0, 5), Some(off));
    let shrunk = alloc.slot(0);
    assert_eq!(shrunk.len, 5);
    assert_eq!(shrunk.cap, cap);

    // Growing right up to the capacity is still an in-place fit.
    assert_eq!(alloc.place(0, cap), Some(off));
    assert_eq!(alloc.slot(0).off, off);
    assert!(alloc.free.is_empty(), "no relocation should free anything");
}
2098
#[test]
fn allocator_place_relocates_to_tail_and_frees_old() {
    let mut alloc = ColorsAllocator::new(&[10, 20]);
    let before = alloc.slot(0);
    let old_block = (before.off, before.cap);
    let old_tail = alloc.tail;

    // Five words more than the slot can hold forces a move to the bump
    // tail.
    let grown = before.cap + 5;
    let off = alloc.place(0, grown).expect("fits in headroom");
    assert_eq!(off, old_tail, "relocated to old tail");

    let after = alloc.slot(0);
    assert_eq!(after.off, off);
    assert_eq!(after.len, grown);
    assert!(alloc.free.contains(&old_block), "old slot freed");
}
2112
#[test]
fn allocator_reuses_freed_block_first_fit() {
    // Entry 0 owns a large slot and entry 1 a tiny one, so growing
    // entry 1 cannot happen in place and must go to a block freed by
    // entry 0.
    let mut alloc = ColorsAllocator::new(&[10, 2]);
    let first = alloc.slot(0);
    let freed = (first.off, first.cap);

    // Step 1: push entry 0 out to the tail, releasing its original block.
    let _ = alloc.place(0, first.cap + 5).unwrap();
    assert!(alloc.free.contains(&freed));

    // Step 2: grow entry 1 just past its own capacity, yet small enough
    // for the released block — first-fit must pick that block instead of
    // bumping the tail.
    let grown = alloc.slot(1).cap + 1;
    assert!(grown <= freed.1, "freed block big enough");
    let off = alloc.place(1, grown).expect("reuses freed block");
    assert_eq!(off, freed.0, "first-fit reused the freed slot offset");
    assert!(!alloc.free.contains(&freed), "freed block consumed");
}
2130
#[test]
fn allocator_signals_grow_then_repack_restores() {
    let mut alloc = ColorsAllocator::new(&[8, 8]);

    // A request well beyond the total capacity cannot be placed.
    let huge = alloc.cap_total() + 100;
    assert_eq!(alloc.place(0, huge), None, "overflow must signal grow");

    // Repacking with the new lengths compacts the layout and enlarges
    // the buffer.
    let grown_lens = [huge, 8];
    alloc.repack(&grown_lens);
    alloc_invariants(&alloc, &grown_lens);
    assert!(alloc.cap_total() > huge);

    // The same request is now an in-place fit.
    assert_eq!(alloc.place(0, huge), Some(alloc.slot(0).off));
}
2144
/// Simulate a carve loop the way `update_model` drives the allocator:
/// the middle entry's colour count wanders up and down over many edits
/// while its two neighbours keep a fixed length. Each change is absorbed
/// in place, by the free list, or by the bump tail; when none of those
/// fit, the whole set is repacked. The live `[off, off+len)` windows
/// must be pairwise disjoint after every single edit.
#[test]
fn allocator_carve_loop_keeps_live_windows_disjoint() {
    let mut lens = [40u32, 12, 40];
    let mut a = ColorsAllocator::new(&lens);
    // Deterministic walk of entry 1's length; the 200-word step is
    // there to force at least one grow + repack.
    let walk = [13u32, 30, 60, 18, 9, 80, 80, 25, 200, 7];
    let mut grew = false;

    for &len in &walk {
        lens[1] = len;
        match a.place(1, len) {
            // Overflow: rebuild the whole layout from the current lengths.
            None => {
                grew = true;
                a.repack(&lens);
            }
            // Placed: the fixed-size neighbours must still fit in place.
            Some(_) => {
                assert_eq!(a.place(0, 40), Some(a.slot(0).off));
                assert_eq!(a.place(2, 40), Some(a.slot(2).off));
            }
        }
        assert_eq!(a.slot(1).len, len);

        // Sort the live windows by offset; each must end before the next
        // one starts.
        let mut wins: Vec<(u32, u32)> = (0..3)
            .map(|e| {
                let s = a.slot(e);
                (s.off, s.len)
            })
            .collect();
        wins.sort_by_key(|w| w.0);
        for pair in wins.windows(2) {
            let (o0, l0) = pair[0];
            let (o1, _) = pair[1];
            assert!(o0 + l0 <= o1, "live windows overlap: {pair:?}");
        }
    }
    assert!(grew, "the 200-word jump should have forced a repack");
}
2184
2185    // --- incremental instance path (device-backed; skips w/o adapter) ---
2186
2187    fn headless() -> Option<crate::HeadlessGpu> {
2188        match crate::HeadlessGpu::new_blocking(crate::GpuRendererSettings::default()) {
2189            Ok(h) => Some(h),
2190            Err(e) => {
2191                eprintln!("[skip] no GPU adapter reachable: {e}");
2192                None
2193            }
2194        }
2195    }
2196
2197    fn one_model_registry() -> (SpriteModelRegistry, u32) {
2198        let mut reg = SpriteModelRegistry::new();
2199        let id = reg.add(build_sprite_model(&kv6_unsorted()));
2200        (reg, id)
2201    }
2202
2203    fn inst(model_id: u32, pos: [f32; 3]) -> SpriteInstance {
2204        use roxlap_formats::sprite::Sprite;
2205        SpriteInstance {
2206            model_id,
2207            transform: SpriteInstanceTransform::from_sprite(&Sprite::axis_aligned(
2208                kv6_unsorted(),
2209                pos,
2210            )),
2211        }
2212    }
2213
2214    #[test]
2215    fn append_grows_count_and_capacity_pow2() {
2216        let Some(h) = headless() else { return };
2217        let (reg, m) = one_model_registry();
2218        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(m, [0.0; 3])]);
2219        assert_eq!(res.instance_count(), 1);
2220        assert_eq!(res.instance_capacity, 1);
2221
2222        // Append 4 → count 5, capacity grows to next_pow2(5) = 8.
2223        let more: Vec<_> = (1..=4).map(|i| inst(m, [i as f32, 0.0, 0.0])).collect();
2224        let base = res.append_instances(&h.device, &reg, &more);
2225        assert_eq!(base, 1, "first appended index follows the seed instance");
2226        assert_eq!(res.instance_count(), 5);
2227        assert_eq!(res.instance_capacity, 8, "power-of-two growth");
2228
2229        // A second append that still fits keeps the same capacity (no realloc).
2230        let base2 = res.append_instances(&h.device, &reg, &[inst(m, [9.0, 0.0, 0.0])]);
2231        assert_eq!(base2, 5);
2232        assert_eq!(res.instance_count(), 6);
2233        assert_eq!(res.instance_capacity, 8, "fits existing capacity, no grow");
2234    }
2235
2236    #[test]
2237    fn append_empty_is_noop() {
2238        let Some(h) = headless() else { return };
2239        let (reg, m) = one_model_registry();
2240        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(m, [0.0; 3])]);
2241        let base = res.append_instances(&h.device, &reg, &[]);
2242        assert_eq!(base, 1);
2243        assert_eq!(res.instance_count(), 1);
2244        assert_eq!(res.instance_capacity, 1);
2245    }
2246
2247    /// Read `words` u32s back from a GPU buffer (needs COPY_SRC).
2248    fn read_u32(h: &crate::HeadlessGpu, buf: &wgpu::Buffer, words: u64) -> Vec<u32> {
2249        let bytes = words * 4;
2250        let staging = h.device.create_buffer(&wgpu::BufferDescriptor {
2251            label: Some("readback"),
2252            size: bytes,
2253            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
2254            mapped_at_creation: false,
2255        });
2256        let mut enc = h
2257            .device
2258            .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
2259        enc.copy_buffer_to_buffer(buf, 0, &staging, 0, bytes);
2260        h.queue.submit(std::iter::once(enc.finish()));
2261        let slice = staging.slice(..);
2262        let (tx, rx) = std::sync::mpsc::channel();
2263        slice.map_async(wgpu::MapMode::Read, move |r| tx.send(r).unwrap());
2264        h.device.poll(wgpu::PollType::wait_indefinitely()).ok();
2265        rx.recv().unwrap().unwrap();
2266        let data = slice.get_mapped_range();
2267        let out = bytemuck::cast_slice::<u8, u32>(&data).to_vec();
2268        drop(data);
2269        staging.unmap();
2270        out
2271    }
2272
2273    /// A second distinct model so add_model has real new geometry to lay
2274    /// down (different dims + colours from `kv6_unsorted`).
2275    fn kv6_other() -> Kv6 {
2276        let mk = |z, col| Voxel {
2277            col,
2278            z,
2279            vis: 0,
2280            dir: 0,
2281        };
2282        Kv6 {
2283            xsiz: 1,
2284            ysiz: 1,
2285            zsiz: 4,
2286            xpiv: 0.0,
2287            ypiv: 0.0,
2288            zpiv: 0.0,
2289            voxels: vec![mk(0, 0x11), mk(2, 0x22)],
2290            xlen: vec![2],
2291            ylen: vec![vec![2]],
2292            palette: None,
2293        }
2294    }
2295
2296    /// add_model lays the new model's volume on the GPU at the offsets its
2297    /// meta record claims — verified by reading the shared buffers back
2298    /// and matching each entry against its source SpriteModel.
2299    #[test]
2300    fn add_model_uploads_new_volume_incrementally() {
2301        let Some(h) = headless() else { return };
2302
2303        // Residency starts with model A only.
2304        let mut reg = SpriteModelRegistry::new();
2305        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2306        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(a, [0.0; 3])]);
2307        assert_eq!(res.chains.len(), 1);
2308        let entries_before = res.meta.len();
2309
2310        // Append model B (single-level) to the registry, then sync it.
2311        let b = reg.add(build_sprite_model(&kv6_other()));
2312        res.add_model(&h.device, &h.queue, &reg, b);
2313        assert_eq!(res.chains.len(), 2);
2314        assert_eq!(res.meta.len(), entries_before + 1, "one new entry");
2315
2316        // Read the shared buffers back and check EVERY entry's data sits
2317        // where its meta record points — both the pre-existing A and the
2318        // newly streamed B.
2319        let occ = read_u32(&h, &res.occupancy, u64::from(res.occ_cap));
2320        let coloff = read_u32(&h, &res.color_offsets, u64::from(res.coloff_cap));
2321        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2322        for (e, m) in reg.entries.iter().enumerate() {
2323            let meta = res.meta[e];
2324            let oo = meta.occupancy_offset as usize;
2325            assert_eq!(
2326                &occ[oo..oo + m.occupancy.len()],
2327                &m.occupancy[..],
2328                "occ entry {e}"
2329            );
2330            let co = meta.color_offsets_offset as usize;
2331            assert_eq!(
2332                &coloff[co..co + m.color_offsets.len()],
2333                &m.color_offsets[..],
2334                "color_offsets entry {e}"
2335            );
2336            let cc = meta.colors_offset as usize;
2337            assert_eq!(
2338                &cols[cc..cc + m.colors.len()],
2339                &m.colors[..],
2340                "colors entry {e}"
2341            );
2342        }
2343
2344        // And an instance of the freshly-added model can now be appended.
2345        let base = res.append_instances(&h.device, &reg, &[inst(b, [5.0, 0.0, 0.0])]);
2346        assert_eq!(base, 1);
2347        assert_eq!(res.instance_count(), 2);
2348    }
2349
2350    /// Adding many small models forces the volume buffers to grow + rebuild
2351    /// at least once; every entry must still read back correctly across the
2352    /// grow boundary.
2353    #[test]
2354    fn add_model_survives_buffer_growth() {
2355        let Some(h) = headless() else { return };
2356        let mut reg = SpriteModelRegistry::new();
2357        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2358        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &[inst(a, [0.0; 3])]);
2359        let occ_cap0 = res.occ_cap;
2360
2361        // 40 adds — occupancy starts exact-sized (cap == used), so the very
2362        // first add overflows and grows; later ones ride the slack.
2363        for _ in 0..40 {
2364            let id = reg.add(build_sprite_model(&kv6_other()));
2365            res.add_model(&h.device, &h.queue, &reg, id);
2366        }
2367        assert_eq!(res.chains.len(), 41);
2368        assert!(res.occ_cap > occ_cap0, "occupancy buffer grew");
2369
2370        let occ = read_u32(&h, &res.occupancy, u64::from(res.occ_cap));
2371        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2372        for (e, m) in reg.entries.iter().enumerate() {
2373            let meta = res.meta[e];
2374            let oo = meta.occupancy_offset as usize;
2375            assert_eq!(
2376                &occ[oo..oo + m.occupancy.len()],
2377                &m.occupancy[..],
2378                "occ entry {e}"
2379            );
2380            let cc = meta.colors_offset as usize;
2381            assert_eq!(
2382                &cols[cc..cc + m.colors.len()],
2383                &m.colors[..],
2384                "colors entry {e}"
2385            );
2386        }
2387    }
2388
2389    fn test_frustum() -> ViewFrustum {
2390        ViewFrustum {
2391            pos: [0.0, 0.0, 0.0],
2392            right: [1.0, 0.0, 0.0],
2393            down: [0.0, 1.0, 0.0],
2394            forward: [0.0, 0.0, 1.0],
2395            half_w: 1.0,
2396            half_h: 1.0,
2397            far: 10_000.0,
2398        }
2399    }
2400
2401    #[test]
2402    fn remove_model_tombstones_frees_and_reuses() {
2403        let Some(h) = headless() else { return };
2404        // Residency with models A and B, one instance each.
2405        let mut reg = SpriteModelRegistry::new();
2406        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2407        let b = reg.add(build_sprite_model(&kv6_other()));
2408        let mut res = SpriteRegistryResident::upload(
2409            &h.device,
2410            &reg,
2411            &[inst(a, [0.0; 3]), inst(b, [1.0, 0.0, 0.0])],
2412        );
2413        assert_eq!(res.live_model_count(), 2);
2414        assert_eq!(res.dead_model_count(), 0);
2415
2416        // Remove B → tombstoned, its colours freed into the pool.
2417        res.remove_model(b);
2418        assert_eq!(res.live_model_count(), 1);
2419        assert_eq!(res.dead_model_count(), 1);
2420        assert_eq!(res.dead.iter().filter(|&&d| d).count(), 1, "one entry dead");
2421        assert!(!res.colors_alloc.free.is_empty(), "B's colour slot freed");
2422
2423        // Adding C reuses the freed slot (free-list first-fit).
2424        let c = reg.add(build_sprite_model(&kv6_other()));
2425        res.add_model(&h.device, &h.queue, &reg, c);
2426        assert_eq!(res.live_model_count(), 2);
2427
2428        // A and C read back correctly; B is dead (skipped).
2429        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2430        for e in [a as usize, c as usize] {
2431            let m = &reg.entries[e];
2432            let cc = res.meta[e].colors_offset as usize;
2433            assert_eq!(
2434                &cols[cc..cc + m.colors.len()],
2435                &m.colors[..],
2436                "colors entry {e}"
2437            );
2438        }
2439
2440        // The lingering instance of removed B is skipped without panic.
2441        let f = test_frustum();
2442        let _ = res.cull_bin_upload(&h.device, &h.queue, &f, 64, 64, 16, 1.0);
2443    }
2444
2445    #[test]
2446    fn compact_reclaims_holes_keeps_ids_stable() {
2447        let Some(h) = headless() else { return };
2448        let mut reg = SpriteModelRegistry::new();
2449        let a = reg.add(build_sprite_model(&kv6_unsorted()));
2450        let b = reg.add(build_sprite_model(&kv6_other()));
2451        let c = reg.add(build_sprite_model(&kv6_other()));
2452        let mut res = SpriteRegistryResident::upload(
2453            &h.device,
2454            &reg,
2455            &[inst(a, [0.0; 3]), inst(b, [1.0; 3]), inst(c, [2.0; 3])],
2456        );
2457        let occ_used_full = res.occ_used;
2458
2459        // Remove the middle model, then compact.
2460        res.remove_model(b);
2461        res.compact(&h.device, &h.queue, &reg);
2462
2463        // Holes reclaimed: occupancy now only covers A + C.
2464        let live_occ: u32 = [a, c]
2465            .iter()
2466            .map(|&e| reg.entries[e as usize].occupancy.len() as u32)
2467            .sum();
2468        assert_eq!(res.occ_used, live_occ);
2469        assert!(res.occ_used < occ_used_full, "compaction shrank occupancy");
2470        // Dead entry keeps a zeroed tombstone; ids unchanged.
2471        assert_eq!(res.meta[b as usize].occupancy_offset, 0);
2472        assert_eq!(res.live_model_count(), 2);
2473        assert_eq!(res.dead_model_count(), 1);
2474
2475        // Live entries read back correctly at their new offsets.
2476        let occ = read_u32(&h, &res.occupancy, u64::from(res.occ_cap));
2477        let cols = read_u32(&h, &res.colors, u64::from(res.colors_alloc.cap_total()));
2478        for &e in &[a as usize, c as usize] {
2479            let m = &reg.entries[e];
2480            let oo = res.meta[e].occupancy_offset as usize;
2481            assert_eq!(
2482                &occ[oo..oo + m.occupancy.len()],
2483                &m.occupancy[..],
2484                "occ {e}"
2485            );
2486            let cc = res.meta[e].colors_offset as usize;
2487            assert_eq!(&cols[cc..cc + m.colors.len()], &m.colors[..], "cols {e}");
2488        }
2489
2490        // Chain ids still valid: C's chain still resolves; B's is empty.
2491        assert!(!res.chains[c as usize].is_empty());
2492        assert!(res.chains[b as usize].is_empty());
2493    }
2494
2495    #[test]
2496    fn remove_swap_semantics_and_capacity_retained() {
2497        let Some(h) = headless() else { return };
2498        let (reg, m) = one_model_registry();
2499        let seed: Vec<_> = (0..4).map(|i| inst(m, [i as f32, 0.0, 0.0])).collect();
2500        let mut res = SpriteRegistryResident::upload(&h.device, &reg, &seed);
2501        assert_eq!(res.instance_count(), 4);
2502        let cap = res.instance_capacity;
2503
2504        // Remove a middle element → the previous last (idx 3) moved into it.
2505        assert_eq!(res.remove_instance(1), Some(3));
2506        assert_eq!(res.instance_count(), 3);
2507
2508        // Remove the current last (idx 2) → nothing moved.
2509        assert_eq!(res.remove_instance(2), None);
2510        assert_eq!(res.instance_count(), 2);
2511
2512        // Out of range → None.
2513        assert_eq!(res.remove_instance(99), None);
2514        assert_eq!(res.instance_count(), 2);
2515
2516        // Capacity is retained for reuse (no shrink).
2517        assert_eq!(res.instance_capacity, cap);
2518    }
2519}