roxlap_gpu/
sprite_model.rs

1//! GPU.10 — KV6 sprite as a DDA-marchable voxel model.
2//!
3//! Unlike the GPU.9 splatter (one thread per voxel, screen-space
4//! squares, overdraw + atomic contention), a sprite model is a small
5//! voxel volume the precise ray-DDA marches one ray per pixel —
6//! crisp, correct occlusion, no overdraw. This is the GPU.10.0 single
7//! sprite; instancing + tiling + LOD come in later sub-substages.
8//!
9//! The volume reuses the chunk occupancy/colour scheme but sized to
10//! the KV6 bbox: per-column occupancy bitmask (`occ_words_per_col`
11//! u32s, `CHUNK_Z`-style 32-bits-per-word), a flat colour array in
12//! ascending-z order per column, and a `color_offsets` prefix table.
13//! The shader finds a voxel's colour by `offset[col] + popcount(bits
14//! below z)`, so colours MUST be ascending-z (we sort per column).
15
16#![allow(
17    clippy::cast_precision_loss,
18    clippy::cast_possible_truncation,
19    clippy::cast_possible_wrap,
20    clippy::cast_sign_loss,
21    clippy::many_single_char_names,
22    clippy::similar_names
23)]
24
25use bytemuck::{Pod, Zeroable};
26use roxlap_formats::kv6::Kv6;
27use roxlap_formats::sprite::Sprite;
28
29/// CPU-built voxel volume for one KV6 model.
30#[derive(Debug, Clone)]
31pub struct SpriteModel {
32    /// Voxel extent `(mx, my, mz)`.
33    pub dims: [u32; 3],
34    /// `ceil(mz / 32)` — u32 words of occupancy per (x, y) column.
35    pub occ_words_per_col: u32,
36    /// KV6 pivot in model-local voxel space.
37    pub pivot: [f32; 3],
38    /// Per-column occupancy bitmask, `mx * my * occ_words_per_col`.
39    pub occupancy: Vec<u32>,
40    /// Voxel colours, ascending z within each column.
41    pub colors: Vec<u32>,
42    /// Per-voxel surface-normal index (`Kv6::Voxel::dir`, 0..256),
43    /// parallel to [`colors`](Self::colors). The GPU sprite shader uses
44    /// it to index the per-instance `kv6colmul` lighting table, matching
45    /// the CPU rasteriser's normal-based shading.
46    pub dirs: Vec<u32>,
47    /// Prefix sums: `color_offsets[col]` is the first colour index of
48    /// column `col`; length `mx * my + 1`.
49    pub color_offsets: Vec<u32>,
50    /// World-space size of one voxel of this model (GPU.10.4 LOD): 1.0
51    /// at mip-0, doubling each [`SpriteModel::downsample`]. The shader
52    /// divides the local ray by this so a coarse voxel spans the right
53    /// world extent and the march `t` stays in world units.
54    pub voxel_world_size: f32,
55}
56
57/// Build the DDA volume from a KV6. Columns are packed in
58/// `x + y*mx` order; each column's voxels are sorted ascending by z
59/// so the shader's popcount-rank colour lookup is correct.
60///
61/// # Panics
62/// If the KV6's `ylen` counters disagree with `voxels.len()` (a
63/// malformed model).
64#[must_use]
65pub fn build_sprite_model(kv6: &Kv6) -> SpriteModel {
66    let (mx, my, mz) = (kv6.xsiz, kv6.ysiz, kv6.zsiz);
67    let occ_words_per_col = mz.div_ceil(32).max(1);
68    let cols = (mx * my) as usize;
69
70    let mut occupancy = vec![0u32; cols * occ_words_per_col as usize];
71    let mut color_offsets = vec![0u32; cols + 1];
72    let mut colors: Vec<u32> = Vec::with_capacity(kv6.voxels.len());
73    let mut dirs: Vec<u32> = Vec::with_capacity(kv6.voxels.len());
74
75    // Pass 1 — consume voxels in KV6 storage order (x-outer / y-inner)
76    // into per-column buckets keyed by `col = x + y*mx`. Each entry is
77    // `(z, colour, normal-dir)`.
78    let mut buckets: Vec<Vec<(u16, u32, u8)>> = vec![Vec::new(); cols];
79    let mut voxel_iter = kv6.voxels.iter();
80    for x in 0..mx {
81        for y in 0..my {
82            let col = (x + y * mx) as usize;
83            let count = kv6.ylen[x as usize][y as usize];
84            for _ in 0..count {
85                let v = voxel_iter.next().expect("KV6 ylen / voxels.len mismatch");
86                buckets[col].push((v.z, v.col, v.dir));
87            }
88        }
89    }
90
91    // Pass 2 — emit in COLUMN-INDEX order so `color_offsets` is a true
92    // monotonic prefix sum (the shader indexes by `col` either way, but
93    // structural edits / mip rebuilds rely on monotonic offsets). Each
94    // column's voxels sorted ascending z for the popcount-rank lookup.
95    for (col, bucket) in buckets.iter_mut().enumerate() {
96        color_offsets[col] = colors.len() as u32;
97        bucket.sort_by_key(|(z, _, _)| *z);
98        for &(z, col_rgba, dir) in bucket.iter() {
99            let z = u32::from(z);
100            let base = col * occ_words_per_col as usize + (z >> 5) as usize;
101            occupancy[base] |= 1u32 << (z & 31);
102            colors.push(col_rgba);
103            dirs.push(u32::from(dir));
104        }
105    }
106    color_offsets[cols] = colors.len() as u32;
107
108    SpriteModel {
109        dims: [mx, my, mz],
110        occ_words_per_col,
111        pivot: [kv6.xpiv, kv6.ypiv, kv6.zpiv],
112        occupancy,
113        color_offsets,
114        colors,
115        dirs,
116        voxel_world_size: 1.0,
117    }
118}
119
120/// Per-instance transform consumed by the model-DDA shader: the
121/// inverse model→world rotation (so a world ray can be brought into
122/// model-local space) plus the instance's world position. Stored as
123/// three padded columns for std140/std430 (`mat3x3` 16-byte columns).
124#[repr(C)]
125#[derive(Clone, Copy, Pod, Zeroable, Debug)]
126pub struct SpriteInstanceTransform {
127    /// Inverse of `[s | h | f]`, column-major, each column padded to
128    /// `vec4`. `inv_rot * v = c0*v.x + c1*v.y + c2*v.z`.
129    pub inv_rot: [[f32; 4]; 3],
130    /// Instance world position (the KV6 pivot maps here).
131    pub pos: [f32; 3],
132    _pad: f32,
133}
134
135impl SpriteInstanceTransform {
136    /// Build from a sprite pose. `s/h/f` are the model→world basis
137    /// columns; we invert them so the shader can map world→local.
138    #[must_use]
139    pub fn from_sprite(sprite: &Sprite) -> Self {
140        let inv = mat3_inverse([sprite.s, sprite.h, sprite.f]);
141        Self {
142            inv_rot: [
143                [inv[0][0], inv[0][1], inv[0][2], 0.0],
144                [inv[1][0], inv[1][1], inv[1][2], 0.0],
145                [inv[2][0], inv[2][1], inv[2][2], 0.0],
146            ],
147            pos: sprite.p,
148            _pad: 0.0,
149        }
150    }
151}
152
153/// A registry of sprite models. Instances reference a model by
154/// `model_id`, which is a **LOD chain** id: each chain holds one or
155/// more concrete mip levels (finest first; GPU.10.4), and the renderer
156/// picks the level per instance by distance. Identical KV6s are added
157/// once and shared by many instances. **Copy-on-modify**:
158/// [`Self::fork`] deep-copies a chain so edits to the fork leave the
159/// parent (and its instances) intact.
160#[derive(Debug, Clone, Default)]
161pub struct SpriteModelRegistry {
162    /// Concrete mip-level volumes (the GPU buffers concatenate these).
163    entries: Vec<SpriteModel>,
164    /// `chains[model_id]` = entry ids, finest (mip-0) first.
165    chains: Vec<Vec<u32>>,
166}
167
168impl SpriteModelRegistry {
169    #[must_use]
170    pub fn new() -> Self {
171        Self::default()
172    }
173
174    fn push_entry(&mut self, model: SpriteModel) -> u32 {
175        let id = self.entries.len() as u32;
176        self.entries.push(model);
177        id
178    }
179
180    /// Register a single-level (no-LOD) model; returns its `model_id`.
181    pub fn add(&mut self, model: SpriteModel) -> u32 {
182        let e = self.push_entry(model);
183        let id = self.chains.len() as u32;
184        self.chains.push(vec![e]);
185        id
186    }
187
188    /// Register a model with up to `max_levels` LOD mips (each a 2×
189    /// [`SpriteModel::downsample`] of the previous; stops early once a
190    /// level collapses to 1³). Returns its `model_id`.
191    pub fn add_lod(&mut self, model: SpriteModel, max_levels: u32) -> u32 {
192        let mut levels = vec![self.push_entry(model.clone())];
193        let mut cur = model;
194        for _ in 1..max_levels.max(1) {
195            if cur.dims == [1, 1, 1] {
196                break;
197            }
198            cur = cur.downsample();
199            levels.push(self.push_entry(cur.clone()));
200        }
201        let id = self.chains.len() as u32;
202        self.chains.push(levels);
203        id
204    }
205
206    /// Copy-on-modify: deep-copy every level of chain `parent` into new
207    /// entries + a new chain, and return its `model_id`. The fork owns
208    /// independent voxel data, so mutating it does not affect the
209    /// parent or any instance still pointing at it.
210    ///
211    /// # Panics
212    /// If `parent` is not a registered `model_id`.
213    pub fn fork(&mut self, parent: u32) -> u32 {
214        let src = self.chains[parent as usize].clone();
215        let levels: Vec<u32> = src
216            .iter()
217            .map(|&e| {
218                let copy = self.entries[e as usize].clone();
219                self.push_entry(copy)
220            })
221            .collect();
222        let id = self.chains.len() as u32;
223        self.chains.push(levels);
224        id
225    }
226
227    /// The finest (mip-0) model of chain `id`.
228    #[must_use]
229    pub fn model(&self, id: u32) -> &SpriteModel {
230        &self.entries[self.chains[id as usize][0] as usize]
231    }
232
233    /// Mutable access to the finest (mip-0) model for editing — the
234    /// copy-on-modify entry point (typically on a [`Self::fork`]).
235    /// After a *structural* edit (occupancy/dims), call
236    /// [`Self::rebuild_lod`] so the coarser mips match; a pure recolour
237    /// can use [`Self::recolor_chain`] instead.
238    pub fn model_mut(&mut self, id: u32) -> &mut SpriteModel {
239        let e = self.chains[id as usize][0] as usize;
240        &mut self.entries[e]
241    }
242
243    /// Recolour every LOD level of chain `id` (so a forked tint shows
244    /// at all distances).
245    pub fn recolor_chain(&mut self, id: u32, f: impl Fn(u32) -> u32 + Copy) {
246        for li in 0..self.chains[id as usize].len() {
247            let e = self.chains[id as usize][li] as usize;
248            self.entries[e].recolor(f);
249        }
250    }
251
252    /// Regenerate chain `id`'s coarser mip levels from its (possibly
253    /// just-edited) mip-0. Run after a structural edit via
254    /// [`Self::model_mut`] so the LOD ladder stays consistent. No-op
255    /// for a single-level (no-LOD) chain.
256    pub fn rebuild_lod(&mut self, id: u32) {
257        let levels = self.chains[id as usize].clone();
258        if levels.len() <= 1 {
259            return;
260        }
261        let mut cur = self.entries[levels[0] as usize].clone();
262        for &e in &levels[1..] {
263            cur = cur.downsample();
264            self.entries[e as usize] = cur.clone();
265        }
266    }
267
268    /// Number of LOD chains (distinct `model_id`s).
269    #[must_use]
270    pub fn len(&self) -> usize {
271        self.chains.len()
272    }
273
274    #[must_use]
275    pub fn is_empty(&self) -> bool {
276        self.chains.is_empty()
277    }
278}
279
280impl SpriteModel {
281    /// Recolour every voxel via `f(old_rgba) -> new_rgba`. Structure
282    /// (occupancy / offsets) is untouched, so this is a cheap in-place
283    /// edit — handy on a [`SpriteModelRegistry::fork`] to make a tinted
284    /// variant. For structural edits, mutate the public occupancy /
285    /// colours / dims directly (via `model_mut`) then rebuild the LOD.
286    pub fn recolor(&mut self, f: impl Fn(u32) -> u32) {
287        for c in &mut self.colors {
288            *c = f(*c);
289        }
290    }
291
292    /// GPU.12 — structural edit of a single voxel within the model's
293    /// existing bounds. `Some(rgba)` sets/replaces the voxel at
294    /// `(x, y, z)`; `None` clears it. Maintains the ascending-z colour
295    /// invariant by inserting/removing at the voxel's popcount rank and
296    /// shifting the affected columns' `color_offsets`. Returns `true`
297    /// if the model changed. Out-of-bounds coordinates are ignored
298    /// (returns `false`) — growing `dims` is a separate concern.
299    ///
300    /// After editing, call [`SpriteModelRegistry::rebuild_lod`] to
301    /// refresh coarser mips, then re-upload via `set_sprite_instances`.
302    pub fn set_voxel(&mut self, x: u32, y: u32, z: u32, color: Option<u32>) -> bool {
303        if x >= self.dims[0] || y >= self.dims[1] || z >= self.dims[2] {
304            return false;
305        }
306        let owpc = self.occ_words_per_col as usize;
307        let cols = (self.dims[0] * self.dims[1]) as usize;
308        let col = (x + y * self.dims[0]) as usize;
309        let base = col * owpc;
310        let zw = (z >> 5) as usize;
311        let zb = z & 31;
312
313        // Rank = solid voxels strictly below z in this column.
314        let mut rank = 0usize;
315        for w in 0..zw {
316            rank += self.occupancy[base + w].count_ones() as usize;
317        }
318        let below_mask = if zb > 0 { (1u32 << zb) - 1 } else { 0 };
319        rank += (self.occupancy[base + zw] & below_mask).count_ones() as usize;
320        let idx = self.color_offsets[col] as usize + rank;
321        let was_set = (self.occupancy[base + zw] >> zb) & 1 == 1;
322
323        if let Some(rgba) = color {
324            if was_set {
325                self.colors[idx] = rgba; // replace in place (keeps dir)
326            } else {
327                self.occupancy[base + zw] |= 1u32 << zb;
328                self.colors.insert(idx, rgba);
329                // No normal supplied by this API — default to dir 0 (the
330                // sole caller, the carve hotkey, only ever clears).
331                self.dirs.insert(idx, 0);
332                for c in &mut self.color_offsets[col + 1..=cols] {
333                    *c += 1;
334                }
335            }
336            true
337        } else {
338            if !was_set {
339                return false;
340            }
341            self.occupancy[base + zw] &= !(1u32 << zb);
342            self.colors.remove(idx);
343            self.dirs.remove(idx);
344            for c in &mut self.color_offsets[col + 1..=cols] {
345                *c -= 1;
346            }
347            true
348        }
349    }
350
351    /// Radius of a bounding sphere centred at the instance position
352    /// (the pivot maps there): the farthest bbox corner from the
353    /// pivot. Used for frustum culling. Assumes a unit basis; scaled
354    /// instances would multiply this by their max basis length.
355    #[must_use]
356    pub fn bound_radius(&self) -> f32 {
357        let mut r2 = 0.0_f32;
358        for &cx in &[0.0, self.dims[0] as f32] {
359            for &cy in &[0.0, self.dims[1] as f32] {
360                for &cz in &[0.0, self.dims[2] as f32] {
361                    let d = [cx - self.pivot[0], cy - self.pivot[1], cz - self.pivot[2]];
362                    r2 = r2.max(d[0] * d[0] + d[1] * d[1] + d[2] * d[2]);
363                }
364            }
365        }
366        r2.sqrt()
367    }
368
369    /// GPU.10.4 — 2× voxel downsample for the next LOD level. A coarse
370    /// voxel is solid if any of its 2×2×2 fine voxels is, coloured by
371    /// their per-channel average. Dims/pivot halve and
372    /// `voxel_world_size` doubles, so the coarse model occupies the
373    /// same world box at half the resolution (origin-corner aligned).
374    #[must_use]
375    #[allow(clippy::manual_checked_ops)] // `n > 0` guards 4 divisions, not one checked_div
376    pub fn downsample(&self) -> SpriteModel {
377        let [fx, fy, fz] = self.dims;
378        let fidx = |x: u32, y: u32, z: u32| (x + y * fx + z * fx * fy) as usize;
379
380        // Reconstruct dense fine voxels (solid flag + colour + normal).
381        let mut solid = vec![false; (fx * fy * fz) as usize];
382        let mut fine = vec![0u32; (fx * fy * fz) as usize];
383        let mut fine_dir = vec![0u32; (fx * fy * fz) as usize];
384        for x in 0..fx {
385            for y in 0..fy {
386                let col = (x + y * fx) as usize;
387                let base = col * self.occ_words_per_col as usize;
388                let off = self.color_offsets[col] as usize;
389                let mut seen = 0usize;
390                for z in 0..fz {
391                    let w = base + (z >> 5) as usize;
392                    if (self.occupancy[w] >> (z & 31)) & 1 == 1 {
393                        fine[fidx(x, y, z)] = self.colors[off + seen];
394                        fine_dir[fidx(x, y, z)] = self.dirs[off + seen];
395                        solid[fidx(x, y, z)] = true;
396                        seen += 1;
397                    }
398                }
399            }
400        }
401
402        let nx = fx.div_ceil(2).max(1);
403        let ny = fy.div_ceil(2).max(1);
404        let nz = fz.div_ceil(2).max(1);
405        let owpc = nz.div_ceil(32).max(1);
406        let cols = (nx * ny) as usize;
407        let mut occupancy = vec![0u32; cols * owpc as usize];
408        let mut color_offsets = vec![0u32; cols + 1];
409        let mut colors: Vec<u32> = Vec::new();
410        let mut dirs: Vec<u32> = Vec::new();
411
412        // Emit in column-index order (`ccol = cx + cy*nx`), cy outer,
413        // so `color_offsets` is a monotonic prefix sum like build's.
414        for cy in 0..ny {
415            for cx in 0..nx {
416                let ccol = (cx + cy * nx) as usize;
417                color_offsets[ccol] = colors.len() as u32;
418                for cz in 0..nz {
419                    let (mut a, mut r, mut g, mut b, mut n) = (0u32, 0u32, 0u32, 0u32, 0u32);
420                    // Normals don't average meaningfully — keep the first
421                    // solid child's `dir` as the coarse voxel's normal.
422                    let mut rep_dir = 0u32;
423                    for dz in 0..2 {
424                        for dy in 0..2 {
425                            for dx in 0..2 {
426                                let (x, y, z) = (2 * cx + dx, 2 * cy + dy, 2 * cz + dz);
427                                if x < fx && y < fy && z < fz && solid[fidx(x, y, z)] {
428                                    let c = fine[fidx(x, y, z)];
429                                    if n == 0 {
430                                        rep_dir = fine_dir[fidx(x, y, z)];
431                                    }
432                                    a += (c >> 24) & 0xff;
433                                    r += (c >> 16) & 0xff;
434                                    g += (c >> 8) & 0xff;
435                                    b += c & 0xff;
436                                    n += 1;
437                                }
438                            }
439                        }
440                    }
441                    if n > 0 {
442                        let avg = ((a / n) << 24) | ((r / n) << 16) | ((g / n) << 8) | (b / n);
443                        let base = ccol * owpc as usize + (cz >> 5) as usize;
444                        occupancy[base] |= 1u32 << (cz & 31);
445                        colors.push(avg);
446                        dirs.push(rep_dir);
447                    }
448                }
449            }
450        }
451        color_offsets[cols] = colors.len() as u32;
452
453        SpriteModel {
454            dims: [nx, ny, nz],
455            occ_words_per_col: owpc,
456            pivot: [
457                self.pivot[0] * 0.5,
458                self.pivot[1] * 0.5,
459                self.pivot[2] * 0.5,
460            ],
461            occupancy,
462            colors,
463            dirs,
464            color_offsets,
465            voxel_world_size: self.voxel_world_size * 2.0,
466        }
467    }
468}
469
470/// View frustum for CPU instance culling, in world space. Built each
471/// frame from the world camera. `half_w`/`half_h` are the tangents of
472/// the half-FOV (so the side planes are `|x| <= half_w * z` etc. in
473/// camera space).
474#[derive(Clone, Copy, Debug)]
475pub struct ViewFrustum {
476    pub pos: [f32; 3],
477    pub right: [f32; 3],
478    pub down: [f32; 3],
479    pub forward: [f32; 3],
480    pub half_w: f32,
481    pub half_h: f32,
482    pub far: f32,
483}
484
485/// CPU cull record: the GPU instance + its world bounding sphere.
486/// Not `Copy` — carries a boxed 256-entry `kv6colmul` table.
487#[derive(Clone)]
488struct CullInstance {
489    /// Instance transform + a placeholder `model_id`; the cull
490    /// overwrites `model_id` with the distance-chosen LOD entry.
491    gpu: SpriteInstanceGpu,
492    /// LOD chain this instance draws (the user-facing `model_id`).
493    chain_id: u32,
494    center: [f32; 3],
495    radius: f32,
496    /// voxlap `kv6colmul[256]` — per-surface-normal colour modulation
497    /// for this instance's pose + lighting. Defaults to identity
498    /// (`0x0100` in every channel lane → unshaded) until the facade sets
499    /// it via [`SpriteRegistryResident::set_instance_colmul`]. Packed
500    /// into the `colmul` GPU buffer (in visible order) each frame.
501    colmul: Box<[u64; 256]>,
502}
503
504/// Identity `kv6colmul` table: every channel lane = `0x0100`, so the
505/// shader's `(rgb[c] << 8) * 0x0100 >> 16 == rgb[c]` — i.e. no shading.
506fn identity_colmul() -> Box<[u64; 256]> {
507    const LANE: u64 = 0x0100;
508    let w = LANE | (LANE << 16) | (LANE << 32) | (LANE << 48);
509    Box::new([w; 256])
510}
511
512fn dot3(a: [f32; 3], b: [f32; 3]) -> f32 {
513    a[0] * b[0] + a[1] * b[1] + a[2] * b[2]
514}
515
516/// One sprite instance: a model reference + world pose.
517#[derive(Debug, Clone, Copy)]
518pub struct SpriteInstance {
519    pub model_id: u32,
520    pub transform: SpriteInstanceTransform,
521}
522
523/// GPU per-model metadata: where this model's data starts in the
524/// shared registry buffers + its dims/pivot. Mirrors `ModelMeta` in
525/// the shader (std430, 48 bytes).
526#[repr(C)]
527#[derive(Clone, Copy, Pod, Zeroable, Debug)]
528struct SpriteModelMeta {
529    occupancy_offset: u32,
530    colors_offset: u32,
531    color_offsets_offset: u32,
532    occ_words_per_col: u32,
533    dims: [u32; 3],
534    _pad0: u32,
535    pivot: [f32; 3],
536    /// GPU.10.4 — world size of one voxel of this (mip) entry.
537    voxel_world_size: f32,
538}
539
540/// GPU per-instance record. Mirrors `Instance` in the shader (std430,
541/// 64 bytes): inverse rotation columns + position + model id.
542#[repr(C)]
543#[derive(Clone, Copy, Pod, Zeroable, Debug)]
544struct SpriteInstanceGpu {
545    inv_rot0: [f32; 4],
546    inv_rot1: [f32; 4],
547    inv_rot2: [f32; 4],
548    pos: [f32; 3],
549    model_id: u32,
550}
551
552/// Invert a 3×3 matrix given as basis columns `[c0, c1, c2]`,
553/// returning the inverse as columns. For an orthonormal basis this is
554/// the transpose; the general path covers rotation + non-unit scale.
555#[must_use]
556fn mat3_inverse(cols: [[f32; 3]; 3]) -> [[f32; 3]; 3] {
557    let [a, b, c] = cols; // columns
558                          // Determinant via scalar triple product a · (b × c).
559    let cross = |u: [f32; 3], v: [f32; 3]| {
560        [
561            u[1] * v[2] - u[2] * v[1],
562            u[2] * v[0] - u[0] * v[2],
563            u[0] * v[1] - u[1] * v[0],
564        ]
565    };
566    let bc = cross(b, c);
567    let ca = cross(c, a);
568    let ab = cross(a, b);
569    let det = a[0] * bc[0] + a[1] * bc[1] + a[2] * bc[2];
570    let inv_det = if det.abs() < 1e-12 { 0.0 } else { 1.0 / det };
571    // Inverse rows are (b×c, c×a, a×b)/det; return as columns of the
572    // inverse, i.e. transpose of those rows.
573    [
574        [bc[0] * inv_det, ca[0] * inv_det, ab[0] * inv_det],
575        [bc[1] * inv_det, ca[1] * inv_det, ab[1] * inv_det],
576        [bc[2] * inv_det, ca[2] * inv_det, ab[2] * inv_det],
577    ]
578}
579
580/// GPU-resident registry + instances: every model's occupancy /
581/// colours / offsets concatenated into shared storage buffers, a
582/// per-model metadata table, and a capacity-sized instance buffer
583/// rewritten each frame with the frustum-visible subset (GPU.10.2).
584/// One bind group serves all models (same approach as the multi-grid
585/// scene).
586pub struct SpriteRegistryResident {
587    pub occupancy: wgpu::Buffer,
588    pub colors: wgpu::Buffer,
589    /// Per-voxel surface-normal index, concatenated across models in the
590    /// same layout as [`colors`](Self::colors). The shader indexes the
591    /// per-instance `kv6colmul` table by it.
592    pub dirs: wgpu::Buffer,
593    pub color_offsets: wgpu::Buffer,
594    pub model_meta: wgpu::Buffer,
595    /// Holds up to `instance_capacity` instances; the visible subset
596    /// is packed into `[0, count)` each frame by [`Self::cull_bin_upload`].
597    pub instances: wgpu::Buffer,
598    pub instance_capacity: u32,
599    /// Per-visible-instance `kv6colmul[256]` tables, packed in the same
600    /// order as the `instances` buffer each frame (two u32 per u64
601    /// entry: lanes 0|1 then 2|3). Sized `instance_capacity * 256 * 2`
602    /// u32; rewritten by [`Self::cull_bin_upload`].
603    pub colmul: wgpu::Buffer,
604    colmul_cap: u32,
605    /// GPU.10.3 — per-tile `(offset, count)` into `tile_instances`,
606    /// flat `2 * tiles_x * tiles_y` u32s. Grown to fit the screen.
607    pub tile_ranges: wgpu::Buffer,
608    tile_ranges_cap: u32,
609    /// GPU.10.3 — flat list of visible-instance indices grouped by
610    /// tile. Grown to fit the per-frame total.
611    pub tile_instances: wgpu::Buffer,
612    tile_instances_cap: u32,
613    /// CPU cull records (full set), with precomputed bounding spheres.
614    cull: Vec<CullInstance>,
615    /// GPU.10.4 — LOD chains: `chains[chain_id]` = entry ids, finest
616    /// first. The cull picks a level by distance and writes its entry
617    /// id into the packed instance's `model_id`.
618    chains: Vec<Vec<u32>>,
619    /// GPU.12 incremental — CPU mirror of the GPU `model_meta` table, one
620    /// per concrete entry. [`Self::update_model`] reads the fixed
621    /// occupancy/color_offsets bases from here and rewrites the changed
622    /// `colors_offset` on a relocation.
623    meta: Vec<SpriteModelMeta>,
624    /// GPU.12 incremental — per-entry placement of `colors`/`dirs` in the
625    /// shared buffers (drives both; same offsets/ranks). Lets an edit
626    /// re-upload one model's data without touching the others.
627    colors_alloc: ColorsAllocator,
628    /// Per-entry word length of the dims-fixed `occupancy` and
629    /// `color_offsets` arrays, kept so [`Self::update_model`] can assert a
630    /// carve never changed dims (which would invalidate the in-place
631    /// writes — growing dims is out of scope, handled by a full re-upload).
632    occ_lens: Vec<u32>,
633    coloff_lens: Vec<u32>,
634}
635
636impl SpriteRegistryResident {
637    /// Concatenate `registry`'s models into shared buffers and prepare
638    /// `instances` for per-frame culling. Model-relative indices stay
639    /// as built; the shader adds each model's base offset from the
640    /// metadata table.
641    #[must_use]
642    pub fn upload(
643        device: &wgpu::Device,
644        registry: &SpriteModelRegistry,
645        instances: &[SpriteInstance],
646    ) -> Self {
647        // `occupancy` + `color_offsets` are dims-fixed → tightly
648        // concatenated (never grow on a carve). `colors` + `dirs` are
649        // variable → laid out by the suballocator with per-slot slack so
650        // an incremental edit can rewrite one model in place.
651        let entry_lens: Vec<u32> = registry
652            .entries
653            .iter()
654            .map(|m| m.colors.len() as u32)
655            .collect();
656        let colors_alloc = ColorsAllocator::new(&entry_lens);
657        let cap_total = colors_alloc.cap_total();
658
659        let mut all_occ: Vec<u32> = Vec::new();
660        let mut all_offsets: Vec<u32> = Vec::new();
661        let mut all_colors: Vec<u32> = vec![0; cap_total as usize];
662        let mut all_dirs: Vec<u32> = vec![0; cap_total as usize];
663        let mut meta: Vec<SpriteModelMeta> = Vec::with_capacity(registry.entries.len());
664        let mut occ_lens: Vec<u32> = Vec::with_capacity(registry.entries.len());
665        let mut coloff_lens: Vec<u32> = Vec::with_capacity(registry.entries.len());
666
667        // One meta + placed data per concrete (mip-level) entry.
668        for (e, m) in registry.entries.iter().enumerate() {
669            let slot = colors_alloc.slot(e);
670            meta.push(SpriteModelMeta {
671                occupancy_offset: all_occ.len() as u32,
672                colors_offset: slot.off,
673                color_offsets_offset: all_offsets.len() as u32,
674                occ_words_per_col: m.occ_words_per_col,
675                dims: m.dims,
676                _pad0: 0,
677                pivot: m.pivot,
678                voxel_world_size: m.voxel_world_size,
679            });
680            occ_lens.push(m.occupancy.len() as u32);
681            coloff_lens.push(m.color_offsets.len() as u32);
682            all_occ.extend_from_slice(&m.occupancy);
683            all_offsets.extend_from_slice(&m.color_offsets);
684            let off = slot.off as usize;
685            all_colors[off..off + m.colors.len()].copy_from_slice(&m.colors);
686            all_dirs[off..off + m.dirs.len()].copy_from_slice(&m.dirs);
687        }
688
689        // Per-instance cull records: sphere centred at the instance
690        // position, radius from the chain's finest (mip-0) model.
691        // `colmul` starts at identity (unshaded) until the facade sets
692        // per-instance lighting via `set_instance_colmul`.
693        let cull: Vec<CullInstance> = instances
694            .iter()
695            .map(|i| CullInstance {
696                gpu: SpriteInstanceGpu {
697                    inv_rot0: i.transform.inv_rot[0],
698                    inv_rot1: i.transform.inv_rot[1],
699                    inv_rot2: i.transform.inv_rot[2],
700                    pos: i.transform.pos,
701                    model_id: i.model_id, // placeholder; cull rewrites
702                },
703                chain_id: i.model_id,
704                center: i.transform.pos,
705                radius: registry.model(i.model_id).bound_radius(),
706                colmul: identity_colmul(),
707            })
708            .collect();
709
710        // Capacity buffer (COPY_DST so cull can rewrite it each frame),
711        // seeded with the full set so frame 0 is valid pre-cull.
712        let seed: Vec<SpriteInstanceGpu> = cull.iter().map(|c| c.gpu).collect();
713        let instances_buf = {
714            use wgpu::util::DeviceExt;
715            let one = [SpriteInstanceGpu::zeroed()];
716            let src: &[SpriteInstanceGpu] = if seed.is_empty() { &one } else { &seed };
717            device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
718                label: Some("roxlap-gpu sprite_reg.instances"),
719                contents: bytemuck::cast_slice(src),
720                usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
721            })
722        };
723
724        let tile_ranges = storage_dst_u32(device, "roxlap-gpu sprite_reg.tile_ranges", 1);
725        let tile_instances = storage_dst_u32(device, "roxlap-gpu sprite_reg.tile_instances", 1);
726        // colmul: 256 entries × 2 u32 per visible instance. Sized to the
727        // full instance set (worst case all visible); rewritten per frame.
728        let colmul_cap = (cull.len() as u32).max(1) * 256 * 2;
729        let colmul = storage_dst_u32(device, "roxlap-gpu sprite_reg.colmul", colmul_cap);
730        Self {
731            occupancy: storage_dst_u32_cap(
732                device,
733                "roxlap-gpu sprite_reg.occupancy",
734                &all_occ,
735                all_occ.len() as u32,
736            ),
737            colors: storage_dst_u32_cap(
738                device,
739                "roxlap-gpu sprite_reg.colors",
740                &all_colors,
741                cap_total,
742            ),
743            dirs: storage_dst_u32_cap(device, "roxlap-gpu sprite_reg.dirs", &all_dirs, cap_total),
744            color_offsets: storage_dst_u32_cap(
745                device,
746                "roxlap-gpu sprite_reg.color_offsets",
747                &all_offsets,
748                all_offsets.len() as u32,
749            ),
750            model_meta: storage_dst_pod(device, "roxlap-gpu sprite_reg.model_meta", &meta),
751            instances: instances_buf,
752            instance_capacity: cull.len() as u32,
753            colmul,
754            colmul_cap,
755            tile_ranges,
756            tile_ranges_cap: 1,
757            tile_instances,
758            tile_instances_cap: 1,
759            cull,
760            chains: registry.chains.clone(),
761            meta,
762            colors_alloc,
763            occ_lens,
764            coloff_lens,
765        }
766    }
767
768    /// Set the per-instance `kv6colmul[256]` lighting tables (voxlap's
769    /// `update_reflects` output), in the same order/length as the
770    /// instances passed to [`Self::upload`]. The next
771    /// [`Self::cull_bin_upload`] packs the visible subset to the GPU.
772    /// Instances beyond `tables.len()` keep their previous tables.
773    pub fn set_instance_colmul(&mut self, tables: &[[u64; 256]]) {
774        for (ci, t) in self.cull.iter_mut().zip(tables) {
775            ci.colmul.copy_from_slice(t);
776        }
777    }
778
779    /// Refresh instance poses in place from `instances` — for animated
780    /// sprites (e.g. KFA limbs re-posed each frame) — **without** any
781    /// model-volume re-upload. `instances` must match the set passed to
782    /// [`Self::upload`] in length + order; each keeps its `model_id`
783    /// (LOD chain) so only the transform + cull centre change. No GPU
784    /// write happens here: the next [`Self::cull_bin_upload`] re-uploads
785    /// the packed visible subset, as it already does every frame.
786    pub fn update_transforms(&mut self, instances: &[SpriteInstance]) {
787        debug_assert_eq!(
788            instances.len(),
789            self.cull.len(),
790            "update_transforms instance count must match upload"
791        );
792        for (ci, inst) in self.cull.iter_mut().zip(instances) {
793            ci.gpu.inv_rot0 = inst.transform.inv_rot[0];
794            ci.gpu.inv_rot1 = inst.transform.inv_rot[1];
795            ci.gpu.inv_rot2 = inst.transform.inv_rot[2];
796            ci.gpu.pos = inst.transform.pos;
797            // Bounding sphere follows the pivot; radius/chain unchanged.
798            ci.center = inst.transform.pos;
799        }
800    }
801
802    /// GPU.12 incremental — re-upload only the entries of LOD chain
803    /// `chain_id` after an in-place edit (carve / recolour) of its model,
804    /// **without** rebuilding the whole registry. `registry` must be the
805    /// same registry uploaded (same entry ids), with chain `chain_id`'s
806    /// entries already edited (`model_mut` + `rebuild_lod`).
807    ///
808    /// For each entry: occupancy + color_offsets are dims-fixed, so they
809    /// are written in place; colors + dirs (variable, parallel) go through
810    /// the suballocator — written in place when they fit the slack,
811    /// relocated (with a `model_meta` rewrite) when they outgrow it, and
812    /// only when the buffer tail overflows are colors/dirs grown + the
813    /// whole registry repacked. Instances / cull / colmul are untouched
814    /// (a carve never moves an instance or grows its bounds) — that is the
815    /// win over [`Self::upload`].
816    ///
817    /// # Panics (debug)
818    /// If an entry's dims changed (occupancy / color_offsets length), which
819    /// the in-place path can't absorb — growing dims needs a full
820    /// re-upload via [`Self::upload`].
821    pub fn update_model(
822        &mut self,
823        device: &wgpu::Device,
824        queue: &wgpu::Queue,
825        registry: &SpriteModelRegistry,
826        chain_id: u32,
827    ) {
828        let entries = self.chains[chain_id as usize].clone();
829        let mut grew = false;
830        for &e in &entries {
831            let e = e as usize;
832            let m = &registry.entries[e];
833
834            // Dims-fixed arrays: assert unchanged, then write in place.
835            debug_assert_eq!(
836                m.occupancy.len() as u32,
837                self.occ_lens[e],
838                "update_model: entry {e} occupancy length changed (dims grew?)"
839            );
840            debug_assert_eq!(
841                m.color_offsets.len() as u32,
842                self.coloff_lens[e],
843                "update_model: entry {e} color_offsets length changed (dims grew?)"
844            );
845            queue.write_buffer(
846                &self.occupancy,
847                u64::from(self.meta[e].occupancy_offset) * 4,
848                bytemuck::cast_slice(&m.occupancy),
849            );
850            queue.write_buffer(
851                &self.color_offsets,
852                u64::from(self.meta[e].color_offsets_offset) * 4,
853                bytemuck::cast_slice(&m.color_offsets),
854            );
855
856            // Variable colors/dirs via the suballocator.
857            let new_len = m.colors.len() as u32;
858            match self.colors_alloc.place(e, new_len) {
859                Some(off) => {
860                    queue.write_buffer(
861                        &self.colors,
862                        u64::from(off) * 4,
863                        bytemuck::cast_slice(&m.colors),
864                    );
865                    queue.write_buffer(
866                        &self.dirs,
867                        u64::from(off) * 4,
868                        bytemuck::cast_slice(&m.dirs),
869                    );
870                    if self.meta[e].colors_offset != off {
871                        // Relocated — rewrite this entry's meta record.
872                        self.meta[e].colors_offset = off;
873                        queue.write_buffer(
874                            &self.model_meta,
875                            (e * std::mem::size_of::<SpriteModelMeta>()) as u64,
876                            bytemuck::bytes_of(&self.meta[e]),
877                        );
878                    }
879                }
880                None => grew = true,
881            }
882        }
883
884        // Buffer overflow on at least one entry → grow colors/dirs and
885        // repack the WHOLE registry (rare; offsets for every entry move).
886        if grew {
887            self.grow_and_repack(device, queue, registry);
888        }
889    }
890
891    /// Grow the `colors`/`dirs` buffers and repack every entry compactly
892    /// (with fresh slack) when an [`Self::update_model`] edit overflowed
893    /// the buffer tail. Recreates both buffers (the next frame's bind
894    /// group picks up the new handles) and rewrites every `model_meta`
895    /// `colors_offset`. O(registry) but rare — logged so a growth burst
896    /// is visible.
897    fn grow_and_repack(
898        &mut self,
899        device: &wgpu::Device,
900        queue: &wgpu::Queue,
901        registry: &SpriteModelRegistry,
902    ) {
903        let new_lens: Vec<u32> = registry
904            .entries
905            .iter()
906            .map(|m| m.colors.len() as u32)
907            .collect();
908        self.colors_alloc.repack(&new_lens);
909        let cap_total = self.colors_alloc.cap_total();
910
911        let mut all_colors = vec![0u32; cap_total as usize];
912        let mut all_dirs = vec![0u32; cap_total as usize];
913        for (e, m) in registry.entries.iter().enumerate() {
914            let off = self.colors_alloc.slot(e).off as usize;
915            all_colors[off..off + m.colors.len()].copy_from_slice(&m.colors);
916            all_dirs[off..off + m.dirs.len()].copy_from_slice(&m.dirs);
917            self.meta[e].colors_offset = off as u32;
918        }
919        self.colors = storage_dst_u32_cap(
920            device,
921            "roxlap-gpu sprite_reg.colors",
922            &all_colors,
923            cap_total,
924        );
925        self.dirs = storage_dst_u32_cap(device, "roxlap-gpu sprite_reg.dirs", &all_dirs, cap_total);
926        // Every entry's colors_offset moved → rewrite the whole meta table.
927        queue.write_buffer(&self.model_meta, 0, bytemuck::cast_slice(&self.meta));
928        eprintln!("roxlap-gpu: sprite registry colors/dirs grew + repacked to {cap_total} words");
929    }
930
931    /// GPU.10.3 — frustum-cull, pack the visible subset into the
932    /// instance buffer, then bin those instances into screen tiles:
933    /// project each visible bounding sphere to a screen AABB and append
934    /// its (visible) index to every overlapped tile. Uploads the
935    /// instance buffer + `tile_ranges` (per-tile offset/count) +
936    /// `tile_instances` (flat grouped indices), growing the tile
937    /// buffers as needed. Returns `(visible_count, tiles_x, tiles_y)`.
938    #[allow(clippy::too_many_arguments)]
939    pub fn cull_bin_upload(
940        &mut self,
941        device: &wgpu::Device,
942        queue: &wgpu::Queue,
943        f: &ViewFrustum,
944        screen_w: u32,
945        screen_h: u32,
946        tile_size: u32,
947        lod_px: f32,
948    ) -> (u32, u32, u32) {
949        let tiles_x = screen_w.div_ceil(tile_size).max(1);
950        let tiles_y = screen_h.div_ceil(tile_size).max(1);
951        let n_tiles = (tiles_x * tiles_y) as usize;
952
953        let nw = (1.0 + f.half_w * f.half_w).sqrt();
954        let nh = (1.0 + f.half_h * f.half_h).sqrt();
955        let cx = screen_w as f32 * 0.5;
956        let cy = screen_h as f32 * 0.5;
957        let px_per_world = cx / f.half_w; // isotropic: == cy/half_h
958        let ts = tile_size as f32;
959        let tx_max = tiles_x as i32 - 1;
960        let ty_max = tiles_y as i32 - 1;
961
962        let mut visible: Vec<SpriteInstanceGpu> = Vec::with_capacity(self.cull.len());
963        // Per-visible tile AABB (tx0, tx1, ty0, ty1) for the bin pass.
964        let mut boxes: Vec<[i32; 4]> = Vec::with_capacity(self.cull.len());
965        // Per-visible kv6colmul tables, flattened to two u32 per u64
966        // entry (lanes 0|1, then 2|3), packed in visible order so the
967        // shader indexes `colmul[inst_idx*512 + dir*2 + {0,1}]`.
968        let mut visible_colmul: Vec<u32> = Vec::with_capacity(self.cull.len() * 512);
969        let mut counts = vec![0u32; n_tiles];
970
971        for ci in &self.cull {
972            let rel = [
973                ci.center[0] - f.pos[0],
974                ci.center[1] - f.pos[1],
975                ci.center[2] - f.pos[2],
976            ];
977            let z = dot3(rel, f.forward);
978            let r = ci.radius;
979            if z + r < 0.0 || z - r > f.far {
980                continue; // behind / beyond far
981            }
982            let x = dot3(rel, f.right);
983            if (x - f.half_w * z) > r * nw || (-x - f.half_w * z) > r * nw {
984                continue; // right / left
985            }
986            let y = dot3(rel, f.down);
987            if (y - f.half_h * z) > r * nh || (-y - f.half_h * z) > r * nh {
988                continue; // bottom / top
989            }
990
991            // Visible: project the sphere to a screen AABB → tile range.
992            let (tx0, tx1, ty0, ty1) = if z > 1e-3 {
993                let sx = cx + (x / z) * px_per_world;
994                let sy = cy + (y / z) * px_per_world;
995                let sr = (r / z) * px_per_world;
996                (
997                    (((sx - sr) / ts).floor() as i32).clamp(0, tx_max),
998                    (((sx + sr) / ts).floor() as i32).clamp(0, tx_max),
999                    (((sy - sr) / ts).floor() as i32).clamp(0, ty_max),
1000                    (((sy + sr) / ts).floor() as i32).clamp(0, ty_max),
1001                )
1002            } else {
1003                // Sphere crosses the camera plane — cover all tiles.
1004                (0, tx_max, 0, ty_max)
1005            };
1006            // GPU.10.4 — pick the LOD level by projected voxel size:
1007            // choose the coarsest level whose voxel still covers at
1008            // least `lod_px` screen pixels, i.e. step up once a mip-0
1009            // voxel would be smaller than that. `lod_px = 1` is the
1010            // natural "don't go sub-pixel" threshold; larger values
1011            // force LOD in closer (tuning/inspection).
1012            let chain = &self.chains[ci.chain_id as usize];
1013            let level = if z > 1e-3 && chain.len() > 1 {
1014                let voxel_px = px_per_world / z; // mip-0 voxel screen size
1015                ((lod_px / voxel_px).log2().ceil().max(0.0) as usize).min(chain.len() - 1)
1016            } else {
1017                0
1018            };
1019            let mut g = ci.gpu;
1020            g.model_id = chain[level];
1021            visible.push(g);
1022            boxes.push([tx0, tx1, ty0, ty1]);
1023            for &w in ci.colmul.iter() {
1024                visible_colmul.push((w & 0xffff_ffff) as u32);
1025                visible_colmul.push((w >> 32) as u32);
1026            }
1027            for ty in ty0..=ty1 {
1028                for tx in tx0..=tx1 {
1029                    counts[(ty * tiles_x as i32 + tx) as usize] += 1;
1030                }
1031            }
1032        }
1033
1034        if visible.is_empty() {
1035            return (0, tiles_x, tiles_y);
1036        }
1037
1038        // Prefix-sum counts → per-tile offsets; build the flat grouped
1039        // index list.
1040        let mut tile_ranges = vec![0u32; n_tiles * 2];
1041        let mut running = 0u32;
1042        for t in 0..n_tiles {
1043            tile_ranges[2 * t] = running; // offset
1044            tile_ranges[2 * t + 1] = counts[t]; // count
1045            running += counts[t];
1046        }
1047        let total = running as usize;
1048        let mut tile_instances = vec![0u32; total.max(1)];
1049        let mut cursor: Vec<u32> = (0..n_tiles).map(|t| tile_ranges[2 * t]).collect();
1050        for (vis_idx, b) in boxes.iter().enumerate() {
1051            for ty in b[2]..=b[3] {
1052                for tx in b[0]..=b[1] {
1053                    let t = (ty * tiles_x as i32 + tx) as usize;
1054                    tile_instances[cursor[t] as usize] = vis_idx as u32;
1055                    cursor[t] += 1;
1056                }
1057            }
1058        }
1059
1060        // Upload: instances + (grown) tile buffers. Grow a tile buffer
1061        // only when this frame needs more than its capacity (wgpu has
1062        // no Clone on Buffer, so we replace the field in place).
1063        queue.write_buffer(&self.instances, 0, bytemuck::cast_slice(&visible));
1064        let need_ranges = tile_ranges.len() as u32;
1065        if need_ranges > self.tile_ranges_cap {
1066            self.tile_ranges_cap = need_ranges.next_power_of_two();
1067            self.tile_ranges = storage_dst_u32(
1068                device,
1069                "roxlap-gpu sprite_reg.tile_ranges",
1070                self.tile_ranges_cap,
1071            );
1072        }
1073        let need_inst = tile_instances.len() as u32;
1074        if need_inst > self.tile_instances_cap {
1075            self.tile_instances_cap = need_inst.next_power_of_two();
1076            self.tile_instances = storage_dst_u32(
1077                device,
1078                "roxlap-gpu sprite_reg.tile_instances",
1079                self.tile_instances_cap,
1080            );
1081        }
1082        queue.write_buffer(&self.tile_ranges, 0, bytemuck::cast_slice(&tile_ranges));
1083        queue.write_buffer(
1084            &self.tile_instances,
1085            0,
1086            bytemuck::cast_slice(&tile_instances),
1087        );
1088        let need_colmul = visible_colmul.len() as u32;
1089        if need_colmul > self.colmul_cap {
1090            self.colmul_cap = need_colmul.next_power_of_two();
1091            self.colmul = storage_dst_u32(device, "roxlap-gpu sprite_reg.colmul", self.colmul_cap);
1092        }
1093        queue.write_buffer(&self.colmul, 0, bytemuck::cast_slice(&visible_colmul));
1094
1095        (visible.len() as u32, tiles_x, tiles_y)
1096    }
1097}
1098
1099/// GPU.12 incremental — per-entry placement of one model's `colors`
1100/// (and the parallel `dirs`) within the shared registry buffers: a
1101/// `[off, off+cap)` word window holding `len` live words. `cap >= len`
1102/// gives slack so a carve that *grows* the surface-voxel count can be
1103/// rewritten in place without relocating.
1104#[derive(Clone, Copy, Debug, PartialEq, Eq)]
1105struct ColorSlot {
1106    off: u32,
1107    cap: u32,
1108    len: u32,
1109}
1110
1111/// First-fit suballocator over the parallel `colors`/`dirs` buffers
1112/// (same offsets/ranks → one allocator drives both). Each registry
1113/// entry owns a [`ColorSlot`]; growth past a slot's `cap` relocates it
1114/// (freeing the old block) via the free list or a bump tail, and only
1115/// when the tail would exceed `cap_total` does the caller grow + repack
1116/// the whole buffer. Pure (no GPU) so it unit-tests on its own.
1117#[derive(Debug, Default)]
1118struct ColorsAllocator {
1119    /// Per-entry slot, indexed by entry id.
1120    slots: Vec<ColorSlot>,
1121    /// Freed `(off, cap)` blocks available for first-fit reuse.
1122    free: Vec<(u32, u32)>,
1123    /// Next bump-allocation position (words).
1124    tail: u32,
1125    /// Total buffer capacity in words.
1126    cap_total: u32,
1127}
1128
1129/// Slack-padded capacity for a `len`-word array: +25% + 16 words, so a
1130/// few extra surface voxels from a carve fit without relocating.
1131fn slot_cap(len: u32) -> u32 {
1132    len + len / 4 + 16
1133}
1134
1135impl ColorsAllocator {
1136    /// Lay every entry out contiguously (with per-slot slack) and add a
1137    /// global tail headroom so early growth bump-allocates rather than
1138    /// repacks.
1139    fn new(entry_lens: &[u32]) -> Self {
1140        let mut a = Self::default();
1141        a.repack(entry_lens);
1142        a
1143    }
1144
1145    fn slot(&self, entry: usize) -> ColorSlot {
1146        self.slots[entry]
1147    }
1148
1149    fn cap_total(&self) -> u32 {
1150        self.cap_total
1151    }
1152
1153    /// Repack ALL entries compactly to fit `new_lens`, resetting the
1154    /// free list + tail and choosing a fresh `cap_total` with headroom.
1155    /// Used at initial build and on a buffer grow.
1156    fn repack(&mut self, new_lens: &[u32]) {
1157        self.free.clear();
1158        let mut off = 0u32;
1159        let mut slots = Vec::with_capacity(new_lens.len());
1160        for &len in new_lens {
1161            let cap = slot_cap(len);
1162            slots.push(ColorSlot { off, cap, len });
1163            off += cap;
1164        }
1165        self.slots = slots;
1166        self.tail = off;
1167        // Global headroom: +50% + 256 words.
1168        self.cap_total = off + off / 2 + 256;
1169    }
1170
1171    /// Place `new_len` words for `entry`. Returns `Some(off)` with the
1172    /// (possibly relocated) slot offset, or `None` if the buffer must
1173    /// grow + repack. On relocation the old block is pushed to the free
1174    /// list; an in-place fit returns the unchanged offset.
1175    fn place(&mut self, entry: usize, new_len: u32) -> Option<u32> {
1176        let cur = self.slots[entry];
1177        if new_len <= cur.cap {
1178            self.slots[entry] = ColorSlot {
1179                len: new_len,
1180                ..cur
1181            };
1182            return Some(cur.off);
1183        }
1184        let old = (cur.off, cur.cap);
1185        // First-fit a freed block big enough for the live data.
1186        if let Some(i) = self.free.iter().position(|&(_, c)| c >= new_len) {
1187            let (off, cap) = self.free.remove(i);
1188            self.free.push(old);
1189            self.slots[entry] = ColorSlot {
1190                off,
1191                cap,
1192                len: new_len,
1193            };
1194            return Some(off);
1195        }
1196        // Bump the tail if there's room.
1197        let want = slot_cap(new_len);
1198        if self.tail + want <= self.cap_total {
1199            let off = self.tail;
1200            self.tail += want;
1201            self.free.push(old);
1202            self.slots[entry] = ColorSlot {
1203                off,
1204                cap: want,
1205                len: new_len,
1206            };
1207            return Some(off);
1208        }
1209        None
1210    }
1211}
1212
1213/// Create a STORAGE buffer of u32s; pads empty input (wgpu rejects
1214/// zero-sized storage bindings).
1215#[allow(dead_code)]
1216fn storage_u32(device: &wgpu::Device, label: &str, data: &[u32]) -> wgpu::Buffer {
1217    use wgpu::util::DeviceExt;
1218    let bytes: &[u8] = if data.is_empty() {
1219        bytemuck::cast_slice(&[0u32])
1220    } else {
1221        bytemuck::cast_slice(data)
1222    };
1223    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
1224        label: Some(label),
1225        contents: bytes,
1226        usage: wgpu::BufferUsages::STORAGE,
1227    })
1228}
1229
1230/// Create an uninitialised `STORAGE | COPY_DST` `u32` buffer of `cap`
1231/// words (≥1). Written each frame via `queue.write_buffer`.
1232fn storage_dst_u32(device: &wgpu::Device, label: &str, cap: u32) -> wgpu::Buffer {
1233    device.create_buffer(&wgpu::BufferDescriptor {
1234        label: Some(label),
1235        size: u64::from(cap.max(1)) * 4,
1236        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
1237        mapped_at_creation: false,
1238    })
1239}
1240
1241/// Create a `STORAGE | COPY_DST` `u32` buffer of `cap` words (≥ data
1242/// length, ≥ 1), initialised with `data` at offset 0 and the tail left
1243/// zeroed. Unlike [`storage_u32`] (STORAGE-only, exact-size) this both
1244/// reserves spare capacity and is `COPY_DST`, so the incremental
1245/// [`SpriteRegistryResident::update_model`] can `write_buffer` a growing
1246/// `colors`/`dirs` array in place. Filled via `mapped_at_creation` so no
1247/// queue is needed at upload time.
1248fn storage_dst_u32_cap(device: &wgpu::Device, label: &str, data: &[u32], cap: u32) -> wgpu::Buffer {
1249    let cap = cap.max(data.len() as u32).max(1);
1250    let buf = device.create_buffer(&wgpu::BufferDescriptor {
1251        label: Some(label),
1252        size: u64::from(cap) * 4,
1253        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
1254        mapped_at_creation: true,
1255    });
1256    if !data.is_empty() {
1257        buf.slice(..(data.len() as u64 * 4))
1258            .get_mapped_range_mut()
1259            .copy_from_slice(bytemuck::cast_slice(data));
1260    }
1261    buf.unmap();
1262    buf
1263}
1264
1265/// Create a `STORAGE | COPY_DST` buffer of Pod records, exact-size
1266/// (≥ 1, zero-padded), so individual records can be rewritten in place
1267/// by [`SpriteRegistryResident::update_model`] on a relocation. The
1268/// record *count* never changes on an incremental edit (no model is
1269/// added/removed), so no slack is needed here.
1270fn storage_dst_pod<T: Pod + Zeroable>(
1271    device: &wgpu::Device,
1272    label: &str,
1273    data: &[T],
1274) -> wgpu::Buffer {
1275    let one = [T::zeroed()];
1276    let src: &[T] = if data.is_empty() { &one } else { data };
1277    let buf = device.create_buffer(&wgpu::BufferDescriptor {
1278        label: Some(label),
1279        size: std::mem::size_of_val(src) as u64,
1280        usage: wgpu::BufferUsages::STORAGE | wgpu::BufferUsages::COPY_DST,
1281        mapped_at_creation: true,
1282    });
1283    buf.slice(..)
1284        .get_mapped_range_mut()
1285        .copy_from_slice(bytemuck::cast_slice(src));
1286    buf.unmap();
1287    buf
1288}
1289
1290/// Create a STORAGE buffer of Pod records; pads empty input with one
1291/// zeroed `T`.
1292#[allow(dead_code)]
1293fn storage_pod<T: Pod + Zeroable>(device: &wgpu::Device, label: &str, data: &[T]) -> wgpu::Buffer {
1294    use wgpu::util::DeviceExt;
1295    let one = [T::zeroed()];
1296    let src: &[T] = if data.is_empty() { &one } else { data };
1297    device.create_buffer_init(&wgpu::util::BufferInitDescriptor {
1298        label: Some(label),
1299        contents: bytemuck::cast_slice(src),
1300        usage: wgpu::BufferUsages::STORAGE,
1301    })
1302}
1303
1304#[cfg(test)]
1305mod tests {
1306    use super::*;
1307    use roxlap_formats::kv6::{Kv6, Voxel};
1308
1309    /// 2×1 kv6: column (0,0) has voxels at z=5 (red) and z=1 (green)
1310    /// stored OUT of z-order; column (1,0) has one voxel at z=3.
1311    fn kv6_unsorted() -> Kv6 {
1312        let mk = |z, col| Voxel {
1313            col,
1314            z,
1315            vis: 0,
1316            dir: 0,
1317        };
1318        Kv6 {
1319            xsiz: 2,
1320            ysiz: 1,
1321            zsiz: 8,
1322            xpiv: 0.0,
1323            ypiv: 0.0,
1324            zpiv: 0.0,
1325            voxels: vec![mk(5, 0xAA), mk(1, 0xBB), mk(3, 0xCC)],
1326            xlen: vec![2, 1],
1327            ylen: vec![vec![2], vec![1]],
1328            palette: None,
1329        }
1330    }
1331
1332    #[test]
1333    fn occupancy_bits_set_at_voxel_z() {
1334        let m = build_sprite_model(&kv6_unsorted());
1335        assert_eq!(m.dims, [2, 1, 8]);
1336        assert_eq!(m.occ_words_per_col, 1); // ceil(8/32)
1337                                            // col 0: bits 1 and 5; col 1: bit 3.
1338        assert_eq!(m.occupancy[0], (1 << 1) | (1 << 5));
1339        assert_eq!(m.occupancy[1], 1 << 3);
1340    }
1341
1342    #[test]
1343    fn colors_are_ascending_z_for_rank_lookup() {
1344        let m = build_sprite_model(&kv6_unsorted());
1345        // col 0 sorted ascending z ⇒ z=1 (green 0xBB) before z=5 (0xAA).
1346        assert_eq!(m.color_offsets, vec![0, 2, 3]);
1347        assert_eq!(&m.colors, &[0xBB, 0xAA, 0xCC]);
1348    }
1349
1350    #[test]
1351    fn identity_basis_inverts_to_identity() {
1352        let inv = mat3_inverse([[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]);
1353        assert_eq!(inv, [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]]);
1354    }
1355
1356    #[test]
1357    fn fork_is_independent_of_parent() {
1358        let mut reg = SpriteModelRegistry::new();
1359        let base = reg.add(build_sprite_model(&kv6_unsorted()));
1360        let forked = reg.fork(base);
1361        assert_ne!(base, forked);
1362        // Recolour only the fork.
1363        reg.model_mut(forked).recolor(|_| 0x11);
1364        // Parent colours untouched; fork fully overwritten.
1365        assert_eq!(&reg.model(base).colors, &[0xBB, 0xAA, 0xCC]);
1366        assert_eq!(&reg.model(forked).colors, &[0x11, 0x11, 0x11]);
1367    }
1368
1369    #[test]
1370    fn registry_gpu_structs_have_expected_sizes() {
1371        assert_eq!(std::mem::size_of::<SpriteModelMeta>(), 48);
1372        assert_eq!(std::mem::size_of::<SpriteInstanceGpu>(), 64);
1373    }
1374
1375    #[test]
1376    fn add_lod_builds_halving_mip_chain() {
1377        let mut reg = SpriteModelRegistry::new();
1378        // 8×8×8 single voxel-filled column model would be ideal, but
1379        // kv6_unsorted is 2×1×8 → mips: 2×1×8 → 1×1×4 → 1×1×2 → 1×1×1.
1380        let id = reg.add_lod(build_sprite_model(&kv6_unsorted()), 4);
1381        let m0 = reg.model(id);
1382        assert_eq!(m0.dims, [2, 1, 8]);
1383        assert!((m0.voxel_world_size - 1.0).abs() < 1e-6);
1384    }
1385
1386    /// kv6 from explicit voxels, ordered x-major/y-inner to match
1387    /// `build_sprite_model`'s column walk.
1388    fn kv6_from(xsiz: u32, ysiz: u32, zsiz: u32, voxels: &[(u32, u32, u16, u32)]) -> Kv6 {
1389        let mut ylen = vec![vec![0u16; ysiz as usize]; xsiz as usize];
1390        let mut flat = Vec::new();
1391        for x in 0..xsiz {
1392            for y in 0..ysiz {
1393                let mut col: Vec<(u16, u32)> = voxels
1394                    .iter()
1395                    .filter(|(vx, vy, _, _)| *vx == x && *vy == y)
1396                    .map(|(_, _, z, c)| (*z, *c))
1397                    .collect();
1398                col.sort_by_key(|(z, _)| *z);
1399                ylen[x as usize][y as usize] = col.len() as u16;
1400                for (z, c) in col {
1401                    flat.push(Voxel {
1402                        col: c,
1403                        z,
1404                        vis: 0,
1405                        dir: 0,
1406                    });
1407                }
1408            }
1409        }
1410        let xlen = ylen
1411            .iter()
1412            .map(|c| c.iter().map(|&v| u32::from(v)).sum())
1413            .collect();
1414        Kv6 {
1415            xsiz,
1416            ysiz,
1417            zsiz,
1418            xpiv: 0.0,
1419            ypiv: 0.0,
1420            zpiv: 0.0,
1421            voxels: flat,
1422            xlen,
1423            ylen,
1424            palette: None,
1425        }
1426    }
1427
1428    fn offsets_consistent(m: &SpriteModel) -> bool {
1429        let cols = (m.dims[0] * m.dims[1]) as usize;
1430        if m.color_offsets.len() != cols + 1 {
1431            return false;
1432        }
1433        // Monotonic non-decreasing + last == colors.len + each column's
1434        // span == its solid-voxel count.
1435        for w in m.color_offsets.windows(2) {
1436            if w[1] < w[0] {
1437                return false;
1438            }
1439        }
1440        m.color_offsets[cols] as usize == m.colors.len()
1441    }
1442
1443    #[test]
1444    fn carve_two_layers_keeps_offsets_consistent() {
1445        // Mirror the demo's carve: columns with voxels at varied z,
1446        // some sharing z=0/z=1, some not.
1447        let kv6 = kv6_from(
1448            3,
1449            2,
1450            8,
1451            &[
1452                (0, 0, 0, 0xA0),
1453                (0, 0, 1, 0xA1),
1454                (0, 0, 5, 0xA5),
1455                (1, 0, 1, 0xB1),
1456                (2, 1, 0, 0xC0),
1457                (2, 1, 3, 0xC3),
1458            ],
1459        );
1460        let mut m = build_sprite_model(&kv6);
1461        assert!(offsets_consistent(&m));
1462        for z in 0..2u32 {
1463            for y in 0..m.dims[1] {
1464                for x in 0..m.dims[0] {
1465                    m.set_voxel(x, y, z, None);
1466                }
1467            }
1468            assert!(offsets_consistent(&m), "inconsistent after carving z={z}");
1469            // downsample must not panic on the carved model.
1470            let _ = m.downsample();
1471        }
1472    }
1473
1474    #[test]
1475    fn set_voxel_inserts_replaces_and_clears() {
1476        // col 0 starts with z=1 (0xBB), z=5 (0xAA); col 1 with z=3 (0xCC).
1477        let mut m = build_sprite_model(&kv6_unsorted());
1478
1479        // Insert z=3 into col 0 (between z=1 and z=5) → rank 1.
1480        assert!(m.set_voxel(0, 0, 3, Some(0x55)));
1481        assert_eq!(m.occupancy[0], (1 << 1) | (1 << 3) | (1 << 5));
1482        // col 0 colours ascending z: 0xBB(z1), 0x55(z3), 0xAA(z5).
1483        assert_eq!(m.color_offsets, vec![0, 3, 4]);
1484        assert_eq!(&m.colors, &[0xBB, 0x55, 0xAA, 0xCC]);
1485
1486        // Replace z=3 in place (no offset shift).
1487        assert!(m.set_voxel(0, 0, 3, Some(0x66)));
1488        assert_eq!(&m.colors, &[0xBB, 0x66, 0xAA, 0xCC]);
1489        assert_eq!(m.color_offsets, vec![0, 3, 4]);
1490
1491        // Clear z=1 (rank 0) from col 0.
1492        assert!(m.set_voxel(0, 0, 1, None));
1493        assert_eq!(m.occupancy[0], (1 << 3) | (1 << 5));
1494        assert_eq!(m.color_offsets, vec![0, 2, 3]);
1495        assert_eq!(&m.colors, &[0x66, 0xAA, 0xCC]);
1496
1497        // No-ops: clear an empty voxel, edit out of bounds.
1498        assert!(!m.set_voxel(0, 0, 2, None));
1499        assert!(!m.set_voxel(9, 0, 0, Some(1)));
1500    }
1501
1502    #[test]
1503    fn rebuild_lod_refreshes_coarse_levels_from_mip0() {
1504        let mut reg = SpriteModelRegistry::new();
1505        let id = reg.add_lod(build_sprite_model(&kv6_unsorted()), 3);
1506        // Recolour mip-0 only via model_mut, then rebuild the ladder.
1507        reg.model_mut(id).recolor(|_| 0x0000_2000);
1508        reg.rebuild_lod(id);
1509        // The mip-1 average of all-0x2000 voxels is still 0x2000.
1510        let lvl1_entry = reg.chains[id as usize][1] as usize;
1511        assert!(reg.entries[lvl1_entry]
1512            .colors
1513            .iter()
1514            .all(|&c| c == 0x0000_2000));
1515    }
1516
1517    // ---- GPU.12 incremental: colors/dirs suballocator -----------------
1518
1519    /// Every slot fits its data, has slack, doesn't overlap the next, and
1520    /// the buffer reserves tail headroom past the last slot.
1521    fn alloc_invariants(a: &ColorsAllocator, lens: &[u32]) {
1522        let mut prev_end = 0u32;
1523        for (e, &len) in lens.iter().enumerate() {
1524            let s = a.slot(e);
1525            assert_eq!(s.len, len, "slot {e} len");
1526            assert!(s.cap >= s.len, "slot {e} cap >= len");
1527            // In a freshly repacked layout slots are in entry order.
1528            assert!(s.off >= prev_end, "slot {e} overlaps previous");
1529            assert!(s.off + s.cap <= a.cap_total(), "slot {e} past cap_total");
1530            prev_end = s.off + s.cap;
1531        }
1532        assert!(a.cap_total() >= prev_end, "tail headroom");
1533    }
1534
1535    #[test]
1536    fn allocator_new_lays_out_with_slack_and_headroom() {
1537        let lens = [10u32, 0, 64, 7];
1538        let a = ColorsAllocator::new(&lens);
1539        alloc_invariants(&a, &lens);
1540        // Slack: a 64-word slot has cap > 64 so a small carve-grow fits.
1541        assert!(a.slot(2).cap > 64);
1542        // Headroom past the bump tail for early growth.
1543        assert!(a.cap_total() > a.slot(3).off + a.slot(3).cap);
1544    }
1545
1546    #[test]
1547    fn allocator_place_in_place_when_within_cap() {
1548        let mut a = ColorsAllocator::new(&[10, 20]);
1549        let off0 = a.slot(0).off;
1550        let cap0 = a.slot(0).cap;
1551        // Shrink: still the same slot.
1552        assert_eq!(a.place(0, 5), Some(off0));
1553        assert_eq!(a.slot(0).len, 5);
1554        assert_eq!(a.slot(0).cap, cap0);
1555        // Grow within slack: same offset, no relocation.
1556        assert_eq!(a.place(0, cap0), Some(off0));
1557        assert_eq!(a.slot(0).off, off0);
1558        assert!(a.free.is_empty(), "no relocation should free anything");
1559    }
1560
1561    #[test]
1562    fn allocator_place_relocates_to_tail_and_frees_old() {
1563        let mut a = ColorsAllocator::new(&[10, 20]);
1564        let old0 = (a.slot(0).off, a.slot(0).cap);
1565        let tail_before = a.tail;
1566        // Overgrow entry 0 past its cap → relocate to the bump tail.
1567        let new_len = a.slot(0).cap + 5;
1568        let off = a.place(0, new_len).expect("fits in headroom");
1569        assert_eq!(off, tail_before, "relocated to old tail");
1570        assert_eq!(a.slot(0).off, off);
1571        assert_eq!(a.slot(0).len, new_len);
1572        assert!(a.free.contains(&old0), "old slot freed");
1573    }
1574
1575    #[test]
1576    fn allocator_reuses_freed_block_first_fit() {
1577        // Entry 0 has a large slot; entry 1 a tiny one, so growing 1 must
1578        // relocate (it can't fit in place) and lands in 0's freed block.
1579        let mut a = ColorsAllocator::new(&[10, 2]);
1580        let old0 = (a.slot(0).off, a.slot(0).cap);
1581        // Relocate entry 0 to the tail, freeing its original block.
1582        let _ = a.place(0, a.slot(0).cap + 5).unwrap();
1583        assert!(a.free.contains(&old0));
1584        // Grow entry 1 past its (tiny) cap but ≤ the freed block's cap →
1585        // first-fit reuses that block rather than bumping the tail.
1586        let new1 = a.slot(1).cap + 1;
1587        assert!(new1 <= old0.1, "freed block big enough");
1588        let off = a.place(1, new1).expect("reuses freed block");
1589        assert_eq!(off, old0.0, "first-fit reused the freed slot offset");
1590        assert!(!a.free.contains(&old0), "freed block consumed");
1591    }
1592
1593    #[test]
1594    fn allocator_signals_grow_then_repack_restores() {
1595        let mut a = ColorsAllocator::new(&[8, 8]);
1596        // Force overflow: ask for far more than cap_total.
1597        let huge = a.cap_total() + 100;
1598        assert_eq!(a.place(0, huge), None, "overflow must signal grow");
1599        // Repack with the new lengths compacts + grows the buffer.
1600        a.repack(&[huge, 8]);
1601        alloc_invariants(&a, &[huge, 8]);
1602        assert!(a.cap_total() > huge);
1603        // After repack the entry now fits in place.
1604        assert_eq!(a.place(0, huge), Some(a.slot(0).off));
1605    }
1606
1607    /// Drive the allocator like a real carve loop (mirroring
1608    /// `update_model`): one model's colour count drifts up and down
1609    /// across many edits while two neighbours stay put. Growth is
1610    /// absorbed in place / via the free list / by the bump tail, and on
1611    /// the rare overflow we repack (as `update_model` does). After every
1612    /// edit the live `[off, off+len)` windows must stay disjoint.
1613    #[test]
1614    fn allocator_carve_loop_keeps_live_windows_disjoint() {
1615        let mut a = ColorsAllocator::new(&[40, 12, 40]);
1616        let mut lens = [40u32, 12, 40];
1617        // A deterministic up/down walk of entry 1's length, incl. a jump
1618        // that forces at least one grow+repack.
1619        let walk = [13u32, 30, 60, 18, 9, 80, 80, 25, 200, 7];
1620        let mut grew = false;
1621        for &len in &walk {
1622            lens[1] = len;
1623            // Entry 1 re-placed; on overflow, repack the whole set.
1624            if a.place(1, len).is_none() {
1625                grew = true;
1626                a.repack(&lens);
1627            } else {
1628                // Neighbours fit in place every time.
1629                assert_eq!(a.place(0, 40), Some(a.slot(0).off));
1630                assert_eq!(a.place(2, 40), Some(a.slot(2).off));
1631            }
1632            assert_eq!(a.slot(1).len, len);
1633
1634            // No two entries' live windows overlap.
1635            let mut wins: Vec<(u32, u32)> =
1636                (0..3).map(|e| (a.slot(e).off, a.slot(e).len)).collect();
1637            wins.sort_by_key(|w| w.0);
1638            for pair in wins.windows(2) {
1639                let (o0, l0) = pair[0];
1640                let (o1, _) = pair[1];
1641                assert!(o0 + l0 <= o1, "live windows overlap: {pair:?}");
1642            }
1643        }
1644        assert!(grew, "the 200-word jump should have forced a repack");
1645    }
1646}
roxlap_gpu/sprite_model.rs

roxlap_gpu/
sprite_model.rs