Skip to main content

roxlap_core/
world_lighting.rs

1//! Voxlap's world-voxel lighting bake (`updatelighting`,
2//! voxlap5.c:10539).
3//!
4//! Walks every visible voxel inside a 3D bounding box and rewrites
5//! its alpha byte (the per-voxel "brightness" channel that the
6//! rendering path mulhi'es against `kv6colmul`-style modulators)
7//! based on the engine's current `LightSrc` set + lightmode.
8//!
9//! Two modes:
10//! - `lightmode == 1`: cheap directional bake — every voxel gets
11//!   shading from a single hardcoded sun direction
12//!   `(tp.y * 0.5 + tp.z) * 64 + 103.5` clamped to `[0, 255]`.
13//! - `lightmode == 2`: per-light Lambertian bake — for each light
14//!   in range, subtract `g * h * sc` where `g = 1/(d·d²) -
15//!   1/(r·r²)` (cube-falloff with hard cutoff at radius `r`),
16//!   `h = surface_normal · light_delta` (negative ⇒ face front-
17//!   lit, contributes; positive ⇒ self-shadowed, skipped). Result
18//!   subtracts from a base `(tp.y * 0.5 + tp.z) * 16 + 47.5`.
19//!
20//! The surface normal `tp` for each voxel comes from `estnorm` —
21//! a 5×5×5 voxel-solid neighbourhood vote (`ESTNORMRAD == 2` in
22//! voxlap, the production path).
23
24#![allow(
25    clippy::cast_possible_truncation,
26    clippy::cast_possible_wrap,
27    clippy::cast_sign_loss,
28    clippy::cast_precision_loss,
29    clippy::similar_names,
30    clippy::too_many_arguments,
31    clippy::too_many_lines,
32    clippy::doc_markdown,
33    clippy::many_single_char_names,
34    clippy::must_use_candidate,
35    clippy::unnecessary_cast,
36    clippy::cast_lossless,
37    clippy::needless_bool_assign,
38    clippy::needless_range_loop,
39    clippy::no_effect,
40    clippy::identity_op,
41    clippy::if_not_else
42)]
43
44use rayon::prelude::*;
45
46use crate::engine::LightSrc;
47
/// Voxlap's `MAXZDIM` (`voxlap5.c`). World z runs `0..MAXZDIM`.
///
/// Within this module it is the upper clamp for the bake's z range and
/// the z at which bit-window reads switch to "solid below the world";
/// it also equals the number of bits in one expanded column
/// (`[u32; 8]` = 256 bits).
pub(crate) const MAXZDIM: i32 = 256;

/// Voxlap's `ESTNORMRAD == 2` cache window radius. The estnorm
/// neighbourhood is `(2*RAD+1)³ = 5×5×5` voxels.
///
/// The same radius is the padding added around a bake rectangle so
/// that every baked voxel has a full neighbourhood available.
pub(crate) const ESTNORMRAD: i32 = 2;
54
/// Popcount of a 5-bit pattern. Voxlap's `bitnum[32]`
/// (voxlap5.c:1477): `BITNUM[i]` is the number of set bits in `i`
/// for `i ∈ 0..32`. estnorm indexes it with the 5-bit z window of
/// one column to count the solid voxels in that window.
///
/// Generated at compile time rather than spelled out; the values are
/// identical to voxlap's literal table.
pub(crate) const BITNUM: [i8; 32] = {
    let mut table = [0i8; 32];
    let mut pattern = 0usize;
    while pattern < 32 {
        table[pattern] = (pattern as u32).count_ones() as i8;
        pattern += 1;
    }
    table
};
61
/// Packed popcount + z-tilt of a 5-bit pattern. Voxlap's
/// `bitsnum[32]` (voxlap5.c:1487).
///
/// For pattern `i`, bit `b` (`b ∈ 0..5`) carries the z weight
/// `b - 2`, i.e. `-2, -1, 0, +1, +2`. Each entry is
/// `popcount(i) + (weighted_sum(i) << 16)` evaluated as a plain
/// `i32` sum, so entries can be added together directly: the low
/// 16 bits of the total are the summed popcount and an arithmetic
/// `>> 16` of the total is the summed z weight (valid while the
/// summed popcount stays below 65536).
///
/// Generated at compile time; the values are identical to voxlap's
/// literal table.
pub(crate) const BITSNUM: [i32; 32] = {
    let mut table = [0i32; 32];
    let mut pattern = 0usize;
    while pattern < 32 {
        let mut count = 0i32;
        let mut tilt = 0i32;
        let mut bit = 0i32;
        while bit < 5 {
            if (pattern >> bit) & 1 != 0 {
                count += 1;
                tilt += bit - 2;
            }
            bit += 1;
        }
        table[pattern] = count + tilt * (1 << 16);
        pattern += 1;
    }
    table
};
78
/// `xbsflor[k] = -1 << k`: a mask with bits `k..32` set and the low
/// `k` bits clear. `expandbit256` ORs it into the word under
/// construction at an air→solid transition.
///
/// Any `k >= 32` yields `0` (nothing left to set in this word).
pub(crate) const fn xbsflor(k: usize) -> u32 {
    if k < 32 {
        u32::MAX << k
    } else {
        0
    }
}
89
/// `xbsceil[k] = ~xbsflor[k]`: a mask with only the low `k` bits
/// set. `expandbit256` ANDs it into the word under construction at
/// a solid→air transition.
///
/// Any `k >= 32` yields an all-ones mask.
pub(crate) const fn xbsceil(k: usize) -> u32 {
    if k < 32 {
        !(u32::MAX << k)
    } else {
        u32::MAX
    }
}
95
96/// `expandbit256` — slab structure → 256-bit "voxel solid" bit
97/// array (low-bit-first, low-z-first). Mirror of voxlap5.c:1059.
98///
99/// The output `bits` is a `[u32; 8]` (= 256 bits = `MAXZDIM` z
100/// levels). Bit `z` is set iff voxel at column `(x, y)`, depth `z`
101/// is solid (= part of any slab body, including hidden interiors
102/// between slabs).
103///
104/// Walks the slab linked list, alternating between `v[1]`
105/// (air→solid transition at top of slab) and `v[3]` (solid→air
106/// transition at bottom of next slab). Each transition flushes
107/// pending whole-words (full air `0` or full solid `-1`) until
108/// it lands inside the partial word containing the transition,
109/// then OR/ANDs the partial mask via `xbsflor` / `xbsceil`.
110pub(crate) fn expandbit256(column: &[u8], bits: &mut [u32; 8]) {
111    let mut src_idx: usize = 0;
112    let mut dst_idx: usize = 0;
113    let mut bitpos: i32 = 32;
114    let mut word: u32 = 0;
115    let nbits: i32 = (bits.len() as i32) * 32;
116
117    // First iteration: jump straight to the v[1] transition (no
118    // preceding slab whose v[3] we'd need to flush).
119    let mut next_len: i32;
120    let mut delta: i32;
121    let mut go_to_v3 = false;
122
123    'outer: loop {
124        if go_to_v3 {
125            // v[3] : solid → air transition.
126            if src_idx + 3 >= column.len() {
127                break;
128            }
129            delta = i32::from(column[src_idx + 3]) - bitpos;
130            while delta >= 0 {
131                if dst_idx >= bits.len() {
132                    break 'outer;
133                }
134                bits[dst_idx] = word;
135                dst_idx += 1;
136                word = u32::MAX;
137                bitpos += 32;
138                delta -= 32;
139            }
140            word &= xbsceil((delta + 32) as usize);
141        }
142        go_to_v3 = true;
143
144        // v[1] : air → solid transition.
145        if src_idx + 1 >= column.len() {
146            break;
147        }
148        delta = i32::from(column[src_idx + 1]) - bitpos;
149        while delta >= 0 {
150            if dst_idx >= bits.len() {
151                break 'outer;
152            }
153            bits[dst_idx] = word;
154            dst_idx += 1;
155            word = 0;
156            bitpos += 32;
157            delta -= 32;
158        }
159        word |= xbsflor((delta + 32) as usize);
160
161        next_len = i32::from(column[src_idx]);
162        if next_len == 0 {
163            break;
164        }
165        src_idx += (next_len as usize) * 4;
166    }
167
168    // Pad the rest of the buffer with `word`'s tail value (in C the
169    // post-loop word is whatever the last `v[1]` partial-set
170    // produced; remaining whole-words flush as solid `-1`).
171    if bitpos <= nbits {
172        while dst_idx < bits.len() {
173            bits[dst_idx] = word;
174            dst_idx += 1;
175            word = u32::MAX;
176        }
177    }
178}
179
/// Pre-expanded solidity bitmaps for every column of a padded 2D
/// rectangle, used by the estnorm neighbourhood vote.
///
/// For a bake rectangle `[x0..x1) × [y0..y1)` the cache holds
/// `(x1 - x0 + 2*RAD) × (y1 - y0 + 2*RAD)` columns at 32 bytes each
/// (`[u32; 8]`), so a 5-bit z window of any neighbour column is a
/// direct lookup. This plays the role of voxlap's `xbsbuf`, except
/// that the whole rectangle is expanded up front. As a size
/// reference, a 452×452-column cache is roughly 6.5 MB.
#[allow(dead_code)] // vsid field/method preserved for voxlap-parity inspection
pub struct EstNormCache {
    /// Row-major column bitmaps: `bits[yidx * width + xidx]` belongs
    /// to world column `(origin_x + xidx, origin_y + yidx)`.
    bits: Vec<[u32; 8]>,
    /// World x of cache column 0 (the bake rectangle's `x0 - RAD`).
    origin_x: i32,
    /// World y of cache row 0 (the bake rectangle's `y0 - RAD`).
    origin_y: i32,
    /// Columns per cache row (`x1 - x0 + 2 * RAD`).
    width: usize,
    /// Number of cache rows (`y1 - y0 + 2 * RAD`).
    height: usize,
    /// Reciprocal-square-root table produced by `build_fsqrecip`:
    /// `fsqrecip[k] ≈ 1 / sqrt(k)` for `k ∈ 1..5860`, with
    /// `fsqrecip[0] = 0`. Used to normalise the integer normal.
    fsqrecip: Vec<f32>,
    /// Voxel-grid side length (`vsid`) recorded by `build`; left at
    /// `0` by `build_with_reader`.
    vsid: i32,
}
214
/// Build voxlap's `fsqrecip[5860]` table (voxlap5.c:12240-12256):
/// `table[k] ≈ 1 / sqrt(k)`, with `table[0] = 0`.
///
/// Entries 1..=3 are computed directly. The rest are filled six at a
/// time from already-known entries:
/// - even `k`: `table[k / 2] * table[2]`;
/// - odd multiples of 3 (`z + 5`): `table[k / 3] * table[3]`;
/// - the two remaining odd entries: the mean of their even
///   neighbours, polished with one Newton–Raphson step — two steps
///   while `z <= 22`, where the mean is a poorer starting guess.
fn build_fsqrecip() -> Vec<f32> {
    const N: usize = 5860;

    // One Newton–Raphson step towards 1/sqrt(k) from the estimate `f`.
    let refine = |k: usize, f: f32| -> f32 { (1.5 - 0.5 * (k as f32) * f * f) * f };

    let mut table = vec![0.0_f32; N];
    table[1] = 1.0;
    table[2] = 1.0_f32 / 2.0_f32.sqrt();
    table[3] = 1.0 / 3.0_f32.sqrt();
    let inv_sqrt2 = table[2];
    let inv_sqrt3 = table[3];

    // Each group covers indices z..=z+5; only whole groups that fit
    // in the table are processed.
    for (group, z) in (4..N - 5).step_by(6).enumerate() {
        // z + 5 = 3 * third, with third running 3, 5, 7, ...
        let third = 3 + 2 * group;

        table[z] = table[z >> 1] * inv_sqrt2;
        table[z + 2] = table[(z + 2) >> 1] * inv_sqrt2;
        table[z + 4] = table[(z + 4) >> 1] * inv_sqrt2;
        table[z + 5] = table[third] * inv_sqrt3;

        // z + 1 and z + 3: interpolate between the even neighbours.
        for k in (z + 1..=z + 3).step_by(2) {
            let mut f = (table[k - 1] + table[k + 1]) * 0.5_f32;
            if z <= 22 {
                f = refine(k, f);
            }
            table[k] = refine(k, f);
        }
    }
    table
}
254
255impl EstNormCache {
256    /// Build the bit-grid cache covering the bounding region
257    /// `[x0..x1) × [y0..y1)` extended by `ESTNORMRAD` padding on
258    /// each side. Calling [`Self::estnorm`] for any `(x, y)` inside
259    /// the original `[x0..x1) × [y0..y1)` box is then a pure read.
260    ///
261    /// Wraps [`Self::build_with_reader`] with a flat-table closure.
262    #[must_use]
263    pub fn build(
264        world_data: &[u8],
265        column_offsets: &[u32],
266        vsid: u32,
267        x0: i32,
268        y0: i32,
269        x1: i32,
270        y1: i32,
271    ) -> Self {
272        let vsid_i = vsid as i32;
273        let reader = |x: i32, y: i32| -> Option<&[u8]> {
274            if (x | y) < 0 || x >= vsid_i || y >= vsid_i {
275                return None;
276            }
277            let col_idx = (y as u32) * vsid + (x as u32);
278            let off_start = column_offsets[col_idx as usize] as usize;
279            // Slice to end-of-buffer; the slab walker self-
280            // terminates via nextptr.
281            Some(&world_data[off_start..])
282        };
283        let mut cache = Self::build_with_reader(reader, x0, y0, x1, y1);
284        cache.vsid = vsid_i;
285        cache
286    }
287
288    /// S4B.4.b: chunk-aware cache build. The closure
289    /// `column_reader(x, y)` returns the slab bytes of the column
290    /// at world-or-grid-local position `(x, y)`, or `None` for an
291    /// implicit-air / out-of-grid column (matching `build`'s OOB
292    /// "treat as full air" semantics).
293    ///
294    /// No vsid bound — the reader owns OOB handling. Per-chunk
295    /// bakes use a closure that resolves `(x, y)` to a neighbour
296    /// chunk via `Grid::chunk(IVec3)` so the 2-voxel padding
297    /// extends seamlessly across chunk boundaries.
298    ///
299    /// The cache's [`Self::vsid`] field is left at `0` for chunk-
300    /// aware builds — the field is dead-code anyway, preserved
301    /// only for voxlap-parity inspection.
302    #[must_use]
303    pub fn build_with_reader<'r>(
304        column_reader: impl Fn(i32, i32) -> Option<&'r [u8]>,
305        x0: i32,
306        y0: i32,
307        x1: i32,
308        y1: i32,
309    ) -> Self {
310        let rad = ESTNORMRAD;
311        let pad_x0 = x0 - rad;
312        let pad_y0 = y0 - rad;
313        let pad_x1 = x1 + rad;
314        let pad_y1 = y1 + rad;
315        let width = (pad_x1 - pad_x0) as usize;
316        let height = (pad_y1 - pad_y0) as usize;
317
318        let mut bits = vec![[0u32; 8]; width * height];
319        for yi in 0..height {
320            let y = pad_y0 + yi as i32;
321            for xi in 0..width {
322                let x = pad_x0 + xi as i32;
323                if let Some(column) = column_reader(x, y) {
324                    expandbit256(column, &mut bits[yi * width + xi]);
325                }
326                // None → leave the cache slot zeroed (treat as full
327                // air), matching `build`'s OOB behaviour.
328            }
329        }
330
331        Self {
332            bits,
333            origin_x: pad_x0,
334            origin_y: pad_y0,
335            width,
336            height,
337            fsqrecip: build_fsqrecip(),
338            vsid: 0,
339        }
340    }
341
342    /// Read 5 consecutive bits starting at z-position `z` from the
343    /// column at `(xi, yi)` cache index. Returns `0..=31`.
344    /// Out-of-range positions:
345    /// - `z < -2`: returns 0 (air above world — though voxlap's
346    ///   convention is "above is sky", same effect).
347    /// - `z >= MAXZDIM`: returns `0x1f` (solid below world).
348    #[inline]
349    fn extract_bits5(&self, xi: usize, yi: usize, z: i32) -> u32 {
350        let col = &self.bits[yi * self.width + xi];
351        if z >= MAXZDIM {
352            return 0x1f;
353        }
354        if z + 5 <= 0 {
355            return 0;
356        }
357        // Combine adjacent words to handle the case where the 5-bit
358        // window straddles a word boundary.
359        let z_bit = z;
360        let word_idx = z_bit.div_euclid(32);
361        let bit_off = z_bit.rem_euclid(32) as u32;
362        let lo = if (0..8).contains(&word_idx) {
363            col[word_idx as usize]
364        } else if word_idx < 0 {
365            0 // air above world
366        } else {
367            u32::MAX // solid below world
368        };
369        let hi = if word_idx + 1 < 8 && word_idx >= -1 {
370            col[(word_idx + 1) as usize]
371        } else if word_idx + 1 < 0 {
372            0
373        } else {
374            u32::MAX
375        };
376        let combined = u64::from(lo) | (u64::from(hi) << 32);
377        ((combined >> bit_off) & 0x1f) as u32
378    }
379
380    /// Estimate the surface normal at `(x, y, z)` from a 5×5×5
381    /// voxel-solid neighbourhood vote. Mirror of voxlap5.c:1501
382    /// (`estnorm`, `ESTNORMRAD == 2` branch).
383    ///
384    /// `(x, y)` must lie inside the cache's `[x0..x1) × [y0..y1)`
385    /// region (panics otherwise — caller guarantees this via the
386    /// bounding-box iteration). `z` is unconstrained (handled via
387    /// air/solid clamping).
388    #[must_use]
389    pub fn estnorm(&self, x: i32, y: i32, z: i32) -> [f32; 3] {
390        let center_xi = (x - self.origin_x) as usize;
391        let center_yi = (y - self.origin_y) as usize;
392
393        let mut nx: i32 = 0;
394        let mut ny: i32 = 0;
395        let mut nz: i32 = 0;
396        let z_window = z - ESTNORMRAD; // top of the 5-bit z window
397
398        for yy in -ESTNORMRAD..=ESTNORMRAD {
399            let yi = (center_yi as i32 + yy) as usize;
400            // Read 5 columns at this yy row (xx = -2..=+2).
401            let b0 = self.extract_bits5(center_xi - 2, yi, z_window) as usize;
402            let b1 = self.extract_bits5(center_xi - 1, yi, z_window) as usize;
403            let b2 = self.extract_bits5(center_xi, yi, z_window) as usize;
404            let b3 = self.extract_bits5(center_xi + 1, yi, z_window) as usize;
405            let b4 = self.extract_bits5(center_xi + 2, yi, z_window) as usize;
406
407            // Per-column popcount differences give x-axis normal
408            // contributions. Voxlap weights:
409            //   2*(N(xx=+2) - N(xx=-2)) + N(xx=+1) - N(xx=-1)
410            // = `n.x` from this row (full normal sum is over yy).
411            nx += ((i32::from(BITNUM[b4]) - i32::from(BITNUM[b0])) << 1) + i32::from(BITNUM[b3])
412                - i32::from(BITNUM[b1]);
413
414            // Sum bitsnum across all 5 columns: `j` is the total
415            // signed-i16-packed contribution. Low 16 bits = number
416            // of solid voxels in this row across all 5 columns and
417            // 5 z levels. High 16 bits = z-axis contribution
418            // (positive bits from upper z, negative from lower).
419            let j = BITSNUM[b0]
420                .wrapping_add(BITSNUM[b1])
421                .wrapping_add(BITSNUM[b2])
422                .wrapping_add(BITSNUM[b3])
423                .wrapping_add(BITSNUM[b4]);
424            nz = nz.wrapping_add(j);
425            // n.y picks only the LOW i16 of `j` (= total solid
426            // count), scaled by yy. The high i16 (z contribution)
427            // doesn't enter n.y.
428            let j_lo16 = (j as i16) as i32;
429            ny = ny.wrapping_add(j_lo16 * yy);
430        }
431        nz >>= 16;
432
433        // Normalise via fsqrecip[len_sq]. Voxlap's table peaks at
434        // 5*5*5 box max = 75² + 15² + 3² = 5859 — within
435        // `fsqrecip`'s 5860-entry range. Out-of-range len_sq values
436        // (e.g. all-zero neighbourhood) get `fsqrecip[0] = 0` ⇒
437        // returns `(0, 0, 0)` which downstream lighting math
438        // tolerates.
439        let len_sq = (nx * nx + ny * ny + nz * nz) as usize;
440        let f = if len_sq < self.fsqrecip.len() {
441            self.fsqrecip[len_sq]
442        } else {
443            0.0
444        };
445        [(nx as f32) * f, (ny as f32) * f, (nz as f32) * f]
446    }
447
448    /// Voxel-grid limit; used by callers to bound their iteration.
449    #[must_use]
450    #[allow(dead_code)] // preserved for voxlap-parity inspection
451    pub(crate) fn vsid(&self) -> i32 {
452        self.vsid
453    }
454}
455
456/// Bake per-voxel lighting into the world's brightness bytes.
457/// Mirror of voxlap's `updatelighting` (`voxlap5.c:10539`).
458///
459/// Walks every visible voxel inside `[x0..x1) × [y0..y1) ×
460/// [z0..z1)` and rewrites its alpha byte (the brightness channel
461/// the rasterizer mulhi'es against `kv6colmul` modulators) under
462/// the current `lightmode` + `lights` state.
463///
464/// - `lightmode == 0`: no-op (fast return).
465/// - `lightmode == 1`: directional sun-style bake — every visible
466///   voxel gets `(tp.y * 0.5 + tp.z) * 64 + 103.5` clamped to
467///   `[0, 255]` from its surface normal `tp`.
468/// - `lightmode >= 2`: per-light Lambertian bake — base
469///   `(tp.y * 0.5 + tp.z) * 16 + 47.5` minus, for each light in
470///   range with surface normal facing it, `g * h * sc` where
471///   `g = 1/(d·d²) - 1/(r·r²)` (cube falloff with hard radius
472///   cutoff) and `h = tp · light_delta`.
473///
474/// Voxlap pads the bbox by `ESTNORMRAD` on each side internally
475/// to give estnorm enough neighbourhood; that's done here too.
476/// `lights` should match the engine's full `vx5.lightsrc[]` —
477/// the function does its own per-tile range filtering.
478///
479/// Mutates `world_data` in place. Caller is responsible for any
480/// `column_offsets` / `vsid` invariants.
481pub fn update_lighting(
482    world_data: &mut [u8],
483    column_offsets: &[u32],
484    vsid: u32,
485    x0: i32,
486    y0: i32,
487    z0: i32,
488    x1: i32,
489    y1: i32,
490    z1: i32,
491    lightmode: u32,
492    lights: &[LightSrc],
493) {
494    if lightmode == 0 {
495        return;
496    }
497    let vsid_i = vsid as i32;
498    let x0p = (x0 - ESTNORMRAD).max(0);
499    let y0p = (y0 - ESTNORMRAD).max(0);
500    let z0p = (z0 - ESTNORMRAD).max(0);
501    let x1p = (x1 + ESTNORMRAD).min(vsid_i);
502    let y1p = (y1 + ESTNORMRAD).min(vsid_i);
503    let z1p = (z1 + ESTNORMRAD).min(MAXZDIM);
504    if x0p >= x1p || y0p >= y1p || z0p >= z1p {
505        return;
506    }
507
508    // Build the cache once for the whole padded bake region.
509    // Voxlap tiles the bake into 64×64 chunks with a per-tile
510    // `lightlst` filter; for our (one-shot bake) use case the
511    // full-region filter computed inside the per-voxel loop is
512    // simpler and not measurably slower at oracle bake sizes.
513    let cache = EstNormCache::build(world_data, column_offsets, vsid, x0p, y0p, x1p, y1p);
514
515    // Per-light precomputed `lightsub[i] = 1 / (sqrt(r2) * r2)` —
516    // the radius-cutoff bias that makes the light contribution go
517    // to exactly zero at distance == sqrt(r2).
518    let lightsub: Vec<f32> = lights.iter().map(|l| 1.0 / (l.r2.sqrt() * l.r2)).collect();
519
520    // R12.4.1: parallelise the per-row bake via rayon. Each `(x, y)`
521    // pair maps to a unique column slice in `world_data`
522    // (`column_offsets[col_idx]..[col_idx + 1]` ranges are pairwise
523    // disjoint — the voxalloc allocator's invariant). Rows split
524    // cleanly across worker threads; per-row x-loops stay serial to
525    // amortise rayon's per-task overhead. Speedup follows
526    // `RAYON_NUM_THREADS` (set `=1` to disable).
527    //
528    // Lighting bakes are typically rare (one-shot at scene load) but
529    // dynamic-lighting / per-edit relighting use cases call
530    // `update_lighting` per frame — at which point the parallel
531    // path matters for interactive responsiveness.
532    // Per-column byte extents `(start, end)`. After voxalloc-driven
533    // edits (e.g. cave-gen's heavy `set_spans` carve, or runtime
534    // bullet-impact carves), columns are scattered in the slab
535    // pool, so `column_offsets[i+1]` is NOT column `i`'s end byte
536    // — voxlap walks each column's slab chain via `slng()` to
537    // recover length. We pre-compute extents here serially before
538    // moving `world_data` into the parallel mutable view; the
539    // slng walk is O(slab_count) per column, typically 1-3 slabs.
540    //
541    // **Region-bounded**: only the bake rectangle `[x0p..x1p) ×
542    // [y0p..y1p)` needs extents — the per-row body indexes only
543    // those columns. Sizing the table to `vsid²` is wasteful when
544    // a small chunk-sized region is baked against a large-vsid
545    // world (e.g. S4.1 scene-graph per-chunk bake against a
546    // vsid=4096 combined view — would have been 16M slng walks per
547    // chunk × 1024 chunks = 17B slng walks). The bake-region table
548    // collapses that to `bake_region` walks per call.
549    #[allow(clippy::cast_sign_loss)]
550    let region_w = (x1p - x0p) as usize;
551    #[allow(clippy::cast_sign_loss)]
552    let region_h = (y1p - y0p) as usize;
553    let mut column_extents: Vec<(usize, usize)> = Vec::with_capacity(region_w * region_h);
554    for yi in 0..region_h {
555        #[allow(clippy::cast_possible_wrap)]
556        let y = y0p + yi as i32;
557        for xi in 0..region_w {
558            #[allow(clippy::cast_possible_wrap)]
559            let x = x0p + xi as i32;
560            #[allow(clippy::cast_sign_loss)]
561            let col_idx = (y as u32) * vsid + (x as u32);
562            let start = column_offsets[col_idx as usize] as usize;
563            let end = start + roxlap_formats::vxl::slng(&world_data[start..]);
564            column_extents.push((start, end));
565        }
566    }
567
568    let world_view = WorldDataMutView::new(world_data);
569    let row_body = |y: i32| {
570        #[allow(clippy::cast_sign_loss)]
571        let yi = (y - y0p) as usize;
572        for x in x0p..x1p {
573            #[allow(clippy::cast_sign_loss)]
574            let xi = (x - x0p) as usize;
575            let (off_start, off_end) = column_extents[yi * region_w + xi];
576            // SAFETY: each (x, y) maps to a unique col_idx; column
577            // byte ranges `[off_start, off_end)` are pairwise
578            // disjoint across distinct `col_idx` (voxalloc's
579            // free-list invariant), so no two threads write to
580            // the same byte.
581            let column = unsafe { world_view.column_slice(off_start, off_end) };
582            shade_column(column, x, y, z0p, z1p, lightmode, lights, &lightsub, &cache);
583        }
584    };
585
586    (y0p..y1p).into_par_iter().for_each(row_body);
587}
588
589/// S4B.4.b: per-chunk variant of [`update_lighting`].
590///
591/// Writes alpha bytes into one chunk's slab buffer; reads
592/// neighbour-chunk voxels through `column_reader` for `estnorm`'s
593/// 5×5×5 padding. The reader takes chunk-local `(x, y)` (which can
594/// extend `±ESTNORMRAD` past the chunk's `[0, target_vsid)` extent)
595/// and returns the column at that position — typically resolved
596/// through `Grid::chunk(IVec3)` so the bake gets seamless
597/// cross-chunk neighbourhood reads without materialising a stitched
598/// combined view (Approach C retirement, S4B.4.b).
599///
600/// `(x0, y0, z0, x1, y1, z1)` is the bake region in chunk-local
601/// coords (typically `(0, 0, 0)..(CHUNK_SIZE_XY, CHUNK_SIZE_XY,
602/// CHUNK_SIZE_Z)`). Writes clip to the target chunk's vsid; reads
603/// extend into neighbour chunks via the closure.
604///
605/// `lightmode`, `lights`, and the per-voxel arithmetic match
606/// [`update_lighting`]; only the cache build + write-region
607/// scoping differ.
608#[allow(clippy::too_many_arguments)]
609pub fn update_lighting_chunk<'r>(
610    target_data: &mut [u8],
611    target_column_offsets: &[u32],
612    target_vsid: u32,
613    x0: i32,
614    y0: i32,
615    z0: i32,
616    x1: i32,
617    y1: i32,
618    z1: i32,
619    column_reader: impl Fn(i32, i32) -> Option<&'r [u8]>,
620    lightmode: u32,
621    lights: &[LightSrc],
622) {
623    if lightmode == 0 {
624        return;
625    }
626    let target_vsid_i = target_vsid as i32;
627
628    // Padded region for the cache (cross-chunk reads via reader).
629    // Z clamps to [0, MAXZDIM) because each chunk's slab data is
630    // chunk-local in z. For stacked grids (S4B.6) the caller
631    // invokes us once per chunk-z layer; cross-chz padding at the
632    // top/bottom of a chunk gets clipped here (a follow-up could
633    // pass z-aware columns to lift this). X/y intentionally don't
634    // clamp — the reader pulls from neighbour chunks via its own
635    // coord translation.
636    let z0p = (z0 - ESTNORMRAD).max(0);
637    let z1p = (z1 + ESTNORMRAD).min(MAXZDIM);
638    // Write region clipped to the target chunk's footprint.
639    let wx0 = x0.max(0);
640    let wy0 = y0.max(0);
641    let wx1 = x1.min(target_vsid_i);
642    let wy1 = y1.min(target_vsid_i);
643    if wx0 >= wx1 || wy0 >= wy1 || z0p >= z1p {
644        return;
645    }
646
647    let cache = EstNormCache::build_with_reader(column_reader, x0, y0, x1, y1);
648    apply_lighting_with_cache(
649        target_data,
650        target_column_offsets,
651        target_vsid,
652        wx0,
653        wy0,
654        z0p,
655        wx1,
656        wy1,
657        z1p,
658        &cache,
659        lightmode,
660        lights,
661    );
662}
663
664/// S4B.4.b: write half of [`update_lighting_chunk`], split out so
665/// callers can build the [`EstNormCache`] separately (via
666/// [`EstNormCache::build_with_reader`]) and pass it in.
667///
668/// The split matters when the cache build needs an immutable grid
669/// borrow (for cross-chunk reads) and the write phase needs a
670/// mutable target-chunk borrow — the two can't coexist. The
671/// caller builds the cache first while holding the immutable
672/// borrow, drops it, then mutably borrows the target chunk and
673/// invokes this.
674///
675/// The `(x0..x1, y0..y1, z0..z1)` region must already be clipped
676/// to the target chunk's footprint (this helper does no clipping).
677/// `cache` must cover at least `[x0..x1) × [y0..y1)` (a `±ESTNORMRAD`
678/// padding is the caller's responsibility — typically built via
679/// `build_with_reader(.., x0, y0, x1, y1)` which adds the padding
680/// itself).
681#[allow(clippy::too_many_arguments)]
682pub fn apply_lighting_with_cache(
683    target_data: &mut [u8],
684    target_column_offsets: &[u32],
685    target_vsid: u32,
686    x0: i32,
687    y0: i32,
688    z0: i32,
689    x1: i32,
690    y1: i32,
691    z1: i32,
692    cache: &EstNormCache,
693    lightmode: u32,
694    lights: &[LightSrc],
695) {
696    if lightmode == 0 || x0 >= x1 || y0 >= y1 || z0 >= z1 {
697        return;
698    }
699
700    let lightsub: Vec<f32> = lights.iter().map(|l| 1.0 / (l.r2.sqrt() * l.r2)).collect();
701
702    let region_w = (x1 - x0) as usize;
703    let region_h = (y1 - y0) as usize;
704    let mut column_extents: Vec<(usize, usize)> = Vec::with_capacity(region_w * region_h);
705    for yi in 0..region_h {
706        let y = y0 + yi as i32;
707        for xi in 0..region_w {
708            let x = x0 + xi as i32;
709            let col_idx = (y as u32) * target_vsid + (x as u32);
710            let start = target_column_offsets[col_idx as usize] as usize;
711            let end = start + roxlap_formats::vxl::slng(&target_data[start..]);
712            column_extents.push((start, end));
713        }
714    }
715
716    let world_view = WorldDataMutView::new(target_data);
717    let row_body = |y: i32| {
718        let yi = (y - y0) as usize;
719        for x in x0..x1 {
720            let xi = (x - x0) as usize;
721            let (off_start, off_end) = column_extents[yi * region_w + xi];
722            // SAFETY: per-column byte ranges are pairwise disjoint
723            // across distinct `(x, y)` (voxalloc invariant).
724            let column = unsafe { world_view.column_slice(off_start, off_end) };
725            shade_column(column, x, y, z0, z1, lightmode, lights, &lightsub, cache);
726        }
727    };
728
729    (y0..y1).into_par_iter().for_each(row_body);
730}
731
/// A `&mut [u8]` taken apart into pointer + length so that the
/// parallel bake loops in this module can give each worker thread
/// its own per-column `&mut [u8]` without every thread needing the
/// (exclusive) original borrow. The view is created from one
/// `&mut [u8]` just before the parallel section, and the `'a`
/// lifetime of that borrow bounds everything handed out.
///
/// # Safety contract
/// Whoever requests slices concurrently MUST make sure the requested
/// ranges never overlap across threads. The bake loops rely on
/// voxalloc keeping each column's bytes in its own disjoint range.
struct WorldDataMutView<'a> {
    ptr: *mut u8,
    len: usize,
    _marker: std::marker::PhantomData<&'a mut [u8]>,
}

// SAFETY: the view stands in for a `&mut [u8]`. Its own fields are
// plain scalars that are only ever read after construction, and
// exclusive access to the bytes behind `ptr` is the caller's
// obligation under the disjoint-range contract above.
unsafe impl Send for WorldDataMutView<'_> {}
unsafe impl Sync for WorldDataMutView<'_> {}

impl<'a> WorldDataMutView<'a> {
    /// Capture `buf`'s address and length for the duration of `'a`.
    fn new(buf: &'a mut [u8]) -> Self {
        let len = buf.len();
        let ptr = buf.as_mut_ptr();
        Self {
            ptr,
            len,
            _marker: std::marker::PhantomData,
        }
    }

    /// Hand out the bytes `off_start..off_end` as a mutable slice.
    ///
    /// # Safety
    /// `off_start <= off_end <= self.len` must hold, and the range
    /// must not overlap any range that another thread holds at the
    /// same time. The bounds are only checked in debug builds.
    unsafe fn column_slice(&self, off_start: usize, off_end: usize) -> &'a mut [u8] {
        debug_assert!(off_start <= off_end, "column slice: start > end");
        debug_assert!(off_end <= self.len, "column slice: end past buffer");
        let span = off_end - off_start;
        // SAFETY: the caller guarantees the range is inside the
        // original buffer and not aliased by any other live slice.
        unsafe {
            let base = self.ptr.add(off_start);
            std::slice::from_raw_parts_mut(base, span)
        }
    }
}
780
781/// Walk one column's slab chain and shade every visible voxel
782/// inside `[z_lo, z_hi)`. Mirror of the inner loop in
783/// voxlap5.c:10588-10650.
784#[allow(clippy::cast_lossless)]
785fn shade_column(
786    column: &mut [u8],
787    x: i32,
788    y: i32,
789    z_lo: i32,
790    z_hi: i32,
791    lightmode: u32,
792    lights: &[LightSrc],
793    lightsub: &[f32],
794    cache: &EstNormCache,
795) {
796    let mut v_off: usize = 0;
797    // cstat = false ⇒ top-of-slab phase (floor colours); true ⇒
798    // ceiling-of-next-slab phase (bottom of current slab's solid
799    // mass, visible from the air pocket below).
800    let mut cstat = false;
801    loop {
802        let (sz0, sz1, voxel_byte_offset_signed): (i32, i32, isize);
803        if !cstat {
804            // Floor colours of the current slab. Voxel z=v[1]..=v[2].
805            // Alpha byte at offset (z - v[1]) * 4 + 7 from header
806            // (header is 4 bytes, voxel record is 4 bytes BGRA, +3
807            // for alpha). The voxlap formula encodes this as
808            // `(z << 2) + offs` with `offs = 7 - (v[1] << 2)`.
809            if v_off + 2 >= column.len() {
810                break;
811            }
812            let v1 = i32::from(column[v_off + 1]);
813            let v2 = i32::from(column[v_off + 2]);
814            sz0 = v1;
815            sz1 = v2 + 1;
816            voxel_byte_offset_signed = (v_off as isize) + 7 - ((sz0 as isize) << 2);
817            cstat = true;
818        } else {
819            // Ceiling colours of the next slab — must read v[0]
820            // BEFORE advancing v_off.
821            if v_off + 2 >= column.len() {
822                break;
823            }
824            let v0 = i32::from(column[v_off]);
825            let v1 = i32::from(column[v_off + 1]);
826            let v2 = i32::from(column[v_off + 2]);
827            let prev_offset = v2 - v1 - v0 + 2; // ceilnum from getcube convention
828            if v0 == 0 {
829                break;
830            }
831            v_off += (v0 as usize) * 4;
832            if v_off + 3 >= column.len() {
833                break;
834            }
835            let v3 = i32::from(column[v_off + 3]);
836            sz1 = v3;
837            sz0 = prev_offset + sz1;
838            voxel_byte_offset_signed = (v_off as isize) + 3 - ((sz1 as isize) << 2);
839            cstat = false;
840        }
841
842        let lo = sz0.max(z_lo);
843        let hi = sz1.min(z_hi);
844        for z in lo..hi {
845            let normal = cache.estnorm(x, y, z);
846            let brightness = compute_brightness(x, y, z, normal, lightmode, lights, lightsub);
847            let byte_off = voxel_byte_offset_signed + ((z as isize) << 2);
848            if byte_off >= 0 && (byte_off as usize) < column.len() {
849                column[byte_off as usize] = brightness;
850            }
851        }
852    }
853}
854
855/// Voxlap's per-voxel brightness math. Computes the `[0, 255]`
856/// alpha byte for one voxel from its surface normal `tp` + the
857/// light list. Mirror of voxlap5.c:10605-10646.
858fn compute_brightness(
859    x: i32,
860    y: i32,
861    z: i32,
862    tp: [f32; 3],
863    lightmode: u32,
864    lights: &[LightSrc],
865    lightsub: &[f32],
866) -> u8 {
867    if lightmode < 2 {
868        // Directional path (voxlap5.c:10607-10612): single sun
869        // direction baked into a hardcoded coefficient pair.
870        // i = (tp.y * 0.5 + tp.z) * 64 + 103.5, clamped to [0, 255].
871        let f = (tp[1] * 0.5 + tp[2]) * 64.0 + 103.5;
872        clamp_to_byte(f)
873    } else {
874        // Point-light path (voxlap5.c:10614-10645). Base brightness
875        // 47.5..63.5 + per-light front-face contribution.
876        let mut f = (tp[1] * 0.5 + tp[2]) * 16.0 + 47.5;
877        let xf = x as f32;
878        let yf = y as f32;
879        let zf = z as f32;
880        for (i, light) in lights.iter().enumerate() {
881            let fx = light.pos[0] - xf;
882            let fy = light.pos[1] - yf;
883            let fz = light.pos[2] - zf;
884            // tp · light_delta: positive ⇒ surface faces away from
885            // light (back-lit, no contribution); negative ⇒ surface
886            // faces light (front-lit, lambertian contribution).
887            let h = tp[0] * fx + tp[1] * fy + tp[2] * fz;
888            if h >= 0.0 {
889                continue;
890            }
891            let g_sq = fx * fx + fy * fy + fz * fz;
892            if g_sq >= light.r2 {
893                continue;
894            }
895            // Voxlap's SSE rcpss/rsqrtss sequence:
896            //   g = (1/g_sq) * rsqrt(g_sq) - lightsub[i]
897            //     = 1/(g_sq * sqrt(g_sq)) - 1/(r2 * sqrt(r2))
898            //     = 1/d³ - 1/r³
899            // The `_mm_rcp_ss` / `_mm_rsqrt_ss` are 12-bit
900            // approximations; the exact `f32::sqrt`-based form
901            // here is more precise but may drift from voxlap C.
902            // Bit-exactness will require switching to the
903            // intrinsic versions on x86_64; deferred until
904            // diag_down_lit oracle convergence demands it.
905            let g = 1.0 / (g_sq * g_sq.sqrt()) - lightsub[i];
906            f -= g * h * light.sc;
907        }
908        clamp_to_byte(f)
909    }
910}
911
/// Convert a brightness value to its alpha byte, pinning it to
/// `[0, 255]` and truncating the fraction.
#[inline]
fn clamp_to_byte(f: f32) -> u8 {
    // Voxlap caps the top end with the bit trick
    // `if (*(int32_t *)&f > 0x437f0000) f = 255`, which is just
    // `if (f > 255.0) f = 255.0`, and lets negatives wrap through
    // `ftol` / the cast. Both ends are clamped explicitly here.
    match f {
        v if v >= 255.0 => 255,
        v if v <= 0.0 => 0,
        v => v as u8,
    }
}
925
#[cfg(test)]
mod tests {
    use super::*;

    /// Build a `vsid × vsid` world in which every column is the same
    /// single slab: header `[nextptr=0, z1=20, z2=24, 0]` followed by
    /// five BGRA voxel records (z = 20..=24) whose alpha byte is
    /// pre-set to `alpha`.
    ///
    /// Returns `(data, offsets)`: the concatenated column bytes and
    /// the per-column start offsets, with one trailing entry equal to
    /// `data.len()`.
    fn flat_floor_world(vsid: u32, alpha: u8) -> (Vec<u8>, Vec<u32>) {
        let mut col = vec![0u8, 20, 24, 0]; // header: nextptr=0, z1=20, z2=24
        for _ in 20..=24 {
            col.extend([0x10, 0x20, 0x30, alpha]);
        }
        let columns = (vsid * vsid) as usize;
        let mut data = Vec::with_capacity(col.len() * columns);
        let mut offsets = Vec::with_capacity(columns + 1);
        for _ in 0..columns {
            offsets.push(data.len() as u32);
            data.extend_from_slice(&col);
        }
        offsets.push(data.len() as u32);
        assert_eq!(col.len() * columns, data.len());
        (data, offsets)
    }

    /// xbsflor(0) = -1 (all bits set), xbsflor(32) clamped to 0,
    /// xbsflor(5) = ~31 = 0xffff_ffe0; xbsceil is the complementary
    /// low-bit mask.
    #[test]
    fn xbsflor_xbsceil_known_values() {
        assert_eq!(xbsflor(0), 0xffff_ffff);
        assert_eq!(xbsflor(1), 0xffff_fffe);
        assert_eq!(xbsflor(5), 0xffff_ffe0);
        assert_eq!(xbsflor(31), 0x8000_0000);
        assert_eq!(xbsflor(32), 0);
        assert_eq!(xbsceil(0), 0);
        assert_eq!(xbsceil(5), 0x1f);
        assert_eq!(xbsceil(31), 0x7fff_ffff);
        assert_eq!(xbsceil(32), 0xffff_ffff);
    }

    /// Single-slab column [next=0, sz0=10, sz1=14, then 5 voxel
    /// records]. Voxels exist at z = 10..=14. After expandbit256 the
    /// bits for z = 0..10 must be clear (air above the slab) and every
    /// bit from z = 10 down to 255 must be set: 10..=14 from the slab
    /// body, 15.. because voxlap treats everything below the last
    /// slab as solid.
    #[test]
    fn single_slab_z10_to_14_sets_correct_bits() {
        // Column layout: [next=0, sz0=10, sz1=14, top_color, then 5x
        // voxel records of 4 bytes each]. We don't use the voxel
        // record contents; expandbit256 only reads v[0]..v[3].
        let mut col = vec![0u8, 10, 14, 0]; // header
        col.extend(vec![0u8; 5 * 4]); // 5 voxel records (z=10..=14)

        let mut bits = [0u32; 8];
        expandbit256(&col, &mut bits);

        // Word 0 covers bits 0..32.
        // bits 10..=14 from the slab body: 0x7c00
        // bits 15..32 from "solid below last slab": 0xffff_8000
        // Combined: 0xffff_fc00.
        assert_eq!(
            bits[0], 0xffff_fc00,
            "word 0 want 0xffff_fc00 got 0x{:08x}",
            bits[0]
        );
        // Words 1..7 should all be 0xffff_ffff (fully solid).
        for (i, w) in bits.iter().enumerate().skip(1) {
            assert_eq!(*w, 0xffff_ffff, "word {i} want -1 got 0x{:08x}", *w);
        }
    }

    /// fsqrecip[N] should match `1/sqrt(N)` to a reasonable
    /// tolerance for the values estnorm actually produces.
    #[test]
    fn fsqrecip_matches_1_over_sqrt() {
        let t = build_fsqrecip();
        for k in 1..=100 {
            let want = 1.0_f32 / (k as f32).sqrt();
            let got = t[k];
            let err = (got - want).abs();
            assert!(err < 1e-3, "fsqrecip[{k}] = {got}, want {want}, err {err}");
        }
        // Spot-check higher values (less precise but still close).
        for k in [500, 1000, 2000, 5000] {
            let want = 1.0_f32 / (k as f32).sqrt();
            let got = t[k];
            let rel = (got / want - 1.0).abs();
            assert!(
                rel < 0.01,
                "fsqrecip[{k}] = {got}, want {want}, rel-err {rel}"
            );
        }
    }

    /// Build a 4×4 synthetic world with a flat floor at z=20..=24,
    /// run lightmode-1 update_lighting with the bbox over the centre
    /// 2×2, and verify on the (1, 1) column that (a) every alpha byte
    /// was rewritten (no longer the 0xab sentinel) and (b) every
    /// rewritten value is on the bright side (> 100), as expected for
    /// a flat floor under the lightmode-1 formula.
    #[test]
    fn lightmode1_bakes_brightness_into_visible_voxels() {
        // 4×4 world, single slab at z=20..=24; alpha pre-set to 0xab
        // so a rewrite is detectable.
        let vsid: u32 = 4;
        let (mut data, offsets) = flat_floor_world(vsid, 0xab);

        update_lighting(
            &mut data,
            &offsets,
            vsid,
            1,
            1,
            0,
            3,
            3,
            30, // bbox 1..=2 in xy, z 0..30
            1,  // lightmode 1
            &[],
        );

        // Pull every voxel record's alpha byte from the centre
        // (1, 1) column. None may still hold the 0xab sentinel.
        let off1 = offsets[(1 * vsid + 1) as usize] as usize;
        let alphas: Vec<u8> = (0..5).map(|i| data[off1 + 4 + i * 4 + 3]).collect();
        for (i, &a) in alphas.iter().enumerate() {
            assert_ne!(a, 0xab, "alpha[{i}] not rewritten");
        }
        // The shading should be mostly bright — flat-floor voxels
        // have ~vertical normals so `(tp.y*0.5 + tp.z)*64 + 103.5`
        // ≈ 1.0*64 + 103.5 = 167.5.
        for (i, &a) in alphas.iter().enumerate() {
            assert!(
                a > 100,
                "alpha[{i}]={a} should be on the bright side for top-of-floor voxels"
            );
        }
    }

    /// lightmode-2 smoke test on a 5×5 flat-floor world with a single
    /// light at (4, 2, 20), i.e. at the +x end of the y = 2 row.
    /// Asserts that the top floor voxel of the column nearest the
    /// light (x = 4) is at least as bright as the one farthest from
    /// it (x = 0). The check is `>=`, so it guards against the near
    /// column coming out darker but does not by itself prove that the
    /// two columns differ.
    #[test]
    fn lightmode2_with_light_produces_per_column_variation() {
        let vsid: u32 = 5;
        let (mut data, offsets) = flat_floor_world(vsid, 0);

        let lights = [LightSrc {
            // World coords: light at the top floor voxel of column (4, 2).
            pos: [4.0, 2.0, 20.0],
            r2: 50.0 * 50.0,
            sc: 64.0,
        }];
        update_lighting(&mut data, &offsets, vsid, 0, 0, 0, 5, 5, 30, 2, &lights);

        // Sample the alpha of one floor voxel per column along y=2
        // (`z_idx` 0 is the top voxel, z = 20).
        let alpha_at = |x: u32, z_idx: usize| {
            let off = offsets[(2 * vsid + x) as usize] as usize;
            data[off + 4 + z_idx * 4 + 3]
        };
        let close = alpha_at(4, 0); // closest column to light
        let far = alpha_at(0, 0); // farthest
        assert!(
            close >= far,
            "column nearer the light should be ≥ as bright as the far one (close={close} far={far})"
        );
    }

    /// Header-only column `[0, 0, 0, 0]`: a last slab (`next = 0`)
    /// whose floor starts at z = 0, with no voxel records behind the
    /// header. There is no air above z = 0 and everything below the
    /// last slab counts as solid, so expandbit256 must report word 0
    /// as fully set.
    #[test]
    fn header_only_slab_at_z0_is_solid_from_top() {
        let col = vec![0u8, 0, 0, 0]; // single-slab header at z=0..=0, no body
        let mut bits = [0u32; 8];
        expandbit256(&col, &mut bits);
        // word 0: bit 0 from the slab itself, bits 1..32 from
        // "solid below last slab" ⇒ 0xffff_ffff.
        assert_eq!(
            bits[0], 0xffff_ffff,
            "empty column word 0 want all-1 got 0x{:08x}",
            bits[0]
        );
    }
}