roxlap_core/world_lighting.rs
1//! Voxlap's world-voxel lighting bake (`updatelighting`,
2//! voxlap5.c:10539).
3//!
4//! Walks every visible voxel inside a 3D bounding box and rewrites
5//! its alpha byte (the per-voxel "brightness" channel that the
6//! rendering path mulhi'es against `kv6colmul`-style modulators)
7//! based on the engine's current `LightSrc` set + lightmode.
8//!
9//! Two modes:
10//! - `lightmode == 1`: cheap directional bake — every voxel gets
11//! shading from a single hardcoded sun direction
12//! `(tp.y * 0.5 + tp.z) * 64 + 103.5` clamped to `[0, 255]`.
13//! - `lightmode == 2`: per-light Lambertian bake — for each light
14//! in range, subtract `g * h * sc` where `g = 1/(d·d²) -
15//! 1/(r·r²)` (cube-falloff with hard cutoff at radius `r`),
16//! `h = surface_normal · light_delta` (negative ⇒ face front-
17//! lit, contributes; positive ⇒ self-shadowed, skipped). Result
18//! subtracts from a base `(tp.y * 0.5 + tp.z) * 16 + 47.5`.
19//!
20//! The surface normal `tp` for each voxel comes from `estnorm` —
21//! a 5×5×5 voxel-solid neighbourhood vote (`ESTNORMRAD == 2` in
22//! voxlap, the production path).
23
24#![allow(
25 clippy::cast_possible_truncation,
26 clippy::cast_possible_wrap,
27 clippy::cast_sign_loss,
28 clippy::cast_precision_loss,
29 clippy::similar_names,
30 clippy::too_many_arguments,
31 clippy::too_many_lines,
32 clippy::doc_markdown,
33 clippy::many_single_char_names,
34 clippy::must_use_candidate,
35 clippy::unnecessary_cast,
36 clippy::cast_lossless,
37 clippy::needless_bool_assign,
38 clippy::needless_range_loop,
39 clippy::no_effect,
40 clippy::identity_op,
41 clippy::if_not_else
42)]
43
44use rayon::prelude::*;
45
46use crate::engine::LightSrc;
47
/// Number of z levels in a world column — voxlap's `MAXZDIM`
/// (`voxlap5.c`). Valid world z coordinates are `0..MAXZDIM`; the
/// slab format stores each z in a single byte, hence `2^8`.
pub(crate) const MAXZDIM: i32 = 1 << 8;

/// Radius of the neighbourhood sampled by the normal estimator
/// (voxlap's `ESTNORMRAD == 2` configuration). The estimator
/// inspects `(2 * ESTNORMRAD + 1)³ = 5 × 5 × 5` voxels around the
/// voxel being shaded.
pub(crate) const ESTNORMRAD: i32 = 2;
54
/// Population-count lookup for 5-bit values — voxlap's
/// `bitnum[32]` (voxlap5.c:1477). `BITNUM[i]` is the number of set
/// bits in `i` for `i ∈ 0..32`; estnorm uses it to count the solid
/// voxels inside one column's 5-voxel z window.
pub(crate) const BITNUM: [i8; 32] = {
    let mut table = [0i8; 32];
    let mut index = 0usize;
    while index < 32 {
        table[index] = (index as u32).count_ones() as i8;
        index += 1;
    }
    table
};
61
/// Packed count / z-moment lookup for 5-bit windows — voxlap's
/// `bitsnum[32]` (voxlap5.c:1487).
///
/// For a 5-bit window value `i` (bit `b` = voxel at window offset
/// `b`), every set bit contributes `1 + ((b - 2) << 16)`:
/// - the low 16-bit lane accumulates the number of set bits;
/// - the high 16-bit lane accumulates the signed offset `b - 2`
///   (`-2, -1, 0, +1, +2`), i.e. the window's first moment along z.
///
/// The values are identical to voxlap's literal table; they are
/// generated here from the per-bit weights instead of being typed
/// out.
pub(crate) const BITSNUM: [i32; 32] = {
    let mut table = [0i32; 32];
    let mut index = 0usize;
    while index < 32 {
        let mut packed = 0i32;
        let mut bit = 0i32;
        while bit < 5 {
            if (index >> bit) & 1 == 1 {
                packed += 1 + ((bit - 2) << 16);
            }
            bit += 1;
        }
        table[index] = packed;
        index += 1;
    }
    table
};
78
/// Floor mask — voxlap's `xbsflor[k] = -1 << k`: bits `k..=31` set,
/// the low `k` bits clear. `expandbit256` ORs it into a partial
/// word at an air→solid transition. Any `k >= 32` yields `0`
/// (nothing left to set in this word).
pub(crate) const fn xbsflor(k: usize) -> u32 {
    if k < 32 {
        u32::MAX << k
    } else {
        0
    }
}
89
/// Ceiling mask — voxlap's `xbsceil[k] = ~xbsflor[k]`: the low `k`
/// bits set, everything from bit `k` upward clear. `expandbit256`
/// ANDs it into a partial word at a solid→air transition. Any
/// `k >= 32` yields all ones (the whole word is kept).
pub(crate) const fn xbsceil(k: usize) -> u32 {
    if k < 32 {
        (1u32 << k) - 1
    } else {
        u32::MAX
    }
}
95
96/// `expandbit256` — slab structure → 256-bit "voxel solid" bit
97/// array (low-bit-first, low-z-first). Mirror of voxlap5.c:1059.
98///
99/// The output `bits` is a `[u32; 8]` (= 256 bits = `MAXZDIM` z
100/// levels). Bit `z` is set iff voxel at column `(x, y)`, depth `z`
101/// is solid (= part of any slab body, including hidden interiors
102/// between slabs).
103///
104/// Walks the slab linked list, alternating between `v[1]`
105/// (air→solid transition at top of slab) and `v[3]` (solid→air
106/// transition at bottom of next slab). Each transition flushes
107/// pending whole-words (full air `0` or full solid `-1`) until
108/// it lands inside the partial word containing the transition,
109/// then OR/ANDs the partial mask via `xbsflor` / `xbsceil`.
110pub(crate) fn expandbit256(column: &[u8], bits: &mut [u32; 8]) {
111 let mut src_idx: usize = 0;
112 let mut dst_idx: usize = 0;
113 let mut bitpos: i32 = 32;
114 let mut word: u32 = 0;
115 let nbits: i32 = (bits.len() as i32) * 32;
116
117 // First iteration: jump straight to the v[1] transition (no
118 // preceding slab whose v[3] we'd need to flush).
119 let mut next_len: i32;
120 let mut delta: i32;
121 let mut go_to_v3 = false;
122
123 'outer: loop {
124 if go_to_v3 {
125 // v[3] : solid → air transition.
126 if src_idx + 3 >= column.len() {
127 break;
128 }
129 delta = i32::from(column[src_idx + 3]) - bitpos;
130 while delta >= 0 {
131 if dst_idx >= bits.len() {
132 break 'outer;
133 }
134 bits[dst_idx] = word;
135 dst_idx += 1;
136 word = u32::MAX;
137 bitpos += 32;
138 delta -= 32;
139 }
140 word &= xbsceil((delta + 32) as usize);
141 }
142 go_to_v3 = true;
143
144 // v[1] : air → solid transition.
145 if src_idx + 1 >= column.len() {
146 break;
147 }
148 delta = i32::from(column[src_idx + 1]) - bitpos;
149 while delta >= 0 {
150 if dst_idx >= bits.len() {
151 break 'outer;
152 }
153 bits[dst_idx] = word;
154 dst_idx += 1;
155 word = 0;
156 bitpos += 32;
157 delta -= 32;
158 }
159 word |= xbsflor((delta + 32) as usize);
160
161 next_len = i32::from(column[src_idx]);
162 if next_len == 0 {
163 break;
164 }
165 src_idx += (next_len as usize) * 4;
166 }
167
168 // Pad the rest of the buffer with `word`'s tail value (in C the
169 // post-loop word is whatever the last `v[1]` partial-set
170 // produced; remaining whole-words flush as solid `-1`).
171 if bitpos <= nbits {
172 while dst_idx < bits.len() {
173 bits[dst_idx] = word;
174 dst_idx += 1;
175 word = u32::MAX;
176 }
177 }
178}
179
/// Pre-built `expandbit256` grid covering a 2D bounding region —
/// `(x1 - x0 + 2*RAD) × (y1 - y0 + 2*RAD)` columns. Trades 32
/// bytes per column of memory for O(1) bit-window lookups during
/// the estnorm 5×5 neighbourhood vote.
///
/// This is the conceptual equivalent of voxlap's `xbsbuf` cache —
/// just batch-pre-built rather than rotated row-by-row through
/// the bake. Memory cost stays manageable: a 448×448 bake (which
/// extends to 452×452 columns with padding) needs
/// `452 · 452 · 32` bytes, roughly 6.5 MB.
#[derive(Debug, Clone)]
#[allow(dead_code)] // `height` / `vsid` are kept for voxlap-parity inspection
pub struct EstNormCache {
    /// Per-column bit arrays, row-major. `bits[yidx * width + xidx]`
    /// is the solidity mask of world column
    /// `(origin_x + xidx, origin_y + yidx)` with `xidx ∈ 0..width`
    /// and `yidx ∈ 0..height`. In terms of the build rectangle the
    /// covered world-x range is `[x0 - RAD, x1 - 1 + RAD]` (same
    /// for y).
    bits: Vec<[u32; 8]>,
    /// World x of cache column 0 (= build-time `x0 - RAD`).
    origin_x: i32,
    /// World y of cache row 0 (= build-time `y0 - RAD`).
    origin_y: i32,
    /// Cached-region width in columns (= `x1 - x0 + 2 * RAD`).
    width: usize,
    /// Cached-region height in rows (= `y1 - y0 + 2 * RAD`). Not
    /// read by the lookup path; kept so the layout can be inspected
    /// without recomputing it from `bits.len()`.
    height: usize,
    /// Inverse-square-root LUT — `fsqrecip[k] ≈ 1 / sqrt(k)` for
    /// `k ∈ 1..=5859`, with `fsqrecip[0] = 0`. Voxlap's `fsqrecip`
    /// table; used to normalise the integer normal vote.
    fsqrecip: Vec<f32>,
    /// Voxel-grid limit (= `vsid`) of the world the cache was built
    /// from; `0` for reader-based (chunk-aware) builds.
    vsid: i32,
}
214
/// Build voxlap's 5860-entry inverse-square-root table
/// (`fsqrecip`, voxlap5.c:12240-12256): `table[k] ≈ 1 / sqrt(k)`,
/// with `table[0] = 0`.
///
/// Entries 1–3 are computed directly. The rest are filled six at a
/// time starting at `z = 4`:
/// - even indices `z`, `z+2`, `z+4` as `table[n / 2] · table[2]`;
/// - `z + 5` (always a multiple of 3) as `table[n / 3] · table[3]`;
/// - odd indices `z+1`, `z+3` as the mean of their two even
///   neighbours, polished with Newton–Raphson — two passes while
///   `z <= 22`, one pass afterwards (the same asymmetric schedule
///   as the C code).
fn build_fsqrecip() -> Vec<f32> {
    const N: usize = 5860;

    // One Newton–Raphson pass towards 1/sqrt(k) from estimate `est`.
    let refine = |k: usize, est: f32| -> f32 { (1.5 - 0.5 * (k as f32) * est * est) * est };

    let mut table = vec![0.0_f32; N];
    table[1] = 1.0;
    table[2] = 1.0_f32 / 2.0_f32.sqrt();
    table[3] = 1.0 / 3.0_f32.sqrt();
    let inv_sqrt2 = table[2];
    let inv_sqrt3 = table[3];

    // `z` visits 4, 10, 16, … and stops while `z + 5` is still a
    // valid index.
    for (round, z) in (4..N - 5).step_by(6).enumerate() {
        // `z + 5 == 3 * third`, and `third` is already filled.
        let third = 3 + 2 * round;

        table[z] = table[z >> 1] * inv_sqrt2;
        table[z + 2] = table[(z + 2) >> 1] * inv_sqrt2;
        table[z + 4] = table[(z + 4) >> 1] * inv_sqrt2;
        table[z + 5] = table[third] * inv_sqrt3;

        let passes = if z <= 22 { 2 } else { 1 };
        for &(target, below, above) in &[(z + 1, z, z + 2), (z + 3, z + 2, z + 4)] {
            let mut est = (table[below] + table[above]) * 0.5_f32;
            for _ in 0..passes {
                est = refine(target, est);
            }
            table[target] = est;
        }
    }
    table
}
254
255impl EstNormCache {
256 /// Build the bit-grid cache covering the bounding region
257 /// `[x0..x1) × [y0..y1)` extended by `ESTNORMRAD` padding on
258 /// each side. Calling [`Self::estnorm`] for any `(x, y)` inside
259 /// the original `[x0..x1) × [y0..y1)` box is then a pure read.
260 ///
261 /// Wraps [`Self::build_with_reader`] with a flat-table closure.
262 #[must_use]
263 pub fn build(
264 world_data: &[u8],
265 column_offsets: &[u32],
266 vsid: u32,
267 x0: i32,
268 y0: i32,
269 x1: i32,
270 y1: i32,
271 ) -> Self {
272 let vsid_i = vsid as i32;
273 let reader = |x: i32, y: i32| -> Option<&[u8]> {
274 if (x | y) < 0 || x >= vsid_i || y >= vsid_i {
275 return None;
276 }
277 let col_idx = (y as u32) * vsid + (x as u32);
278 let off_start = column_offsets[col_idx as usize] as usize;
279 // Slice to end-of-buffer; the slab walker self-
280 // terminates via nextptr.
281 Some(&world_data[off_start..])
282 };
283 let mut cache = Self::build_with_reader(reader, x0, y0, x1, y1);
284 cache.vsid = vsid_i;
285 cache
286 }
287
288 /// S4B.4.b: chunk-aware cache build. The closure
289 /// `column_reader(x, y)` returns the slab bytes of the column
290 /// at world-or-grid-local position `(x, y)`, or `None` for an
291 /// implicit-air / out-of-grid column (matching `build`'s OOB
292 /// "treat as full air" semantics).
293 ///
294 /// No vsid bound — the reader owns OOB handling. Per-chunk
295 /// bakes use a closure that resolves `(x, y)` to a neighbour
296 /// chunk via `Grid::chunk(IVec3)` so the 2-voxel padding
297 /// extends seamlessly across chunk boundaries.
298 ///
299 /// The cache's [`Self::vsid`] field is left at `0` for chunk-
300 /// aware builds — the field is dead-code anyway, preserved
301 /// only for voxlap-parity inspection.
302 #[must_use]
303 pub fn build_with_reader<'r>(
304 column_reader: impl Fn(i32, i32) -> Option<&'r [u8]>,
305 x0: i32,
306 y0: i32,
307 x1: i32,
308 y1: i32,
309 ) -> Self {
310 let rad = ESTNORMRAD;
311 let pad_x0 = x0 - rad;
312 let pad_y0 = y0 - rad;
313 let pad_x1 = x1 + rad;
314 let pad_y1 = y1 + rad;
315 let width = (pad_x1 - pad_x0) as usize;
316 let height = (pad_y1 - pad_y0) as usize;
317
318 let mut bits = vec![[0u32; 8]; width * height];
319 for yi in 0..height {
320 let y = pad_y0 + yi as i32;
321 for xi in 0..width {
322 let x = pad_x0 + xi as i32;
323 if let Some(column) = column_reader(x, y) {
324 expandbit256(column, &mut bits[yi * width + xi]);
325 }
326 // None → leave the cache slot zeroed (treat as full
327 // air), matching `build`'s OOB behaviour.
328 }
329 }
330
331 Self {
332 bits,
333 origin_x: pad_x0,
334 origin_y: pad_y0,
335 width,
336 height,
337 fsqrecip: build_fsqrecip(),
338 vsid: 0,
339 }
340 }
341
342 /// Read 5 consecutive bits starting at z-position `z` from the
343 /// column at `(xi, yi)` cache index. Returns `0..=31`.
344 /// Out-of-range positions:
345 /// - `z < -2`: returns 0 (air above world — though voxlap's
346 /// convention is "above is sky", same effect).
347 /// - `z >= MAXZDIM`: returns `0x1f` (solid below world).
348 #[inline]
349 fn extract_bits5(&self, xi: usize, yi: usize, z: i32) -> u32 {
350 let col = &self.bits[yi * self.width + xi];
351 if z >= MAXZDIM {
352 return 0x1f;
353 }
354 if z + 5 <= 0 {
355 return 0;
356 }
357 // Combine adjacent words to handle the case where the 5-bit
358 // window straddles a word boundary.
359 let z_bit = z;
360 let word_idx = z_bit.div_euclid(32);
361 let bit_off = z_bit.rem_euclid(32) as u32;
362 let lo = if (0..8).contains(&word_idx) {
363 col[word_idx as usize]
364 } else if word_idx < 0 {
365 0 // air above world
366 } else {
367 u32::MAX // solid below world
368 };
369 let hi = if word_idx + 1 < 8 && word_idx >= -1 {
370 col[(word_idx + 1) as usize]
371 } else if word_idx + 1 < 0 {
372 0
373 } else {
374 u32::MAX
375 };
376 let combined = u64::from(lo) | (u64::from(hi) << 32);
377 ((combined >> bit_off) & 0x1f) as u32
378 }
379
380 /// Estimate the surface normal at `(x, y, z)` from a 5×5×5
381 /// voxel-solid neighbourhood vote. Mirror of voxlap5.c:1501
382 /// (`estnorm`, `ESTNORMRAD == 2` branch).
383 ///
384 /// `(x, y)` must lie inside the cache's `[x0..x1) × [y0..y1)`
385 /// region (panics otherwise — caller guarantees this via the
386 /// bounding-box iteration). `z` is unconstrained (handled via
387 /// air/solid clamping).
388 #[must_use]
389 pub fn estnorm(&self, x: i32, y: i32, z: i32) -> [f32; 3] {
390 let center_xi = (x - self.origin_x) as usize;
391 let center_yi = (y - self.origin_y) as usize;
392
393 let mut nx: i32 = 0;
394 let mut ny: i32 = 0;
395 let mut nz: i32 = 0;
396 let z_window = z - ESTNORMRAD; // top of the 5-bit z window
397
398 for yy in -ESTNORMRAD..=ESTNORMRAD {
399 let yi = (center_yi as i32 + yy) as usize;
400 // Read 5 columns at this yy row (xx = -2..=+2).
401 let b0 = self.extract_bits5(center_xi - 2, yi, z_window) as usize;
402 let b1 = self.extract_bits5(center_xi - 1, yi, z_window) as usize;
403 let b2 = self.extract_bits5(center_xi, yi, z_window) as usize;
404 let b3 = self.extract_bits5(center_xi + 1, yi, z_window) as usize;
405 let b4 = self.extract_bits5(center_xi + 2, yi, z_window) as usize;
406
407 // Per-column popcount differences give x-axis normal
408 // contributions. Voxlap weights:
409 // 2*(N(xx=+2) - N(xx=-2)) + N(xx=+1) - N(xx=-1)
410 // = `n.x` from this row (full normal sum is over yy).
411 nx += ((i32::from(BITNUM[b4]) - i32::from(BITNUM[b0])) << 1) + i32::from(BITNUM[b3])
412 - i32::from(BITNUM[b1]);
413
414 // Sum bitsnum across all 5 columns: `j` is the total
415 // signed-i16-packed contribution. Low 16 bits = number
416 // of solid voxels in this row across all 5 columns and
417 // 5 z levels. High 16 bits = z-axis contribution
418 // (positive bits from upper z, negative from lower).
419 let j = BITSNUM[b0]
420 .wrapping_add(BITSNUM[b1])
421 .wrapping_add(BITSNUM[b2])
422 .wrapping_add(BITSNUM[b3])
423 .wrapping_add(BITSNUM[b4]);
424 nz = nz.wrapping_add(j);
425 // n.y picks only the LOW i16 of `j` (= total solid
426 // count), scaled by yy. The high i16 (z contribution)
427 // doesn't enter n.y.
428 let j_lo16 = (j as i16) as i32;
429 ny = ny.wrapping_add(j_lo16 * yy);
430 }
431 nz >>= 16;
432
433 // Normalise via fsqrecip[len_sq]. Voxlap's table peaks at
434 // 5*5*5 box max = 75² + 15² + 3² = 5859 — within
435 // `fsqrecip`'s 5860-entry range. Out-of-range len_sq values
436 // (e.g. all-zero neighbourhood) get `fsqrecip[0] = 0` ⇒
437 // returns `(0, 0, 0)` which downstream lighting math
438 // tolerates.
439 let len_sq = (nx * nx + ny * ny + nz * nz) as usize;
440 let f = if len_sq < self.fsqrecip.len() {
441 self.fsqrecip[len_sq]
442 } else {
443 0.0
444 };
445 [(nx as f32) * f, (ny as f32) * f, (nz as f32) * f]
446 }
447
448 /// Voxel-grid limit; used by callers to bound their iteration.
449 #[must_use]
450 #[allow(dead_code)] // preserved for voxlap-parity inspection
451 pub(crate) fn vsid(&self) -> i32 {
452 self.vsid
453 }
454}
455
456/// Bake per-voxel lighting into the world's brightness bytes.
457/// Mirror of voxlap's `updatelighting` (`voxlap5.c:10539`).
458///
459/// Walks every visible voxel inside `[x0..x1) × [y0..y1) ×
460/// [z0..z1)` and rewrites its alpha byte (the brightness channel
461/// the rasterizer mulhi'es against `kv6colmul` modulators) under
462/// the current `lightmode` + `lights` state.
463///
464/// - `lightmode == 0`: no-op (fast return).
465/// - `lightmode == 1`: directional sun-style bake — every visible
466/// voxel gets `(tp.y * 0.5 + tp.z) * 64 + 103.5` clamped to
467/// `[0, 255]` from its surface normal `tp`.
468/// - `lightmode >= 2`: per-light Lambertian bake — base
469/// `(tp.y * 0.5 + tp.z) * 16 + 47.5` minus, for each light in
470/// range with surface normal facing it, `g * h * sc` where
471/// `g = 1/(d·d²) - 1/(r·r²)` (cube falloff with hard radius
472/// cutoff) and `h = tp · light_delta`.
473///
474/// Voxlap pads the bbox by `ESTNORMRAD` on each side internally
475/// to give estnorm enough neighbourhood; that's done here too.
476/// `lights` should match the engine's full `vx5.lightsrc[]` —
477/// the function does its own per-tile range filtering.
478///
479/// Mutates `world_data` in place. Caller is responsible for any
480/// `column_offsets` / `vsid` invariants.
481pub fn update_lighting(
482 world_data: &mut [u8],
483 column_offsets: &[u32],
484 vsid: u32,
485 x0: i32,
486 y0: i32,
487 z0: i32,
488 x1: i32,
489 y1: i32,
490 z1: i32,
491 lightmode: u32,
492 lights: &[LightSrc],
493) {
494 if lightmode == 0 {
495 return;
496 }
497 let vsid_i = vsid as i32;
498 let x0p = (x0 - ESTNORMRAD).max(0);
499 let y0p = (y0 - ESTNORMRAD).max(0);
500 let z0p = (z0 - ESTNORMRAD).max(0);
501 let x1p = (x1 + ESTNORMRAD).min(vsid_i);
502 let y1p = (y1 + ESTNORMRAD).min(vsid_i);
503 let z1p = (z1 + ESTNORMRAD).min(MAXZDIM);
504 if x0p >= x1p || y0p >= y1p || z0p >= z1p {
505 return;
506 }
507
508 // Build the cache once for the whole padded bake region.
509 // Voxlap tiles the bake into 64×64 chunks with a per-tile
510 // `lightlst` filter; for our (one-shot bake) use case the
511 // full-region filter computed inside the per-voxel loop is
512 // simpler and not measurably slower at oracle bake sizes.
513 let cache = EstNormCache::build(world_data, column_offsets, vsid, x0p, y0p, x1p, y1p);
514
515 // Per-light precomputed `lightsub[i] = 1 / (sqrt(r2) * r2)` —
516 // the radius-cutoff bias that makes the light contribution go
517 // to exactly zero at distance == sqrt(r2).
518 let lightsub: Vec<f32> = lights.iter().map(|l| 1.0 / (l.r2.sqrt() * l.r2)).collect();
519
520 // R12.4.1: parallelise the per-row bake via rayon. Each `(x, y)`
521 // pair maps to a unique column slice in `world_data`
522 // (`column_offsets[col_idx]..[col_idx + 1]` ranges are pairwise
523 // disjoint — the voxalloc allocator's invariant). Rows split
524 // cleanly across worker threads; per-row x-loops stay serial to
525 // amortise rayon's per-task overhead. Speedup follows
526 // `RAYON_NUM_THREADS` (set `=1` to disable).
527 //
528 // Lighting bakes are typically rare (one-shot at scene load) but
529 // dynamic-lighting / per-edit relighting use cases call
530 // `update_lighting` per frame — at which point the parallel
531 // path matters for interactive responsiveness.
532 // Per-column byte extents `(start, end)`. After voxalloc-driven
533 // edits (e.g. cave-gen's heavy `set_spans` carve, or runtime
534 // bullet-impact carves), columns are scattered in the slab
535 // pool, so `column_offsets[i+1]` is NOT column `i`'s end byte
536 // — voxlap walks each column's slab chain via `slng()` to
537 // recover length. We pre-compute extents here serially before
538 // moving `world_data` into the parallel mutable view; the
539 // slng walk is O(slab_count) per column, typically 1-3 slabs.
540 //
541 // **Region-bounded**: only the bake rectangle `[x0p..x1p) ×
542 // [y0p..y1p)` needs extents — the per-row body indexes only
543 // those columns. Sizing the table to `vsid²` is wasteful when
544 // a small chunk-sized region is baked against a large-vsid
545 // world (e.g. S4.1 scene-graph per-chunk bake against a
546 // vsid=4096 combined view — would have been 16M slng walks per
547 // chunk × 1024 chunks = 17B slng walks). The bake-region table
548 // collapses that to `bake_region` walks per call.
549 #[allow(clippy::cast_sign_loss)]
550 let region_w = (x1p - x0p) as usize;
551 #[allow(clippy::cast_sign_loss)]
552 let region_h = (y1p - y0p) as usize;
553 let mut column_extents: Vec<(usize, usize)> = Vec::with_capacity(region_w * region_h);
554 for yi in 0..region_h {
555 #[allow(clippy::cast_possible_wrap)]
556 let y = y0p + yi as i32;
557 for xi in 0..region_w {
558 #[allow(clippy::cast_possible_wrap)]
559 let x = x0p + xi as i32;
560 #[allow(clippy::cast_sign_loss)]
561 let col_idx = (y as u32) * vsid + (x as u32);
562 let start = column_offsets[col_idx as usize] as usize;
563 let end = start + roxlap_formats::vxl::slng(&world_data[start..]);
564 column_extents.push((start, end));
565 }
566 }
567
568 let world_view = WorldDataMutView::new(world_data);
569 let row_body = |y: i32| {
570 #[allow(clippy::cast_sign_loss)]
571 let yi = (y - y0p) as usize;
572 for x in x0p..x1p {
573 #[allow(clippy::cast_sign_loss)]
574 let xi = (x - x0p) as usize;
575 let (off_start, off_end) = column_extents[yi * region_w + xi];
576 // SAFETY: each (x, y) maps to a unique col_idx; column
577 // byte ranges `[off_start, off_end)` are pairwise
578 // disjoint across distinct `col_idx` (voxalloc's
579 // free-list invariant), so no two threads write to
580 // the same byte.
581 let column = unsafe { world_view.column_slice(off_start, off_end) };
582 shade_column(column, x, y, z0p, z1p, lightmode, lights, &lightsub, &cache);
583 }
584 };
585
586 (y0p..y1p).into_par_iter().for_each(row_body);
587}
588
589/// S4B.4.b: per-chunk variant of [`update_lighting`].
590///
591/// Writes alpha bytes into one chunk's slab buffer; reads
592/// neighbour-chunk voxels through `column_reader` for `estnorm`'s
593/// 5×5×5 padding. The reader takes chunk-local `(x, y)` (which can
594/// extend `±ESTNORMRAD` past the chunk's `[0, target_vsid)` extent)
595/// and returns the column at that position — typically resolved
596/// through `Grid::chunk(IVec3)` so the bake gets seamless
597/// cross-chunk neighbourhood reads without materialising a stitched
598/// combined view (Approach C retirement, S4B.4.b).
599///
600/// `(x0, y0, z0, x1, y1, z1)` is the bake region in chunk-local
601/// coords (typically `(0, 0, 0)..(CHUNK_SIZE_XY, CHUNK_SIZE_XY,
602/// CHUNK_SIZE_Z)`). Writes clip to the target chunk's vsid; reads
603/// extend into neighbour chunks via the closure.
604///
605/// `lightmode`, `lights`, and the per-voxel arithmetic match
606/// [`update_lighting`]; only the cache build + write-region
607/// scoping differ.
608#[allow(clippy::too_many_arguments)]
609pub fn update_lighting_chunk<'r>(
610 target_data: &mut [u8],
611 target_column_offsets: &[u32],
612 target_vsid: u32,
613 x0: i32,
614 y0: i32,
615 z0: i32,
616 x1: i32,
617 y1: i32,
618 z1: i32,
619 column_reader: impl Fn(i32, i32) -> Option<&'r [u8]>,
620 lightmode: u32,
621 lights: &[LightSrc],
622) {
623 if lightmode == 0 {
624 return;
625 }
626 let target_vsid_i = target_vsid as i32;
627
628 // Padded region for the cache (cross-chunk reads via reader).
629 // Z clamps to [0, MAXZDIM) because each chunk's slab data is
630 // chunk-local in z. For stacked grids (S4B.6) the caller
631 // invokes us once per chunk-z layer; cross-chz padding at the
632 // top/bottom of a chunk gets clipped here (a follow-up could
633 // pass z-aware columns to lift this). X/y intentionally don't
634 // clamp — the reader pulls from neighbour chunks via its own
635 // coord translation.
636 let z0p = (z0 - ESTNORMRAD).max(0);
637 let z1p = (z1 + ESTNORMRAD).min(MAXZDIM);
638 // Write region clipped to the target chunk's footprint.
639 let wx0 = x0.max(0);
640 let wy0 = y0.max(0);
641 let wx1 = x1.min(target_vsid_i);
642 let wy1 = y1.min(target_vsid_i);
643 if wx0 >= wx1 || wy0 >= wy1 || z0p >= z1p {
644 return;
645 }
646
647 let cache = EstNormCache::build_with_reader(column_reader, x0, y0, x1, y1);
648 apply_lighting_with_cache(
649 target_data,
650 target_column_offsets,
651 target_vsid,
652 wx0,
653 wy0,
654 z0p,
655 wx1,
656 wy1,
657 z1p,
658 &cache,
659 lightmode,
660 lights,
661 );
662}
663
664/// S4B.4.b: write half of [`update_lighting_chunk`], split out so
665/// callers can build the [`EstNormCache`] separately (via
666/// [`EstNormCache::build_with_reader`]) and pass it in.
667///
668/// The split matters when the cache build needs an immutable grid
669/// borrow (for cross-chunk reads) and the write phase needs a
670/// mutable target-chunk borrow — the two can't coexist. The
671/// caller builds the cache first while holding the immutable
672/// borrow, drops it, then mutably borrows the target chunk and
673/// invokes this.
674///
675/// The `(x0..x1, y0..y1, z0..z1)` region must already be clipped
676/// to the target chunk's footprint (this helper does no clipping).
677/// `cache` must cover at least `[x0..x1) × [y0..y1)` (a `±ESTNORMRAD`
678/// padding is the caller's responsibility — typically built via
679/// `build_with_reader(.., x0, y0, x1, y1)` which adds the padding
680/// itself).
681#[allow(clippy::too_many_arguments)]
682pub fn apply_lighting_with_cache(
683 target_data: &mut [u8],
684 target_column_offsets: &[u32],
685 target_vsid: u32,
686 x0: i32,
687 y0: i32,
688 z0: i32,
689 x1: i32,
690 y1: i32,
691 z1: i32,
692 cache: &EstNormCache,
693 lightmode: u32,
694 lights: &[LightSrc],
695) {
696 if lightmode == 0 || x0 >= x1 || y0 >= y1 || z0 >= z1 {
697 return;
698 }
699
700 let lightsub: Vec<f32> = lights.iter().map(|l| 1.0 / (l.r2.sqrt() * l.r2)).collect();
701
702 let region_w = (x1 - x0) as usize;
703 let region_h = (y1 - y0) as usize;
704 let mut column_extents: Vec<(usize, usize)> = Vec::with_capacity(region_w * region_h);
705 for yi in 0..region_h {
706 let y = y0 + yi as i32;
707 for xi in 0..region_w {
708 let x = x0 + xi as i32;
709 let col_idx = (y as u32) * target_vsid + (x as u32);
710 let start = target_column_offsets[col_idx as usize] as usize;
711 let end = start + roxlap_formats::vxl::slng(&target_data[start..]);
712 column_extents.push((start, end));
713 }
714 }
715
716 let world_view = WorldDataMutView::new(target_data);
717 let row_body = |y: i32| {
718 let yi = (y - y0) as usize;
719 for x in x0..x1 {
720 let xi = (x - x0) as usize;
721 let (off_start, off_end) = column_extents[yi * region_w + xi];
722 // SAFETY: per-column byte ranges are pairwise disjoint
723 // across distinct `(x, y)` (voxalloc invariant).
724 let column = unsafe { world_view.column_slice(off_start, off_end) };
725 shade_column(column, x, y, z0, z1, lightmode, lights, &lightsub, cache);
726 }
727 };
728
729 (y0..y1).into_par_iter().for_each(row_body);
730}
731
/// Shared, raw-pointer handle to the world byte buffer.
///
/// The parallel bake needs to give each worker a `&mut [u8]` for
/// the column it is shading, which a single exclusive `&mut [u8]`
/// cannot do. This view is created from that one exclusive borrow
/// and keeps its lifetime (`'buf`), so every slice it hands out is
/// still bounded by the original borrow.
///
/// # Safety contract
/// Whoever calls `column_slice` concurrently MUST make sure the
/// requested ranges never overlap across threads. The bake relies
/// on voxalloc's invariant that distinct columns occupy disjoint
/// byte ranges.
struct WorldDataMutView<'buf> {
    ptr: *mut u8,
    len: usize,
    _marker: std::marker::PhantomData<&'buf mut [u8]>,
}

// SAFETY: the view stands in for a `&mut [u8]`. Its two scalar
// fields are never mutated after construction, so sharing or moving
// the view itself is race-free; keeping the *slices* disjoint is
// the caller's obligation (see the struct docs).
unsafe impl<'buf> Send for WorldDataMutView<'buf> {}
unsafe impl<'buf> Sync for WorldDataMutView<'buf> {}

impl<'buf> WorldDataMutView<'buf> {
    /// Capture the buffer's base pointer and length.
    fn new(buf: &'buf mut [u8]) -> Self {
        let len = buf.len();
        let ptr = buf.as_mut_ptr();
        Self {
            ptr,
            len,
            _marker: std::marker::PhantomData,
        }
    }

    /// Hand out the bytes `off_start..off_end` of the buffer as a
    /// mutable slice.
    ///
    /// # Safety
    /// `off_start <= off_end <= self.len` must hold (only checked
    /// in debug builds), and the range must not overlap any range
    /// concurrently held by another thread.
    unsafe fn column_slice(&self, off_start: usize, off_end: usize) -> &'buf mut [u8] {
        debug_assert!(off_start <= off_end, "column slice: start > end");
        debug_assert!(off_end <= self.len, "column slice: end past buffer");
        let span = off_end - off_start;
        // SAFETY: the caller guarantees the range is inside the
        // buffer and disjoint from every other live slice.
        unsafe {
            let base = self.ptr.add(off_start);
            std::slice::from_raw_parts_mut(base, span)
        }
    }
}
780
781/// Walk one column's slab chain and shade every visible voxel
782/// inside `[z_lo, z_hi)`. Mirror of the inner loop in
783/// voxlap5.c:10588-10650.
784#[allow(clippy::cast_lossless)]
785fn shade_column(
786 column: &mut [u8],
787 x: i32,
788 y: i32,
789 z_lo: i32,
790 z_hi: i32,
791 lightmode: u32,
792 lights: &[LightSrc],
793 lightsub: &[f32],
794 cache: &EstNormCache,
795) {
796 let mut v_off: usize = 0;
797 // cstat = false ⇒ top-of-slab phase (floor colours); true ⇒
798 // ceiling-of-next-slab phase (bottom of current slab's solid
799 // mass, visible from the air pocket below).
800 let mut cstat = false;
801 loop {
802 let (sz0, sz1, voxel_byte_offset_signed): (i32, i32, isize);
803 if !cstat {
804 // Floor colours of the current slab. Voxel z=v[1]..=v[2].
805 // Alpha byte at offset (z - v[1]) * 4 + 7 from header
806 // (header is 4 bytes, voxel record is 4 bytes BGRA, +3
807 // for alpha). The voxlap formula encodes this as
808 // `(z << 2) + offs` with `offs = 7 - (v[1] << 2)`.
809 if v_off + 2 >= column.len() {
810 break;
811 }
812 let v1 = i32::from(column[v_off + 1]);
813 let v2 = i32::from(column[v_off + 2]);
814 sz0 = v1;
815 sz1 = v2 + 1;
816 voxel_byte_offset_signed = (v_off as isize) + 7 - ((sz0 as isize) << 2);
817 cstat = true;
818 } else {
819 // Ceiling colours of the next slab — must read v[0]
820 // BEFORE advancing v_off.
821 if v_off + 2 >= column.len() {
822 break;
823 }
824 let v0 = i32::from(column[v_off]);
825 let v1 = i32::from(column[v_off + 1]);
826 let v2 = i32::from(column[v_off + 2]);
827 let prev_offset = v2 - v1 - v0 + 2; // ceilnum from getcube convention
828 if v0 == 0 {
829 break;
830 }
831 v_off += (v0 as usize) * 4;
832 if v_off + 3 >= column.len() {
833 break;
834 }
835 let v3 = i32::from(column[v_off + 3]);
836 sz1 = v3;
837 sz0 = prev_offset + sz1;
838 voxel_byte_offset_signed = (v_off as isize) + 3 - ((sz1 as isize) << 2);
839 cstat = false;
840 }
841
842 let lo = sz0.max(z_lo);
843 let hi = sz1.min(z_hi);
844 for z in lo..hi {
845 let normal = cache.estnorm(x, y, z);
846 let brightness = compute_brightness(x, y, z, normal, lightmode, lights, lightsub);
847 let byte_off = voxel_byte_offset_signed + ((z as isize) << 2);
848 if byte_off >= 0 && (byte_off as usize) < column.len() {
849 column[byte_off as usize] = brightness;
850 }
851 }
852 }
853}
854
855/// Voxlap's per-voxel brightness math. Computes the `[0, 255]`
856/// alpha byte for one voxel from its surface normal `tp` + the
857/// light list. Mirror of voxlap5.c:10605-10646.
858fn compute_brightness(
859 x: i32,
860 y: i32,
861 z: i32,
862 tp: [f32; 3],
863 lightmode: u32,
864 lights: &[LightSrc],
865 lightsub: &[f32],
866) -> u8 {
867 if lightmode < 2 {
868 // Directional path (voxlap5.c:10607-10612): single sun
869 // direction baked into a hardcoded coefficient pair.
870 // i = (tp.y * 0.5 + tp.z) * 64 + 103.5, clamped to [0, 255].
871 let f = (tp[1] * 0.5 + tp[2]) * 64.0 + 103.5;
872 clamp_to_byte(f)
873 } else {
874 // Point-light path (voxlap5.c:10614-10645). Base brightness
875 // 47.5..63.5 + per-light front-face contribution.
876 let mut f = (tp[1] * 0.5 + tp[2]) * 16.0 + 47.5;
877 let xf = x as f32;
878 let yf = y as f32;
879 let zf = z as f32;
880 for (i, light) in lights.iter().enumerate() {
881 let fx = light.pos[0] - xf;
882 let fy = light.pos[1] - yf;
883 let fz = light.pos[2] - zf;
884 // tp · light_delta: positive ⇒ surface faces away from
885 // light (back-lit, no contribution); negative ⇒ surface
886 // faces light (front-lit, lambertian contribution).
887 let h = tp[0] * fx + tp[1] * fy + tp[2] * fz;
888 if h >= 0.0 {
889 continue;
890 }
891 let g_sq = fx * fx + fy * fy + fz * fz;
892 if g_sq >= light.r2 {
893 continue;
894 }
895 // Voxlap's SSE rcpss/rsqrtss sequence:
896 // g = (1/g_sq) * rsqrt(g_sq) - lightsub[i]
897 // = 1/(g_sq * sqrt(g_sq)) - 1/(r2 * sqrt(r2))
898 // = 1/d³ - 1/r³
899 // The `_mm_rcp_ss` / `_mm_rsqrt_ss` are 12-bit
900 // approximations; the exact `f32::sqrt`-based form
901 // here is more precise but may drift from voxlap C.
902 // Bit-exactness will require switching to the
903 // intrinsic versions on x86_64; deferred until
904 // diag_down_lit oracle convergence demands it.
905 let g = 1.0 / (g_sq * g_sq.sqrt()) - lightsub[i];
906 f -= g * h * light.sc;
907 }
908 clamp_to_byte(f)
909 }
910}
911
#[inline]
fn clamp_to_byte(f: f32) -> u8 {
    // Voxlap caps the top end with the integer-compare bit trick
    // `*(int32_t *)&f > 0x437f0000` (0x437f0000 is 255.0f) and lets
    // negatives wrap through `ftol`. Here both ends are pinned
    // explicitly; anything in between is truncated toward zero.
    match f {
        over if over >= 255.0 => u8::MAX,
        under if under <= 0.0 => u8::MIN,
        // NaN fails both guards and lands here; `as u8` maps it to 0.
        inside => inside as u8,
    }
}
925
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `vsid × vsid` world in which every column is the same
    /// single slab: header `[next=0, z1=20, z2=24, 0]` followed by
    /// five 4-byte voxel records `[0x10, 0x20, 0x30, alpha]`.
    ///
    /// Returns `(data, offsets, column_len)` where `offsets` holds one
    /// start offset per column plus a trailing end-of-data sentinel.
    fn flat_floor_world(vsid: u32, alpha: u8) -> (Vec<u8>, Vec<u32>, usize) {
        let mut column = vec![0u8, 20, 24, 0];
        for _ in 20..=24 {
            column.extend([0x10, 0x20, 0x30, alpha]);
        }

        let columns = (vsid * vsid) as usize;
        let mut data = Vec::with_capacity(columns * column.len());
        let mut offsets = Vec::with_capacity(columns + 1);
        for _ in 0..columns {
            offsets.push(data.len() as u32);
            data.extend_from_slice(&column);
        }
        offsets.push(data.len() as u32);

        (data, offsets, column.len())
    }

    /// Pins the mask helpers at their boundaries: `xbsflor(n)` has
    /// bits `n..32` set (all ones at 0, zero at 32) and `xbsceil(n)`
    /// has bits `0..n` set (zero at 0, all ones at 32).
    #[test]
    fn xbsflor_xbsceil_known_values() {
        for (n, want) in [
            (0, 0xffff_ffff),
            (1, 0xffff_fffe),
            (5, 0xffff_ffe0),
            (31, 0x8000_0000),
            (32, 0),
        ] {
            assert_eq!(xbsflor(n), want);
        }

        for (n, want) in [
            (0, 0),
            (5, 0x1f),
            (31, 0x7fff_ffff),
            (32, 0xffff_ffff),
        ] {
            assert_eq!(xbsceil(n), want);
        }
    }

    /// One slab covering z = 10..=14. The expected bitmap is air for
    /// z < 10 and solid from z = 10 downward: the slab body supplies
    /// bits 10..15 and everything below the last slab counts as
    /// solid.
    #[test]
    fn single_slab_z10_to_14_sets_correct_bits() {
        // 4-byte header [next=0, z1=10, z2=14, unused] followed by
        // five zeroed voxel records. The record contents are
        // irrelevant to the bitmap under test.
        let header = [0u8, 10, 14, 0];
        let records = [0u8; 5 * 4];
        let col = [&header[..], &records[..]].concat();

        let mut bits = [0u32; 8];
        expandbit256(&col, &mut bits);

        // Word 0 = slab body 0x0000_7c00 | solid-below 0xffff_8000.
        let first = bits[0];
        assert_eq!(
            first, 0xffff_fc00,
            "word 0 want 0xffff_fc00 got 0x{:08x}",
            first
        );
        // Every later word lies entirely below the slab: all solid.
        for i in 1..bits.len() {
            let w = bits[i];
            assert_eq!(w, 0xffff_ffff, "word {i} want -1 got 0x{:08x}", w);
        }
    }

    /// The reciprocal-square-root table must track `1/sqrt(k)`:
    /// within 1e-3 absolute for k in 1..=100, and within 1% relative
    /// for a handful of larger indices.
    #[test]
    fn fsqrecip_matches_1_over_sqrt() {
        let table = build_fsqrecip();

        for k in 1..=100 {
            let got = table[k];
            let want = (k as f32).sqrt().recip();
            let err = (got - want).abs();
            assert!(err < 1e-3, "fsqrecip[{k}] = {got}, want {want}, err {err}");
        }

        // Larger indices are held to a relative bound instead.
        for k in [500, 1000, 2000, 5000] {
            let got = table[k];
            let want = (k as f32).sqrt().recip();
            let rel = (got / want - 1.0).abs();
            assert!(
                rel < 0.01,
                "fsqrecip[{k}] = {got}, want {want}, rel-err {rel}"
            );
        }
    }

    /// Runs a lightmode-1 bake over the middle of a 4×4 flat-floor
    /// world and inspects the five voxel records of column (1, 1).
    /// Checks that each alpha byte (a) no longer holds the 0xab
    /// sentinel it was seeded with and (b) is above 100.
    #[test]
    fn lightmode1_bakes_brightness_into_visible_voxels() {
        let vsid: u32 = 4;
        // Alpha seeded with 0xab so an untouched record is detectable.
        let (mut data, offsets, col_len) = flat_floor_world(vsid, 0xab);
        assert_eq!(col_len * (vsid * vsid) as usize, data.len());

        // Box corners (1, 1, 0) and (3, 3, 30), lightmode 1, no lights.
        update_lighting(&mut data, &offsets, vsid, 1, 1, 0, 3, 3, 30, 1, &[]);

        // Alpha is the 4th byte of each record; records start right
        // after the 4-byte slab header.
        let (cx, cy): (u32, u32) = (1, 1);
        let base = offsets[(cy * vsid + cx) as usize] as usize;
        let alphas: Vec<u8> = data[base + 4..]
            .chunks_exact(4)
            .take(5)
            .map(|record| record[3])
            .collect();

        alphas.iter().enumerate().for_each(|(i, &a)| {
            assert_ne!(a, 0xab, "alpha[{i}] not rewritten");
        });
        // Expectation carried over from the original test: a flat
        // floor has near-vertical normals, so the directional formula
        // lands around 1.0 * 64 + 103.5 = 167.5.
        alphas.iter().enumerate().for_each(|(i, &a)| {
            assert!(
                a > 100,
                "alpha[{i}]={a} should be on the bright side for top-of-floor voxels"
            );
        });
    }

    /// Runs a lightmode-2 bake over a whole 5×5 flat-floor world with
    /// a single light at (4, 2, 20), then compares the first voxel
    /// record's alpha in the y = 2 row: the column at x = 4 (next to
    /// the light) must be at least as bright as the column at x = 0.
    #[test]
    fn lightmode2_with_light_produces_per_column_variation() {
        let vsid: u32 = 5;
        let (mut data, offsets, _) = flat_floor_world(vsid, 0);

        let lights = [LightSrc {
            pos: [4.0, 2.0, 20.0],
            r2: 50.0 * 50.0,
            sc: 64.0,
        }];
        update_lighting(&mut data, &offsets, vsid, 0, 0, 0, 5, 5, 30, 2, &lights);

        // Alpha of the first record in column (x, 2): skip the 4-byte
        // header, then take byte 3 of the record.
        let row_start = 2 * vsid;
        let top_alpha = |x: u32| {
            let column = offsets[(row_start + x) as usize] as usize;
            data[column + 4 + 3]
        };
        let close = top_alpha(4);
        let far = top_alpha(0);
        assert!(
            close >= far,
            "column nearer the light should be ≥ as bright as the far one (close={close} far={far})"
        );
    }

    /// Degenerate column consisting of just the header `[0, 0, 0, 0]`.
    ///
    /// Despite the test's name, the assertion pins the first bitmap
    /// word to all ones. The original author's reasoning: with
    /// `z1 = z2 = 0` the slab contributes bit 0, and everything below
    /// the last slab is treated as solid, which fills bits 1..32.
    #[test]
    fn empty_column_all_air() {
        let col = vec![0u8; 4];
        let mut bits = [0u32; 8];
        expandbit256(&col, &mut bits);

        let first = bits[0];
        assert_eq!(
            first, 0xffff_ffff,
            "empty column word 0 want all-1 got 0x{:08x}",
            first
        );
    }
}