roxlap_core/world_lighting.rs
1//! Voxlap's world-voxel lighting bake (`updatelighting`,
2//! voxlap5.c:10539).
3//!
4//! Walks every visible voxel inside a 3D bounding box and rewrites
5//! its alpha byte (the per-voxel "brightness" channel that the
6//! rendering path mulhi'es against `kv6colmul`-style modulators)
7//! based on the engine's current `LightSrc` set + lightmode.
8//!
9//! Two modes:
10//! - `lightmode == 1`: cheap directional bake — every voxel gets
11//! shading from a single hardcoded sun direction
12//! `(tp.y * 0.5 + tp.z) * 64 + 103.5` clamped to `[0, 255]`.
13//! - `lightmode == 2`: per-light Lambertian bake — for each light
14//! in range, subtract `g * h * sc` where `g = 1/(d·d²) -
15//! 1/(r·r²)` (cube-falloff with hard cutoff at radius `r`),
16//! `h = surface_normal · light_delta` (negative ⇒ face front-
17//! lit, contributes; positive ⇒ self-shadowed, skipped). Result
18//! subtracts from a base `(tp.y * 0.5 + tp.z) * 16 + 47.5`.
19//!
20//! The surface normal `tp` for each voxel comes from `estnorm` —
21//! a 5×5×5 voxel-solid neighbourhood vote (`ESTNORMRAD == 2` in
22//! voxlap, the production path).
23
24#![allow(
25 clippy::cast_possible_truncation,
26 clippy::cast_possible_wrap,
27 clippy::cast_sign_loss,
28 clippy::cast_precision_loss,
29 clippy::similar_names,
30 clippy::too_many_arguments,
31 clippy::too_many_lines,
32 clippy::doc_markdown,
33 clippy::many_single_char_names,
34 clippy::must_use_candidate,
35 clippy::unnecessary_cast,
36 clippy::cast_lossless,
37 clippy::needless_bool_assign,
38 clippy::needless_range_loop,
39 clippy::no_effect,
40 clippy::identity_op,
41 clippy::if_not_else
42)]
43
44use rayon::prelude::*;
45
46use crate::engine::LightSrc;
47
48/// Voxlap's `MAXZDIM` (`voxlap5.c`). World z runs `0..MAXZDIM`.
49pub(crate) const MAXZDIM: i32 = 256;
50
51/// Voxlap's `ESTNORMRAD == 2` cache window radius. The estnorm
52/// neighbourhood is `(2*RAD+1)³ = 5×5×5` voxels.
53pub(crate) const ESTNORMRAD: i32 = 2;
54
55/// Per-byte popcount table. Voxlap's `bitnum[32]` (voxlap5.c:1477)
56/// — number of set bits in the low 5 bits of each index. Used by
57/// estnorm's neighbourhood-vote reduction.
58pub(crate) const BITNUM: [i8; 32] = [
59 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4, 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
60];
61
62/// Per-byte signed-symmetric popcount. Voxlap's `bitsnum[32]`
63/// (voxlap5.c:1487) — packs `popcount` into the low i16 lane and
64/// `popcount - 2·popcount_negative_axis` into the high i16 lane.
65/// The exact derivation is in voxlap's comment block; values
66/// reproduced verbatim.
67#[rustfmt::skip]
68pub(crate) const BITSNUM: [i32; 32] = [
69 0, 1 - (2 << 16), 1 - (1 << 16), 2 - (3 << 16),
70 1, 2 - (2 << 16), 2 - (1 << 16), 3 - (3 << 16),
71 1 + (1 << 16), 2 - (1 << 16), 2, 3 - (2 << 16),
72 2 + (1 << 16), 3 - (1 << 16), 3, 4 - (2 << 16),
73 1 + (2 << 16), 2, 2 + (1 << 16), 3 - (1 << 16),
74 2 + (2 << 16), 3, 3 + (1 << 16), 4 - (1 << 16),
75 2 + (3 << 16), 3 + (1 << 16), 3 + (2 << 16), 4,
76 3 + (3 << 16), 4 + (1 << 16), 4 + (2 << 16), 5,
77];
78
79/// `xbsflor[k] = -1i32 << k` — bits `k..31` set, low `k` bits
80/// clear. Used by `expandbit256` to splat air→solid transitions
81/// onto a partial 32-bit word.
82pub(crate) const fn xbsflor(k: usize) -> u32 {
83 if k >= 32 {
84 0
85 } else {
86 (-1i32 << k) as u32
87 }
88}
89
90/// `xbsceil[k] = ~xbsflor[k]` — low `k` bits set. Solid→air
91/// transitions.
92pub(crate) const fn xbsceil(k: usize) -> u32 {
93 !xbsflor(k)
94}
95
96/// `expandbit256` — slab structure → 256-bit "voxel solid" bit
97/// array (low-bit-first, low-z-first). Mirror of voxlap5.c:1059.
98///
99/// The output `bits` is a `[u32; 8]` (= 256 bits = `MAXZDIM` z
100/// levels). Bit `z` is set iff voxel at column `(x, y)`, depth `z`
101/// is solid (= part of any slab body, including hidden interiors
102/// between slabs).
103///
104/// Walks the slab linked list, alternating between `v[1]`
105/// (air→solid transition at top of slab) and `v[3]` (solid→air
106/// transition at bottom of next slab). Each transition flushes
107/// pending whole-words (full air `0` or full solid `-1`) until
108/// it lands inside the partial word containing the transition,
109/// then OR/ANDs the partial mask via `xbsflor` / `xbsceil`.
110pub(crate) fn expandbit256(column: &[u8], bits: &mut [u32; 8]) {
111 let mut src_idx: usize = 0;
112 let mut dst_idx: usize = 0;
113 let mut bitpos: i32 = 32;
114 let mut word: u32 = 0;
115 let nbits: i32 = (bits.len() as i32) * 32;
116
117 // First iteration: jump straight to the v[1] transition (no
118 // preceding slab whose v[3] we'd need to flush).
119 let mut next_len: i32;
120 let mut delta: i32;
121 let mut go_to_v3 = false;
122
123 'outer: loop {
124 if go_to_v3 {
125 // v[3] : solid → air transition.
126 if src_idx + 3 >= column.len() {
127 break;
128 }
129 delta = i32::from(column[src_idx + 3]) - bitpos;
130 while delta >= 0 {
131 if dst_idx >= bits.len() {
132 break 'outer;
133 }
134 bits[dst_idx] = word;
135 dst_idx += 1;
136 word = u32::MAX;
137 bitpos += 32;
138 delta -= 32;
139 }
140 word &= xbsceil((delta + 32) as usize);
141 }
142 go_to_v3 = true;
143
144 // v[1] : air → solid transition.
145 if src_idx + 1 >= column.len() {
146 break;
147 }
148 delta = i32::from(column[src_idx + 1]) - bitpos;
149 while delta >= 0 {
150 if dst_idx >= bits.len() {
151 break 'outer;
152 }
153 bits[dst_idx] = word;
154 dst_idx += 1;
155 word = 0;
156 bitpos += 32;
157 delta -= 32;
158 }
159 word |= xbsflor((delta + 32) as usize);
160
161 next_len = i32::from(column[src_idx]);
162 if next_len == 0 {
163 break;
164 }
165 src_idx += (next_len as usize) * 4;
166 }
167
168 // Pad the rest of the buffer with `word`'s tail value (in C the
169 // post-loop word is whatever the last `v[1]` partial-set
170 // produced; remaining whole-words flush as solid `-1`).
171 if bitpos <= nbits {
172 while dst_idx < bits.len() {
173 bits[dst_idx] = word;
174 dst_idx += 1;
175 word = u32::MAX;
176 }
177 }
178}
179
180/// Pre-built `expandbit256` grid covering a 2D bounding region —
181/// `(x1 - x0 + 2*RAD) × (y1 - y0 + 2*RAD)` columns. Trades 32
182/// bytes per column of memory for O(1) bit-window lookups during
183/// the estnorm 5×5 neighbourhood vote.
184///
185/// This is the conceptual equivalent of voxlap's `xbsbuf` cache —
186/// just batch-pre-built rather than rotated row-by-row through
187/// the bake. Memory cost stays manageable: a 448×448 bake (the
188/// `diag_down_lit` oracle scope, which extends to 452×452 with
189/// padding) needs about 6.4 MB.
190#[allow(dead_code)] // vsid field/method preserved for voxlap-parity inspection
191pub(crate) struct EstNormCache {
192 /// Per-column bit arrays. `bits[(yidx) * width + (xidx)]` is
193 /// the slab bit-mask of column `(origin_x + xidx, origin_y +
194 /// yidx)`. `xidx ∈ 0..width`, mapping abs-x into
195 /// `[origin_x - RAD, origin_x + (x1 - x0) - 1 + RAD]`.
196 bits: Vec<[u32; 8]>,
197 /// Top-left of the cache window in world coords (= original
198 /// `x0 - RAD`).
199 origin_x: i32,
200 origin_y: i32,
201 /// Cached-region width (= `x1 - x0 + 2 * RAD`).
202 width: usize,
203 /// Reserved for symmetric debugging — kept so the cache layout
204 /// can be inspected without recomputing from `bits.len()`.
205 #[allow(dead_code)]
206 height: usize,
207 /// Inverse-square-root LUT — `fsqrecip[k] = 1 / sqrt(k)` for
208 /// `k ∈ 0..=5859`. Voxlap's `fsqrecip` table; same precision
209 /// as the C build (no Newton refinement for k > 22).
210 fsqrecip: Vec<f32>,
211 /// Voxel-grid limit (= `vsid`) used for out-of-bounds clamps.
212 vsid: i32,
213}
214
215/// Voxlap's `fsqrecip[5860]` table init (voxlap5.c:12240-12256).
216/// Mirror of the C calculation including the asymmetric Newton-
217/// refinement schedule for indices ≤ 22.
218fn build_fsqrecip() -> Vec<f32> {
219 const N: usize = 5860;
220 let mut t = vec![0.0_f32; N];
221 t[0] = 0.0;
222 t[1] = 1.0;
223 t[2] = (1.0_f32 / 2.0_f32.sqrt()) as f32;
224 t[3] = 1.0 / 3.0_f32.sqrt();
225 let mut i = 3usize;
226 let mut z = 4usize;
227 while z < N {
228 if z + 5 >= N {
229 // Safety stop — cycle increment by 6 may overshoot.
230 break;
231 }
232 t[z] = t[z >> 1] * t[2];
233 t[z + 2] = t[(z + 2) >> 1] * t[2];
234 t[z + 4] = t[(z + 4) >> 1] * t[2];
235 t[z + 5] = t[i] * t[3];
236 i += 2;
237
238 let mut f = (t[z] + t[z + 2]) * 0.5_f32;
239 if z <= 22 {
240 f = (1.5 - 0.5 * ((z + 1) as f32) * f * f) * f;
241 }
242 t[z + 1] = (1.5 - 0.5 * ((z + 1) as f32) * f * f) * f;
243
244 let mut f = (t[z + 2] + t[z + 4]) * 0.5_f32;
245 if z <= 22 {
246 f = (1.5 - 0.5 * ((z + 3) as f32) * f * f) * f;
247 }
248 t[z + 3] = (1.5 - 0.5 * ((z + 3) as f32) * f * f) * f;
249
250 z += 6;
251 }
252 t
253}
254
255impl EstNormCache {
256 /// Build the bit-grid cache covering the bounding region
257 /// `[x0..x1) × [y0..y1)` extended by `ESTNORMRAD` padding on
258 /// each side. Calling [`Self::estnorm`] for any `(x, y)` inside
259 /// the original `[x0..x1) × [y0..y1)` box is then a pure read.
260 #[must_use]
261 pub fn build(
262 world_data: &[u8],
263 column_offsets: &[u32],
264 vsid: u32,
265 x0: i32,
266 y0: i32,
267 x1: i32,
268 y1: i32,
269 ) -> Self {
270 let rad = ESTNORMRAD;
271 let pad_x0 = x0 - rad;
272 let pad_y0 = y0 - rad;
273 let pad_x1 = x1 + rad;
274 let pad_y1 = y1 + rad;
275 let width = (pad_x1 - pad_x0) as usize;
276 let height = (pad_y1 - pad_y0) as usize;
277
278 let mut bits = vec![[0u32; 8]; width * height];
279 let vsid_i = vsid as i32;
280 for yi in 0..height {
281 let y = pad_y0 + yi as i32;
282 for xi in 0..width {
283 let x = pad_x0 + xi as i32;
284 if (x | y) < 0 || x >= vsid_i || y >= vsid_i {
285 // Out-of-bounds: voxlap's `expandbitstack`
286 // zeros the bind buffer (= treat as full air).
287 continue;
288 }
289 let col_idx = (y as u32) * vsid + (x as u32);
290 let off_start = column_offsets[col_idx as usize] as usize;
291 // Slice to end-of-buffer; the slab walker self-
292 // terminates via nextptr. Same fix as world_query +
293 // opticast post-edit-scatter — column_offsets[idx+1]
294 // is the next table entry, NOT the next-byte-offset
295 // after edits.
296 let column = &world_data[off_start..];
297 expandbit256(column, &mut bits[yi * width + xi]);
298 }
299 }
300
301 Self {
302 bits,
303 origin_x: pad_x0,
304 origin_y: pad_y0,
305 width,
306 height,
307 fsqrecip: build_fsqrecip(),
308 vsid: vsid_i,
309 }
310 }
311
312 /// Read 5 consecutive bits starting at z-position `z` from the
313 /// column at `(xi, yi)` cache index. Returns `0..=31`.
314 /// Out-of-range positions:
315 /// - `z < -2`: returns 0 (air above world — though voxlap's
316 /// convention is "above is sky", same effect).
317 /// - `z >= MAXZDIM`: returns `0x1f` (solid below world).
318 #[inline]
319 fn extract_bits5(&self, xi: usize, yi: usize, z: i32) -> u32 {
320 let col = &self.bits[yi * self.width + xi];
321 if z >= MAXZDIM {
322 return 0x1f;
323 }
324 if z + 5 <= 0 {
325 return 0;
326 }
327 // Combine adjacent words to handle the case where the 5-bit
328 // window straddles a word boundary.
329 let z_bit = z;
330 let word_idx = z_bit.div_euclid(32);
331 let bit_off = z_bit.rem_euclid(32) as u32;
332 let lo = if (0..8).contains(&word_idx) {
333 col[word_idx as usize]
334 } else if word_idx < 0 {
335 0 // air above world
336 } else {
337 u32::MAX // solid below world
338 };
339 let hi = if word_idx + 1 < 8 && word_idx >= -1 {
340 col[(word_idx + 1) as usize]
341 } else if word_idx + 1 < 0 {
342 0
343 } else {
344 u32::MAX
345 };
346 let combined = u64::from(lo) | (u64::from(hi) << 32);
347 ((combined >> bit_off) & 0x1f) as u32
348 }
349
350 /// Estimate the surface normal at `(x, y, z)` from a 5×5×5
351 /// voxel-solid neighbourhood vote. Mirror of voxlap5.c:1501
352 /// (`estnorm`, `ESTNORMRAD == 2` branch).
353 ///
354 /// `(x, y)` must lie inside the cache's `[x0..x1) × [y0..y1)`
355 /// region (panics otherwise — caller guarantees this via the
356 /// bounding-box iteration). `z` is unconstrained (handled via
357 /// air/solid clamping).
358 #[must_use]
359 pub fn estnorm(&self, x: i32, y: i32, z: i32) -> [f32; 3] {
360 let center_xi = (x - self.origin_x) as usize;
361 let center_yi = (y - self.origin_y) as usize;
362
363 let mut nx: i32 = 0;
364 let mut ny: i32 = 0;
365 let mut nz: i32 = 0;
366 let z_window = z - ESTNORMRAD; // top of the 5-bit z window
367
368 for yy in -ESTNORMRAD..=ESTNORMRAD {
369 let yi = (center_yi as i32 + yy) as usize;
370 // Read 5 columns at this yy row (xx = -2..=+2).
371 let b0 = self.extract_bits5(center_xi - 2, yi, z_window) as usize;
372 let b1 = self.extract_bits5(center_xi - 1, yi, z_window) as usize;
373 let b2 = self.extract_bits5(center_xi, yi, z_window) as usize;
374 let b3 = self.extract_bits5(center_xi + 1, yi, z_window) as usize;
375 let b4 = self.extract_bits5(center_xi + 2, yi, z_window) as usize;
376
377 // Per-column popcount differences give x-axis normal
378 // contributions. Voxlap weights:
379 // 2*(N(xx=+2) - N(xx=-2)) + N(xx=+1) - N(xx=-1)
380 // = `n.x` from this row (full normal sum is over yy).
381 nx += ((i32::from(BITNUM[b4]) - i32::from(BITNUM[b0])) << 1) + i32::from(BITNUM[b3])
382 - i32::from(BITNUM[b1]);
383
384 // Sum bitsnum across all 5 columns: `j` is the total
385 // signed-i16-packed contribution. Low 16 bits = number
386 // of solid voxels in this row across all 5 columns and
387 // 5 z levels. High 16 bits = z-axis contribution
388 // (positive bits from upper z, negative from lower).
389 let j = BITSNUM[b0]
390 .wrapping_add(BITSNUM[b1])
391 .wrapping_add(BITSNUM[b2])
392 .wrapping_add(BITSNUM[b3])
393 .wrapping_add(BITSNUM[b4]);
394 nz = nz.wrapping_add(j);
395 // n.y picks only the LOW i16 of `j` (= total solid
396 // count), scaled by yy. The high i16 (z contribution)
397 // doesn't enter n.y.
398 let j_lo16 = (j as i16) as i32;
399 ny = ny.wrapping_add(j_lo16 * yy);
400 }
401 nz >>= 16;
402
403 // Normalise via fsqrecip[len_sq]. Voxlap's table peaks at
404 // 5*5*5 box max = 75² + 15² + 3² = 5859 — within
405 // `fsqrecip`'s 5860-entry range. Out-of-range len_sq values
406 // (e.g. all-zero neighbourhood) get `fsqrecip[0] = 0` ⇒
407 // returns `(0, 0, 0)` which downstream lighting math
408 // tolerates.
409 let len_sq = (nx * nx + ny * ny + nz * nz) as usize;
410 let f = if len_sq < self.fsqrecip.len() {
411 self.fsqrecip[len_sq]
412 } else {
413 0.0
414 };
415 [(nx as f32) * f, (ny as f32) * f, (nz as f32) * f]
416 }
417
418 /// Voxel-grid limit; used by callers to bound their iteration.
419 #[must_use]
420 #[allow(dead_code)] // preserved for voxlap-parity inspection
421 pub(crate) fn vsid(&self) -> i32 {
422 self.vsid
423 }
424}
425
426/// Bake per-voxel lighting into the world's brightness bytes.
427/// Mirror of voxlap's `updatelighting` (`voxlap5.c:10539`).
428///
429/// Walks every visible voxel inside `[x0..x1) × [y0..y1) ×
430/// [z0..z1)` and rewrites its alpha byte (the brightness channel
431/// the rasterizer mulhi'es against `kv6colmul` modulators) under
432/// the current `lightmode` + `lights` state.
433///
434/// - `lightmode == 0`: no-op (fast return).
435/// - `lightmode == 1`: directional sun-style bake — every visible
436/// voxel gets `(tp.y * 0.5 + tp.z) * 64 + 103.5` clamped to
437/// `[0, 255]` from its surface normal `tp`.
438/// - `lightmode >= 2`: per-light Lambertian bake — base
439/// `(tp.y * 0.5 + tp.z) * 16 + 47.5` minus, for each light in
440/// range with surface normal facing it, `g * h * sc` where
441/// `g = 1/(d·d²) - 1/(r·r²)` (cube falloff with hard radius
442/// cutoff) and `h = tp · light_delta`.
443///
444/// Voxlap pads the bbox by `ESTNORMRAD` on each side internally
445/// to give estnorm enough neighbourhood; that's done here too.
446/// `lights` should match the engine's full `vx5.lightsrc[]` —
447/// the function does its own per-tile range filtering.
448///
449/// Mutates `world_data` in place. Caller is responsible for any
450/// `column_offsets` / `vsid` invariants.
451pub fn update_lighting(
452 world_data: &mut [u8],
453 column_offsets: &[u32],
454 vsid: u32,
455 x0: i32,
456 y0: i32,
457 z0: i32,
458 x1: i32,
459 y1: i32,
460 z1: i32,
461 lightmode: u32,
462 lights: &[LightSrc],
463) {
464 if lightmode == 0 {
465 return;
466 }
467 let vsid_i = vsid as i32;
468 let x0p = (x0 - ESTNORMRAD).max(0);
469 let y0p = (y0 - ESTNORMRAD).max(0);
470 let z0p = (z0 - ESTNORMRAD).max(0);
471 let x1p = (x1 + ESTNORMRAD).min(vsid_i);
472 let y1p = (y1 + ESTNORMRAD).min(vsid_i);
473 let z1p = (z1 + ESTNORMRAD).min(MAXZDIM);
474 if x0p >= x1p || y0p >= y1p || z0p >= z1p {
475 return;
476 }
477
478 // Build the cache once for the whole padded bake region.
479 // Voxlap tiles the bake into 64×64 chunks with a per-tile
480 // `lightlst` filter; for our (one-shot bake) use case the
481 // full-region filter computed inside the per-voxel loop is
482 // simpler and not measurably slower at oracle bake sizes.
483 let cache = EstNormCache::build(world_data, column_offsets, vsid, x0p, y0p, x1p, y1p);
484
485 // Per-light precomputed `lightsub[i] = 1 / (sqrt(r2) * r2)` —
486 // the radius-cutoff bias that makes the light contribution go
487 // to exactly zero at distance == sqrt(r2).
488 let lightsub: Vec<f32> = lights.iter().map(|l| 1.0 / (l.r2.sqrt() * l.r2)).collect();
489
490 // R12.4.1: parallelise the per-row bake via rayon. Each `(x, y)`
491 // pair maps to a unique column slice in `world_data`
492 // (`column_offsets[col_idx]..[col_idx + 1]` ranges are pairwise
493 // disjoint — the voxalloc allocator's invariant). Rows split
494 // cleanly across worker threads; per-row x-loops stay serial to
495 // amortise rayon's per-task overhead. Speedup follows
496 // `RAYON_NUM_THREADS` (set `=1` to disable).
497 //
498 // Lighting bakes are typically rare (one-shot at scene load) but
499 // dynamic-lighting / per-edit relighting use cases call
500 // `update_lighting` per frame — at which point the parallel
501 // path matters for interactive responsiveness.
502 // Per-column byte extents `(start, end)`. After voxalloc-driven
503 // edits (e.g. cave-gen's heavy `set_spans` carve, or runtime
504 // bullet-impact carves), columns are scattered in the slab
505 // pool, so `column_offsets[i+1]` is NOT column `i`'s end byte
506 // — voxlap walks each column's slab chain via `slng()` to
507 // recover length. We pre-compute extents here serially before
508 // moving `world_data` into the parallel mutable view; the
509 // slng walk is O(slab_count) per column, typically 1-3 slabs
510 // → tens of microseconds per row at vsid = 1024. Negligible
511 // next to the bake itself.
512 let n_cols = (vsid as usize) * (vsid as usize);
513 let column_extents: Vec<(usize, usize)> = (0..n_cols)
514 .map(|col_idx| {
515 let start = column_offsets[col_idx] as usize;
516 let end = start + roxlap_formats::vxl::slng(&world_data[start..]);
517 (start, end)
518 })
519 .collect();
520
521 let world_view = WorldDataMutView::new(world_data);
522 let row_body = |y: i32| {
523 for x in x0p..x1p {
524 let col_idx = (y as u32) * vsid + (x as u32);
525 let (off_start, off_end) = column_extents[col_idx as usize];
526 // SAFETY: each (x, y) maps to a unique col_idx; column
527 // byte ranges `[off_start, off_end)` are pairwise
528 // disjoint across distinct `col_idx` (voxalloc's
529 // free-list invariant), so no two threads write to
530 // the same byte.
531 let column = unsafe { world_view.column_slice(off_start, off_end) };
532 shade_column(column, x, y, z0p, z1p, lightmode, lights, &lightsub, &cache);
533 }
534 };
535
536 (y0p..y1p).into_par_iter().for_each(row_body);
537}
538
539/// Raw-pointer view of `world_data` so the parallel
540/// [`update_lighting`] body can hand out per-column `&mut [u8]`
541/// slices to multiple threads without each thread needing
542/// `&mut Vec<u8>` (which is exclusive). Constructed from a single
543/// `&mut [u8]` borrow at the start of the parallel section; the
544/// borrow's lifetime gates `WorldDataMutView`'s usable lifetime.
545///
546/// # Safety contract
547/// Callers that hand out concurrent `column_slice` references MUST
548/// guarantee the requested ranges are pairwise non-overlapping
549/// across threads. [`update_lighting`]'s call site relies on
550/// voxalloc's per-column-disjoint-byte-range invariant.
551struct WorldDataMutView<'a> {
552 ptr: *mut u8,
553 len: usize,
554 _marker: std::marker::PhantomData<&'a mut [u8]>,
555}
556
557// SAFETY: `WorldDataMutView` is morally a `&mut [u8]` re-exposed as
558// raw pointers. The disjoint-write invariant is enforced by the
559// caller; concurrent reads of `ptr` / `len` fields are race-free
560// (immutable scalar fields).
561unsafe impl Send for WorldDataMutView<'_> {}
562unsafe impl Sync for WorldDataMutView<'_> {}
563
564impl<'a> WorldDataMutView<'a> {
565 fn new(buf: &'a mut [u8]) -> Self {
566 Self {
567 ptr: buf.as_mut_ptr(),
568 len: buf.len(),
569 _marker: std::marker::PhantomData,
570 }
571 }
572
573 /// Carve out a sub-slice. Caller upholds the disjoint-write
574 /// invariant (see struct doc).
575 ///
576 /// # Safety
577 /// `off_start <= off_end <= self.len`, and the requested range
578 /// must not overlap with ranges concurrently held by other
579 /// threads.
580 unsafe fn column_slice(&self, off_start: usize, off_end: usize) -> &'a mut [u8] {
581 debug_assert!(off_start <= off_end, "column slice: start > end");
582 debug_assert!(off_end <= self.len, "column slice: end past buffer");
583 // SAFETY: caller asserts in-bounds + disjoint-from-other-threads.
584 unsafe { std::slice::from_raw_parts_mut(self.ptr.add(off_start), off_end - off_start) }
585 }
586}
587
588/// Walk one column's slab chain and shade every visible voxel
589/// inside `[z_lo, z_hi)`. Mirror of the inner loop in
590/// voxlap5.c:10588-10650.
591#[allow(clippy::cast_lossless)]
592fn shade_column(
593 column: &mut [u8],
594 x: i32,
595 y: i32,
596 z_lo: i32,
597 z_hi: i32,
598 lightmode: u32,
599 lights: &[LightSrc],
600 lightsub: &[f32],
601 cache: &EstNormCache,
602) {
603 let mut v_off: usize = 0;
604 // cstat = false ⇒ top-of-slab phase (floor colours); true ⇒
605 // ceiling-of-next-slab phase (bottom of current slab's solid
606 // mass, visible from the air pocket below).
607 let mut cstat = false;
608 loop {
609 let (sz0, sz1, voxel_byte_offset_signed): (i32, i32, isize);
610 if !cstat {
611 // Floor colours of the current slab. Voxel z=v[1]..=v[2].
612 // Alpha byte at offset (z - v[1]) * 4 + 7 from header
613 // (header is 4 bytes, voxel record is 4 bytes BGRA, +3
614 // for alpha). The voxlap formula encodes this as
615 // `(z << 2) + offs` with `offs = 7 - (v[1] << 2)`.
616 if v_off + 2 >= column.len() {
617 break;
618 }
619 let v1 = i32::from(column[v_off + 1]);
620 let v2 = i32::from(column[v_off + 2]);
621 sz0 = v1;
622 sz1 = v2 + 1;
623 voxel_byte_offset_signed = (v_off as isize) + 7 - ((sz0 as isize) << 2);
624 cstat = true;
625 } else {
626 // Ceiling colours of the next slab — must read v[0]
627 // BEFORE advancing v_off.
628 if v_off + 2 >= column.len() {
629 break;
630 }
631 let v0 = i32::from(column[v_off]);
632 let v1 = i32::from(column[v_off + 1]);
633 let v2 = i32::from(column[v_off + 2]);
634 let prev_offset = v2 - v1 - v0 + 2; // ceilnum from getcube convention
635 if v0 == 0 {
636 break;
637 }
638 v_off += (v0 as usize) * 4;
639 if v_off + 3 >= column.len() {
640 break;
641 }
642 let v3 = i32::from(column[v_off + 3]);
643 sz1 = v3;
644 sz0 = prev_offset + sz1;
645 voxel_byte_offset_signed = (v_off as isize) + 3 - ((sz1 as isize) << 2);
646 cstat = false;
647 }
648
649 let lo = sz0.max(z_lo);
650 let hi = sz1.min(z_hi);
651 for z in lo..hi {
652 let normal = cache.estnorm(x, y, z);
653 let brightness = compute_brightness(x, y, z, normal, lightmode, lights, lightsub);
654 let byte_off = voxel_byte_offset_signed + ((z as isize) << 2);
655 if byte_off >= 0 && (byte_off as usize) < column.len() {
656 column[byte_off as usize] = brightness;
657 }
658 }
659 }
660}
661
662/// Voxlap's per-voxel brightness math. Computes the `[0, 255]`
663/// alpha byte for one voxel from its surface normal `tp` + the
664/// light list. Mirror of voxlap5.c:10605-10646.
665fn compute_brightness(
666 x: i32,
667 y: i32,
668 z: i32,
669 tp: [f32; 3],
670 lightmode: u32,
671 lights: &[LightSrc],
672 lightsub: &[f32],
673) -> u8 {
674 if lightmode < 2 {
675 // Directional path (voxlap5.c:10607-10612): single sun
676 // direction baked into a hardcoded coefficient pair.
677 // i = (tp.y * 0.5 + tp.z) * 64 + 103.5, clamped to [0, 255].
678 let f = (tp[1] * 0.5 + tp[2]) * 64.0 + 103.5;
679 clamp_to_byte(f)
680 } else {
681 // Point-light path (voxlap5.c:10614-10645). Base brightness
682 // 47.5..63.5 + per-light front-face contribution.
683 let mut f = (tp[1] * 0.5 + tp[2]) * 16.0 + 47.5;
684 let xf = x as f32;
685 let yf = y as f32;
686 let zf = z as f32;
687 for (i, light) in lights.iter().enumerate() {
688 let fx = light.pos[0] - xf;
689 let fy = light.pos[1] - yf;
690 let fz = light.pos[2] - zf;
691 // tp · light_delta: positive ⇒ surface faces away from
692 // light (back-lit, no contribution); negative ⇒ surface
693 // faces light (front-lit, lambertian contribution).
694 let h = tp[0] * fx + tp[1] * fy + tp[2] * fz;
695 if h >= 0.0 {
696 continue;
697 }
698 let g_sq = fx * fx + fy * fy + fz * fz;
699 if g_sq >= light.r2 {
700 continue;
701 }
702 // Voxlap's SSE rcpss/rsqrtss sequence:
703 // g = (1/g_sq) * rsqrt(g_sq) - lightsub[i]
704 // = 1/(g_sq * sqrt(g_sq)) - 1/(r2 * sqrt(r2))
705 // = 1/d³ - 1/r³
706 // The `_mm_rcp_ss` / `_mm_rsqrt_ss` are 12-bit
707 // approximations; the exact `f32::sqrt`-based form
708 // here is more precise but may drift from voxlap C.
709 // Bit-exactness will require switching to the
710 // intrinsic versions on x86_64; deferred until
711 // diag_down_lit oracle convergence demands it.
712 let g = 1.0 / (g_sq * g_sq.sqrt()) - lightsub[i];
713 f -= g * h * light.sc;
714 }
715 clamp_to_byte(f)
716 }
717}
718
719#[inline]
720fn clamp_to_byte(f: f32) -> u8 {
721 // Voxlap's `if (*(int32_t *)&f > 0x437f0000) f = 255` is the
722 // bit-trick form of `if (f > 255.0) f = 255.0`. Negatives wrap
723 // through `ftol` / cast; we clamp explicitly for safety.
724 if f >= 255.0 {
725 255
726 } else if f <= 0.0 {
727 0
728 } else {
729 f as u8
730 }
731}
732
733#[cfg(test)]
734mod tests {
735 use super::*;
736
737 /// xbsflor(0) = -1 (all bits set), xbsflor(32) clamped to 0,
738 /// xbsflor(5) = ~31 = 0xffff_ffe0.
739 #[test]
740 fn xbsflor_xbsceil_known_values() {
741 assert_eq!(xbsflor(0), 0xffff_ffff);
742 assert_eq!(xbsflor(1), 0xffff_fffe);
743 assert_eq!(xbsflor(5), 0xffff_ffe0);
744 assert_eq!(xbsflor(31), 0x8000_0000);
745 assert_eq!(xbsflor(32), 0);
746 assert_eq!(xbsceil(0), 0);
747 assert_eq!(xbsceil(5), 0x1f);
748 assert_eq!(xbsceil(31), 0x7fff_ffff);
749 assert_eq!(xbsceil(32), 0xffff_ffff);
750 }
751
752 /// Single-slab column [next=0, sz0=10, sz1=14, then 5 voxel
753 /// records]. Voxels exist at z = 10..15 (sz0..=sz1). After
754 /// expandbit256, bits 10..15 should be set, all others
755 /// (0..10 and 15..256) should reflect: air above (0..10) and
756 /// solid below (15..256), since voxlap treats z > sz1 of last
757 /// slab as solid.
758 #[test]
759 fn single_slab_z10_to_14_sets_correct_bits() {
760 // Column layout: [next=0, sz0=10, sz1=14, top_color, then 5x
761 // voxel records of 4 bytes each]. We don't use the voxel
762 // record contents; expandbit256 only reads v[0]..v[3].
763 let mut col = vec![0u8, 10, 14, 0]; // header
764 col.extend(vec![0u8; 5 * 4]); // 5 voxel records (z=10..14)
765
766 let mut bits = [0u32; 8];
767 expandbit256(&col, &mut bits);
768
769 // Word 0 covers bits 0..32. Air for z=0..10, solid 10..15,
770 // solid for z=15..32 (since this is the only slab → below
771 // is fully solid).
772 // bits 10..15 from the slab body: 0x7c00 (bits 10,11,12,13,14)
773 // bits 15..32 from "solid below last slab": 0xffff_8000
774 // Combined: 0xffff_fc00.
775 assert_eq!(
776 bits[0], 0xffff_fc00,
777 "word 0 want 0xffff_fc00 got 0x{:08x}",
778 bits[0]
779 );
780 // Words 1..7 should all be 0xffff_ffff (fully solid).
781 for (i, w) in bits.iter().enumerate().skip(1) {
782 assert_eq!(*w, 0xffff_ffff, "word {i} want -1 got 0x{:08x}", *w);
783 }
784 }
785
786 /// fsqrecip[N] should match `1/sqrt(N)` to a reasonable
787 /// tolerance for the values estnorm actually produces.
788 #[test]
789 fn fsqrecip_matches_1_over_sqrt() {
790 let t = build_fsqrecip();
791 for k in 1..=100 {
792 let want = 1.0_f32 / (k as f32).sqrt();
793 let got = t[k];
794 let err = (got - want).abs();
795 assert!(err < 1e-3, "fsqrecip[{k}] = {got}, want {want}, err {err}");
796 }
797 // Spot-check higher values (less precise but still close).
798 for k in [500, 1000, 2000, 5000] {
799 let want = 1.0_f32 / (k as f32).sqrt();
800 let got = t[k];
801 let rel = (got / want - 1.0).abs();
802 assert!(
803 rel < 0.01,
804 "fsqrecip[{k}] = {got}, want {want}, rel-err {rel}"
805 );
806 }
807 }
808
809 /// Build a 4×4 synthetic world with a flat floor at z=20..=24,
810 /// run lightmode-1 update_lighting over the centre 2×2, and
811 /// verify (a) brightness bytes were rewritten, (b) the result
812 /// is in `[0, 255]` for every shaded voxel, (c) the brightness
813 /// is uniform within each (x, y) column at the same z (since
814 /// lightmode-1 depends only on the surface normal).
815 #[test]
816 fn lightmode1_bakes_brightness_into_visible_voxels() {
817 // 4×4 world, single slab at z=20..=24, sentinel column ends.
818 let vsid: u32 = 4;
819 let mut col = vec![0u8, 20, 24, 0]; // header: nextptr=0, z1=20, z2=24
820 for _ in 20..=24 {
821 // 5 voxel records, alpha pre-set to 0xab so we can verify
822 // they got rewritten.
823 col.extend([0x10, 0x20, 0x30, 0xab]);
824 }
825 let col_len = col.len() as u32;
826 let mut data = Vec::new();
827 let mut offsets = vec![0u32; (vsid * vsid + 1) as usize];
828 for i in 0..(vsid * vsid) {
829 offsets[i as usize] = data.len() as u32;
830 data.extend_from_slice(&col);
831 }
832 offsets[(vsid * vsid) as usize] = data.len() as u32;
833 assert_eq!(col_len as usize * (vsid * vsid) as usize, data.len());
834
835 update_lighting(
836 &mut data,
837 &offsets,
838 vsid,
839 1,
840 1,
841 0,
842 3,
843 3,
844 30, // bbox 1..=2 in xy, z 0..30
845 1, // lightmode 1
846 &[],
847 );
848
849 // Pull every voxel record's alpha byte from the centre
850 // (1, 1) column. Should all be in [0, 255] and ≠ 0xab.
851 let off1 = offsets[(1 * vsid + 1) as usize] as usize;
852 let alphas: Vec<u8> = (0..5).map(|i| data[off1 + 4 + i * 4 + 3]).collect();
853 for (i, &a) in alphas.iter().enumerate() {
854 assert_ne!(a, 0xab, "alpha[{i}] not rewritten");
855 }
856 // The shading should be mostly bright — flat-floor voxels
857 // have ~vertical normals so `(tp.y*0.5 + tp.z)*64 + 103.5`
858 // ≈ 1.0*64 + 103.5 = 167.5.
859 for (i, &a) in alphas.iter().enumerate() {
860 assert!(
861 a > 100,
862 "alpha[{i}]={a} should be on the bright side for top-of-floor voxels"
863 );
864 }
865 }
866
867 /// lightmode-2 with one nearby light should darken voxels on
868 /// the away side relative to the toward side. Use a 5×5 world
869 /// with a flat floor and place a light such that it's on the
870 /// +x side of the centre column — the +x face voxel's neighbour
871 /// columns should end up brighter than the -x.
872 #[test]
873 fn lightmode2_with_light_produces_per_column_variation() {
874 let vsid: u32 = 5;
875 let mut col = vec![0u8, 20, 24, 0];
876 for _ in 20..=24 {
877 col.extend([0x10, 0x20, 0x30, 0]);
878 }
879 let mut data = Vec::new();
880 let mut offsets = vec![0u32; (vsid * vsid + 1) as usize];
881 for i in 0..(vsid * vsid) {
882 offsets[i as usize] = data.len() as u32;
883 data.extend_from_slice(&col);
884 }
885 offsets[(vsid * vsid) as usize] = data.len() as u32;
886
887 let lights = [LightSrc {
888 // World coords: light right next to (4, 2, 20).
889 pos: [4.0, 2.0, 20.0],
890 r2: 50.0 * 50.0,
891 sc: 64.0,
892 }];
893 update_lighting(&mut data, &offsets, vsid, 0, 0, 0, 5, 5, 30, 2, &lights);
894
895 // Sample the alpha at the top-floor voxel of each column
896 // along y=2. Closer-to-light columns should be brighter.
897 let alpha_at = |x: u32, z_idx: usize| {
898 let off = offsets[(2 * vsid + x) as usize] as usize;
899 data[off + 4 + z_idx * 4 + 3]
900 };
901 let close = alpha_at(4, 0); // closest column to light
902 let far = alpha_at(0, 0); // farthest
903 assert!(
904 close >= far,
905 "column nearer the light should be ≥ as bright as the far one (close={close} far={far})"
906 );
907 }
908
909 /// Empty column ([0, 0, 0, ...]) — no slabs. After
910 /// expandbit256, all 256 bits = 0 (full air).
911 #[test]
912 fn empty_column_all_air() {
913 let col = vec![0u8, 0, 0, 0]; // single-slab header at z=0..0, no body
914 let mut bits = [0u32; 8];
915 expandbit256(&col, &mut bits);
916 // bit 0 from "air→solid transition at z=0", but only bit 0
917 // is set within the slab range [0, 0+1). Then "solid below"
918 // fills bits 1..256.
919 // Actually for sz0=sz1=0: voxel record is z=0..0 inclusive
920 // (0 voxels). The bit pattern is 1 set bit at z=0 then
921 // solid below.
922 // word 0: bit 0 set, bits 1..32 set ⇒ 0xffff_ffff.
923 assert_eq!(
924 bits[0], 0xffff_ffff,
925 "empty column word 0 want all-1 got 0x{:08x}",
926 bits[0]
927 );
928 }
929}