roxlap_core/world_lighting.rs
1//! World-voxel lighting bake.
2//!
3//! Walks every visible voxel inside a 3D bounding box and writes its
4//! per-voxel brightness byte (the high byte of the packed colour, which
5//! the renderer multiplies into the RGB — see [`crate::dda`]'s `shade`)
6//! from the engine's current `LightSrc` set + lightmode.
7//!
8//! Two modes:
9//! - `lightmode == 1`: cheap directional bake — every voxel gets
10//! shading from a single fixed sun direction:
11//! `(n.y * 0.5 + n.z) * 64 + 103.5` clamped to `[0, 255]`.
12//! - `lightmode == 2`: per-light point-light bake — for each light in
13//! range, subtract `g * h * sc`, where `g = 1/(d·d²) - 1/(r·r²)`
14//! (cube-falloff with a hard cutoff at radius `r`) and
15//! `h = surface_normal · light_delta` (front-lit faces contribute;
16//! back faces are skipped). Subtracted from a base
17//! `(n.y * 0.5 + n.z) * 16 + 47.5`.
18//!
19//! The surface normal `n` comes from [`EstNormCache::estnorm`] — the
20//! occupancy gradient of a voxel's 5×5×5 neighbourhood.
21
22#![allow(
23 clippy::cast_possible_truncation,
24 clippy::cast_possible_wrap,
25 clippy::cast_sign_loss,
26 clippy::cast_precision_loss,
27 clippy::similar_names,
28 clippy::too_many_arguments,
29 clippy::too_many_lines,
30 clippy::doc_markdown,
31 clippy::many_single_char_names,
32 clippy::must_use_candidate,
33 clippy::unnecessary_cast,
34 clippy::cast_lossless,
35 clippy::needless_bool_assign,
36 clippy::needless_range_loop,
37 clippy::no_effect,
38 clippy::identity_op,
39 clippy::if_not_else
40)]
41
42use rayon::prelude::*;
43
44use crate::engine::LightSrc;
45
/// Height of the world in voxels. A column's z coordinate is stored in a
/// single byte, so valid depths are `0..MAXZDIM` (= 256).
pub(crate) const MAXZDIM: i32 = u8::MAX as i32 + 1;
48
/// Radius of the neighbourhood sampled when estimating a surface normal.
/// The estimate looks at the solid/air pattern of the
/// `(2 * ESTNORMRAD + 1)³ = 5×5×5` cube centred on the voxel.
pub(crate) const ESTNORMRAD: i32 = 2;
53
/// Mask with every bit from position `k` upward set and the low `k` bits
/// clear, i.e. `u32::MAX << k`, extended so that any `k >= 32` yields `0`
/// instead of overflowing the shift.
///
/// [`expandbit256`] ORs this into the current word at an air→solid
/// transition, marking everything from the transition bit to the top of
/// the 32-bit word as solid.
pub(crate) const fn xbsflor(k: usize) -> u32 {
    if k < 32 {
        u32::MAX << k
    } else {
        0
    }
}
64
65/// `~xbsflor[k]` — low `k` bits set. Fills from the bottom of a word
66/// up to a solid→air transition.
67pub(crate) const fn xbsceil(k: usize) -> u32 {
68 !xbsflor(k)
69}
70
71/// Decode a `.vxl` slab column into a 256-bit "voxel solid" bitset,
72/// low-bit-first / low-z-first.
73///
74/// The output `bits` is a `[u32; 8]` (= 256 bits = `MAXZDIM` z
75/// levels); bit `z` is set iff the voxel at depth `z` in this column is
76/// solid (including the hidden interior between a slab's coloured top
77/// and the next slab). This is a straight read of the `.vxl` column
78/// layout: each slab record's byte 1 is its top z (air→solid) and byte
79/// 3 the next slab's bottom (solid→air). Whole 32-bit words between
80/// transitions are flushed as all-air (`0`) or all-solid (`!0`); the
81/// word holding a transition gets a partial mask via
82/// [`xbsflor`] / [`xbsceil`].
83pub(crate) fn expandbit256(column: &[u8], bits: &mut [u32; 8]) {
84 let mut src_idx: usize = 0;
85 let mut dst_idx: usize = 0;
86 let mut bitpos: i32 = 32;
87 let mut word: u32 = 0;
88 let nbits: i32 = (bits.len() as i32) * 32;
89
90 // First iteration: jump straight to the v[1] transition (no
91 // preceding slab whose v[3] we'd need to flush).
92 let mut next_len: i32;
93 let mut delta: i32;
94 let mut go_to_v3 = false;
95
96 'outer: loop {
97 if go_to_v3 {
98 // v[3] : solid → air transition.
99 if src_idx + 3 >= column.len() {
100 break;
101 }
102 delta = i32::from(column[src_idx + 3]) - bitpos;
103 while delta >= 0 {
104 if dst_idx >= bits.len() {
105 break 'outer;
106 }
107 bits[dst_idx] = word;
108 dst_idx += 1;
109 word = u32::MAX;
110 bitpos += 32;
111 delta -= 32;
112 }
113 word &= xbsceil((delta + 32) as usize);
114 }
115 go_to_v3 = true;
116
117 // v[1] : air → solid transition.
118 if src_idx + 1 >= column.len() {
119 break;
120 }
121 delta = i32::from(column[src_idx + 1]) - bitpos;
122 while delta >= 0 {
123 if dst_idx >= bits.len() {
124 break 'outer;
125 }
126 bits[dst_idx] = word;
127 dst_idx += 1;
128 word = 0;
129 bitpos += 32;
130 delta -= 32;
131 }
132 word |= xbsflor((delta + 32) as usize);
133
134 next_len = i32::from(column[src_idx]);
135 if next_len == 0 {
136 break;
137 }
138 src_idx += (next_len as usize) * 4;
139 }
140
141 // Pad the rest of the buffer with `word`'s tail value (in C the
142 // post-loop word is whatever the last `v[1]` partial-set
143 // produced; remaining whole-words flush as solid `-1`).
144 if bitpos <= nbits {
145 while dst_idx < bits.len() {
146 bits[dst_idx] = word;
147 dst_idx += 1;
148 word = u32::MAX;
149 }
150 }
151}
152
153/// Per-column solid/air bitset grid covering a 2D bounding region —
154/// `(x1 - x0 + 2*RAD) × (y1 - y0 + 2*RAD)` columns. Decoding each
155/// column to a bitset once turns the estnorm 5×5×5 neighbourhood query
156/// into O(1) bit tests. A 448×448 bake (extending to 452×452 with
157/// padding) needs about 6.4 MB.
158#[allow(dead_code)] // vsid field/method preserved for inspection
159pub struct EstNormCache {
160 /// Per-column bit arrays. `bits[yidx * width + xidx]` is the
161 /// solid/air bitset of column `(origin_x + xidx, origin_y + yidx)`.
162 bits: Vec<[u32; 8]>,
163 /// Top-left of the cache window in world coords (= original
164 /// `x0 - RAD`).
165 origin_x: i32,
166 origin_y: i32,
167 /// Cached-region width (= `x1 - x0 + 2 * RAD`).
168 width: usize,
169 /// Reserved for symmetric debugging — kept so the cache layout
170 /// can be inspected without recomputing from `bits.len()`.
171 #[allow(dead_code)]
172 height: usize,
173 /// Voxel-grid limit (= `vsid`) used for out-of-bounds clamps.
174 vsid: i32,
175}
176
177impl EstNormCache {
178 /// Build the bit-grid cache covering the bounding region
179 /// `[x0..x1) × [y0..y1)` extended by `ESTNORMRAD` padding on
180 /// each side. Calling [`Self::estnorm`] for any `(x, y)` inside
181 /// the original `[x0..x1) × [y0..y1)` box is then a pure read.
182 ///
183 /// Wraps [`Self::build_with_reader`] with a flat-table closure.
184 #[must_use]
185 pub fn build(
186 world_data: &[u8],
187 column_offsets: &[u32],
188 vsid: u32,
189 x0: i32,
190 y0: i32,
191 x1: i32,
192 y1: i32,
193 ) -> Self {
194 let vsid_i = vsid as i32;
195 let reader = |x: i32, y: i32| -> Option<&[u8]> {
196 if (x | y) < 0 || x >= vsid_i || y >= vsid_i {
197 return None;
198 }
199 let col_idx = (y as u32) * vsid + (x as u32);
200 let off_start = column_offsets[col_idx as usize] as usize;
201 // Slice to end-of-buffer; the slab walker self-
202 // terminates via nextptr.
203 Some(&world_data[off_start..])
204 };
205 let mut cache = Self::build_with_reader(reader, x0, y0, x1, y1);
206 cache.vsid = vsid_i;
207 cache
208 }
209
210 /// S4B.4.b: chunk-aware cache build. The closure
211 /// `column_reader(x, y)` returns the slab bytes of the column
212 /// at world-or-grid-local position `(x, y)`, or `None` for an
213 /// implicit-air / out-of-grid column (matching `build`'s OOB
214 /// "treat as full air" semantics).
215 ///
216 /// No vsid bound — the reader owns OOB handling. Per-chunk
217 /// bakes use a closure that resolves `(x, y)` to a neighbour
218 /// chunk via `Grid::chunk(IVec3)` so the 2-voxel padding
219 /// extends seamlessly across chunk boundaries.
220 ///
221 /// The cache's [`Self::vsid`] field is left at `0` for chunk-
222 /// aware builds — the field is dead-code anyway, preserved
223 /// only for inspection.
224 #[must_use]
225 pub fn build_with_reader<'r>(
226 column_reader: impl Fn(i32, i32) -> Option<&'r [u8]>,
227 x0: i32,
228 y0: i32,
229 x1: i32,
230 y1: i32,
231 ) -> Self {
232 let rad = ESTNORMRAD;
233 let pad_x0 = x0 - rad;
234 let pad_y0 = y0 - rad;
235 let pad_x1 = x1 + rad;
236 let pad_y1 = y1 + rad;
237 let width = (pad_x1 - pad_x0) as usize;
238 let height = (pad_y1 - pad_y0) as usize;
239
240 let mut bits = vec![[0u32; 8]; width * height];
241 for yi in 0..height {
242 let y = pad_y0 + yi as i32;
243 for xi in 0..width {
244 let x = pad_x0 + xi as i32;
245 if let Some(column) = column_reader(x, y) {
246 expandbit256(column, &mut bits[yi * width + xi]);
247 }
248 // None → leave the cache slot zeroed (treat as full
249 // air), matching `build`'s OOB behaviour.
250 }
251 }
252
253 Self {
254 bits,
255 origin_x: pad_x0,
256 origin_y: pad_y0,
257 width,
258 height,
259 vsid: 0,
260 }
261 }
262
263 /// Whether the voxel at cache-column `(xi, yi)`, depth `z` is solid.
264 /// Out of the `[0, MAXZDIM)` z range: everything above the world is
265 /// air, everything below is solid (bedrock).
266 #[inline]
267 fn solid(&self, xi: usize, yi: usize, z: i32) -> bool {
268 if z < 0 {
269 return false;
270 }
271 if z >= MAXZDIM {
272 return true;
273 }
274 let col = &self.bits[yi * self.width + xi];
275 let z = z as usize;
276 (col[z >> 5] >> (z & 31)) & 1 != 0
277 }
278
279 /// Estimate the surface orientation at solid voxel `(x, y, z)` as
280 /// the **occupancy gradient** of its 5×5×5 neighbourhood:
281 ///
282 /// ```text
283 /// n = Σ_{solid neighbours} offset, normal = n / |n|
284 /// ```
285 ///
286 /// (the sum runs over `offset ∈ [-2, 2]³`). `n` points toward the
287 /// denser (solid) side; the lighting formulas in [`update_lighting`]
288 /// are calibrated to that orientation. On a flat surface the solid
289 /// half-space cancels laterally and leaves `n` along the inward
290 /// axis. An all-solid or all-air neighbourhood gives `n = 0` →
291 /// `(0, 0, 0)`, which the lighting math treats as unlit.
292 ///
293 /// `(x, y)` must lie inside the cache's `[x0..x1) × [y0..y1)` region
294 /// (the padded border supplies the ±2 neighbours); `z` is
295 /// unconstrained.
296 #[must_use]
297 #[allow(clippy::cast_precision_loss)]
298 pub fn estnorm(&self, x: i32, y: i32, z: i32) -> [f32; 3] {
299 let cx = (x - self.origin_x) as i32;
300 let cy = (y - self.origin_y) as i32;
301
302 let mut nx = 0i32;
303 let mut ny = 0i32;
304 let mut nz = 0i32;
305 for dy in -ESTNORMRAD..=ESTNORMRAD {
306 let yi = (cy + dy) as usize;
307 for dx in -ESTNORMRAD..=ESTNORMRAD {
308 let xi = (cx + dx) as usize;
309 for dz in -ESTNORMRAD..=ESTNORMRAD {
310 if self.solid(xi, yi, z + dz) {
311 nx += dx;
312 ny += dy;
313 nz += dz;
314 }
315 }
316 }
317 }
318
319 let len_sq = nx * nx + ny * ny + nz * nz;
320 if len_sq == 0 {
321 return [0.0, 0.0, 0.0];
322 }
323 let inv = 1.0 / (len_sq as f32).sqrt();
324 [nx as f32 * inv, ny as f32 * inv, nz as f32 * inv]
325 }
326
327 /// Voxel-grid limit; used by callers to bound their iteration.
328 #[must_use]
329 #[allow(dead_code)]
330 pub(crate) fn vsid(&self) -> i32 {
331 self.vsid
332 }
333}
334
335/// Bake per-voxel lighting into the world's brightness bytes.
336/// Bakes per-voxel brightness over a 3D bounding box.
337///
338/// Walks every visible voxel inside `[x0..x1) × [y0..y1) ×
339/// [z0..z1)` and rewrites its alpha byte (the brightness channel
340/// the rasterizer mulhi'es against `kv6colmul` modulators) under
341/// the current `lightmode` + `lights` state.
342///
343/// - `lightmode == 0`: no-op (fast return).
344/// - `lightmode == 1`: directional sun-style bake — every visible
345/// voxel gets `(tp.y * 0.5 + tp.z) * 64 + 103.5` clamped to
346/// `[0, 255]` from its surface normal `tp`.
347/// - `lightmode >= 2`: per-light Lambertian bake — base
348/// `(tp.y * 0.5 + tp.z) * 16 + 47.5` minus, for each light in
349/// range with surface normal facing it, `g * h * sc` where
350/// `g = 1/(d·d²) - 1/(r·r²)` (cube falloff with hard radius
351/// cutoff) and `h = tp · light_delta`.
352///
353/// The bbox is padded by `ESTNORMRAD` on each side internally
354/// to give estnorm enough neighbourhood; that's done here too.
355/// `lights` should match the engine's full `vx5.lightsrc[]` —
356/// the function does its own per-tile range filtering.
357///
358/// Mutates `world_data` in place. Caller is responsible for any
359/// `column_offsets` / `vsid` invariants.
360pub fn update_lighting(
361 world_data: &mut [u8],
362 column_offsets: &[u32],
363 vsid: u32,
364 x0: i32,
365 y0: i32,
366 z0: i32,
367 x1: i32,
368 y1: i32,
369 z1: i32,
370 lightmode: u32,
371 lights: &[LightSrc],
372) {
373 if lightmode == 0 {
374 return;
375 }
376 let vsid_i = vsid as i32;
377 let x0p = (x0 - ESTNORMRAD).max(0);
378 let y0p = (y0 - ESTNORMRAD).max(0);
379 let z0p = (z0 - ESTNORMRAD).max(0);
380 let x1p = (x1 + ESTNORMRAD).min(vsid_i);
381 let y1p = (y1 + ESTNORMRAD).min(vsid_i);
382 let z1p = (z1 + ESTNORMRAD).min(MAXZDIM);
383 if x0p >= x1p || y0p >= y1p || z0p >= z1p {
384 return;
385 }
386
387 // Build the cache once for the whole padded bake region.
388 // The bake is tiled into 64×64 chunks with a per-tile
389 // `lightlst` filter; for our (one-shot bake) use case the
390 // full-region filter computed inside the per-voxel loop is
391 // simpler and not measurably slower at oracle bake sizes.
392 let cache = EstNormCache::build(world_data, column_offsets, vsid, x0p, y0p, x1p, y1p);
393
394 // Per-light precomputed `lightsub[i] = 1 / (sqrt(r2) * r2)` —
395 // the radius-cutoff bias that makes the light contribution go
396 // to exactly zero at distance == sqrt(r2).
397 let lightsub: Vec<f32> = lights.iter().map(|l| 1.0 / (l.r2.sqrt() * l.r2)).collect();
398
399 // R12.4.1: parallelise the per-row bake via rayon. Each `(x, y)`
400 // pair maps to a unique column slice in `world_data`
401 // (`column_offsets[col_idx]..[col_idx + 1]` ranges are pairwise
402 // disjoint — the voxalloc allocator's invariant). Rows split
403 // cleanly across worker threads; per-row x-loops stay serial to
404 // amortise rayon's per-task overhead. Speedup follows
405 // `RAYON_NUM_THREADS` (set `=1` to disable).
406 //
407 // Lighting bakes are typically rare (one-shot at scene load) but
408 // dynamic-lighting / per-edit relighting use cases call
409 // `update_lighting` per frame — at which point the parallel
410 // path matters for interactive responsiveness.
411 // Per-column byte extents `(start, end)`. After voxalloc-driven
412 // edits (e.g. cave-gen's heavy `set_spans` carve, or runtime
413 // bullet-impact carves), columns are scattered in the slab
414 // pool, so `column_offsets[i+1]` is NOT column `i`'s end byte
415 // — walk each column's slab chain via `slng()` to
416 // recover length. We pre-compute extents here serially before
417 // moving `world_data` into the parallel mutable view; the
418 // slng walk is O(slab_count) per column, typically 1-3 slabs.
419 //
420 // **Region-bounded**: only the bake rectangle `[x0p..x1p) ×
421 // [y0p..y1p)` needs extents — the per-row body indexes only
422 // those columns. Sizing the table to `vsid²` is wasteful when
423 // a small chunk-sized region is baked against a large-vsid
424 // world (e.g. S4.1 scene-graph per-chunk bake against a
425 // vsid=4096 combined view — would have been 16M slng walks per
426 // chunk × 1024 chunks = 17B slng walks). The bake-region table
427 // collapses that to `bake_region` walks per call.
428 #[allow(clippy::cast_sign_loss)]
429 let region_w = (x1p - x0p) as usize;
430 #[allow(clippy::cast_sign_loss)]
431 let region_h = (y1p - y0p) as usize;
432 let mut column_extents: Vec<(usize, usize)> = Vec::with_capacity(region_w * region_h);
433 for yi in 0..region_h {
434 #[allow(clippy::cast_possible_wrap)]
435 let y = y0p + yi as i32;
436 for xi in 0..region_w {
437 #[allow(clippy::cast_possible_wrap)]
438 let x = x0p + xi as i32;
439 #[allow(clippy::cast_sign_loss)]
440 let col_idx = (y as u32) * vsid + (x as u32);
441 let start = column_offsets[col_idx as usize] as usize;
442 let end = start + roxlap_formats::vxl::slng(&world_data[start..]);
443 column_extents.push((start, end));
444 }
445 }
446
447 let world_view = WorldDataMutView::new(world_data);
448 let row_body = |y: i32| {
449 #[allow(clippy::cast_sign_loss)]
450 let yi = (y - y0p) as usize;
451 for x in x0p..x1p {
452 #[allow(clippy::cast_sign_loss)]
453 let xi = (x - x0p) as usize;
454 let (off_start, off_end) = column_extents[yi * region_w + xi];
455 // SAFETY: each (x, y) maps to a unique col_idx; column
456 // byte ranges `[off_start, off_end)` are pairwise
457 // disjoint across distinct `col_idx` (voxalloc's
458 // free-list invariant), so no two threads write to
459 // the same byte.
460 let column = unsafe { world_view.column_slice(off_start, off_end) };
461 shade_column(column, x, y, z0p, z1p, lightmode, lights, &lightsub, &cache);
462 }
463 };
464
465 (y0p..y1p).into_par_iter().for_each(row_body);
466}
467
468/// S4B.4.b: per-chunk variant of [`update_lighting`].
469///
470/// Writes alpha bytes into one chunk's slab buffer; reads
471/// neighbour-chunk voxels through `column_reader` for `estnorm`'s
472/// 5×5×5 padding. The reader takes chunk-local `(x, y)` (which can
473/// extend `±ESTNORMRAD` past the chunk's `[0, target_vsid)` extent)
474/// and returns the column at that position — typically resolved
475/// through `Grid::chunk(IVec3)` so the bake gets seamless
476/// cross-chunk neighbourhood reads without materialising a stitched
477/// combined view (Approach C retirement, S4B.4.b).
478///
479/// `(x0, y0, z0, x1, y1, z1)` is the bake region in chunk-local
480/// coords (typically `(0, 0, 0)..(CHUNK_SIZE_XY, CHUNK_SIZE_XY,
481/// CHUNK_SIZE_Z)`). Writes clip to the target chunk's vsid; reads
482/// extend into neighbour chunks via the closure.
483///
484/// `lightmode`, `lights`, and the per-voxel arithmetic match
485/// [`update_lighting`]; only the cache build + write-region
486/// scoping differ.
487#[allow(clippy::too_many_arguments)]
488pub fn update_lighting_chunk<'r>(
489 target_data: &mut [u8],
490 target_column_offsets: &[u32],
491 target_vsid: u32,
492 x0: i32,
493 y0: i32,
494 z0: i32,
495 x1: i32,
496 y1: i32,
497 z1: i32,
498 column_reader: impl Fn(i32, i32) -> Option<&'r [u8]>,
499 lightmode: u32,
500 lights: &[LightSrc],
501) {
502 if lightmode == 0 {
503 return;
504 }
505 let target_vsid_i = target_vsid as i32;
506
507 // Padded region for the cache (cross-chunk reads via reader).
508 // Z clamps to [0, MAXZDIM) because each chunk's slab data is
509 // chunk-local in z. For stacked grids (S4B.6) the caller
510 // invokes us once per chunk-z layer; cross-chz padding at the
511 // top/bottom of a chunk gets clipped here (a follow-up could
512 // pass z-aware columns to lift this). X/y intentionally don't
513 // clamp — the reader pulls from neighbour chunks via its own
514 // coord translation.
515 let z0p = (z0 - ESTNORMRAD).max(0);
516 let z1p = (z1 + ESTNORMRAD).min(MAXZDIM);
517 // Write region clipped to the target chunk's footprint.
518 let wx0 = x0.max(0);
519 let wy0 = y0.max(0);
520 let wx1 = x1.min(target_vsid_i);
521 let wy1 = y1.min(target_vsid_i);
522 if wx0 >= wx1 || wy0 >= wy1 || z0p >= z1p {
523 return;
524 }
525
526 let cache = EstNormCache::build_with_reader(column_reader, x0, y0, x1, y1);
527 apply_lighting_with_cache(
528 target_data,
529 target_column_offsets,
530 target_vsid,
531 wx0,
532 wy0,
533 z0p,
534 wx1,
535 wy1,
536 z1p,
537 &cache,
538 lightmode,
539 lights,
540 );
541}
542
543/// S4B.4.b: write half of [`update_lighting_chunk`], split out so
544/// callers can build the [`EstNormCache`] separately (via
545/// [`EstNormCache::build_with_reader`]) and pass it in.
546///
547/// The split matters when the cache build needs an immutable grid
548/// borrow (for cross-chunk reads) and the write phase needs a
549/// mutable target-chunk borrow — the two can't coexist. The
550/// caller builds the cache first while holding the immutable
551/// borrow, drops it, then mutably borrows the target chunk and
552/// invokes this.
553///
554/// The `(x0..x1, y0..y1, z0..z1)` region must already be clipped
555/// to the target chunk's footprint (this helper does no clipping).
556/// `cache` must cover at least `[x0..x1) × [y0..y1)` (a `±ESTNORMRAD`
557/// padding is the caller's responsibility — typically built via
558/// `build_with_reader(.., x0, y0, x1, y1)` which adds the padding
559/// itself).
560#[allow(clippy::too_many_arguments)]
561pub fn apply_lighting_with_cache(
562 target_data: &mut [u8],
563 target_column_offsets: &[u32],
564 target_vsid: u32,
565 x0: i32,
566 y0: i32,
567 z0: i32,
568 x1: i32,
569 y1: i32,
570 z1: i32,
571 cache: &EstNormCache,
572 lightmode: u32,
573 lights: &[LightSrc],
574) {
575 if lightmode == 0 || x0 >= x1 || y0 >= y1 || z0 >= z1 {
576 return;
577 }
578
579 let lightsub: Vec<f32> = lights.iter().map(|l| 1.0 / (l.r2.sqrt() * l.r2)).collect();
580
581 let region_w = (x1 - x0) as usize;
582 let region_h = (y1 - y0) as usize;
583 let mut column_extents: Vec<(usize, usize)> = Vec::with_capacity(region_w * region_h);
584 for yi in 0..region_h {
585 let y = y0 + yi as i32;
586 for xi in 0..region_w {
587 let x = x0 + xi as i32;
588 let col_idx = (y as u32) * target_vsid + (x as u32);
589 let start = target_column_offsets[col_idx as usize] as usize;
590 let end = start + roxlap_formats::vxl::slng(&target_data[start..]);
591 column_extents.push((start, end));
592 }
593 }
594
595 let world_view = WorldDataMutView::new(target_data);
596 let row_body = |y: i32| {
597 let yi = (y - y0) as usize;
598 for x in x0..x1 {
599 let xi = (x - x0) as usize;
600 let (off_start, off_end) = column_extents[yi * region_w + xi];
601 // SAFETY: per-column byte ranges are pairwise disjoint
602 // across distinct `(x, y)` (voxalloc invariant).
603 let column = unsafe { world_view.column_slice(off_start, off_end) };
604 shade_column(column, x, y, z0, z1, lightmode, lights, &lightsub, cache);
605 }
606 };
607
608 (y0..y1).into_par_iter().for_each(row_body);
609}
610
/// Raw-pointer view of a byte buffer that lets the parallel bake hand out
/// per-column `&mut [u8]` slices to several threads at once, something a
/// single exclusive `&mut [u8]` cannot do. It is created from one
/// `&mut [u8]` borrow, and the `'a` lifetime keeps that borrow alive for
/// as long as the view or any slice obtained from it is in use.
///
/// # Safety contract
/// Bounds are checked by [`Self::column_slice`]; *disjointness is not*.
/// Whoever requests slices concurrently must guarantee that the requested
/// ranges never overlap. The bake relies on every column occupying its own
/// byte range of the slab pool.
struct WorldDataMutView<'a> {
    ptr: *mut u8,
    len: usize,
    _marker: std::marker::PhantomData<&'a mut [u8]>,
}

// SAFETY: the view is a `&mut [u8]` re-expressed as pointer + length. The
// fields themselves are only read after construction; exclusivity of the
// bytes behind `ptr` is the caller's obligation under `column_slice`'s
// contract.
unsafe impl Send for WorldDataMutView<'_> {}
unsafe impl Sync for WorldDataMutView<'_> {}

impl<'a> WorldDataMutView<'a> {
    /// Wrap `buf`, taking over its exclusive borrow for `'a`.
    fn new(buf: &'a mut [u8]) -> Self {
        Self {
            ptr: buf.as_mut_ptr(),
            len: buf.len(),
            _marker: std::marker::PhantomData,
        }
    }

    /// Carve the sub-slice `[off_start, off_end)` out of the buffer.
    ///
    /// # Panics
    /// Panics if `off_start > off_end` or `off_end` exceeds the buffer
    /// length. The checks are unconditional, so a corrupt column extent
    /// becomes a panic in release builds too, not an out-of-bounds slice.
    ///
    /// # Safety
    /// The returned slice must not overlap any other slice obtained from
    /// this view that is still alive, on this or any other thread.
    unsafe fn column_slice(&self, off_start: usize, off_end: usize) -> &'a mut [u8] {
        assert!(off_start <= off_end, "column slice: start > end");
        assert!(off_end <= self.len, "column slice: end past buffer");
        // SAFETY: the range was just verified to lie inside the buffer the
        // view was created from; the caller guarantees it is not aliased.
        unsafe { std::slice::from_raw_parts_mut(self.ptr.add(off_start), off_end - off_start) }
    }
}
659
660/// Walk one column's slab chain and shade every visible voxel
661/// inside `[z_lo, z_hi)`. Mirror of the inner loop in
662/// the per-voxel bake loop.
663#[allow(clippy::cast_lossless)]
664fn shade_column(
665 column: &mut [u8],
666 x: i32,
667 y: i32,
668 z_lo: i32,
669 z_hi: i32,
670 lightmode: u32,
671 lights: &[LightSrc],
672 lightsub: &[f32],
673 cache: &EstNormCache,
674) {
675 let mut v_off: usize = 0;
676 // cstat = false ⇒ top-of-slab phase (floor colours); true ⇒
677 // ceiling-of-next-slab phase (bottom of current slab's solid
678 // mass, visible from the air pocket below).
679 let mut cstat = false;
680 loop {
681 let (sz0, sz1, voxel_byte_offset_signed): (i32, i32, isize);
682 if !cstat {
683 // Floor colours of the current slab. Voxel z=v[1]..=v[2].
684 // Alpha byte at offset (z - v[1]) * 4 + 7 from header
685 // (header is 4 bytes, voxel record is 4 bytes BGRA, +3
686 // for alpha). The formula encodes this as
687 // `(z << 2) + offs` with `offs = 7 - (v[1] << 2)`.
688 if v_off + 2 >= column.len() {
689 break;
690 }
691 let v1 = i32::from(column[v_off + 1]);
692 let v2 = i32::from(column[v_off + 2]);
693 sz0 = v1;
694 sz1 = v2 + 1;
695 voxel_byte_offset_signed = (v_off as isize) + 7 - ((sz0 as isize) << 2);
696 cstat = true;
697 } else {
698 // Ceiling colours of the next slab — must read v[0]
699 // BEFORE advancing v_off.
700 if v_off + 2 >= column.len() {
701 break;
702 }
703 let v0 = i32::from(column[v_off]);
704 let v1 = i32::from(column[v_off + 1]);
705 let v2 = i32::from(column[v_off + 2]);
706 let prev_offset = v2 - v1 - v0 + 2; // ceilnum from getcube convention
707 if v0 == 0 {
708 break;
709 }
710 v_off += (v0 as usize) * 4;
711 if v_off + 3 >= column.len() {
712 break;
713 }
714 let v3 = i32::from(column[v_off + 3]);
715 sz1 = v3;
716 sz0 = prev_offset + sz1;
717 voxel_byte_offset_signed = (v_off as isize) + 3 - ((sz1 as isize) << 2);
718 cstat = false;
719 }
720
721 let lo = sz0.max(z_lo);
722 let hi = sz1.min(z_hi);
723 for z in lo..hi {
724 let normal = cache.estnorm(x, y, z);
725 let brightness = compute_brightness(x, y, z, normal, lightmode, lights, lightsub);
726 let byte_off = voxel_byte_offset_signed + ((z as isize) << 2);
727 if byte_off >= 0 && (byte_off as usize) < column.len() {
728 column[byte_off as usize] = brightness;
729 }
730 }
731 }
732}
733
734/// Per-voxel brightness math. Computes the `[0, 255]`
735/// alpha byte for one voxel from its surface normal `tp` + the
736/// light list.
737fn compute_brightness(
738 x: i32,
739 y: i32,
740 z: i32,
741 tp: [f32; 3],
742 lightmode: u32,
743 lights: &[LightSrc],
744 lightsub: &[f32],
745) -> u8 {
746 if lightmode < 2 {
747 // Directional path: single fixed sun direction
748 // direction baked into a hardcoded coefficient pair.
749 // i = (tp.y * 0.5 + tp.z) * 64 + 103.5, clamped to [0, 255].
750 let f = (tp[1] * 0.5 + tp[2]) * 64.0 + 103.5;
751 clamp_to_byte(f)
752 } else {
753 // Point-light path. Base brightness
754 // 47.5..63.5 + per-light front-face contribution.
755 let mut f = (tp[1] * 0.5 + tp[2]) * 16.0 + 47.5;
756 let xf = x as f32;
757 let yf = y as f32;
758 let zf = z as f32;
759 for (i, light) in lights.iter().enumerate() {
760 let fx = light.pos[0] - xf;
761 let fy = light.pos[1] - yf;
762 let fz = light.pos[2] - zf;
763 // tp · light_delta: positive ⇒ surface faces away from
764 // light (back-lit, no contribution); negative ⇒ surface
765 // faces light (front-lit, lambertian contribution).
766 let h = tp[0] * fx + tp[1] * fy + tp[2] * fz;
767 if h >= 0.0 {
768 continue;
769 }
770 let g_sq = fx * fx + fy * fy + fz * fz;
771 if g_sq >= light.r2 {
772 continue;
773 }
774 // Cube-law falloff with a hard cutoff at the light radius:
775 // g = 1/d³ - 1/r³ (d = distance, r = radius)
776 // so the contribution fades to exactly zero at `r`.
777 let g = 1.0 / (g_sq * g_sq.sqrt()) - lightsub[i];
778 f -= g * h * light.sc;
779 }
780 clamp_to_byte(f)
781 }
782}
783
/// Convert a brightness level to a byte: values at or above `255.0` give
/// `255`, values at or below `0.0` (and NaN) give `0`, everything in
/// between is truncated toward zero.
#[inline]
fn clamp_to_byte(f: f32) -> u8 {
    // A float→integer `as` cast saturates at the target type's bounds and
    // maps NaN to 0, which is exactly the clamp described above.
    f as u8
}
795
#[cfg(test)]
mod tests {
    use super::*;

    /// Bytes of a column made of a single (last) slab whose coloured
    /// voxels span `top..=bottom`. Every colour record is
    /// `[0x10, 0x20, 0x30, alpha]`.
    fn single_slab_column(top: u8, bottom: u8, alpha: u8) -> Vec<u8> {
        // Header: len = 0 (last slab), top, bottom, unused air-top byte.
        let mut col = vec![0u8, top, bottom, 0];
        for _ in top..=bottom {
            col.extend([0x10, 0x20, 0x30, alpha]);
        }
        col
    }

    /// A `vsid × vsid` world in which every column is a copy of `col`.
    /// Returns `(slab pool, column offsets)`; the offsets table carries one
    /// trailing end-of-pool entry.
    fn uniform_world(vsid: u32, col: &[u8]) -> (Vec<u8>, Vec<u32>) {
        let columns = (vsid * vsid) as usize;
        let mut data = Vec::with_capacity(columns * col.len());
        let mut offsets = Vec::with_capacity(columns + 1);
        for _ in 0..columns {
            offsets.push(data.len() as u32);
            data.extend_from_slice(col);
        }
        offsets.push(data.len() as u32);
        assert_eq!(data.len(), columns * col.len());
        (data, offsets)
    }

    /// Spot checks of both mask helpers, including the clamped `k == 32`
    /// case (`xbsflor(32) == 0`, `xbsceil(32) == !0`).
    #[test]
    fn xbsflor_xbsceil_known_values() {
        assert_eq!(xbsflor(0), 0xffff_ffff);
        assert_eq!(xbsflor(1), 0xffff_fffe);
        assert_eq!(xbsflor(5), 0xffff_ffe0);
        assert_eq!(xbsflor(31), 0x8000_0000);
        assert_eq!(xbsflor(32), 0);
        assert_eq!(xbsceil(0), 0);
        assert_eq!(xbsceil(5), 0x1f);
        assert_eq!(xbsceil(31), 0x7fff_ffff);
        assert_eq!(xbsceil(32), 0xffff_ffff);
    }

    /// A single slab with coloured voxels at z = 10..=14. Depths 0..10 are
    /// air. Everything from z = 10 down is solid: the coloured voxels
    /// themselves (10..=14) and, because this is the last slab, all depths
    /// below them (15..256).
    #[test]
    fn single_slab_z10_to_14_sets_correct_bits() {
        // expandbit256 only reads slab headers, so the colour record
        // contents are irrelevant here.
        let col = single_slab_column(10, 14, 0);

        let mut bits = [0u32; 8];
        expandbit256(&col, &mut bits);

        // Word 0 covers z = 0..32: bits 0..10 clear, bits 10..32 set.
        assert_eq!(
            bits[0], 0xffff_fc00,
            "word 0 want 0xffff_fc00 got 0x{:08x}",
            bits[0]
        );
        // Words 1..7 should all be 0xffff_ffff (fully solid).
        for (i, w) in bits.iter().enumerate().skip(1) {
            assert_eq!(*w, 0xffff_ffff, "word {i} want -1 got 0x{:08x}", *w);
        }
    }

    /// A column whose only slab starts at z = 0 has no air at all: the
    /// air→solid transition sits at bit 0, so all 256 bits are set. (A
    /// bare `[0, 0, 0, 0]` header does *not* decode as an empty column.)
    #[test]
    fn slab_starting_at_z0_reads_fully_solid() {
        let col = vec![0u8, 0, 0, 0];
        let mut bits = [0u32; 8];
        expandbit256(&col, &mut bits);
        for (i, w) in bits.iter().enumerate() {
            assert_eq!(*w, 0xffff_ffff, "word {i} want all-1 got 0x{:08x}", *w);
        }
    }

    /// 4×4 world with a flat floor at z = 20..=24. After a lightmode-1
    /// bake over the centre 2×2, every coloured voxel of column (1, 1)
    /// must have had its alpha rewritten, and to a value on the bright
    /// side: a floor seen from above has a normal close to +z, for which
    /// `(tp.y * 0.5 + tp.z) * 64 + 103.5` is roughly 167.
    #[test]
    fn lightmode1_bakes_brightness_into_visible_voxels() {
        let vsid: u32 = 4;
        // Alpha pre-set to 0xab so a rewrite is detectable.
        let (mut data, offsets) = uniform_world(vsid, &single_slab_column(20, 24, 0xab));

        update_lighting(
            &mut data,
            &offsets,
            vsid,
            1,
            1,
            0,
            3,
            3,
            30, // bbox 1..=2 in xy, z 0..30
            1,  // lightmode 1
            &[],
        );

        let base = offsets[(vsid + 1) as usize] as usize; // column (1, 1)
        for i in 0..5 {
            // Skip the 4-byte header; alpha is byte 3 of each record.
            let a = data[base + 4 + i * 4 + 3];
            assert_ne!(a, 0xab, "alpha[{i}] not rewritten");
            assert!(
                a > 100,
                "alpha[{i}]={a} should be on the bright side for top-of-floor voxels"
            );
        }
    }

    /// 5×5 flat-floor world baked in lightmode 2 with one light at the +x
    /// edge of row y = 2. The top voxel of the column nearest the light
    /// must be at least as bright as that of the farthest column.
    #[test]
    fn lightmode2_with_light_produces_per_column_variation() {
        let vsid: u32 = 5;
        let (mut data, offsets) = uniform_world(vsid, &single_slab_column(20, 24, 0));

        let lights = [LightSrc {
            // World coords: light right next to (4, 2, 20).
            pos: [4.0, 2.0, 20.0],
            r2: 50.0 * 50.0,
            sc: 64.0,
        }];
        update_lighting(&mut data, &offsets, vsid, 0, 0, 0, 5, 5, 30, 2, &lights);

        // Alpha of the `z_idx`-th coloured voxel of column (x, 2).
        let alpha_at = |x: u32, z_idx: usize| {
            let off = offsets[(2 * vsid + x) as usize] as usize;
            data[off + 4 + z_idx * 4 + 3]
        };
        let close = alpha_at(4, 0); // closest column to light
        let far = alpha_at(0, 0); // farthest
        assert!(
            close >= far,
            "column nearer the light should be ≥ as bright as the far one (close={close} far={far})"
        );
    }
}
969}