Skip to main content

roxlap_core/
world_lighting.rs

1//! World-voxel lighting bake.
2//!
3//! Walks every visible voxel inside a 3D bounding box and writes its
4//! per-voxel brightness byte (the high byte of the packed colour, which
5//! the renderer multiplies into the RGB — see [`crate::dda`]'s `shade`)
6//! from the engine's current `LightSrc` set + lightmode.
7//!
8//! Two modes:
9//! - `lightmode == 1`: cheap directional bake — every voxel gets
10//!   shading from a single fixed sun direction:
11//!   `(n.y * 0.5 + n.z) * 64 + 103.5` clamped to `[0, 255]`.
12//! - `lightmode >= 2`: per-light point-light bake — for each light in
13//!   range, subtract `g * h * sc`, where `g = 1/(d·d²) - 1/(r·r²)`
14//!   (cube-falloff with a hard cutoff at radius `r`) and
15//!   `h = surface_normal · light_delta` (front-lit faces contribute;
16//!   back faces are skipped). Subtracted from a base
17//!   `(n.y * 0.5 + n.z) * 16 + 47.5`.
18//!
19//! The surface normal `n` comes from [`EstNormCache::estnorm`] — the
20//! occupancy gradient of a voxel's 5×5×5 neighbourhood.
21
22#![allow(
23    clippy::cast_possible_truncation,
24    clippy::cast_possible_wrap,
25    clippy::cast_sign_loss,
26    clippy::cast_precision_loss,
27    clippy::similar_names,
28    clippy::too_many_arguments,
29    clippy::too_many_lines,
30    clippy::doc_markdown,
31    clippy::many_single_char_names,
32    clippy::must_use_candidate,
33    clippy::unnecessary_cast,
34    clippy::cast_lossless,
35    clippy::needless_bool_assign,
36    clippy::needless_range_loop,
37    clippy::no_effect,
38    clippy::identity_op,
39    clippy::if_not_else
40)]
41
42use rayon::prelude::*;
43
44use crate::engine::LightSrc;
45
/// Height of a voxel column. A voxel's z is stored in a single byte,
/// so valid depths are `0..MAXZDIM` (256 levels).
pub(crate) const MAXZDIM: i32 = u8::MAX as i32 + 1;

/// Radius of the neighbourhood sampled when estimating a surface
/// normal: the estimate reads the solid/air pattern of the
/// `(2 * ESTNORMRAD + 1)³ = 5×5×5` cube centred on the voxel.
pub(crate) const ESTNORMRAD: i32 = 2;
53
/// Mask with bits `k..=31` set and the low `k` bits clear (`!0 << k`).
/// Any `k >= 32` yields `0` instead of overflowing the shift.
///
/// [`expandbit256`] ORs this into the word under construction to mark
/// everything from an air→solid transition up to the top of the word
/// as solid.
pub(crate) const fn xbsflor(k: usize) -> u32 {
    if k < 32 {
        u32::MAX << k
    } else {
        0
    }
}
64
/// Bitwise complement of `xbsflor(k)`: the low `k` bits set, the rest
/// clear. Any `k >= 32` yields all ones.
///
/// ANDed into the word under construction, it keeps the solid bits
/// below a solid→air transition and clears everything from the
/// transition upward.
pub(crate) const fn xbsceil(k: usize) -> u32 {
    if k < 32 {
        (1u32 << k) - 1
    } else {
        u32::MAX
    }
}
70
71/// Decode a `.vxl` slab column into a 256-bit "voxel solid" bitset,
72/// low-bit-first / low-z-first.
73///
74/// The output `bits` is a `[u32; 8]` (= 256 bits = `MAXZDIM` z
75/// levels); bit `z` is set iff the voxel at depth `z` in this column is
76/// solid (including the hidden interior between a slab's coloured top
77/// and the next slab). This is a straight read of the `.vxl` column
78/// layout: each slab record's byte 1 is its top z (air→solid) and byte
79/// 3 the next slab's bottom (solid→air). Whole 32-bit words between
80/// transitions are flushed as all-air (`0`) or all-solid (`!0`); the
81/// word holding a transition gets a partial mask via
82/// [`xbsflor`] / [`xbsceil`].
83pub(crate) fn expandbit256(column: &[u8], bits: &mut [u32; 8]) {
84    let mut src_idx: usize = 0;
85    let mut dst_idx: usize = 0;
86    let mut bitpos: i32 = 32;
87    let mut word: u32 = 0;
88    let nbits: i32 = (bits.len() as i32) * 32;
89
90    // First iteration: jump straight to the v[1] transition (no
91    // preceding slab whose v[3] we'd need to flush).
92    let mut next_len: i32;
93    let mut delta: i32;
94    let mut go_to_v3 = false;
95
96    'outer: loop {
97        if go_to_v3 {
98            // v[3] : solid → air transition.
99            if src_idx + 3 >= column.len() {
100                break;
101            }
102            delta = i32::from(column[src_idx + 3]) - bitpos;
103            while delta >= 0 {
104                if dst_idx >= bits.len() {
105                    break 'outer;
106                }
107                bits[dst_idx] = word;
108                dst_idx += 1;
109                word = u32::MAX;
110                bitpos += 32;
111                delta -= 32;
112            }
113            word &= xbsceil((delta + 32) as usize);
114        }
115        go_to_v3 = true;
116
117        // v[1] : air → solid transition.
118        if src_idx + 1 >= column.len() {
119            break;
120        }
121        delta = i32::from(column[src_idx + 1]) - bitpos;
122        while delta >= 0 {
123            if dst_idx >= bits.len() {
124                break 'outer;
125            }
126            bits[dst_idx] = word;
127            dst_idx += 1;
128            word = 0;
129            bitpos += 32;
130            delta -= 32;
131        }
132        word |= xbsflor((delta + 32) as usize);
133
134        next_len = i32::from(column[src_idx]);
135        if next_len == 0 {
136            break;
137        }
138        src_idx += (next_len as usize) * 4;
139    }
140
141    // Pad the rest of the buffer with `word`'s tail value (in C the
142    // post-loop word is whatever the last `v[1]` partial-set
143    // produced; remaining whole-words flush as solid `-1`).
144    if bitpos <= nbits {
145        while dst_idx < bits.len() {
146            bits[dst_idx] = word;
147            dst_idx += 1;
148            word = u32::MAX;
149        }
150    }
151}
152
/// Grid of decoded solid/air bitsets, one per voxel column, covering a
/// 2D bake rectangle plus its `ESTNORMRAD` border —
/// `(x1 - x0 + 2*RAD) × (y1 - y0 + 2*RAD)` columns in total.
///
/// Each column is decoded to a 256-bit set exactly once, so every
/// probe of the estnorm 5×5×5 neighbourhood is a single bit test.
/// Memory cost is 32 bytes per column: a 448×448 bake (452×452 with
/// the border) takes 452 × 452 × 32 bytes ≈ 6.5 MB.
#[allow(dead_code)] // `height` / `vsid` are kept for inspection only
pub struct EstNormCache {
    /// Row-major column bitsets: `bits[yidx * width + xidx]` describes
    /// the column at world `(origin_x + xidx, origin_y + yidx)`.
    bits: Vec<[u32; 8]>,
    /// World x of cache column 0 (the bake's `x0 - RAD`).
    origin_x: i32,
    /// World y of cache row 0 (the bake's `y0 - RAD`).
    origin_y: i32,
    /// Number of cached columns per row (= `x1 - x0 + 2 * RAD`).
    width: usize,
    /// Number of cached rows. Not read by the lookups; stored so the
    /// layout can be inspected without deriving it from `bits.len()`.
    height: usize,
    /// World side length (`vsid`) recorded by the flat-table builder.
    vsid: i32,
}
176
177impl EstNormCache {
178    /// Build the bit-grid cache covering the bounding region
179    /// `[x0..x1) × [y0..y1)` extended by `ESTNORMRAD` padding on
180    /// each side. Calling [`Self::estnorm`] for any `(x, y)` inside
181    /// the original `[x0..x1) × [y0..y1)` box is then a pure read.
182    ///
183    /// Wraps [`Self::build_with_reader`] with a flat-table closure.
184    #[must_use]
185    pub fn build(
186        world_data: &[u8],
187        column_offsets: &[u32],
188        vsid: u32,
189        x0: i32,
190        y0: i32,
191        x1: i32,
192        y1: i32,
193    ) -> Self {
194        let vsid_i = vsid as i32;
195        let reader = |x: i32, y: i32| -> Option<&[u8]> {
196            if (x | y) < 0 || x >= vsid_i || y >= vsid_i {
197                return None;
198            }
199            let col_idx = (y as u32) * vsid + (x as u32);
200            let off_start = column_offsets[col_idx as usize] as usize;
201            // Slice to end-of-buffer; the slab walker self-
202            // terminates via nextptr.
203            Some(&world_data[off_start..])
204        };
205        let mut cache = Self::build_with_reader(reader, x0, y0, x1, y1);
206        cache.vsid = vsid_i;
207        cache
208    }
209
210    /// S4B.4.b: chunk-aware cache build. The closure
211    /// `column_reader(x, y)` returns the slab bytes of the column
212    /// at world-or-grid-local position `(x, y)`, or `None` for an
213    /// implicit-air / out-of-grid column (matching `build`'s OOB
214    /// "treat as full air" semantics).
215    ///
216    /// No vsid bound — the reader owns OOB handling. Per-chunk
217    /// bakes use a closure that resolves `(x, y)` to a neighbour
218    /// chunk via `Grid::chunk(IVec3)` so the 2-voxel padding
219    /// extends seamlessly across chunk boundaries.
220    ///
221    /// The cache's [`Self::vsid`] field is left at `0` for chunk-
222    /// aware builds — the field is dead-code anyway, preserved
223    /// only for inspection.
224    #[must_use]
225    pub fn build_with_reader<'r>(
226        column_reader: impl Fn(i32, i32) -> Option<&'r [u8]>,
227        x0: i32,
228        y0: i32,
229        x1: i32,
230        y1: i32,
231    ) -> Self {
232        let rad = ESTNORMRAD;
233        let pad_x0 = x0 - rad;
234        let pad_y0 = y0 - rad;
235        let pad_x1 = x1 + rad;
236        let pad_y1 = y1 + rad;
237        let width = (pad_x1 - pad_x0) as usize;
238        let height = (pad_y1 - pad_y0) as usize;
239
240        let mut bits = vec![[0u32; 8]; width * height];
241        for yi in 0..height {
242            let y = pad_y0 + yi as i32;
243            for xi in 0..width {
244                let x = pad_x0 + xi as i32;
245                if let Some(column) = column_reader(x, y) {
246                    expandbit256(column, &mut bits[yi * width + xi]);
247                }
248                // None → leave the cache slot zeroed (treat as full
249                // air), matching `build`'s OOB behaviour.
250            }
251        }
252
253        Self {
254            bits,
255            origin_x: pad_x0,
256            origin_y: pad_y0,
257            width,
258            height,
259            vsid: 0,
260        }
261    }
262
263    /// Whether the voxel at cache-column `(xi, yi)`, depth `z` is solid.
264    /// Out of the `[0, MAXZDIM)` z range: everything above the world is
265    /// air, everything below is solid (bedrock).
266    #[inline]
267    fn solid(&self, xi: usize, yi: usize, z: i32) -> bool {
268        if z < 0 {
269            return false;
270        }
271        if z >= MAXZDIM {
272            return true;
273        }
274        let col = &self.bits[yi * self.width + xi];
275        let z = z as usize;
276        (col[z >> 5] >> (z & 31)) & 1 != 0
277    }
278
279    /// Estimate the surface orientation at solid voxel `(x, y, z)` as
280    /// the **occupancy gradient** of its 5×5×5 neighbourhood:
281    ///
282    /// ```text
283    /// n = Σ_{solid neighbours} offset,   normal = n / |n|
284    /// ```
285    ///
286    /// (the sum runs over `offset ∈ [-2, 2]³`). `n` points toward the
287    /// denser (solid) side; the lighting formulas in [`update_lighting`]
288    /// are calibrated to that orientation. On a flat surface the solid
289    /// half-space cancels laterally and leaves `n` along the inward
290    /// axis. An all-solid or all-air neighbourhood gives `n = 0` →
291    /// `(0, 0, 0)`, which the lighting math treats as unlit.
292    ///
293    /// `(x, y)` must lie inside the cache's `[x0..x1) × [y0..y1)` region
294    /// (the padded border supplies the ±2 neighbours); `z` is
295    /// unconstrained.
296    #[must_use]
297    #[allow(clippy::cast_precision_loss)]
298    pub fn estnorm(&self, x: i32, y: i32, z: i32) -> [f32; 3] {
299        let cx = (x - self.origin_x) as i32;
300        let cy = (y - self.origin_y) as i32;
301
302        let mut nx = 0i32;
303        let mut ny = 0i32;
304        let mut nz = 0i32;
305        for dy in -ESTNORMRAD..=ESTNORMRAD {
306            let yi = (cy + dy) as usize;
307            for dx in -ESTNORMRAD..=ESTNORMRAD {
308                let xi = (cx + dx) as usize;
309                for dz in -ESTNORMRAD..=ESTNORMRAD {
310                    if self.solid(xi, yi, z + dz) {
311                        nx += dx;
312                        ny += dy;
313                        nz += dz;
314                    }
315                }
316            }
317        }
318
319        let len_sq = nx * nx + ny * ny + nz * nz;
320        if len_sq == 0 {
321            return [0.0, 0.0, 0.0];
322        }
323        let inv = 1.0 / (len_sq as f32).sqrt();
324        [nx as f32 * inv, ny as f32 * inv, nz as f32 * inv]
325    }
326
327    /// Voxel-grid limit; used by callers to bound their iteration.
328    #[must_use]
329    #[allow(dead_code)]
330    pub(crate) fn vsid(&self) -> i32 {
331        self.vsid
332    }
333}
334
335/// Bake per-voxel lighting into the world's brightness bytes.
336/// Bakes per-voxel brightness over a 3D bounding box.
337///
338/// Walks every visible voxel inside `[x0..x1) × [y0..y1) ×
339/// [z0..z1)` and rewrites its alpha byte (the brightness channel
340/// the rasterizer mulhi'es against `kv6colmul` modulators) under
341/// the current `lightmode` + `lights` state.
342///
343/// - `lightmode == 0`: no-op (fast return).
344/// - `lightmode == 1`: directional sun-style bake — every visible
345///   voxel gets `(tp.y * 0.5 + tp.z) * 64 + 103.5` clamped to
346///   `[0, 255]` from its surface normal `tp`.
347/// - `lightmode >= 2`: per-light Lambertian bake — base
348///   `(tp.y * 0.5 + tp.z) * 16 + 47.5` minus, for each light in
349///   range with surface normal facing it, `g * h * sc` where
350///   `g = 1/(d·d²) - 1/(r·r²)` (cube falloff with hard radius
351///   cutoff) and `h = tp · light_delta`.
352///
353/// The bbox is padded by `ESTNORMRAD` on each side internally
354/// to give estnorm enough neighbourhood; that's done here too.
355/// `lights` should match the engine's full `vx5.lightsrc[]` —
356/// the function does its own per-tile range filtering.
357///
358/// Mutates `world_data` in place. Caller is responsible for any
359/// `column_offsets` / `vsid` invariants.
360pub fn update_lighting(
361    world_data: &mut [u8],
362    column_offsets: &[u32],
363    vsid: u32,
364    x0: i32,
365    y0: i32,
366    z0: i32,
367    x1: i32,
368    y1: i32,
369    z1: i32,
370    lightmode: u32,
371    lights: &[LightSrc],
372) {
373    if lightmode == 0 {
374        return;
375    }
376    let vsid_i = vsid as i32;
377    let x0p = (x0 - ESTNORMRAD).max(0);
378    let y0p = (y0 - ESTNORMRAD).max(0);
379    let z0p = (z0 - ESTNORMRAD).max(0);
380    let x1p = (x1 + ESTNORMRAD).min(vsid_i);
381    let y1p = (y1 + ESTNORMRAD).min(vsid_i);
382    let z1p = (z1 + ESTNORMRAD).min(MAXZDIM);
383    if x0p >= x1p || y0p >= y1p || z0p >= z1p {
384        return;
385    }
386
387    // Build the cache once for the whole padded bake region.
388    // The bake is tiled into 64×64 chunks with a per-tile
389    // `lightlst` filter; for our (one-shot bake) use case the
390    // full-region filter computed inside the per-voxel loop is
391    // simpler and not measurably slower at oracle bake sizes.
392    let cache = EstNormCache::build(world_data, column_offsets, vsid, x0p, y0p, x1p, y1p);
393
394    // Per-light precomputed `lightsub[i] = 1 / (sqrt(r2) * r2)` —
395    // the radius-cutoff bias that makes the light contribution go
396    // to exactly zero at distance == sqrt(r2).
397    let lightsub: Vec<f32> = lights.iter().map(|l| 1.0 / (l.r2.sqrt() * l.r2)).collect();
398
399    // R12.4.1: parallelise the per-row bake via rayon. Each `(x, y)`
400    // pair maps to a unique column slice in `world_data`
401    // (`column_offsets[col_idx]..[col_idx + 1]` ranges are pairwise
402    // disjoint — the voxalloc allocator's invariant). Rows split
403    // cleanly across worker threads; per-row x-loops stay serial to
404    // amortise rayon's per-task overhead. Speedup follows
405    // `RAYON_NUM_THREADS` (set `=1` to disable).
406    //
407    // Lighting bakes are typically rare (one-shot at scene load) but
408    // dynamic-lighting / per-edit relighting use cases call
409    // `update_lighting` per frame — at which point the parallel
410    // path matters for interactive responsiveness.
411    // Per-column byte extents `(start, end)`. After voxalloc-driven
412    // edits (e.g. cave-gen's heavy `set_spans` carve, or runtime
413    // bullet-impact carves), columns are scattered in the slab
414    // pool, so `column_offsets[i+1]` is NOT column `i`'s end byte
415    // — walk each column's slab chain via `slng()` to
416    // recover length. We pre-compute extents here serially before
417    // moving `world_data` into the parallel mutable view; the
418    // slng walk is O(slab_count) per column, typically 1-3 slabs.
419    //
420    // **Region-bounded**: only the bake rectangle `[x0p..x1p) ×
421    // [y0p..y1p)` needs extents — the per-row body indexes only
422    // those columns. Sizing the table to `vsid²` is wasteful when
423    // a small chunk-sized region is baked against a large-vsid
424    // world (e.g. S4.1 scene-graph per-chunk bake against a
425    // vsid=4096 combined view — would have been 16M slng walks per
426    // chunk × 1024 chunks = 17B slng walks). The bake-region table
427    // collapses that to `bake_region` walks per call.
428    #[allow(clippy::cast_sign_loss)]
429    let region_w = (x1p - x0p) as usize;
430    #[allow(clippy::cast_sign_loss)]
431    let region_h = (y1p - y0p) as usize;
432    let mut column_extents: Vec<(usize, usize)> = Vec::with_capacity(region_w * region_h);
433    for yi in 0..region_h {
434        #[allow(clippy::cast_possible_wrap)]
435        let y = y0p + yi as i32;
436        for xi in 0..region_w {
437            #[allow(clippy::cast_possible_wrap)]
438            let x = x0p + xi as i32;
439            #[allow(clippy::cast_sign_loss)]
440            let col_idx = (y as u32) * vsid + (x as u32);
441            let start = column_offsets[col_idx as usize] as usize;
442            let end = start + roxlap_formats::vxl::slng(&world_data[start..]);
443            column_extents.push((start, end));
444        }
445    }
446
447    let world_view = WorldDataMutView::new(world_data);
448    let row_body = |y: i32| {
449        #[allow(clippy::cast_sign_loss)]
450        let yi = (y - y0p) as usize;
451        for x in x0p..x1p {
452            #[allow(clippy::cast_sign_loss)]
453            let xi = (x - x0p) as usize;
454            let (off_start, off_end) = column_extents[yi * region_w + xi];
455            // SAFETY: each (x, y) maps to a unique col_idx; column
456            // byte ranges `[off_start, off_end)` are pairwise
457            // disjoint across distinct `col_idx` (voxalloc's
458            // free-list invariant), so no two threads write to
459            // the same byte.
460            let column = unsafe { world_view.column_slice(off_start, off_end) };
461            shade_column(column, x, y, z0p, z1p, lightmode, lights, &lightsub, &cache);
462        }
463    };
464
465    (y0p..y1p).into_par_iter().for_each(row_body);
466}
467
468/// S4B.4.b: per-chunk variant of [`update_lighting`].
469///
470/// Writes alpha bytes into one chunk's slab buffer; reads
471/// neighbour-chunk voxels through `column_reader` for `estnorm`'s
472/// 5×5×5 padding. The reader takes chunk-local `(x, y)` (which can
473/// extend `±ESTNORMRAD` past the chunk's `[0, target_vsid)` extent)
474/// and returns the column at that position — typically resolved
475/// through `Grid::chunk(IVec3)` so the bake gets seamless
476/// cross-chunk neighbourhood reads without materialising a stitched
477/// combined view (Approach C retirement, S4B.4.b).
478///
479/// `(x0, y0, z0, x1, y1, z1)` is the bake region in chunk-local
480/// coords (typically `(0, 0, 0)..(CHUNK_SIZE_XY, CHUNK_SIZE_XY,
481/// CHUNK_SIZE_Z)`). Writes clip to the target chunk's vsid; reads
482/// extend into neighbour chunks via the closure.
483///
484/// `lightmode`, `lights`, and the per-voxel arithmetic match
485/// [`update_lighting`]; only the cache build + write-region
486/// scoping differ.
487#[allow(clippy::too_many_arguments)]
488pub fn update_lighting_chunk<'r>(
489    target_data: &mut [u8],
490    target_column_offsets: &[u32],
491    target_vsid: u32,
492    x0: i32,
493    y0: i32,
494    z0: i32,
495    x1: i32,
496    y1: i32,
497    z1: i32,
498    column_reader: impl Fn(i32, i32) -> Option<&'r [u8]>,
499    lightmode: u32,
500    lights: &[LightSrc],
501) {
502    if lightmode == 0 {
503        return;
504    }
505    let target_vsid_i = target_vsid as i32;
506
507    // Padded region for the cache (cross-chunk reads via reader).
508    // Z clamps to [0, MAXZDIM) because each chunk's slab data is
509    // chunk-local in z. For stacked grids (S4B.6) the caller
510    // invokes us once per chunk-z layer; cross-chz padding at the
511    // top/bottom of a chunk gets clipped here (a follow-up could
512    // pass z-aware columns to lift this). X/y intentionally don't
513    // clamp — the reader pulls from neighbour chunks via its own
514    // coord translation.
515    let z0p = (z0 - ESTNORMRAD).max(0);
516    let z1p = (z1 + ESTNORMRAD).min(MAXZDIM);
517    // Write region clipped to the target chunk's footprint.
518    let wx0 = x0.max(0);
519    let wy0 = y0.max(0);
520    let wx1 = x1.min(target_vsid_i);
521    let wy1 = y1.min(target_vsid_i);
522    if wx0 >= wx1 || wy0 >= wy1 || z0p >= z1p {
523        return;
524    }
525
526    let cache = EstNormCache::build_with_reader(column_reader, x0, y0, x1, y1);
527    apply_lighting_with_cache(
528        target_data,
529        target_column_offsets,
530        target_vsid,
531        wx0,
532        wy0,
533        z0p,
534        wx1,
535        wy1,
536        z1p,
537        &cache,
538        lightmode,
539        lights,
540    );
541}
542
543/// S4B.4.b: write half of [`update_lighting_chunk`], split out so
544/// callers can build the [`EstNormCache`] separately (via
545/// [`EstNormCache::build_with_reader`]) and pass it in.
546///
547/// The split matters when the cache build needs an immutable grid
548/// borrow (for cross-chunk reads) and the write phase needs a
549/// mutable target-chunk borrow — the two can't coexist. The
550/// caller builds the cache first while holding the immutable
551/// borrow, drops it, then mutably borrows the target chunk and
552/// invokes this.
553///
554/// The `(x0..x1, y0..y1, z0..z1)` region must already be clipped
555/// to the target chunk's footprint (this helper does no clipping).
556/// `cache` must cover at least `[x0..x1) × [y0..y1)` (a `±ESTNORMRAD`
557/// padding is the caller's responsibility — typically built via
558/// `build_with_reader(.., x0, y0, x1, y1)` which adds the padding
559/// itself).
560#[allow(clippy::too_many_arguments)]
561pub fn apply_lighting_with_cache(
562    target_data: &mut [u8],
563    target_column_offsets: &[u32],
564    target_vsid: u32,
565    x0: i32,
566    y0: i32,
567    z0: i32,
568    x1: i32,
569    y1: i32,
570    z1: i32,
571    cache: &EstNormCache,
572    lightmode: u32,
573    lights: &[LightSrc],
574) {
575    if lightmode == 0 || x0 >= x1 || y0 >= y1 || z0 >= z1 {
576        return;
577    }
578
579    let lightsub: Vec<f32> = lights.iter().map(|l| 1.0 / (l.r2.sqrt() * l.r2)).collect();
580
581    let region_w = (x1 - x0) as usize;
582    let region_h = (y1 - y0) as usize;
583    let mut column_extents: Vec<(usize, usize)> = Vec::with_capacity(region_w * region_h);
584    for yi in 0..region_h {
585        let y = y0 + yi as i32;
586        for xi in 0..region_w {
587            let x = x0 + xi as i32;
588            let col_idx = (y as u32) * target_vsid + (x as u32);
589            let start = target_column_offsets[col_idx as usize] as usize;
590            let end = start + roxlap_formats::vxl::slng(&target_data[start..]);
591            column_extents.push((start, end));
592        }
593    }
594
595    let world_view = WorldDataMutView::new(target_data);
596    let row_body = |y: i32| {
597        let yi = (y - y0) as usize;
598        for x in x0..x1 {
599            let xi = (x - x0) as usize;
600            let (off_start, off_end) = column_extents[yi * region_w + xi];
601            // SAFETY: per-column byte ranges are pairwise disjoint
602            // across distinct `(x, y)` (voxalloc invariant).
603            let column = unsafe { world_view.column_slice(off_start, off_end) };
604            shade_column(column, x, y, z0, z1, lightmode, lights, &lightsub, cache);
605        }
606    };
607
608    (y0..y1).into_par_iter().for_each(row_body);
609}
610
/// Raw-pointer view of a world/chunk byte buffer, used by the
/// parallel bake to hand out per-column `&mut [u8]` slices to several
/// threads at once — something a single exclusive `&mut [u8]` cannot
/// do. It is constructed from one `&mut [u8]` borrow at the start of
/// the parallel section, and that borrow's lifetime `'a` bounds how
/// long the view and every slice carved from it may live.
///
/// # Safety contract
/// Callers that hand out concurrent `column_slice` references MUST
/// guarantee the requested ranges are pairwise non-overlapping. The
/// bake relies on voxalloc's invariant that distinct columns occupy
/// disjoint byte ranges.
struct WorldDataMutView<'a> {
    /// Start of the borrowed buffer.
    ptr: *mut u8,
    /// Length of the borrowed buffer in bytes.
    len: usize,
    /// Ties the view to the original exclusive borrow.
    _marker: std::marker::PhantomData<&'a mut [u8]>,
}

// SAFETY: `WorldDataMutView` is morally a `&mut [u8]` re-exposed as a
// raw pointer. The view itself only holds two immutable scalars, so
// sharing it is race-free; the disjoint-write invariant for the
// slices it hands out is the caller's obligation (see struct doc).
unsafe impl Send for WorldDataMutView<'_> {}
unsafe impl Sync for WorldDataMutView<'_> {}

impl<'a> WorldDataMutView<'a> {
    /// Capture `buf`'s pointer and length for the duration of `'a`.
    fn new(buf: &'a mut [u8]) -> Self {
        Self {
            ptr: buf.as_mut_ptr(),
            len: buf.len(),
            _marker: std::marker::PhantomData,
        }
    }

    /// Carve out the sub-slice `[off_start, off_end)` of the buffer.
    ///
    /// # Safety
    /// The requested range must not overlap any range concurrently
    /// held through this view (by this or another thread).
    ///
    /// # Panics
    /// Panics unless `off_start <= off_end <= self.len`. The bounds
    /// are checked in release builds too: the offsets are derived
    /// from slab data, and an unchecked out-of-range extent would be
    /// undefined behaviour rather than a clean failure.
    unsafe fn column_slice(&self, off_start: usize, off_end: usize) -> &'a mut [u8] {
        assert!(off_start <= off_end, "column slice: start > end");
        assert!(off_end <= self.len, "column slice: end past buffer");
        // SAFETY: the range is in bounds (asserted above) and lies
        // within the buffer borrowed for `'a`; the caller guarantees
        // it is disjoint from every other live slice.
        unsafe { std::slice::from_raw_parts_mut(self.ptr.add(off_start), off_end - off_start) }
    }
}
659
660/// Walk one column's slab chain and shade every visible voxel
661/// inside `[z_lo, z_hi)`. Mirror of the inner loop in
662/// the per-voxel bake loop.
663#[allow(clippy::cast_lossless)]
664fn shade_column(
665    column: &mut [u8],
666    x: i32,
667    y: i32,
668    z_lo: i32,
669    z_hi: i32,
670    lightmode: u32,
671    lights: &[LightSrc],
672    lightsub: &[f32],
673    cache: &EstNormCache,
674) {
675    let mut v_off: usize = 0;
676    // cstat = false ⇒ top-of-slab phase (floor colours); true ⇒
677    // ceiling-of-next-slab phase (bottom of current slab's solid
678    // mass, visible from the air pocket below).
679    let mut cstat = false;
680    loop {
681        let (sz0, sz1, voxel_byte_offset_signed): (i32, i32, isize);
682        if !cstat {
683            // Floor colours of the current slab. Voxel z=v[1]..=v[2].
684            // Alpha byte at offset (z - v[1]) * 4 + 7 from header
685            // (header is 4 bytes, voxel record is 4 bytes BGRA, +3
686            // for alpha). The formula encodes this as
687            // `(z << 2) + offs` with `offs = 7 - (v[1] << 2)`.
688            if v_off + 2 >= column.len() {
689                break;
690            }
691            let v1 = i32::from(column[v_off + 1]);
692            let v2 = i32::from(column[v_off + 2]);
693            sz0 = v1;
694            sz1 = v2 + 1;
695            voxel_byte_offset_signed = (v_off as isize) + 7 - ((sz0 as isize) << 2);
696            cstat = true;
697        } else {
698            // Ceiling colours of the next slab — must read v[0]
699            // BEFORE advancing v_off.
700            if v_off + 2 >= column.len() {
701                break;
702            }
703            let v0 = i32::from(column[v_off]);
704            let v1 = i32::from(column[v_off + 1]);
705            let v2 = i32::from(column[v_off + 2]);
706            let prev_offset = v2 - v1 - v0 + 2; // ceilnum from getcube convention
707            if v0 == 0 {
708                break;
709            }
710            v_off += (v0 as usize) * 4;
711            if v_off + 3 >= column.len() {
712                break;
713            }
714            let v3 = i32::from(column[v_off + 3]);
715            sz1 = v3;
716            sz0 = prev_offset + sz1;
717            voxel_byte_offset_signed = (v_off as isize) + 3 - ((sz1 as isize) << 2);
718            cstat = false;
719        }
720
721        let lo = sz0.max(z_lo);
722        let hi = sz1.min(z_hi);
723        for z in lo..hi {
724            let normal = cache.estnorm(x, y, z);
725            let brightness = compute_brightness(x, y, z, normal, lightmode, lights, lightsub);
726            let byte_off = voxel_byte_offset_signed + ((z as isize) << 2);
727            if byte_off >= 0 && (byte_off as usize) < column.len() {
728                column[byte_off as usize] = brightness;
729            }
730        }
731    }
732}
733
734/// Per-voxel brightness math. Computes the `[0, 255]`
735/// alpha byte for one voxel from its surface normal `tp` + the
736/// light list.
737fn compute_brightness(
738    x: i32,
739    y: i32,
740    z: i32,
741    tp: [f32; 3],
742    lightmode: u32,
743    lights: &[LightSrc],
744    lightsub: &[f32],
745) -> u8 {
746    if lightmode < 2 {
747        // Directional path: single fixed sun direction
748        // direction baked into a hardcoded coefficient pair.
749        // i = (tp.y * 0.5 + tp.z) * 64 + 103.5, clamped to [0, 255].
750        let f = (tp[1] * 0.5 + tp[2]) * 64.0 + 103.5;
751        clamp_to_byte(f)
752    } else {
753        // Point-light path. Base brightness
754        // 47.5..63.5 + per-light front-face contribution.
755        let mut f = (tp[1] * 0.5 + tp[2]) * 16.0 + 47.5;
756        let xf = x as f32;
757        let yf = y as f32;
758        let zf = z as f32;
759        for (i, light) in lights.iter().enumerate() {
760            let fx = light.pos[0] - xf;
761            let fy = light.pos[1] - yf;
762            let fz = light.pos[2] - zf;
763            // tp · light_delta: positive ⇒ surface faces away from
764            // light (back-lit, no contribution); negative ⇒ surface
765            // faces light (front-lit, lambertian contribution).
766            let h = tp[0] * fx + tp[1] * fy + tp[2] * fz;
767            if h >= 0.0 {
768                continue;
769            }
770            let g_sq = fx * fx + fy * fy + fz * fz;
771            if g_sq >= light.r2 {
772                continue;
773            }
774            // Cube-law falloff with a hard cutoff at the light radius:
775            //   g = 1/d³ - 1/r³   (d = distance, r = radius)
776            // so the contribution fades to exactly zero at `r`.
777            let g = 1.0 / (g_sq * g_sq.sqrt()) - lightsub[i];
778            f -= g * h * light.sc;
779        }
780        clamp_to_byte(f)
781    }
782}
783
/// Convert a floating-point brightness to a byte: values at or below
/// zero give `0`, values at or above 255 give `255`, and anything in
/// between is truncated toward zero. NaN maps to `0`.
#[inline]
fn clamp_to_byte(f: f32) -> u8 {
    // `clamp` passes NaN through unchanged and the float → int `as`
    // cast turns NaN into 0, so no separate NaN branch is needed.
    f.clamp(0.0, 255.0) as u8
}
795
#[cfg(test)]
mod tests {
    use super::*;

    /// Builds a `vsid × vsid` synthetic world in which every column is
    /// the same single slab spanning z = 20..=24.
    ///
    /// Each column is a 4-byte header `[nextptr = 0, z1 = 20, z2 = 24, 0]`
    /// followed by five 4-byte voxel records `[0x10, 0x20, 0x30, alpha]`,
    /// where `alpha` seeds the brightness byte so a test can tell whether
    /// the bake overwrote it.
    ///
    /// Returns `(data, offsets)`: the concatenated column bytes and, for
    /// each column, its starting byte offset into `data`, plus one final
    /// entry equal to `data.len()`.
    fn flat_floor_world(vsid: u32, alpha: u8) -> (Vec<u8>, Vec<u32>) {
        let mut col = vec![0u8, 20, 24, 0];
        for _ in 20..=24 {
            col.extend([0x10, 0x20, 0x30, alpha]);
        }

        let ncols = (vsid * vsid) as usize;
        let mut data = Vec::with_capacity(col.len() * ncols);
        let mut offsets = Vec::with_capacity(ncols + 1);
        for _ in 0..ncols {
            offsets.push(data.len() as u32);
            data.extend_from_slice(&col);
        }
        // Trailing sentinel: one-past-the-end offset.
        offsets.push(data.len() as u32);
        assert_eq!(col.len() * ncols, data.len());

        (data, offsets)
    }

    /// Pins the bit masks returned by `xbsflor` / `xbsceil` for the
    /// boundary arguments (0, 1, 31, 32) and one mid-range argument (5):
    /// `xbsflor(n)` has the low `n` bits cleared, `xbsceil(n)` has only
    /// the low `n` bits set.
    #[test]
    fn xbsflor_xbsceil_known_values() {
        assert_eq!(xbsflor(0), 0xffff_ffff);
        assert_eq!(xbsflor(1), 0xffff_fffe);
        assert_eq!(xbsflor(5), 0xffff_ffe0);
        assert_eq!(xbsflor(31), 0x8000_0000);
        assert_eq!(xbsflor(32), 0);
        assert_eq!(xbsceil(0), 0);
        assert_eq!(xbsceil(5), 0x1f);
        assert_eq!(xbsceil(31), 0x7fff_ffff);
        assert_eq!(xbsceil(32), 0xffff_ffff);
    }

    /// Single-slab column with header `[next=0, sz0=10, sz1=14, 0]`
    /// followed by five voxel records (z = 10..=14).
    ///
    /// After `expandbit256` the expected occupancy is: air for
    /// z = 0..10 (bits clear), solid for the slab z = 10..15, and solid
    /// for everything below the last slab, z = 15..256 (bits set).
    #[test]
    fn single_slab_z10_to_14_sets_correct_bits() {
        // The voxel record contents are irrelevant here; expandbit256
        // only reads the 4 header bytes v[0]..v[3].
        let mut col = vec![0u8, 10, 14, 0]; // header
        col.extend(vec![0u8; 5 * 4]); // 5 voxel records (z=10..14)

        let mut bits = [0u32; 8];
        expandbit256(&col, &mut bits);

        // Word 0 covers bits 0..32:
        //   bits 10..15 from the slab body:            0x0000_7c00
        //   bits 15..32 from "solid below last slab":  0xffff_8000
        //   combined:                                  0xffff_fc00
        assert_eq!(
            bits[0], 0xffff_fc00,
            "word 0 want 0xffff_fc00 got 0x{:08x}",
            bits[0]
        );
        // Words 1..7 lie entirely below the slab, so they are fully solid.
        for (i, w) in bits.iter().enumerate().skip(1) {
            assert_eq!(*w, 0xffff_ffff, "word {i} want -1 got 0x{:08x}", *w);
        }
    }

    /// Builds a 4×4 flat-floor world (slab at z = 20..=24) with every
    /// brightness byte pre-set to `0xab`, runs a lightmode-1
    /// `update_lighting` over the centre of the map, and checks the five
    /// voxel records of column (1, 1):
    /// (a) every brightness byte was rewritten (no longer `0xab`), and
    /// (b) every brightness byte is above 100, i.e. on the bright side.
    #[test]
    fn lightmode1_bakes_brightness_into_visible_voxels() {
        let vsid: u32 = 4;
        let (mut data, offsets) = flat_floor_world(vsid, 0xab);

        update_lighting(
            &mut data,
            &offsets,
            vsid,
            1,
            1,
            0,
            3,
            3,
            30, // bbox 1..=2 in xy, z 0..30
            1,  // lightmode 1
            &[],
        );

        // Brightness is the 4th byte of each voxel record; records
        // start right after the 4-byte slab header.
        let off1 = offsets[(1 * vsid + 1) as usize] as usize;
        let alphas: Vec<u8> = (0..5).map(|i| data[off1 + 4 + i * 4 + 3]).collect();
        for (i, &a) in alphas.iter().enumerate() {
            assert_ne!(a, 0xab, "alpha[{i}] not rewritten");
        }
        // Flat-floor voxels have ~vertical normals, so the lightmode-1
        // formula `(tp.y*0.5 + tp.z)*64 + 103.5` lands near
        // 1.0*64 + 103.5 = 167.5 — comfortably above 100.
        for (i, &a) in alphas.iter().enumerate() {
            assert!(
                a > 100,
                "alpha[{i}]={a} should be on the bright side for top-of-floor voxels"
            );
        }
    }

    /// lightmode-2 with a single light: on a 5×5 flat-floor world the
    /// light sits at (4, 2, 20), i.e. at the +x end of the y = 2 row.
    /// The top voxel record of the column nearest the light (x = 4)
    /// must end up at least as bright as that of the farthest column
    /// (x = 0).
    #[test]
    fn lightmode2_with_light_produces_per_column_variation() {
        let vsid: u32 = 5;
        let (mut data, offsets) = flat_floor_world(vsid, 0);

        let lights = [LightSrc {
            // World coords: light right next to (4, 2, 20).
            pos: [4.0, 2.0, 20.0],
            r2: 50.0 * 50.0,
            sc: 64.0,
        }];
        update_lighting(&mut data, &offsets, vsid, 0, 0, 0, 5, 5, 30, 2, &lights);

        // Brightness byte of voxel record `z_idx` in column (x, y = 2).
        let alpha_at = |x: u32, z_idx: usize| {
            let off = offsets[(2 * vsid + x) as usize] as usize;
            data[off + 4 + z_idx * 4 + 3]
        };
        let close = alpha_at(4, 0); // closest column to light
        let far = alpha_at(0, 0); // farthest
        assert!(
            close >= far,
            "column nearer the light should be ≥ as bright as the far one (close={close} far={far})"
        );
    }

    /// Header-only column `[0, 0, 0, 0]` with no voxel records.
    ///
    /// Despite the test's name, this does NOT expand to air: the header
    /// describes a last slab starting at z = 0, so bit 0 is set by the
    /// slab itself and bits 1..32 are set by the "solid below the last
    /// slab" rule. The test therefore pins word 0 to all-ones.
    #[test]
    fn empty_column_all_air() {
        let col = vec![0u8, 0, 0, 0]; // single-slab header at z=0..0, no body
        let mut bits = [0u32; 8];
        expandbit256(&col, &mut bits);
        // word 0: bit 0 (slab) | bits 1..32 (solid below) ⇒ 0xffff_ffff.
        assert_eq!(
            bits[0], 0xffff_ffff,
            "empty column word 0 want all-1 got 0x{:08x}",
            bits[0]
        );
    }
}