Skip to main content

roxlap_core/
world_lighting.rs

1//! World-voxel lighting bake.
2//!
3//! Walks every visible voxel inside a 3D bounding box and writes its
4//! per-voxel brightness byte (the high byte of the packed colour, which
5//! the renderer multiplies into the RGB — see [`crate::dda`]'s `shade`)
6//! from the engine's current `LightSrc` set + lightmode.
7//!
//! Modes (selected by `lightmode`; `0` leaves the world untouched):
//! - `lightmode == 1`: cheap directional bake — every voxel gets
//!   shading from a single fixed sun direction:
//!   `(n.y * 0.5 + n.z) * 64 + 103.5` clamped to `[0, 255]`.
//! - `lightmode == 2`: per-light point-light bake — for each light in
//!   range, subtract `g * h * sc`, where `g = 1/(d·d²) - 1/(r·r²)`
//!   (cube-falloff with a hard cutoff at radius `r`) and
//!   `h = surface_normal · light_delta` (front-lit faces contribute;
//!   back faces are skipped). Subtracted from a base
//!   `(n.y * 0.5 + n.z) * 16 + 47.5`.
//! - `lightmode == 3`: ambient-occlusion bake — open voxels keep `128`,
//!   crevices / inside corners darken (see
//!   [`EstNormCache::ambient_occlusion`]).
18//!
19//! The surface normal `n` comes from [`EstNormCache::estnorm`] — the
20//! occupancy gradient of a voxel's 5×5×5 neighbourhood.
21
22#![allow(
23    clippy::cast_possible_truncation,
24    clippy::cast_possible_wrap,
25    clippy::cast_sign_loss,
26    clippy::cast_precision_loss,
27    clippy::similar_names,
28    clippy::too_many_arguments,
29    clippy::too_many_lines,
30    clippy::doc_markdown,
31    clippy::many_single_char_names,
32    clippy::must_use_candidate,
33    clippy::unnecessary_cast,
34    clippy::cast_lossless,
35    clippy::needless_bool_assign,
36    clippy::needless_range_loop,
37    clippy::no_effect,
38    clippy::identity_op,
39    clippy::if_not_else
40)]
41
42use rayon::prelude::*;
43
44use crate::engine::LightSrc;
45
/// Height of the world along z, in voxels. A voxel's z is stored in one
/// byte, so valid depths are `0..MAXZDIM` (256 levels).
pub(crate) const MAXZDIM: i32 = 1 << 8;

/// Radius of the neighbourhood sampled when estimating a surface normal:
/// the estimate reads the solid/air pattern of the surrounding
/// `(2 * ESTNORMRAD + 1)³ = 5×5×5` cube.
pub(crate) const ESTNORMRAD: i32 = 2;

/// AO.2 — default ambient-occlusion sampling radius. It must not exceed
/// `ESTNORMRAD`, so the padding the cache already carries for normal
/// estimation is enough for AO too. The per-exposed-face method (see
/// [`EstNormCache::ambient_occlusion`]) only reacts to concave geometry
/// whatever the radius; the radius merely controls how far the darkening
/// of a concave contact spreads. `1` gives a tight one-voxel edge, larger
/// values a wider contact band.
pub(crate) const AO_RAD: i32 = 1;
// Compile-time guard for the padding requirement described above.
const _: () = assert!(ESTNORMRAD >= AO_RAD);

/// AO.0 — fraction of the open-voxel ambient that a fully occluded voxel
/// loses. With `0.8`, a deep crevice retains 20% of the ambient.
pub(crate) const AO_STRENGTH: f32 = 0.8;
65
/// AO.2 — tunable ambient-occlusion bake parameters (the `lightmode == 3`
/// knobs). [`Default`] matches the AO.0/AO.1 constants.
///
/// The type is plain `Copy` data; it also derives `PartialEq` so two
/// parameter sets can be compared directly (e.g. to detect that a re-bake
/// is unnecessary).
#[derive(Clone, Copy, Debug, PartialEq)]
pub struct AoParams {
    /// Fraction of ambient removed at full occlusion (`0` = off, `1` = black
    /// crevices before `min_floor`).
    pub strength: f32,
    /// Sampling reach in voxels (clamped to `ESTNORMRAD`). `1` = tight edge.
    pub radius: i32,
    /// Lower bound on the darkening factor — crevices never dim below
    /// `min_floor` of the open ambient (`0` ⇒ governed by `strength` alone).
    pub min_floor: f32,
}
79
80impl Default for AoParams {
81    fn default() -> Self {
82        Self {
83            strength: AO_STRENGTH,
84            radius: AO_RAD,
85            min_floor: 0.0,
86        }
87    }
88}
89
/// Mask with bits `k..=31` set and the low `k` bits clear (`!0 << k`).
/// Any `k >= 32` yields `0` rather than overflowing the shift.
///
/// [`expandbit256`] ORs this into a word to mark everything from an
/// air→solid transition up to the top of that 32-bit word as solid.
pub(crate) const fn xbsflor(k: usize) -> u32 {
    match k {
        0..=31 => u32::MAX << k,
        _ => 0,
    }
}
100
101/// `~xbsflor[k]` — low `k` bits set. Fills from the bottom of a word
102/// up to a solid→air transition.
103pub(crate) const fn xbsceil(k: usize) -> u32 {
104    !xbsflor(k)
105}
106
107/// Decode a `.vxl` slab column into a 256-bit "voxel solid" bitset,
108/// low-bit-first / low-z-first.
109///
110/// The output `bits` is a `[u32; 8]` (= 256 bits = `MAXZDIM` z
111/// levels); bit `z` is set iff the voxel at depth `z` in this column is
112/// solid (including the hidden interior between a slab's coloured top
113/// and the next slab). This is a straight read of the `.vxl` column
114/// layout: each slab record's byte 1 is its top z (air→solid) and byte
115/// 3 the next slab's bottom (solid→air). Whole 32-bit words between
116/// transitions are flushed as all-air (`0`) or all-solid (`!0`); the
117/// word holding a transition gets a partial mask via
118/// [`xbsflor`] / [`xbsceil`].
119pub(crate) fn expandbit256(column: &[u8], bits: &mut [u32; 8]) {
120    let mut src_idx: usize = 0;
121    let mut dst_idx: usize = 0;
122    let mut bitpos: i32 = 32;
123    let mut word: u32 = 0;
124    let nbits: i32 = (bits.len() as i32) * 32;
125
126    // First iteration: jump straight to the v[1] transition (no
127    // preceding slab whose v[3] we'd need to flush).
128    let mut next_len: i32;
129    let mut delta: i32;
130    let mut go_to_v3 = false;
131
132    'outer: loop {
133        if go_to_v3 {
134            // v[3] : solid → air transition.
135            if src_idx + 3 >= column.len() {
136                break;
137            }
138            delta = i32::from(column[src_idx + 3]) - bitpos;
139            while delta >= 0 {
140                if dst_idx >= bits.len() {
141                    break 'outer;
142                }
143                bits[dst_idx] = word;
144                dst_idx += 1;
145                word = u32::MAX;
146                bitpos += 32;
147                delta -= 32;
148            }
149            word &= xbsceil((delta + 32) as usize);
150        }
151        go_to_v3 = true;
152
153        // v[1] : air → solid transition.
154        if src_idx + 1 >= column.len() {
155            break;
156        }
157        delta = i32::from(column[src_idx + 1]) - bitpos;
158        while delta >= 0 {
159            if dst_idx >= bits.len() {
160                break 'outer;
161            }
162            bits[dst_idx] = word;
163            dst_idx += 1;
164            word = 0;
165            bitpos += 32;
166            delta -= 32;
167        }
168        word |= xbsflor((delta + 32) as usize);
169
170        next_len = i32::from(column[src_idx]);
171        if next_len == 0 {
172            break;
173        }
174        src_idx += (next_len as usize) * 4;
175    }
176
177    // Pad the rest of the buffer with `word`'s tail value (in C the
178    // post-loop word is whatever the last `v[1]` partial-set
179    // produced; remaining whole-words flush as solid `-1`).
180    if bitpos <= nbits {
181        while dst_idx < bits.len() {
182            bits[dst_idx] = word;
183            dst_idx += 1;
184            word = u32::MAX;
185        }
186    }
187}
188
/// Test bit `z` (`0..256`) of a `[u32; 8]` z-column bitset; bit `z` lives
/// in word `z / 32` at position `z % 32`. Panics if `z >= 256`.
#[inline]
pub(crate) fn bit256(bits: &[u32; 8], z: usize) -> bool {
    let word = bits[z / 32];
    let mask = 1u32 << (z % 32);
    (word & mask) != 0
}
194
/// Per-column solid/air bitset grid covering a 2D bounding region —
/// `(x1 - x0 + 2*RAD) × (y1 - y0 + 2*RAD)` columns. Decoding each
/// column to a bitset once turns the estnorm 5×5×5 neighbourhood query
/// into O(1) bit tests. Each column costs 32 bytes, so a 448×448 bake
/// (452×452 columns with padding) needs about 6.5 MB.
#[allow(dead_code)] // vsid field/method preserved for inspection
pub struct EstNormCache {
    /// Per-column bit arrays. `bits[yidx * width + xidx]` is the
    /// solid/air bitset of column `(origin_x + xidx, origin_y + yidx)`.
    bits: Vec<[u32; 8]>,
    /// Top-left of the cache window in world coords (= original
    /// `x0 - RAD`).
    origin_x: i32,
    /// Same as `origin_x`, for y (= original `y0 - RAD`).
    origin_y: i32,
    /// Cached-region width (= `x1 - x0 + 2 * RAD`); also the row stride
    /// of `bits` and of the z overlays.
    width: usize,
    /// Cached-region height (= `y1 - y0 + 2 * RAD`). The z-aware build
    /// ([`Self::build_with_reader_z`]) iterates `0..height` rows when it
    /// fills the overlays, so this field is live, not merely kept for
    /// inspection.
    height: usize,
    /// Voxel-grid limit (= `vsid`) used for out-of-bounds clamps.
    vsid: i32,
    /// AO cross-chunk z continuity (stacked grids, S4B.6). When
    /// non-empty (a z-aware build via [`Self::build_with_reader_z`]),
    /// these hold the solidity of the `ESTNORMRAD` voxels just outside
    /// the `[0, MAXZDIM)` z-window — read from the chunks stacked above
    /// (`chz-1`, world-z above) and below (`chz+1`, world-z below). Bit
    /// `i` of `z_below[col]` ⇒ the voxel at `z = -1 - i` is solid; bit
    /// `i` of `z_above[col]` ⇒ the voxel at `z = MAXZDIM + i` is solid.
    /// Note the names follow the z *index* (below `0` / above
    /// `MAXZDIM - 1`), so `z_below` is fed by the `chz-1` chunk and
    /// `z_above` by the `chz+1` chunk.
    /// Empty ⇒ single-layer bake, [`Self::solid`] uses the implicit
    /// air-above / bedrock-below boundary (unchanged).
    z_below: Vec<u8>,
    z_above: Vec<u8>,
}
229
230impl EstNormCache {
231    /// Build the bit-grid cache covering the bounding region
232    /// `[x0..x1) × [y0..y1)` extended by `ESTNORMRAD` padding on
233    /// each side. Calling [`Self::estnorm`] for any `(x, y)` inside
234    /// the original `[x0..x1) × [y0..y1)` box is then a pure read.
235    ///
236    /// Wraps [`Self::build_with_reader`] with a flat-table closure.
237    #[must_use]
238    pub fn build(
239        world_data: &[u8],
240        column_offsets: &[u32],
241        vsid: u32,
242        x0: i32,
243        y0: i32,
244        x1: i32,
245        y1: i32,
246    ) -> Self {
247        let vsid_i = vsid as i32;
248        let reader = |x: i32, y: i32| -> Option<&[u8]> {
249            if (x | y) < 0 || x >= vsid_i || y >= vsid_i {
250                return None;
251            }
252            let col_idx = (y as u32) * vsid + (x as u32);
253            let off_start = column_offsets[col_idx as usize] as usize;
254            // Slice to end-of-buffer; the slab walker self-
255            // terminates via nextptr.
256            Some(&world_data[off_start..])
257        };
258        let mut cache = Self::build_with_reader(reader, x0, y0, x1, y1);
259        cache.vsid = vsid_i;
260        cache
261    }
262
263    /// Z-aware variant of [`Self::build_with_reader`] for **stacked
264    /// grids**. `column_reader(x, y, chz_delta)` returns the slab bytes
265    /// of the column at cache-XY `(x, y)` in the chunk `chz_delta`
266    /// layers away in z (`0` = the target layer, `-1` = the layer above
267    /// in world-z, `+1` = below), or `None` for implicit-air / missing.
268    ///
269    /// The `chz_delta == 0` reads build the in-plane cache exactly like
270    /// [`Self::build_with_reader`]; the `±1` reads populate the
271    /// [`Self::z_below`] / [`Self::z_above`] overlays so [`Self::solid`]
272    /// — and therefore [`Self::ambient_occlusion`] / [`Self::estnorm`] —
273    /// see the neighbouring chunk's voxels across the z-seam instead of
274    /// the implicit air-above / bedrock-below boundary. Where a z-
275    /// neighbour is absent the overlay falls back to that same boundary
276    /// (air above, solid below), so a topmost/bottommost chunk bakes
277    /// identically to the single-layer path.
278    #[must_use]
279    pub fn build_with_reader_z<'r>(
280        column_reader: impl Fn(i32, i32, i32) -> Option<&'r [u8]>,
281        x0: i32,
282        y0: i32,
283        x1: i32,
284        y1: i32,
285    ) -> Self {
286        let mut cache = Self::build_with_reader(|x, y| column_reader(x, y, 0), x0, y0, x1, y1);
287
288        let n = cache.bits.len();
289        let mut z_below = vec![0u8; n];
290        let mut z_above = vec![0u8; n];
291        // `chz_delta = -1` is the chunk above in world-z; its bottom-most
292        // world-z voxels (its z-local `MAXZDIM-1, MAXZDIM-2`) sit at our
293        // `z = -1, -2`. `chz_delta = +1` is below; its top voxels (z-local
294        // `0, 1`) sit at our `z = MAXZDIM, MAXZDIM+1`.
295        let pad = ESTNORMRAD as usize;
296        for yi in 0..cache.height {
297            let y = cache.origin_y + yi as i32;
298            for xi in 0..cache.width {
299                let x = cache.origin_x + xi as i32;
300                let col = yi * cache.width + xi;
301
302                if let Some(column) = column_reader(x, y, -1) {
303                    let mut tmp = [0u32; 8];
304                    expandbit256(column, &mut tmp);
305                    for i in 0..pad {
306                        // world z = -1 - i  ⇐  neighbour z-local MAXZDIM-1-i.
307                        if bit256(&tmp, (MAXZDIM as usize) - 1 - i) {
308                            z_below[col] |= 1 << i;
309                        }
310                    }
311                }
312                // Absent neighbour above ⇒ leave bits clear (air), matching
313                // the implicit `z < 0 → air` boundary.
314
315                if let Some(column) = column_reader(x, y, 1) {
316                    let mut tmp = [0u32; 8];
317                    expandbit256(column, &mut tmp);
318                    for i in 0..pad {
319                        // world z = MAXZDIM + i  ⇐  neighbour z-local i.
320                        if bit256(&tmp, i) {
321                            z_above[col] |= 1 << i;
322                        }
323                    }
324                } else {
325                    // Absent neighbour below ⇒ solid (bedrock), matching the
326                    // implicit `z >= MAXZDIM → solid` boundary.
327                    z_above[col] = ((1u32 << pad) - 1) as u8;
328                }
329            }
330        }
331
332        cache.z_below = z_below;
333        cache.z_above = z_above;
334        cache
335    }
336
337    /// S4B.4.b: chunk-aware cache build. The closure
338    /// `column_reader(x, y)` returns the slab bytes of the column
339    /// at world-or-grid-local position `(x, y)`, or `None` for an
340    /// implicit-air / out-of-grid column (matching `build`'s OOB
341    /// "treat as full air" semantics).
342    ///
343    /// No vsid bound — the reader owns OOB handling. Per-chunk
344    /// bakes use a closure that resolves `(x, y)` to a neighbour
345    /// chunk via `Grid::chunk(IVec3)` so the 2-voxel padding
346    /// extends seamlessly across chunk boundaries.
347    ///
348    /// The cache's [`Self::vsid`] field is left at `0` for chunk-
349    /// aware builds — the field is dead-code anyway, preserved
350    /// only for inspection.
351    #[must_use]
352    pub fn build_with_reader<'r>(
353        column_reader: impl Fn(i32, i32) -> Option<&'r [u8]>,
354        x0: i32,
355        y0: i32,
356        x1: i32,
357        y1: i32,
358    ) -> Self {
359        let rad = ESTNORMRAD;
360        let pad_x0 = x0 - rad;
361        let pad_y0 = y0 - rad;
362        let pad_x1 = x1 + rad;
363        let pad_y1 = y1 + rad;
364        let width = (pad_x1 - pad_x0) as usize;
365        let height = (pad_y1 - pad_y0) as usize;
366
367        let mut bits = vec![[0u32; 8]; width * height];
368        for yi in 0..height {
369            let y = pad_y0 + yi as i32;
370            for xi in 0..width {
371                let x = pad_x0 + xi as i32;
372                if let Some(column) = column_reader(x, y) {
373                    expandbit256(column, &mut bits[yi * width + xi]);
374                }
375                // None → leave the cache slot zeroed (treat as full
376                // air), matching `build`'s OOB behaviour.
377            }
378        }
379
380        Self {
381            bits,
382            origin_x: pad_x0,
383            origin_y: pad_y0,
384            width,
385            height,
386            vsid: 0,
387            z_below: Vec::new(),
388            z_above: Vec::new(),
389        }
390    }
391
392    /// Whether the voxel at cache-column `(xi, yi)`, depth `z` is solid.
393    /// Out of the `[0, MAXZDIM)` z range: by default everything above
394    /// the world is air and everything below is solid (bedrock); a
395    /// z-aware build ([`Self::build_with_reader_z`]) instead reads the
396    /// stacked-neighbour chunk's voxels for the first `ESTNORMRAD`
397    /// levels past each boundary (the `z_below` / `z_above` overlays),
398    /// so AO is continuous across a chunk z-seam.
399    #[inline]
400    fn solid(&self, xi: usize, yi: usize, z: i32) -> bool {
401        if z < 0 {
402            let i = (-1 - z) as usize;
403            if !self.z_below.is_empty() && i < ESTNORMRAD as usize {
404                return (self.z_below[yi * self.width + xi] >> i) & 1 != 0;
405            }
406            return false;
407        }
408        if z >= MAXZDIM {
409            let i = (z - MAXZDIM) as usize;
410            if !self.z_above.is_empty() && i < ESTNORMRAD as usize {
411                return (self.z_above[yi * self.width + xi] >> i) & 1 != 0;
412            }
413            return true;
414        }
415        let col = &self.bits[yi * self.width + xi];
416        let z = z as usize;
417        (col[z >> 5] >> (z & 31)) & 1 != 0
418    }
419
420    /// Estimate the surface orientation at solid voxel `(x, y, z)` as
421    /// the **occupancy gradient** of its 5×5×5 neighbourhood:
422    ///
423    /// ```text
424    /// n = Σ_{solid neighbours} offset,   normal = n / |n|
425    /// ```
426    ///
427    /// (the sum runs over `offset ∈ [-2, 2]³`). `n` points toward the
428    /// denser (solid) side; the lighting formulas in [`update_lighting`]
429    /// are calibrated to that orientation. On a flat surface the solid
430    /// half-space cancels laterally and leaves `n` along the inward
431    /// axis. An all-solid or all-air neighbourhood gives `n = 0` →
432    /// `(0, 0, 0)`, which the lighting math treats as unlit.
433    ///
434    /// `(x, y)` must lie inside the cache's `[x0..x1) × [y0..y1)` region
435    /// (the padded border supplies the ±2 neighbours); `z` is
436    /// unconstrained.
437    #[must_use]
438    #[allow(clippy::cast_precision_loss)]
439    pub fn estnorm(&self, x: i32, y: i32, z: i32) -> [f32; 3] {
440        let cx = (x - self.origin_x) as i32;
441        let cy = (y - self.origin_y) as i32;
442
443        let mut nx = 0i32;
444        let mut ny = 0i32;
445        let mut nz = 0i32;
446        for dy in -ESTNORMRAD..=ESTNORMRAD {
447            let yi = (cy + dy) as usize;
448            for dx in -ESTNORMRAD..=ESTNORMRAD {
449                let xi = (cx + dx) as usize;
450                for dz in -ESTNORMRAD..=ESTNORMRAD {
451                    if self.solid(xi, yi, z + dz) {
452                        nx += dx;
453                        ny += dy;
454                        nz += dz;
455                    }
456                }
457            }
458        }
459
460        let len_sq = nx * nx + ny * ny + nz * nz;
461        if len_sq == 0 {
462            return [0.0, 0.0, 0.0];
463        }
464        let inv = 1.0 / (len_sq as f32).sqrt();
465        [nx as f32 * inv, ny as f32 * inv, nz as f32 * inv]
466    }
467
468    /// AO.2 — ambient occlusion at solid voxel `(x, y, z)`. Returns `0.0`
469    /// (open, e.g. a flat floor under open sky, or a convex edge) … `1.0`
470    /// (fully enclosed).
471    ///
472    /// **Per-exposed-face**, normal-free: for each of the 6 axis faces whose
473    /// immediate neighbour is air (an exposed face), sample the `±AO_RAD`
474    /// half-space **in front of that face** (`offset · face_dir > 0`) and
475    /// measure how much of it is solid (inverse-distance weighted). This only
476    /// fires at **concave** corners — a perpendicular solid sitting in front of
477    /// an exposed face. Flat faces and **convex** edges read `0` (the space in
478    /// front of every exposed face is air). Crucially it does **not** use the
479    /// `estnorm` gradient normal, which tilts near a convex edge and would make
480    /// a voxel's own folded-over surface (e.g. a pillar's top above its side
481    /// face) count as occlusion — the "pillow" border on every edge.
482    /// `radius` is the sampling reach (clamped to `ESTNORMRAD`, the cache's
483    /// padding); `1` = a tight 1-voxel concave edge, `2` = a wider contact.
484    #[must_use]
485    #[allow(clippy::cast_precision_loss)]
486    pub fn ambient_occlusion(&self, x: i32, y: i32, z: i32, radius: i32) -> f32 {
487        const FACES: [[i32; 3]; 6] = [
488            [-1, 0, 0],
489            [1, 0, 0],
490            [0, -1, 0],
491            [0, 1, 0],
492            [0, 0, -1],
493            [0, 0, 1],
494        ];
495        let r = radius.clamp(1, ESTNORMRAD);
496        let cx = (x - self.origin_x) as i32;
497        let cy = (y - self.origin_y) as i32;
498        let mut occ = 0.0f32;
499        let mut total = 0.0f32;
500        for f in FACES {
501            // Only exposed faces (immediate neighbour is air) contribute.
502            if self.solid((cx + f[0]) as usize, (cy + f[1]) as usize, z + f[2]) {
503                continue;
504            }
505            for dy in -r..=r {
506                for dx in -r..=r {
507                    for dz in -r..=r {
508                        // Only the half-space strictly in front of this face.
509                        if dx * f[0] + dy * f[1] + dz * f[2] <= 0 {
510                            continue;
511                        }
512                        let d = [dx as f32, dy as f32, dz as f32];
513                        let w = 1.0 / (d[0] * d[0] + d[1] * d[1] + d[2] * d[2]).sqrt();
514                        total += w;
515                        if self.solid((cx + dx) as usize, (cy + dy) as usize, z + dz) {
516                            occ += w;
517                        }
518                    }
519                }
520            }
521        }
522        if total <= 0.0 {
523            0.0
524        } else {
525            occ / total
526        }
527    }
528
529    /// Voxel-grid limit; used by callers to bound their iteration.
530    #[must_use]
531    #[allow(dead_code)]
532    pub(crate) fn vsid(&self) -> i32 {
533        self.vsid
534    }
535}
536
537/// Bake per-voxel lighting into the world's brightness bytes.
538/// Bakes per-voxel brightness over a 3D bounding box.
539///
540/// Walks every visible voxel inside `[x0..x1) × [y0..y1) ×
541/// [z0..z1)` and rewrites its alpha byte (the brightness channel
542/// the rasterizer mulhi'es against `kv6colmul` modulators) under
543/// the current `lightmode` + `lights` state.
544///
545/// - `lightmode == 0`: no-op (fast return).
546/// - `lightmode == 1`: directional sun-style bake — every visible
547///   voxel gets `(tp.y * 0.5 + tp.z) * 64 + 103.5` clamped to
548///   `[0, 255]` from its surface normal `tp`.
549/// - `lightmode == 2`: per-light Lambertian bake — base
550///   `(tp.y * 0.5 + tp.z) * 16 + 47.5` minus, for each light in
551///   range with surface normal facing it, `g * h * sc` where
552///   `g = 1/(d·d²) - 1/(r·r²)` (cube falloff with hard radius
553///   cutoff) and `h = tp · light_delta`.
554/// - `lightmode == 3` (AO): bake **ambient occlusion** into the byte
555///   (the DL ambient/AO channel) — open voxels keep `128`, crevices /
556///   inside corners darken (see [`EstNormCache::ambient_occlusion`]).
557///   The retro stylized lighting reads this byte as its ambient fill.
558///
559/// The bbox is padded by `ESTNORMRAD` on each side internally
560/// to give estnorm enough neighbourhood; that's done here too.
561/// `lights` should match the engine's full `vx5.lightsrc[]` —
562/// the function does its own per-tile range filtering.
563///
564/// Mutates `world_data` in place. Caller is responsible for any
565/// `column_offsets` / `vsid` invariants.
566pub fn update_lighting(
567    world_data: &mut [u8],
568    column_offsets: &[u32],
569    vsid: u32,
570    x0: i32,
571    y0: i32,
572    z0: i32,
573    x1: i32,
574    y1: i32,
575    z1: i32,
576    lightmode: u32,
577    lights: &[LightSrc],
578) {
579    if lightmode == 0 {
580        return;
581    }
582    let vsid_i = vsid as i32;
583    let x0p = (x0 - ESTNORMRAD).max(0);
584    let y0p = (y0 - ESTNORMRAD).max(0);
585    let z0p = (z0 - ESTNORMRAD).max(0);
586    let x1p = (x1 + ESTNORMRAD).min(vsid_i);
587    let y1p = (y1 + ESTNORMRAD).min(vsid_i);
588    let z1p = (z1 + ESTNORMRAD).min(MAXZDIM);
589    if x0p >= x1p || y0p >= y1p || z0p >= z1p {
590        return;
591    }
592
593    // Build the cache once for the whole padded bake region.
594    // The bake is tiled into 64×64 chunks with a per-tile
595    // `lightlst` filter; for our (one-shot bake) use case the
596    // full-region filter computed inside the per-voxel loop is
597    // simpler and not measurably slower at oracle bake sizes.
598    let cache = EstNormCache::build(world_data, column_offsets, vsid, x0p, y0p, x1p, y1p);
599
600    // Per-light precomputed `lightsub[i] = 1 / (sqrt(r2) * r2)` —
601    // the radius-cutoff bias that makes the light contribution go
602    // to exactly zero at distance == sqrt(r2).
603    let lightsub: Vec<f32> = lights.iter().map(|l| 1.0 / (l.r2.sqrt() * l.r2)).collect();
604
605    // R12.4.1: parallelise the per-row bake via rayon. Each `(x, y)`
606    // pair maps to a unique column slice in `world_data`
607    // (`column_offsets[col_idx]..[col_idx + 1]` ranges are pairwise
608    // disjoint — the voxalloc allocator's invariant). Rows split
609    // cleanly across worker threads; per-row x-loops stay serial to
610    // amortise rayon's per-task overhead. Speedup follows
611    // `RAYON_NUM_THREADS` (set `=1` to disable).
612    //
613    // Lighting bakes are typically rare (one-shot at scene load) but
614    // dynamic-lighting / per-edit relighting use cases call
615    // `update_lighting` per frame — at which point the parallel
616    // path matters for interactive responsiveness.
617    // Per-column byte extents `(start, end)`. After voxalloc-driven
618    // edits (e.g. cave-gen's heavy `set_spans` carve, or runtime
619    // bullet-impact carves), columns are scattered in the slab
620    // pool, so `column_offsets[i+1]` is NOT column `i`'s end byte
621    // — walk each column's slab chain via `slng()` to
622    // recover length. We pre-compute extents here serially before
623    // moving `world_data` into the parallel mutable view; the
624    // slng walk is O(slab_count) per column, typically 1-3 slabs.
625    //
626    // **Region-bounded**: only the bake rectangle `[x0p..x1p) ×
627    // [y0p..y1p)` needs extents — the per-row body indexes only
628    // those columns. Sizing the table to `vsid²` is wasteful when
629    // a small chunk-sized region is baked against a large-vsid
630    // world (e.g. S4.1 scene-graph per-chunk bake against a
631    // vsid=4096 combined view — would have been 16M slng walks per
632    // chunk × 1024 chunks = 17B slng walks). The bake-region table
633    // collapses that to `bake_region` walks per call.
634    #[allow(clippy::cast_sign_loss)]
635    let region_w = (x1p - x0p) as usize;
636    #[allow(clippy::cast_sign_loss)]
637    let region_h = (y1p - y0p) as usize;
638    let mut column_extents: Vec<(usize, usize)> = Vec::with_capacity(region_w * region_h);
639    for yi in 0..region_h {
640        #[allow(clippy::cast_possible_wrap)]
641        let y = y0p + yi as i32;
642        for xi in 0..region_w {
643            #[allow(clippy::cast_possible_wrap)]
644            let x = x0p + xi as i32;
645            #[allow(clippy::cast_sign_loss)]
646            let col_idx = (y as u32) * vsid + (x as u32);
647            let start = column_offsets[col_idx as usize] as usize;
648            let end = start + roxlap_formats::vxl::slng(&world_data[start..]);
649            column_extents.push((start, end));
650        }
651    }
652
653    let world_view = WorldDataMutView::new(world_data);
654    let row_body = |y: i32| {
655        #[allow(clippy::cast_sign_loss)]
656        let yi = (y - y0p) as usize;
657        for x in x0p..x1p {
658            #[allow(clippy::cast_sign_loss)]
659            let xi = (x - x0p) as usize;
660            let (off_start, off_end) = column_extents[yi * region_w + xi];
661            // SAFETY: each (x, y) maps to a unique col_idx; column
662            // byte ranges `[off_start, off_end)` are pairwise
663            // disjoint across distinct `col_idx` (voxalloc's
664            // free-list invariant), so no two threads write to
665            // the same byte.
666            let column = unsafe { world_view.column_slice(off_start, off_end) };
667            // AO (lightmode 3) via this engine entry uses default params.
668            shade_column(
669                column,
670                x,
671                y,
672                z0p,
673                z1p,
674                lightmode,
675                lights,
676                &lightsub,
677                &cache,
678                AoParams::default(),
679            );
680        }
681    };
682
683    (y0p..y1p).into_par_iter().for_each(row_body);
684}
685
686/// S4B.4.b: per-chunk variant of [`update_lighting`].
687///
688/// Writes alpha bytes into one chunk's slab buffer; reads
689/// neighbour-chunk voxels through `column_reader` for `estnorm`'s
690/// 5×5×5 padding. The reader takes chunk-local `(x, y)` (which can
691/// extend `±ESTNORMRAD` past the chunk's `[0, target_vsid)` extent)
692/// and returns the column at that position — typically resolved
693/// through `Grid::chunk(IVec3)` so the bake gets seamless
694/// cross-chunk neighbourhood reads without materialising a stitched
695/// combined view (Approach C retirement, S4B.4.b).
696///
697/// `(x0, y0, z0, x1, y1, z1)` is the bake region in chunk-local
698/// coords (typically `(0, 0, 0)..(CHUNK_SIZE_XY, CHUNK_SIZE_XY,
699/// CHUNK_SIZE_Z)`). Writes clip to the target chunk's vsid; reads
700/// extend into neighbour chunks via the closure.
701///
702/// `lightmode`, `lights`, and the per-voxel arithmetic match
703/// [`update_lighting`]; only the cache build + write-region
704/// scoping differ.
705#[allow(clippy::too_many_arguments)]
706pub fn update_lighting_chunk<'r>(
707    target_data: &mut [u8],
708    target_column_offsets: &[u32],
709    target_vsid: u32,
710    x0: i32,
711    y0: i32,
712    z0: i32,
713    x1: i32,
714    y1: i32,
715    z1: i32,
716    column_reader: impl Fn(i32, i32) -> Option<&'r [u8]>,
717    lightmode: u32,
718    lights: &[LightSrc],
719) {
720    if lightmode == 0 {
721        return;
722    }
723    let target_vsid_i = target_vsid as i32;
724
725    // Padded region for the cache (cross-chunk reads via reader).
726    // Z clamps to [0, MAXZDIM) because each chunk's slab data is
727    // chunk-local in z. This XY-only reader leaves the top/bottom
728    // boundary at the implicit air-above / bedrock-below default;
729    // callers that bake stacked grids and want AO/estnorm continuous
730    // across the z-seam build the cache via
731    // [`EstNormCache::build_with_reader_z`] (a `chz`-aware reader) and
732    // call [`apply_lighting_with_cache`] directly. X/y intentionally
733    // don't clamp — the reader pulls from neighbour chunks via its own
734    // coord translation.
735    let z0p = (z0 - ESTNORMRAD).max(0);
736    let z1p = (z1 + ESTNORMRAD).min(MAXZDIM);
737    // Write region clipped to the target chunk's footprint.
738    let wx0 = x0.max(0);
739    let wy0 = y0.max(0);
740    let wx1 = x1.min(target_vsid_i);
741    let wy1 = y1.min(target_vsid_i);
742    if wx0 >= wx1 || wy0 >= wy1 || z0p >= z1p {
743        return;
744    }
745
746    let cache = EstNormCache::build_with_reader(column_reader, x0, y0, x1, y1);
747    apply_lighting_with_cache(
748        target_data,
749        target_column_offsets,
750        target_vsid,
751        wx0,
752        wy0,
753        z0p,
754        wx1,
755        wy1,
756        z1p,
757        &cache,
758        lightmode,
759        lights,
760        AoParams::default(),
761    );
762}
763
764/// S4B.4.b: write half of [`update_lighting_chunk`], split out so
765/// callers can build the [`EstNormCache`] separately (via
766/// [`EstNormCache::build_with_reader`]) and pass it in.
767///
768/// The split matters when the cache build needs an immutable grid
769/// borrow (for cross-chunk reads) and the write phase needs a
770/// mutable target-chunk borrow — the two can't coexist. The
771/// caller builds the cache first while holding the immutable
772/// borrow, drops it, then mutably borrows the target chunk and
773/// invokes this.
774///
775/// The `(x0..x1, y0..y1, z0..z1)` region must already be clipped
776/// to the target chunk's footprint (this helper does no clipping).
777/// `cache` must cover at least `[x0..x1) × [y0..y1)` (a `±ESTNORMRAD`
778/// padding is the caller's responsibility — typically built via
779/// `build_with_reader(.., x0, y0, x1, y1)` which adds the padding
780/// itself).
781#[allow(clippy::too_many_arguments)]
782pub fn apply_lighting_with_cache(
783    target_data: &mut [u8],
784    target_column_offsets: &[u32],
785    target_vsid: u32,
786    x0: i32,
787    y0: i32,
788    z0: i32,
789    x1: i32,
790    y1: i32,
791    z1: i32,
792    cache: &EstNormCache,
793    lightmode: u32,
794    lights: &[LightSrc],
795    ao: AoParams,
796) {
797    if lightmode == 0 || x0 >= x1 || y0 >= y1 || z0 >= z1 {
798        return;
799    }
800
801    let lightsub: Vec<f32> = lights.iter().map(|l| 1.0 / (l.r2.sqrt() * l.r2)).collect();
802
803    let region_w = (x1 - x0) as usize;
804    let region_h = (y1 - y0) as usize;
805    let mut column_extents: Vec<(usize, usize)> = Vec::with_capacity(region_w * region_h);
806    for yi in 0..region_h {
807        let y = y0 + yi as i32;
808        for xi in 0..region_w {
809            let x = x0 + xi as i32;
810            let col_idx = (y as u32) * target_vsid + (x as u32);
811            let start = target_column_offsets[col_idx as usize] as usize;
812            let end = start + roxlap_formats::vxl::slng(&target_data[start..]);
813            column_extents.push((start, end));
814        }
815    }
816
817    let world_view = WorldDataMutView::new(target_data);
818    let row_body = |y: i32| {
819        let yi = (y - y0) as usize;
820        for x in x0..x1 {
821            let xi = (x - x0) as usize;
822            let (off_start, off_end) = column_extents[yi * region_w + xi];
823            // SAFETY: per-column byte ranges are pairwise disjoint
824            // across distinct `(x, y)` (voxalloc invariant).
825            let column = unsafe { world_view.column_slice(off_start, off_end) };
826            shade_column(
827                column, x, y, z0, z1, lightmode, lights, &lightsub, cache, ao,
828            );
829        }
830    };
831
832    (y0..y1).into_par_iter().for_each(row_body);
833}
834
/// A `&mut [u8]` over `world_data`, taken apart into a raw pointer and a
/// length so the parallel [`update_lighting`] body can give each worker
/// its own per-column `&mut [u8]` without every thread needing the
/// (exclusive) `&mut Vec<u8>`. It is created from one `&mut [u8]` borrow
/// at the start of the parallel section, and that borrow's lifetime `'a`
/// bounds how long the view and the slices it hands out may be used.
///
/// # Safety contract
/// Whoever requests `column_slice` references concurrently MUST make sure
/// the requested ranges never overlap between threads.
/// [`update_lighting`]'s call site relies on voxalloc's
/// per-column-disjoint-byte-range invariant for that.
struct WorldDataMutView<'a> {
    /// First byte of the borrowed buffer.
    ptr: *mut u8,
    /// Length of the borrowed buffer in bytes.
    len: usize,
    /// Ties the view to the original `&'a mut [u8]` borrow.
    _marker: std::marker::PhantomData<&'a mut [u8]>,
}

// SAFETY: `WorldDataMutView` is morally a `&mut [u8]` re-exposed as raw
// pointers. Keeping writes disjoint is the caller's obligation; reading
// the `ptr` / `len` fields from several threads is race-free because they
// are immutable scalars.
unsafe impl Send for WorldDataMutView<'_> {}
unsafe impl Sync for WorldDataMutView<'_> {}

impl<'a> WorldDataMutView<'a> {
    /// Capture `buf`'s base pointer and length for the duration of `'a`.
    fn new(buf: &'a mut [u8]) -> Self {
        let len = buf.len();
        let ptr = buf.as_mut_ptr();
        Self {
            ptr,
            len,
            _marker: std::marker::PhantomData,
        }
    }

    /// Hand out the byte range `[off_start, off_end)` as a mutable slice.
    /// Keeping concurrently held ranges disjoint is up to the caller (see
    /// the struct documentation).
    ///
    /// # Safety
    /// `off_start <= off_end <= self.len`, and the requested range
    /// must not overlap with ranges concurrently held by other
    /// threads. The bounds are only checked in debug builds.
    unsafe fn column_slice(&self, off_start: usize, off_end: usize) -> &'a mut [u8] {
        debug_assert!(off_start <= off_end, "column slice: start > end");
        debug_assert!(off_end <= self.len, "column slice: end past buffer");
        let span = off_end - off_start;
        // SAFETY: caller asserts in-bounds + disjoint-from-other-threads.
        unsafe {
            let first = self.ptr.add(off_start);
            std::slice::from_raw_parts_mut(first, span)
        }
    }
}
883
884/// Walk one column's slab chain and shade every visible voxel
885/// inside `[z_lo, z_hi)`. Mirror of the inner loop in
886/// the per-voxel bake loop.
887#[allow(clippy::cast_lossless)]
888fn shade_column(
889    column: &mut [u8],
890    x: i32,
891    y: i32,
892    z_lo: i32,
893    z_hi: i32,
894    lightmode: u32,
895    lights: &[LightSrc],
896    lightsub: &[f32],
897    cache: &EstNormCache,
898    ao: AoParams,
899) {
900    let mut v_off: usize = 0;
901    // cstat = false ⇒ top-of-slab phase (floor colours); true ⇒
902    // ceiling-of-next-slab phase (bottom of current slab's solid
903    // mass, visible from the air pocket below).
904    let mut cstat = false;
905    loop {
906        let (sz0, sz1, voxel_byte_offset_signed): (i32, i32, isize);
907        if !cstat {
908            // Floor colours of the current slab. Voxel z=v[1]..=v[2].
909            // Alpha byte at offset (z - v[1]) * 4 + 7 from header
910            // (header is 4 bytes, voxel record is 4 bytes BGRA, +3
911            // for alpha). The formula encodes this as
912            // `(z << 2) + offs` with `offs = 7 - (v[1] << 2)`.
913            if v_off + 2 >= column.len() {
914                break;
915            }
916            let v1 = i32::from(column[v_off + 1]);
917            let v2 = i32::from(column[v_off + 2]);
918            sz0 = v1;
919            sz1 = v2 + 1;
920            voxel_byte_offset_signed = (v_off as isize) + 7 - ((sz0 as isize) << 2);
921            cstat = true;
922        } else {
923            // Ceiling colours of the next slab — must read v[0]
924            // BEFORE advancing v_off.
925            if v_off + 2 >= column.len() {
926                break;
927            }
928            let v0 = i32::from(column[v_off]);
929            let v1 = i32::from(column[v_off + 1]);
930            let v2 = i32::from(column[v_off + 2]);
931            let prev_offset = v2 - v1 - v0 + 2; // ceilnum from getcube convention
932            if v0 == 0 {
933                break;
934            }
935            v_off += (v0 as usize) * 4;
936            if v_off + 3 >= column.len() {
937                break;
938            }
939            let v3 = i32::from(column[v_off + 3]);
940            sz1 = v3;
941            sz0 = prev_offset + sz1;
942            voxel_byte_offset_signed = (v_off as isize) + 3 - ((sz1 as isize) << 2);
943            cstat = false;
944        }
945
946        let lo = sz0.max(z_lo);
947        let hi = sz1.min(z_hi);
948        for z in lo..hi {
949            // AO.0 — `lightmode == 3` bakes ambient occlusion into the byte
950            // (the DL ambient/AO channel; normal-free); other modes use the
951            // estnorm surface normal for the directional / point-light bake.
952            let brightness = if lightmode == 3 {
953                ao_byte(cache, x, y, z, ao)
954            } else {
955                let normal = cache.estnorm(x, y, z);
956                compute_brightness(x, y, z, normal, lightmode, lights, lightsub)
957            };
958            let byte_off = voxel_byte_offset_signed + ((z as isize) << 2);
959            if byte_off >= 0 && (byte_off as usize) < column.len() {
960                column[byte_off as usize] = brightness;
961            }
962        }
963    }
964}
965
966/// AO.0/AO.2 — map ambient occlusion to the brightness byte (the DL
967/// ambient/AO channel). Open voxels keep the neutral `128` (= full ambient,
968/// shader `byte/128 == 1.0`); occluded voxels darken by `params.strength`,
969/// never below `params.min_floor` of the open ambient.
970fn ao_byte(cache: &EstNormCache, x: i32, y: i32, z: i32, params: AoParams) -> u8 {
971    let ao = cache.ambient_occlusion(x, y, z, params.radius);
972    let factor = (1.0 - params.strength * ao).max(params.min_floor);
973    clamp_to_byte(128.0 * factor)
974}
975
976/// Per-voxel brightness math. Computes the `[0, 255]`
977/// alpha byte for one voxel from its surface normal `tp` + the
978/// light list.
979fn compute_brightness(
980    x: i32,
981    y: i32,
982    z: i32,
983    tp: [f32; 3],
984    lightmode: u32,
985    lights: &[LightSrc],
986    lightsub: &[f32],
987) -> u8 {
988    if lightmode < 2 {
989        // Directional path: single fixed sun direction
990        // direction baked into a hardcoded coefficient pair.
991        // i = (tp.y * 0.5 + tp.z) * 64 + 103.5, clamped to [0, 255].
992        let f = (tp[1] * 0.5 + tp[2]) * 64.0 + 103.5;
993        clamp_to_byte(f)
994    } else {
995        // Point-light path. Base brightness
996        // 47.5..63.5 + per-light front-face contribution.
997        let mut f = (tp[1] * 0.5 + tp[2]) * 16.0 + 47.5;
998        let xf = x as f32;
999        let yf = y as f32;
1000        let zf = z as f32;
1001        for (i, light) in lights.iter().enumerate() {
1002            let fx = light.pos[0] - xf;
1003            let fy = light.pos[1] - yf;
1004            let fz = light.pos[2] - zf;
1005            // tp · light_delta: positive ⇒ surface faces away from
1006            // light (back-lit, no contribution); negative ⇒ surface
1007            // faces light (front-lit, lambertian contribution).
1008            let h = tp[0] * fx + tp[1] * fy + tp[2] * fz;
1009            if h >= 0.0 {
1010                continue;
1011            }
1012            let g_sq = fx * fx + fy * fy + fz * fz;
1013            if g_sq >= light.r2 {
1014                continue;
1015            }
1016            // Cube-law falloff with a hard cutoff at the light radius:
1017            //   g = 1/d³ - 1/r³   (d = distance, r = radius)
1018            // so the contribution fades to exactly zero at `r`.
1019            let g = 1.0 / (g_sq * g_sq.sqrt()) - lightsub[i];
1020            f -= g * h * light.sc;
1021        }
1022        clamp_to_byte(f)
1023    }
1024}
1025
#[inline]
fn clamp_to_byte(f: f32) -> u8 {
    // Pin the brightness to `[0, 255]`, then drop the fraction. A NaN
    // passes through `clamp` unchanged and the cast turns it into 0.
    f.clamp(0.0, 255.0) as u8
}
1037
1038#[cfg(test)]
1039mod tests {
1040    use super::*;
1041
1042    /// AO.2 — only **concave** edges occlude: a raised block on a floor has a
1043    /// **convex** top (flat + edge) that must stay open (AO ≈ 0), while the
1044    /// floor at its **concave** base darkens.
1045    #[test]
1046    fn ao_only_darkens_concave_not_convex() {
1047        let vsid: u32 = 10;
1048        let column = |z1: u8, z2: u8| -> Vec<u8> {
1049            let mut c = vec![0u8, z1, z2, 0];
1050            for _ in z1..=z2 {
1051                c.extend([0x20, 0x20, 0x20, 0x80]);
1052            }
1053            c
1054        };
1055        let floor = column(20, 20); // air z<20, solid z>=20 (bedrock below)
1056        let block = column(15, 15); // raised: air z<15, solid z>=15
1057        let mut data = Vec::new();
1058        let mut offsets = vec![0u32; (vsid * vsid + 1) as usize];
1059        for i in 0..(vsid * vsid) {
1060            offsets[i as usize] = data.len() as u32;
1061            let x = i % vsid;
1062            let y = i / vsid;
1063            // 3×3 raised block at x∈[3,5], y∈[3,5].
1064            let raised = (3..=5).contains(&x) && (3..=5).contains(&y);
1065            data.extend_from_slice(if raised { &block } else { &floor });
1066        }
1067        offsets[(vsid * vsid) as usize] = data.len() as u32;
1068        let cache = EstNormCache::build(&data, &offsets, vsid, 0, 0, vsid as i32, vsid as i32);
1069
1070        let ao = |x, y, z| cache.ambient_occlusion(x, y, z, AO_RAD);
1071        let top_center = ao(4, 4, 15); // convex flat top
1072        let top_edge = ao(3, 4, 15); // convex top edge
1073        let base = ao(2, 4, 20); // concave: floor at the block's base
1074        let flat = ao(0, 0, 20); // open flat floor
1075        assert!(flat < 0.01, "open flat floor must not occlude: {flat}");
1076        assert!(
1077            top_center < 0.01,
1078            "convex flat top must not occlude: {top_center}"
1079        );
1080        assert!(top_edge < 0.01, "convex edge must not occlude: {top_edge}");
1081        assert!(base > 0.1, "concave base must occlude: {base}");
1082
1083        // AO.2 — params: strength scales the darkening, min_floor clamps it.
1084        let p = |strength, min_floor| AoParams {
1085            strength,
1086            radius: AO_RAD,
1087            min_floor,
1088        };
1089        let off = ao_byte(&cache, 2, 4, 20, p(0.0, 0.0));
1090        assert_eq!(off, 128, "strength 0 ⇒ no darkening (full ambient)");
1091        let full = ao_byte(&cache, 2, 4, 20, p(1.0, 0.0));
1092        assert!(full < 128, "strength 1 darkens the concave voxel: {full}");
1093        // A high min_floor clamps the darkening factor up to ~0.8·128.
1094        let floored = ao_byte(&cache, 2, 4, 20, p(1.0, 0.8));
1095        assert!(
1096            floored > full && floored >= 100,
1097            "min_floor 0.8 clamps darkening to ≥ ~102: floored={floored} full={full}",
1098        );
1099    }
1100
1101    /// AO cross-chunk z-seam continuity (stacked grids). A solid voxel
1102    /// sitting in the chunk **above** — one level past the target chunk's
1103    /// top z-boundary — must count as occlusion for a side face at the
1104    /// boundary. The plain (`build_with_reader`) cache can't see it (the
1105    /// implicit `z < 0 → air` boundary), so a z-aware build
1106    /// (`build_with_reader_z`) must occlude **more**.
1107    #[test]
1108    fn ao_z_seam_reads_stacked_neighbour() {
1109        let mk = |z1: u8, z2: u8| -> Vec<u8> {
1110            let mut c = vec![0u8, z1, z2, 0];
1111            for _ in z1..=z2 {
1112                c.extend([0x20, 0x20, 0x20, 0x80]);
1113            }
1114            c
1115        };
1116        // Target layer (chz_delta 0): (1,1) solid from the top boundary
1117        // down; (2,1) a pit (air at z=0, solid z≥1) so (1,1)'s +x face is
1118        // exposed. The chunk above (chz_delta -1) has a solid voxel at its
1119        // bottom (z-local 255) over (2,1) — i.e. at our z = -1, in front of
1120        // that +x face. Everything else is implicit air (reader → None).
1121        let floor = mk(0, 0);
1122        let pit = mk(1, 255);
1123        let above = mk(255, 255);
1124        let reader = |x: i32, y: i32, dz: i32| -> Option<&[u8]> {
1125            match (x, y, dz) {
1126                (1, 1, 0) => Some(&floor),
1127                (2, 1, 0) => Some(&pit),
1128                (2, 1, -1) => Some(&above),
1129                _ => None,
1130            }
1131        };
1132
1133        let plain = EstNormCache::build_with_reader(|x, y| reader(x, y, 0), 0, 0, 3, 3);
1134        let zaware = EstNormCache::build_with_reader_z(reader, 0, 0, 3, 3);
1135
1136        let ao_plain = plain.ambient_occlusion(1, 1, 0, AO_RAD);
1137        let ao_z = zaware.ambient_occlusion(1, 1, 0, AO_RAD);
1138        assert!(
1139            ao_plain > 0.0,
1140            "the in-layer pit wall should already occlude a little: {ao_plain}"
1141        );
1142        assert!(
1143            ao_z > ao_plain + 0.01,
1144            "the solid across the z-seam must add occlusion: z-aware={ao_z} plain={ao_plain}"
1145        );
1146    }
1147
1148    /// AO.0 — a floor voxel beside a wall is more occluded (darker) than an
1149    /// open floor voxel; an open voxel reads ≈0 occlusion.
1150    #[test]
1151    fn ambient_occlusion_darkens_next_to_a_wall() {
1152        let vsid: u32 = 8;
1153        // Per-column slab: `[nextptr=0, z1, z2, 0]` + (z2-z1+1) BGRA records.
1154        let column = |z1: u8, z2: u8| -> Vec<u8> {
1155            let mut c = vec![0u8, z1, z2, 0];
1156            for _ in z1..=z2 {
1157                c.extend([0x20, 0x20, 0x20, 0x80]);
1158            }
1159            c
1160        };
1161        let floor = column(20, 20); // single floor voxel at z=20
1162        let wall = column(10, 20); // wall rising from z=10..20 (above the floor)
1163        let mut data = Vec::new();
1164        let mut offsets = vec![0u32; (vsid * vsid + 1) as usize];
1165        for i in 0..(vsid * vsid) {
1166            offsets[i as usize] = data.len() as u32;
1167            // Tall wall at column (5, 3); floor everywhere else.
1168            let col = if i == 3 * vsid + 5 { &wall } else { &floor };
1169            data.extend_from_slice(col);
1170        }
1171        offsets[(vsid * vsid) as usize] = data.len() as u32;
1172
1173        let cache = EstNormCache::build(&data, &offsets, vsid, 0, 0, vsid as i32, vsid as i32);
1174        // (4,3) sits next to the wall at (5,3); (2,3) is in the open.
1175        let near = cache.ambient_occlusion(4, 3, 20, AO_RAD);
1176        let open = cache.ambient_occlusion(2, 3, 20, AO_RAD);
1177        assert!(
1178            open < 0.05,
1179            "open floor voxel should be ~unoccluded: {open}"
1180        );
1181        assert!(
1182            near > open + 0.1,
1183            "voxel beside the wall must be more occluded: near={near} open={open}",
1184        );
1185    }
1186
1187    /// AO.0 — `lightmode == 3` bakes occlusion into the alpha byte: the
1188    /// floor voxel beside the wall ends up darker than an open one, which
1189    /// stays at the neutral 128 (full ambient).
1190    #[test]
1191    fn lightmode3_bakes_ambient_occlusion() {
1192        let vsid: u32 = 8;
1193        let column = |z1: u8, z2: u8| -> Vec<u8> {
1194            let mut c = vec![0u8, z1, z2, 0];
1195            for _ in z1..=z2 {
1196                c.extend([0x20, 0x20, 0x20, 0xab]); // alpha 0xab to see the rewrite
1197            }
1198            c
1199        };
1200        let floor = column(20, 20);
1201        let wall = column(10, 20);
1202        let mut data = Vec::new();
1203        let mut offsets = vec![0u32; (vsid * vsid + 1) as usize];
1204        for i in 0..(vsid * vsid) {
1205            offsets[i as usize] = data.len() as u32;
1206            let col = if i == 3 * vsid + 5 { &wall } else { &floor };
1207            data.extend_from_slice(col);
1208        }
1209        offsets[(vsid * vsid) as usize] = data.len() as u32;
1210
1211        update_lighting(&mut data, &offsets, vsid, 0, 0, 0, 8, 8, 30, 3, &[]);
1212
1213        // Top floor voxel's alpha is at column offset + 7 (header 4 + BGR 3).
1214        let alpha = |x: u32, y: u32| data[offsets[(y * vsid + x) as usize] as usize + 7];
1215        let near = alpha(4, 3);
1216        let open = alpha(2, 3);
1217        assert_ne!(open, 0xab, "open voxel alpha rewritten by the AO bake");
1218        assert_eq!(open, 128, "open floor voxel keeps full ambient (128)");
1219        assert!(
1220            near < open,
1221            "voxel beside the wall is darker: near={near} open={open}"
1222        );
1223    }
1224
1225    /// xbsflor(0) = -1 (all bits set), xbsflor(32) clamped to 0,
1226    /// xbsflor(5) = ~31 = 0xffff_ffe0.
1227    #[test]
1228    fn xbsflor_xbsceil_known_values() {
1229        assert_eq!(xbsflor(0), 0xffff_ffff);
1230        assert_eq!(xbsflor(1), 0xffff_fffe);
1231        assert_eq!(xbsflor(5), 0xffff_ffe0);
1232        assert_eq!(xbsflor(31), 0x8000_0000);
1233        assert_eq!(xbsflor(32), 0);
1234        assert_eq!(xbsceil(0), 0);
1235        assert_eq!(xbsceil(5), 0x1f);
1236        assert_eq!(xbsceil(31), 0x7fff_ffff);
1237        assert_eq!(xbsceil(32), 0xffff_ffff);
1238    }
1239
1240    /// Single-slab column [next=0, sz0=10, sz1=14, then 5 voxel
1241    /// records]. Voxels exist at z = 10..15 (sz0..=sz1). After
1242    /// expandbit256, bits 10..15 should be set, all others
1243    /// (0..10 and 15..256) should reflect: air above (0..10) and
1244    /// solid below (15..256): z past the last slab's bottom reads
1245    /// slab as solid.
1246    #[test]
1247    fn single_slab_z10_to_14_sets_correct_bits() {
1248        // Column layout: [next=0, sz0=10, sz1=14, top_color, then 5x
1249        // voxel records of 4 bytes each]. We don't use the voxel
1250        // record contents; expandbit256 only reads v[0]..v[3].
1251        let mut col = vec![0u8, 10, 14, 0]; // header
1252        col.extend(vec![0u8; 5 * 4]); // 5 voxel records (z=10..14)
1253
1254        let mut bits = [0u32; 8];
1255        expandbit256(&col, &mut bits);
1256
1257        // Word 0 covers bits 0..32. Air for z=0..10, solid 10..15,
1258        // solid for z=15..32 (since this is the only slab → below
1259        // is fully solid).
1260        // bits 10..15 from the slab body: 0x7c00 (bits 10,11,12,13,14)
1261        // bits 15..32 from "solid below last slab": 0xffff_8000
1262        // Combined: 0xffff_fc00.
1263        assert_eq!(
1264            bits[0], 0xffff_fc00,
1265            "word 0 want 0xffff_fc00 got 0x{:08x}",
1266            bits[0]
1267        );
1268        // Words 1..7 should all be 0xffff_ffff (fully solid).
1269        for (i, w) in bits.iter().enumerate().skip(1) {
1270            assert_eq!(*w, 0xffff_ffff, "word {i} want -1 got 0x{:08x}", *w);
1271        }
1272    }
1273
1274    /// Build a 4×4 synthetic world with a flat floor at z=20..=24,
1275    /// run lightmode-1 update_lighting over the centre 2×2, and
1276    /// verify (a) brightness bytes were rewritten, (b) the result
1277    /// is in `[0, 255]` for every shaded voxel, (c) the brightness
1278    /// is uniform within each (x, y) column at the same z (since
1279    /// lightmode-1 depends only on the surface normal).
1280    #[test]
1281    fn lightmode1_bakes_brightness_into_visible_voxels() {
1282        // 4×4 world, single slab at z=20..=24, sentinel column ends.
1283        let vsid: u32 = 4;
1284        let mut col = vec![0u8, 20, 24, 0]; // header: nextptr=0, z1=20, z2=24
1285        for _ in 20..=24 {
1286            // 5 voxel records, alpha pre-set to 0xab so we can verify
1287            // they got rewritten.
1288            col.extend([0x10, 0x20, 0x30, 0xab]);
1289        }
1290        let col_len = col.len() as u32;
1291        let mut data = Vec::new();
1292        let mut offsets = vec![0u32; (vsid * vsid + 1) as usize];
1293        for i in 0..(vsid * vsid) {
1294            offsets[i as usize] = data.len() as u32;
1295            data.extend_from_slice(&col);
1296        }
1297        offsets[(vsid * vsid) as usize] = data.len() as u32;
1298        assert_eq!(col_len as usize * (vsid * vsid) as usize, data.len());
1299
1300        update_lighting(
1301            &mut data,
1302            &offsets,
1303            vsid,
1304            1,
1305            1,
1306            0,
1307            3,
1308            3,
1309            30, // bbox 1..=2 in xy, z 0..30
1310            1,  // lightmode 1
1311            &[],
1312        );
1313
1314        // Pull every voxel record's alpha byte from the centre
1315        // (1, 1) column. Should all be in [0, 255] and ≠ 0xab.
1316        let off1 = offsets[(1 * vsid + 1) as usize] as usize;
1317        let alphas: Vec<u8> = (0..5).map(|i| data[off1 + 4 + i * 4 + 3]).collect();
1318        for (i, &a) in alphas.iter().enumerate() {
1319            assert_ne!(a, 0xab, "alpha[{i}] not rewritten");
1320        }
1321        // The shading should be mostly bright — flat-floor voxels
1322        // have ~vertical normals so `(tp.y*0.5 + tp.z)*64 + 103.5`
1323        // ≈ 1.0*64 + 103.5 = 167.5.
1324        for (i, &a) in alphas.iter().enumerate() {
1325            assert!(
1326                a > 100,
1327                "alpha[{i}]={a} should be on the bright side for top-of-floor voxels"
1328            );
1329        }
1330    }
1331
1332    /// lightmode-2 with one nearby light should darken voxels on
1333    /// the away side relative to the toward side. Use a 5×5 world
1334    /// with a flat floor and place a light such that it's on the
1335    /// +x side of the centre column — the +x face voxel's neighbour
1336    /// columns should end up brighter than the -x.
1337    #[test]
1338    fn lightmode2_with_light_produces_per_column_variation() {
1339        let vsid: u32 = 5;
1340        let mut col = vec![0u8, 20, 24, 0];
1341        for _ in 20..=24 {
1342            col.extend([0x10, 0x20, 0x30, 0]);
1343        }
1344        let mut data = Vec::new();
1345        let mut offsets = vec![0u32; (vsid * vsid + 1) as usize];
1346        for i in 0..(vsid * vsid) {
1347            offsets[i as usize] = data.len() as u32;
1348            data.extend_from_slice(&col);
1349        }
1350        offsets[(vsid * vsid) as usize] = data.len() as u32;
1351
1352        let lights = [LightSrc {
1353            // World coords: light right next to (4, 2, 20).
1354            pos: [4.0, 2.0, 20.0],
1355            r2: 50.0 * 50.0,
1356            sc: 64.0,
1357        }];
1358        update_lighting(&mut data, &offsets, vsid, 0, 0, 0, 5, 5, 30, 2, &lights);
1359
1360        // Sample the alpha at the top-floor voxel of each column
1361        // along y=2. Closer-to-light columns should be brighter.
1362        let alpha_at = |x: u32, z_idx: usize| {
1363            let off = offsets[(2 * vsid + x) as usize] as usize;
1364            data[off + 4 + z_idx * 4 + 3]
1365        };
1366        let close = alpha_at(4, 0); // closest column to light
1367        let far = alpha_at(0, 0); // farthest
1368        assert!(
1369            close >= far,
1370            "column nearer the light should be ≥ as bright as the far one (close={close} far={far})"
1371        );
1372    }
1373
1374    /// Empty column ([0, 0, 0, ...]) — no slabs. After
1375    /// expandbit256, all 256 bits = 0 (full air).
1376    #[test]
1377    fn empty_column_all_air() {
1378        let col = vec![0u8, 0, 0, 0]; // single-slab header at z=0..0, no body
1379        let mut bits = [0u32; 8];
1380        expandbit256(&col, &mut bits);
1381        // bit 0 from "air→solid transition at z=0", but only bit 0
1382        // is set within the slab range [0, 0+1). Then "solid below"
1383        // fills bits 1..256.
1384        // Actually for sz0=sz1=0: voxel record is z=0..0 inclusive
1385        // (0 voxels). The bit pattern is 1 set bit at z=0 then
1386        // solid below.
1387        // word 0: bit 0 set, bits 1..32 set ⇒ 0xffff_ffff.
1388        assert_eq!(
1389            bits[0], 0xffff_ffff,
1390            "empty column word 0 want all-1 got 0x{:08x}",
1391            bits[0]
1392        );
1393    }
1394}