Skip to main content

roxlap_core/
dda.rs

1//! Per-pixel 3D-DDA + brickmap CPU renderer (Substage DDA).
2//!
3//! This is the clean-room replacement for the voxlap-derived
4//! column-coherent opticast pipeline (`opticast` + `grouscan` +
5//! `scan_loops`). Every pixel casts one independent ray, so none of
6//! the column/row-coherence stitching artifacts of the 2.5D voxlap
7//! renderer can occur (silhouette notch, floor hairlines, axis-aligned
8//! mip beams, cross-chunk virtual-column complexity). See
9//! `PORTING-DDA.md` for the full stage plan.
10//!
11//! **Stage status — DDA.6 (per-grid distance mip) + DDA.7 (tile
12//! parallelism).** Each pixel casts one ray over the grid's full voxel
13//! box ([`GridView::voxel_bounds`], spanning every chunk in XY **and**
14//! Z) via a 3D-DDA (Amanatides–Woo). A uniform render mip (chosen per
15//! grid by LOD distance, clamped by [`effective_mip`] to a level every
16//! chunk has built) coarsens the cell size to `2^mip` mip-0 voxels and
17//! samples mip-`mip` data — the ray stays in mip-0 units so depth and
18//! fog are exact. [`BrickMaps`] (one occupancy map per populated chunk,
19//! at the render mip) are built once per frame and shared immutably; a
20//! [`Sampler`] resolves each cell to its chunk
21//! ([`GridView::chunk_at_xyz`]) and brick-gates the
22//! [`GridView::surface_color_mip`] slab walk, caching the current chunk
23//! so air costs an O(1) bit test. [`render_dda_parallel`] splits the
24//! frame into disjoint rayon bands — bit-identical to sequential since
25//! pixels are independent. Hits are shaded by baked brightness
26//! ([`shade`]) + [`DdaEnv::side_shades`] face tint, fogged toward
27//! [`DdaEnv::fog_color`] ([`apply_fog`]); misses sample the
28//! [`DdaEnv::sky`] panorama ([`sample_sky`]) or keep the solid pre-fill.
29//!
//! Buffer conventions match the rest of the engine so this backend is a
//! drop-in replacement: colour is packed `0x80RRGGBB`; depth is perpendicular
//! distance from the camera with **smaller = closer** (so the scene
//! compositor's min-z merge works directly on the z-buffer this writes).
34
35use std::collections::HashMap;
36
37use rayon::prelude::*;
38
39use crate::camera_math::{self, CameraState};
40use crate::grid_view::GridView;
41use crate::opticast::OpticastSettings;
42use crate::raster_target::RasterTarget;
43use crate::sky::Sky;
44use crate::Camera;
45use roxlap_formats::material::{material_for_color, Material, MaterialTable};
46
47/// Per-frame environment for DDA shading (Substage DDA.5): a textured
48/// sky panorama, distance fog, and per-face side shading.
49///
50/// [`DdaEnv::default`] disables all three — flat baked-brightness hits
51/// and a caller-pre-filled solid sky — so the brickmap/dense equivalence
52/// tests run against an unchanged pipeline.
53#[derive(Clone, Copy)]
54pub struct DdaEnv<'a> {
55    /// Textured sky sampled per-ray-direction on a miss. `None` leaves
56    /// the destination untouched (caller's solid sky pre-fill shows).
57    pub sky: Option<&'a Sky>,
58    /// Fog target colour (`0x__RRGGBB`); hits blend toward it with
59    /// distance. Typically the sky colour so terrain fades into the sky.
60    pub fog_color: u32,
61    /// Depth at which fog is fully opaque. `<= 0` disables fog.
62    pub fog_max_dist: f32,
63    /// Per-face brightness reduction `[x-, x+, y-, y+, z-, z+]`, applied
64    /// to the hit face (voxlap `setsideshades`). All-zero = off.
65    pub side_shades: [i8; 6],
66    /// TV: global voxel-material palette (id → opacity + blend mode). `None`
67    /// keeps terrain fully opaque (the first-hit path, bit-identical).
68    pub materials: Option<&'a MaterialTable>,
69    /// TV: terrain colour→material map (`(rgb, material_id)`). A hit voxel's
70    /// colour is looked up here for its material. **Empty** (the default) ⇒
71    /// every voxel is opaque, so the march returns the first hit unchanged.
72    pub terrain_materials: &'a [(u32, u8)],
73    /// CPU.1 — dynamic lighting (stage DL on the CPU): sun + point lights +
74    /// stylized cel/ramp, evaluated flat per voxel. Disabled by default ⇒ the
75    /// hit uses the baked-byte [`shade`] path, byte-identical to pre-DL. Lights
76    /// here are already in the grid's **local** frame (the scene renderer
77    /// transforms them per grid). Shadows: see [`Self::world_shadow`].
78    pub lights: CpuLights<'a>,
79    /// XS.1 — when set, shadow rays test the **whole scene** (all grids +
80    /// sprites) via this world-space occluder + the current grid's
81    /// local→world transform, instead of the single-grid [`SamplerShadow`].
82    /// `None` ⇒ single-grid shadows (the direct `render_dda` path / tests).
83    pub world_shadow: Option<WorldShadowCtx<'a>>,
84}
85
/// CPU.1 — a single point (or spot) light for the CPU renderer, expressed
/// in a grid's local frame.
#[derive(Clone, Copy)]
pub struct CpuPointLight {
    /// Position in grid-local space (world/voxel units).
    pub pos: [f32; 3],
    /// Light colour as linear RGB in `0..1`.
    pub color: [f32; 3],
    /// Scalar multiplier applied to `color`.
    pub intensity: f32,
    /// Distance (world/voxel units) beyond which the light contributes
    /// nothing — a hard cutoff.
    pub radius: f32,
    /// CPU.2 — `true` if the light casts a hard shadow, i.e. a shadow ray
    /// is marched through the grid's voxels toward the light. Mirrors the
    /// GPU's per-light `casts_shadow`; the renderer applies the same caster
    /// cap before it builds the CPU rig.
    pub casts_shadow: bool,
    /// SL — spot (cone) axis: the grid-local unit direction the light
    /// shines **along**. Unused for a pure point light (see
    /// [`Self::cos_outer`]).
    pub spot_dir: [f32; 3],
    /// SL — cosine of the inner cone half-angle; inside it the light is at
    /// full brightness.
    pub cos_inner: f32,
    /// SL — cosine of the outer cone half-angle; past it the light is zero,
    /// with a soft `smoothstep` between inner and outer. A value of `-1.0`
    /// (a 180° cone) marks a pure point light: the cone mask is skipped and
    /// the light is omnidirectional.
    pub cos_outer: f32,
}
111
112/// CPU.1 — the per-frame dynamic-light environment for one grid (grid-local).
113/// Mirror of the GPU `shade_lit` inputs. `enabled == false` (the default)
114/// keeps the baked-byte path. CPU.2 adds hard voxel shadows (sun + flagged
115/// point lights) via a per-(voxel,face) shadow march; `shadow_strength == 0`
116/// (the [`Default`]) leaves the lighting diffuse-only.
117#[derive(Clone, Copy, Default)]
118pub struct CpuLights<'a> {
119    /// Whether dynamic lighting is active this frame (else the baked path).
120    pub enabled: bool,
121    /// Whether the sun is present.
122    pub sun: bool,
123    /// Grid-local unit direction **to** the sun.
124    pub sun_dir: [f32; 3],
125    pub sun_color: [f32; 3],
126    pub sun_intensity: f32,
127    /// CPU.2 — whether the sun casts a hard shadow.
128    pub sun_casts_shadow: bool,
129    /// Grid-local point lights.
130    pub points: &'a [CpuPointLight],
131    /// Ambient multiplier on the baked byte (smooth mode's fill).
132    pub ambient: [f32; 3],
133    /// Cel band count: 0 = smooth, ≥1 = quantize + gradient-map (stylized).
134    pub bands: u32,
135    /// Stylized ramp's cool unlit-end tint (used when `bands > 0`).
136    pub shadow_tint: [f32; 3],
137    /// CPU.2 — fraction of a caster's light removed where a shadow ray is
138    /// occluded (`0` ⇒ shadows off, `1` ⇒ full black). A shadowed sample
139    /// keeps `1 - shadow_strength` of that caster.
140    pub shadow_strength: f32,
141    /// CPU.2 — shadow-ray origin bias along the surface normal, voxel
142    /// units (kills self-shadow acne). ~1.5 is a good default.
143    pub shadow_bias: f32,
144    /// CPU.2 — sun shadow-ray length cap, voxel units (point-light rays
145    /// stop at the light instead).
146    pub shadow_max_dist: f32,
147}
148
149impl Default for DdaEnv<'_> {
150    fn default() -> Self {
151        Self {
152            sky: None,
153            fog_color: 0,
154            fog_max_dist: 0.0,
155            side_shades: [0; 6],
156            materials: None,
157            terrain_materials: &[],
158            lights: CpuLights::default(),
159            world_shadow: None,
160        }
161    }
162}
163
/// Destination for the DDA renderer's per-pixel results.
///
/// The traversal core reports hits through this trait so it never has to
/// know how a framebuffer is laid out or written. [`RasterSink`] (raw
/// fb/zb pointers) is the production implementation; tests plug in a
/// recording sink instead. Only *hits* arrive here: a miss (sky) leaves the
/// destination untouched, in line with the caller-pre-fills-sky convention.
pub trait PixelSink {
    /// Store one ray hit.
    ///
    /// * `idx` — framebuffer index, `py * pitch + px`.
    /// * `color` — packed ARGB colour.
    /// * `dist` — perpendicular distance (smaller = closer).
    fn put(&mut self, idx: usize, color: u32, dist: f32);
}
177
178/// [`PixelSink`] over a borrowed `(framebuffer, zbuffer)` pair.
179///
180/// Wraps a [`RasterTarget`] so the DDA path writes through the same
181/// raw-pointer mechanism the scalar rasterizer uses — which keeps the
182/// door open for the same strip/tile-disjoint parallel writes in
183/// DDA.7.
184pub struct RasterSink<'a> {
185    target: RasterTarget<'a>,
186    len: usize,
187}
188
189impl<'a> RasterSink<'a> {
190    /// Build a sink from exclusive framebuffer + zbuffer borrows.
191    /// Both slices must have the same length (the pixel count).
192    #[must_use]
193    pub fn new(framebuffer: &'a mut [u32], zbuffer: &'a mut [f32]) -> Self {
194        debug_assert_eq!(framebuffer.len(), zbuffer.len());
195        let len = framebuffer.len();
196        Self {
197            target: RasterTarget::new(framebuffer, zbuffer),
198            len,
199        }
200    }
201}
202
203impl PixelSink for RasterSink<'_> {
204    fn put(&mut self, idx: usize, color: u32, dist: f32) {
205        if idx < self.len {
206            // SAFETY: bounds checked above; single-threaded writer in
207            // DDA.0 so the disjoint-write invariant holds trivially.
208            unsafe {
209                self.target.write_color(idx, color);
210                self.target.write_depth(idx, dist);
211            }
212        }
213    }
214}
215
/// Result of a ray that struck something.
#[derive(Debug, Clone, Copy)]
struct Hit {
    /// Surface colour at the hit.
    color: u32,
    /// Perpendicular distance to the hit.
    dist: f32,
}
222
/// Per-thread traversal counters used by the perf bench; compiled into test
/// builds only.
#[cfg(test)]
pub(crate) mod prof {
    use std::cell::Cell;

    thread_local! {
        pub static CELLS: Cell<u64> = const { Cell::new(0) };
        pub static BRICKS: Cell<u64> = const { Cell::new(0) };
        pub static SURF: Cell<u64> = const { Cell::new(0) };
    }

    /// Zero all three counters of the calling thread.
    pub fn reset() {
        for counter in [&CELLS, &BRICKS, &SURF] {
            counter.with(|c| c.set(0));
        }
    }

    /// Snapshot the calling thread's counters as `(CELLS, BRICKS, SURF)`.
    pub fn read() -> (u64, u64, u64) {
        let cells = CELLS.with(|c| c.get());
        let bricks = BRICKS.with(|c| c.get());
        let surf = SURF.with(|c| c.get());
        (cells, bricks, surf)
    }
}
245
/// Multiply a voxel colour by its baked directional brightness (Substage
/// DDA.5).
///
/// The colour's high byte carries a per-voxel brightness on a `0..128`
/// scale (`0x80` = full brightness), as voxlap and the GPU marcher
/// (`grid_dda.wgsl`) store it; `Grid::bake_lightmode` (estnorm directional
/// shading) writes it. Each channel becomes `c · a / 128`, clamped to 255,
/// which matches the GPU look; a full-bright / unbaked voxel (`a = 0x80`)
/// comes out unchanged. The returned alpha is always `0x80` (the standard
/// "lit" flag; the present blit ignores it).
///
/// The renderer only *reads* the baked byte and computes no normals of its
/// own, so per-impact relight is free: re-bake the chunk and the byte
/// updates. The estnorm bake that produces the byte is the voxlap-derived
/// piece slated for a clean-room rewrite in DDA.10.
///
/// `bright_sub` is the per-face `side_shades` reduction (DDA.5). As in
/// voxlap it is subtracted (saturating at zero) from the brightness byte
/// before the multiply, so a shaded face is uniformly darker; `0` means no
/// side shading.
#[inline]
pub(crate) fn shade(color: u32, bright_sub: u32) -> u32 {
    let brightness = ((color >> 24) & 0xff).saturating_sub(bright_sub);
    let [r, g, b] = [16u32, 8, 0].map(|shift| {
        let channel = (color >> shift) & 0xff;
        ((channel * brightness) >> 7).min(255)
    });
    0x8000_0000 | (r << 16) | (g << 8) | b
}
270
/// CPU.1 — cel quantization: round `x` to the nearest multiple of
/// `1 / bands` and clamp the result to `0..=1`.
///
/// Callers in this file only invoke it with `bands > 0`.
#[inline]
fn cel_band(x: f32, bands: u32) -> f32 {
    let levels = bands as f32;
    let snapped = (x * levels).round() / levels;
    snapped.clamp(0.0, 1.0)
}
277
/// CPU.1 — distance attenuation of a point light (mirror of the GPU's): a
/// smooth quadratic that is 1 at the light, 0 at `radius`, and stays 0
/// beyond it.
#[inline]
fn point_falloff(d: f32, radius: f32) -> f32 {
    let linear = (1.0 - d / radius).clamp(0.0, 1.0);
    linear * linear
}
285
/// SL — Hermite `smoothstep` (mirror of WGSL's) with a defined hard-edge
/// case: WGSL leaves `edge0 == edge1` undefined, so whenever
/// `edge1 <= edge0` this steps from 0 to 1 at `edge0` instead.
#[inline]
fn smoothstep_scalar(edge0: f32, edge1: f32, x: f32) -> f32 {
    let hard_edge = edge1 <= edge0;
    if hard_edge {
        if x < edge0 {
            0.0
        } else {
            1.0
        }
    } else {
        let span = edge1 - edge0;
        let t = ((x - edge0) / span).clamp(0.0, 1.0);
        let t_squared = t * t;
        t_squared * (3.0 - 2.0 * t)
    }
}
296
297// SL — spot (cone) angular mask (mirror of the shaders' `spot_cone`). `ldir` is
298// the unit direction from the surface TO the light; `axis` the cone axis (the
299// way the light shines). Returns 1.0 for a pure point light (`cos_outer <=
300// -0.999`, the 180° degenerate); else a soft `smoothstep` from 0 at the outer
301// half-angle to 1 at the inner (hard step when the two coincide).
302#[inline]
303fn spot_cone(ldir: [f32; 3], axis: [f32; 3], cos_inner: f32, cos_outer: f32) -> f32 {
304    if cos_outer <= -0.999 {
305        return 1.0;
306    }
307    let cd = -dot3(ldir, axis);
308    smoothstep_scalar(cos_outer, cos_inner, cd)
309}
310
/// CPU.1 — grid-local normal of the face a ray entered through, derived
/// from the axis it crossed and the ray's step direction; the normal points
/// back toward the incoming ray. Any `axis >= 3` (entry voxel, no face)
/// falls back to "up", which is `-z` in voxlap's z-down convention.
#[inline]
fn face_normal_cpu(axis: usize, step: [i32; 3]) -> [f32; 3] {
    match axis {
        0 => [-(step[0] as f32), 0.0, 0.0],
        1 => [0.0, -(step[1] as f32), 0.0],
        2 => [0.0, 0.0, -(step[2] as f32)],
        _ => [0.0, 0.0, -1.0],
    }
}
324
/// Dot product of two 3-vectors.
#[inline]
fn dot3(a: [f32; 3], b: [f32; 3]) -> f32 {
    let [ax, ay, az] = a;
    let [bx, by, bz] = b;
    ax * bx + ay * by + az * bz
}
329
/// CPU.2 — occlusion oracle used by the dynamic-lighting shade to decide
/// hard shadows.
///
/// Terrain hits supply either a [`SamplerShadow`] (marches the current grid
/// only) or a [`WorldShadow`] (cross-grid + sprites, XS.1/XS.2); sprites
/// that neither cast nor receive shadows supply `None` instead of a tester.
pub(crate) trait ShadowTester {
    /// Returns `true` if a solid voxel blocks the segment that starts at
    /// `origin` (grid-local, already biased off the surface), runs along
    /// the unit direction `dir`, and is `max_t` voxel units long.
    fn occluded(&mut self, origin: [f32; 3], dir: [f32; 3], max_t: f32) -> bool;
}
340
/// XS.1 — occlusion oracle in **world space** covering the whole scene (all
/// grids, plus sprites in XS.2).
///
/// The implementation lives in `roxlap-scene`, which owns the grid / sprite
/// stores. The CPU DDA reaches it through [`DdaEnv::world_shadow`], which is
/// how shadow rays cross grid + object boundaries rather than stopping at
/// the current grid.
///
/// The trait requires `Sync` because [`DdaEnv`] borrows the occluder and is
/// shared across the rayon strip workers in [`render_dda_parallel`]; the
/// occluder is a read-only borrow of the scene, so this holds.
pub trait WorldOccluder: Sync {
    /// Returns `true` iff any solid voxel anywhere blocks the segment from
    /// `origin` along `dir` for `max_t`, all in **world** voxel units.
    fn occluded_world(&self, origin: [f32; 3], dir: [f32; 3], max_t: f32) -> bool;
}
354
355/// XS.1 — per-grid context for a cross-scene shadow query: the scene-wide
356/// [`WorldOccluder`] plus the **current grid's** local→world transform, so a
357/// grid-local shadow ray (the frame `shade_dynamic` works in) can be lifted
358/// to world space before the scene-wide test. `cols[i]` is the world-space
359/// image of grid-local axis `i` (the grid rotation's columns); `origin` is the
360/// grid's world origin.
361#[derive(Clone, Copy)]
362pub struct WorldShadowCtx<'a> {
363    pub occluder: &'a dyn WorldOccluder,
364    pub origin: [f32; 3],
365    pub cols: [[f32; 3]; 3],
366}
367
368impl<'a> WorldShadowCtx<'a> {
369    /// Identity transform — for shading already in world space (sprites): the
370    /// grid-local ray IS the world ray.
371    #[must_use]
372    pub fn identity(occluder: &'a dyn WorldOccluder) -> Self {
373        Self {
374            occluder,
375            origin: [0.0; 3],
376            cols: [[1.0, 0.0, 0.0], [0.0, 1.0, 0.0], [0.0, 0.0, 1.0]],
377        }
378    }
379}
380
381/// XS.2 — a [`WorldOccluder`] that ORs two others (e.g. the grid occluder +
382/// the sprite occluder), so a single shadow query covers both. `true` if
383/// either blocks the ray.
384pub struct CompositeOccluder<'a> {
385    pub a: &'a dyn WorldOccluder,
386    pub b: &'a dyn WorldOccluder,
387}
388
389impl WorldOccluder for CompositeOccluder<'_> {
390    fn occluded_world(&self, origin: [f32; 3], dir: [f32; 3], max_t: f32) -> bool {
391        self.a.occluded_world(origin, dir, max_t) || self.b.occluded_world(origin, dir, max_t)
392    }
393}
394
395/// XS.1 — [`ShadowTester`] that lifts a grid-local shadow ray to world space
396/// (via [`WorldShadowCtx`]) and queries the scene-wide [`WorldOccluder`], so
397/// occlusion crosses grid + sprite boundaries. Sprites (already world-space)
398/// use an identity [`WorldShadowCtx`] (see [`WorldShadowCtx::identity`]).
399pub(crate) struct WorldShadow<'a> {
400    pub ctx: WorldShadowCtx<'a>,
401}
402
403impl ShadowTester for WorldShadow<'_> {
404    fn occluded(&mut self, origin: [f32; 3], dir: [f32; 3], max_t: f32) -> bool {
405        let c = &self.ctx.cols;
406        // world = grid_origin + R · local (R columns = `cols`); dir rotates only.
407        let wo = [
408            self.ctx.origin[0] + c[0][0] * origin[0] + c[1][0] * origin[1] + c[2][0] * origin[2],
409            self.ctx.origin[1] + c[0][1] * origin[0] + c[1][1] * origin[1] + c[2][1] * origin[2],
410            self.ctx.origin[2] + c[0][2] * origin[0] + c[1][2] * origin[1] + c[2][2] * origin[2],
411        ];
412        let wd = [
413            c[0][0] * dir[0] + c[1][0] * dir[1] + c[2][0] * dir[2],
414            c[0][1] * dir[0] + c[1][1] * dir[1] + c[2][1] * dir[2],
415            c[0][2] * dir[0] + c[1][2] * dir[1] + c[2][2] * dir[2],
416        ];
417        self.ctx.occluder.occluded_world(wo, wd, max_t)
418    }
419}
420
421/// CPU.1 — dynamic-lighting shade for a terrain voxel (the CPU mirror of the
422/// GPU `shade_lit`): raw albedo × (ambient/AO + sun + point lights), evaluated
423/// **flat per voxel** (at the voxel centre, so a whole face reads one tone —
424/// the retro look). `bands > 0` quantizes (cel) and gradient-maps the sun key
425/// from `shadow_tint` (cool) to the sun colour (warm). **No shadows.** Returns
426/// a packed `0x80RRGGBB` colour (same convention as [`shade`]).
427fn shade_lit_cpu(
428    color: u32,
429    bright_sub: u32,
430    axis: usize,
431    step: [i32; 3],
432    cellc: [i32; 3],
433    cell_size: f32,
434    l: &CpuLights<'_>,
435    shadow: Option<&mut dyn ShadowTester>,
436) -> u32 {
437    let a_b = ((color >> 24) & 0xff).saturating_sub(bright_sub);
438    let ao = a_b as f32 / 128.0;
439    let albedo = [
440        ((color >> 16) & 0xff) as f32 / 255.0,
441        ((color >> 8) & 0xff) as f32 / 255.0,
442        (color & 0xff) as f32 / 255.0,
443    ];
444    let n = face_normal_cpu(axis, step);
445    // Voxel centre (grid-local) — flat per-voxel sample point.
446    let center = [
447        (cellc[0] as f32 + 0.5) * cell_size,
448        (cellc[1] as f32 + 0.5) * cell_size,
449        (cellc[2] as f32 + 0.5) * cell_size,
450    ];
451    shade_dynamic(albedo, ao, n, center, l, shadow)
452}
453
454/// CPU.1/DL.7 — the shared dynamic-lighting core (terrain + sprites): raw
455/// `albedo` × (ambient/AO + sun + point lights), sampled **flat per voxel**
456/// at `sample` with surface normal `n`. `bands > 0` quantizes (cel) and
457/// gradient-maps the sun key from `shadow_tint` (cool) to the sun colour
458/// (warm). **No shadows** (GPU-only). Returns a packed `0x80RRGGBB` colour.
459pub(crate) fn shade_dynamic(
460    albedo: [f32; 3],
461    ao: f32,
462    n: [f32; 3],
463    sample: [f32; 3],
464    l: &CpuLights<'_>,
465    shadow: Option<&mut dyn ShadowTester>,
466) -> u32 {
467    let styled = l.bands > 0;
468    // CPU.2 — shadow ray origin: bias off the surface along the normal to
469    // avoid self-shadow acne (shared by every caster). Light kept in
470    // shadow = `1 - shadow_strength` (1.0 ⇒ shadows effectively off).
471    let mut shadow = shadow;
472    let shadow_origin = [
473        sample[0] + n[0] * l.shadow_bias,
474        sample[1] + n[1] * l.shadow_bias,
475        sample[2] + n[2] * l.shadow_bias,
476    ];
477    let in_shadow = 1.0 - l.shadow_strength;
478
479    // Sun key (0..1): N·L × shadow factor.
480    let sun_key = if l.sun {
481        let ndl = dot3(n, l.sun_dir).max(0.0);
482        if ndl > 0.0 && l.sun_casts_shadow {
483            let occ = shadow
484                .as_deref_mut()
485                .is_some_and(|s| s.occluded(shadow_origin, l.sun_dir, l.shadow_max_dist));
486            if occ {
487                ndl * in_shadow
488            } else {
489                ndl
490            }
491        } else {
492            ndl
493        }
494    } else {
495        0.0
496    };
497
498    // Base term: ambient + sun. Smooth = additive; stylized = gradient map.
499    let mut lit = if styled {
500        let key = cel_band(sun_key, l.bands);
501        let m = |i: usize| {
502            let warm = l.sun_color[i] * l.sun_intensity;
503            (l.shadow_tint[i] + (warm - l.shadow_tint[i]) * key) * ao
504        };
505        [albedo[0] * m(0), albedo[1] * m(1), albedo[2] * m(2)]
506    } else {
507        let base = |i: usize| {
508            albedo[i] * l.ambient[i] * ao + albedo[i] * l.sun_color[i] * l.sun_intensity * sun_key
509        };
510        [base(0), base(1), base(2)]
511    };
512
513    // Point lights (flat per voxel). CPU.2 — a flagged caster's shadow ray
514    // marches to the light; an occluded sample keeps `in_shadow` of it.
515    for p in l.points {
516        let d3 = [
517            p.pos[0] - sample[0],
518            p.pos[1] - sample[1],
519            p.pos[2] - sample[2],
520        ];
521        let dist = (d3[0] * d3[0] + d3[1] * d3[1] + d3[2] * d3[2]).sqrt();
522        if dist < p.radius && dist > 1e-4 {
523            let inv = 1.0 / dist;
524            let ldir = [d3[0] * inv, d3[1] * inv, d3[2] * inv];
525            let ndl = dot3(n, ldir).max(0.0);
526            // SL — spot cone mask (1.0 for a pure point light). Computed
527            // before the shadow march so an off-cone spot skips it entirely.
528            let cone = spot_cone(ldir, p.spot_dir, p.cos_inner, p.cos_outer);
529            if ndl > 0.0 && cone > 0.0 {
530                // Shadow ray marches from the surface to the light (`dist`).
531                let sh = if p.casts_shadow
532                    && shadow
533                        .as_deref_mut()
534                        .is_some_and(|s| s.occluded(shadow_origin, ldir, dist))
535                {
536                    in_shadow
537                } else {
538                    1.0
539                };
540                let mut f = ndl * point_falloff(dist, p.radius) * cone * sh;
541                if styled {
542                    f = cel_band(f, l.bands);
543                }
544                for i in 0..3 {
545                    lit[i] += albedo[i] * p.color[i] * p.intensity * f;
546                }
547            }
548        }
549    }
550
551    let pack = |v: f32| -> u32 { (v.clamp(0.0, 1.0) * 255.0) as u32 };
552    0x8000_0000 | (pack(lit[0]) << 16) | (pack(lit[1]) << 8) | pack(lit[2])
553}
554
555/// Blend `color` toward `env.fog_color` by perpendicular `depth`
556/// (linear, fully fogged at `env.fog_max_dist`). No-op when fog is
557/// disabled (`fog_max_dist <= 0`).
558#[inline]
559fn apply_fog(color: u32, depth: f32, env: &DdaEnv<'_>) -> u32 {
560    if env.fog_max_dist <= 0.0 {
561        return color;
562    }
563    #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
564    let f = ((depth / env.fog_max_dist).clamp(0.0, 1.0) * 256.0) as u32; // 0..256
565    let g = 256 - f;
566    let fog = env.fog_color;
567    let mix = |shift: u32| -> u32 {
568        let src = (color >> shift) & 0xff;
569        let dst = (fog >> shift) & 0xff;
570        ((src * g + dst * f) >> 8).min(255)
571    };
572    0x8000_0000 | (mix(16) << 16) | (mix(8) << 8) | mix(0)
573}
574
575/// TV: resolve a terrain voxel's [`Material`] from its colour via the env's
576/// colour→material map + palette. Returns [`Material::OPAQUE`] when no
577/// material table is set, the map is empty, or the colour is unmapped — so
578/// the march stays on the opaque first-hit path.
579#[inline]
580fn terrain_material(env: &DdaEnv<'_>, color: u32) -> Material {
581    match env.materials {
582        Some(table) if !env.terrain_materials.is_empty() => {
583            table.get(material_for_color(env.terrain_materials, color))
584        }
585        _ => Material::OPAQUE,
586    }
587}
588
589/// Composite premultiplied `accum` (+ remaining `trans`) over a packed
590/// background colour → packed `0x80RRGGBB`.
591#[inline]
592fn composite_over(accum: [f32; 3], trans: f32, bg: u32) -> u32 {
593    let b = rgb_to_f32(bg);
594    f32_to_rgb([
595        accum[0] + trans * b[0],
596        accum[1] + trans * b[1],
597        accum[2] + trans * b[2],
598    ])
599}
600
601/// Finalize a translucent terrain ray that exited the grid (sky). Returns
602/// `None` when nothing was accumulated (the opaque first-hit path — the
603/// caller's sky handling stands, bit-identical), else the accumulated
604/// layers composited over the sky at `dist`.
605#[inline]
606fn finalize_exit(
607    touched: bool,
608    accum: [f32; 3],
609    trans: f32,
610    env: &DdaEnv<'_>,
611    dir: [f32; 3],
612    dist: f32,
613) -> Option<Hit> {
614    if !touched {
615        return None;
616    }
617    let bg = match env.sky {
618        Some(s) => sample_sky(s, dir),
619        None => 0x8000_0000 | (env.fog_color & 0x00ff_ffff),
620    };
621    Some(Hit {
622        color: composite_over(accum, trans, bg),
623        dist,
624    })
625}
626
/// Split a packed `0x__RRGGBB` colour into float RGB channels in `0..1`.
/// The high byte is ignored — `shade`/`fog` have already folded it into the
/// colour.
#[inline]
#[allow(clippy::cast_precision_loss)]
fn rgb_to_f32(c: u32) -> [f32; 3] {
    [16u32, 8, 0].map(|shift| ((c >> shift) & 0xff) as f32 / 255.0)
}
638
/// Pack float RGB channels into `0x80RRGGBB`. Each channel is clamped to
/// `0..1` and rounded to the nearest 8-bit value.
#[inline]
#[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
fn f32_to_rgb(c: [f32; 3]) -> u32 {
    let [r, g, b] = c.map(|v| (v.clamp(0.0, 1.0) * 255.0 + 0.5) as u32);
    0x8000_0000 | (r << 16) | (g << 8) | b
}
646
647/// Sample the sky panorama in ray direction `dir` (need not be
648/// normalised), returning a packed `0x80RRGGBB` colour.
649///
650/// Clean-room equirectangular mapping (not voxlap's `lng`/`lat` asm
651/// search): the texture's x axis is elevation (`asin` of the vertical
652/// component), the y axis is azimuth (`atan2` around the vertical). A
653/// `ysiz == 1` panorama (e.g. [`Sky::blue_gradient`]) is a pure
654/// horizon→zenith gradient.
655#[allow(
656    clippy::cast_possible_truncation,
657    clippy::cast_sign_loss,
658    clippy::cast_precision_loss
659)]
660fn sample_sky(sky: &Sky, dir: [f32; 3]) -> u32 {
661    let len = (dir[0] * dir[0] + dir[1] * dir[1] + dir[2] * dir[2]).sqrt();
662    if len < 1e-9 {
663        return 0x8000_0000;
664    }
665    let d = [dir[0] / len, dir[1] / len, dir[2] / len];
666    let xsiz_full = sky.lat.len().max(1) as i32; // original column count
667    let pi = std::f32::consts::PI;
668    // Elevation → x, matching the GPU `sky_color` (scene_dda.wgsl): z is
669    // down, so `acos(-z)` is 0 at the zenith (looking up) and π at the nadir
670    // (looking down); `/π` puts the zenith at x=0 and the nadir at x=xsiz.
671    let elev01 = (-d[2]).clamp(-1.0, 1.0).acos() / pi; // 0 (up) .. 1 (down)
672    let x = (elev01 * xsiz_full as f32) as i32;
673    let x = x.clamp(0, xsiz_full - 1);
674    // Azimuth → y (wrapped).
675    let y = if sky.ysiz <= 1 {
676        0
677    } else {
678        let az = d[1].atan2(d[0]); // -pi..pi
679        let yf = ((az / (pi * 2.0)) + 0.5) * sky.ysiz as f32;
680        (yf as i32).rem_euclid(sky.ysiz)
681    };
682    let idx = (y * xsiz_full + x) as usize;
683    let px = sky.pixels.get(idx).copied().unwrap_or(0) as u32;
684    0x8000_0000 | (px & 0x00ff_ffff)
685}
686
687/// Fill the panorama [`Sky`] into every **background** pixel — one whose
688/// z-buffer entry is still `+∞` (no grid/terrain hit). The per-grid DDA only
689/// samples the sky inside each grid's screen rect (and only its sky-owning
690/// grid); pixels outside any grid — most of a sprite/effect-only view, or the
691/// margins around a small world grid — would otherwise keep the caller's flat
692/// clear colour. This paints the real panorama there while leaving terrain
693/// (finite z) and composited translucent pixels untouched. The z-buffer is
694/// not modified. `cam`/`settings` are the same per-frame projection the
695/// renderer used.
696#[allow(clippy::cast_possible_truncation)]
697pub fn render_sky_fill(
698    fb: &mut [u32],
699    zb: &[f32],
700    pitch_pixels: usize,
701    width: u32,
702    height: u32,
703    cam: &CameraState,
704    settings: &OpticastSettings,
705    sky: &Sky,
706) {
707    for py in 0..height {
708        let row = py as usize * pitch_pixels;
709        for px in 0..width {
710            let idx = row + px as usize;
711            if zb[idx].is_finite() {
712                continue; // a grid/terrain hit owns this pixel
713            }
714            let (_origin, dir) = pixel_ray(cam, settings, px, py);
715            fb[idx] = sample_sky(sky, dir);
716        }
717    }
718}
719
720/// World-space ray for screen pixel `(px, py)` under opticast's
721/// pinhole: origin is the camera position, direction is
722/// `(px - hx)·right + (py - hy)·down + hz·forward`.
723///
724/// This is the exact ray `camera_math::derive` bakes into its corner
725/// vectors (`corn[0]` is `pixel (0, 0)`'s direction), so the DDA
726/// renderer samples the same rays the voxlap path's frustum is built
727/// from. The direction is **not** normalised — callers that need a
728/// unit ray (and a true Euclidean distance) normalise themselves;
729/// DDA.1 will track perpendicular distance via the forward-projection
730/// instead, matching the engine's z-buffer convention.
731#[must_use]
732pub fn pixel_ray(
733    cs: &CameraState,
734    settings: &OpticastSettings,
735    px: u32,
736    py: u32,
737) -> ([f32; 3], [f32; 3]) {
738    // u32 → f32 is exact for any realistic screen coordinate.
739    #[allow(clippy::cast_precision_loss)]
740    let sx = px as f32 - settings.hx;
741    #[allow(clippy::cast_precision_loss)]
742    let sy = py as f32 - settings.hy;
743    let dir = [
744        sx * cs.right[0] + sy * cs.down[0] + settings.hz * cs.forward[0],
745        sx * cs.right[1] + sy * cs.down[1] + settings.hz * cs.forward[1],
746        sx * cs.right[2] + sy * cs.down[2] + settings.hz * cs.forward[2],
747    ];
748    (cs.pos, dir)
749}
750
/// Slab test of a ray against the axis-aligned box `[lo, hi]`.
///
/// Returns `Some((t_enter, t_exit))`, the parameter interval along
/// `dir` during which the ray is inside the box, or `None` when the ray
/// misses it. `t_enter` is clamped to `>= 0`, so an origin inside the
/// box yields `t_enter == 0`. `dir` need not be normalised — `t` is
/// measured in units of `|dir|`.
///
/// A component with `|dir[a]| < 1e-9` is treated as parallel to that
/// slab: the ray can only hit if the origin already lies within
/// `[lo[a], hi[a]]` on that axis.
pub(crate) fn intersect_aabb(
    o: [f32; 3],
    dir: [f32; 3],
    lo: [f32; 3],
    hi: [f32; 3],
) -> Option<(f32, f32)> {
    let mut t_near = 0.0f32;
    let mut t_far = f32::INFINITY;
    for axis in 0..3 {
        let (start, d) = (o[axis], dir[axis]);
        if d.abs() < 1e-9 {
            // Parallel to this slab: never enters it from outside.
            if start < lo[axis] || start > hi[axis] {
                return None;
            }
            continue;
        }
        let inv = 1.0 / d;
        let t_lo = (lo[axis] - start) * inv;
        let t_hi = (hi[axis] - start) * inv;
        // Order the two plane crossings along the ray.
        let (t_in, t_out) = if t_lo > t_hi {
            (t_hi, t_lo)
        } else {
            (t_lo, t_hi)
        };
        t_near = t_near.max(t_in);
        t_far = t_far.min(t_out);
        // The running interval became empty: the slabs do not overlap.
        if t_near > t_far {
            return None;
        }
    }
    Some((t_near, t_far))
}
786
787/// Brick edge length in voxels — one occupancy bit per `BRICK³` block.
788const BRICK: i32 = 8;
789
790/// Per-chunk brick occupancy map for two-level DDA empty-space skip
791/// (Substage DDA.3).
792///
793/// One bit per `BRICK³` block of the active chunk, set iff any voxel in
794/// the block is solid. The ray steps the coarse brick grid (8× longer
795/// strides) and only descends into a per-voxel walk inside occupied
796/// bricks, so a ray through open air crosses ~`length / 8` empty bricks
797/// instead of `length` air voxels — each of which would otherwise walk
798/// the column slab chain via `surface_color`.
799///
800/// Built per frame from a [`GridView`] in [`render_dda`]. A persistent
801/// per-chunk cache with edit-driven invalidation (locked decision #2 in
802/// `PORTING-DDA.md`) is a later perf refinement.
803#[derive(Debug)]
804pub(crate) struct BrickMap {
805    /// Brick counts along x / y / z (one entry per `BRICK³` cells).
806    nb: [i32; 3],
807    /// Brick occupancy bitset; brick `(bx, by, bz)` is bit
808    /// `(bz * nb[1] + by) * nb[0] + bx`.
809    bits: Vec<u64>,
810    /// Super-brick counts (one entry per `BRICK³` *bricks* = `SUPER³`
811    /// cells), `ceil(nb / BRICK)`.
812    ns: [i32; 3],
813    /// Super-brick occupancy (DDA.7 perf): a coarse level so a ray
814    /// through open air above the terrain skips `SUPER` cells per outer
815    /// step instead of `BRICK`. A super-brick is set iff any child brick
816    /// is set.
817    super_bits: Vec<u64>,
818}
819
820/// Super-brick edge in cells (`BRICK` bricks per axis).
821const SUPER: i32 = BRICK * BRICK;
822
823impl BrickMap {
824    /// Scan every mip-`mip` column of `grid`, building brick + super-
825    /// brick occupancy. `mip` must be `< grid.mip_count()`.
826    #[allow(clippy::cast_possible_wrap, clippy::cast_sign_loss)]
827    fn build(grid: &GridView<'_>, mip: u32) -> Self {
828        let vsid_m = (grid.vsid >> mip).max(1) as i32;
829        let z_m = (crate::grid_view::CHUNK_SIZE_Z >> mip).max(1) as i32;
830        let nb = [
831            (vsid_m + BRICK - 1) / BRICK,
832            (vsid_m + BRICK - 1) / BRICK,
833            (z_m + BRICK - 1) / BRICK,
834        ];
835        let ns = [
836            (nb[0] + BRICK - 1) / BRICK,
837            (nb[1] + BRICK - 1) / BRICK,
838            (nb[2] + BRICK - 1) / BRICK,
839        ];
840        let count = (nb[0] * nb[1] * nb[2]) as usize;
841        let scount = (ns[0] * ns[1] * ns[2]) as usize;
842        let mut bits = vec![0u64; count.div_ceil(64)];
843        let mut super_bits = vec![0u64; scount.div_ceil(64)];
844        for y in 0..vsid_m {
845            for x in 0..vsid_m {
846                let (bx, by) = (x / BRICK, y / BRICK);
847                grid.for_each_run_mip(x as u32, y as u32, mip, |top, bot| {
848                    for bz in (top / BRICK)..=((bot - 1) / BRICK) {
849                        let idx = ((bz * nb[1] + by) * nb[0] + bx) as usize;
850                        bits[idx / 64] |= 1u64 << (idx % 64);
851                        let sidx =
852                            (((bz / BRICK) * ns[1] + by / BRICK) * ns[0] + bx / BRICK) as usize;
853                        super_bits[sidx / 64] |= 1u64 << (sidx % 64);
854                    }
855                });
856            }
857        }
858        Self {
859            nb,
860            bits,
861            ns,
862            super_bits,
863        }
864    }
865
866    /// Whether brick `b` is in range and holds any solid voxel.
867    #[inline]
868    #[allow(clippy::cast_sign_loss)]
869    fn occupied(&self, b: [i32; 3]) -> bool {
870        if b[0] < 0
871            || b[0] >= self.nb[0]
872            || b[1] < 0
873            || b[1] >= self.nb[1]
874            || b[2] < 0
875            || b[2] >= self.nb[2]
876        {
877            return false;
878        }
879        let idx = ((b[2] * self.nb[1] + b[1]) * self.nb[0] + b[0]) as usize;
880        (self.bits[idx / 64] >> (idx % 64)) & 1 != 0
881    }
882
883    /// Whether super-brick `s` is in range and holds any solid voxel.
884    #[inline]
885    #[allow(clippy::cast_sign_loss)]
886    fn occupied_super(&self, s: [i32; 3]) -> bool {
887        if s[0] < 0
888            || s[0] >= self.ns[0]
889            || s[1] < 0
890            || s[1] >= self.ns[1]
891            || s[2] < 0
892            || s[2] >= self.ns[2]
893        {
894            return false;
895        }
896        let idx = ((s[2] * self.ns[1] + s[1]) * self.ns[0] + s[0]) as usize;
897        (self.super_bits[idx / 64] >> (idx % 64)) & 1 != 0
898    }
899}
900
/// Initialises per-axis 3D-DDA stepping state for a ray that starts in
/// cell `cell` of a grid whose cells are `cell_size` units wide.
///
/// Returns `(step, t_max, t_delta)`:
/// * `step[a]` — `+1` / `-1` for the direction the ray moves along
///   axis `a`, or `0` when `|dir[a]| <= 1e-9`;
/// * `t_max[a]` — ray parameter at which the next `a`-boundary is
///   crossed;
/// * `t_delta[a]` — parameter increment per whole cell along `a`.
///
/// An axis the ray does not move along keeps `t_max = t_delta = +inf`,
/// so it is never picked as the stepping axis.
pub(crate) fn dda_setup(
    origin: [f32; 3],
    dir: [f32; 3],
    cell: [i32; 3],
    cell_size: f32,
) -> ([i32; 3], [f32; 3], [f32; 3]) {
    let mut step = [0i32; 3];
    let mut t_max = [f32::INFINITY; 3];
    let mut t_delta = [f32::INFINITY; 3];
    for axis in 0..3 {
        let d = dir[axis];
        // Pick the travel direction and the index of the cell face the
        // ray leaves through: the far face when moving up, the near
        // face when moving down.
        let (sign, face) = if d > 1e-9 {
            (1, cell[axis] + 1)
        } else if d < -1e-9 {
            (-1, cell[axis])
        } else {
            continue;
        };
        #[allow(clippy::cast_precision_loss)]
        let boundary = face as f32 * cell_size;
        step[axis] = sign;
        t_max[axis] = (boundary - origin[axis]) / d;
        t_delta[axis] = if sign > 0 {
            cell_size / d
        } else {
            -cell_size / d
        };
    }
    (step, t_max, t_delta)
}
932
/// Returns the axis (`0`, `1` or `2`) whose `t_max` is smallest, i.e.
/// the next cell boundary the ray crosses. Ties resolve to the lowest
/// axis index.
#[inline]
pub(crate) fn min_axis(t_max: [f32; 3]) -> usize {
    let [tx, ty, tz] = t_max;
    match (tx <= ty && tx <= tz, ty <= tz) {
        (true, _) => 0,
        (false, true) => 1,
        (false, false) => 2,
    }
}
945
946/// Persistent, cross-frame brick occupancy cache (Substage DDA.7
947/// perf). Keyed by `(chunk x, y, z, mip)` with the chunk's edit
948/// `version`; an entry is reused until its chunk's version changes, so a
949/// static / streamed-once world pays **zero** brick-build cost after the
950/// first frame (the per-frame rebuild was the dominant DDA cost).
951///
952/// Owned by the caller across frames (the scene's `Grid`), populated
953/// single-threaded via [`Self::ensure`], then borrowed immutably by the
954/// parallel render bands.
955#[derive(Debug, Default)]
956pub struct BrickCache {
957    maps: HashMap<(i32, i32, i32, u32), (u64, BrickMap)>,
958}
959
960impl BrickCache {
961    #[must_use]
962    pub fn new() -> Self {
963        Self::default()
964    }
965
966    /// Ensure a current mip-`mip` brick map exists for `chunk` (built
967    /// from `view`); rebuilds only when the cached `version` differs.
968    pub fn ensure(&mut self, chunk: [i32; 3], mip: u32, version: u64, view: &GridView<'_>) {
969        let key = (chunk[0], chunk[1], chunk[2], mip);
970        let stale = self.maps.get(&key).map_or(true, |(v, _)| *v != version);
971        if stale {
972            self.maps.insert(key, (version, BrickMap::build(view, mip)));
973        }
974    }
975
976    #[inline]
977    fn get(&self, chunk: [i32; 3], mip: u32) -> Option<&BrickMap> {
978        self.maps
979            .get(&(chunk[0], chunk[1], chunk[2], mip))
980            .map(|(_, m)| m)
981    }
982
983    /// Drop cached entries whose chunk fails `keep` — bounds memory as
984    /// streaming evicts chunks. Called once per frame by the scene.
985    pub fn retain_chunks(&mut self, keep: impl Fn([i32; 3]) -> bool) {
986        self.maps.retain(|k, _| keep([k.0, k.1, k.2]));
987    }
988}
989
990/// Build a throwaway [`BrickCache`] covering every populated chunk of
991/// `grid` at the effective mip — for the sequential [`render_dda`] /
992/// tests, where no persistent cache is threaded in. Returns
993/// `(cache, effective_mip)`.
994#[allow(clippy::cast_possible_wrap)]
995fn local_cache(grid: &GridView<'_>, requested_mip: u32) -> (BrickCache, u32) {
996    let mip = effective_mip(grid, requested_mip);
997    let mut cache = BrickCache::new();
998    if let Some(cg) = grid.chunk_grid {
999        for dz in 0..cg.chunks_z as i32 {
1000            for dy in 0..cg.chunks_y as i32 {
1001                for dx in 0..cg.chunks_x as i32 {
1002                    let slot = ((dz * cg.chunks_y as i32 + dy) * cg.chunks_x as i32 + dx) as usize;
1003                    if let Some(Some(view)) = cg.chunks.get(slot) {
1004                        let ch = [
1005                            cg.origin_chunk_xy[0] + dx,
1006                            cg.origin_chunk_xy[1] + dy,
1007                            cg.origin_chunk_z + dz,
1008                        ];
1009                        cache.ensure(ch, mip, 0, view);
1010                    }
1011                }
1012            }
1013        }
1014    } else {
1015        cache.ensure([0, 0, 0], mip, 0, grid);
1016    }
1017    (cache, mip)
1018}
1019
1020/// Clamp a requested render mip to one every populated chunk actually
1021/// has built — so the uniform-mip traversal never under-samples a chunk
1022/// that lacks the requested level (which would punch holes). `0` short-
1023/// circuits (always available).
1024#[must_use]
1025pub fn effective_mip(grid: &GridView<'_>, requested: u32) -> u32 {
1026    if requested == 0 {
1027        return 0;
1028    }
1029    let mut m = requested;
1030    if let Some(cg) = grid.chunk_grid {
1031        for c in cg.chunks.iter().flatten() {
1032            m = m.min(c.mip_count().saturating_sub(1));
1033        }
1034    } else {
1035        m = m.min(grid.mip_count().saturating_sub(1));
1036    }
1037    m
1038}
1039
1040/// Cross-chunk voxel sampler (Substage DDA.4 / DDA.7).
1041///
1042/// Resolves a grid-local voxel coordinate to the chunk that owns it
1043/// (via [`GridView::chunk_at_xyz`]) and answers the DDA's per-voxel hit
1044/// query — brick-gated [`GridView::surface_color`]. It borrows the
1045/// shared immutable [`BrickMaps`] and caches the **current chunk**
1046/// (`cur_*`: view + brick-map reference): a ray usually stays in one
1047/// chunk for many voxels, so the per-voxel cost is a single index
1048/// compare + an O(1) brick bit test — no hashing, no mutation. Holding
1049/// only shared borrows, a `Sampler` is cheap to spin up per render band.
1050///
1051/// Single-chunk grids are the degenerate case: every voxel maps to
1052/// chunk `[0, 0, 0]` (= the view itself).
1053struct Sampler<'a> {
1054    grid: GridView<'a>,
1055    bricks: &'a BrickCache,
1056    /// Effective render mip (DDA.6). Traversal cells are mip-`mip`
1057    /// cells; sampling reads mip-`mip` data.
1058    mip: u32,
1059    /// Chunk size in mip-`mip` cells is a power of two; store it as
1060    /// `log2` (shift) + `size - 1` (mask) so [`Self::locate`] splits a
1061    /// cell into `(chunk, in-chunk)` with a shift + an `&` per axis
1062    /// instead of a signed `div_euclid` — the dominant per-cell cost.
1063    /// Arithmetic `>>` floors toward -∞ (= `div_euclid` for a positive
1064    /// power-of-two divisor) and `& mask` gives the non-negative
1065    /// remainder (= `rem_euclid`) even for negative cells (two's
1066    /// complement), so results are identical to the division form.
1067    xy_shift: u32,
1068    xy_mask: i32,
1069    z_shift: u32,
1070    z_mask: i32,
1071    cur_ch: [i32; 3],
1072    cur_view: Option<GridView<'a>>,
1073    cur_brick: Option<&'a BrickMap>,
1074    has_cur: bool,
1075}
1076
1077impl<'a> Sampler<'a> {
1078    fn new(grid: GridView<'a>, bricks: &'a BrickCache, mip: u32) -> Self {
1079        let cs_xy = (grid.chunk_size_xy >> mip).max(1);
1080        let cs_z = (crate::grid_view::CHUNK_SIZE_Z >> mip).max(1);
1081        debug_assert!(
1082            cs_xy.is_power_of_two() && cs_z.is_power_of_two(),
1083            "chunk dims must be powers of two for the shift/mask split"
1084        );
1085        #[allow(clippy::cast_possible_wrap)]
1086        Self {
1087            grid,
1088            bricks,
1089            mip,
1090            xy_shift: cs_xy.trailing_zeros(),
1091            xy_mask: cs_xy as i32 - 1,
1092            z_shift: cs_z.trailing_zeros(),
1093            z_mask: cs_z as i32 - 1,
1094            cur_ch: [0; 3],
1095            cur_view: None,
1096            cur_brick: None,
1097            has_cur: false,
1098        }
1099    }
1100
1101    /// Refresh the current-chunk cache (view + brick map) for `ch`.
1102    fn select_chunk(&mut self, ch: [i32; 3]) {
1103        if self.has_cur && self.cur_ch == ch {
1104            return;
1105        }
1106        self.cur_view = self.grid.chunk_at_xyz(ch);
1107        self.cur_brick = self.bricks.get(ch, self.mip);
1108        self.cur_ch = ch;
1109        self.has_cur = true;
1110    }
1111
1112    /// Split a grid-local **mip-`mip` cell** index into `(chunk index,
1113    /// in-chunk mip-cell)` via shift + mask (see field docs). Chunk
1114    /// indices are mip-independent; only the per-chunk resolution
1115    /// shrinks with mip.
1116    #[allow(clippy::cast_sign_loss)]
1117    fn locate(&self, c: [i32; 3]) -> ([i32; 3], [u32; 3]) {
1118        let ch = [
1119            c[0] >> self.xy_shift,
1120            c[1] >> self.xy_shift,
1121            c[2] >> self.z_shift,
1122        ];
1123        let loc = [
1124            (c[0] & self.xy_mask) as u32,
1125            (c[1] & self.xy_mask) as u32,
1126            (c[2] & self.z_mask) as u32,
1127        ];
1128        (ch, loc)
1129    }
1130
1131    /// Hit colour for grid-local mip-cell `c`, or `None` for air / empty
1132    /// chunk / uncoloured bedrock. Brick-gated so air inside a populated
1133    /// chunk costs only a bit test, not a slab walk.
1134    #[allow(clippy::cast_possible_wrap)]
1135    fn hit(&mut self, c: [i32; 3]) -> Option<u32> {
1136        #[cfg(test)]
1137        prof::SURF.with(|x| x.set(x.get() + 1));
1138        let (ch, loc) = self.locate(c);
1139        self.select_chunk(ch);
1140        let occupied = self.cur_brick.is_some_and(|bm| {
1141            bm.occupied([
1142                loc[0] as i32 / BRICK,
1143                loc[1] as i32 / BRICK,
1144                loc[2] as i32 / BRICK,
1145            ])
1146        });
1147        if !occupied {
1148            return None;
1149        }
1150        self.cur_view?
1151            .surface_color_mip(loc[0], loc[1], loc[2], self.mip)
1152    }
1153
1154    /// Chunk size in mip-cells along XY / Z (always a power of two).
1155    #[inline]
1156    fn cells_per_chunk_xy(&self) -> i32 {
1157        1 << self.xy_shift
1158    }
1159    #[inline]
1160    fn cells_per_chunk_z(&self) -> i32 {
1161        1 << self.z_shift
1162    }
1163
1164    /// Whether the brick at brick-index `brick` (in `BRICK`-mip-cell
1165    /// units) holds any solid voxel. Used by the outer brick-DDA to skip
1166    /// empty space `BRICK` cells at a time. Assumes bricks nest within
1167    /// chunks (caller gates on [`Self::cells_per_chunk_xy`]`>= BRICK`).
1168    #[allow(clippy::cast_sign_loss)]
1169    fn brick_occupied(&mut self, brick: [i32; 3]) -> bool {
1170        // First mip-cell of the brick (BRICK = 8 → `<< 3`).
1171        let c0 = [brick[0] << 3, brick[1] << 3, brick[2] << 3];
1172        let ch = [
1173            c0[0] >> self.xy_shift,
1174            c0[1] >> self.xy_shift,
1175            c0[2] >> self.z_shift,
1176        ];
1177        self.select_chunk(ch);
1178        self.cur_brick.is_some_and(|bm| {
1179            bm.occupied([
1180                (c0[0] & self.xy_mask) >> 3,
1181                (c0[1] & self.xy_mask) >> 3,
1182                (c0[2] & self.z_mask) >> 3,
1183            ])
1184        })
1185    }
1186
1187    /// Whether the super-brick at super-index `s` (in `SUPER`-mip-cell
1188    /// units) holds any solid voxel. Outer-most empty-space skip (steps
1189    /// `SUPER` cells). Assumes super-bricks nest in chunks (caller gates
1190    /// on `cells_per_chunk >= SUPER`).
1191    #[allow(clippy::cast_sign_loss)]
1192    fn super_occupied(&mut self, s: [i32; 3]) -> bool {
1193        // First mip-cell of the super-brick (SUPER = 64 → `<< 6`).
1194        let c0 = [s[0] << 6, s[1] << 6, s[2] << 6];
1195        let ch = [
1196            c0[0] >> self.xy_shift,
1197            c0[1] >> self.xy_shift,
1198            c0[2] >> self.z_shift,
1199        ];
1200        self.select_chunk(ch);
1201        self.cur_brick.is_some_and(|bm| {
1202            bm.occupied_super([
1203                (c0[0] & self.xy_mask) >> 6,
1204                (c0[1] & self.xy_mask) >> 6,
1205                (c0[2] & self.z_mask) >> 6,
1206            ])
1207        })
1208    }
1209}
1210
1211/// CPU.2 — safety cap on a shadow ray's voxel steps (the `shadow_max_dist`
1212/// / light-distance bound is the real limit; this only backstops a
1213/// degenerate ray). Mirrors the GPU `shadow_max_steps`.
1214const SHADOW_MAX_STEPS: u32 = 1024;
1215
1216/// CPU.2 — [`ShadowTester`] backed by the render [`Sampler`]: a hard-shadow
1217/// occlusion march over the grid's mip-`mip` voxels. The march reuses the
1218/// same `sampler.hit()` occupancy the primary ray uses (so a shadow ray is
1219/// blocked by the same surfaces the camera sees) and the same `[lo_c, hi_c)`
1220/// voxel-box bounds, stepping a standard 3D-DDA until it hits a solid cell
1221/// (occluded), leaves the box / exceeds `max_t` (lit), or hits the step cap.
1222struct SamplerShadow<'s, 'a> {
1223    sampler: &'s mut Sampler<'a>,
1224    cell_size: f32,
1225    lo_c: [i32; 3],
1226    hi_c: [i32; 3],
1227}
1228
1229impl ShadowTester for SamplerShadow<'_, '_> {
1230    #[allow(clippy::cast_possible_truncation, clippy::cast_precision_loss)]
1231    fn occluded(&mut self, origin: [f32; 3], dir: [f32; 3], max_t: f32) -> bool {
1232        let cs = self.cell_size;
1233        let mut cellc = [
1234            (origin[0] / cs).floor() as i32,
1235            (origin[1] / cs).floor() as i32,
1236            (origin[2] / cs).floor() as i32,
1237        ];
1238        let (step, mut t_max, t_delta) = dda_setup(origin, dir, cellc, cs);
1239        let mut t_curr = 0.0f32;
1240        for _ in 0..SHADOW_MAX_STEPS {
1241            if cellc[0] < self.lo_c[0]
1242                || cellc[0] >= self.hi_c[0]
1243                || cellc[1] < self.lo_c[1]
1244                || cellc[1] >= self.hi_c[1]
1245                || cellc[2] < self.lo_c[2]
1246                || cellc[2] >= self.hi_c[2]
1247            {
1248                return false; // left the voxel box → no occluder ahead
1249            }
1250            if t_curr > max_t {
1251                return false; // past the cap / the light → unshadowed
1252            }
1253            if self.sampler.hit(cellc).is_some() {
1254                return true; // a surface blocks the ray
1255            }
1256            let axis = min_axis(t_max);
1257            t_curr = t_max[axis];
1258            cellc[axis] += step[axis];
1259            t_max[axis] += t_delta[axis];
1260        }
1261        false
1262    }
1263}
1264
1265/// Walk mip-cells along the ray within `[lo_c, hi_c)` and return the
1266/// first solid hit, with leak-free empty-space skipping (DDA.7 redux).
1267///
1268/// **Why one continuous DDA, not nested level-walks.** The previous
1269/// design ran an outer brick/super DDA that *jumped* whole bricks and
1270/// only descended into occupied ones. Stepping a coarse cell at a time
1271/// lets the ray slip diagonally **past an occupied coarse cell it only
1272/// touches at a shared edge/corner** — a leak that showed as bright
1273/// sky seams across thin diagonal walls (the cave-demo report). Here a
1274/// *single* cell-granularity DDA carries the exact `(cellc, t_max)`
1275/// state for the whole ray; it only ever **fast-forwards across an
1276/// empty super-brick / brick**, where skipping cannot miss anything.
1277/// The exit axis lands on the integer box-boundary cell (no float
1278/// re-floor on the critical axis), so the entry cell of the next —
1279/// possibly occupied — box is always visited densely. Result: hits are
1280/// bit-identical to the dense per-cell reference, with the empty-space
1281/// speed-up retained.
1282///
1283/// `cell_size` is the mip-cell edge in mip-0 voxels (`1 << mip`);
1284/// `fwd_dot = dir·forward` → perpendicular depth.
1285#[allow(
1286    clippy::too_many_arguments,
1287    clippy::cast_possible_truncation,
1288    clippy::cast_sign_loss,
1289    clippy::cast_precision_loss
1290)]
1291fn cell_walk_skip(
1292    origin: [f32; 3],
1293    dir: [f32; 3],
1294    fwd_dot: f32,
1295    sampler: &mut Sampler<'_>,
1296    lo_c: [i32; 3],
1297    hi_c: [i32; 3],
1298    cell_size: f32,
1299    t_enter: f32,
1300    t_exit: f32,
1301    max_dist: f32,
1302    env: &DdaEnv<'_>,
1303) -> Option<Hit> {
1304    let has_super = sampler.cells_per_chunk_xy() >= SUPER && sampler.cells_per_chunk_z() >= SUPER;
1305    let has_brick = sampler.cells_per_chunk_xy() >= BRICK && sampler.cells_per_chunk_z() >= BRICK;
1306
1307    let start = t_enter + 1e-4;
1308    let p = [
1309        origin[0] + dir[0] * start,
1310        origin[1] + dir[1] * start,
1311        origin[2] + dir[2] * start,
1312    ];
1313    let mut cellc = [
1314        ((p[0] / cell_size).floor() as i32).clamp(lo_c[0], hi_c[0] - 1),
1315        ((p[1] / cell_size).floor() as i32).clamp(lo_c[1], hi_c[1] - 1),
1316        ((p[2] / cell_size).floor() as i32).clamp(lo_c[2], hi_c[2] - 1),
1317    ];
1318    let (step, mut t_max, t_delta) = dda_setup(origin, dir, cellc, cell_size);
1319    // Reciprocal direction → the per-skip box-boundary t and the t_max
1320    // refresh use multiplies instead of divisions (the dominant skip
1321    // cost). `0.0` where `step == 0` (that axis' t_max stays +∞).
1322    let inv = [
1323        if step[0] != 0 { 1.0 / dir[0] } else { 0.0 },
1324        if step[1] != 0 { 1.0 / dir[1] } else { 0.0 },
1325        if step[2] != 0 { 1.0 / dir[2] } else { 0.0 },
1326    ];
1327    let mut t_curr = t_enter;
1328    let mut last_axis = 3usize;
1329    // World ray length per ray-parameter unit; divided by `cell_size` it turns
1330    // a cell's `t` span into its path length in voxel units (Volumetric weight).
1331    let dir_len = (dir[0] * dir[0] + dir[1] * dir[1] + dir[2] * dir[2]).sqrt();
1332
1333    // TV: front-to-back translucent accumulation. While no translucent voxel
1334    // is hit (`touched` stays false) every return is unchanged — the opaque
1335    // world renders bit-identically. `prev_*` drive per-span compositing (one
1336    // alpha layer per contiguous solid run or material change).
1337    let mut accum = [0.0f32; 3];
1338    let mut trans = 1.0f32;
1339    let mut touched = false;
1340    let mut prev_solid = false;
1341    let mut prev_mat = 0u8;
1342
1343    // Each iteration either advances ≥1 cell (dense) or ≥1 box (skip),
1344    // so the total cell span bounds the loop.
1345    let span = (hi_c[0] - lo_c[0]) + (hi_c[1] - lo_c[1]) + (hi_c[2] - lo_c[2]);
1346    let max_steps = span.max(0) as usize + 16;
1347    for _ in 0..max_steps {
1348        if cellc[0] < lo_c[0]
1349            || cellc[0] >= hi_c[0]
1350            || cellc[1] < lo_c[1]
1351            || cellc[1] >= hi_c[1]
1352            || cellc[2] < lo_c[2]
1353            || cellc[2] >= hi_c[2]
1354        {
1355            return finalize_exit(touched, accum, trans, env, dir, max_dist);
1356        }
1357        let depth = t_curr * fwd_dot;
1358        if depth > max_dist || t_curr > t_exit {
1359            return finalize_exit(touched, accum, trans, env, dir, max_dist);
1360        }
1361        // Fog is fully opaque at `fog_max_dist`: nothing beyond is
1362        // visible, so stop the ray there and return the fog colour
1363        // rather than traversing (and skip/step-counting) to the far box
1364        // wall. Both correct and the dominant perf win for foggy worlds —
1365        // it caps every ray's length at the fog distance.
1366        if env.fog_max_dist > 0.0 && depth >= env.fog_max_dist {
1367            let fog = 0x8000_0000 | (env.fog_color & 0x00ff_ffff);
1368            let color = if touched {
1369                composite_over(accum, trans, fog)
1370            } else {
1371                fog
1372            };
1373            return Some(Hit {
1374                color,
1375                dist: env.fog_max_dist,
1376            });
1377        }
1378
1379        // Empty-space skip: a whole empty super-brick, else an empty
1380        // brick. Skipping only empty boxes can never miss a surface.
1381        let skip_shift = if has_super
1382            && !sampler.super_occupied([cellc[0] >> 6, cellc[1] >> 6, cellc[2] >> 6])
1383        {
1384            Some(6u32)
1385        } else if has_brick
1386            && !sampler.brick_occupied([cellc[0] >> 3, cellc[1] >> 3, cellc[2] >> 3])
1387        {
1388            Some(3u32)
1389        } else {
1390            None
1391        };
1392        if let Some(sh) = skip_shift {
1393            #[cfg(test)]
1394            prof::BRICKS.with(|x| x.set(x.get() + 1));
1395            // Nearest box boundary along the ray (in cell units).
1396            let mut best_t = f32::INFINITY;
1397            let mut best_axis = 3usize;
1398            let mut plane = [0i32; 3];
1399            for a in 0..3 {
1400                if step[a] == 0 {
1401                    continue;
1402                }
1403                let idx = cellc[a] >> sh;
1404                plane[a] = if step[a] > 0 {
1405                    (idx + 1) << sh
1406                } else {
1407                    idx << sh
1408                };
1409                let tb = (plane[a] as f32 * cell_size - origin[a]) * inv[a];
1410                if tb < best_t {
1411                    best_t = tb;
1412                    best_axis = a;
1413                }
1414            }
1415            if best_axis == 3 {
1416                return finalize_exit(touched, accum, trans, env, dir, max_dist);
1417            }
1418            // Land just across the boundary; pin the exit axis to the
1419            // integer boundary cell so float error can't skip the next
1420            // box's entry cell. Other axes haven't crossed their box
1421            // boundary (best_t is the min), so the point's floor is safe.
1422            let pb = [
1423                origin[0] + dir[0] * (best_t + 1e-4),
1424                origin[1] + dir[1] * (best_t + 1e-4),
1425                origin[2] + dir[2] * (best_t + 1e-4),
1426            ];
1427            let mut nc = [
1428                (pb[0] / cell_size).floor() as i32,
1429                (pb[1] / cell_size).floor() as i32,
1430                (pb[2] / cell_size).floor() as i32,
1431            ];
1432            nc[best_axis] = if step[best_axis] > 0 {
1433                plane[best_axis]
1434            } else {
1435                plane[best_axis] - 1
1436            };
1437            // The skip crossed a box boundary; if that takes the ray out
1438            // of the grid box it has exited (sky) — return rather than
1439            // clamping back in-bounds, which would spin at the edge.
1440            if nc[0] < lo_c[0]
1441                || nc[0] >= hi_c[0]
1442                || nc[1] < lo_c[1]
1443                || nc[1] >= hi_c[1]
1444                || nc[2] < lo_c[2]
1445                || nc[2] >= hi_c[2]
1446            {
1447                return finalize_exit(touched, accum, trans, env, dir, max_dist);
1448            }
1449            cellc = nc;
1450            // Refresh t_max for the new cell (dir unchanged → t_delta and
1451            // step constant; axes with step==0 keep their +∞).
1452            for a in 0..3 {
1453                if step[a] > 0 {
1454                    t_max[a] = ((cellc[a] + 1) as f32 * cell_size - origin[a]) * inv[a];
1455                } else if step[a] < 0 {
1456                    t_max[a] = (cellc[a] as f32 * cell_size - origin[a]) * inv[a];
1457                }
1458            }
1459            t_curr = best_t.max(t_curr);
1460            last_axis = best_axis;
1461            prev_solid = false; // skipped empty space → next hit starts a run
1462            continue;
1463        }
1464
1465        // Occupied brick: dense per-cell surface test.
1466        #[cfg(test)]
1467        prof::CELLS.with(|x| x.set(x.get() + 1));
1468        if let Some(color) = sampler.hit(cellc) {
1469            let bright_sub = side_shade_sub(env, last_axis, step);
1470            // CPU.1 — dynamic lighting (flat per voxel) when a rig is active;
1471            // else the baked-byte `shade` path (byte-identical). CPU.2 — a
1472            // sun/point shadow march reuses this same `sampler` (occupancy +
1473            // box bounds); only built when a caster is actually flagged so
1474            // the no-shadow rig stays march-free.
1475            let shaded = if env.lights.enabled {
1476                let casts = env.lights.shadow_strength > 0.0
1477                    && (env.lights.sun_casts_shadow
1478                        || env.lights.points.iter().any(|p| p.casts_shadow));
1479                // Pick the shadow oracle: the scene-wide one (cross-grid +
1480                // sprites, XS.1) when present, else the single-grid Sampler;
1481                // `None` when no caster is flagged, so the rig stays
1482                // march-free. The two testers live in branch-local slots so
1483                // exactly one is borrowed for the `shade_lit_cpu` call.
1484                let mut world_sh;
1485                let mut sampler_sh;
1486                let tester: Option<&mut dyn ShadowTester> = if !casts {
1487                    None
1488                } else if let Some(ctx) = env.world_shadow {
1489                    world_sh = WorldShadow { ctx };
1490                    Some(&mut world_sh)
1491                } else {
1492                    sampler_sh = SamplerShadow {
1493                        sampler: &mut *sampler,
1494                        cell_size,
1495                        lo_c,
1496                        hi_c,
1497                    };
1498                    Some(&mut sampler_sh)
1499                };
1500                shade_lit_cpu(
1501                    color,
1502                    bright_sub,
1503                    last_axis,
1504                    step,
1505                    cellc,
1506                    cell_size,
1507                    &env.lights,
1508                    tester,
1509                )
1510            } else {
1511                shade(color, bright_sub)
1512            };
1513            let lit = apply_fog(shaded, depth.max(0.0), env);
1514            let m = terrain_material(env, color);
1515            if m.is_opaque() {
1516                // Opaque surface: the background. Return the first hit verbatim
1517                // when nothing translucent preceded it (bit-identical), else
1518                // composite the accumulated layers over it.
1519                let color = if touched {
1520                    composite_over(accum, trans, lit)
1521                } else {
1522                    lit
1523                };
1524                return Some(Hit {
1525                    color,
1526                    dist: depth.max(0.0),
1527                });
1528            }
1529            let mat_id = material_for_color(env.terrain_materials, color);
1530            let a = f32::from(m.alpha) / 255.0;
1531            if matches!(m.mode, roxlap_formats::material::BlendMode::Volumetric) {
1532                // Per-cell Beer–Lambert: opacity weighted by the ray's path
1533                // length through this voxel (so a filled volume thickens
1534                // smoothly with depth, a sliver contributes ≈0). Occludes.
1535                let t_exit = t_max[min_axis(t_max)];
1536                let seg_len = (t_exit - t_curr).max(0.0) * dir_len / cell_size;
1537                let eff_a = 1.0 - (1.0 - a).powf(seg_len);
1538                let c = rgb_to_f32(lit);
1539                accum[0] += trans * eff_a * c[0];
1540                accum[1] += trans * eff_a * c[1];
1541                accum[2] += trans * eff_a * c[2];
1542                trans *= 1.0 - eff_a;
1543                touched = true;
1544                prev_mat = mat_id;
1545                if trans < 1.0 / 256.0 {
1546                    return Some(Hit {
1547                        color: f32_to_rgb(accum),
1548                        dist: depth.max(0.0),
1549                    });
1550                }
1551            } else if !prev_solid || mat_id != prev_mat {
1552                // AlphaBlend / Additive: one alpha layer per solid-run entry or
1553                // material change (per-span — avoids the voxel-grid striping
1554                // through a thick glass/water slab; thickness-independent).
1555                let c = rgb_to_f32(lit);
1556                accum[0] += trans * a * c[0];
1557                accum[1] += trans * a * c[1];
1558                accum[2] += trans * a * c[2];
1559                if !matches!(m.mode, roxlap_formats::material::BlendMode::Additive) {
1560                    trans *= 1.0 - a; // AlphaBlend occludes; Additive does not.
1561                }
1562                touched = true;
1563                prev_mat = mat_id;
1564                if trans < 1.0 / 256.0 {
1565                    return Some(Hit {
1566                        color: f32_to_rgb(accum),
1567                        dist: depth.max(0.0),
1568                    });
1569                }
1570            }
1571            prev_solid = true;
1572        } else {
1573            prev_solid = false;
1574        }
1575        let axis = min_axis(t_max);
1576        last_axis = axis;
1577        t_curr = t_max[axis];
1578        cellc[axis] += step[axis];
1579        t_max[axis] += t_delta[axis];
1580    }
1581    None
1582}
1583
1584/// Per-face brightness reduction for the hit face. `axis` is the axis
1585/// the ray crossed to enter the hit voxel (`3` = entry voxel, no face);
1586/// `step[axis]` gives the crossing direction. Maps to the
1587/// `[x-, x+, y-, y+, z-, z+]` `side_shades` entry of the face the ray
1588/// looks at (a `+step` crossing enters through the low / `-` face).
1589#[inline]
1590fn side_shade_sub(env: &DdaEnv<'_>, axis: usize, step: [i32; 3]) -> u32 {
1591    if axis >= 3 {
1592        return 0;
1593    }
1594    let face = axis * 2 + usize::from(step[axis] < 0);
1595    env.side_shades[face].max(0) as u32
1596}
1597
1598/// Cast one ray into the grid and return the first solid hit.
1599///
1600/// **DDA.4:** cross-chunk per-pixel 3D-DDA over the grid's full voxel
1601/// box ([`GridView::voxel_bounds`], spanning every chunk in XY **and**
1602/// Z). The [`Sampler`] resolves each stepped voxel to its chunk and
1603/// brick-gates the slab walk. Cross-chunk look-down (the case the
1604/// voxlap renderer needed the whole virtual-column stack for) falls out
1605/// of the box simply spanning `chunks_z` along Z.
1606fn cast_ray(
1607    origin: [f32; 3],
1608    dir: [f32; 3],
1609    forward: [f32; 3],
1610    sampler: &mut Sampler<'_>,
1611    settings: &OpticastSettings,
1612    env: &DdaEnv<'_>,
1613) -> Option<Hit> {
1614    let (lo_i, hi_i) = sampler.grid.voxel_bounds();
1615    #[allow(clippy::cast_precision_loss)]
1616    let lo_f = [lo_i[0] as f32, lo_i[1] as f32, lo_i[2] as f32];
1617    #[allow(clippy::cast_precision_loss)]
1618    let hi_f = [hi_i[0] as f32, hi_i[1] as f32, hi_i[2] as f32];
1619    let (t_enter, t_exit) = intersect_aabb(origin, dir, lo_f, hi_f)?;
1620    let fwd_dot = dir[0] * forward[0] + dir[1] * forward[1] + dir[2] * forward[2];
1621    #[allow(clippy::cast_precision_loss)]
1622    let max_dist = settings.max_scan_dist.max(1) as f32;
1623    let cell = 1i32 << sampler.mip;
1624    let cell_size = cell as f32;
1625    let lo_c = [
1626        lo_i[0].div_euclid(cell),
1627        lo_i[1].div_euclid(cell),
1628        lo_i[2].div_euclid(cell),
1629    ];
1630    let hi_c = [
1631        hi_i[0].div_euclid(cell),
1632        hi_i[1].div_euclid(cell),
1633        hi_i[2].div_euclid(cell),
1634    ];
1635    cell_walk_skip(
1636        origin, dir, fwd_dot, sampler, lo_c, hi_c, cell_size, t_enter, t_exit, max_dist, env,
1637    )
1638}
1639
1640/// Render one grid into `sink` with per-pixel 3D-DDA.
1641///
1642/// `camera` is the grid-local pose, `settings`
1643/// ([`OpticastSettings`]) carries the projection + viewport (including
1644/// the `y_start..y_end` strip bound), and `grid` is the per-frame
1645/// [`GridView`] borrow. `pitch_pixels` is the framebuffer
1646/// row stride in pixels (matches `ScalarRasterizer::new`'s argument).
1647///
1648/// On a miss, a textured sky ([`DdaEnv::sky`]) is sampled per ray
1649/// direction and written at `+inf` depth; with no textured sky the miss
1650/// writes nothing, so the caller's solid sky pre-fill shows (the
1651/// `render_scene_composed` path pre-fills it).
1652pub fn render_dda(
1653    camera: &Camera,
1654    settings: &OpticastSettings,
1655    grid: GridView<'_>,
1656    pitch_pixels: usize,
1657    env: &DdaEnv<'_>,
1658    mip: u32,
1659    sink: &mut impl PixelSink,
1660) {
1661    let cs = camera_math::derive(
1662        camera,
1663        settings.xres,
1664        settings.yres,
1665        settings.hx,
1666        settings.hy,
1667        settings.hz,
1668    );
1669
1670    // Sequential path builds a throwaway per-call cache (tests / single
1671    // grid). The parallel path takes a persistent cross-frame cache.
1672    let (cache, mip) = local_cache(&grid, mip);
1673    let mut sampler = Sampler::new(grid, &cache, mip);
1674
1675    for py in settings.y_start..settings.y_end {
1676        let row = py as usize * pitch_pixels;
1677        for px in 0..settings.xres {
1678            if let Some((color, dist)) = pixel_result(&cs, settings, &mut sampler, env, px, py) {
1679                sink.put(row + px as usize, color, dist);
1680            }
1681        }
1682    }
1683}
1684
1685/// Resolve one pixel: a shaded + fogged hit colour, a sampled textured
1686/// sky on a miss, or `None` (miss with no textured sky → caller's
1687/// pre-fill stands). Shared by the sequential ([`render_dda`]) and
1688/// parallel ([`render_dda_parallel`]) drivers.
1689#[inline]
1690fn pixel_result(
1691    cs: &CameraState,
1692    settings: &OpticastSettings,
1693    sampler: &mut Sampler<'_>,
1694    env: &DdaEnv<'_>,
1695    px: u32,
1696    py: u32,
1697) -> Option<(u32, f32)> {
1698    let (origin, dir) = pixel_ray(cs, settings, px, py);
1699    if let Some(hit) = cast_ray(origin, dir, cs.forward, sampler, settings, env) {
1700        Some((hit.color, hit.dist))
1701    } else {
1702        env.sky.map(|sky| (sample_sky(sky, dir), f32::INFINITY))
1703    }
1704}
1705
1706/// Tile-parallel [`render_dda`] writing straight into `(fb, zb)`.
1707///
1708/// DDA pixels are independent, so the framebuffer splits into disjoint
1709/// horizontal bands rendered concurrently (rayon) — **bit-identical**
1710/// to the sequential render regardless of thread count, unlike voxlap's
1711/// per-strip discretisation. Each band spins up its own lightweight
1712/// [`Sampler`] over the shared, immutable `cache`.
1713///
1714/// `cache` must already hold current brick maps for every chunk at
1715/// `mip` (populate via [`BrickCache::ensure`]); `mip` is the effective
1716/// render mip ([`effective_mip`]). `(fb, zb)` use the standard
1717/// conventions (`0x80RRGGBB`; z = perp distance, smaller = closer); a
1718/// miss writes nothing unless [`DdaEnv::sky`] is set. `pitch_pixels` is
1719/// the row stride.
1720#[allow(clippy::cast_possible_truncation, clippy::too_many_arguments)]
1721pub fn render_dda_parallel(
1722    camera: &Camera,
1723    settings: &OpticastSettings,
1724    grid: GridView<'_>,
1725    fb: &mut [u32],
1726    zb: &mut [f32],
1727    pitch_pixels: usize,
1728    env: &DdaEnv<'_>,
1729    cache: &BrickCache,
1730    mip: u32,
1731) {
1732    debug_assert_eq!(fb.len(), zb.len());
1733    let (y0, y1) = (settings.y_start, settings.y_end);
1734    if y1 <= y0 {
1735        return;
1736    }
1737    let cs = camera_math::derive(
1738        camera,
1739        settings.xres,
1740        settings.yres,
1741        settings.hx,
1742        settings.hy,
1743        settings.hz,
1744    );
1745    let target = RasterTarget::new(fb, zb);
1746
1747    // Split the y-range into ~one band per worker thread.
1748    let nthreads = rayon::current_num_threads().max(1);
1749    let rows = (y1 - y0) as usize;
1750    let band = rows.div_ceil(nthreads).max(1) as u32;
1751    let bands: Vec<(u32, u32)> = (y0..y1)
1752        .step_by(band as usize)
1753        .map(|s| (s, (s + band).min(y1)))
1754        .collect();
1755
1756    bands.par_iter().for_each(|&(by0, by1)| {
1757        let mut sampler = Sampler::new(grid, cache, mip);
1758        for py in by0..by1 {
1759            let row = py as usize * pitch_pixels;
1760            for px in 0..settings.xres {
1761                if let Some((color, dist)) = pixel_result(&cs, settings, &mut sampler, env, px, py)
1762                {
1763                    let idx = row + px as usize;
1764                    // SAFETY: bands cover disjoint row ranges, so writes
1765                    // never alias across threads; `idx` is in-bounds for
1766                    // a `pitch * height`-sized buffer.
1767                    unsafe {
1768                        target.write_color(idx, color);
1769                        target.write_depth(idx, dist);
1770                    }
1771                }
1772            }
1773        }
1774    });
1775}
1776
/// Brute-force reference cast for a **single-chunk** grid.
///
/// Steps voxel by voxel through `[0, vsid)² × [0, CHUNK_SIZE_Z)` and asks
/// [`GridView::surface_color`] at every cell — no brick gate, no chunk
/// resolution. Tests use it as the equivalence oracle for the brickmap +
/// sampler [`cast_ray`]. The hit colour goes through `shade(color, 0)`
/// and the reported distance is the perpendicular depth clamped at zero.
#[cfg(test)]
#[allow(clippy::cast_precision_loss, clippy::cast_possible_truncation)]
fn cast_ray_reference(
    origin: [f32; 3],
    dir: [f32; 3],
    forward: [f32; 3],
    grid: &GridView<'_>,
    settings: &OpticastSettings,
) -> Option<Hit> {
    // Box extents, once as floats for the clip and once as cell counts.
    let side = grid.vsid as f32;
    let height = f32::from(u16::try_from(crate::grid_view::CHUNK_SIZE_Z).unwrap_or(256));
    #[allow(clippy::cast_possible_wrap)]
    let dims = [
        grid.vsid as i32,
        grid.vsid as i32,
        crate::grid_view::CHUNK_SIZE_Z as i32,
    ];
    let (t_enter, t_exit) = intersect_aabb(origin, dir, [0.0; 3], [side, side, height])?;
    let fwd_dot = dir[0] * forward[0] + dir[1] * forward[1] + dir[2] * forward[2];
    let max_dist = settings.max_scan_dist.max(1) as f32;

    // Nudge just inside the box so the starting voxel is unambiguous,
    // then clamp against float error at the faces.
    let nudged = t_enter + 1e-4;
    let entry_cell = |axis: usize| {
        ((origin[axis] + dir[axis] * nudged).floor() as i32).clamp(0, dims[axis] - 1)
    };
    let mut voxel = [entry_cell(0), entry_cell(1), entry_cell(2)];

    let (step, mut t_max, t_delta) = dda_setup(origin, dir, voxel, 1.0);
    let mut t_curr = t_enter;
    let step_budget = (dims[0] + dims[1] + dims[2]) as usize + 8;
    for _ in 0..step_budget {
        let inside = voxel
            .iter()
            .zip(dims.iter())
            .all(|(&v, &n)| (0..n).contains(&v));
        if !inside {
            return None;
        }
        let depth = t_curr * fwd_dot;
        if depth > max_dist || t_curr > t_exit {
            return None;
        }
        #[allow(clippy::cast_sign_loss)]
        let (vx, vy, vz) = (voxel[0] as u32, voxel[1] as u32, voxel[2] as u32);
        if let Some(color) = grid.surface_color(vx, vy, vz) {
            return Some(Hit {
                color: shade(color, 0),
                dist: depth.max(0.0),
            });
        }
        // Advance across the nearest cell boundary.
        let axis = min_axis(t_max);
        t_curr = t_max[axis];
        voxel[axis] += step[axis];
        t_max[axis] += t_delta[axis];
    }
    None
}
1845
1846#[cfg(test)]
1847mod tests {
1848    use super::*;
1849
// CPU.1 — brightness proxy for a packed colour: the sum of its three
// low bytes (the RGB channels); the top byte is ignored.
fn lum(p: u32) -> u32 {
    p.to_le_bytes()[..3].iter().map(|&ch| u32::from(ch)).sum()
}
1854
#[test]
fn cel_band_quantizes_and_collapses() {
    let high_a = cel_band(0.8, 2);
    let high_b = cel_band(0.9, 2);
    let low = cel_band(0.3, 2);
    // With two bands, 0.8 and 0.9 share a band, and that band is 1.0.
    assert_eq!(high_a, high_b);
    assert!((high_a - 1.0).abs() < 1e-6);
    // A low factor must fall into a different band.
    assert_ne!(low, high_a);
}
1863
#[test]
fn shade_lit_cpu_sun_lights_by_facing() {
    // A grey voxel (brightness byte 0x80 = full ambient) hit on its floor
    // top face: entered by a +z step on axis 2, so the normal is up (-z).
    let grey = 0x80_80_80_80;
    let plus_z = [0, 0, 1];
    let rig = CpuLights {
        enabled: true,
        sun: true,
        sun_color: [1.0; 3],
        sun_intensity: 1.0,
        ambient: [0.2; 3],
        ..CpuLights::default()
    };
    // Direction toward the sun is up: the face looks at the sun.
    let toward_sun = CpuLights {
        sun_dir: [0.0, 0.0, -1.0],
        ..rig
    };
    // Sun sits below the face.
    let sun_below = CpuLights {
        sun_dir: [0.0, 0.0, 1.0],
        ..rig
    };
    let lit = shade_lit_cpu(grey, 0, 2, plus_z, [0, 0, 0], 1.0, &toward_sun, None);
    let dark = shade_lit_cpu(grey, 0, 2, plus_z, [0, 0, 0], 1.0, &sun_below, None);
    assert!(
        lum(lit) > lum(dark),
        "sun facing the surface must brighten it: {lit:#08x} vs {dark:#08x}",
    );
}
1893
#[test]
fn shade_dynamic_spot_cone_masks_off_axis() {
    // Shade a surface sample at the origin with an up-facing normal (-z,
    // voxlap z-down) under one light placed 10 units "above" it (at -z).
    // Ambient is zero, so only the light itself can contribute.
    fn lit_by(spot_dir: [f32; 3], cos_inner: f32, cos_outer: f32) -> u32 {
        let albedo = [0.5, 0.5, 0.5];
        let normal = [0.0, 0.0, -1.0];
        let sample = [0.0, 0.0, 0.0];
        let lights = [CpuPointLight {
            pos: [0.0, 0.0, -10.0],
            color: [1.0; 3],
            intensity: 1.0,
            radius: 64.0,
            casts_shadow: false,
            spot_dir,
            cos_inner,
            cos_outer,
        }];
        let rig = CpuLights {
            enabled: true,
            ambient: [0.0; 3],
            points: &lights,
            ..CpuLights::default()
        };
        shade_dynamic(albedo, 0.0, normal, sample, &rig, None)
    }

    let inner = 10.0f32.to_radians().cos();
    let outer = 15.0f32.to_radians().cos();
    let straight_down = [0.0, 0.0, 1.0];
    let sideways = [1.0, 0.0, 0.0];

    // cos_outer = -1 makes it a plain point light: the axis is ignored.
    let point = lit_by(straight_down, -1.0, -1.0);
    // Spot aimed straight down at the surface (on-axis).
    let on_axis = lit_by(straight_down, inner, outer);
    // Same spot aimed sideways: the surface lies outside the cone.
    let off_axis = lit_by(sideways, inner, outer);

    // On-axis (cd == 1) is fully inside the cone, so it matches the point.
    assert_eq!(
        on_axis, point,
        "on-axis spot must equal the point light: {on_axis:#08x} vs {point:#08x}",
    );
    // Off-axis is masked to zero, leaving only the (zero) ambient.
    assert!(
        lum(on_axis) > lum(off_axis),
        "off-axis spot must be darker: {on_axis:#08x} vs {off_axis:#08x}",
    );
    assert_eq!(lum(off_axis), 0, "off-cone spot contributes nothing");
}
1941
#[test]
fn shade_lit_cpu_cel_terraces_sun() {
    // Sun elevations with N·L of 0.8 and 0.9 fall into the same band at
    // bands=2, giving one stylized colour; smooth shading (bands=0) must
    // still tell them apart.
    let color = 0x80_80_80_80;
    let step = [0, 0, 1];
    let mk = |zc: f32, bands: u32| {
        let horizontal = (1.0f32 - zc * zc).sqrt();
        CpuLights {
            enabled: true,
            sun: true,
            sun_dir: [horizontal, 0.0, -zc], // N·L on the up face equals zc
            sun_color: [1.0; 3],
            sun_intensity: 1.0,
            ambient: [0.1; 3],
            bands,
            ..CpuLights::default()
        }
    };
    let shade_at = |zc: f32, bands: u32| {
        shade_lit_cpu(color, 0, 2, step, [0, 0, 0], 1.0, &mk(zc, bands), None)
    };

    let smooth_a = shade_at(0.8, 0);
    let smooth_b = shade_at(0.9, 0);
    assert_ne!(smooth_a, smooth_b, "smooth diffuse must vary with N·L");

    let cel_a = shade_at(0.8, 2);
    let cel_b = shade_at(0.9, 2);
    assert_eq!(
        cel_a, cel_b,
        "cel banding must terrace both N·L to one level"
    );
}
1971
/// CPU.2 — shadow application math, exercised without the real march: a
/// stub tester reports a fixed occlusion answer. An occluded sun-lit
/// sample must come out darker than an unoccluded one, and with
/// `shadow_strength == 0` occlusion must make no difference at all.
#[test]
fn shade_dynamic_sun_shadow_darkens() {
    /// Shadow tester that always answers with its stored flag.
    struct Mock(bool);
    impl ShadowTester for Mock {
        fn occluded(&mut self, _: [f32; 3], _: [f32; 3], _: f32) -> bool {
            self.0
        }
    }
    let mut open_sky = Mock(false);
    let mut blocked = Mock(true);

    let albedo = [0.8; 3];
    let n = [0.0, 0.0, -1.0]; // up face, looking at the sun
    let s = [0.5, 0.5, 0.5];
    let l = CpuLights {
        enabled: true,
        sun: true,
        sun_dir: [0.0, 0.0, -1.0], // up = toward the sun
        sun_color: [1.0; 3],
        sun_intensity: 1.0,
        sun_casts_shadow: true,
        ambient: [0.2; 3],
        shadow_strength: 0.7,
        shadow_bias: 1.5,
        shadow_max_dist: 64.0,
        ..CpuLights::default()
    };

    let lit = shade_dynamic(albedo, 1.0, n, s, &l, Some(&mut open_sky));
    let shadowed = shade_dynamic(albedo, 1.0, n, s, &l, Some(&mut blocked));
    assert!(
        lum(shadowed) < lum(lit),
        "an occluded sun face must darken: shadowed={shadowed:#08x} lit={lit:#08x}",
    );

    // With zero strength an occluded sample looks exactly like a lit one.
    let l0 = CpuLights {
        shadow_strength: 0.0,
        ..l
    };
    let occluded_at_zero = shade_dynamic(albedo, 1.0, n, s, &l0, Some(&mut blocked));
    let clear_at_zero = shade_dynamic(albedo, 1.0, n, s, &l0, Some(&mut open_sky));
    assert_eq!(
        occluded_at_zero, clear_at_zero,
        "shadow_strength 0 ⇒ shadows invisible",
    );
}
2016
/// CPU.2 — end-to-end check that the [`SamplerShadow`] march really casts
/// a sun shadow through the grid. A wall standing on a floor is lit by a
/// grazing sun and viewed from straight above; the frame is rendered
/// once with the sun's shadow flag off and once with it on. The summed
/// luminance with shadows must be strictly lower (shadows only ever
/// subtract), and lower by a non-trivial margin (a real shadow, not FP
/// noise).
#[test]
fn sampler_shadow_march_casts_sun_shadow() {
    /// Total luminance of a framebuffer.
    fn total_lum(fb: &[u32]) -> u64 {
        fb.iter().map(|&p| u64::from(lum(p))).sum()
    }

    // Floor fills z >= 60; a thin wall at x == 32 rises from it (z 30..60).
    let vxl = roxlap_formats::vxl::Vxl::from_dense(64, |x, _y, z| match z {
        60.. => Some(0x80_80_80_80), // floor
        30..=59 if x == 32 => Some(0x80_70_70_70), // wall (distinct colour)
        _ => None,
    });
    let grid = GridView::from_single_vxl(&vxl);

    // Camera looks straight down at the floor (voxlap z-down: forward = +z).
    let cam = Camera {
        pos: [32.0, 32.0, 6.0],
        right: [1.0, 0.0, 0.0],
        down: [0.0, 1.0, 0.0],
        forward: [0.0, 0.0, 1.0],
    };

    // Sun grazing in from +x and above, so the wall shades the floor at x < 32.
    let inv = 1.0f32 / 2.0f32.sqrt();
    let base = CpuLights {
        enabled: true,
        sun: true,
        sun_dir: [inv, 0.0, -inv],
        sun_color: [1.0; 3],
        sun_intensity: 1.0,
        ambient: [0.25; 3],
        shadow_strength: 0.8,
        shadow_bias: 1.5,
        shadow_max_dist: 128.0,
        ..CpuLights::default()
    };
    let (w, h) = (96u32, 96u32);

    // Render without, then with, the sun flagged as a shadow caster.
    let [lit_sum, sh_sum] = [false, true].map(|sun_casts_shadow| {
        let env = DdaEnv {
            lights: CpuLights {
                sun_casts_shadow,
                ..base
            },
            ..DdaEnv::default()
        };
        let (fb, _) = render_brickmap_env(grid, &cam, w, h, &env);
        total_lum(&fb)
    });

    assert!(
        sh_sum < lit_sum,
        "the wall's shadow must darken the floor: shadow_sum={sh_sum} lit_sum={lit_sum}",
    );
    // The shadow must take away more than 1/50 of the lit total.
    assert!(
        (lit_sum - sh_sum) * 50 > lit_sum,
        "shadow should remove >2% of total luminance: lit={lit_sum} shadow={sh_sum}",
    );
}
2086
2087    /// Recording sink: collects `(idx, color, dist)` puts for tests.
2088    #[derive(Default)]
2089    struct Recorder {
2090        puts: Vec<(usize, u32, f32)>,
2091    }
2092    impl PixelSink for Recorder {
2093        fn put(&mut self, idx: usize, color: u32, dist: f32) {
2094            self.puts.push((idx, color, dist));
2095        }
2096    }
2097
2098    fn oracle_camera() -> Camera {
2099        // Identity-basis camera at origin: ray math is integer-exact.
2100        Camera {
2101            pos: [0.0, 0.0, 0.0],
2102            right: [1.0, 0.0, 0.0],
2103            down: [0.0, 0.0, 1.0],
2104            forward: [0.0, 1.0, 0.0],
2105        }
2106    }
2107
2108    /// Render `grid` from `camera` into a `w × h` framebuffer and
2109    /// return the per-pixel hit mask (`true` where a ray hit a voxel).
2110    fn render_mask(grid: GridView<'_>, camera: &Camera, w: u32, h: u32) -> Vec<bool> {
2111        let n = (w as usize) * (h as usize);
2112        let mut fb = vec![0u32; n]; // sky sentinel = 0
2113        let mut zb = vec![f32::INFINITY; n];
2114        let settings = OpticastSettings::for_oracle_framebuffer(w, h);
2115        {
2116            let mut sink = RasterSink::new(&mut fb, &mut zb);
2117            render_dda(
2118                camera,
2119                &settings,
2120                grid,
2121                w as usize,
2122                &DdaEnv::default(),
2123                0,
2124                &mut sink,
2125            );
2126        }
2127        fb.iter().map(|&c| c != 0).collect()
2128    }
2129
/// Row-convexity check for a hit mask: returns `true` when, in every
/// framebuffer row, the hit pixels form one contiguous run (no miss
/// between the first and last hit). Rows without any hit pass. The
/// voxlap silhouette notch is exactly such an interior gap, which makes
/// this the headline DDA.1 acceptance check.
fn rows_have_no_holes(mask: &[bool], w: u32, h: u32) -> bool {
    let w = w as usize;
    (0..h as usize).all(|y| {
        let row = &mask[y * w..(y + 1) * w];
        let first_hit = row.iter().position(|&b| b);
        let last_hit = row.iter().rposition(|&b| b);
        match (first_hit, last_hit) {
            // Everything between the outermost hits must be a hit too.
            (Some(f), Some(l)) => row[f..=l].iter().all(|&b| b),
            _ => true,
        }
    })
}
2148
/// Column-wise counterpart of the row contiguity check: returns `true`
/// when, in every framebuffer column, the hit pixels form one
/// contiguous run. Columns without any hit pass.
fn cols_have_no_holes(mask: &[bool], w: u32, h: u32) -> bool {
    let (w, h) = (w as usize, h as usize);
    (0..w).all(|x| {
        // Gather the column top to bottom out of the row-major mask.
        let col: Vec<bool> = (0..h).map(|y| mask[y * w + x]).collect();
        let first_hit = col.iter().position(|&b| b);
        let last_hit = col.iter().rposition(|&b| b);
        match (first_hit, last_hit) {
            (Some(f), Some(l)) => col[f..=l].iter().all(|&b| b),
            _ => true,
        }
    })
}
2165
/// The ray through the principal-point pixel `(hx, hy)` starts at the
/// camera position and points along the forward axis scaled by `hz`.
#[test]
fn center_pixel_ray_is_forward() {
    let settings = OpticastSettings::for_oracle_framebuffer(640, 480);
    let cs = camera_math::derive(&oracle_camera(), 640, 480, 320.0, 240.0, 320.0);

    // Aim at the principal point itself, taken from the settings.
    #[allow(clippy::cast_possible_truncation, clippy::cast_sign_loss)]
    let (centre_x, centre_y) = (settings.hx as u32, settings.hy as u32);
    let (origin, dir) = pixel_ray(&cs, &settings, centre_x, centre_y);

    assert_eq!(origin, [0.0, 0.0, 0.0]);
    // Expected direction: hz · forward = 320 · [0, 1, 0], compared bitwise.
    let expected = [0.0f32, 320.0, 0.0];
    assert_eq!(dir.map(f32::to_bits), expected.map(f32::to_bits));
}
2182
/// The ray direction for pixel `(0, 0)` is bit-for-bit `camera_math`'s
/// `corn[0]`, i.e. the DDA renderer samples the same rays the voxlap
/// frustum is built from.
#[test]
fn corner_pixel_ray_matches_camera_corn0() {
    let settings = OpticastSettings::for_oracle_framebuffer(640, 480);
    let cs = camera_math::derive(&oracle_camera(), 640, 480, 320.0, 240.0, 320.0);
    let (_origin, dir) = pixel_ray(&cs, &settings, 0, 0);

    let got_bits = dir.map(f32::to_bits);
    let want_bits = cs.corn[0].map(f32::to_bits);
    assert_eq!(got_bits, want_bits);
}
2193
/// Cross-checks the renderer's own slab decoder
/// ([`GridView::voxel_color`]) against the reference
/// [`roxlap_formats::vxl::Vxl::voxel_color`] on every cell of an
/// 8 × 8 × 64 region. Each column holds two solid runs separated by
/// air, which exercises the ceiling-colour-list branch.
#[test]
fn gridview_voxel_color_matches_reference() {
    // Solid at z 10..=12 and z 40..=42, air elsewhere → ceiling list.
    let vxl = roxlap_formats::vxl::Vxl::from_dense(8, |x, _, z| {
        matches!(z, 10..=12 | 40..=42).then_some(0x80_10_20_30 + x)
    });
    let grid = GridView::from_single_vxl(&vxl);

    let cells =
        (0..8).flat_map(|x| (0..8).flat_map(move |y| (0..64).map(move |z| (x, y, z))));
    for (x, y, z) in cells {
        assert_eq!(
            grid.voxel_color(x, y, z),
            vxl.voxel_color(x, y, z),
            "mismatch at ({x},{y},{z})"
        );
    }
}
2220
/// Rendering a grid with no solid voxels must not emit a single pixel:
/// every ray misses and the default environment has nothing to write
/// on a miss.
#[test]
fn empty_grid_no_hits() {
    let vxl = roxlap_formats::vxl::Vxl::empty(64);
    let grid = GridView::from_single_vxl(&vxl);
    let settings = OpticastSettings::for_oracle_framebuffer(64, 48);
    let env = DdaEnv::default();
    let camera = oracle_camera();

    let mut rec = Recorder::default();
    render_dda(&camera, &settings, grid, 64, &env, 0, &mut rec);

    assert!(rec.puts.is_empty(), "all-air grid must produce no hits");
}
2239
/// A camera hovering above a solid floor and looking straight down must
/// see it: the render emits pixels, the centre pixel recovers the floor's
/// RGB, and its depth is within 1.5 of the camera's height above the
/// floor.
#[test]
fn floor_seen_from_above() {
    const FLOOR_Z: u32 = 40;
    const FLOOR_COL: u32 = 0x80_30_60_90;
    const RGB_MASK: u32 = 0x00ff_ffff;

    let vxl =
        roxlap_formats::vxl::Vxl::from_dense(32, |_, _, z| (z >= FLOOR_Z).then_some(FLOOR_COL));
    let grid = GridView::from_single_vxl(&vxl);

    // z points down: the eye at z = 10 is above the floor, looking +z.
    let cam = Camera {
        pos: [16.0, 16.0, 10.0],
        right: [1.0, 0.0, 0.0],
        down: [0.0, 1.0, 0.0],
        forward: [0.0, 0.0, 1.0],
    };
    let settings = OpticastSettings::for_oracle_framebuffer(48, 48);
    let mut rec = Recorder::default();
    render_dda(&cam, &settings, grid, 48, &DdaEnv::default(), 0, &mut rec);

    assert!(!rec.puts.is_empty(), "floor must be visible");

    // The centre pixel looks straight down, so depth ≈ FLOOR_Z - eye_z.
    let centre = 24usize * 48 + 24;
    let (_, color, depth) = *rec
        .puts
        .iter()
        .find(|put| put.0 == centre)
        .expect("centre ray must hit the floor");
    assert_eq!(color & RGB_MASK, FLOOR_COL & RGB_MASK);

    let expected = (FLOOR_Z as f32) - 10.0;
    assert!(
        (depth - expected).abs() < 1.5,
        "centre depth {} not ≈ {}",
        depth,
        expected
    );
}
2279
2280    /// DDA.2: a camera looking at the horizon splits the frame into
2281    /// sky (upward rays miss → no write) and floor (downward rays hit).
2282    /// The top of the frame must be mostly sky, the bottom mostly
2283    /// floor.
2284    #[test]
2285    fn horizon_splits_sky_and_floor() {
2286        const FLOOR_Z: u32 = 40;
2287        let vxl = roxlap_formats::vxl::Vxl::from_dense(64, |_, _, z| {
2288            (z >= FLOOR_Z).then_some(0x80_44_66_88)
2289        });
2290        let grid = GridView::from_single_vxl(&vxl);
2291
2292        // At z=30 (above the z=40 floor), looking +y horizontally,
2293        // down = +z. Upward rays (low py) escape through the box top
2294        // (z=0) → sky; downward rays (high py) strike the floor.
2295        let cam = Camera {
2296            pos: [32.0, 4.0, 30.0],
2297            right: [-1.0, 0.0, 0.0],
2298            down: [0.0, 0.0, 1.0],
2299            forward: [0.0, 1.0, 0.0],
2300        };
2301        let (w, h) = (64u32, 64u32);
2302        let mask = render_mask(grid, &cam, w, h);
2303
2304        let count_band = |y0: usize, y1: usize| -> usize {
2305            (y0 * w as usize..y1 * w as usize)
2306                .filter(|&i| mask[i])
2307                .count()
2308        };
2309        let top = count_band(0, h as usize / 4);
2310        let bottom = count_band(3 * h as usize / 4, h as usize);
2311        assert!(mask.iter().any(|&b| b), "floor must be visible");
2312        assert!(mask.iter().any(|&b| !b), "sky must be visible");
2313        assert!(
2314            bottom > top,
2315            "bottom band ({bottom}) should hit more floor than top band ({top})"
2316        );
2317    }
2318
2319    /// Render `grid` from `camera` with the dense reference cast (no
2320    /// brickmap), returning `(colour, depth)` buffers.
2321    fn render_reference(
2322        grid: GridView<'_>,
2323        camera: &Camera,
2324        w: u32,
2325        h: u32,
2326    ) -> (Vec<u32>, Vec<f32>) {
2327        let n = (w as usize) * (h as usize);
2328        let mut fb = vec![0u32; n];
2329        let mut zb = vec![f32::INFINITY; n];
2330        let settings = OpticastSettings::for_oracle_framebuffer(w, h);
2331        let cs = camera_math::derive(camera, w, h, settings.hx, settings.hy, settings.hz);
2332        for py in 0..h {
2333            for px in 0..w {
2334                let (o, d) = pixel_ray(&cs, &settings, px, py);
2335                if let Some(hit) = cast_ray_reference(o, d, cs.forward, &grid, &settings) {
2336                    let i = (py * w + px) as usize;
2337                    fb[i] = hit.color;
2338                    zb[i] = hit.dist;
2339                }
2340            }
2341        }
2342        (fb, zb)
2343    }
2344
2345    /// Render `grid` from `camera` via the production brickmap path.
2346    fn render_brickmap(
2347        grid: GridView<'_>,
2348        camera: &Camera,
2349        w: u32,
2350        h: u32,
2351    ) -> (Vec<u32>, Vec<f32>) {
2352        render_brickmap_env(grid, camera, w, h, &DdaEnv::default())
2353    }
2354
2355    /// As [`render_brickmap`] but with an explicit [`DdaEnv`] (fog /
2356    /// textured sky / side shades).
2357    fn render_brickmap_env(
2358        grid: GridView<'_>,
2359        camera: &Camera,
2360        w: u32,
2361        h: u32,
2362        env: &DdaEnv<'_>,
2363    ) -> (Vec<u32>, Vec<f32>) {
2364        let n = (w as usize) * (h as usize);
2365        let mut fb = vec![0u32; n];
2366        let mut zb = vec![f32::INFINITY; n];
2367        let settings = OpticastSettings::for_oracle_framebuffer(w, h);
2368        {
2369            let mut sink = RasterSink::new(&mut fb, &mut zb);
2370            render_dda(camera, &settings, grid, w as usize, env, 0, &mut sink);
2371        }
2372        (fb, zb)
2373    }
2374
2375    /// Regression for the cave-demo "bright sky seams" report: the
2376    /// empty-space-skip walk must not leak past an occupied box the ray
2377    /// only grazes at a shared edge/corner. A 1-voxel-thick diagonal
2378    /// wall (`x+y==64`, voxels edge-connected) with air on both sides is
2379    /// the canonical case. The production skip walk must hit exactly the
2380    /// same pixels as the dense per-cell reference — zero divergence.
2381    #[test]
2382    fn no_sky_leak_through_diagonal_wall() {
2383        let vxl = roxlap_formats::vxl::Vxl::from_dense(64, |x, y, z| {
2384            ((x + y == 64) && (2..62).contains(&z)).then_some(0x80_40_80_60)
2385        });
2386        let grid = GridView::from_single_vxl(&vxl);
2387        let (w, h) = (160u32, 160u32);
2388        let c = [10.0, 10.0, 32.0];
2389        let poses = [
2390            Camera::from_yaw_pitch(c, 0.785, 0.0),
2391            Camera::from_yaw_pitch(c, 0.6, 0.1),
2392            Camera::from_yaw_pitch(c, 0.95, -0.1),
2393            Camera::from_yaw_pitch(c, 0.785, 0.3),
2394            Camera::from_yaw_pitch(c, 0.5, 0.0),
2395        ];
2396        for (i, cam) in poses.iter().enumerate() {
2397            let (fb_b, _) = render_brickmap(grid, cam, w, h);
2398            let (fb_r, _) = render_reference(grid, cam, w, h);
2399            let leak = (0..(w * h) as usize)
2400                .filter(|&k| (fb_b[k] != 0) != (fb_r[k] != 0))
2401                .count();
2402            assert_eq!(leak, 0, "pose {i}: {leak} px diverge from dense reference");
2403        }
2404    }
2405
2406    /// TV terrain transparency: a glass-coloured voxel slab in front of an
2407    /// opaque floor. With no terrain material map the glass is an opaque first
2408    /// hit; with the map it becomes translucent and the floor tints through.
2409    #[test]
2410    fn terrain_glass_tints_floor_behind() {
2411        let glass = 0x80_40_C0_E0; // cyan
2412        let floor = 0x80_C0_40_40; // red
2413        let vxl = roxlap_formats::vxl::Vxl::from_dense(16, |_, _, z| {
2414            if z == 4 {
2415                Some(glass)
2416            } else if z >= 10 {
2417                Some(floor)
2418            } else {
2419                None
2420            }
2421        });
2422        let grid = GridView::from_single_vxl(&vxl);
2423        // Camera above the grid looking straight down (+z), centred.
2424        let cam = Camera {
2425            pos: [8.0, 8.0, 0.0],
2426            right: [1.0, 0.0, 0.0],
2427            down: [0.0, 1.0, 0.0],
2428            forward: [0.0, 0.0, 1.0],
2429        };
2430        let (w, h) = (32u32, 32u32);
2431        let centre = (h / 2 * w + w / 2) as usize;
2432
2433        // Opaque: the glass voxel stops the ray (no terrain materials).
2434        let (fb_op, _) = render_brickmap(grid, &cam, w, h);
2435        assert_eq!(
2436            fb_op[centre] & 0x00ff_ffff,
2437            0x0040_C0E0,
2438            "opaque glass first-hit"
2439        );
2440
2441        // Translucent: glass colour → material 1 (alpha-blend).
2442        let mut table = MaterialTable::new();
2443        table.set(1, Material::alpha_blend(128));
2444        let env = DdaEnv {
2445            materials: Some(&table),
2446            terrain_materials: &[(glass & 0x00ff_ffff, 1)],
2447            lights: CpuLights::default(),
2448            ..DdaEnv::default()
2449        };
2450        let (fb_tr, _) = render_brickmap_env(grid, &cam, w, h, &env);
2451        assert_ne!(
2452            fb_tr[centre], fb_op[centre],
2453            "glass should composite over the floor, not stay opaque"
2454        );
2455        let r_op = (fb_op[centre] >> 16) & 0xff; // glass red ≈ 0x40
2456        let r_tr = (fb_tr[centre] >> 16) & 0xff; // + floor red bleeds in
2457        assert!(
2458            r_tr > r_op,
2459            "floor red tints through the glass (op={r_op:02x} tr={r_tr:02x})"
2460        );
2461    }
2462
2463    /// TV terrain Volumetric: a **filled** grey smoke volume over a red floor.
2464    /// Beer–Lambert opacity grows with the ray's path length, so a deeper smoke
2465    /// column shows more of its own colour (green channel rises toward the
2466    /// smoke grey) — thickness-dependent, unlike per-span AlphaBlend.
2467    #[test]
2468    fn terrain_volumetric_thickness_deepens_opacity() {
2469        let smoke = 0x80_90_90_90; // grey
2470        let floor = 0x80_C0_20_20; // red (low green)
2471                                   // Centre green channel for a smoke column `depth` voxels deep (filled),
2472                                   // floor at z>=12, camera looking straight down.
2473        let green_at = |depth: u32| -> u32 {
2474            let vxl = roxlap_formats::vxl::Vxl::from_dense(16, |_, _, z| {
2475                if (4..4 + depth).contains(&z) {
2476                    Some(smoke)
2477                } else if z >= 12 {
2478                    Some(floor)
2479                } else {
2480                    None
2481                }
2482            });
2483            let grid = GridView::from_single_vxl(&vxl);
2484            let cam = Camera {
2485                pos: [8.0, 8.0, 0.0],
2486                right: [1.0, 0.0, 0.0],
2487                down: [0.0, 1.0, 0.0],
2488                forward: [0.0, 0.0, 1.0],
2489            };
2490            let (w, h) = (32u32, 32u32);
2491            let mut table = MaterialTable::new();
2492            table.set(1, Material::volumetric(80));
2493            let env = DdaEnv {
2494                materials: Some(&table),
2495                terrain_materials: &[(smoke & 0x00ff_ffff, 1)],
2496                lights: CpuLights::default(),
2497                ..DdaEnv::default()
2498            };
2499            let (fb, _) = render_brickmap_env(grid, &cam, w, h, &env);
2500            (fb[(h / 2 * w + w / 2) as usize] >> 8) & 0xff
2501        };
2502        let shallow = green_at(1);
2503        let deep = green_at(7);
2504        assert!(
2505            deep > shallow,
2506            "deeper Volumetric smoke shows more of its grey (deep g={deep:02x} > shallow g={shallow:02x})"
2507        );
2508    }
2509
2510    /// DDA.5: distance fog blends a hit toward the fog colour. A far
2511    /// floor pixel is closer to the fog colour than a near one.
2512    #[test]
2513    fn distance_fog_blends_toward_fog_color() {
2514        let vxl =
2515            roxlap_formats::vxl::Vxl::from_dense(64, |_, _, z| (z >= 40).then_some(0x80_FF_FF_FF));
2516        let grid = GridView::from_single_vxl(&vxl);
2517        let cam = Camera {
2518            pos: [32.0, 2.0, 38.0],
2519            right: [1.0, 0.0, 0.0],
2520            down: [0.0, 0.0, 1.0],
2521            forward: [0.0, 1.0, 0.0],
2522        };
2523        let env = DdaEnv {
2524            sky: None,
2525            fog_color: 0x00_00_00_00, // black fog → distance darkens
2526            fog_max_dist: 64.0,
2527            side_shades: [0; 6],
2528            materials: None,
2529            terrain_materials: &[],
2530            lights: CpuLights::default(),
2531            world_shadow: None,
2532        };
2533        let (w, h) = (64u32, 64u32);
2534        let (fog, _) = render_brickmap_env(grid, &cam, w, h, &env);
2535        let (nofog, zb) = render_brickmap(grid, &cam, w, h);
2536        let (idx, depth) = zb.iter().enumerate().filter(|(_, z)| z.is_finite()).fold(
2537            (0usize, 0.0f32),
2538            |acc, (i, &z)| {
2539                if z > acc.1 {
2540                    (i, z)
2541                } else {
2542                    acc
2543                }
2544            },
2545        );
2546        assert!(depth > 20.0, "need a deep pixel to test fog (got {depth})");
2547        let lum = |c: u32| (c & 0xff) + ((c >> 8) & 0xff) + ((c >> 16) & 0xff);
2548        assert!(
2549            lum(fog[idx]) < lum(nofog[idx]),
2550            "fogged pixel {:08x} not darker than {:08x}",
2551            fog[idx],
2552            nofog[idx]
2553        );
2554    }
2555
2556    /// DDA.5: with a textured sky, miss pixels are filled from the sky
2557    /// panorama (direction-dependent) instead of left at the pre-fill.
2558    #[test]
2559    fn textured_sky_fills_misses() {
2560        let sky = crate::sky::Sky::blue_gradient();
2561        let vxl = roxlap_formats::vxl::Vxl::empty(32); // all air → all miss
2562        let grid = GridView::from_single_vxl(&vxl);
2563        let env = DdaEnv {
2564            sky: Some(&sky),
2565            fog_color: 0,
2566            fog_max_dist: 0.0,
2567            side_shades: [0; 6],
2568            materials: None,
2569            terrain_materials: &[],
2570            lights: CpuLights::default(),
2571            world_shadow: None,
2572        };
2573        let cam = Camera::from_yaw_pitch([16.0, 16.0, 128.0], 0.3, -0.4);
2574        let (w, h) = (48u32, 48u32);
2575        let (fb, _) = render_brickmap_env(grid, &cam, w, h, &env);
2576        assert!(fb.iter().all(|&c| c >> 24 == 0x80), "all misses sky-filled");
2577        let top = fb[0];
2578        let bottom = fb[(h - 1) as usize * w as usize];
2579        assert_ne!(top, bottom, "sky gradient should vary with elevation");
2580    }
2581
2582    /// Sky elevation orientation matches the GPU `sky_color` (acos(-z)/π):
2583    /// looking **up** (−z) samples panorama column 0 (zenith), looking
2584    /// **down** (+z) samples the last column (nadir). Regression for the
2585    /// CPU up/down inversion.
2586    #[test]
2587    fn sky_elevation_zenith_at_column_zero() {
2588        let mut pixels = vec![0i32; 8];
2589        pixels[0] = 0x0011_1111; // zenith marker
2590        pixels[7] = 0x0099_9999; // nadir marker
2591        let sky = crate::sky::Sky::from_pixels(pixels, 8, 1);
2592        let up = sample_sky(&sky, [0.0, 0.0, -1.0]); // −z is up
2593        let down = sample_sky(&sky, [0.0, 0.0, 1.0]); // +z is down
2594        assert_eq!(
2595            up & 0x00ff_ffff,
2596            0x0011_1111,
2597            "looking up → column 0 (zenith)"
2598        );
2599        assert_eq!(
2600            down & 0x00ff_ffff,
2601            0x0099_9999,
2602            "looking down → last column (nadir)"
2603        );
2604    }
2605
2606    /// `render_sky_fill` paints the panorama for a **gridless** view — the
2607    /// same per-pixel sky sample the miss-ray path uses, with no grid present
2608    /// (the CPU empty-scene background, matching the GPU).
2609    #[test]
2610    fn sky_fill_paints_panorama_gridless() {
2611        let sky = crate::sky::Sky::blue_gradient();
2612        let cam = Camera::from_yaw_pitch([0.0, 0.0, 0.0], 0.3, -0.4);
2613        let (w, h) = (48u32, 48u32);
2614        let cs = crate::camera_math::derive(&cam, w, h, 24.0, 24.0, 24.0);
2615        let settings = crate::opticast::OpticastSettings::for_oracle_framebuffer(w, h);
2616        let mut fb = vec![0u32; (w * h) as usize];
2617        // All-background z-buffer (+∞) → every pixel gets the sky.
2618        let zb = vec![f32::INFINITY; (w * h) as usize];
2619        render_sky_fill(&mut fb, &zb, w as usize, w, h, &cs, &settings, &sky);
2620        assert!(
2621            fb.iter().all(|&c| c >> 24 == 0x80),
2622            "every pixel sky-filled with the brightness byte set"
2623        );
2624        let top = fb[0];
2625        let bottom = fb[(h - 1) as usize * w as usize];
2626        assert_ne!(top, bottom, "sky gradient should vary with elevation");
2627        // A finite-z (terrain) pixel is left untouched.
2628        let mut fb2 = vec![0x1234_5678u32; (w * h) as usize];
2629        let mut zb2 = vec![f32::INFINITY; (w * h) as usize];
2630        zb2[0] = 10.0; // pretend a terrain hit at pixel 0
2631        render_sky_fill(&mut fb2, &zb2, w as usize, w, h, &cs, &settings, &sky);
2632        assert_eq!(fb2[0], 0x1234_5678, "finite-z pixel is not overwritten");
2633    }
2634
2635    /// DDA.5: side shading darkens the hit face by its `side_shades`
2636    /// entry. A top-facing floor (ray crosses +z to enter) gets the
2637    /// `z-` face reduction (index 4).
2638    #[test]
2639    fn side_shades_darken_hit_face() {
2640        let vxl =
2641            roxlap_formats::vxl::Vxl::from_dense(16, |_, _, z| (z >= 8).then_some(0x80_FF_FF_FF));
2642        let grid = GridView::from_single_vxl(&vxl);
2643        let cam = Camera {
2644            pos: [8.0, 8.0, 2.0],
2645            right: [1.0, 0.0, 0.0],
2646            down: [0.0, 1.0, 0.0],
2647            forward: [0.0, 0.0, 1.0],
2648        };
2649        let centre = 16 * 32 + 16;
2650        let (plain, _) = render_brickmap(grid, &cam, 32, 32);
2651        let env = DdaEnv {
2652            sky: None,
2653            fog_color: 0,
2654            fog_max_dist: 0.0,
2655            side_shades: [0, 0, 0, 0, 0x40, 0],
2656            materials: None,
2657            terrain_materials: &[],
2658            lights: CpuLights::default(),
2659            world_shadow: None,
2660        };
2661        let (shaded, _) = render_brickmap_env(grid, &cam, 32, 32, &env);
2662        let lum = |c: u32| (c & 0xff) + ((c >> 8) & 0xff) + ((c >> 16) & 0xff);
2663        assert!(
2664            lum(shaded[centre]) < lum(plain[centre]),
2665            "side-shaded face {:08x} not darker than {:08x}",
2666            shaded[centre],
2667            plain[centre]
2668        );
2669    }
2670
2671    /// The two-level brick-skip cast closely approximates the dense
2672    /// per-voxel reference. The outer brick DDA re-seeds the inner cell
2673    /// walk at each occupied brick, so a few silhouette-boundary pixels
2674    /// jitter by one voxel (different hit cell → different colour/depth)
2675    /// — visually invisible, and the gain is ~`BRICK`× fewer air steps.
2676    /// Assert the divergence is tiny: coverage (hit/sky mask) is nearly
2677    /// identical and only a small fraction of pixels differ. (The
2678    /// thread-invariance guarantee is the separate, exact
2679    /// `parallel_matches_sequential`.)
2680    #[test]
2681    fn brickmap_approximates_dense_reference() {
2682        // Rolling heightmap + a floating block (air above and below).
2683        let vxl = roxlap_formats::vxl::Vxl::from_dense(64, |x, y, z| {
2684            let surf = 30 + ((x / 5 + y / 7) % 11);
2685            let ground = z >= surf;
2686            let block = (20..=24).contains(&z) && (10..20).contains(&x) && (40..50).contains(&y);
2687            (ground || block).then_some(0x80_30_50_70 + (x ^ y) % 0x40)
2688        });
2689        let grid = GridView::from_single_vxl(&vxl);
2690
2691        let (w, h) = (80u32, 80u32);
2692        let poses = [
2693            Camera::orbit(0.6, 0.5, 90.0, [32.0, 32.0, 40.0]),
2694            Camera::orbit(2.1, 0.2, 70.0, [32.0, 32.0, 35.0]),
2695            Camera::orbit(-1.0, 0.9, 120.0, [32.0, 32.0, 45.0]),
2696        ];
2697        let n = (w * h) as usize;
2698        for (i, cam) in poses.iter().enumerate() {
2699            let (fb_b, zb_b) = render_brickmap(grid, cam, w, h);
2700            let (fb_r, _zb_r) = render_reference(grid, cam, w, h);
2701            // Coverage (hit vs sky) must match almost exactly.
2702            let cov_b = fb_b.iter().filter(|&&c| c != 0).count();
2703            let cov_r = fb_r.iter().filter(|&&c| c != 0).count();
2704            assert!(cov_b > 200, "pose {i} rendered ~empty (cov {cov_b})");
2705            let cov_diff = cov_b.abs_diff(cov_r);
2706            assert!(
2707                cov_diff * 100 <= n, // < 1 % of pixels flip hit↔sky
2708                "pose {i} coverage diverged: brick {cov_b} vs dense {cov_r}"
2709            );
2710            // Colour diffs (boundary-voxel jitter) must be a small slice.
2711            let diffs = fb_b.iter().zip(&fb_r).filter(|(a, b)| a != b).count();
2712            assert!(
2713                diffs * 100 <= n * 3, // < 3 % of pixels differ
2714                "pose {i} too many pixel diffs vs dense: {diffs}/{n}"
2715            );
2716            // Depth must be sane (finite where hit), not wildly off.
2717            for k in 0..n {
2718                if fb_b[k] != 0 {
2719                    assert!(zb_b[k].is_finite(), "pose {i} px {k} non-finite depth");
2720                }
2721            }
2722        }
2723    }
2724
2725    /// DDA.5: a voxel's baked brightness byte darkens its colour. A
2726    /// half-bright voxel (`a = 0x40`) renders at roughly half RGB; a
2727    /// full-bright one (`a = 0x80`) is unchanged.
2728    #[test]
2729    fn baked_brightness_darkens_color() {
2730        // Half brightness: alpha 0x40 (64/128). White RGB → ~mid grey.
2731        let dim =
2732            roxlap_formats::vxl::Vxl::from_dense(16, |_, _, z| (z >= 8).then_some(0x40_FF_FF_FF));
2733        let grid = GridView::from_single_vxl(&dim);
2734        let cam = Camera {
2735            pos: [8.0, 8.0, 2.0],
2736            right: [1.0, 0.0, 0.0],
2737            down: [0.0, 1.0, 0.0],
2738            forward: [0.0, 0.0, 1.0],
2739        };
2740        let (fb, _) = render_brickmap(grid, &cam, 32, 32);
2741        let centre = 16 * 32 + 16;
2742        // 0xFF * 64 >> 7 = 127 per channel; alpha normalised to 0x80.
2743        assert_eq!(fb[centre], 0x80_7F_7F_7F, "got {:08x}", fb[centre]);
2744
2745        // Full brightness passes RGB through unchanged.
2746        let full =
2747            roxlap_formats::vxl::Vxl::from_dense(16, |_, _, z| (z >= 8).then_some(0x80_FF_FF_FF));
2748        let gridf = GridView::from_single_vxl(&full);
2749        let (fbf, _) = render_brickmap(gridf, &cam, 32, 32);
2750        assert_eq!(fbf[centre], 0x80_FF_FF_FF, "got {:08x}", fbf[centre]);
2751    }
2752
2753    /// DDA.4 headline gate: cross-chunk look-down. A camera in an
2754    /// all-air upper chunk (chz=0) looking straight down must see the
2755    /// floor in the *lower* stacked chunk (chz=1), through the chunk-Z
2756    /// boundary. This is exactly the case the voxlap renderer needed the
2757    /// whole virtual-column stack (S4B.6.j / VC) for; the DDA gets it
2758    /// for free from the outer box spanning `chunks_z`.
2759    #[test]
2760    fn cross_chunk_lookdown_sees_lower_stacked_floor() {
2761        const FLOOR_LOCAL_Z: u32 = 40;
2762        const FLOOR_COL: u32 = 0x80_22_88_44;
2763        let upper = roxlap_formats::vxl::Vxl::empty(32); // all air + bedrock
2764        let lower = roxlap_formats::vxl::Vxl::from_dense(32, |_, _, z| {
2765            (z >= FLOOR_LOCAL_Z).then_some(FLOOR_COL)
2766        });
2767        let v_up = GridView::from_single_vxl(&upper);
2768        let v_lo = GridView::from_single_vxl(&lower);
2769        // Z-stack: index (dz*chunks_y+dy)*chunks_x+dx → [upper, lower].
2770        let chunks = [Some(v_up), Some(v_lo)];
2771        let cg = crate::ChunkGrid {
2772            chunks: &chunks,
2773            origin_chunk_xy: [0, 0],
2774            origin_chunk_z: 0,
2775            chunks_x: 1,
2776            chunks_y: 1,
2777            chunks_z: 2,
2778        };
2779        let grid = GridView::from_chunk_grid(&cg, 32);
2780
2781        // Camera in the upper chunk (world z=100), looking straight down.
2782        let cam = Camera {
2783            pos: [16.0, 16.0, 100.0],
2784            right: [1.0, 0.0, 0.0],
2785            down: [0.0, 1.0, 0.0],
2786            forward: [0.0, 0.0, 1.0],
2787        };
2788        let (w, h) = (48u32, 48u32);
2789        let (fb, zb) = render_brickmap(grid, &cam, w, h);
2790        let centre = 24 * 48 + 24;
2791        assert!(
2792            fb[centre] & 0x00ff_ffff == FLOOR_COL & 0x00ff_ffff,
2793            "centre ray must reach the lower-chunk floor (got {:08x})",
2794            fb[centre]
2795        );
2796        // Floor world-z = 256 + 40 = 296; camera z = 100 → depth ≈ 196.
2797        let expected = 296.0 - 100.0;
2798        assert!(
2799            (zb[centre] - expected).abs() < 2.0,
2800            "look-down depth {} not ≈ {expected}",
2801            zb[centre]
2802        );
2803    }
2804
2805    /// DDA.4: a floor spanning two side-by-side chunks (chunks_x=2)
2806    /// renders continuously across the chunk-XY seam — hits on both
2807    /// sides, no gap column.
2808    #[test]
2809    fn cross_chunk_xy_floor_is_seamless() {
2810        let mk = || {
2811            roxlap_formats::vxl::Vxl::from_dense(32, |_, _, z| (z >= 20).then_some(0x80_50_50_50))
2812        };
2813        let (c0, c1) = (mk(), mk());
2814        let v0 = GridView::from_single_vxl(&c0);
2815        let v1 = GridView::from_single_vxl(&c1);
2816        let chunks = [Some(v0), Some(v1)];
2817        let cg = crate::ChunkGrid {
2818            chunks: &chunks,
2819            origin_chunk_xy: [0, 0],
2820            origin_chunk_z: 0,
2821            chunks_x: 2,
2822            chunks_y: 1,
2823            chunks_z: 1,
2824        };
2825        let grid = GridView::from_chunk_grid(&cg, 32);
2826
2827        // High above the seam (x=32), looking straight down.
2828        let cam = Camera {
2829            pos: [32.0, 16.0, 4.0],
2830            right: [1.0, 0.0, 0.0],
2831            down: [0.0, 1.0, 0.0],
2832            forward: [0.0, 0.0, 1.0],
2833        };
2834        let (w, h) = (64u32, 64u32);
2835        let mask = render_mask(grid, &cam, w, h);
2836        // Both the left chunk (screen left) and right chunk (screen
2837        // right) must show floor on the centre row.
2838        let row = (h / 2) as usize * w as usize;
2839        let left = (0..w as usize / 2).filter(|&x| mask[row + x]).count();
2840        let right = (w as usize / 2..w as usize)
2841            .filter(|&x| mask[row + x])
2842            .count();
2843        assert!(
2844            left > 5 && right > 5,
2845            "seam not continuous: left={left} right={right}"
2846        );
2847    }
2848
2849    /// Render `grid` from `camera` at render `mip` and return the hit
2850    /// mask.
2851    fn render_mask_mip(grid: GridView<'_>, camera: &Camera, w: u32, h: u32, mip: u32) -> Vec<bool> {
2852        let n = (w as usize) * (h as usize);
2853        let mut fb = vec![0u32; n];
2854        let mut zb = vec![f32::INFINITY; n];
2855        let settings = OpticastSettings::for_oracle_framebuffer(w, h);
2856        {
2857            let mut sink = RasterSink::new(&mut fb, &mut zb);
2858            render_dda(
2859                camera,
2860                &settings,
2861                grid,
2862                w as usize,
2863                &DdaEnv::default(),
2864                mip,
2865                &mut sink,
2866            );
2867        }
2868        fb.iter().map(|&c| c != 0).collect()
2869    }
2870
2871    /// DDA.6: rendering a mip-built grid at a coarse mip stays complete
2872    /// (hole-free silhouette) with roughly the same screen coverage as
2873    /// mip 0 — LOD coarsens detail, it doesn't punch holes or shrink the
2874    /// shape. (DDA has no axis-aligned mip beam — the artifact is
2875    /// structurally impossible with honest per-cell traversal.)
2876    #[test]
2877    fn mip_render_is_coarse_but_complete() {
2878        let mut vxl = roxlap_formats::vxl::Vxl::from_dense(64, |x, y, z| {
2879            let surf = 24 + ((x / 3 + y / 5) % 17);
2880            (z >= surf).then_some(0x80_50_70_90)
2881        });
2882        vxl.generate_mips(4);
2883        assert!(vxl.mip_count() >= 3, "need mips built for this test");
2884        let grid = GridView::from_single_vxl(&vxl);
2885        let (w, h) = (96u32, 96u32);
2886        let cam = Camera::orbit(0.7, 0.6, 110.0, [32.0, 32.0, 36.0]);
2887
2888        let m0 = render_mask_mip(grid, &cam, w, h, 0);
2889        let m2 = render_mask_mip(grid, &cam, w, h, 2);
2890
2891        let c0 = m0.iter().filter(|&&b| b).count();
2892        let c2 = m2.iter().filter(|&&b| b).count();
2893        assert!(c0 > 200 && c2 > 200, "both mips visible (c0={c0} c2={c2})");
2894        // Coverage within ~30 % — a coarse-mip silhouette closely tracks
2895        // the fine one (LOD coarsens detail, it doesn't lose the shape).
2896        // (Terrain silhouettes are non-convex — sky shows through
2897        // valleys — so a hole-free invariant doesn't apply here; that's
2898        // the convex single-voxel test's job.)
2899        let ratio = c2 as f32 / c0 as f32;
2900        assert!(
2901            (0.7..1.4).contains(&ratio),
2902            "mip-2 coverage {c2} vs mip-0 {c0} (ratio {ratio:.2}) diverged"
2903        );
2904    }
2905
2906    /// Headless perf bench (run: `cargo test -p roxlap-core --release
2907    /// dda::tests::bench_terrain -- --ignored --nocapture`). Single-
2908    /// thread `render_dda` over a hilly chunk at a horizon pose; prints
2909    /// ms/frame + per-frame traversal counters (cells / bricks /
2910    /// surface_color calls) to locate the bottleneck.
2911    #[test]
2912    #[ignore = "perf benchmark — run explicitly with --ignored"]
2913    fn bench_terrain() {
2914        use std::time::Instant;
2915        // Multi-chunk grid like the demo: NC×NC chunks of 128, hills.
2916        const NC: i32 = 6;
2917        let cs = crate::grid_view::CHUNK_SIZE_Z; // 256, but vsid is 128
2918        let _ = cs;
2919        let mut vxls: Vec<roxlap_formats::vxl::Vxl> = Vec::new();
2920        for cy in 0..NC {
2921            for cx in 0..NC {
2922                let (ox, oy) = (cx * 128, cy * 128);
2923                let mut v = roxlap_formats::vxl::Vxl::from_dense(128, |x, y, z| {
2924                    let (gx, gy) = (ox + x as i32, oy + y as i32);
2925                    let surf = 90 + ((gx / 7 + gy / 9).rem_euclid(40)) + ((gx / 23).rem_euclid(20));
2926                    (z as i32 >= surf).then_some(0x80_50_70_90 + (x ^ y) % 0x30)
2927                });
2928                v.generate_mips(4);
2929                vxls.push(v);
2930            }
2931        }
2932        let views: Vec<Option<GridView>> = vxls
2933            .iter()
2934            .map(|v| Some(GridView::from_single_vxl(v)))
2935            .collect();
2936        let cg = crate::ChunkGrid {
2937            chunks: &views,
2938            origin_chunk_xy: [0, 0],
2939            origin_chunk_z: 0,
2940            chunks_x: NC as u32,
2941            chunks_y: NC as u32,
2942            chunks_z: 1,
2943        };
2944        let grid = GridView::from_chunk_grid(&cg, 128);
2945
2946        let (w, h) = (960u32, 600u32);
2947        let mut settings = OpticastSettings::for_oracle_framebuffer(w, h);
2948        settings.max_scan_dist = 512;
2949        let n = (w * h) as usize;
2950        let mut fb = vec![0u32; n];
2951        let mut zb = vec![f32::INFINITY; n];
2952        let centre = [f64::from(NC * 128) / 2.0, f64::from(NC * 128) / 2.0, 60.0];
2953
2954        // Two poses: eye-level toward horizon (long rays) + looking down
2955        // at nearby terrain (short rays, demo-typical).
2956        let poses = [
2957            (
2958                "horizon",
2959                Camera::from_yaw_pitch([20.0, 20.0, 40.0], 0.6, 0.15),
2960            ),
2961            ("down", Camera::orbit(0.7, 1.0, 130.0, centre)),
2962        ];
2963        for (name, cam) in poses {
2964            {
2965                let mut sink = RasterSink::new(&mut fb, &mut zb);
2966                prof::reset();
2967                render_dda(
2968                    &cam,
2969                    &settings,
2970                    grid,
2971                    w as usize,
2972                    &DdaEnv::default(),
2973                    0,
2974                    &mut sink,
2975                );
2976            }
2977            let (cells, bricks, surf) = prof::read();
2978            let iters = 6;
2979            let t0 = Instant::now();
2980            for _ in 0..iters {
2981                let mut sink = RasterSink::new(&mut fb, &mut zb);
2982                render_dda(
2983                    &cam,
2984                    &settings,
2985                    grid,
2986                    w as usize,
2987                    &DdaEnv::default(),
2988                    0,
2989                    &mut sink,
2990                );
2991            }
2992            let ms = t0.elapsed().as_secs_f64() * 1000.0 / f64::from(iters);
2993            let hits = fb.iter().filter(|&&c| c != 0).count();
2994            eprintln!(
2995                "[{name}] {w}x{h} 1-thread: {ms:.1} ms | hits={hits}/{n} | per-px: cells={:.1} bricks={:.1} surf={:.1}",
2996                cells as f64 / n as f64,
2997                bricks as f64 / n as f64,
2998                surf as f64 / n as f64,
2999            );
3000        }
3001    }
3002
/// DDA.7: the tile-parallel driver is bit-identical to the
/// sequential one — DDA pixels are independent, so banding can't
/// change a pixel.
///
/// The same scene is rendered twice: once through
/// `render_brickmap_env` (the result this test treats as the
/// sequential reference) and once through `render_dda_parallel`.
/// The colour buffers must compare equal and every depth value must
/// have the same bit pattern.
#[test]
fn parallel_matches_sequential() {
    // Stepped terrain whose colour varies with (x, y), so a pixel
    // written by the wrong band cannot accidentally match.
    let vxl = roxlap_formats::vxl::Vxl::from_dense(64, |x, y, z| {
        let surf = 28 + ((x / 4 + y / 6) % 13);
        (z >= surf).then_some(0x80_40_60_80 + (x ^ y) % 0x30)
    });
    let grid = GridView::from_single_vxl(&vxl);
    let (w, h) = (96u32, 96u32);
    let cam = Camera::orbit(0.8, 0.55, 100.0, [32.0, 32.0, 40.0]);
    // Fog and side shading are both switched on so the comparison
    // covers more than the raw voxel colour.
    let env = DdaEnv {
        sky: None,
        fog_color: 0x00_20_30_40,
        fog_max_dist: 120.0,
        side_shades: [0, 0, 0, 0, 0x30, 0x10],
        materials: None,
        terrain_materials: &[],
        lights: CpuLights::default(),
        world_shadow: None,
    };

    let (seq_fb, seq_zb) = render_brickmap_env(grid, &cam, w, h, &env);

    let n = (w * h) as usize;
    let mut par_fb = vec![0u32; n];
    let mut par_zb = vec![f32::INFINITY; n];
    let settings = OpticastSettings::for_oracle_framebuffer(w, h);
    let (cache, mip) = local_cache(&grid, 0);
    render_dda_parallel(
        &cam,
        &settings,
        grid,
        &mut par_fb,
        &mut par_zb,
        w as usize,
        &env,
        &cache,
        mip,
    );
    assert!(par_fb == seq_fb, "parallel colour differs from sequential");
    // `zip` stops at the shorter buffer, so without this guard a
    // truncated depth buffer would pass the element-wise check below.
    assert_eq!(
        par_zb.len(),
        seq_zb.len(),
        "parallel depth buffer length differs from sequential"
    );
    // Compare raw bit patterns rather than `==` so that NaN or a
    // signed-zero difference cannot hide behind float comparison rules.
    assert!(
        par_zb
            .iter()
            .zip(&seq_zb)
            .all(|(a, b)| a.to_bits() == b.to_bits()),
        "parallel depth differs from sequential"
    );
}
3053
/// DDA.2 correctness gate for cliff faces. Voxlap keeps a colour only
/// for the surface voxel of a solid run, so `voxel_color` answers
/// `None` for anything inside the run. `surface_color` has to report
/// the run's surface colour there instead; if it did not, an oblique
/// ray meeting the *side* of a cliff would sail through it and the
/// terrain would look see-through.
#[test]
fn cliff_side_is_solid_not_see_through() {
    const SURFACE_Z: u32 = 50;
    const RUN_COLOR: u32 = 0x80_77_88_99;

    // Flat world: every voxel with z >= SURFACE_Z is filled.
    let vxl = roxlap_formats::vxl::Vxl::from_dense(8, |_, _, z| {
        if z >= SURFACE_Z {
            Some(RUN_COLOR)
        } else {
            None
        }
    });
    let grid = GridView::from_single_vxl(&vxl);

    // The surface voxel itself carries the colour.
    let at_surface = grid.voxel_color(4, 4, SURFACE_Z);
    assert_eq!(at_surface, Some(RUN_COLOR));

    // Deep inside the run there is no stored colour …
    let stored_inside = grid.voxel_color(4, 4, 150);
    assert_eq!(stored_inside, None);

    // … yet the voxel is solid: surface_color carries the run-top
    // colour down the cliff face, which makes it a genuine hit.
    let bled_inside = grid.surface_color(4, 4, 150);
    assert_eq!(bled_inside, Some(RUN_COLOR));

    // Air above the surface must still read as air.
    let above_surface = grid.surface_color(4, 4, 10);
    assert_eq!(above_surface, None);
}
3076
/// DDA.2: when the camera sits inside solid material, each ray must
/// strike the voxel it starts in straight away, so the hit mask comes
/// back fully set (nothing skipped, no garbage pixels).
#[test]
fn camera_inside_solid_hits_everywhere() {
    // A completely filled volume: the fill closure returns a colour
    // for every coordinate.
    let solid = roxlap_formats::vxl::Vxl::from_dense(16, |_, _, _| Some(0x80_55_55_55));
    let grid = GridView::from_single_vxl(&solid);

    let (w, h) = (32u32, 32u32);
    // Axis-aligned basis with the view direction along +z.
    let cam = Camera {
        forward: [0.0, 0.0, 1.0],
        down: [0.0, 1.0, 0.0],
        right: [1.0, 0.0, 0.0],
        pos: [8.0, 8.0, 128.0],
    };

    let mask = render_mask(grid, &cam, w, h);
    let any_miss = mask.iter().any(|&hit| !hit);
    assert!(
        !any_miss,
        "every ray must hit when the camera is inside solid"
    );
}
3096
/// Headline DDA.1 gate. One solid voxel seen from an oblique angle
/// must project to a convex silhouette with **no interior holes**.
/// That is the artifact class (the `tiny_grid_1x1x1` silhouette
/// notch) the voxlap renderer cannot avoid; because DDA casts an
/// independent ray per pixel, its silhouette is hole-free by
/// construction.
#[test]
fn single_voxel_silhouette_has_no_notch() {
    const VOXEL_COLOR: u32 = 0x80_FF_80_40;

    // Only the voxel at (8, 8, 8) is filled; everything else is air.
    let vxl = roxlap_formats::vxl::Vxl::from_dense(16, |x, y, z| {
        if (x, y, z) == (8, 8, 8) {
            Some(VOXEL_COLOR)
        } else {
            None
        }
    });
    let grid = GridView::from_single_vxl(&vxl);

    // Orbit around the voxel centre at an oblique angle so that three
    // faces are visible and the silhouette is a reasonably large
    // hexagon (dist 4 → ~12 px wide).
    let (w, h) = (96u32, 96u32);
    let cam = Camera::orbit(0.7, 0.6, 4.0, [8.5, 8.5, 8.5]);
    let mask = render_mask(grid, &cam, w, h);

    // Guard against a vacuous pass: the hole checks mean nothing if
    // hardly any pixel was hit.
    let hits = mask.iter().copied().filter(|&lit| lit).count();
    assert!(
        hits > 30,
        "silhouette too small to be meaningful: {hits} px"
    );

    let rows_ok = rows_have_no_holes(&mask, w, h);
    assert!(
        rows_ok,
        "row-interior gap in single-voxel silhouette (notch)"
    );

    let cols_ok = cols_have_no_holes(&mask, w, h);
    assert!(
        cols_ok,
        "column-interior gap in single-voxel silhouette (notch)"
    );
}
3130}