Skip to main content

roxlap_core/
opticast.rs

//! Per-frame orchestrator — wires the R4.1 builders into a single
//! `opticast` entry point.
//!
//! Port of the top-of-`opticast` execution order in
//! `voxlap5.c:opticast` (lines 2284..end-of-function), minus the
//! globals voxlap mutates inline:
//!
//! 1. `camera_math::derive` → per-frame f32 basis.
//! 2. `opticast_prelude::derive_prelude` → integer / fixed-point cache.
//! 3. `column_walk::camera_column_air_gap` → early-out if the camera
//!    is inside solid voxel material.
//! 4. `projection::derive_projection_with_y_range` → cx / cy /
//!    corner-cut quad for the requested y-range.
//! 5. `ray_step::derive_ray_step` → per-pixel ray-step coefficients.
//! 6. Four-quadrant scan dispatch (top, right, bottom, left), either
//!    once on the calling thread or once per horizontal strip when
//!    the scratch pool has more than one thread.
//!
//! [`OpticastSettings`] bundles the constants the four-quadrant scan
//! loops need (xres / yres / projection params / mip + scan-dist
//! controls) so the orchestrator's signature stays compact.
19
20use rayon::prelude::*;
21
22use crate::camera_math;
23use crate::camera_math::CameraState;
24use crate::column_walk;
25use crate::opticast_prelude;
26use crate::opticast_prelude::OpticastPrelude;
27use crate::projection;
28use crate::rasterizer::{Rasterizer, ScratchPool};
29use crate::ray_step;
30use crate::scan_loops::{
31    bottom_quadrant, left_quadrant, right_quadrant, top_quadrant, ScanContext,
32};
33use crate::Camera;
34
/// Constants one [`opticast`] call hands to its builders.
///
/// The fields mirror voxlap state: `anginc`, `mip_scan_dist` and
/// `max_scan_dist` correspond to the globals `vx5.anginc`,
/// `vx5.mipscandist` and `vx5.maxscandist`; `hx` / `hy` / `hz` are
/// the `setcamera` arguments `dahx` / `dahy` / `dahz`; `mip_levels`
/// is voxlap's `gmipnum` (`1` for the oracle scene).
///
/// `y_start..y_end` (R12.3) bounds the rows a call renders. The
/// default `0..yres` reproduces the pre-R12.3 full-frame behaviour
/// bit-exactly. Strip / tile callers narrow the range so that both
/// the pass-1 gline ray casts and the pass-2 hrend / vrend writes
/// stay inside it; the projection centre remains in absolute screen
/// coordinates and only the viewport edges move.
#[derive(Debug, Clone, Copy)]
pub struct OpticastSettings {
    /// Framebuffer width in pixels.
    pub xres: u32,
    /// Framebuffer height in pixels.
    pub yres: u32,
    /// First row rendered by this call (inclusive); `0` for a full
    /// frame.
    pub y_start: u32,
    /// Row just past the last one rendered (exclusive); `yres` for a
    /// full frame.
    pub y_end: u32,
    /// `setcamera`'s `dahx`.
    pub hx: f32,
    /// `setcamera`'s `dahy`.
    pub hy: f32,
    /// `setcamera`'s `dahz`.
    pub hz: f32,
    /// Voxlap's `vx5.anginc`.
    pub anginc: i32,
    /// Voxlap's `gmipnum`.
    pub mip_levels: u32,
    /// Voxlap's `vx5.mipscandist`.
    pub mip_scan_dist: i32,
    /// Voxlap's `vx5.maxscandist`.
    pub max_scan_dist: i32,
}

impl OpticastSettings {
    /// Full-frame settings for a `width × height` framebuffer using
    /// the voxlap-oracle convention from `tests/oracle/oracle.c`:
    /// `(hx, hy, hz) = (w/2, h/2, w/2)`, `anginc = 1`, one mip level,
    /// `mip_scan_dist = 4`, `max_scan_dist = 1024`, and
    /// `y_start..y_end = 0..height`.
    //
    // The u32 → f32 conversion is exact for realistic screen sizes
    // (anything up to 16M fits f32's 24-bit mantissa), hence the
    // precision-loss allow.
    #[allow(clippy::cast_precision_loss)]
    #[must_use]
    pub fn for_oracle_framebuffer(width: u32, height: u32) -> Self {
        let [half_w, half_h] = [width, height].map(|pixels| pixels as f32 * 0.5);
        Self {
            xres: width,
            yres: height,
            y_start: 0,
            y_end: height,
            hx: half_w,
            hy: half_h,
            hz: half_w,
            anginc: 1,
            mip_levels: 1,
            mip_scan_dist: 4,
            max_scan_dist: 1024,
        }
    }

    /// Copy of `self` that renders only the rows `[y_start, y_end)`.
    ///
    /// Every other field is carried over unchanged. The per-strip
    /// parallel dispatch (R12.3.1) uses this to derive one settings
    /// value per strip. No validation happens here: the caller must
    /// guarantee `y_start < y_end <= yres`.
    #[must_use]
    pub fn with_y_range(self, y_start: u32, y_end: u32) -> Self {
        Self {
            y_start,
            y_end,
            ..self
        }
    }
}
106
/// What a single [`opticast`] call ended up doing.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum OpticastOutcome {
    /// The four-quadrant dispatch ran. Individual quadrants may
    /// still have bailed out on their own geometry guards; that is
    /// expected and is not reported separately.
    Rendered,
    /// The camera sits inside solid voxel material, so nothing was
    /// drawn. This mirrors voxlap's early return from `opticast`:
    /// the screen keeps whatever it held before, which lets the host
    /// pre-fill it (with sky, for example).
    SkippedCameraInSolid,
}
118
119/// Drive one frame of opticast. The caller supplies:
120/// - `camera`: pose to render from.
121/// - `settings`: framebuffer + projection + scan-dist constants.
122/// - `vsid`: world dimension (square map).
123/// - `slab_buf` + `column_offsets`: world-level voxel data —
124///   `slab_buf` is the flat byte buffer holding all columns'
125///   slab lists concatenated; `column_offsets[i]` is the byte
126///   offset where column `i`'s slabs start.
127///   `column_offsets.len()` must equal `vsid * vsid + 1` (the
128///   final entry is `slab_buf.len()`, so column slices are
129///   `slab_buf[column_offsets[i]..column_offsets[i + 1]]`).
130///
131/// Whatever real or stub [`Rasterizer`] is plugged in receives the
132/// `gline` / `hrend` / `vrend` calls the four-quadrant scan loops
133/// produce; the [`ScratchPool`]'s slots accumulate the radar /
134/// angstart / lastx / uurend buffers between those calls.
135///
136/// Threading dial lives on the pool:
137/// - `pool.n_threads() == 1` → sequential. The four quadrants run
138///   on the calling thread against `pool.slot_mut(0)`. Pre-R12
139///   shape; the byte-stable golden baseline.
140/// - `pool.n_threads() >= 2` → R12.3.1 per-strip parallel. The
141///   framebuffer's y-range splits into N horizontal strips of
142///   `~yres/N` rows each. Each strip runs its own opticast pass
143///   (4 quadrants) against its own slot from
144///   `pool.slots_mut_slice()`, with [`OpticastSettings::y_start`] /
145///   `y_end` clipped to the strip. Strips run via
146///   `rayon::par_iter_mut`, each with a cloned rasterizer (raw
147///   fb / zb pointers shared, strip-disjoint row writes).
148///
149/// **Byte-stability caveat** (R12.3.1): per-strip rendering produces
150/// different pixel hashes than single-strip. Voxlap's screen-line
151/// interpolation in `gline` parameterises rays by viewport-y bounds
152/// (via the corner-cut quad's grd / dxy); strips have narrower
153/// y-bounds, so the per-strip ray fan discretises slightly
154/// differently. The image is geometrically valid — each pixel still
155/// samples a camera-correct ray — but the 1/N strip discretisation
156/// drifts by a fraction of a voxel from the full-frame
157/// discretisation. For CI, oracle goldens are frozen at
158/// `--threads 1` (single strip = full frame, byte-stable).
159///
160/// `R: Clone + Send + Sync` is required by the parallel branch even
161/// when it doesn't fire — keeping the bound consistent across both
162/// paths means the generic body monomorphizes once. The `Sync` bound
163/// shows up because `rayon::par_iter_mut`'s closure shares `&R` (the
164/// strip-cloning template) across worker threads. Test rasterizers
165/// (`Counts`, `RecordingRasterizer`) derive Clone + auto-Send/Sync
166/// so they satisfy the bound at no runtime cost.
167//
168// Sign convention: voxlap's opticast forwards everything as-is from
169// the static state; here it's all explicit parameters. The clippy
170// arg-count lint is allowed because each parameter pulls its weight
171// (a struct-of-args variant just renames the same data). The
172// xres / yres → i32 casts are bounded by realistic framebuffer
173// dimensions and won't wrap.
174#[allow(clippy::too_many_arguments, clippy::cast_possible_wrap)]
175#[must_use]
176pub fn opticast<R: Rasterizer + Clone + Send + Sync>(
177    rasterizer: &mut R,
178    pool: &mut ScratchPool,
179    camera: &Camera,
180    settings: &OpticastSettings,
181    vsid: u32,
182    slab_buf: &[u8],
183    column_offsets: &[u32],
184) -> OpticastOutcome {
185    let cs = camera_math::derive(
186        camera,
187        settings.xres,
188        settings.yres,
189        settings.hx,
190        settings.hy,
191        settings.hz,
192    );
193
194    let prelude = opticast_prelude::derive_prelude(
195        &cs,
196        vsid,
197        settings.mip_levels,
198        settings.mip_scan_dist,
199        settings.max_scan_dist,
200    );
201
202    // gstartv walk — early-out if the camera is inside solid voxel
203    // material. Slice `slab_buf` at the camera column's range
204    // (computed by the prelude as `column_index = li_pos.y * vsid +
205    // li_pos.x`); a malformed or out-of-bounds offset table is
206    // treated as "camera in solid" so we early-out cleanly.
207    let camera_column = camera_column_slice(slab_buf, column_offsets, prelude.column_index);
208    let Some(camera_column_data) = camera_column else {
209        return OpticastOutcome::SkippedCameraInSolid;
210    };
211    let Some((gstartz0, gstartz1, camera_vptr_offset)) =
212        column_walk::camera_column_air_gap(camera_column_data, prelude.li_pos[2])
213    else {
214        return OpticastOutcome::SkippedCameraInSolid;
215    };
216
217    // Per-frame setup hook needs a `ScanContext` with cy / camera
218    // state populated; build a "setup-only" projection over the
219    // FULL frame y-range so frame_setup sees the same projection
220    // center the strips inherit.
221    let setup_proj = projection::derive_projection_with_y_range(
222        &cs,
223        settings.xres,
224        settings.yres,
225        settings.y_start,
226        settings.y_end,
227        settings.hx,
228        settings.hy,
229        settings.hz,
230        settings.anginc,
231    );
232    let setup_rs = ray_step::derive_ray_step(&cs, setup_proj.cx, setup_proj.cy, settings.hz);
233    let setup_ctx = ScanContext {
234        proj: &setup_proj,
235        rs: &setup_rs,
236        prelude: &prelude,
237        xres: settings.xres as i32,
238        y_start: settings.y_start as i32,
239        y_end: settings.y_end as i32,
240        anginc: settings.anginc,
241        camera_state: &cs,
242        camera_gstartz0: gstartz0,
243        camera_gstartz1: gstartz1,
244        camera_vptr_offset,
245    };
246
247    // Per-frame setup hook — concrete rasterizers (R4.2) cache the
248    // bits of CameraState / RayStep / OpticastPrelude they need for
249    // the per-pixel math. Runs on the calling thread before any
250    // parallel fan-out so subsequent clones inherit the populated
251    // FrameCache. Stub rasterizers ignore via the trait's default
252    // no-op.
253    rasterizer.frame_setup(&setup_ctx);
254
255    let n_strips = pool.n_threads();
256    if n_strips <= 1 {
257        // Sequential — slot 0, full settings. Byte-stable golden
258        // baseline.
259        let scratch = pool.slot_mut(0);
260        top_quadrant(rasterizer, scratch, &setup_ctx);
261        right_quadrant(rasterizer, scratch, &setup_ctx);
262        bottom_quadrant(rasterizer, scratch, &setup_ctx);
263        left_quadrant(rasterizer, scratch, &setup_ctx);
264    } else {
265        // Per-strip parallel (R12.3.1). Slice the y-range into N
266        // strips of `~strip_height` rows each. Each strip runs its
267        // own opticast against its own slot. See
268        // `run_strip_parallel` for the per-strip body.
269        run_strip_parallel(
270            rasterizer,
271            pool,
272            settings,
273            &cs,
274            &prelude,
275            gstartz0,
276            gstartz1,
277            camera_vptr_offset,
278        );
279    }
280
281    OpticastOutcome::Rendered
282}
283
284/// Per-strip parallel body. Splits `[settings.y_start, settings.y_end)`
285/// into `pool.n_threads()` contiguous row strips and runs one
286/// opticast pass per strip via `rayon::par_iter_mut`. Each strip:
287///
288/// * clones `rasterizer` (raw fb / zb pointers in the
289///   [`crate::scalar_rasterizer::RasterTarget`] are `Copy`; the
290///   strip-disjoint row writes make the aliasing safe);
291/// * gets exclusive `&mut ScanScratch` access to one pool slot via
292///   `par_iter_mut`'s borrow split;
293/// * derives its own [`crate::projection::ProjectionRect`] with
294///   wy0 / wy1 clipped to the strip — `gline` and the four scan
295///   loops then auto-clip ray casts and pixel writes;
296/// * runs the four quadrants over its strip.
297//
298// Per-strip projection re-derivation is fast (a handful of f32
299// ops). prelude + camera_state are shared `&` borrows — Sync, no
300// per-strip allocation.
301#[allow(clippy::too_many_arguments)]
302fn run_strip_parallel<R: Rasterizer + Clone + Send + Sync>(
303    rasterizer: &mut R,
304    pool: &mut ScratchPool,
305    settings: &OpticastSettings,
306    cs: &CameraState,
307    prelude: &OpticastPrelude,
308    gstartz0: i32,
309    gstartz1: i32,
310    camera_vptr_offset: usize,
311) {
312    let n_strips = pool.n_threads();
313    let y_start_total = settings.y_start;
314    let y_end_total = settings.y_end;
315    let span = y_end_total.saturating_sub(y_start_total);
316    if span == 0 {
317        return;
318    }
319
320    // `(span + n - 1) / n` → ceiling-divide so trailing rows aren't
321    // dropped on non-divisible splits. Last strip may be smaller.
322    #[allow(clippy::cast_possible_truncation)]
323    let strip_height: u32 = ((span + n_strips as u32 - 1) / n_strips as u32).max(1);
324
325    // Capture borrowed copies for the parallel closure — closure
326    // needs `move` for the cloned rasterizer + slot, but the
327    // shared `&` borrows below are Send + Sync via auto-impl.
328    let rasterizer_template = &*rasterizer;
329    let cs_ref: &CameraState = cs;
330    let prelude_ref: &OpticastPrelude = prelude;
331    let settings_ref: &OpticastSettings = settings;
332
333    let strip_body = |(i, scratch): (usize, &mut crate::rasterizer::ScanScratch)| {
334        #[allow(clippy::cast_possible_truncation)]
335        let strip_y_start = y_start_total.saturating_add((i as u32).saturating_mul(strip_height));
336        let strip_y_end = strip_y_start.saturating_add(strip_height).min(y_end_total);
337        if strip_y_start >= strip_y_end {
338            // Tail strip past the actual y-range — happens when
339            // n_strips > span (e.g., 16 strips on a 12-row span).
340            return;
341        }
342
343        let strip_proj = projection::derive_projection_with_y_range(
344            cs_ref,
345            settings_ref.xres,
346            settings_ref.yres,
347            strip_y_start,
348            strip_y_end,
349            settings_ref.hx,
350            settings_ref.hy,
351            settings_ref.hz,
352            settings_ref.anginc,
353        );
354        let strip_rs =
355            ray_step::derive_ray_step(cs_ref, strip_proj.cx, strip_proj.cy, settings_ref.hz);
356        #[allow(clippy::cast_possible_wrap)]
357        let strip_ctx = ScanContext {
358            proj: &strip_proj,
359            rs: &strip_rs,
360            prelude: prelude_ref,
361            xres: settings_ref.xres as i32,
362            y_start: strip_y_start as i32,
363            y_end: strip_y_end as i32,
364            anginc: settings_ref.anginc,
365            camera_state: cs_ref,
366            camera_gstartz0: gstartz0,
367            camera_gstartz1: gstartz1,
368            camera_vptr_offset,
369        };
370
371        let mut strip_rasterizer: R = rasterizer_template.clone();
372        top_quadrant(&mut strip_rasterizer, scratch, &strip_ctx);
373        right_quadrant(&mut strip_rasterizer, scratch, &strip_ctx);
374        bottom_quadrant(&mut strip_rasterizer, scratch, &strip_ctx);
375        left_quadrant(&mut strip_rasterizer, scratch, &strip_ctx);
376    };
377
378    pool.slots_mut_slice()
379        .par_iter_mut()
380        .enumerate()
381        .for_each(strip_body);
382}
383
/// Borrow the part of `slab_buf` that starts at column `idx`'s first
/// slab byte.
///
/// The start offset is `column_offsets[idx]`. The returned slice runs
/// to the END of `slab_buf`, not to the next table entry: the slab
/// walker terminates itself on `nextptr == 0`, and after edits
/// voxalloc scatters columns across the buffer, so
/// `column_offsets[idx + 1]` is not a valid end bound.
///
/// Returns `None` when `idx` is outside `column_offsets` or when the
/// start offset is at or past the end of `slab_buf`. No other
/// validation of the table is performed (entries are not checked for
/// monotonicity). [`opticast`] treats `None` as camera-in-solid.
pub(crate) fn camera_column_slice<'a>(
    slab_buf: &'a [u8],
    column_offsets: &[u32],
    idx: u32,
) -> Option<&'a [u8]> {
    let start = *column_offsets.get(idx as usize)? as usize;
    // `get(start..)` yields an empty slice for `start == len`; that
    // case must stay `None`, so empty tails are filtered out.
    slab_buf.get(start..).filter(|tail| !tail.is_empty())
}
407
#[cfg(test)]
mod tests {
    use super::*;
    use crate::rasterizer::ScanScratch;

    /// Stub rasterizer that only counts how often each of the three
    /// callbacks fires. Derives `Clone` (and is auto `Send + Sync`)
    /// so it satisfies opticast's `R: Clone + Send + Sync` bound.
    #[derive(Debug, Default, Clone)]
    struct Counts {
        gline: u32,
        hrend: u32,
        vrend: u32,
    }

    impl Rasterizer for Counts {
        fn gline(&mut self, _: &mut ScanScratch, _: u32, _: f32, _: f32, _: f32, _: f32) {
            self.gline += 1;
        }
        fn hrend(&mut self, _: &mut ScanScratch, _: i32, _: i32, _: i32, _: i32, _: i32, _: i32) {
            self.hrend += 1;
        }
        fn vrend(&mut self, _: &mut ScanScratch, _: i32, _: i32, _: i32, _: i32, _: i32) {
            self.vrend += 1;
        }
    }

    /// Single solid slab at z = 200..254. cz < 200 → air gap (0, 200).
    /// cz inside [200, 254] → in solid → opticast skips.
    fn solid_slab_z200_to_254() -> Vec<u8> {
        // Header [nextptr=0, z1=200, z1c=254, dummy=0]. The walker
        // doesn't read past the header, so no colour bytes needed.
        vec![0, 200, 254, 0]
    }

    /// Camera at the centre of a 2048² map, z = 128, with the
    /// forward axis along +z.
    fn looking_down_camera() -> Camera {
        Camera {
            pos: [1024.0, 1024.0, 128.0],
            right: [1.0, 0.0, 0.0],
            down: [0.0, 1.0, 0.0],
            forward: [0.0, 0.0, 1.0],
        }
    }

    /// Build a `(slab_buf, column_offsets)` pair where one column —
    /// `camera_column_index` — holds `column_data`'s bytes and
    /// every other column is empty. Lets opticast tests target the
    /// camera column without allocating per-column slab data for
    /// the full `vsid²` grid.
    ///
    /// Offsets up to and including the camera column are `0`; every
    /// later entry equals `column_data.len()`.
    #[allow(clippy::cast_possible_truncation)]
    fn synthetic_world_with_camera_column(
        column_data: &[u8],
        camera_column_index: u32,
        vsid: u32,
    ) -> (Vec<u8>, Vec<u32>) {
        let vsid_sq = (vsid as usize) * (vsid as usize);
        let len_u32 = column_data.len() as u32;
        let cam_idx = camera_column_index as usize;
        let mut column_offsets = vec![0u32; vsid_sq + 1];
        for offset in &mut column_offsets[(cam_idx + 1)..] {
            *offset = len_u32;
        }
        (column_data.to_vec(), column_offsets)
    }

    /// `looking_down_camera` at pos = (1024, 1024) with vsid = 2048
    /// → `column_index` = 1024 * 2048 + 1024 = `2_099_200`.
    const LOOKING_DOWN_COL_INDEX: u32 = 1024 * 2048 + 1024;

    #[test]
    fn opticast_dispatches_all_four_quadrants() {
        let cam = looking_down_camera();
        let settings = OpticastSettings::for_oracle_framebuffer(640, 480);
        let mut counts = Counts::default();
        let mut pool = ScratchPool::new(640, 480, 2048);
        let (slab_buf, column_offsets) = synthetic_world_with_camera_column(
            &solid_slab_z200_to_254(),
            LOOKING_DOWN_COL_INDEX,
            2048,
        );

        let outcome = opticast(
            &mut counts,
            &mut pool,
            &cam,
            &settings,
            2048,
            &slab_buf,
            &column_offsets,
        );

        assert_eq!(outcome, OpticastOutcome::Rendered);
        // Looking-down camera: each quadrant fires. gline counts ≈
        // 2 × x-fan-width + 2 × y-fan-width; positive total.
        assert!(counts.gline > 0, "expected ≥ 1 gline call");
        // Top + bottom quadrants both produce hrend; right + left
        // produce vrend.
        assert!(counts.hrend > 0, "expected ≥ 1 hrend call");
        assert!(counts.vrend > 0, "expected ≥ 1 vrend call");
    }

    #[test]
    fn opticast_skips_when_camera_in_solid() {
        // Place the camera inside the solid slab z = 200..254 by
        // moving pos.z to 220.
        let mut cam = looking_down_camera();
        cam.pos[2] = 220.0;
        let settings = OpticastSettings::for_oracle_framebuffer(640, 480);
        let mut counts = Counts::default();
        let mut pool = ScratchPool::new(640, 480, 2048);
        let (slab_buf, column_offsets) = synthetic_world_with_camera_column(
            &solid_slab_z200_to_254(),
            LOOKING_DOWN_COL_INDEX,
            2048,
        );

        let outcome = opticast(
            &mut counts,
            &mut pool,
            &cam,
            &settings,
            2048,
            &slab_buf,
            &column_offsets,
        );

        assert_eq!(outcome, OpticastOutcome::SkippedCameraInSolid);
        assert_eq!(counts.gline, 0);
        assert_eq!(counts.hrend, 0);
        assert_eq!(counts.vrend, 0);
    }

    #[test]
    fn for_oracle_framebuffer_defaults() {
        let s = OpticastSettings::for_oracle_framebuffer(640, 480);
        assert_eq!(s.xres, 640);
        assert_eq!(s.yres, 480);
        // Full-frame row range.
        assert_eq!(s.y_start, 0);
        assert_eq!(s.y_end, 480);
        // hx / hy / hz: voxlap-oracle convention.
        assert!((s.hx - 320.0).abs() < f32::EPSILON);
        assert!((s.hy - 240.0).abs() < f32::EPSILON);
        assert!((s.hz - 320.0).abs() < f32::EPSILON);
        assert_eq!(s.anginc, 1);
        assert_eq!(s.mip_levels, 1);
        assert_eq!(s.mip_scan_dist, 4);
        assert_eq!(s.max_scan_dist, 1024);
    }

    #[test]
    fn with_y_range_overrides_only_the_row_bounds() {
        let base = OpticastSettings::for_oracle_framebuffer(640, 480);
        let strip = base.with_y_range(120, 240);
        assert_eq!(strip.y_start, 120);
        assert_eq!(strip.y_end, 240);
        // Everything else is carried over from the base settings.
        assert_eq!(strip.xres, base.xres);
        assert_eq!(strip.yres, base.yres);
        assert_eq!(strip.anginc, base.anginc);
        assert_eq!(strip.mip_levels, base.mip_levels);
        assert_eq!(strip.mip_scan_dist, base.mip_scan_dist);
        assert_eq!(strip.max_scan_dist, base.max_scan_dist);
    }

    #[test]
    fn camera_column_slice_bounds() {
        let slab_buf = [1u8, 2, 3, 4, 5, 6];
        let column_offsets = [0u32, 4, 6];
        // A valid offset slices through to the end of the buffer.
        assert_eq!(
            camera_column_slice(&slab_buf, &column_offsets, 1),
            Some(&slab_buf[4..])
        );
        // Offset equal to the buffer length → nothing to walk.
        assert_eq!(camera_column_slice(&slab_buf, &column_offsets, 2), None);
        // Index past the end of the offset table.
        assert_eq!(camera_column_slice(&slab_buf, &column_offsets, 3), None);
    }
}