Skip to main content

roxlap_core/
drawtile.rs

1//! Voxlap's 2D textured-quad blit primitive — `drawtile`
2//! (`voxlap5.c:6954`). Used for HUD overlays, weapon sprites,
3//! and the oracle's `tile_*` validation poses.
4//!
5//! Three rendering paths fork on `(black ^ white) & 0xff000000`
6//! (the two endpoints' alpha bytes) and on the zoom factors:
7//!
8//! 1. **Ignore-alpha, 0.5× zoom** (`xz == yz == 32768`): each
9//!    output pixel is the byte-wise rounded average of a 2×2
10//!    source block. Voxlap's MMX `pavgb`-chain fast path.
11//! 2. **Ignore-alpha, arbitrary zoom**: nearest-neighbour
12//!    texture stretch — output pixel = source[uu, vv] with
13//!    Q16.16 fixed-point u/v.
14//! 3. **Alpha modulate + blend**: per-channel `(W - B)/256 + B`
15//!    modulation between the `black` / `white` endpoint colours,
16//!    then alpha-blended onto the destination via the modulated
17//!    alpha byte. Includes voxlap's transparent-skip /
18//!    opaque-passthrough trichotomy.
19//!
20//! Tile pixels are voxlap's BGRA `i32` layout (low byte = blue,
21//! top byte = brightness/alpha) — same convention as the rest of
22//! the engine. The destination framebuffer is row-major `u32`
23//! with `pitch_pixels` stride.
24
25#![allow(
26    clippy::cast_possible_truncation,
27    clippy::cast_possible_wrap,
28    clippy::cast_sign_loss,
29    clippy::cast_lossless,
30    clippy::similar_names,
31    clippy::too_many_arguments,
32    clippy::too_many_lines,
33    clippy::doc_markdown,
34    clippy::many_single_char_names
35)]
36
37use crate::sprite::DrawTarget;
38
/// Q16.16 multiply: voxlap's `mulshr16(a, d) = (a * d) >> 16`.
///
/// The product is formed in `i64` so the multiply itself cannot
/// overflow; the shift is arithmetic (rounds toward negative
/// infinity) and the result is then narrowed back to `i32`,
/// keeping only the low 32 bits.
#[inline]
fn mulshr16(a: i32, d: i32) -> i32 {
    let wide_product = i64::from(a) * i64::from(d);
    (wide_product >> 16) as i32
}
45
/// Q16.16 divide: voxlap's `shldiv16(a, b) = (a << 16) / b`.
///
/// Used to turn a zoom factor into its per-pixel tile-space step.
/// The numerator is widened to `i64` before scaling by 2^16, the
/// quotient truncates toward zero, and the result is narrowed back
/// to `i32` (low 32 bits only).
///
/// # Panics
///
/// Panics if `b == 0` (integer division by zero).
#[inline]
fn shldiv16(a: i32, b: i32) -> i32 {
    let scaled_numerator = i64::from(a) * 65_536;
    let quotient = scaled_numerator / i64::from(b);
    quotient as i32
}
52
53/// Render one screen-space tile blit. Mirror of voxlap5.c:6954-7082.
54///
55/// Parameters mirror voxlap's call signature:
56/// - `target`: framebuffer + dimensions. Z-buffer is unused.
57/// - `tile_pixels`: source pixels, row-major BGRA `i32`. Length
58///   must accommodate `(ty - 1) * (tile_pitch_bytes / 4) + tx`.
59/// - `tile_pitch_bytes`: byte stride between source rows
60///   (voxlap's `tp`). Typically `tx * 4`.
61/// - `(tx, ty)`: source tile dimensions in pixels.
62/// - `(tcx, tcy)`: tile centre in source-pixel Q16.16
63///   coordinates. Voxlap uses this to anchor the tile so the
64///   centre lands at `(sx, sy)` regardless of zoom.
65/// - `(sx, sy)`: screen-space anchor in Q16.16. The tile centre
66///   `(tcx, tcy)` ends up at this screen position.
67/// - `(xz, yz)`: per-axis zoom in Q16.16. `65536` = 1×;
68///   `32768` = 0.5× (triggers the 2×2-average fast path);
69///   anything else takes the texture-stretch path.
70/// - `black`, `white`: alpha-modulation endpoints. If the alpha
71///   bytes are equal, the alpha path is skipped (the colour
72///   modulation would be a constant tint applied to every pixel
73///   and voxlap special-cases it as "no alpha"). Otherwise:
74///   each source pixel's bytes get linearly remapped from
75///   `[0, 255]` to `[black_byte, white_byte]`, then the modulated
76///   alpha byte drives an alpha-blend onto the framebuffer.
77pub fn drawtile(
78    target: &mut DrawTarget<'_>,
79    tile_pixels: &[i32],
80    tile_pitch_bytes: i32,
81    tx: i32,
82    ty: i32,
83    tcx: i32,
84    tcy: i32,
85    sx: i32,
86    sy: i32,
87    xz: i32,
88    yz: i32,
89    black: i32,
90    white: i32,
91) {
92    if tile_pixels.is_empty() || xz == 0 || yz == 0 {
93        return;
94    }
95
96    // Voxlap5.c:6962-6967 — derive screen + tile clip + per-pixel
97    // step coefficients in Q16.16.
98    let sx0 = sx.wrapping_sub(mulshr16(tcx, xz));
99    let sx1 = sx0.wrapping_add(xz.wrapping_mul(tx));
100    let sy0 = sy.wrapping_sub(mulshr16(tcy, yz));
101    let sy1 = sy0.wrapping_add(yz.wrapping_mul(ty));
102
103    let xres = target.width as i32;
104    let yres = target.height as i32;
105    let x0 = ((sx0 + 65535) >> 16).max(0);
106    let x1 = ((sx1 + 65535) >> 16).min(xres);
107    let y0 = ((sy0 + 65535) >> 16).max(0);
108    let y1 = ((sy1 + 65535) >> 16).min(yres);
109    if x0 >= x1 || y0 >= y1 {
110        return;
111    }
112
113    let ui = shldiv16(65536, xz);
114    let u = mulshr16(-sx0, ui);
115    let vi = shldiv16(65536, yz);
116    let v = mulshr16(-sy0, vi);
117
118    let pitch_pixels = target.pitch_pixels;
119    let tile_pitch_pixels = (tile_pitch_bytes >> 2) as usize;
120
121    if (black ^ white) & 0x00ff_0000_u32.wrapping_shl(8) as i32 == 0
122        && (black ^ white) & (0xff_u32 << 24) as i32 == 0
123    {
124        // Voxlap's `if (!((black^white)&0xff000000))` — alpha bytes
125        // match → no-alpha branch. The literal `0xff000000` is i32
126        // negative; using the unsigned form above sidesteps clippy
127        // overflow warnings and is bit-equivalent.
128        if xz == 32768 && yz == 32768 {
129            // ---------------------------------------------------------
130            // Path 1: 0.5× zoom, 2×2-average. Voxlap5.c:6970-7000.
131            // ---------------------------------------------------------
132            for y in y0..y1 {
133                let vv = y.wrapping_mul(vi).wrapping_add(v);
134                let row_pixel = (y as usize) * pitch_pixels;
135                // Source-tile starting pixel for this row's first
136                // 2×2 block.
137                let plc_pixel = (((x0.wrapping_mul(ui).wrapping_add(u)) >> 16) as usize)
138                    + ((vv >> 16) as usize) * tile_pitch_pixels;
139                for x in x0..x1 {
140                    let k = (x - x0) as usize;
141                    // Each output pixel: 2x2 source block.
142                    let ta = tile_pixels[plc_pixel + k * 2] as u32;
143                    let tb = tile_pixels[plc_pixel + k * 2 + 1] as u32;
144                    let ba = tile_pixels[plc_pixel + k * 2 + tile_pitch_pixels] as u32;
145                    let bb = tile_pixels[plc_pixel + k * 2 + tile_pitch_pixels + 1] as u32;
146                    let mut out: u32 = 0;
147                    for b in 0..4u32 {
148                        let va = (ta >> (b * 8)) & 0xff;
149                        let vb = (tb >> (b * 8)) & 0xff;
150                        let va_avg = (va + vb + 1) >> 1;
151                        let v2a = (ba >> (b * 8)) & 0xff;
152                        let v2b = (bb >> (b * 8)) & 0xff;
153                        let vb_avg = (v2a + v2b + 1) >> 1;
154                        let avg2 = (va_avg + vb_avg + 1) >> 1;
155                        out |= avg2 << (b * 8);
156                    }
157                    // SAFETY: row_pixel + x < pitch_pixels * height by
158                    // x0/x1/y0/y1 viewport clip; sequential blit, no
159                    // parallel aliasing.
160                    unsafe { target.fb_write(row_pixel + x as usize, out) };
161                }
162            }
163        } else {
164            // ---------------------------------------------------------
165            // Path 2: arbitrary zoom, nearest-neighbour stretch.
166            // Voxlap5.c:7002-7012.
167            // ---------------------------------------------------------
168            let plc = x0.wrapping_mul(ui).wrapping_add(u);
169            for y in y0..y1 {
170                let vv = y.wrapping_mul(vi).wrapping_add(v);
171                let row_pixel = (y as usize) * pitch_pixels;
172                let j_pixel = ((vv >> 16) as usize) * tile_pitch_pixels;
173                let mut uu = plc;
174                for x in x0..x1 {
175                    let src = tile_pixels[j_pixel + ((uu >> 16) as usize)];
176                    // SAFETY: see Path 1's matching write.
177                    unsafe { target.fb_write(row_pixel + x as usize, src as u32) };
178                    uu = uu.wrapping_add(ui);
179                }
180            }
181        }
182    } else {
183        // -----------------------------------------------------------
184        // Path 3: alpha modulate + blend. Voxlap5.c:7014-7081.
185        // -----------------------------------------------------------
186        // Per-channel scale = (white_byte - black_byte) << 4. Voxlap
187        // bumps a ±255 difference to ±256 so the pmulhw-equivalent
188        // multiply produces the unbiased "scale by full byte range"
189        // result without losing 1 LSB.
190        let mut bw_scale = [0i16; 4];
191        let mut bk = [0i32; 4];
192        for b in 0..4usize {
193            let bl = (black >> (b * 8)) & 0xff;
194            let wh = (white >> (b * 8)) & 0xff;
195            let mut diff = wh - bl;
196            if diff == 255 {
197                diff = 256;
198            } else if diff == -255 {
199                diff = -256;
200            }
201            bw_scale[b] = (diff << 4) as i16;
202            bk[b] = bl;
203        }
204
205        for y in y0..y1 {
206            let vv = y.wrapping_mul(vi).wrapping_add(v);
207            let row_pixel = (y as usize) * pitch_pixels;
208            let j_pixel = ((vv >> 16) as usize) * tile_pitch_pixels;
209            let mut uu = x0.wrapping_mul(ui).wrapping_add(u);
210            for x in x0..x1 {
211                let src = tile_pixels[j_pixel + ((uu >> 16) as usize)];
212                uu = uu.wrapping_add(ui);
213
214                // Per-channel modulate: byte ↦ ((byte<<4)*scale)>>16
215                // + black_byte, then per-channel saturate to [0, 255].
216                let mut mod_word = [0i16; 4];
217                let mut isat: u32 = 0;
218                for (b, mw) in mod_word.iter_mut().enumerate() {
219                    let byte = (src >> (b * 8)) & 0xff;
220                    let prod = ((byte << 4) * i32::from(bw_scale[b])) >> 16;
221                    let m = (prod + bk[b]) as i16;
222                    *mw = m;
223                    let r = i32::from(m).clamp(0, 255);
224                    isat |= ((r as u32) & 0xff) << (b * 8);
225                }
226                let i = isat as i32;
227
228                // Voxlap's "transparent-skip / opaque-passthrough"
229                // hack (voxlap5.c:7056-7060). For i in
230                // `[-0x1000000, +0x1000000)`: skip pixel (= alpha
231                // ≈ 0). When i < 0 in this range (sign-extension of
232                // 0xff alpha), write the pixel as-is.
233                if (i.wrapping_add(0x0100_0000) as u32) < 0x0200_0000 {
234                    if i < 0 {
235                        // SAFETY: see Path 1's matching write.
236                        unsafe { target.fb_write(row_pixel + x as usize, i as u32) };
237                    }
238                    continue;
239                }
240
241                // Alpha blend: dst.byte = clamp((mod-dst)*alpha/256
242                // + dst, 0, 255). Voxlap's psubw / psllw 4 / pshufw
243                // alpha / pmulhw / paddw / packuswb.
244                // SAFETY: same in-bounds argument as Path 1's write.
245                let dst = unsafe { target.fb_read(row_pixel + x as usize) } & 0x00ff_ffff;
246                let alpha_shifted = i32::from(mod_word[3]) << 4;
247                let mut blended: u32 = 0;
248                for (b, &mw) in mod_word.iter().enumerate() {
249                    let screen_byte = ((dst >> (b * 8)) & 0xff) as i32;
250                    let delta = i32::from(mw) - screen_byte;
251                    let scaled = ((delta << 4) * alpha_shifted) >> 16;
252                    let r = (scaled + screen_byte).clamp(0, 255);
253                    blended |= (r as u32) << (b * 8);
254                }
255                // SAFETY: see Path 1's matching write.
256                unsafe { target.fb_write(row_pixel + x as usize, blended) };
257            }
258        }
259    }
260}
261
#[cfg(test)]
mod tests {
    use super::*;

    /// Framebuffer edge length used by every test.
    const FB_DIM: u32 = 16;

    /// Allocate a framebuffer pre-filled with `fill_col` and a
    /// matching dummy zbuffer for `DrawTarget`.
    fn alloc_fb(w: u32, h: u32, fill_col: u32) -> (Vec<u32>, Vec<f32>) {
        let n = (w * h) as usize;
        (vec![fill_col; n], vec![f32::INFINITY; n])
    }

    fn make_target<'a>(fb: &'a mut [u32], zb: &'a mut [f32], w: u32, h: u32) -> DrawTarget<'a> {
        DrawTarget::new(fb, zb, w as usize, w, h)
    }

    /// Blit a tightly-packed 4×4 tile whose centre (2, 2) is anchored
    /// at the whole-pixel screen position (`anchor`, `anchor`), with
    /// the same `zoom` on both axes.
    fn blit_4x4(
        fb: &mut [u32],
        zb: &mut [f32],
        tile: &[i32],
        anchor: i32,
        zoom: i32,
        black: i32,
        white: i32,
    ) {
        let mut target = make_target(fb, zb, FB_DIM, FB_DIM);
        drawtile(
            &mut target,
            tile,
            4 * 4,
            4,
            4,
            2 << 16,
            2 << 16,
            anchor << 16,
            anchor << 16,
            zoom,
            zoom,
            black,
            white,
        );
    }

    /// Linear framebuffer index of screen pixel `(x, y)`.
    fn at(x: usize, y: usize) -> usize {
        y * FB_DIM as usize + x
    }

    /// 1× zoom, no-alpha path: a 4×4 tile stamped at screen
    /// centre should reproduce the tile exactly within the
    /// destination region.
    #[test]
    fn one_x_no_alpha_copies_tile_pixels_unchanged() {
        let tile: Vec<i32> = (0..16).map(|i| 0x80_000000_u32 as i32 + i).collect();
        let (mut fb, mut zb) = alloc_fb(FB_DIM, FB_DIM, 0);
        // Tile centre = (2, 2); screen anchor = (8, 8). The 4×4
        // tile lands at screen [6..10) × [6..10).
        blit_4x4(&mut fb, &mut zb, &tile, 8, 1 << 16, 0, 0);
        // Spot-check: tile[5] (= row 1, col 1) lands at screen
        // (6+1, 6+1) = (7, 7).
        assert_eq!(fb[at(7, 7)], 0x80_000005);
        // Tile pixel at (3, 3) → screen (9, 9).
        assert_eq!(fb[at(9, 9)], 0x80_00000f);
    }

    /// 0.5× zoom, no-alpha path: a 4×4 tile blits as exactly a 2×2
    /// region of byte-wise-averaged pixels and touches nothing else.
    #[test]
    fn half_zoom_averages_2x2_blocks() {
        // All-white tile: every output pixel should also be white
        // (averaging white with white gives white).
        let tile: Vec<i32> = vec![0x80_ffffff_u32 as i32; 16];
        let (mut fb, mut zb) = alloc_fb(FB_DIM, FB_DIM, 0);
        blit_4x4(&mut fb, &mut zb, &tile, 8, 32768, 0, 0);
        // Tile centre (2, 2) scaled by 0.5 is 1 pixel, so the 2×2
        // output footprint is screen [7..9) × [7..9).
        for (idx, &p) in fb.iter().enumerate() {
            let x = idx % FB_DIM as usize;
            let y = idx / FB_DIM as usize;
            let inside = (7..9).contains(&x) && (7..9).contains(&y);
            let expected = if inside { 0x80_ffffff } else { 0 };
            assert_eq!(p, expected, "unexpected value at screen ({}, {})", x, y);
        }
    }

    /// 0.5× zoom rounds each pairwise average half up, so the chained
    /// result can exceed the true mean of the four source bytes.
    #[test]
    fn half_zoom_uses_chained_round_half_up() {
        let tile: Vec<i32> = (0..16).map(|i| 0x80_000000_u32 as i32 + i).collect();
        let (mut fb, mut zb) = alloc_fb(FB_DIM, FB_DIM, 0);
        blit_4x4(&mut fb, &mut zb, &tile, 8, 32768, 0, 0);
        // Block {0, 1, 4, 5}: avg(0,1)=1, avg(4,5)=5, avg(1,5)=3.
        assert_eq!(fb[at(7, 7)], 0x80_000003);
        // Block {2, 3, 6, 7}: avg(2,3)=3, avg(6,7)=7, avg(3,7)=5.
        assert_eq!(fb[at(8, 7)], 0x80_000005);
        // Block {8, 9, 12, 13}: 9 and 13 → 11.
        assert_eq!(fb[at(7, 8)], 0x80_00000b);
        // Block {10, 11, 14, 15}: 11 and 15 → 13.
        assert_eq!(fb[at(8, 8)], 0x80_00000d);
    }

    /// Alpha path with the identity modulation (`black` = all zero
    /// bytes, `white` = all 0xff bytes): opaque texels are copied,
    /// fully transparent texels are skipped, and everything else is
    /// blended onto the destination.
    #[test]
    fn alpha_path_copies_opaque_skips_transparent_and_blends_the_rest() {
        const BACKGROUND: u32 = 0x0011_2233;
        let mut tile: Vec<i32> = vec![0x80_ffffff_u32 as i32; 16];
        tile[0] = 0xff_aabbcc_u32 as i32; // alpha 0xff → passthrough
        tile[1] = 0x00_ffffff; // alpha 0x00 → skipped
        let (mut fb, mut zb) = alloc_fb(FB_DIM, FB_DIM, BACKGROUND);
        blit_4x4(&mut fb, &mut zb, &tile, 8, 1 << 16, 0, -1);

        assert_eq!(fb[at(6, 6)], 0xff_aabbcc);
        assert_eq!(fb[at(7, 6)], BACKGROUND);
        // Alpha 0x80 blends halfway (floored) from the destination
        // towards 0xff per colour byte: 0x33→0x99, 0x22→0x90,
        // 0x11→0x88. The destination's top byte is masked to 0
        // before blending, so the result's top byte is 0x80/2 = 0x40.
        assert_eq!(fb[at(8, 6)], 0x4088_9099);
        // Just outside the 4×4 footprint nothing is written.
        assert_eq!(fb[at(6, 5)], BACKGROUND);
        assert_eq!(fb[at(10, 6)], BACKGROUND);
    }

    /// Out-of-bounds blit should be a no-op (no panic / no write
    /// past the framebuffer).
    #[test]
    fn fully_offscreen_is_noop() {
        let tile: Vec<i32> = vec![0x80_aabbcc_u32 as i32; 16];
        let (mut fb, mut zb) = alloc_fb(FB_DIM, FB_DIM, 0xdead_beef);
        // Anchor far off-screen → blit clipped to zero pixels.
        blit_4x4(&mut fb, &mut zb, &tile, 10000, 1 << 16, 0, 0);
        assert!(fb.iter().all(|&p| p == 0xdead_beef));
    }
}