Skip to main content

rasterrocket_render/pipe/
aa.rs

1//! AA pipe: shape byte present, `BlendMode::Normal`, no soft mask, isolated group.
2//!
3//! Equivalent to `Splash::pipeRunAA{Mono8,RGB8,XBGR8,BGR8,CMYK8,DeviceN8}`.
4//!
5//! For each pixel:
6//! 1. `a_src = div255(a_input * shape)` — scale source alpha by AA coverage.
7//! 2. If `a_src == 255`: direct write (no read-back needed).
8//! 3. If `a_src == 0` and `a_dst == 0`: write zeros.
9//! 4. Otherwise: `a_result = a_src + a_dst - div255(a_src * a_dst)`.
10//!    `c_result = ((a_result - a_src) * c_dst + a_src * c_src) / a_result`.
11//!    Then apply transfer LUT.
12
13use std::cell::RefCell;
14
15use crate::pipe::{self, PipeSrc, PipeState};
16use crate::simd::composite_aa_rgb8_opaque;
17use crate::types::BlendMode;
18use color::Pixel;
19use color::convert::div255;
20
// Scratch storage for pattern-source spans, one instance per thread.  It starts
// out empty and is resized on demand by `render_span_aa`, so the same `Vec` is
// reused from span to span instead of a fresh buffer being built each time.
thread_local! {
    static PAT_BUF: RefCell<Vec<u8>> = const { RefCell::new(Vec::<u8>::new()) };
}
25
26/// Composite a span with per-pixel shape (AA coverage) bytes.
27///
28/// `shape[i]` is the coverage for pixel `x0 + i`.  Length must equal
29/// `x1 - x0 + 1`.
30///
31/// # Preconditions (checked in `render_span`)
32///
33/// - `pipe.use_aa_path()` — no soft mask, `BlendMode::Normal`, no group correction.
34/// - `dst_pixels.len() == count * P::BYTES`.
35/// - `shape.len() == count`.
36/// - `P::BYTES > 0`.
37#[expect(
38    clippy::too_many_arguments,
39    reason = "mirrors C++ SplashPipe API; all parameters are necessary"
40)]
41pub(crate) fn render_span_aa<P: Pixel>(
42    pipe: &PipeState<'_>,
43    src: &PipeSrc<'_>,
44    dst_pixels: &mut [u8],
45    dst_alpha: Option<&mut [u8]>,
46    shape: &[u8],
47    x0: i32,
48    x1: i32,
49    y: i32,
50) {
51    debug_assert_eq!(pipe.blend_mode, BlendMode::Normal);
52    debug_assert!(pipe.soft_mask.is_none());
53
54    #[expect(
55        clippy::cast_sign_loss,
56        reason = "x1 >= x0 is a precondition, so x1 - x0 + 1 >= 1 > 0"
57    )]
58    let count = (x1 - x0 + 1) as usize;
59    let ncomps = P::BYTES;
60
61    debug_assert_eq!(shape.len(), count, "shape length must equal pixel count");
62    debug_assert_eq!(dst_pixels.len(), count * ncomps);
63
64    let a_input = u32::from(pipe.a_input);
65
66    match src {
67        PipeSrc::Solid(color) => {
68            debug_assert_eq!(color.len(), ncomps);
69
70            // Fast path: solid RGB source, no alpha plane, identity transfer.
71            // composite_aa_rgb8_opaque processes 16 pixels/iter via [u16;16] lanes
72            // that LLVM auto-vectorizes into AVX2/AVX-512.
73            if dst_alpha.is_none() && ncomps == 3 && pipe.transfer.is_identity_rgb() {
74                composite_aa_rgb8_opaque(
75                    dst_pixels,
76                    [color[0], color[1], color[2]],
77                    pipe.a_input,
78                    shape,
79                );
80                return;
81            }
82
83            // General solid path: read colour directly — no allocation.
84            render_span_aa_inner(
85                pipe,
86                |_i| color,
87                dst_pixels,
88                dst_alpha,
89                shape,
90                count,
91                ncomps,
92                a_input,
93            );
94        }
95        PipeSrc::Pattern(pat) => {
96            // Reuse the thread-local scratch buffer — one allocation ever per thread,
97            // grown as needed, never shrunk.
98            PAT_BUF.with(|cell| {
99                let mut buf = cell.borrow_mut();
100                buf.resize(count * ncomps, 0);
101                pat.fill_span(y, x0, x1, &mut buf[..count * ncomps]);
102                render_span_aa_inner(
103                    pipe,
104                    |i| &buf[i * ncomps..(i + 1) * ncomps],
105                    dst_pixels,
106                    dst_alpha,
107                    shape,
108                    count,
109                    ncomps,
110                    a_input,
111                );
112            });
113        }
114    }
115}
116
117/// Inner AA compositing loop.
118///
119/// `src_px_at(i)` returns a `&[u8]` of length `ncomps` for the source pixel at
120/// index `i`.  For solid sources this is always the same slice; for patterns it
121/// indexes into the pre-filled scratch buffer.  Using a closure rather than a
122/// `bool` flag keeps a single code path and lets the compiler inline both variants.
123#[inline]
124#[expect(
125    clippy::too_many_arguments,
126    reason = "all params necessary; closure eliminates the solid/pattern duplication"
127)]
128fn render_span_aa_inner<'src>(
129    pipe: &PipeState<'_>,
130    src_px_at: impl Fn(usize) -> &'src [u8],
131    dst_pixels: &mut [u8],
132    dst_alpha: Option<&mut [u8]>,
133    shape: &[u8],
134    count: usize,
135    ncomps: usize,
136    a_input: u32,
137) {
138    match dst_alpha {
139        Some(dst_alpha) => {
140            debug_assert_eq!(dst_alpha.len(), count);
141            for i in 0..count {
142                let shape_v = u32::from(shape[i]);
143                let a_src = u32::from(div255(a_input * shape_v));
144                let a_dst = u32::from(dst_alpha[i]);
145
146                let (a_result, fully_opaque_src) = if a_src == 255 {
147                    (255u32, true)
148                } else if a_src == 0 && a_dst == 0 {
149                    // Transparent src over transparent dst: zero and skip.
150                    let base = i * ncomps;
151                    dst_pixels[base..base + ncomps].fill(0);
152                    dst_alpha[i] = 0;
153                    continue;
154                } else {
155                    let ar = a_src + a_dst - u32::from(div255(a_src * a_dst));
156                    (ar, false)
157                };
158
159                let base = i * ncomps;
160                let src_px = src_px_at(i);
161                let dst_px = &mut dst_pixels[base..base + ncomps];
162
163                if fully_opaque_src {
164                    // Full coverage: transfer src directly, no blending needed.
165                    pipe::apply_transfer_pixel(pipe, src_px, dst_px);
166                } else {
167                    // Partial coverage: Porter-Duff over, then apply transfer.
168                    for j in 0..ncomps {
169                        let c_src = u32::from(src_px[j]);
170                        let c_dst = u32::from(dst_px[j]);
171                        // ((a_result - a_src) * c_dst + a_src * c_src) / a_result
172                        let blended = ((a_result - a_src) * c_dst + a_src * c_src) / a_result;
173                        #[expect(
174                            clippy::cast_possible_truncation,
175                            reason = "blended = weighted average of values ≤ 255, divided by a_result ≤ 255"
176                        )]
177                        {
178                            dst_px[j] = blended as u8;
179                        }
180                    }
181                    pipe::apply_transfer_in_place(pipe, dst_px);
182                }
183                #[expect(
184                    clippy::cast_possible_truncation,
185                    reason = "a_result = a_src + a_dst - div255(a_src*a_dst) ≤ 255"
186                )]
187                {
188                    dst_alpha[i] = a_result as u8;
189                }
190            }
191        }
192        None => {
193            // No separate alpha plane: a_dst is implicitly 0xFF, a_result = 0xFF.
194            // Formula simplifies to: c = div255((255 - a_src) * c_dst + a_src * c_src).
195            for (i, &sh) in shape.iter().enumerate() {
196                let shape_v = u32::from(sh);
197                let a_src = u32::from(div255(a_input * shape_v));
198                let base = i * ncomps;
199                let src_px = src_px_at(i);
200                let dst_px = &mut dst_pixels[base..base + ncomps];
201                for j in 0..ncomps {
202                    let blended =
203                        div255((255 - a_src) * u32::from(dst_px[j]) + a_src * u32::from(src_px[j]));
204                    dst_px[j] = blended;
205                }
206                pipe::apply_transfer_in_place(pipe, dst_px);
207            }
208        }
209    }
210}
211
212#[cfg(test)]
213mod tests {
214    use super::*;
215    use crate::pipe::PipeSrc;
216    use crate::state::TransferSet;
217    use color::{Rgb8, TransferLut};
218
219    fn aa_pipe() -> PipeState<'static> {
220        PipeState {
221            blend_mode: BlendMode::Normal,
222            a_input: 255,
223            overprint_mask: 0xFFFF_FFFF,
224            overprint_additive: false,
225            transfer: TransferSet::identity_rgb(),
226            soft_mask: None,
227            alpha0: None,
228            knockout: false,
229            knockout_opacity: 255,
230            non_isolated_group: false,
231        }
232    }
233
#[test]
fn full_coverage_writes_src() {
    // Two pixels at full coverage over a half-transparent destination whose
    // colour differs from the source.
    let color = [200u8, 100, 50];
    let coverage = [255u8; 2];
    let mut pixels = vec![50u8; 6];
    let mut alpha_plane = vec![128u8; 2];

    render_span_aa::<Rgb8>(
        &aa_pipe(),
        &PipeSrc::Solid(&color),
        &mut pixels,
        Some(&mut alpha_plane),
        &coverage,
        0,
        1,
        0,
    );

    // Each pixel takes the source colour and becomes fully opaque.
    for px in pixels.chunks_exact(3) {
        assert_eq!(px, &[200, 100, 50]);
    }
    for &a in &alpha_plane {
        assert_eq!(a, 255);
    }
}
251
/// Zero coverage over a fully transparent destination must clear the pixel.
///
/// The destination colour bytes start out non-zero on purpose: with an
/// all-zero destination the assertions would also hold if the zeroing write
/// were skipped, so the test would not pin that branch.
#[test]
fn zero_coverage_over_transparent_zeroes_output() {
    let pipe = aa_pipe();
    let color = [255u8, 255, 255];
    let src = PipeSrc::Solid(&color);
    let shape = [0u8];

    // Stale colour sitting under alpha 0 — it must be wiped.
    let mut dst = vec![17u8, 34, 51];
    let mut alpha = vec![0u8; 1]; // dest also transparent

    render_span_aa::<Rgb8>(&pipe, &src, &mut dst, Some(&mut alpha), &shape, 0, 0, 0);

    assert_eq!(
        dst,
        [0, 0, 0],
        "every channel must be zeroed when both src and dst are transparent"
    );
    assert_eq!(alpha[0], 0);
}
267
#[test]
fn half_coverage_blends_correctly() {
    // White source over an opaque black destination at shape 128 (≈ 50 %).
    let color = [255u8, 255, 255];
    let coverage = [128u8];
    let mut pixels = vec![0u8; 3];
    let mut alpha_plane = vec![255u8; 1];

    let pipe = aa_pipe();
    let src = PipeSrc::Solid(&color);
    render_span_aa::<Rgb8>(
        &pipe,
        &src,
        &mut pixels,
        Some(&mut alpha_plane),
        &coverage,
        0,
        0,
        0,
    );

    // a_src    = div255(255 * 128)                  ≈ 128
    // a_result = 128 + 255 - div255(128 * 255)      ≈ 255
    // c        = ((255 - 128) * 0 + 128 * 255) / 255 ≈ 128
    let v = pixels[0];
    assert!((125..=131).contains(&v), "expected ~128, got {v}");
    assert_eq!(alpha_plane[0], 255);
}
288
#[test]
fn no_alpha_plane_uses_opaque_dst() {
    // Solid colour over a black destination, half coverage, no alpha plane.
    let color = [200u8, 100, 50];
    let coverage = [128u8];
    let mut pixels = vec![0u8; 3];

    let pipe = aa_pipe();
    let src = PipeSrc::Solid(&color);
    render_span_aa::<Rgb8>(&pipe, &src, &mut pixels, None, &coverage, 0, 0, 0);

    // The destination counts as fully opaque (a_dst = 255), so the first
    // channel is a plain blend:
    //   div255((255 - 128) * 0 + 128 * 200) ≈ 100.
    let v = pixels[0];
    assert!((95..=105).contains(&v), "expected ~100, got {v}");
}
305
/// `TransferSet::is_identity_rgb()` gates a SIMD-friendly fast path
/// (`composite_aa_rgb8_opaque`) that intentionally skips transfer-LUT
/// application. If the predicate mis-reports `true` for a non-identity
/// LUT (cargo-mutants whole-body → `true` survives without this test),
/// the fast path runs and silently drops the transfer.
///
/// Construct a non-identity LUT (channel-inverting), run `render_span_aa`,
/// and require the inversion to be visible — only the general path
/// applies it.
#[test]
fn non_identity_transfer_must_use_general_path() {
    // Inverting RGB transfer + identity gray/cmyk/device_n; the inverting
    // table is what makes this test's transfer set non-identity.
    static DN_ID: [[u8; 256]; 8] = [TransferLut::IDENTITY.0; 8];
    let id = TransferLut::IDENTITY.as_array();
    let inv = TransferLut::INVERTED.as_array();

    let pipe = PipeState {
        blend_mode: BlendMode::Normal,
        a_input: 255,
        overprint_mask: 0xFFFF_FFFF,
        overprint_additive: false,
        transfer: TransferSet {
            rgb: [inv; 3],
            gray: id,
            cmyk: [id; 4],
            device_n: &DN_ID,
        },
        soft_mask: None,
        alpha0: None,
        knockout: false,
        knockout_opacity: 255,
        non_isolated_group: false,
    };
    assert!(
        !pipe.transfer.is_identity_rgb(),
        "test prerequisite: inverting LUT must not register as identity"
    );

    let color = [200u8, 100, 50];
    let src = PipeSrc::Solid(&color);
    // Full coverage: the blend is expected to leave the source colour, so the
    // only visible change comes from the transfer.
    let shape = [255u8; 4];
    let mut dst = vec![0u8; 12];

    render_span_aa::<Rgb8>(&pipe, &src, &mut dst, None, &shape, 0, 3, 0);

    // General path with no alpha plane: the `None` arm of
    // `render_span_aa_inner` blends each channel with `div255` and then calls
    // `apply_transfer_in_place`, which emits `255 - src` here. The fast path
    // would emit `src` unchanged.
    for px in 0..4 {
        assert_eq!(
            &dst[px * 3..px * 3 + 3],
            &[55, 155, 205],
            "pixel {px}: transfer LUT must invert each channel; \
             if the fast-path gate mis-fired, dst would be [200, 100, 50]"
        );
    }
}
363
/// Guards the other direction of the `TransferSet::is_identity_rgb()` gate:
/// for a genuinely-identity LUT the call must reach `composite_aa_rgb8_opaque`.
/// If the predicate mis-reported `false` (cargo-mutants whole-body → `false`),
/// the general path would run instead.
///
/// The expected bytes are recomputed here with the fast path's
/// `(v + 255) >> 8` rounding, so a run that went through the general path's
/// `color::convert::div255` is expected to mismatch on at least one byte of
/// this input set.
#[test]
fn identity_transfer_takes_fast_path_with_pinned_bytes() {
    let pipe = aa_pipe();
    assert!(
        pipe.transfer.is_identity_rgb(),
        "test prerequisite: aa_pipe() must register as identity"
    );

    // 17 pixels: crosses the LANE=16 boundary, exercising both the
    // chunked path and the scalar tail.
    let color = [200u8, 100, 50];
    let src = PipeSrc::Solid(&color);
    let shape: Vec<u8> = (0u8..17).map(|n| n.wrapping_mul(17)).collect();
    let initial: Vec<u8> = (0u8..51).map(|n| n.wrapping_mul(13)).collect();

    let mut dst_fast = initial.clone();
    render_span_aa::<Rgb8>(&pipe, &src, &mut dst_fast, None, &shape, 0, 16, 0);

    // Reference, using the fast path's formula:
    //   a_src   = (a_input * shape[i] + 255) >> 8
    //   c_out_j = ((255 - a_src) * c_dst[j] + a_src * src[j] + 255) >> 8
    let a_in = 255u16;
    let mut expected = initial;
    for (px, &sh) in expected.chunks_exact_mut(3).zip(&shape) {
        let a_src = (a_in * u16::from(sh) + 255) >> 8;
        let inv = 255 - a_src;
        for (chan, &sc) in px.iter_mut().zip(&color) {
            let v = (inv * u16::from(*chan) + a_src * u16::from(sc) + 255) >> 8;
            // `v` is bounded by the fast path's div255 (`(.. + 255) >> 8` ≤ 255).
            *chan = u8::try_from(v).expect("fast-path div255 result must fit in u8");
        }
    }

    assert_eq!(
        dst_fast, expected,
        "identity-LUT path must use the fast path's `(v + 255) >> 8` div255"
    );
}
412
413    // ── Cross-path 1-LSB byte-equality fixture ───────────────────────────────
414    //
// The fast path (`composite_aa_rgb8_opaque` via `(v + 255) >> 8`) and the
// general no-alpha path (`render_span_aa_inner` via
// `color::convert::div255`'s exact `(v + (v >> 8) + 0x80) >> 8`) must agree
// to within 1 LSB per channel on every gate-eligible input.
419    //
420    // This pins the cross-path invariant that the deferred v0.9.2 audit
421    // (audit/2026-05-11-avx512-fastpath-vs-general-byte-equality.md) flagged:
422    // each path's *internal* byte values are pinned by sibling tests, but a
423    // regression that swapped one `div255` shape for the other — or that
424    // mis-classified an identity-LUT call as needing the general path, or
425    // vice-versa — could silently shift output by 1 LSB and only be caught
426    // by the pixel-diff integration suite against pdftoppm.
427    //
428    // Strategy:
429    //   1. Build a deterministic corpus of (color, a_input, shape, dst)
430    //      tuples that span the gate-firing domain.
431    //   2. Run the fast path via `render_span_aa::<Rgb8>` with
432    //      `dst_alpha = None` and an identity transfer (so the gate fires).
433    //   3. Compute the general-path reference using the exact
434    //      `color::convert::div255` — same algebra as the `None` arm of
435    //      `render_span_aa_inner`, with the identity transfer step elided.
436    //   4. Assert `|fast[i] - reference[i]| <= 1` for every byte.
437    //
// Why `<= 1` and not `==`: the two `div255` formulas round differently.
// The fast path's `(v + 255) >> 8` is the upper-rounding approximation
// (correct within ±1 LSB); the general path's `(v + (v >> 8) + 0x80) >> 8`
// is the exact (round-to-nearest) form.  A concrete divergence at numerator
// `v = 100`:
443    //     fast  = (100 + 255) >> 8 = 355 >> 8 = 1
444    //     exact = (100 +   0 + 128) >> 8 = 228 >> 8 = 0
445    // — 1 LSB apart.  The corpus below produces this and similar
446    // divergences on alternating-shape × low-`a_input` cases.  What
447    // matters for downstream correctness is the ceiling, not exactness.
448
/// Span lengths, in pixels, that the corpus iterates over.  Relative to the
/// fast path's 16-pixel chunk (LANE) they cover:
/// * `7`  — pure scalar tail (count < LANE), no chunked branch.
/// * `16` — exactly one LANE chunk, zero tail.
/// * `17` — one LANE chunk + 1-pixel tail (chunk-tail boundary).
/// * `23` — one LANE chunk + 7-pixel tail (typical mixed-mode width).
const ONE_LSB_SPAN_LENGTHS: [usize; 4] = [7, 16, 17, 23];
455
456    // ── Corpus pattern generators (free `fn` items, no allocation) ────────────
457
/// Full coverage at every index.
fn shape_full(_: usize) -> u8 {
    u8::MAX
}
/// Zero coverage at every index.
fn shape_zero(_: usize) -> u8 {
    u8::MIN
}
/// Linear coverage ramp: 11 per index (truncating cast to `u8`).
#[expect(
    clippy::cast_possible_truncation,
    reason = "i ∈ [0, max span = 23) * 11 ≤ 242 fits u8"
)]
fn shape_ramp(i: usize) -> u8 {
    let scaled = i * 11;
    scaled as u8
}
/// Alternates full coverage (even index) with 64 (odd index).
fn shape_alt(i: usize) -> u8 {
    let on_even = i.is_multiple_of(2);
    if on_even { 255 } else { 64 }
}
474
/// Destination byte pattern: all zero.
fn dst_black(_: usize) -> u8 {
    u8::MIN
}
/// Destination byte pattern: all 255.
fn dst_white(_: usize) -> u8 {
    u8::MAX
}
/// Destination byte pattern: 7 per byte index, wrapping at 256.
#[expect(
    clippy::cast_possible_truncation,
    reason = "(i * 7) % 256 fits u8 by construction"
)]
fn dst_ramp(i: usize) -> u8 {
    let wrapped = (i * 7) % 256;
    wrapped as u8
}
/// Destination byte pattern: 0 on even byte indices, 200 on odd ones.
fn dst_alt(i: usize) -> u8 {
    let on_even = i.is_multiple_of(2);
    if on_even { 0 } else { 200 }
}
491
492    /// Run the gate-eligible (no alpha plane, identity transfer) fast path
493    /// over `initial`, returning the result. The test's "ground truth" leg
494    /// is `run_exact_reference`.
495    fn run_fast_path(color: [u8; 3], a_input: u8, shape: &[u8], initial: &[u8]) -> Vec<u8> {
496        let mut dst = initial.to_vec();
497        let mut pipe = aa_pipe();
498        pipe.a_input = a_input;
499        let src = PipeSrc::Solid(color.as_slice());
500        let count = shape.len();
501        assert!(count >= 1, "render_span_aa requires count >= 1");
502        let x1: i32 = i32::try_from(count - 1)
503            .expect("ONE_LSB_SPAN_LENGTHS values must fit in i32 for render_span_aa's x0..=x1 API");
504        render_span_aa::<Rgb8>(&pipe, &src, &mut dst, None, shape, 0, x1, 0);
505        dst
506    }
507
508    /// Inline algebra of the general no-alpha path (`aa.rs:192-207`) under
509    /// an identity transfer — `apply_transfer_in_place` is a no-op so the
510    /// loop reduces to per-channel
511    /// `div255((255 - a_src) * c_dst + a_src * c_src)` using the exact
512    /// `color::convert::div255`.
513    fn run_exact_reference(color: [u8; 3], a_input: u8, shape: &[u8], initial: &[u8]) -> Vec<u8> {
514        use color::convert::div255 as exact_div255;
515        let mut dst = initial.to_vec();
516        let a_in_u32 = u32::from(a_input);
517        for (i, &sh) in shape.iter().enumerate() {
518            let a_src = u32::from(exact_div255(a_in_u32 * u32::from(sh)));
519            let base = i * 3;
520            for j in 0..3 {
521                let c_dst = u32::from(dst[base + j]);
522                let c_src = u32::from(color[j]);
523                dst[base + j] = exact_div255((255 - a_src) * c_dst + a_src * c_src);
524            }
525        }
526        dst
527    }
528
/// Per-byte `|fast - exact| ≤ 1` assertion. Returns the number of bytes
/// that diverged by exactly 1 LSB (the trip-wire counter for the outer
/// "corpus actually exercises the divergence" sanity check).
///
/// `color`, `a_input`, `shape` and `initial` are only used to build the
/// failure message; byte `i` is reported together with `shape[i / 3]` and
/// `initial[i]`.
///
/// # Panics
///
/// - If `fast` and `exact` differ in length.  Without this check `zip` would
///   silently stop at the shorter slice and leave the surplus bytes unchecked.
/// - If any pair of bytes differs by more than 1.
fn assert_within_one_lsb(
    fast: &[u8],
    exact: &[u8],
    color: [u8; 3],
    a_input: u8,
    shape: &[u8],
    initial: &[u8],
) -> usize {
    assert_eq!(
        fast.len(),
        exact.len(),
        "fast and exact outputs must cover the same number of bytes"
    );

    let mut one_lsb_count = 0usize;
    for (i, (&f, &r)) in fast.iter().zip(exact).enumerate() {
        let diff = f.abs_diff(r);
        assert!(
            diff <= 1,
            "byte {i}: fast={f}, exact={r}, diff={diff}; \
             colour={color:?}, a_input={a_input}, \
             shape[{i_px}]={sh}, initial[{i}]={init}",
            i_px = i / 3,
            sh = shape[i / 3],
            init = initial[i],
        );
        if diff == 1 {
            one_lsb_count += 1;
        }
    }
    one_lsb_count
}
558
#[test]
fn fast_path_matches_general_div255_within_one_lsb() {
    assert!(
        aa_pipe().transfer.is_identity_rgb(),
        "test prerequisite: aa_pipe() must register as identity so the gate fires"
    );

    // Corpus axes: source colours, alpha inputs, shape patterns, initial
    // destinations and span lengths.  The loops below take their full
    // cross-product.
    let colours: [[u8; 3]; 5] = [
        [0, 0, 0],       // black
        [255, 255, 255], // white
        [200, 100, 50],  // mid-saturated warm
        [1, 254, 127],   // off-by-one boundaries
        [128, 128, 128], // 50%-grey
    ];
    let a_inputs: [u8; 4] = [0, 1, 128, 255];
    let shape_patterns: [fn(usize) -> u8; 4] = [shape_full, shape_zero, shape_ramp, shape_alt];
    let dst_patterns: [fn(usize) -> u8; 4] = [dst_black, dst_white, dst_ramp, dst_alt];

    let mut total_cases = 0usize;
    let mut total_bytes_at_one_lsb = 0usize;

    for &count in &ONE_LSB_SPAN_LENGTHS {
        for &color in &colours {
            for &a_input in &a_inputs {
                for &sh_fn in &shape_patterns {
                    // The shape depends only on the span length and pattern,
                    // so build it once per destination sweep.
                    let shape: Vec<u8> = (0..count).map(sh_fn).collect();
                    for &dst_fn in &dst_patterns {
                        let initial: Vec<u8> = (0..count * 3).map(dst_fn).collect();
                        let fast = run_fast_path(color, a_input, &shape, &initial);
                        let exact = run_exact_reference(color, a_input, &shape, &initial);
                        let at_one_lsb =
                            assert_within_one_lsb(&fast, &exact, color, a_input, &shape, &initial);
                        total_bytes_at_one_lsb += at_one_lsb;
                        total_cases += 1;
                    }
                }
            }
        }
    }

    // Trip-wire: if no byte anywhere differs by exactly 1 LSB, the test has
    // quietly become a byte-equality check and no longer pins the ≤1 LSB
    // ceiling.  Widen the corpus before trusting the ceiling in that case.
    assert!(
        total_bytes_at_one_lsb > 0,
        "corpus of {total_cases} cases produced no 1-LSB divergence — \
         corpus is too narrow to actually pin the ≤1 LSB ceiling"
    );
}
613}