rasterrocket_render/pipe/aa.rs
1//! AA pipe: shape byte present, `BlendMode::Normal`, no soft mask, isolated group.
2//!
3//! Equivalent to `Splash::pipeRunAA{Mono8,RGB8,XBGR8,BGR8,CMYK8,DeviceN8}`.
4//!
5//! For each pixel:
6//! 1. `a_src = div255(a_input * shape)` — scale source alpha by AA coverage.
7//! 2. If `a_src == 255`: direct write (no read-back needed).
8//! 3. If `a_src == 0` and `a_dst == 0`: write zeros.
9//! 4. Otherwise: `a_result = a_src + a_dst - div255(a_src * a_dst)`.
10//! `c_result = ((a_result - a_src) * c_dst + a_src * c_src) / a_result`.
11//! Then apply transfer LUT.
12
13use std::cell::RefCell;
14
15use crate::pipe::{self, PipeSrc, PipeState};
16use crate::simd::composite_aa_rgb8_opaque;
17use crate::types::BlendMode;
18use color::Pixel;
19use color::convert::div255;
20
21// Per-thread scratch buffer for pattern spans — grow-never-shrink, zero per-span alloc.
22thread_local! {
23 static PAT_BUF: RefCell<Vec<u8>> = const { RefCell::new(Vec::new()) };
24}
25
26/// Composite a span with per-pixel shape (AA coverage) bytes.
27///
28/// `shape[i]` is the coverage for pixel `x0 + i`. Length must equal
29/// `x1 - x0 + 1`.
30///
31/// # Preconditions (checked in `render_span`)
32///
33/// - `pipe.use_aa_path()` — no soft mask, `BlendMode::Normal`, no group correction.
34/// - `dst_pixels.len() == count * P::BYTES`.
35/// - `shape.len() == count`.
36/// - `P::BYTES > 0`.
37#[expect(
38 clippy::too_many_arguments,
39 reason = "mirrors C++ SplashPipe API; all parameters are necessary"
40)]
41pub(crate) fn render_span_aa<P: Pixel>(
42 pipe: &PipeState<'_>,
43 src: &PipeSrc<'_>,
44 dst_pixels: &mut [u8],
45 dst_alpha: Option<&mut [u8]>,
46 shape: &[u8],
47 x0: i32,
48 x1: i32,
49 y: i32,
50) {
51 debug_assert_eq!(pipe.blend_mode, BlendMode::Normal);
52 debug_assert!(pipe.soft_mask.is_none());
53
54 #[expect(
55 clippy::cast_sign_loss,
56 reason = "x1 >= x0 is a precondition, so x1 - x0 + 1 >= 1 > 0"
57 )]
58 let count = (x1 - x0 + 1) as usize;
59 let ncomps = P::BYTES;
60
61 debug_assert_eq!(shape.len(), count, "shape length must equal pixel count");
62 debug_assert_eq!(dst_pixels.len(), count * ncomps);
63
64 let a_input = u32::from(pipe.a_input);
65
66 match src {
67 PipeSrc::Solid(color) => {
68 debug_assert_eq!(color.len(), ncomps);
69
70 // Fast path: solid RGB source, no alpha plane, identity transfer.
71 // composite_aa_rgb8_opaque processes 16 pixels/iter via [u16;16] lanes
72 // that LLVM auto-vectorizes into AVX2/AVX-512.
73 if dst_alpha.is_none() && ncomps == 3 && pipe.transfer.is_identity_rgb() {
74 composite_aa_rgb8_opaque(
75 dst_pixels,
76 [color[0], color[1], color[2]],
77 pipe.a_input,
78 shape,
79 );
80 return;
81 }
82
83 // General solid path: read colour directly — no allocation.
84 render_span_aa_inner(
85 pipe,
86 |_i| color,
87 dst_pixels,
88 dst_alpha,
89 shape,
90 count,
91 ncomps,
92 a_input,
93 );
94 }
95 PipeSrc::Pattern(pat) => {
96 // Reuse the thread-local scratch buffer — one allocation ever per thread,
97 // grown as needed, never shrunk.
98 PAT_BUF.with(|cell| {
99 let mut buf = cell.borrow_mut();
100 buf.resize(count * ncomps, 0);
101 pat.fill_span(y, x0, x1, &mut buf[..count * ncomps]);
102 render_span_aa_inner(
103 pipe,
104 |i| &buf[i * ncomps..(i + 1) * ncomps],
105 dst_pixels,
106 dst_alpha,
107 shape,
108 count,
109 ncomps,
110 a_input,
111 );
112 });
113 }
114 }
115}
116
117/// Inner AA compositing loop.
118///
119/// `src_px_at(i)` returns a `&[u8]` of length `ncomps` for the source pixel at
120/// index `i`. For solid sources this is always the same slice; for patterns it
121/// indexes into the pre-filled scratch buffer. Using a closure rather than a
122/// `bool` flag keeps a single code path and lets the compiler inline both variants.
123#[inline]
124#[expect(
125 clippy::too_many_arguments,
126 reason = "all params necessary; closure eliminates the solid/pattern duplication"
127)]
128fn render_span_aa_inner<'src>(
129 pipe: &PipeState<'_>,
130 src_px_at: impl Fn(usize) -> &'src [u8],
131 dst_pixels: &mut [u8],
132 dst_alpha: Option<&mut [u8]>,
133 shape: &[u8],
134 count: usize,
135 ncomps: usize,
136 a_input: u32,
137) {
138 match dst_alpha {
139 Some(dst_alpha) => {
140 debug_assert_eq!(dst_alpha.len(), count);
141 for i in 0..count {
142 let shape_v = u32::from(shape[i]);
143 let a_src = u32::from(div255(a_input * shape_v));
144 let a_dst = u32::from(dst_alpha[i]);
145
146 let (a_result, fully_opaque_src) = if a_src == 255 {
147 (255u32, true)
148 } else if a_src == 0 && a_dst == 0 {
149 // Transparent src over transparent dst: zero and skip.
150 let base = i * ncomps;
151 dst_pixels[base..base + ncomps].fill(0);
152 dst_alpha[i] = 0;
153 continue;
154 } else {
155 let ar = a_src + a_dst - u32::from(div255(a_src * a_dst));
156 (ar, false)
157 };
158
159 let base = i * ncomps;
160 let src_px = src_px_at(i);
161 let dst_px = &mut dst_pixels[base..base + ncomps];
162
163 if fully_opaque_src {
164 // Full coverage: transfer src directly, no blending needed.
165 pipe::apply_transfer_pixel(pipe, src_px, dst_px);
166 } else {
167 // Partial coverage: Porter-Duff over, then apply transfer.
168 for j in 0..ncomps {
169 let c_src = u32::from(src_px[j]);
170 let c_dst = u32::from(dst_px[j]);
171 // ((a_result - a_src) * c_dst + a_src * c_src) / a_result
172 let blended = ((a_result - a_src) * c_dst + a_src * c_src) / a_result;
173 #[expect(
174 clippy::cast_possible_truncation,
175 reason = "blended = weighted average of values ≤ 255, divided by a_result ≤ 255"
176 )]
177 {
178 dst_px[j] = blended as u8;
179 }
180 }
181 pipe::apply_transfer_in_place(pipe, dst_px);
182 }
183 #[expect(
184 clippy::cast_possible_truncation,
185 reason = "a_result = a_src + a_dst - div255(a_src*a_dst) ≤ 255"
186 )]
187 {
188 dst_alpha[i] = a_result as u8;
189 }
190 }
191 }
192 None => {
193 // No separate alpha plane: a_dst is implicitly 0xFF, a_result = 0xFF.
194 // Formula simplifies to: c = div255((255 - a_src) * c_dst + a_src * c_src).
195 for (i, &sh) in shape.iter().enumerate() {
196 let shape_v = u32::from(sh);
197 let a_src = u32::from(div255(a_input * shape_v));
198 let base = i * ncomps;
199 let src_px = src_px_at(i);
200 let dst_px = &mut dst_pixels[base..base + ncomps];
201 for j in 0..ncomps {
202 let blended =
203 div255((255 - a_src) * u32::from(dst_px[j]) + a_src * u32::from(src_px[j]));
204 dst_px[j] = blended;
205 }
206 pipe::apply_transfer_in_place(pipe, dst_px);
207 }
208 }
209 }
210}
211
212#[cfg(test)]
213mod tests {
214 use super::*;
215 use crate::pipe::PipeSrc;
216 use crate::state::TransferSet;
217 use color::{Rgb8, TransferLut};
218
219 fn aa_pipe() -> PipeState<'static> {
220 PipeState {
221 blend_mode: BlendMode::Normal,
222 a_input: 255,
223 overprint_mask: 0xFFFF_FFFF,
224 overprint_additive: false,
225 transfer: TransferSet::identity_rgb(),
226 soft_mask: None,
227 alpha0: None,
228 knockout: false,
229 knockout_opacity: 255,
230 non_isolated_group: false,
231 }
232 }
233
234 #[test]
235 fn full_coverage_writes_src() {
236 let pipe = aa_pipe();
237 let color = [200u8, 100, 50];
238 let src = PipeSrc::Solid(&color);
239 let shape = [255u8, 255];
240
241 let mut dst = vec![50u8; 6]; // two pixels, initially different from src
242 let mut alpha = vec![128u8; 2];
243
244 render_span_aa::<Rgb8>(&pipe, &src, &mut dst, Some(&mut alpha), &shape, 0, 1, 0);
245
246 assert_eq!(&dst[0..3], &[200, 100, 50]);
247 assert_eq!(&dst[3..6], &[200, 100, 50]);
248 assert_eq!(alpha[0], 255);
249 assert_eq!(alpha[1], 255);
250 }
251
252 #[test]
253 fn zero_coverage_over_transparent_zeroes_output() {
254 let pipe = aa_pipe();
255 let color = [255u8, 255, 255];
256 let src = PipeSrc::Solid(&color);
257 let shape = [0u8];
258
259 let mut dst = vec![0u8; 3];
260 let mut alpha = vec![0u8; 1]; // dest also transparent
261
262 render_span_aa::<Rgb8>(&pipe, &src, &mut dst, Some(&mut alpha), &shape, 0, 0, 0);
263
264 assert_eq!(dst[0], 0);
265 assert_eq!(alpha[0], 0);
266 }
267
268 #[test]
269 fn half_coverage_blends_correctly() {
270 let pipe = aa_pipe();
271 // src = white (255,255,255), dst = black (0,0,0), shape ≈ 128 ≈ 50%.
272 let color = [255u8, 255, 255];
273 let src = PipeSrc::Solid(&color);
274 let shape = [128u8];
275
276 let mut dst = vec![0u8; 3];
277 let mut alpha = vec![255u8; 1]; // fully opaque destination
278
279 render_span_aa::<Rgb8>(&pipe, &src, &mut dst, Some(&mut alpha), &shape, 0, 0, 0);
280
281 // a_src = div255(255 * 128) ≈ 128.
282 // a_result = 128 + 255 - div255(128 * 255) ≈ 255.
283 // c = ((255 - 128) * 0 + 128 * 255) / 255 ≈ 128.
284 let v = dst[0];
285 assert!((125..=131).contains(&v), "expected ~128, got {v}");
286 assert_eq!(alpha[0], 255);
287 }
288
289 #[test]
290 fn no_alpha_plane_uses_opaque_dst() {
291 let pipe = aa_pipe();
292 let color = [200u8, 100, 50];
293 let src = PipeSrc::Solid(&color);
294 let shape = [128u8];
295
296 let mut dst = vec![0u8; 3]; // black dst
297
298 render_span_aa::<Rgb8>(&pipe, &src, &mut dst, None, &shape, 0, 0, 0);
299
300 // With implicit a_dst=255: result should be a blend.
301 // Expected: div255((255 - 128) * 0 + 128 * 200) ≈ 100.
302 let v = dst[0];
303 assert!((95..=105).contains(&v), "expected ~100, got {v}");
304 }
305
306 /// `TransferSet::is_identity_rgb()` gates a SIMD-friendly fast path
307 /// (`composite_aa_rgb8_opaque`) that intentionally skips transfer-LUT
308 /// application. If the predicate mis-reports `true` for a non-identity
309 /// LUT (cargo-mutants whole-body → `true` survives without this test),
310 /// the fast path runs and silently drops the transfer.
311 ///
312 /// Construct a non-identity LUT (channel-inverting), run `render_span_aa`,
313 /// and require the inversion to be visible — only the general path
314 /// applies it.
315 #[test]
316 fn non_identity_transfer_must_use_general_path() {
317 // Inverting RGB transfer + identity gray/cmyk/device_n; the inverting
318 // table is what makes this test's transfer set non-identity.
319 static DN_ID: [[u8; 256]; 8] = [TransferLut::IDENTITY.0; 8];
320 let id = TransferLut::IDENTITY.as_array();
321 let inv = TransferLut::INVERTED.as_array();
322
323 let pipe = PipeState {
324 blend_mode: BlendMode::Normal,
325 a_input: 255,
326 overprint_mask: 0xFFFF_FFFF,
327 overprint_additive: false,
328 transfer: TransferSet {
329 rgb: [inv; 3],
330 gray: id,
331 cmyk: [id; 4],
332 device_n: &DN_ID,
333 },
334 soft_mask: None,
335 alpha0: None,
336 knockout: false,
337 knockout_opacity: 255,
338 non_isolated_group: false,
339 };
340 assert!(
341 !pipe.transfer.is_identity_rgb(),
342 "test prerequisite: inverting LUT must not register as identity"
343 );
344
345 let color = [200u8, 100, 50];
346 let src = PipeSrc::Solid(&color);
347 let shape = [255u8; 4]; // full coverage → general path writes src, then applies transfer
348 let mut dst = vec![0u8; 12];
349
350 render_span_aa::<Rgb8>(&pipe, &src, &mut dst, None, &shape, 0, 3, 0);
351
352 // General path: full coverage → `apply_transfer_pixel` runs and
353 // emits `255 - src`. Fast path would emit `src` unchanged.
354 for px in 0..4 {
355 assert_eq!(
356 &dst[px * 3..px * 3 + 3],
357 &[55, 155, 205],
358 "pixel {px}: transfer LUT must invert each channel; \
359 if the fast-path gate mis-fired, dst would be [200, 100, 50]"
360 );
361 }
362 }
363
364 /// `TransferSet::is_identity_rgb()` gates the fast path; when it returns
365 /// `true`, `composite_aa_rgb8_opaque` runs. If the predicate mis-reports
366 /// `false` for a genuinely-identity LUT (cargo-mutants whole-body
367 /// → `false` survives without this test), the general path runs and
368 /// uses a higher-precision `div255` than the fast path, producing
369 /// different output bytes on some inputs.
370 ///
371 /// Pin the byte values that the fast path produces on a representative
372 /// large span; the general path's higher-precision `div255` would shift
373 /// at least one byte by ≥ 1 LSB on this input set.
374 #[test]
375 fn identity_transfer_takes_fast_path_with_pinned_bytes() {
376 let pipe = aa_pipe();
377 assert!(
378 pipe.transfer.is_identity_rgb(),
379 "test prerequisite: aa_pipe() must register as identity"
380 );
381
382 // 17 pixels: crosses the LANE=16 boundary, exercising both the
383 // chunked path and the scalar tail.
384 let color = [200u8, 100, 50];
385 let src = PipeSrc::Solid(&color);
386 let shape: Vec<u8> = (0u8..17).map(|i| i.wrapping_mul(17)).collect();
387 let initial: Vec<u8> = (0u8..51).map(|i| i.wrapping_mul(13)).collect();
388 let mut dst_fast = initial.clone();
389
390 render_span_aa::<Rgb8>(&pipe, &src, &mut dst_fast, None, &shape, 0, 16, 0);
391
392 // Compute the reference via the fast path's formula:
393 // a_src = (a_input * shape[i] + 255) >> 8
394 // c_out_j = ((255 - a_src) * c_dst[j] + a_src * src[j] + 255) >> 8
395 let a_in = 255u16;
396 let mut expected = initial;
397 for (i, &sh) in shape.iter().enumerate() {
398 let a_src = (a_in * u16::from(sh) + 255) >> 8;
399 let inv = 255 - a_src;
400 let b = i * 3;
401 for (j, sc) in color.iter().enumerate() {
402 let v = (inv * u16::from(expected[b + j]) + a_src * u16::from(*sc) + 255) >> 8;
403 // `v` is bounded by the fast path's div255 (`(.. + 255) >> 8` ≤ 255).
404 expected[b + j] = u8::try_from(v).expect("fast-path div255 result must fit in u8");
405 }
406 }
407 assert_eq!(
408 dst_fast, expected,
409 "identity-LUT path must use the fast path's `(v + 255) >> 8` div255"
410 );
411 }
412
413 // ── Cross-path 1-LSB byte-equality fixture ───────────────────────────────
414 //
415 // The fast path (`composite_aa_rgb8_opaque` via `(v+255)>>8`) and the
416 // general no-alpha path (`render_span_aa_inner` via
417 // `color::convert::div255`'s exact `(v + v>>8 + 0x80) >> 8`) must agree
418 // to within 1 LSB per channel on every gate-eligible input.
419 //
420 // This pins the cross-path invariant that the deferred v0.9.2 audit
421 // (audit/2026-05-11-avx512-fastpath-vs-general-byte-equality.md) flagged:
422 // each path's *internal* byte values are pinned by sibling tests, but a
423 // regression that swapped one `div255` shape for the other — or that
424 // mis-classified an identity-LUT call as needing the general path, or
425 // vice-versa — could silently shift output by 1 LSB and only be caught
426 // by the pixel-diff integration suite against pdftoppm.
427 //
428 // Strategy:
429 // 1. Build a deterministic corpus of (color, a_input, shape, dst)
430 // tuples that span the gate-firing domain.
431 // 2. Run the fast path via `render_span_aa::<Rgb8>` with
432 // `dst_alpha = None` and an identity transfer (so the gate fires).
433 // 3. Compute the general-path reference using the exact
434 // `color::convert::div255` — same algebra as the `None` arm of
435 // `render_span_aa_inner`, with the identity transfer step elided.
436 // 4. Assert `|fast[i] - reference[i]| <= 1` for every byte.
437 //
438 // Why `<= 1` and not `==`: the two `div255` formulas round differently.
439 // The fast path's `(v + 255) >> 8` is the upper-rounding approximation
440 // (correct within ±1 LSB); the general path's `(v + v>>8 + 0x80) >> 8`
441 // is the exact (banker's) form. A concrete divergence at numerator
442 // `v = 100`:
443 // fast = (100 + 255) >> 8 = 355 >> 8 = 1
444 // exact = (100 + 0 + 128) >> 8 = 228 >> 8 = 0
445 // — 1 LSB apart. The corpus below produces this and similar
446 // divergences on alternating-shape × low-`a_input` cases. What
447 // matters for downstream correctness is the ceiling, not exactness.
448
449 /// Span lengths the corpus iterates over. Covers:
450 /// * `7` — pure scalar tail (count < LANE), no chunked branch.
451 /// * `16` — exactly one LANE chunk, zero tail.
452 /// * `17` — one LANE chunk + 1-byte tail (chunk-tail boundary).
453 /// * `23` — one LANE chunk + 7-byte tail (typical mixed-mode width).
454 const ONE_LSB_SPAN_LENGTHS: [usize; 4] = [7, 16, 17, 23];
455
456 // ── Corpus pattern generators (free `fn` items, no allocation) ────────────
457
458 fn shape_full(_: usize) -> u8 {
459 255
460 }
461 fn shape_zero(_: usize) -> u8 {
462 0
463 }
464 #[expect(
465 clippy::cast_possible_truncation,
466 reason = "i ∈ [0, max span = 23) * 11 ≤ 242 fits u8"
467 )]
468 fn shape_ramp(i: usize) -> u8 {
469 (i * 11) as u8
470 }
471 fn shape_alt(i: usize) -> u8 {
472 if i.is_multiple_of(2) { 255 } else { 64 }
473 }
474
475 fn dst_black(_: usize) -> u8 {
476 0
477 }
478 fn dst_white(_: usize) -> u8 {
479 255
480 }
481 #[expect(
482 clippy::cast_possible_truncation,
483 reason = "(i * 7) % 256 fits u8 by construction"
484 )]
485 fn dst_ramp(i: usize) -> u8 {
486 ((i * 7) % 256) as u8
487 }
488 fn dst_alt(i: usize) -> u8 {
489 if i.is_multiple_of(2) { 0 } else { 200 }
490 }
491
492 /// Run the gate-eligible (no alpha plane, identity transfer) fast path
493 /// over `initial`, returning the result. The test's "ground truth" leg
494 /// is `run_exact_reference`.
495 fn run_fast_path(color: [u8; 3], a_input: u8, shape: &[u8], initial: &[u8]) -> Vec<u8> {
496 let mut dst = initial.to_vec();
497 let mut pipe = aa_pipe();
498 pipe.a_input = a_input;
499 let src = PipeSrc::Solid(color.as_slice());
500 let count = shape.len();
501 assert!(count >= 1, "render_span_aa requires count >= 1");
502 let x1: i32 = i32::try_from(count - 1)
503 .expect("ONE_LSB_SPAN_LENGTHS values must fit in i32 for render_span_aa's x0..=x1 API");
504 render_span_aa::<Rgb8>(&pipe, &src, &mut dst, None, shape, 0, x1, 0);
505 dst
506 }
507
508 /// Inline algebra of the general no-alpha path (`aa.rs:192-207`) under
509 /// an identity transfer — `apply_transfer_in_place` is a no-op so the
510 /// loop reduces to per-channel
511 /// `div255((255 - a_src) * c_dst + a_src * c_src)` using the exact
512 /// `color::convert::div255`.
513 fn run_exact_reference(color: [u8; 3], a_input: u8, shape: &[u8], initial: &[u8]) -> Vec<u8> {
514 use color::convert::div255 as exact_div255;
515 let mut dst = initial.to_vec();
516 let a_in_u32 = u32::from(a_input);
517 for (i, &sh) in shape.iter().enumerate() {
518 let a_src = u32::from(exact_div255(a_in_u32 * u32::from(sh)));
519 let base = i * 3;
520 for j in 0..3 {
521 let c_dst = u32::from(dst[base + j]);
522 let c_src = u32::from(color[j]);
523 dst[base + j] = exact_div255((255 - a_src) * c_dst + a_src * c_src);
524 }
525 }
526 dst
527 }
528
529 /// Per-byte `|fast - exact| ≤ 1` assertion. Returns the number of bytes
530 /// that diverged by exactly 1 LSB (the trip-wire counter for the outer
531 /// "corpus actually exercises the divergence" sanity check).
532 fn assert_within_one_lsb(
533 fast: &[u8],
534 exact: &[u8],
535 color: [u8; 3],
536 a_input: u8,
537 shape: &[u8],
538 initial: &[u8],
539 ) -> usize {
540 let mut one_lsb_count = 0usize;
541 for (i, (&f, &r)) in fast.iter().zip(exact.iter()).enumerate() {
542 let diff = i32::from(f).abs_diff(i32::from(r));
543 assert!(
544 diff <= 1,
545 "byte {i}: fast={f}, exact={r}, diff={diff}; \
546 colour={color:?}, a_input={a_input}, \
547 shape[{i_px}]={sh}, initial[{i}]={init}",
548 i_px = i / 3,
549 sh = shape[i / 3],
550 init = initial[i],
551 );
552 if diff == 1 {
553 one_lsb_count += 1;
554 }
555 }
556 one_lsb_count
557 }
558
559 #[test]
560 fn fast_path_matches_general_div255_within_one_lsb() {
561 let pipe = aa_pipe();
562 assert!(
563 pipe.transfer.is_identity_rgb(),
564 "test prerequisite: aa_pipe() must register as identity so the gate fires"
565 );
566
567 // Corpus: cross-product of representative source colours, alpha
568 // inputs, shape patterns, initial destinations, and span lengths.
569 let colours: [[u8; 3]; 5] = [
570 [0, 0, 0], // black
571 [255, 255, 255], // white
572 [200, 100, 50], // mid-saturated warm
573 [1, 254, 127], // off-by-one boundaries
574 [128, 128, 128], // 50%-grey
575 ];
576 let a_inputs: [u8; 4] = [0, 1, 128, 255];
577 let shape_patterns: [fn(usize) -> u8; 4] = [shape_full, shape_zero, shape_ramp, shape_alt];
578 let dst_patterns: [fn(usize) -> u8; 4] = [dst_black, dst_white, dst_ramp, dst_alt];
579
580 let mut total_cases = 0usize;
581 let mut total_bytes_at_one_lsb = 0usize;
582
583 for &count in &ONE_LSB_SPAN_LENGTHS {
584 for &color in &colours {
585 for &a_input in &a_inputs {
586 for &sh_fn in &shape_patterns {
587 for &dst_fn in &dst_patterns {
588 let shape: Vec<u8> = (0..count).map(sh_fn).collect();
589 let initial: Vec<u8> = (0..count * 3).map(dst_fn).collect();
590 let fast = run_fast_path(color, a_input, &shape, &initial);
591 let exact = run_exact_reference(color, a_input, &shape, &initial);
592 total_bytes_at_one_lsb += assert_within_one_lsb(
593 &fast, &exact, color, a_input, &shape, &initial,
594 );
595 total_cases += 1;
596 }
597 }
598 }
599 }
600 }
601
602 // Trip-wire: the corpus must actually exercise the 1-LSB
603 // divergence somewhere, otherwise the test is silently equivalent
604 // to byte equality and the deferred audit's premise was wrong.
605 // Empirically this corpus produces ≥1 byte at diff=1; if it stops
606 // doing so, widen the corpus before trusting the ≤1 LSB ceiling.
607 assert!(
608 total_bytes_at_one_lsb > 0,
609 "corpus of {total_cases} cases produced no 1-LSB divergence — \
610 corpus is too narrow to actually pin the ≤1 LSB ceiling"
611 );
612 }
613}