// locus_core/decoder.rs
//! Tag decoding, homography computation, and bit sampling.
//!
//! This module handles the final stage of the pipeline:
//! 1. **Homography**: Computing the projection from canonical tag space to image pixels.
//! 2. **Bit Sampling**: Bilinear interpolation of intensities at grid points.
//! 3. **Error Correction**: Correcting bit flips using tag-family specific Hamming distances.

#![allow(unsafe_code, clippy::cast_sign_loss)]

use std::cell::RefCell;

use bumpalo::Bump;
use multiversion::multiversion;
use nalgebra::{SMatrix, SVector};

use crate::batch::{Matrix3x3, Point2f};
use crate::config;
use crate::simd::math::{bilinear_interpolate_fixed, rcp_nr};
use crate::simd::roi::RoiCache;

thread_local! {
    /// Per-thread bump arena for decode-time scratch allocations.
    // Use a small initial capacity (e.g., 4KB) to avoid system allocation overhead for small workloads.
    // The arena will still grow if needed.
    pub(crate) static DECODE_ARENA: RefCell<Bump> = RefCell::new(Bump::with_capacity(4096));
}
23
/// A 3x3 Homography matrix.
///
/// Maps points from canonical tag space to image pixel space (see
/// [`Homography::project`] for the perspective divide).
pub struct Homography {
    /// The 3x3 homography matrix. `h[(2, 2)]` is normalized to 1 by the
    /// constructors in this module.
    pub h: SMatrix<f64, 3, 3>,
}
29
30impl Homography {
31    /// Compute homography from 4 source points to 4 destination points using DLT.
32    /// Points are [x, y].
33    /// Compute homography from 4 source points to 4 destination points using DLT.
34    /// Points are [x, y].
35    #[must_use]
36    pub fn from_pairs(src: &[[f64; 2]; 4], dst: &[[f64; 2]; 4]) -> Option<Self> {
37        let mut a = SMatrix::<f64, 8, 9>::zeros();
38
39        for i in 0..4 {
40            let sx = src[i][0];
41            let sy = src[i][1];
42            let dx = dst[i][0];
43            let dy = dst[i][1];
44
45            a[(i * 2, 0)] = -sx;
46            a[(i * 2, 1)] = -sy;
47            a[(i * 2, 2)] = -1.0;
48            a[(i * 2, 6)] = sx * dx;
49            a[(i * 2, 7)] = sy * dx;
50            a[(i * 2, 8)] = dx;
51
52            a[(i * 2 + 1, 3)] = -sx;
53            a[(i * 2 + 1, 4)] = -sy;
54            a[(i * 2 + 1, 5)] = -1.0;
55            a[(i * 2 + 1, 6)] = sx * dy;
56            a[(i * 2 + 1, 7)] = sy * dy;
57            a[(i * 2 + 1, 8)] = dy;
58        }
59
60        let mut b = SVector::<f64, 8>::zeros();
61        let mut m = SMatrix::<f64, 8, 8>::zeros();
62        for i in 0..8 {
63            for j in 0..8 {
64                m[(i, j)] = a[(i, j)];
65            }
66            b[i] = -a[(i, 8)];
67        }
68
69        m.lu().solve(&b).and_then(|h_vec| {
70            let mut h = SMatrix::<f64, 3, 3>::identity();
71            h[(0, 0)] = h_vec[0];
72            h[(0, 1)] = h_vec[1];
73            h[(0, 2)] = h_vec[2];
74            h[(1, 0)] = h_vec[3];
75            h[(1, 1)] = h_vec[4];
76            h[(1, 2)] = h_vec[5];
77            h[(2, 0)] = h_vec[6];
78            h[(2, 1)] = h_vec[7];
79            h[(2, 2)] = 1.0;
80            let res = Self { h };
81            for i in 0..4 {
82                let p_proj = res.project(src[i]);
83                let err_sq = (p_proj[0] - dst[i][0]).powi(2) + (p_proj[1] - dst[i][1]).powi(2);
84                if !err_sq.is_finite() || err_sq > 1e-4 {
85                    return None;
86                }
87            }
88            Some(res)
89        })
90    }
91
92    /// Optimized homography computation from canonical unit square to a quad.
93    /// Source points are assumed to be: `[(-1,-1), (1,-1), (1,1), (-1,1)]`.
94    #[must_use]
95    pub fn square_to_quad(dst: &[[f64; 2]; 4]) -> Option<Self> {
96        let mut b = SVector::<f64, 8>::zeros();
97        let mut m = SMatrix::<f64, 8, 8>::zeros();
98
99        // Hardcoded coefficients for src = [(-1,-1), (1,-1), (1,1), (-1,1)]
100        // Point 0: (-1, -1) -> (x0, y0)
101        let x0 = dst[0][0];
102        let y0 = dst[0][1];
103        // h0 + h1 - h2 - x0*h6 - x0*h7 = -x0  =>  1, 1, -1, ..., -x0, -x0
104        m[(0, 0)] = 1.0;
105        m[(0, 1)] = 1.0;
106        m[(0, 2)] = -1.0;
107        m[(0, 6)] = -x0;
108        m[(0, 7)] = -x0;
109        b[0] = -x0;
110        // h3 + h4 - h5 - y0*h6 - y0*h7 = -y0  =>  ..., 1, 1, -1, -y0, -y0
111        m[(1, 3)] = 1.0;
112        m[(1, 4)] = 1.0;
113        m[(1, 5)] = -1.0;
114        m[(1, 6)] = -y0;
115        m[(1, 7)] = -y0;
116        b[1] = -y0;
117
118        // Point 1: (1, -1) -> (x1, y1)
119        let x1 = dst[1][0];
120        let y1 = dst[1][1];
121        // -h0 + h1 + h2 + x1*h6 - x1*h7 = -x1
122        m[(2, 0)] = -1.0;
123        m[(2, 1)] = 1.0;
124        m[(2, 2)] = -1.0;
125        m[(2, 6)] = x1;
126        m[(2, 7)] = -x1;
127        b[2] = -x1;
128        m[(3, 3)] = -1.0;
129        m[(3, 4)] = 1.0;
130        m[(3, 5)] = -1.0;
131        m[(3, 6)] = y1;
132        m[(3, 7)] = -y1;
133        b[3] = -y1;
134
135        // Point 2: (1, 1) -> (x2, y2)
136        let x2 = dst[2][0];
137        let y2 = dst[2][1];
138        // -h0 - h1 + h2 + x2*h6 + x2*h7 = -x2
139        m[(4, 0)] = -1.0;
140        m[(4, 1)] = -1.0;
141        m[(4, 2)] = -1.0;
142        m[(4, 6)] = x2;
143        m[(4, 7)] = x2;
144        b[4] = -x2;
145        m[(5, 3)] = -1.0;
146        m[(5, 4)] = -1.0;
147        m[(5, 5)] = -1.0;
148        m[(5, 6)] = y2;
149        m[(5, 7)] = y2;
150        b[5] = -y2;
151
152        // Point 3: (-1, 1) -> (x3, y3)
153        let x3 = dst[3][0];
154        let y3 = dst[3][1];
155        // h0 - h1 + h2 - x3*h6 + x3*h7 = -x3
156        m[(6, 0)] = 1.0;
157        m[(6, 1)] = -1.0;
158        m[(6, 2)] = -1.0;
159        m[(6, 6)] = -x3;
160        m[(6, 7)] = x3;
161        b[6] = -x3;
162        m[(7, 3)] = 1.0;
163        m[(7, 4)] = -1.0;
164        m[(7, 5)] = -1.0;
165        m[(7, 6)] = -y3;
166        m[(7, 7)] = y3;
167        b[7] = -y3;
168
169        m.lu().solve(&b).and_then(|h_vec| {
170            let mut h = SMatrix::<f64, 3, 3>::identity();
171            h[(0, 0)] = h_vec[0];
172            h[(0, 1)] = h_vec[1];
173            h[(0, 2)] = h_vec[2];
174            h[(1, 0)] = h_vec[3];
175            h[(1, 1)] = h_vec[4];
176            h[(1, 2)] = h_vec[5];
177            h[(2, 0)] = h_vec[6];
178            h[(2, 1)] = h_vec[7];
179            h[(2, 2)] = 1.0;
180            let res = Self { h };
181            let src_unit = [[-1.0, -1.0], [1.0, -1.0], [1.0, 1.0], [-1.0, 1.0]];
182            for i in 0..4 {
183                let p_proj = res.project(src_unit[i]);
184                let err_sq = (p_proj[0] - dst[i][0]).powi(2) + (p_proj[1] - dst[i][1]).powi(2);
185                if err_sq > 1e-4 {
186                    return None;
187                }
188            }
189            Some(res)
190        })
191    }
192
193    /// Project a point using the homography.
194    #[must_use]
195    pub fn project(&self, p: [f64; 2]) -> [f64; 2] {
196        let res = self.h * SVector::<f64, 3>::new(p[0], p[1], 1.0);
197        let w = res[2];
198        [res[0] / w, res[1] / w]
199    }
200}
201
202/// Compute homographies for all quads in the batch using a pure-function SoA approach.
203///
204/// This uses `rayon` for data-parallel computation of the square-to-quad homographies.
205/// Quads are defined by 4 corners in `corners` for each candidate index.
206#[tracing::instrument(skip_all, name = "pipeline::homography_pass")]
207pub fn compute_homographies_soa(corners: &[[Point2f; 4]], homographies: &mut [Matrix3x3]) {
208    use rayon::prelude::*;
209
210    // Each homography maps from canonical square [(-1,-1), (1,-1), (1,1), (-1,1)] to image quads.
211    homographies
212        .par_iter_mut()
213        .enumerate()
214        .for_each(|(i, h_out)| {
215            let dst = [
216                [f64::from(corners[i][0].x), f64::from(corners[i][0].y)],
217                [f64::from(corners[i][1].x), f64::from(corners[i][1].y)],
218                [f64::from(corners[i][2].x), f64::from(corners[i][2].y)],
219                [f64::from(corners[i][3].x), f64::from(corners[i][3].y)],
220            ];
221
222            if let Some(h) = Homography::square_to_quad(&dst) {
223                // Copy data to f32 batch. Nalgebra stores in column-major order.
224                for (j, val) in h.h.iter().enumerate() {
225                    h_out.data[j] = *val as f32;
226                }
227                h_out.padding = [0.0; 7];
228            } else {
229                // Failed to compute homography (e.g. degenerate quad).
230                h_out.data = [0.0; 9];
231                h_out.padding = [0.0; 7];
232            }
233        });
234}
235
236/// Refine corners using "Erf-Fit" (Gaussian fit to intensity profile).
237///
238/// This assumes the edge intensity profile is an Error Function (convolution of step edge with Gaussian PSF).
239/// We minimize the photometric error between the image and the ERF model using Gauss-Newton.
240pub(crate) fn refine_corners_erf(
241    arena: &bumpalo::Bump,
242    img: &crate::image::ImageView,
243    corners: &[[f64; 2]; 4],
244    sigma: f64,
245) -> [[f64; 2]; 4] {
246    let mut lines = [(0.0f64, 0.0f64, 0.0f64); 4];
247    let mut line_valid = [false; 4];
248
249    // Sub-pixel edge refinement for each of the 4 edges
250    for i in 0..4 {
251        let next = (i + 1) % 4;
252        let p1 = corners[i];
253        let p2 = corners[next];
254
255        if let Some((nx, ny, d)) = fit_edge_erf(arena, img, p1, p2, sigma) {
256            lines[i] = (nx, ny, d);
257            line_valid[i] = true;
258        }
259    }
260
261    if !line_valid.iter().all(|&v| v) {
262        return *corners;
263    }
264
265    // Intersect lines to get refined corners
266    let mut refined = *corners;
267    for i in 0..4 {
268        let prev = (i + 3) % 4;
269        let (a1, b1, c1) = lines[prev];
270        let (a2, b2, c2) = lines[i];
271        let det = a1 * b2 - a2 * b1;
272        if det.abs() > 1e-6 {
273            let x = (b1 * c2 - b2 * c1) / det;
274            let y = (a2 * c1 - a1 * c2) / det;
275
276            // Sanity check
277            let dist_sq = (x - corners[i][0]).powi(2) + (y - corners[i][1]).powi(2);
278            if dist_sq < 4.0 {
279                refined[i] = [x, y];
280            }
281        }
282    }
283    refined
284}
285
/// Helper for ERF edge fitting.
///
/// Holds the edge geometry and the current line estimate `nx*x + ny*y + d = 0`
/// while the offset `d` is refined.
struct EdgeFitter<'a> {
    // Image the edge is fitted against.
    img: &'a crate::image::ImageView<'a>,
    // Edge start point.
    p1: [f64; 2],
    // Edge direction vector (p2 - p1), not normalized.
    dx: f64,
    dy: f64,
    // Edge length in pixels.
    len: f64,
    // Unit normal of the edge line.
    nx: f64,
    ny: f64,
    // Line offset; the fitted line is nx*x + ny*y + d = 0.
    d: f64,
}
297
298impl<'a> EdgeFitter<'a> {
299    fn new(img: &'a crate::image::ImageView<'a>, p1: [f64; 2], p2: [f64; 2]) -> Option<Self> {
300        let dx = p2[0] - p1[0];
301        let dy = p2[1] - p1[1];
302        let len = (dx * dx + dy * dy).sqrt();
303        if len < 4.0 {
304            return None;
305        }
306        let nx = -dy / len;
307        let ny = dx / len;
308        // Initial d from input corners
309        let d = -(nx * p1[0] + ny * p1[1]);
310
311        Some(Self {
312            img,
313            p1,
314            dx,
315            dy,
316            len,
317            nx,
318            ny,
319            d,
320        })
321    }
322
323    fn scan_initial_d(&mut self) {
324        let window = 2.5;
325        let (x0, x1, y0, y1) = self.get_scan_bounds(window);
326
327        let mut best_offset = 0.0;
328        let mut best_grad = 0.0;
329
330        for k in -6..=6 {
331            let offset = f64::from(k) * 0.4;
332            let scan_d = self.d + offset;
333
334            let (sum_g, count) =
335                project_gradients_optimized(self.img, self.nx, self.ny, x0, x1, y0, y1, scan_d);
336
337            if count > 0 && sum_g > best_grad {
338                best_grad = sum_g;
339                best_offset = offset;
340            }
341        }
342        self.d += best_offset;
343    }
344
345    fn collect_samples(
346        &self,
347        arena: &'a bumpalo::Bump,
348    ) -> bumpalo::collections::Vec<'a, (f64, f64, f64)> {
349        let window = 2.5;
350        let (x0, x1, y0, y1) = self.get_scan_bounds(window);
351
352        collect_samples_optimized(
353            self.img, self.nx, self.ny, self.d, self.p1, self.dx, self.dy, self.len, x0, x1, y0,
354            y1, window, arena,
355        )
356    }
357
358    fn refine(&mut self, samples: &[(f64, f64, f64)], sigma: f64) {
359        if samples.len() < 10 {
360            return;
361        }
362        let mut a = 128.0;
363        let mut b = 128.0;
364        let inv_sigma = 1.0 / sigma;
365        let _sqrt_pi = std::f64::consts::PI.sqrt();
366
367        for _ in 0..15 {
368            let mut dark_sum = 0.0;
369            let mut dark_weight = 0.0;
370            let mut light_sum = 0.0;
371            let mut light_weight = 0.0;
372
373            for &(x, y, _) in samples {
374                let dist = self.nx * x + self.ny * y + self.d;
375                let val = self.img.sample_bilinear(x, y);
376                if dist < -1.0 {
377                    let w = (-dist - 0.5).clamp(0.1, 2.0);
378                    dark_sum += val * w;
379                    dark_weight += w;
380                } else if dist > 1.0 {
381                    let w = (dist - 0.5).clamp(0.1, 2.0);
382                    light_sum += val * w;
383                    light_weight += w;
384                }
385            }
386
387            if dark_weight > 0.0 && light_weight > 0.0 {
388                a = dark_sum / dark_weight;
389                b = light_sum / light_weight;
390            }
391
392            if (b - a).abs() < 5.0 {
393                break;
394            }
395
396            let (sum_jtj, sum_jt_res) = refine_accumulate_optimized(
397                samples, self.img, self.nx, self.ny, self.d, a, b, sigma, inv_sigma,
398            );
399
400            if sum_jtj < 1e-6 {
401                break;
402            }
403            let step = sum_jt_res / sum_jtj;
404            self.d += step.clamp(-0.5, 0.5);
405            if step.abs() < 1e-4 {
406                break;
407            }
408        }
409    }
410
411    #[allow(clippy::cast_sign_loss)]
412    fn get_scan_bounds(&self, window: f64) -> (usize, usize, usize, usize) {
413        let p2_0 = self.p1[0] + self.dx;
414        let p2_1 = self.p1[1] + self.dy;
415
416        // Clamp to valid image coordinates (padding of 1 pixel)
417        let w_limit = (self.img.width - 2) as f64;
418        let h_limit = (self.img.height - 2) as f64;
419
420        let x0 = (self.p1[0].min(p2_0) - window - 0.5).clamp(1.0, w_limit) as usize;
421        let x1 = (self.p1[0].max(p2_0) + window + 0.5).clamp(1.0, w_limit) as usize;
422        let y0 = (self.p1[1].min(p2_1) - window - 0.5).clamp(1.0, h_limit) as usize;
423        let y1 = (self.p1[1].max(p2_1) + window + 0.5).clamp(1.0, h_limit) as usize;
424        (x0, x1, y0, y1)
425    }
426}
427
428#[multiversion(targets(
429    "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
430    "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
431    "aarch64+neon"
432))]
433#[allow(clippy::too_many_arguments)]
434fn project_gradients_optimized(
435    img: &crate::image::ImageView,
436    nx: f64,
437    ny: f64,
438    x0: usize,
439    x1: usize,
440    y0: usize,
441    y1: usize,
442    scan_d: f64,
443) -> (f64, usize) {
444    let mut sum_g = 0.0;
445    let mut count = 0;
446
447    for py in y0..=y1 {
448        let mut px = x0;
449        let y = py as f64 + 0.5;
450
451        #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
452        if let Some(_dispatch) = multiversion::target::x86_64::avx2::get() {
453            unsafe {
454                use std::arch::x86_64::*;
455                let v_nx = _mm256_set1_pd(nx);
456                let v_ny = _mm256_set1_pd(ny);
457                let v_scan_d = _mm256_set1_pd(scan_d);
458                let v_y = _mm256_set1_pd(y);
459                let v_abs_mask = _mm256_set1_pd(-0.0);
460
461                while px + 4 <= x1 {
462                    let v_x = _mm256_set_pd(
463                        (px + 3) as f64 + 0.5,
464                        (px + 2) as f64 + 0.5,
465                        (px + 1) as f64 + 0.5,
466                        px as f64 + 0.5,
467                    );
468
469                    let v_dist = _mm256_add_pd(
470                        _mm256_add_pd(_mm256_mul_pd(v_nx, v_x), _mm256_mul_pd(v_ny, v_y)),
471                        v_scan_d,
472                    );
473
474                    let v_abs_dist = _mm256_andnot_pd(v_abs_mask, v_dist);
475                    let v_cmp = _mm256_cmp_pd(v_abs_dist, _mm256_set1_pd(1.0), _CMP_LT_OQ);
476                    let mask = _mm256_movemask_pd(v_cmp);
477
478                    if mask != 0 {
479                        for j in 0..4 {
480                            if (mask >> j) & 1 != 0 {
481                                let g = img.sample_gradient_bilinear((px + j) as f64, y);
482                                sum_g += (g[0] * nx + g[1] * ny).abs();
483                                count += 1;
484                            }
485                        }
486                    }
487                    px += 4;
488                }
489            }
490        }
491
492        while px <= x1 {
493            let x = px as f64 + 0.5;
494            let dist = nx * x + ny * y + scan_d;
495            if dist.abs() < 1.0 {
496                let g = img.sample_gradient_bilinear(x, y);
497                sum_g += (g[0] * nx + g[1] * ny).abs();
498                count += 1;
499            }
500            px += 1;
501        }
502    }
503    (sum_g, count)
504}
505
/// Gather `(x, y, intensity)` samples near the line `nx*x + ny*y + d = 0`.
///
/// A pixel (sampled at its center, `+ 0.5`) is kept when:
/// - its signed distance to the line is within `window`, and
/// - its projection parameter `t` along the segment `p1 + t*(dx, dy)` lies in
///   `[-0.1, 1.1]` (slightly past the endpoints).
///
/// Results are allocated from `arena`. The AVX2 path evaluates both predicates
/// four pixels at a time and pushes kept pixels in the same order as the
/// scalar tail, so the output is identical across paths.
#[multiversion(targets(
    "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
    "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
    "aarch64+neon"
))]
#[allow(clippy::too_many_arguments)]
fn collect_samples_optimized<'a>(
    img: &crate::image::ImageView,
    nx: f64,
    ny: f64,
    d: f64,
    p1: [f64; 2],
    dx: f64,
    dy: f64,
    len: f64,
    x0: usize,
    x1: usize,
    y0: usize,
    y1: usize,
    window: f64,
    arena: &'a bumpalo::Bump,
) -> bumpalo::collections::Vec<'a, (f64, f64, f64)> {
    let mut samples = bumpalo::collections::Vec::with_capacity_in(128, arena);

    for py in y0..=y1 {
        let mut px = x0;
        let y = py as f64 + 0.5;

        #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
        if let Some(_dispatch) = multiversion::target::x86_64::avx2::get() {
            // SAFETY: the dispatch check above guarantees AVX2 is available.
            unsafe {
                use std::arch::x86_64::*;
                let v_nx = _mm256_set1_pd(nx);
                let v_ny = _mm256_set1_pd(ny);
                let v_d = _mm256_set1_pd(d);
                let v_y = _mm256_set1_pd(y);
                let v_p1x = _mm256_set1_pd(p1[0]);
                let v_p1y = _mm256_set1_pd(p1[1]);
                let v_dx = _mm256_set1_pd(dx);
                let v_dy = _mm256_set1_pd(dy);
                let v_inv_len_sq = _mm256_set1_pd(1.0 / (len * len));
                // Sign-bit mask: `andnot` with it computes |x|.
                let v_abs_mask = _mm256_set1_pd(-0.0);

                while px + 4 <= x1 {
                    // Pixel-center x coordinates for 4 consecutive pixels.
                    let v_x = _mm256_set_pd(
                        (px + 3) as f64 + 0.5,
                        (px + 2) as f64 + 0.5,
                        (px + 1) as f64 + 0.5,
                        px as f64 + 0.5,
                    );

                    // Signed distance to the line.
                    let v_dist = _mm256_add_pd(
                        _mm256_add_pd(_mm256_mul_pd(v_nx, v_x), _mm256_mul_pd(v_ny, v_y)),
                        v_d,
                    );
                    let v_abs_dist = _mm256_andnot_pd(v_abs_mask, v_dist);
                    let v_dist_mask = _mm256_cmp_pd(v_abs_dist, _mm256_set1_pd(window), _CMP_LE_OQ);

                    // Projection parameter t along the edge segment.
                    let v_t = _mm256_mul_pd(
                        _mm256_add_pd(
                            _mm256_mul_pd(_mm256_sub_pd(v_x, v_p1x), v_dx),
                            _mm256_mul_pd(_mm256_sub_pd(v_y, v_p1y), v_dy),
                        ),
                        v_inv_len_sq,
                    );

                    let v_t_mask_low = _mm256_cmp_pd(v_t, _mm256_set1_pd(-0.1), _CMP_GE_OQ);
                    let v_t_mask_high = _mm256_cmp_pd(v_t, _mm256_set1_pd(1.1), _CMP_LE_OQ);

                    let final_mask = _mm256_movemask_pd(_mm256_and_pd(
                        v_dist_mask,
                        _mm256_and_pd(v_t_mask_low, v_t_mask_high),
                    ));

                    if final_mask != 0 {
                        for j in 0..4 {
                            if (final_mask >> j) & 1 != 0 {
                                let val = f64::from(img.get_pixel(px + j, py));
                                samples.push(((px + j) as f64 + 0.5, y, val));
                            }
                        }
                    }
                    px += 4;
                }
            }
        }

        // Scalar tail (and the full loop on non-AVX2 targets).
        while px <= x1 {
            let x = px as f64 + 0.5;
            let dist = nx * x + ny * y + d;
            if dist.abs() <= window {
                let t = ((x - p1[0]) * dx + (y - p1[1]) * dy) / (len * len);
                if (-0.1..=1.1).contains(&t) {
                    let val = f64::from(img.get_pixel(px, py));
                    samples.push((x, y, val));
                }
            }
            px += 1;
        }
    }
    samples
}
608
609#[multiversion(targets(
610    "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
611    "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
612    "aarch64+neon"
613))]
614#[allow(clippy::too_many_arguments)]
615fn refine_accumulate_optimized(
616    samples: &[(f64, f64, f64)],
617    #[allow(unused_variables)] img: &crate::image::ImageView,
618    nx: f64,
619    ny: f64,
620    d: f64,
621    a: f64,
622    b: f64,
623    sigma: f64,
624    inv_sigma: f64,
625) -> (f64, f64) {
626    let mut sum_jtj = 0.0;
627    let mut sum_jt_res = 0.0;
628    let sqrt_pi = std::f64::consts::PI.sqrt();
629    let k = (b - a) / (sqrt_pi * sigma);
630
631    let mut i = 0;
632
633    #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
634    if let Some(_dispatch) = multiversion::target::x86_64::avx2::get() {
635        unsafe {
636            use std::arch::x86_64::*;
637            let v_nx = _mm256_set1_pd(nx);
638            let v_ny = _mm256_set1_pd(ny);
639            let v_d = _mm256_set1_pd(d);
640            let v_a = _mm256_set1_pd(a);
641            let v_b = _mm256_set1_pd(b);
642            let v_inv_sigma = _mm256_set1_pd(inv_sigma);
643            let v_k = _mm256_set1_pd(k);
644            let v_half = _mm256_set1_pd(0.5);
645            let v_one = _mm256_set1_pd(1.0);
646            let v_abs_mask = _mm256_set1_pd(-0.0);
647
648            let mut v_sum_jtj = _mm256_setzero_pd();
649            let mut v_sum_jt_res = _mm256_setzero_pd();
650
651            while i + 4 <= samples.len() {
652                let s0 = samples[i];
653                let s1 = samples[i + 1];
654                let s2 = samples[i + 2];
655                let s3 = samples[i + 3];
656
657                let v_x = _mm256_set_pd(s3.0, s2.0, s1.0, s0.0);
658                let v_y = _mm256_set_pd(s3.1, s2.1, s1.1, s0.1);
659                let v_img_val = _mm256_set_pd(s3.2, s2.2, s1.2, s0.2);
660
661                let v_dist = _mm256_add_pd(
662                    _mm256_add_pd(_mm256_mul_pd(v_nx, v_x), _mm256_mul_pd(v_ny, v_y)),
663                    v_d,
664                );
665                let v_s = _mm256_mul_pd(v_dist, v_inv_sigma);
666
667                let v_abs_s = _mm256_andnot_pd(v_abs_mask, v_s);
668                let v_range_mask = _mm256_cmp_pd(v_abs_s, _mm256_set1_pd(3.0), _CMP_LE_OQ);
669
670                if _mm256_movemask_pd(v_range_mask) != 0 {
671                    // Simple ERF approx: erf(x) ~= sign(x) * sqrt(1 - exp(-x^2 * 4/pi))
672                    // For now, just use scalar fallback for the math inside to keep it sound,
673                    // but we could use a SIMD exp/erf here.
674                    let ss: [f64; 4] = std::mem::transmute(v_s);
675                    let iv: [f64; 4] = std::mem::transmute(v_img_val);
676                    let mm: [f64; 4] = std::mem::transmute(v_range_mask);
677
678                    for j in 0..4 {
679                        if mm[j] != 0.0 {
680                            let s_val = ss[j];
681                            let model =
682                                (a + b) * 0.5 + (b - a) * 0.5 * crate::quad::erf_approx(s_val);
683                            let residual = iv[j] - model;
684                            let jac = k * (-s_val * s_val).exp();
685                            sum_jtj += jac * jac;
686                            sum_jt_res += jac * residual;
687                        }
688                    }
689                }
690                i += 4;
691            }
692        }
693    }
694
695    // Scalar tail
696    while i < samples.len() {
697        let (x, y, img_val) = samples[i];
698        let dist = nx * x + ny * y + d;
699        let s = dist * inv_sigma;
700        if s.abs() <= 3.0 {
701            let model = (a + b) * 0.5 + (b - a) * 0.5 * crate::quad::erf_approx(s);
702            let residual = img_val - model;
703            let jac = k * (-s * s).exp();
704            sum_jtj += jac * jac;
705            sum_jt_res += jac * residual;
706        }
707        i += 1;
708    }
709
710    (sum_jtj, sum_jt_res)
711}
712
713/// Fit a line (nx*x + ny*y + d = 0) to an edge using the ERF intensity model.
714fn fit_edge_erf(
715    arena: &bumpalo::Bump,
716    img: &crate::image::ImageView,
717    p1: [f64; 2],
718    p2: [f64; 2],
719    sigma: f64,
720) -> Option<(f64, f64, f64)> {
721    let mut fitter = EdgeFitter::new(img, p1, p2)?;
722    fitter.scan_initial_d();
723    let samples = fitter.collect_samples(arena);
724    if samples.len() < 10 {
725        return None;
726    }
727    fitter.refine(&samples, sigma);
728    Some((fitter.nx, fitter.ny, fitter.d))
729}
730
/// Returns the threshold that maximizes inter-class variance (Otsu's method),
/// evaluated over 15 evenly spaced candidates between the sample min and max.
///
/// Degenerate inputs: an empty slice yields 128.0; a range below 1.0 yields
/// the midpoint of min and max.
pub(crate) fn compute_otsu_threshold(values: &[f64]) -> f64 {
    if values.is_empty() {
        return 128.0;
    }

    let n = values.len() as f64;
    let total_sum: f64 = values.iter().sum();

    // Establish the search range from the sample extrema.
    let min_val = values.iter().copied().fold(f64::MAX, f64::min);
    let max_val = values.iter().copied().fold(f64::MIN, f64::max);
    let range = max_val - min_val;

    // Not enough contrast to split meaningfully.
    if range < 1.0 {
        return f64::midpoint(min_val, max_val);
    }

    let mut best_threshold = f64::midpoint(min_val, max_val);
    let mut best_variance = 0.0;

    // Candidate thresholds at i/16 of the range for i = 1..=15.
    for step in 1..16 {
        let t = min_val + range * (f64::from(step) / 16.0);

        // Class 0: samples at or below the candidate threshold.
        let (w0, sum0) = values
            .iter()
            .filter(|&&v| v <= t)
            .fold((0.0, 0.0), |(count, sum), &v| (count + 1.0, sum + v));

        let w1 = n - w0;
        // Both classes must be non-empty for the split to be valid.
        if w0 < 1.0 || w1 < 1.0 {
            continue;
        }

        let mean0 = sum0 / w0;
        let mean1 = (total_sum - sum0) / w1;

        // Inter-class variance (unnormalized; monotone in the normalized form).
        let variance = w0 * w1 * (mean0 - mean1) * (mean0 - mean1);

        if variance > best_variance {
            best_variance = variance;
            best_threshold = t;
        }
    }

    best_threshold
}
785
/// Sample values from the image using SIMD-optimized Fast-Math and ROI caching.
///
/// Projects the first `n` canonical-space `points` through `h` and bilinearly
/// interpolates intensities into `intensities` (which must hold at least `n`
/// elements). Returns `false` — aborting the caller — as soon as any projected
/// point falls outside the image interior.
#[multiversion(targets(
    "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
    "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
    "aarch64+neon"
))]
fn sample_grid_values_optimized(
    img: &crate::image::ImageView,
    h: &Homography,
    roi: &RoiCache,
    points: &[(f64, f64)],
    intensities: &mut [f64],
    n: usize,
) -> bool {
    // Widen-to-narrow: the f64 homography is applied in f32 for speed.
    let h00 = h.h[(0, 0)] as f32;
    let h01 = h.h[(0, 1)] as f32;
    let h02 = h.h[(0, 2)] as f32;
    let h10 = h.h[(1, 0)] as f32;
    let h11 = h.h[(1, 1)] as f32;
    let h12 = h.h[(1, 2)] as f32;
    let h20 = h.h[(2, 0)] as f32;
    let h21 = h.h[(2, 1)] as f32;
    let h22 = h.h[(2, 2)] as f32;

    let w_limit = (img.width - 1) as f32;
    let h_limit = (img.height - 1) as f32;

    for (i, &p) in points.iter().take(n).enumerate() {
        let px = p.0 as f32;
        let py = p.1 as f32;

        // Fast-Math Reciprocal (Newton-Raphson refined) for the perspective divide.
        let wz = h20 * px + h21 * py + h22;
        let winv = rcp_nr(wz);

        // The -0.5 shift presumably converts to a pixel-center sampling
        // convention — TODO(review): confirm against `bilinear_interpolate_fixed`.
        let img_x = (h00 * px + h01 * py + h02) * winv - 0.5;
        let img_y = (h10 * px + h11 * py + h12) * winv - 0.5;

        // Bounds check also guarantees ix+1/iy+1 below stay inside the image.
        if img_x < 0.0 || img_x >= w_limit || img_y < 0.0 || img_y >= h_limit {
            return false;
        }

        let ix = img_x.floor() as usize;
        let iy = img_y.floor() as usize;

        // Sample from ROI cache using fixed-point bilinear.
        // NOTE(review): `roi.get` is indexed with absolute image coordinates
        // here — RoiCache presumably translates to its local window; confirm.
        let v00 = roi.get(ix, iy);
        let v10 = roi.get(ix + 1, iy);
        let v01 = roi.get(ix, iy + 1);
        let v11 = roi.get(ix + 1, iy + 1);

        intensities[i] = f64::from(bilinear_interpolate_fixed(img_x, img_y, v00, v10, v01, v11));
    }
    true
}
841
842/// Sample the bit grid from the image using the homography and decoder points.
843///
844/// Uses bilinear interpolation for sampling and a spatially adaptive threshold
845/// (based on min/max stats of the grid) to determine bit values.
846///
847/// # Parameters
848/// - `min_contrast`: Minimum contrast range for Otsu-based classification.
849///   Default is 20.0. Lower values (e.g., 10.0) improve recall on small/blurry tags.
850///
851/// This computes the intensities at sample points and the adaptive thresholds,
852/// then delegates to the strategy to produce the code.
853#[allow(clippy::cast_sign_loss, clippy::too_many_lines)]
854pub fn sample_grid_generic<S: crate::strategy::DecodingStrategy>(
855    img: &crate::image::ImageView,
856    arena: &Bump,
857    detection: &crate::Detection,
858    decoder: &(impl TagDecoder + ?Sized),
859) -> Option<S::Code> {
860    let (min_x, min_y, max_x, max_y) = detection.aabb();
861    let roi = RoiCache::new(img, arena, min_x, min_y, max_x, max_y);
862
863    let homography = Homography::square_to_quad(&detection.corners)?;
864
865    let points = decoder.sample_points();
866    // Stack-allocated buffer for up to 64 sample points (covers all standard tag families)
867    let mut intensities = [0.0f64; 64];
868    let n = points.len().min(64);
869
870    if !sample_grid_values_optimized(img, &homography, &roi, points, &mut intensities, n) {
871        return None;
872    }
873
874    Some(S::from_intensities(
875        &intensities[..n],
876        &compute_adaptive_thresholds(&intensities[..n], points),
877    ))
878}
879
880/// Sample the bit grid using Structure of Arrays (SoA) data.
881pub fn sample_grid_soa<S: crate::strategy::DecodingStrategy>(
882    img: &crate::image::ImageView,
883    arena: &Bump,
884    corners: &[Point2f],
885    homography: &Matrix3x3,
886    decoder: &(impl TagDecoder + ?Sized),
887) -> Option<S::Code> {
888    // Compute AABB for RoiCache
889    let mut min_x = f32::MAX;
890    let mut min_y = f32::MAX;
891    let mut max_x = f32::MIN;
892    let mut max_y = f32::MIN;
893    for p in corners {
894        min_x = min_x.min(p.x);
895        min_y = min_y.min(p.y);
896        max_x = max_x.max(p.x);
897        max_y = max_y.max(p.y);
898    }
899
900    let roi = RoiCache::new(
901        img,
902        arena,
903        (min_x.floor() as i32).max(0) as usize,
904        (min_y.floor() as i32).max(0) as usize,
905        (max_x.ceil() as i32).max(0) as usize,
906        (max_y.ceil() as i32).max(0) as usize,
907    );
908
909    // Convert Matrix3x3 to Homography (internal use).
910    // Nalgebra stores in column-major order, which matches our data layout.
911    let mut h_mat = SMatrix::<f64, 3, 3>::identity();
912    for (i, val) in homography.data.iter().enumerate() {
913        h_mat.as_mut_slice()[i] = f64::from(*val);
914    }
915    let homography_obj = Homography { h: h_mat };
916
917    let points = decoder.sample_points();
918    let mut intensities = [0.0f64; 64];
919    let n = points.len().min(64);
920
921    if !sample_grid_values_optimized(img, &homography_obj, &roi, points, &mut intensities, n) {
922        return None;
923    }
924
925    Some(S::from_intensities(
926        &intensities[..n],
927        &compute_adaptive_thresholds(&intensities[..n], points),
928    ))
929}
930
931/// Sample the bit grid using Structure of Arrays (SoA) data and a precomputed ROI cache.
932pub fn sample_grid_soa_precomputed<S: crate::strategy::DecodingStrategy>(
933    img: &crate::image::ImageView,
934    roi: &RoiCache,
935    homography: &Matrix3x3,
936    decoder: &(impl TagDecoder + ?Sized),
937) -> Option<S::Code> {
938    // Convert Matrix3x3 to Homography (internal use).
939    let mut h_mat = SMatrix::<f64, 3, 3>::identity();
940    for (i, val) in homography.data.iter().enumerate() {
941        h_mat.as_mut_slice()[i] = f64::from(*val);
942    }
943    let homography_obj = Homography { h: h_mat };
944
945    let points = decoder.sample_points();
946    let mut intensities = [0.0f64; 64];
947    let n = points.len().min(64);
948
949    if !sample_grid_values_optimized(img, &homography_obj, roi, points, &mut intensities, n) {
950        return None;
951    }
952
953    Some(S::from_intensities(
954        &intensities[..n],
955        &compute_adaptive_thresholds(&intensities[..n], points),
956    ))
957}
958
959/// Internal helper to compute adaptive thresholds for a grid of intensities.
960fn compute_adaptive_thresholds(intensities: &[f64], points: &[(f64, f64)]) -> [f64; 64] {
961    let n = intensities.len();
962    let global_threshold = compute_otsu_threshold(intensities);
963
964    let mut quad_sums = [0.0; 4];
965    let mut quad_counts = [0; 4];
966    for (i, p) in points.iter().take(n).enumerate() {
967        let qi = if p.0 < 0.0 {
968            usize::from(p.1 >= 0.0)
969        } else {
970            2 + usize::from(p.1 >= 0.0)
971        };
972        quad_sums[qi] += intensities[i];
973        quad_counts[qi] += 1;
974    }
975
976    let mut thresholds = [0.0f64; 64];
977    for (i, p) in points.iter().take(n).enumerate() {
978        let qi = if p.0 < 0.0 {
979            usize::from(p.1 >= 0.0)
980        } else {
981            2 + usize::from(p.1 >= 0.0)
982        };
983        let quad_avg = if quad_counts[qi] > 0 {
984            quad_sums[qi] / f64::from(quad_counts[qi])
985        } else {
986            global_threshold
987        };
988
989        // Blend global Otsu and local mean (0.7 / 0.3 weighting is common for fiducials)
990        thresholds[i] = 0.7 * global_threshold + 0.3 * quad_avg;
991    }
992    thresholds
993}
994
995/// Sample the bit grid from the image (Legacy/Hard wrapper).
996#[allow(clippy::cast_sign_loss, clippy::too_many_lines)]
997pub fn sample_grid(
998    img: &crate::image::ImageView,
999    arena: &Bump,
1000    detection: &crate::Detection,
1001    decoder: &(impl TagDecoder + ?Sized),
1002    _min_contrast: f64,
1003) -> Option<u64> {
1004    sample_grid_generic::<crate::strategy::HardStrategy>(img, arena, detection, decoder)
1005}
1006
/// Rotate a square bit grid 90 degrees clockwise.
/// This is an O(1) bitwise operation but conceptually represents rotating the N x N pixel grid.
#[must_use]
pub fn rotate90(bits: u64, dim: usize) -> u64 {
    // Bits outside the dim*dim grid never map anywhere, so mask them off first
    // (the original index loop simply never visited them).
    let valid = dim * dim;
    let mut remaining = if valid >= 64 {
        bits
    } else {
        bits & ((1u64 << valid) - 1)
    };

    let mut rotated = 0u64;
    // Walk only the set bits instead of scanning the full grid.
    while remaining != 0 {
        let idx = remaining.trailing_zeros() as usize;
        remaining &= remaining - 1; // clear lowest set bit
        let (x, y) = (idx % dim, idx / dim);
        // A 90-degree clockwise turn maps (x, y) -> (dim - 1 - y, x).
        rotated |= 1u64 << (x * dim + (dim - 1 - y));
    }
    rotated
}
1023
1024/// Decode all active candidates in the batch using the Structure of Arrays (SoA) layout.
1025///
1026/// This phase executes SIMD bilinear interpolation and Hamming error correction.
1027/// If a candidate fails decoding, its `status_mask` is flipped to `FailedDecode`.
1028#[tracing::instrument(skip_all, name = "pipeline::decoding_pass")]
1029pub fn decode_batch_soa(
1030    batch: &mut crate::batch::DetectionBatch,
1031    n: usize,
1032    img: &crate::image::ImageView,
1033    decoders: &[Box<dyn TagDecoder + Send + Sync>],
1034    config: &crate::config::DetectorConfig,
1035) {
1036    match config.decode_mode {
1037        crate::config::DecodeMode::Hard => {
1038            decode_batch_soa_generic::<crate::strategy::HardStrategy>(
1039                batch, n, img, decoders, config,
1040            );
1041        },
1042        crate::config::DecodeMode::Soft => {
1043            decode_batch_soa_generic::<crate::strategy::SoftStrategy>(
1044                batch, n, img, decoders, config,
1045            );
1046        },
1047    }
1048}
1049
1050#[allow(
1051    clippy::too_many_lines,
1052    clippy::cast_possible_wrap,
1053    clippy::collapsible_if,
1054    unused_assignments
1055)]
1056fn decode_batch_soa_generic<S: crate::strategy::DecodingStrategy>(
1057    batch: &mut crate::batch::DetectionBatch,
1058    n: usize,
1059    img: &crate::image::ImageView,
1060    decoders: &[Box<dyn TagDecoder + Send + Sync>],
1061    config: &crate::config::DetectorConfig,
1062) {
1063    use crate::batch::CandidateState;
1064    use rayon::prelude::*;
1065
1066    // We collect results into a temporary Vec to avoid unsafe parallel writes to the batch.
1067    let results: Vec<_> = (0..n)
1068        .into_par_iter()
1069        .map(|i| {
1070            DECODE_ARENA.with_borrow_mut(|arena| {
1071                    arena.reset();
1072
1073                    let corners = &batch.corners[i];
1074                    let homography = &batch.homographies[i];
1075
1076                    // Compute AABB for RoiCache ONCE per candidate.
1077                    // We expand it slightly (10%) to ensure scaled versions (0.9, 1.1) still fit.
1078                    let mut min_x = f32::MAX;
1079                    let mut min_y = f32::MAX;
1080                    let mut max_x = f32::MIN;
1081                    let mut max_y = f32::MIN;
1082                    for p in corners {
1083                        min_x = min_x.min(p.x);
1084                        min_y = min_y.min(p.y);
1085                        max_x = max_x.max(p.x);
1086                        max_y = max_y.max(p.y);
1087                    }
1088                    let w_aabb = max_x - min_x;
1089                    let h_aabb = max_y - min_y;
1090                    let roi = RoiCache::new(
1091                        img,
1092                        arena,
1093                        ((min_x - w_aabb * 0.1).floor() as i32).max(0) as usize,
1094                        ((min_y - h_aabb * 0.1).floor() as i32).max(0) as usize,
1095                        (((max_x + w_aabb * 0.1).ceil() as i32).min(img.width as i32 - 1)).max(0)
1096                            as usize,
1097                        (((max_y + h_aabb * 0.1).ceil() as i32).min(img.height as i32 - 1)).max(0)
1098                            as usize,
1099                    );
1100
1101
1102                    let mut best_h = u32::MAX;
1103                    let mut best_code = None;
1104                    let mut best_id = 0;
1105                    let mut best_rot = 0;
1106                    let mut best_overall_code = None;
1107
1108                    let scales = [1.0, 0.9, 1.1];
1109                    let center = [
1110                        (corners[0].x + corners[1].x + corners[2].x + corners[3].x) / 4.0,
1111                        (corners[0].y + corners[1].y + corners[2].y + corners[3].y) / 4.0,
1112                    ];
1113
1114                    for scale in scales {
1115                        let mut scaled_corners = [Point2f::default(); 4];
1116                        let mut scaled_h_mat = Matrix3x3 {
1117                            data: [0.0; 9],
1118                            padding: [0.0; 7],
1119                        };
1120
1121                        let current_homography: &Matrix3x3;
1122
1123                        let mut best_h_in_scale = u32::MAX;
1124                        let mut best_match_in_scale: Option<(u32, u32, u8, S::Code, usize)> = None;
1125
1126                        if (scale - 1.0f32).abs() > 1e-4 {
1127                            for j in 0..4 {
1128                                scaled_corners[j].x =
1129                                    center[0] + (corners[j].x - center[0]) * scale;
1130                                scaled_corners[j].y =
1131                                    center[1] + (corners[j].y - center[1]) * scale;
1132                            }
1133
1134                            // Must recompute homography for scaled corners
1135                            let dst = [
1136                                [
1137                                    f64::from(scaled_corners[0].x),
1138                                    f64::from(scaled_corners[0].y),
1139                                ],
1140                                [
1141                                    f64::from(scaled_corners[1].x),
1142                                    f64::from(scaled_corners[1].y),
1143                                ],
1144                                [
1145                                    f64::from(scaled_corners[2].x),
1146                                    f64::from(scaled_corners[2].y),
1147                                ],
1148                                [
1149                                    f64::from(scaled_corners[3].x),
1150                                    f64::from(scaled_corners[3].y),
1151                                ],
1152                            ];
1153
1154                            if let Some(h_new) = Homography::square_to_quad(&dst) {
1155                                for (j, val) in h_new.h.iter().enumerate() {
1156                                    scaled_h_mat.data[j] = *val as f32;
1157                                }
1158                                current_homography = &scaled_h_mat;
1159                            } else {
1160                                // Degenerate scale, skip
1161                                continue;
1162                            }
1163                        } else {
1164                            scaled_corners.copy_from_slice(&corners[..4]);
1165                            current_homography = homography;
1166                        }
1167
1168                        for (decoder_idx, decoder) in decoders.iter().enumerate() {
1169                            if let Some(code) = sample_grid_soa_precomputed::<S>(
1170                                img,
1171                                &roi,
1172                                current_homography,
1173                                decoder.as_ref(),
1174                            ) {
1175                                if let Some((id, hamming, rot)) =
1176                                    S::decode(&code, decoder.as_ref(), 255)
1177                                {
1178                                    if hamming < best_h {
1179                                        best_h = hamming;
1180                                        let _ = code.clone();
1181                                        best_overall_code = Some(code.clone());
1182                                    }
1183
1184                                    if hamming <= config.max_hamming_error
1185                                        && (best_code.is_none() || hamming < best_h_in_scale)
1186                                    {
1187                                        best_h_in_scale = hamming;
1188                                        best_match_in_scale =
1189                                            Some((id, hamming, rot, code, decoder_idx));
1190                                    }
1191                                }
1192                            }
1193                        }
1194                        if let Some((id, hamming, rot, code, decoder_idx)) = best_match_in_scale {
1195                            best_id = id;
1196                            best_rot = rot;
1197                            best_code = Some(code.clone());
1198                            let decoder = decoders[decoder_idx].as_ref();
1199
1200                            // Always perform ERF refinement for finalists if requested
1201                            if config.refinement_mode == crate::config::CornerRefinementMode::Erf {
1202                                // Reassemble corners for ERF (it uses [f64; 2])
1203                                let mut current_corners = [[0.0f64; 2]; 4];
1204                                for j in 0..4 {
1205                                    current_corners[j] =
1206                                        [f64::from(corners[j].x), f64::from(corners[j].y)];
1207                                }
1208
1209                                let refined_corners = refine_corners_erf(
1210                                    arena,
1211                                    img,
1212                                    &current_corners,
1213                                    config.subpixel_refinement_sigma,
1214                                );
1215
1216                                // Verify that refined corners still yield a valid decode
1217                                let mut refined_corners_f32 = [Point2f::default(); 4];
1218                                for j in 0..4 {
1219                                    refined_corners_f32[j] = Point2f {
1220                                        x: refined_corners[j][0] as f32,
1221                                        y: refined_corners[j][1] as f32,
1222                                    };
1223                                }
1224
1225                                // Must recompute homography for refined corners
1226                                let mut ref_h_mat = Matrix3x3 {
1227                                    data: [0.0; 9],
1228                                    padding: [0.0; 7],
1229                                };
1230                                if let Some(h_new) = Homography::square_to_quad(&refined_corners) {
1231                                    for (j, val) in h_new.h.iter().enumerate() {
1232                                        ref_h_mat.data[j] = *val as f32;
1233                                    }
1234                                } else {
1235                                    // Degenerate refinement, reject
1236                                    continue;
1237                                }
1238
1239                                if let Some(code_ref) =
1240                                    sample_grid_soa_precomputed::<S>(img, &roi, &ref_h_mat, decoder)
1241                                {
1242                                    if let Some((id_ref, hamming_ref, _)) =
1243                                        S::decode(&code_ref, decoder, 255)
1244                                    {
1245                                        // Only keep if it's the same tag and hamming is not worse
1246                                        if id_ref == id && hamming_ref <= hamming {
1247                                            best_h = hamming_ref;
1248                                            best_code = Some(code_ref);
1249                                            // Update the actual corners in the batch!
1250                                            if let Some(code_inner) = &best_code {
1251                                                return (
1252                                                    CandidateState::Valid,
1253                                                    best_id,
1254                                                    best_rot,
1255                                                    S::to_debug_bits(code_inner),
1256                                                    best_h as f32,
1257                                                    Some(refined_corners_f32),
1258                                                );
1259                                            }
1260                                        }
1261                                    }
1262                                }
1263                            }
1264
1265                            return (
1266                                CandidateState::Valid,
1267                                best_id,
1268                                best_rot,
1269                                S::to_debug_bits(&code),
1270                                hamming as f32,
1271                                None,
1272                            );
1273                        }
1274
1275                        if best_h == 0 {
1276                            break;
1277                        }
1278                    }
1279
1280                    // Stage 2: Configurable Corner Refinement (Recovery for near-misses)
1281                    let max_h_for_refine = if decoders.iter().any(|d| d.name() == "36h11") {
1282                        10
1283                    } else {
1284                        4
1285                    };
1286
1287                    if best_h > config.max_hamming_error
1288                        && best_h <= max_h_for_refine
1289                        && best_overall_code.is_some()
1290                    {
1291                        match config.refinement_mode {
1292                            crate::config::CornerRefinementMode::None
1293                            | crate::config::CornerRefinementMode::GridFit => {
1294                                // GridFit not ported to SoA yet to save complexity.
1295                            }
1296                            crate::config::CornerRefinementMode::Edge
1297                            | crate::config::CornerRefinementMode::Erf => {
1298                                let nudge = 0.2;
1299                                let mut current_corners = [Point2f::default(); 4];
1300                                current_corners.copy_from_slice(corners);
1301
1302                                for _pass in 0..2 {
1303                                    let mut pass_improved = false;
1304                                    for c_idx in 0..4 {
1305                                        for (dx, dy) in [
1306                                            (nudge, 0.0),
1307                                            (-nudge, 0.0),
1308                                            (0.0, nudge),
1309                                            (0.0, -nudge),
1310                                        ] {
1311                                            let mut test_corners = current_corners;
1312                                            test_corners[c_idx].x += dx;
1313                                            test_corners[c_idx].y += dy;
1314
1315                                            // Must recompute homography for the nudged corners
1316                                            let dst = [
1317                                                [
1318                                                    f64::from(test_corners[0].x),
1319                                                    f64::from(test_corners[0].y),
1320                                                ],
1321                                                [
1322                                                    f64::from(test_corners[1].x),
1323                                                    f64::from(test_corners[1].y),
1324                                                ],
1325                                                [
1326                                                    f64::from(test_corners[2].x),
1327                                                    f64::from(test_corners[2].y),
1328                                                ],
1329                                                [
1330                                                    f64::from(test_corners[3].x),
1331                                                    f64::from(test_corners[3].y),
1332                                                ],
1333                                            ];
1334
1335                                            if let Some(h_new) = Homography::square_to_quad(&dst) {
1336                                                let mut h_mat = Matrix3x3 {
1337                                                    data: [0.0; 9],
1338                                                    padding: [0.0; 7],
1339                                                };
1340                                                for (j, val) in h_new.h.iter().enumerate() {
1341                                                    h_mat.data[j] = *val as f32;
1342                                                }
1343
1344                                                for decoder in decoders {
1345                                                    if let Some(code) =
1346                                                        sample_grid_soa_precomputed::<S>(
1347                                                            img,
1348                                                            &roi,
1349                                                            &h_mat,
1350                                                            decoder.as_ref(),
1351                                                        )
1352                                                    {
1353                                                        if let Some((id, hamming, rot)) =
1354                                                            S::decode(&code, decoder.as_ref(), 255)
1355                                                        {
1356                                                            if hamming < best_h {
1357                                                                best_h = hamming;
1358                                                                best_overall_code =
1359                                                                    Some(code.clone());
1360                                                                current_corners = test_corners;
1361                                                                pass_improved = true;
1362
1363                                                                if hamming
1364                                                                    <= config.max_hamming_error
1365                                                                {
1366                                                                    best_id = id;
1367                                                                    best_rot = rot;
1368                                                                    best_code = Some(code.clone());
1369
1370                                                                    return (
1371                                                                        CandidateState::Valid,
1372                                                                        best_id,
1373                                                                        best_rot,
1374                                                                        S::to_debug_bits(&code),
1375                                                                        best_h as f32,
1376                                                                        Some(current_corners),
1377                                                                    );
1378                                                                }
1379                                                            }
1380                                                        }
1381                                                    }
1382                                                }
1383                                            }
1384                                        }
1385                                    }
1386                                    if !pass_improved {
1387                                        break;
1388                                    }
1389                                }
1390                            }
1391                        }
1392                    }
1393
1394                    if let Some(code) = best_code {
1395                        (
1396                            CandidateState::Valid,
1397                            best_id,
1398                            best_rot,
1399                            S::to_debug_bits(&code),
1400                            best_h as f32,
1401                            None,
1402                        )
1403                    } else {
1404                        // Even on failure, return the best hamming distance found for debugging.
1405                        // If no code was sampled at all, best_h will be u32::MAX.
1406                        (
1407                            CandidateState::FailedDecode,
1408                            0,
1409                            0,
1410                            0,
1411                            if best_h == u32::MAX { 0.0 } else { best_h as f32 },
1412                            None,
1413                        )
1414                    }
1415                })
1416        })
1417        .collect();
1418
1419    for (i, (state, id, rot, payload, error_rate, refined_corners)) in
1420        results.into_iter().enumerate()
1421    {
1422        batch.status_mask[i] = state;
1423        batch.ids[i] = id;
1424        batch.payloads[i] = payload;
1425        batch.error_rates[i] = error_rate;
1426
1427        if let Some(refined) = refined_corners {
1428            for (j, corner) in refined.iter().enumerate() {
1429                batch.corners[i][j] = *corner;
1430            }
1431        }
1432
1433        if state == CandidateState::Valid && rot > 0 {
1434            // Reorder corners based on rotation
1435            let mut temp_corners = [Point2f::default(); 4];
1436            for (j, item) in temp_corners.iter_mut().enumerate() {
1437                let src_idx = (j + usize::from(rot)) % 4;
1438                *item = batch.corners[i][src_idx];
1439            }
1440            for (j, item) in temp_corners.iter().enumerate() {
1441                batch.corners[i][j] = *item;
1442            }
1443        }
1444    }
1445}
1446
/// A trait for decoding binary payloads from extracted tags.
///
/// Implementors are zero-sized family markers that delegate to the
/// dictionaries in `crate::dictionaries`.
pub trait TagDecoder: Send + Sync {
    /// Returns the name of the decoder family (e.g., "36h11").
    fn name(&self) -> &str;
    /// Returns the dimension of the tag grid (e.g., 6 for 36h11).
    fn dimension(&self) -> usize;
    /// Returns the active number of bits in the tag (e.g., 41 for 41h12).
    fn bit_count(&self) -> usize;
    /// Returns the ideal sample points in canonical coordinates [-1, 1].
    fn sample_points(&self) -> &[(f64, f64)];
    /// Decodes the extracted bits into a tag ID, hamming distance, and rotation count.
    ///
    /// Returns `Some((id, hamming, rotation))` if decoding is successful, `None` otherwise.
    /// `rotation` is 0-3, representing 90-degree CW increments.
    /// The maximum accepted hamming distance is family-specific (each
    /// implementation picks its own default); use [`TagDecoder::decode_full`]
    /// to supply an explicit budget.
    fn decode(&self, bits: u64) -> Option<(u32, u32, u8)>; // (id, hamming, rotation)
    /// Decodes with custom maximum hamming distance.
    fn decode_full(&self, bits: u64, max_hamming: u32) -> Option<(u32, u32, u8)>;
    /// Get the original code for a given ID (useful for testing/simulation).
    fn get_code(&self, id: u16) -> Option<u64>;
    /// Returns the total number of codes in the dictionary.
    fn num_codes(&self) -> usize;
    /// Returns all rotated versions of all codes in the dictionary: (bits, id, rotation)
    fn rotated_codes(&self) -> &[(u64, u16, u8)];
    /// Executes a callback for each candidate in the dictionary within a given Hamming distance.
    /// This uses Multi-Index Hashing if available for efficiency.
    fn for_each_candidate_within_hamming(
        &self,
        bits: u64,
        max_hamming: u32,
        callback: &mut dyn FnMut(u64, u16, u8),
    );
}
1479
1480/// Decoder for the AprilTag 36h11 family.
1481pub struct AprilTag36h11;
1482
1483impl TagDecoder for AprilTag36h11 {
1484    fn name(&self) -> &'static str {
1485        "36h11"
1486    }
1487    fn dimension(&self) -> usize {
1488        6
1489    } // 6x6 grid of bits (excluding border)
1490    fn bit_count(&self) -> usize {
1491        36
1492    }
1493
1494    fn sample_points(&self) -> &[(f64, f64)] {
1495        crate::dictionaries::POINTS_APRILTAG36H11
1496    }
1497
1498    fn decode(&self, bits: u64) -> Option<(u32, u32, u8)> {
1499        // Use the pre-calculated dictionary with O(1) exact match + cached rotations.
1500        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag36h11)
1501            .decode(bits, 4) // Allow up to 4 bit errors for maximum recall
1502            .map(|(id, hamming, rot)| (u32::from(id), hamming, rot))
1503    }
1504
1505    fn decode_full(&self, bits: u64, max_hamming: u32) -> Option<(u32, u32, u8)> {
1506        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag36h11)
1507            .decode(bits, max_hamming)
1508            .map(|(id, hamming, rot)| (u32::from(id), hamming, rot))
1509    }
1510
1511    fn get_code(&self, id: u16) -> Option<u64> {
1512        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag36h11).get_code(id)
1513    }
1514
1515    fn num_codes(&self) -> usize {
1516        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag36h11).len()
1517    }
1518
1519    fn rotated_codes(&self) -> &[(u64, u16, u8)] {
1520        &[] // Removed from runtime struct, only used by testing/simulation which we will adjust later.
1521    }
1522    fn for_each_candidate_within_hamming(
1523        &self,
1524        bits: u64,
1525        max_hamming: u32,
1526        callback: &mut dyn FnMut(u64, u16, u8),
1527    ) {
1528        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag36h11)
1529            .for_each_candidate_within_hamming(bits, max_hamming, callback);
1530    }
1531}
1532
1533/// Decoder for the AprilTag 16h5 family.
1534pub struct AprilTag16h5;
1535
1536impl TagDecoder for AprilTag16h5 {
1537    fn name(&self) -> &'static str {
1538        "16h5"
1539    }
1540    fn dimension(&self) -> usize {
1541        4
1542    }
1543    fn bit_count(&self) -> usize {
1544        16
1545    }
1546    fn sample_points(&self) -> &[(f64, f64)] {
1547        crate::dictionaries::POINTS_APRILTAG16H5
1548    }
1549    fn decode(&self, bits: u64) -> Option<(u32, u32, u8)> {
1550        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag16h5)
1551            .decode(bits, 1)
1552            .map(|(id, hamming, rot)| (u32::from(id), hamming, rot))
1553    }
1554    fn decode_full(&self, bits: u64, max_hamming: u32) -> Option<(u32, u32, u8)> {
1555        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag16h5)
1556            .decode(bits, max_hamming)
1557            .map(|(id, hamming, rot)| (u32::from(id), hamming, rot))
1558    }
1559    fn get_code(&self, id: u16) -> Option<u64> {
1560        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag16h5).get_code(id)
1561    }
1562    fn num_codes(&self) -> usize {
1563        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag16h5).len()
1564    }
1565    fn rotated_codes(&self) -> &[(u64, u16, u8)] {
1566        &[]
1567    }
1568    fn for_each_candidate_within_hamming(
1569        &self,
1570        bits: u64,
1571        max_hamming: u32,
1572        callback: &mut dyn FnMut(u64, u16, u8),
1573    ) {
1574        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag16h5)
1575            .for_each_candidate_within_hamming(bits, max_hamming, callback);
1576    }
1577}
1578
1579/// Decoder for the ArUco 4x4_50 family.
1580pub struct ArUco4x4_50;
1581
1582impl TagDecoder for ArUco4x4_50 {
1583    fn name(&self) -> &'static str {
1584        "4X4_50"
1585    }
1586    fn dimension(&self) -> usize {
1587        4
1588    }
1589    fn bit_count(&self) -> usize {
1590        16
1591    }
1592
1593    fn sample_points(&self) -> &[(f64, f64)] {
1594        crate::dictionaries::POINTS_ARUCO4X4_50
1595    }
1596
1597    fn decode(&self, bits: u64) -> Option<(u32, u32, u8)> {
1598        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_50)
1599            .decode(bits, 2)
1600            .map(|(id, hamming, rot)| (u32::from(id), hamming, rot))
1601    }
1602
1603    fn decode_full(&self, bits: u64, max_hamming: u32) -> Option<(u32, u32, u8)> {
1604        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_50)
1605            .decode(bits, max_hamming)
1606            .map(|(id, hamming, rot)| (u32::from(id), hamming, rot))
1607    }
1608
1609    fn get_code(&self, id: u16) -> Option<u64> {
1610        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_50).get_code(id)
1611    }
1612
1613    fn num_codes(&self) -> usize {
1614        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_50).len()
1615    }
1616
1617    fn rotated_codes(&self) -> &[(u64, u16, u8)] {
1618        &[]
1619    }
1620    fn for_each_candidate_within_hamming(
1621        &self,
1622        bits: u64,
1623        max_hamming: u32,
1624        callback: &mut dyn FnMut(u64, u16, u8),
1625    ) {
1626        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_50)
1627            .for_each_candidate_within_hamming(bits, max_hamming, callback);
1628    }
1629}
1630
1631/// Decoder for the ArUco 4x4_100 family.
1632pub struct ArUco4x4_100;
1633
1634impl TagDecoder for ArUco4x4_100 {
1635    fn name(&self) -> &'static str {
1636        "4X4_100"
1637    }
1638    fn dimension(&self) -> usize {
1639        4
1640    }
1641    fn bit_count(&self) -> usize {
1642        16
1643    }
1644
1645    fn sample_points(&self) -> &[(f64, f64)] {
1646        crate::dictionaries::POINTS_ARUCO4X4_100
1647    }
1648
1649    fn decode(&self, bits: u64) -> Option<(u32, u32, u8)> {
1650        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_100)
1651            .decode(bits, 2)
1652            .map(|(id, hamming, rot)| (u32::from(id), hamming, rot))
1653    }
1654
1655    fn decode_full(&self, bits: u64, max_hamming: u32) -> Option<(u32, u32, u8)> {
1656        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_100)
1657            .decode(bits, max_hamming)
1658            .map(|(id, hamming, rot)| (u32::from(id), hamming, rot))
1659    }
1660
1661    fn get_code(&self, id: u16) -> Option<u64> {
1662        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_100).get_code(id)
1663    }
1664
1665    fn num_codes(&self) -> usize {
1666        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_100).len()
1667    }
1668
1669    fn rotated_codes(&self) -> &[(u64, u16, u8)] {
1670        &[]
1671    }
1672    fn for_each_candidate_within_hamming(
1673        &self,
1674        bits: u64,
1675        max_hamming: u32,
1676        callback: &mut dyn FnMut(u64, u16, u8),
1677    ) {
1678        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_100)
1679            .for_each_candidate_within_hamming(bits, max_hamming, callback);
1680    }
1681}
1682
1683/// Convert a TagFamily enum to a boxed decoder instance.
1684#[must_use]
1685pub fn family_to_decoder(family: config::TagFamily) -> Box<dyn TagDecoder + Send + Sync> {
1686    match family {
1687        config::TagFamily::AprilTag16h5 => Box::new(AprilTag16h5),
1688        config::TagFamily::AprilTag36h11 => Box::new(AprilTag36h11),
1689        config::TagFamily::ArUco4x4_50 => Box::new(ArUco4x4_50),
1690        config::TagFamily::ArUco4x4_100 => Box::new(ArUco4x4_100),
1691    }
1692}
1693
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        // Rotating a payload by 90 degrees four times must reproduce the
        // original pattern within the dim*dim payload bits.
        // NOTE(review): `0..u64::MAX` is an exclusive range, so the all-ones
        // pattern is never exercised — confirm whether `0..=u64::MAX` was
        // intended.
        #[test]
        fn test_rotation_invariants(bits in 0..u64::MAX) {
            let dim = 6;
            let r1 = rotate90(bits, dim);
            let r2 = rotate90(r1, dim);
            let r3 = rotate90(r2, dim);
            let r4 = rotate90(r3, dim);

            // Mask to dim*dim bits to avoid noise in upper bits
            let mask = (1u64 << (dim * dim)) - 1;
            prop_assert_eq!(bits & mask, r4 & mask);
        }

        // A geometrically rotated 36h11 code with up to two injected bit flips
        // must still decode back to its original ID. When flip1 == flip2 the
        // two XORs cancel, so the unmodified code is tested in that case.
        #[test]
        fn test_hamming_robustness(
            id_idx in 0usize..10,
            rotation in 0..4usize,
            flip1 in 0..36usize,
            flip2 in 0..36usize
        ) {
            let decoder = AprilTag36h11;
            let orig_id = id_idx as u16;
            let dict = crate::dictionaries::get_dictionary(config::TagFamily::AprilTag36h11);

            // Get the correctly geometrically rotated code directly from our
            // generated dictionaries (codes are stored as 4 rotations per ID).
            let mut test_bits = dict.codes[(id_idx * 4) + rotation];

            // Inject up to two bit errors.
            test_bits ^= 1 << flip1;
            test_bits ^= 1 << flip2;

            let result = decoder.decode(test_bits);
            prop_assert!(result.is_some());
            let (decoded_id, _, _) = result.expect("Should decode valid pattern");
            prop_assert_eq!(decoded_id, u32::from(orig_id));
        }

        // If a random bitstream happens to decode, the reported hamming
        // distance must stay within the decoder's correction budget.
        #[test]
        fn test_false_positive_resistance(bits in 0..u64::MAX) {
            let decoder = AprilTag36h11;
            // Random bitstreams should rarely match any of the 587 codes
            if let Some((_id, hamming, _rot)) = decoder.decode(bits) {
                // If it decodes, it must have low hamming distance
                prop_assert!(hamming <= 4);
            }
        }

        // Round-trip property: a homography fitted to four point pairs must
        // map each source point onto its destination.
        #[test]
        fn prop_homography_projection(
            src in prop::collection::vec((-100.0..100.0, -100.0..100.0), 4),
            dst in prop::collection::vec((0.0..1000.0, 0.0..1000.0), 4)
        ) {
            let src_pts = [
                [src[0].0, src[0].1],
                [src[1].0, src[1].1],
                [src[2].0, src[2].1],
                [src[3].0, src[3].1],
            ];
            let dst_pts = [
                [dst[0].0, dst[0].1],
                [dst[1].0, dst[1].1],
                [dst[2].0, dst[2].1],
                [dst[3].0, dst[3].1],
            ];

            if let Some(h) = Homography::from_pairs(&src_pts, &dst_pts) {
                for i in 0..4 {
                    let p = h.project(src_pts[i]);
                    // The 1e-3 tolerance is deliberately loose: float precision
                    // degrades in near-singular configurations where from_pairs
                    // still returns Some.
                    prop_assert!((p[0] - dst_pts[i][0]).abs() < 1e-3,
                        "Point {}: project({:?}) -> {:?}, expected {:?}", i, src_pts[i], p, dst_pts[i]);
                    prop_assert!((p[1] - dst_pts[i][1]).abs() < 1e-3);
                }
            }
        }
    }

    /// Every canonical 36h11 code must decode back to its own ID.
    #[test]
    fn test_all_codes_decode() {
        let decoder = AprilTag36h11;
        for id in 0..587u16 {
            let code = crate::dictionaries::DICT_APRILTAG36H11
                .get_code(id)
                .expect("valid ID");
            let result = decoder.decode(code);
            assert!(result.is_some());
            let (id_out, _, _) = result.unwrap();
            assert_eq!(id_out, u32::from(id));
        }
    }

    /// Paints a synthetic 8x8 tag into a 64x64 canvas and verifies that
    /// `sample_grid` reads the expected values at two painted data cells.
    #[test]
    fn test_grid_sampling() {
        let width = 64;
        let height = 64;
        let mut data = vec![0u8; width * height];
        // 8x8 grid, 36x36px tag centered at 32,32 => corners [14, 50]
        // TL=(14,14), TR=(50,14), BR=(50,50), BL=(14,50)

        // Border cells:
        // NOTE(review): the canvas is zero-initialized, so writing 0 here is a
        // no-op — presumably kept to document the border layout; confirm.
        for gy in 0..8 {
            for gx in 0..8 {
                if gx == 0 || gx == 7 || gy == 0 || gy == 7 {
                    for y in 0..4 {
                        for x in 0..4 {
                            let px = 14 + (f64::from(gx) * 4.5) as usize + x;
                            let py = 14 + (f64::from(gy) * 4.5) as usize + y;
                            if px < 64 && py < 64 {
                                data[py * width + px] = 0;
                            }
                        }
                    }
                }
            }
        }
        // Bit 0 (cell 1,1) -> White (canonical p = -0.625, -0.625)
        for y in 0..4 {
            for x in 0..4 {
                let px = 14 + (1.0 * 4.5) as usize + x;
                let py = 14 + (1.0 * 4.5) as usize + y;
                data[py * width + px] = 255;
            }
        }
        // Bit 35 (cell 6,6) -> Black (canonical p = 0.625, 0.625)
        for y in 0..4 {
            for x in 0..4 {
                let px = 14 + (6.0 * 4.5) as usize + x;
                let py = 14 + (6.0 * 4.5) as usize + y;
                data[py * width + px] = 0;
            }
        }

        let img = crate::image::ImageView::new(&data, width, height, width).unwrap();

        let decoder = AprilTag36h11;
        let arena = Bump::new();
        let cand = crate::Detection {
            corners: [[14.0, 14.0], [50.0, 14.0], [50.0, 50.0], [14.0, 50.0]],
            ..Default::default()
        };
        let bits =
            sample_grid(&img, &arena, &cand, &decoder, 20.0).expect("Should sample successfully");

        // bit 0 should be 1 (high intensity)
        assert_eq!(bits & 1, 1, "Bit 0 should be 1");
        // bit 35 should be 0 (low intensity)
        assert_eq!((bits >> 35) & 1, 0, "Bit 35 should be 0");
    }

    /// Sanity check of the DLT homography on a hand-picked quadrilateral:
    /// every source corner must map exactly onto its destination.
    #[test]
    fn test_homography_dlt() {
        let src = [[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0]];
        let dst = [[10.0, 10.0], [20.0, 11.0], [19.0, 21.0], [9.0, 20.0]];

        let h = Homography::from_pairs(&src, &dst).expect("DLT should succeed");
        for i in 0..4 {
            let p = h.project(src[i]);
            assert!((p[0] - dst[i][0]).abs() < 1e-6);
            assert!((p[1] - dst[i][1]).abs() < 1e-6);
        }
    }

    // ========================================================================
    // END-TO-END DECODER ROBUSTNESS TESTS
    // ========================================================================

    use crate::config::TagFamily;
    use crate::image::ImageView;
    use crate::quad::extract_quads_fast;
    use crate::segmentation::label_components_with_stats;
    use crate::test_utils::{TestImageParams, generate_test_image_with_params};
    use crate::threshold::ThresholdEngine;
    use bumpalo::Bump;

    /// Run full pipeline (threshold -> segmentation -> quad extraction ->
    /// sampling -> decode) from a synthetic image to decoded tags.
    /// Returns `(id, hamming)` pairs for every quad that decoded.
    fn run_full_pipeline(tag_size: usize, canvas_size: usize, tag_id: u16) -> Vec<(u32, u32)> {
        let params = TestImageParams {
            family: TagFamily::AprilTag36h11,
            id: tag_id,
            tag_size,
            canvas_size,
            ..Default::default()
        };

        let (data, _corners) = generate_test_image_with_params(&params);
        let img = ImageView::new(&data, canvas_size, canvas_size, canvas_size).unwrap();

        let arena = Bump::new();
        let engine = ThresholdEngine::new();
        let stats = engine.compute_tile_stats(&arena, &img);
        let mut binary = vec![0u8; canvas_size * canvas_size];
        engine.apply_threshold(&arena, &img, &stats, &mut binary);
        let label_result =
            label_components_with_stats(&arena, &binary, canvas_size, canvas_size, true);
        let detections = extract_quads_fast(&arena, &img, &label_result);

        let decoder = AprilTag36h11;
        let mut results = Vec::new();

        for quad in &detections {
            if let Some(bits) = sample_grid(&img, &arena, quad, &decoder, 20.0)
                && let Some((id, hamming, _rot)) = decoder.decode(bits)
            {
                results.push((id, hamming));
            }
        }

        results
    }

    /// Test E2E pipeline decodes correctly at varying sizes.
    #[test]
    fn test_e2e_decoding_at_varying_sizes() {
        let canvas_size = 640;
        let tag_sizes = [64, 100, 150, 200, 300];
        let test_id: u16 = 42;

        for tag_size in tag_sizes {
            let decoded = run_full_pipeline(tag_size, canvas_size, test_id);
            let found = decoded.iter().any(|(id, _)| *id == u32::from(test_id));

            // NOTE(review): this guard is always true for the sizes above
            // (the smallest is 64) — presumably left in place so sub-64 sizes
            // can be added to the array without hard-failing; confirm.
            if tag_size >= 64 {
                assert!(found, "Tag size {tag_size}: ID {test_id} not found");
            }

            if found {
                let (_, hamming) = decoded
                    .iter()
                    .find(|(id, _)| *id == u32::from(test_id))
                    .unwrap();
                println!("Tag size {tag_size:>3}px: ID {test_id} with hamming {hamming}");
            }
        }
    }

    /// Test that multiple tag IDs decode correctly (and with zero bit errors,
    /// since the synthetic images are noise-free).
    #[test]
    fn test_e2e_multiple_ids() {
        let canvas_size = 400;
        let tag_size = 150;
        let test_ids: [u16; 5] = [0, 42, 100, 200, 500];

        for &test_id in &test_ids {
            let decoded = run_full_pipeline(tag_size, canvas_size, test_id);
            let found = decoded.iter().any(|(id, _)| *id == u32::from(test_id));
            assert!(found, "ID {test_id} not decoded");

            let (_, hamming) = decoded
                .iter()
                .find(|(id, _)| *id == u32::from(test_id))
                .unwrap();
            assert_eq!(*hamming, 0, "ID {test_id} should have 0 hamming");
            println!("ID {test_id:>3}: Decoded with hamming {hamming}");
        }
    }

    /// Test decoding with edge ID values (first and last ID of 36h11).
    #[test]
    fn test_e2e_edge_ids() {
        let canvas_size = 400;
        let tag_size = 150;
        let edge_ids: [u16; 2] = [0, 586];

        for &test_id in &edge_ids {
            let decoded = run_full_pipeline(tag_size, canvas_size, test_id);
            let found = decoded.iter().any(|(id, _)| *id == u32::from(test_id));
            assert!(found, "Edge ID {test_id} not decoded");
            println!("Edge ID {test_id}: Decoded");
        }
    }
}