// locus_core/decoder.rs
//! Tag decoding, homography computation, and bit sampling.
//!
//! This module handles the final stage of the pipeline:
//! 1. **Homography**: Computing the projection from canonical tag space to image pixels.
//! 2. **Bit Sampling**: Bilinear interpolation of intensities at grid points.
//! 3. **Error Correction**: Correcting bit flips using tag-family specific Hamming distances.

8#![allow(unsafe_code, clippy::cast_sign_loss)]
9use crate::batch::{Matrix3x3, Point2f};
10use crate::config;
11use crate::simd::math::{bilinear_interpolate_fixed, rcp_nr};
12use crate::simd::roi::RoiCache;
13use bumpalo::Bump;
14use multiversion::multiversion;
15use nalgebra::{SMatrix, SVector};
16
17use crate::workspace::WORKSPACE_ARENA;
18
/// A 3x3 Homography matrix.
///
/// Maps 2D points (in homogeneous coordinates) from canonical tag space to
/// image pixels; see [`Homography::project`].
pub struct Homography {
    /// The 3x3 homography matrix.
    pub h: SMatrix<f64, 3, 3>,
}
24
/// A Digital Differential Analyzer (DDA) for incremental homography projection.
///
/// This avoids expensive matrix multiplications by using discrete partial derivatives
/// when stepping through a uniform grid in tag space.
///
/// A projected point is recovered as `(nx / d, ny / d)`; stepping one grid
/// cell in u or v adds the corresponding `*_du` / `*_dv` increments.
#[derive(Debug, Clone, Copy)]
pub struct HomographyDda {
    /// Current numerator for X coordinate.
    pub nx: f64,
    /// Current numerator for Y coordinate.
    pub ny: f64,
    /// Current denominator (perspective divide).
    pub d: f64,
    /// Partial derivative of nx with respect to u.
    pub dnx_du: f64,
    /// Partial derivative of ny with respect to u.
    pub dny_du: f64,
    /// Partial derivative of d with respect to u.
    pub dd_du: f64,
    /// Partial derivative of nx with respect to v.
    pub dnx_dv: f64,
    /// Partial derivative of ny with respect to v.
    pub dny_dv: f64,
    /// Partial derivative of d with respect to v.
    pub dd_dv: f64,
}
50
51impl Homography {
52    /// Convert the homography into a DDA state for a grid with step size (du, dv).
53    /// Initial state is computed at (u0, v0) in canonical tag space.
54    #[must_use]
55    pub fn to_dda(&self, u0: f64, v0: f64, du: f64, dv: f64) -> HomographyDda {
56        let h = self.h;
57        let nx = h[(0, 0)] * u0 + h[(0, 1)] * v0 + h[(0, 2)];
58        let ny = h[(1, 0)] * u0 + h[(1, 1)] * v0 + h[(1, 2)];
59        let d = h[(2, 0)] * u0 + h[(2, 1)] * v0 + h[(2, 2)];
60
61        HomographyDda {
62            nx,
63            ny,
64            d,
65            dnx_du: h[(0, 0)] * du,
66            dny_du: h[(1, 0)] * du,
67            dd_du: h[(2, 0)] * du,
68            dnx_dv: h[(0, 1)] * dv,
69            dny_dv: h[(1, 1)] * dv,
70            dd_dv: h[(2, 1)] * dv,
71        }
72    }
73
74    /// Compute homography from 4 source points to 4 destination points using DLT.
75    /// Points are [x, y].
76    /// Compute homography from 4 source points to 4 destination points using DLT.
77    /// Points are [x, y].
78    #[must_use]
79    pub fn from_pairs(src: &[[f64; 2]; 4], dst: &[[f64; 2]; 4]) -> Option<Self> {
80        let mut a = SMatrix::<f64, 8, 9>::zeros();
81
82        for i in 0..4 {
83            let sx = src[i][0];
84            let sy = src[i][1];
85            let dx = dst[i][0];
86            let dy = dst[i][1];
87
88            a[(i * 2, 0)] = -sx;
89            a[(i * 2, 1)] = -sy;
90            a[(i * 2, 2)] = -1.0;
91            a[(i * 2, 6)] = sx * dx;
92            a[(i * 2, 7)] = sy * dx;
93            a[(i * 2, 8)] = dx;
94
95            a[(i * 2 + 1, 3)] = -sx;
96            a[(i * 2 + 1, 4)] = -sy;
97            a[(i * 2 + 1, 5)] = -1.0;
98            a[(i * 2 + 1, 6)] = sx * dy;
99            a[(i * 2 + 1, 7)] = sy * dy;
100            a[(i * 2 + 1, 8)] = dy;
101        }
102
103        let mut b = SVector::<f64, 8>::zeros();
104        let mut m = SMatrix::<f64, 8, 8>::zeros();
105        for i in 0..8 {
106            for j in 0..8 {
107                m[(i, j)] = a[(i, j)];
108            }
109            b[i] = -a[(i, 8)];
110        }
111
112        m.lu().solve(&b).and_then(|h_vec| {
113            let mut h = SMatrix::<f64, 3, 3>::identity();
114            h[(0, 0)] = h_vec[0];
115            h[(0, 1)] = h_vec[1];
116            h[(0, 2)] = h_vec[2];
117            h[(1, 0)] = h_vec[3];
118            h[(1, 1)] = h_vec[4];
119            h[(1, 2)] = h_vec[5];
120            h[(2, 0)] = h_vec[6];
121            h[(2, 1)] = h_vec[7];
122            h[(2, 2)] = 1.0;
123            let res = Self { h };
124            for i in 0..4 {
125                let p_proj = res.project(src[i]);
126                let err_sq = (p_proj[0] - dst[i][0]).powi(2) + (p_proj[1] - dst[i][1]).powi(2);
127                if !err_sq.is_finite() || err_sq > 1e-4 {
128                    return None;
129                }
130            }
131            Some(res)
132        })
133    }
134
135    /// Optimized homography computation from canonical unit square to a quad.
136    /// Source points are assumed to be: `[(-1,-1), (1,-1), (1,1), (-1,1)]`.
137    #[must_use]
138    pub fn square_to_quad(dst: &[[f64; 2]; 4]) -> Option<Self> {
139        let mut b = SVector::<f64, 8>::zeros();
140        let mut m = SMatrix::<f64, 8, 8>::zeros();
141
142        // Hardcoded coefficients for src = [(-1,-1), (1,-1), (1,1), (-1,1)]
143        // Point 0: (-1, -1) -> (x0, y0)
144        let x0 = dst[0][0];
145        let y0 = dst[0][1];
146        // h0 + h1 - h2 - x0*h6 - x0*h7 = -x0  =>  1, 1, -1, ..., -x0, -x0
147        m[(0, 0)] = 1.0;
148        m[(0, 1)] = 1.0;
149        m[(0, 2)] = -1.0;
150        m[(0, 6)] = -x0;
151        m[(0, 7)] = -x0;
152        b[0] = -x0;
153        // h3 + h4 - h5 - y0*h6 - y0*h7 = -y0  =>  ..., 1, 1, -1, -y0, -y0
154        m[(1, 3)] = 1.0;
155        m[(1, 4)] = 1.0;
156        m[(1, 5)] = -1.0;
157        m[(1, 6)] = -y0;
158        m[(1, 7)] = -y0;
159        b[1] = -y0;
160
161        // Point 1: (1, -1) -> (x1, y1)
162        let x1 = dst[1][0];
163        let y1 = dst[1][1];
164        // -h0 + h1 + h2 + x1*h6 - x1*h7 = -x1
165        m[(2, 0)] = -1.0;
166        m[(2, 1)] = 1.0;
167        m[(2, 2)] = -1.0;
168        m[(2, 6)] = x1;
169        m[(2, 7)] = -x1;
170        b[2] = -x1;
171        m[(3, 3)] = -1.0;
172        m[(3, 4)] = 1.0;
173        m[(3, 5)] = -1.0;
174        m[(3, 6)] = y1;
175        m[(3, 7)] = -y1;
176        b[3] = -y1;
177
178        // Point 2: (1, 1) -> (x2, y2)
179        let x2 = dst[2][0];
180        let y2 = dst[2][1];
181        // -h0 - h1 + h2 + x2*h6 + x2*h7 = -x2
182        m[(4, 0)] = -1.0;
183        m[(4, 1)] = -1.0;
184        m[(4, 2)] = -1.0;
185        m[(4, 6)] = x2;
186        m[(4, 7)] = x2;
187        b[4] = -x2;
188        m[(5, 3)] = -1.0;
189        m[(5, 4)] = -1.0;
190        m[(5, 5)] = -1.0;
191        m[(5, 6)] = y2;
192        m[(5, 7)] = y2;
193        b[5] = -y2;
194
195        // Point 3: (-1, 1) -> (x3, y3)
196        let x3 = dst[3][0];
197        let y3 = dst[3][1];
198        // h0 - h1 + h2 - x3*h6 + x3*h7 = -x3
199        m[(6, 0)] = 1.0;
200        m[(6, 1)] = -1.0;
201        m[(6, 2)] = -1.0;
202        m[(6, 6)] = -x3;
203        m[(6, 7)] = x3;
204        b[6] = -x3;
205        m[(7, 3)] = 1.0;
206        m[(7, 4)] = -1.0;
207        m[(7, 5)] = -1.0;
208        m[(7, 6)] = -y3;
209        m[(7, 7)] = y3;
210        b[7] = -y3;
211
212        m.lu().solve(&b).and_then(|h_vec| {
213            let mut h = SMatrix::<f64, 3, 3>::identity();
214            h[(0, 0)] = h_vec[0];
215            h[(0, 1)] = h_vec[1];
216            h[(0, 2)] = h_vec[2];
217            h[(1, 0)] = h_vec[3];
218            h[(1, 1)] = h_vec[4];
219            h[(1, 2)] = h_vec[5];
220            h[(2, 0)] = h_vec[6];
221            h[(2, 1)] = h_vec[7];
222            h[(2, 2)] = 1.0;
223            let res = Self { h };
224            let src_unit = [[-1.0, -1.0], [1.0, -1.0], [1.0, 1.0], [-1.0, 1.0]];
225            for i in 0..4 {
226                let p_proj = res.project(src_unit[i]);
227                let err_sq = (p_proj[0] - dst[i][0]).powi(2) + (p_proj[1] - dst[i][1]).powi(2);
228                if err_sq > 1e-4 {
229                    return None;
230                }
231            }
232            Some(res)
233        })
234    }
235
236    /// Project a point using the homography.
237    #[must_use]
238    pub fn project(&self, p: [f64; 2]) -> [f64; 2] {
239        let res = self.h * SVector::<f64, 3>::new(p[0], p[1], 1.0);
240        let w = res[2];
241        [res[0] / w, res[1] / w]
242    }
243}
244
245/// Compute homographies for all active quads in the batch using a pure-function SoA approach.
246///
247/// This uses `rayon` for data-parallel computation of the square-to-quad homographies.
248/// Quads are defined by 4 corners in `corners` for each candidate index.
249#[tracing::instrument(skip_all, name = "pipeline::homography_pass")]
250pub fn compute_homographies_soa(
251    corners: &[[Point2f; 4]],
252    status_mask: &[crate::batch::CandidateState],
253    homographies: &mut [Matrix3x3],
254) {
255    use crate::batch::CandidateState;
256    use rayon::prelude::*;
257
258    // Each homography maps from canonical square [(-1,-1), (1,-1), (1,1), (-1,1)] to image quads.
259    homographies
260        .par_iter_mut()
261        .enumerate()
262        .for_each(|(i, h_out)| {
263            if status_mask[i] != CandidateState::Active {
264                h_out.data = [0.0; 9];
265                h_out.padding = [0.0; 7];
266                return;
267            }
268
269            let dst = [
270                [f64::from(corners[i][0].x), f64::from(corners[i][0].y)],
271                [f64::from(corners[i][1].x), f64::from(corners[i][1].y)],
272                [f64::from(corners[i][2].x), f64::from(corners[i][2].y)],
273                [f64::from(corners[i][3].x), f64::from(corners[i][3].y)],
274            ];
275
276            if let Some(h) = Homography::square_to_quad(&dst) {
277                // Copy data to f32 batch. Nalgebra stores in column-major order.
278                for (j, val) in h.h.iter().enumerate() {
279                    h_out.data[j] = *val as f32;
280                }
281                h_out.padding = [0.0; 7];
282            } else {
283                // Failed to compute homography (e.g. degenerate quad).
284                h_out.data = [0.0; 9];
285                h_out.padding = [0.0; 7];
286            }
287        });
288}
289
290/// Refine corners using "Erf-Fit" (Gaussian fit to intensity profile).
291///
292/// This assumes the edge intensity profile is an Error Function (convolution of step edge with Gaussian PSF).
293/// We minimize the photometric error between the image and the ERF model using Gauss-Newton.
294pub(crate) fn refine_corners_erf(
295    arena: &bumpalo::Bump,
296    img: &crate::image::ImageView,
297    corners: &[[f64; 2]; 4],
298    sigma: f64,
299) -> [[f64; 2]; 4] {
300    let mut lines = [(0.0f64, 0.0f64, 0.0f64); 4];
301    let mut line_valid = [false; 4];
302
303    // Sub-pixel edge refinement for each of the 4 edges
304    for i in 0..4 {
305        let next = (i + 1) % 4;
306        let p1 = corners[i];
307        let p2 = corners[next];
308
309        if let Some((nx, ny, d)) = fit_edge_erf(arena, img, p1, p2, sigma) {
310            lines[i] = (nx, ny, d);
311            line_valid[i] = true;
312        }
313    }
314
315    if !line_valid.iter().all(|&v| v) {
316        return *corners;
317    }
318
319    // Intersect lines to get refined corners
320    let mut refined = *corners;
321    for i in 0..4 {
322        let prev = (i + 3) % 4;
323        let (a1, b1, c1) = lines[prev];
324        let (a2, b2, c2) = lines[i];
325        let det = a1 * b2 - a2 * b1;
326        if det.abs() > 1e-6 {
327            let x = (b1 * c2 - b2 * c1) / det;
328            let y = (a2 * c1 - a1 * c2) / det;
329
330            // Sanity check
331            let dist_sq = (x - corners[i][0]).powi(2) + (y - corners[i][1]).powi(2);
332            if dist_sq < 4.0 {
333                refined[i] = [x, y];
334            }
335        }
336    }
337    refined
338}
339
/// Helper for ERF edge fitting.
///
/// Holds one edge segment and the current line estimate
/// `nx*x + ny*y + d = 0` being refined against the image.
struct EdgeFitter<'a> {
    // Image being sampled.
    img: &'a crate::image::ImageView<'a>,
    // First endpoint of the edge segment.
    p1: [f64; 2],
    // Segment delta (p2 - p1), x component.
    dx: f64,
    // Segment delta (p2 - p1), y component.
    dy: f64,
    // Segment length in pixels.
    len: f64,
    // Unit line normal, x component.
    nx: f64,
    // Unit line normal, y component.
    ny: f64,
    // Line offset: the line is nx*x + ny*y + d = 0.
    d: f64,
}
351
352impl<'a> EdgeFitter<'a> {
353    fn new(img: &'a crate::image::ImageView<'a>, p1: [f64; 2], p2: [f64; 2]) -> Option<Self> {
354        let dx = p2[0] - p1[0];
355        let dy = p2[1] - p1[1];
356        let len = (dx * dx + dy * dy).sqrt();
357        if len < 4.0 {
358            return None;
359        }
360        let nx = -dy / len;
361        let ny = dx / len;
362        // Initial d from input corners
363        let d = -(nx * p1[0] + ny * p1[1]);
364
365        Some(Self {
366            img,
367            p1,
368            dx,
369            dy,
370            len,
371            nx,
372            ny,
373            d,
374        })
375    }
376
377    fn scan_initial_d(&mut self) {
378        let window = 2.5;
379        let (x0, x1, y0, y1) = self.get_scan_bounds(window);
380
381        let mut best_offset = 0.0;
382        let mut best_grad = 0.0;
383
384        for k in -6..=6 {
385            let offset = f64::from(k) * 0.4;
386            let scan_d = self.d + offset;
387
388            let (sum_g, count) =
389                project_gradients_optimized(self.img, self.nx, self.ny, x0, x1, y0, y1, scan_d);
390
391            if count > 0 && sum_g > best_grad {
392                best_grad = sum_g;
393                best_offset = offset;
394            }
395        }
396        self.d += best_offset;
397    }
398
399    fn collect_samples(
400        &self,
401        arena: &'a bumpalo::Bump,
402    ) -> bumpalo::collections::Vec<'a, (f64, f64, f64)> {
403        let window = 2.5;
404        let (x0, x1, y0, y1) = self.get_scan_bounds(window);
405
406        collect_samples_optimized(
407            self.img, self.nx, self.ny, self.d, self.p1, self.dx, self.dy, self.len, x0, x1, y0,
408            y1, window, arena,
409        )
410    }
411
412    fn refine(&mut self, samples: &[(f64, f64, f64)], sigma: f64) {
413        if samples.len() < 10 {
414            return;
415        }
416        let mut a = 128.0;
417        let mut b = 128.0;
418        let inv_sigma = 1.0 / sigma;
419        let _sqrt_pi = std::f64::consts::PI.sqrt();
420
421        for _ in 0..15 {
422            let mut dark_sum = 0.0;
423            let mut dark_weight = 0.0;
424            let mut light_sum = 0.0;
425            let mut light_weight = 0.0;
426
427            for &(x, y, _) in samples {
428                let dist = self.nx * x + self.ny * y + self.d;
429                let val = self.img.sample_bilinear(x, y);
430                if dist < -1.0 {
431                    let w = (-dist - 0.5).clamp(0.1, 2.0);
432                    dark_sum += val * w;
433                    dark_weight += w;
434                } else if dist > 1.0 {
435                    let w = (dist - 0.5).clamp(0.1, 2.0);
436                    light_sum += val * w;
437                    light_weight += w;
438                }
439            }
440
441            if dark_weight > 0.0 && light_weight > 0.0 {
442                a = dark_sum / dark_weight;
443                b = light_sum / light_weight;
444            }
445
446            if (b - a).abs() < 5.0 {
447                break;
448            }
449
450            let (sum_jtj, sum_jt_res) = refine_accumulate_optimized(
451                samples, self.img, self.nx, self.ny, self.d, a, b, sigma, inv_sigma,
452            );
453
454            if sum_jtj < 1e-6 {
455                break;
456            }
457            let step = sum_jt_res / sum_jtj;
458            self.d += step.clamp(-0.5, 0.5);
459            if step.abs() < 1e-4 {
460                break;
461            }
462        }
463    }
464
465    #[allow(clippy::cast_sign_loss)]
466    fn get_scan_bounds(&self, window: f64) -> (usize, usize, usize, usize) {
467        let p2_0 = self.p1[0] + self.dx;
468        let p2_1 = self.p1[1] + self.dy;
469
470        // Clamp to valid image coordinates (padding of 1 pixel)
471        let w_limit = (self.img.width - 2) as f64;
472        let h_limit = (self.img.height - 2) as f64;
473
474        let x0 = (self.p1[0].min(p2_0) - window - 0.5).clamp(1.0, w_limit) as usize;
475        let x1 = (self.p1[0].max(p2_0) + window + 0.5).clamp(1.0, w_limit) as usize;
476        let y0 = (self.p1[1].min(p2_1) - window - 0.5).clamp(1.0, h_limit) as usize;
477        let y1 = (self.p1[1].max(p2_1) + window + 0.5).clamp(1.0, h_limit) as usize;
478        (x0, x1, y0, y1)
479    }
480}
481
482#[multiversion(targets(
483    "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
484    "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
485    "aarch64+neon"
486))]
487#[allow(clippy::too_many_arguments)]
488fn project_gradients_optimized(
489    img: &crate::image::ImageView,
490    nx: f64,
491    ny: f64,
492    x0: usize,
493    x1: usize,
494    y0: usize,
495    y1: usize,
496    scan_d: f64,
497) -> (f64, usize) {
498    let mut sum_g = 0.0;
499    let mut count = 0;
500
501    for py in y0..=y1 {
502        let mut px = x0;
503        let y = py as f64 + 0.5;
504
505        #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
506        if let Some(_dispatch) = multiversion::target::x86_64::avx2::get() {
507            unsafe {
508                use std::arch::x86_64::*;
509                let v_nx = _mm256_set1_pd(nx);
510                let v_ny = _mm256_set1_pd(ny);
511                let v_scan_d = _mm256_set1_pd(scan_d);
512                let v_y = _mm256_set1_pd(y);
513                let v_abs_mask = _mm256_set1_pd(-0.0);
514
515                while px + 4 <= x1 {
516                    let v_x = _mm256_set_pd(
517                        (px + 3) as f64 + 0.5,
518                        (px + 2) as f64 + 0.5,
519                        (px + 1) as f64 + 0.5,
520                        px as f64 + 0.5,
521                    );
522
523                    let v_dist = _mm256_add_pd(
524                        _mm256_add_pd(_mm256_mul_pd(v_nx, v_x), _mm256_mul_pd(v_ny, v_y)),
525                        v_scan_d,
526                    );
527
528                    let v_abs_dist = _mm256_andnot_pd(v_abs_mask, v_dist);
529                    let v_cmp = _mm256_cmp_pd(v_abs_dist, _mm256_set1_pd(1.0), _CMP_LT_OQ);
530                    let mask = _mm256_movemask_pd(v_cmp);
531
532                    if mask != 0 {
533                        for j in 0..4 {
534                            if (mask >> j) & 1 != 0 {
535                                let g = img.sample_gradient_bilinear((px + j) as f64, y);
536                                sum_g += (g[0] * nx + g[1] * ny).abs();
537                                count += 1;
538                            }
539                        }
540                    }
541                    px += 4;
542                }
543            }
544        }
545
546        while px <= x1 {
547            let x = px as f64 + 0.5;
548            let dist = nx * x + ny * y + scan_d;
549            if dist.abs() < 1.0 {
550                let g = img.sample_gradient_bilinear(x, y);
551                sum_g += (g[0] * nx + g[1] * ny).abs();
552                count += 1;
553            }
554            px += 1;
555        }
556    }
557    (sum_g, count)
558}
559
/// Gather `(x, y, intensity)` samples for the ERF fit.
///
/// Collects every pixel in the inclusive rectangle `[x0, x1] x [y0, y1]`
/// whose center is within `window` (signed distance) of the line
/// `nx*x + ny*y + d = 0` and whose projection parameter `t` along the
/// segment `p1 -> p1 + (dx, dy)` lies in [-0.1, 1.1] (small overhang past
/// both endpoints). Intensities come from `get_pixel` (nearest pixel),
/// while the stored coordinates are pixel centers (+0.5).
/// The result vector is bump-allocated in `arena`.
#[multiversion(targets(
    "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
    "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
    "aarch64+neon"
))]
#[allow(clippy::too_many_arguments)]
fn collect_samples_optimized<'a>(
    img: &crate::image::ImageView,
    nx: f64,
    ny: f64,
    d: f64,
    p1: [f64; 2],
    dx: f64,
    dy: f64,
    len: f64,
    x0: usize,
    x1: usize,
    y0: usize,
    y1: usize,
    window: f64,
    arena: &'a bumpalo::Bump,
) -> bumpalo::collections::Vec<'a, (f64, f64, f64)> {
    let mut samples = bumpalo::collections::Vec::with_capacity_in(128, arena);

    for py in y0..=y1 {
        let mut px = x0;
        // Pixel-center sampling.
        let y = py as f64 + 0.5;

        // AVX2 fast path: test 4 candidate pixels per iteration; surviving
        // lanes are pushed in ascending-x order, matching the scalar path.
        #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
        if let Some(_dispatch) = multiversion::target::x86_64::avx2::get() {
            // SAFETY: this branch only runs when the AVX2 dispatch target is
            // active, so all AVX2 intrinsics used here are available.
            unsafe {
                use std::arch::x86_64::*;
                let v_nx = _mm256_set1_pd(nx);
                let v_ny = _mm256_set1_pd(ny);
                let v_d = _mm256_set1_pd(d);
                let v_y = _mm256_set1_pd(y);
                let v_p1x = _mm256_set1_pd(p1[0]);
                let v_p1y = _mm256_set1_pd(p1[1]);
                let v_dx = _mm256_set1_pd(dx);
                let v_dy = _mm256_set1_pd(dy);
                let v_inv_len_sq = _mm256_set1_pd(1.0 / (len * len));
                // Sign-bit mask; `andnot` with it yields |x|.
                let v_abs_mask = _mm256_set1_pd(-0.0);

                while px + 4 <= x1 {
                    let v_x = _mm256_set_pd(
                        (px + 3) as f64 + 0.5,
                        (px + 2) as f64 + 0.5,
                        (px + 1) as f64 + 0.5,
                        px as f64 + 0.5,
                    );

                    // dist = nx*x + ny*y + d (signed distance to the line).
                    let v_dist = _mm256_add_pd(
                        _mm256_add_pd(_mm256_mul_pd(v_nx, v_x), _mm256_mul_pd(v_ny, v_y)),
                        v_d,
                    );
                    let v_abs_dist = _mm256_andnot_pd(v_abs_mask, v_dist);
                    let v_dist_mask = _mm256_cmp_pd(v_abs_dist, _mm256_set1_pd(window), _CMP_LE_OQ);

                    // t = ((x - p1x)*dx + (y - p1y)*dy) / len^2, the
                    // normalized projection onto the segment.
                    let v_t = _mm256_mul_pd(
                        _mm256_add_pd(
                            _mm256_mul_pd(_mm256_sub_pd(v_x, v_p1x), v_dx),
                            _mm256_mul_pd(_mm256_sub_pd(v_y, v_p1y), v_dy),
                        ),
                        v_inv_len_sq,
                    );

                    let v_t_mask_low = _mm256_cmp_pd(v_t, _mm256_set1_pd(-0.1), _CMP_GE_OQ);
                    let v_t_mask_high = _mm256_cmp_pd(v_t, _mm256_set1_pd(1.1), _CMP_LE_OQ);

                    let final_mask = _mm256_movemask_pd(_mm256_and_pd(
                        v_dist_mask,
                        _mm256_and_pd(v_t_mask_low, v_t_mask_high),
                    ));

                    if final_mask != 0 {
                        for j in 0..4 {
                            if (final_mask >> j) & 1 != 0 {
                                let val = f64::from(img.get_pixel(px + j, py));
                                samples.push(((px + j) as f64 + 0.5, y, val));
                            }
                        }
                    }
                    px += 4;
                }
            }
        }

        // Scalar tail (and full path on non-AVX2 targets).
        while px <= x1 {
            let x = px as f64 + 0.5;
            let dist = nx * x + ny * y + d;
            if dist.abs() <= window {
                let t = ((x - p1[0]) * dx + (y - p1[1]) * dy) / (len * len);
                if (-0.1..=1.1).contains(&t) {
                    let val = f64::from(img.get_pixel(px, py));
                    samples.push((x, y, val));
                }
            }
            px += 1;
        }
    }
    samples
}
662
/// Accumulate the Gauss-Newton normal equations for the 1-D line-offset fit.
///
/// The edge model is `I(s) = (a+b)/2 + (b-a)/2 * erf(s)` with
/// `s = (nx*x + ny*y + d) / sigma`; since only `d` is optimized, the normal
/// equations reduce to the scalars `(JᵀJ, Jᵀr)` returned here. Samples with
/// `|s| > 3` (more than 3 sigma from the line) contribute nothing.
/// `img` is currently unused (intensities are taken from the sample tuples)
/// but kept for signature stability with the caller.
#[multiversion(targets(
    "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
    "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
    "aarch64+neon"
))]
#[allow(clippy::too_many_arguments)]
fn refine_accumulate_optimized(
    samples: &[(f64, f64, f64)],
    #[allow(unused_variables)] img: &crate::image::ImageView,
    nx: f64,
    ny: f64,
    d: f64,
    a: f64,
    b: f64,
    sigma: f64,
    inv_sigma: f64,
) -> (f64, f64) {
    let mut sum_jtj = 0.0;
    let mut sum_jt_res = 0.0;
    let sqrt_pi = std::f64::consts::PI.sqrt();
    // d(model)/dd = (b-a)/(sqrt(pi)*sigma) * exp(-s^2).
    let k = (b - a) / (sqrt_pi * sigma);

    let mut i = 0;

    // AVX2 fast path: vectorize the distance/range test; lanes inside the
    // ±3 sigma band fall back to scalar math for erf/exp.
    #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
    if let Some(_dispatch) = multiversion::target::x86_64::avx2::get() {
        // SAFETY: this branch only runs when the AVX2 dispatch target is
        // active, so all AVX2 intrinsics used here are available.
        unsafe {
            use std::arch::x86_64::*;
            let v_nx = _mm256_set1_pd(nx);
            let v_ny = _mm256_set1_pd(ny);
            let v_d = _mm256_set1_pd(d);
            // NOTE(review): v_a, v_b, v_k, and v_half are broadcast but never
            // used — the per-lane math below uses the scalar a/b/k instead.
            // Dead vector setup; candidate for cleanup.
            let v_a = _mm256_set1_pd(a);
            let v_b = _mm256_set1_pd(b);
            let v_inv_sigma = _mm256_set1_pd(inv_sigma);
            let v_k = _mm256_set1_pd(k);
            let v_half = _mm256_set1_pd(0.5);
            // Sign-bit mask; `andnot` with it yields |x|.
            let v_abs_mask = _mm256_set1_pd(-0.0);

            while i + 4 <= samples.len() {
                let s0 = samples[i];
                let s1 = samples[i + 1];
                let s2 = samples[i + 2];
                let s3 = samples[i + 3];

                let v_x = _mm256_set_pd(s3.0, s2.0, s1.0, s0.0);
                let v_y = _mm256_set_pd(s3.1, s2.1, s1.1, s0.1);
                let v_img_val = _mm256_set_pd(s3.2, s2.2, s1.2, s0.2);

                // s = (nx*x + ny*y + d) / sigma.
                let v_dist = _mm256_add_pd(
                    _mm256_add_pd(_mm256_mul_pd(v_nx, v_x), _mm256_mul_pd(v_ny, v_y)),
                    v_d,
                );
                let v_s = _mm256_mul_pd(v_dist, v_inv_sigma);

                let v_abs_s = _mm256_andnot_pd(v_abs_mask, v_s);
                let v_range_mask = _mm256_cmp_pd(v_abs_s, _mm256_set1_pd(3.0), _CMP_LE_OQ);

                if _mm256_movemask_pd(v_range_mask) != 0 {
                    // SAFETY: transmute is safe for same-size SIMD ↔ array conversions.
                    let ss: [f64; 4] = std::mem::transmute(v_s);
                    let iv: [f64; 4] = std::mem::transmute(v_img_val);
                    let mm: [f64; 4] = std::mem::transmute(v_range_mask);

                    for j in 0..4 {
                        // Comparison mask lanes are all-ones (NaN as f64) when
                        // true, all-zeros when false.
                        if mm[j] != 0.0 {
                            let s_val = ss[j];
                            let model = (a + b) * 0.5
                                + (b - a) * 0.5 * crate::simd::math::erf_approx(s_val);
                            let residual = iv[j] - model;
                            let jac = k * (-s_val * s_val).exp();
                            sum_jtj += jac * jac;
                            sum_jt_res += jac * residual;
                        }
                    }
                }
                i += 4;
            }
        }
    }

    // Scalar tail
    while i < samples.len() {
        let (x, y, img_val) = samples[i];
        let dist = nx * x + ny * y + d;
        let s = dist * inv_sigma;
        if s.abs() <= 3.0 {
            let model = (a + b) * 0.5 + (b - a) * 0.5 * crate::simd::math::erf_approx(s);
            let residual = img_val - model;
            let jac = k * (-s * s).exp();
            sum_jtj += jac * jac;
            sum_jt_res += jac * residual;
        }
        i += 1;
    }

    (sum_jtj, sum_jt_res)
}
760
761/// Fit a line (nx*x + ny*y + d = 0) to an edge using the ERF intensity model.
762fn fit_edge_erf(
763    arena: &bumpalo::Bump,
764    img: &crate::image::ImageView,
765    p1: [f64; 2],
766    p2: [f64; 2],
767    sigma: f64,
768) -> Option<(f64, f64, f64)> {
769    let mut fitter = EdgeFitter::new(img, p1, p2)?;
770    fitter.scan_initial_d();
771    let samples = fitter.collect_samples(arena);
772    if samples.len() < 10 {
773        return None;
774    }
775    fitter.refine(&samples, sigma);
776    Some((fitter.nx, fitter.ny, fitter.d))
777}
778
/// Returns the threshold that maximizes inter-class variance.
pub(crate) fn compute_otsu_threshold(values: &[f64]) -> f64 {
    if values.is_empty() {
        return 128.0;
    }

    let n = values.len() as f64;
    let total_sum: f64 = values.iter().sum();

    // Establish the search range from the data extremes.
    let (min_val, max_val) = values
        .iter()
        .fold((f64::MAX, f64::MIN), |(lo, hi), &v| (lo.min(v), hi.max(v)));

    let span = max_val - min_val;
    if span < 1.0 {
        // Essentially constant data; any split is meaningless.
        return f64::midpoint(min_val, max_val);
    }

    let mut best_threshold = f64::midpoint(min_val, max_val);
    let mut best_variance = 0.0;

    // Evaluate 15 evenly spaced candidate thresholds strictly inside (min, max).
    for step in 1..16 {
        let t = min_val + span * (f64::from(step) / 16.0);

        // Class 0: values at or below the candidate threshold.
        let (w0, sum0) = values
            .iter()
            .filter(|&&v| v <= t)
            .fold((0.0, 0.0), |(w, s), &v| (w + 1.0, s + v));

        let w1 = n - w0;
        if w0 < 1.0 || w1 < 1.0 {
            // One class is empty; the split is degenerate.
            continue;
        }

        let mean0 = sum0 / w0;
        let mean1 = (total_sum - sum0) / w1;

        // Inter-class variance (class counts as weights).
        let variance = w0 * w1 * (mean0 - mean1) * (mean0 - mean1);

        // Strict comparison keeps the lowest threshold on ties, matching the
        // ascending scan order.
        if variance > best_variance {
            best_variance = variance;
            best_threshold = t;
        }
    }

    best_threshold
}
833
/// Maximum number of bits in a supported tag family payload.
///
/// 64 bits lets a full payload fit in a single `u64` code word.
const MAX_BIT_COUNT: usize = 64;
836
837/// Sample values from the image using DDA-based coordinate generation and SIMD bilinear sampling.
838#[multiversion(targets("x86_64+avx2+fma", "aarch64+neon"))]
839fn sample_grid_values_dda_simd(
840    img: &crate::image::ImageView,
841    roi: &RoiCache,
842    h: &Homography,
843    decoder: &(impl TagDecoder + ?Sized),
844    intensities: &mut [f64],
845) -> bool {
846    let dim = decoder.dimension();
847    let n = decoder.bit_count();
848    let points = decoder.sample_points();
849    if points.is_empty() {
850        return false;
851    }
852
853    let _du = if dim > 1 {
854        points[1].0 - points[0].0
855    } else {
856        0.0
857    };
858    let _dv = if dim > 1 {
859        points[dim].1 - points[0].1
860    } else {
861        0.0
862    };
863
864    #[cfg(all(
865        target_arch = "x86_64",
866        target_feature = "avx2",
867        target_feature = "fma"
868    ))]
869    unsafe {
870        use crate::simd::math::rcp_nr_v8;
871        use crate::simd::sampler::sample_bilinear_v8;
872        use std::arch::x86_64::*;
873
874        let dda = h.to_dda(points[0].0, points[0].1, _du, _dv);
875
876        let w_limit = _mm256_set1_ps(img.width as f32 - 1.0);
877        let h_limit = _mm256_set1_ps(img.height as f32 - 1.0);
878
879        let mut current_nx_row = dda.nx as f32;
880        let mut current_ny_row = dda.ny as f32;
881        let mut current_d_row = dda.d as f32;
882
883        let dnx_du = dda.dnx_du as f32;
884        let dny_du = dda.dny_du as f32;
885        let dd_du = dda.dd_du as f32;
886
887        let v_dnx_du = _mm256_set1_ps(dnx_du);
888        let v_dny_du = _mm256_set1_ps(dny_du);
889        let v_dd_du = _mm256_set1_ps(dd_du);
890        let v_steps = _mm256_set_ps(7.0, 6.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.0);
891        let v_half = _mm256_set1_ps(0.5);
892
893        let mut idx = 0;
894        for _y in 0..dim {
895            let mut nx_start = current_nx_row;
896            let mut ny_start = current_ny_row;
897            let mut d_start = current_d_row;
898
899            for _x in (0..dim).step_by(8) {
900                let count = (dim - _x).min(8);
901
902                // Vectorized coordinate generation (DDA)
903                // x[i] = (nx + i*dnx_du)
904                let v_nx_simd = _mm256_fmadd_ps(v_steps, v_dnx_du, _mm256_set1_ps(nx_start));
905                let v_ny_simd = _mm256_fmadd_ps(v_steps, v_dny_du, _mm256_set1_ps(ny_start));
906                let v_d_simd = _mm256_fmadd_ps(v_steps, v_dd_du, _mm256_set1_ps(d_start));
907
908                // Perspective divide: (nx/d, ny/d)
909                let v_winv = rcp_nr_v8(v_d_simd);
910                let v_img_x_raw = _mm256_mul_ps(v_nx_simd, v_winv);
911                let v_img_y_raw = _mm256_mul_ps(v_ny_simd, v_winv);
912
913                // Offset by -0.5 to match bilinear logic center alignment
914                let v_img_x = _mm256_sub_ps(v_img_x_raw, v_half);
915                let v_img_y = _mm256_sub_ps(v_img_y_raw, v_half);
916
917                // Bounds check: must be in [0, width - 1) for safe 2x2 bilinear fetch
918                let v_zero = _mm256_setzero_ps();
919                let mask_x = _mm256_and_ps(
920                    _mm256_cmp_ps(v_img_x, v_zero, _CMP_GE_OQ),
921                    _mm256_cmp_ps(v_img_x, w_limit, _CMP_LT_OQ),
922                );
923                let mask_y = _mm256_and_ps(
924                    _mm256_cmp_ps(v_img_y, v_zero, _CMP_GE_OQ),
925                    _mm256_cmp_ps(v_img_y, h_limit, _CMP_LT_OQ),
926                );
927                let mask = _mm256_movemask_ps(_mm256_and_ps(mask_x, mask_y));
928
929                if (mask & ((1 << count) - 1)) != ((1 << count) - 1) {
930                    return false;
931                }
932
933                // Restore original (non-subtracted) coords for sample_bilinear_v8 which handles the offset
934                let mut v_img_x_arr = [0.0f32; 8];
935                let mut v_img_y_arr = [0.0f32; 8];
936                _mm256_storeu_ps(v_img_x_arr.as_mut_ptr(), v_img_x_raw);
937                _mm256_storeu_ps(v_img_y_arr.as_mut_ptr(), v_img_y_raw);
938
939                let mut sampled = [0.0f32; 8];
940                sample_bilinear_v8(img, &v_img_x_arr, &v_img_y_arr, &mut sampled);
941
942                for i in 0..count {
943                    intensities[idx] = f64::from(sampled[i]);
944                    idx += 1;
945                }
946
947                // Advance scalar row starts for next SIMD chunk
948                nx_start += 8.0 * dnx_du;
949                ny_start += 8.0 * dny_du;
950                d_start += 8.0 * dd_du;
951            }
952
953            current_nx_row += dda.dnx_dv as f32;
954            current_ny_row += dda.dny_dv as f32;
955            current_d_row += dda.dd_dv as f32;
956        }
957    }
958
959    #[cfg(all(target_arch = "aarch64", target_feature = "neon"))]
960    #[allow(unsafe_code)]
961    // SAFETY: NEON intrinsics are safe on aarch64 with neon feature.
962    unsafe {
963        use crate::simd::sampler::sample_bilinear_v8;
964        use std::arch::aarch64::*;
965
966        let dda = h.to_dda(points[0].0, points[0].1, _du, _dv);
967
968        let mut current_nx_row = dda.nx as f32;
969        let mut current_ny_row = dda.ny as f32;
970        let mut current_d_row = dda.d as f32;
971
972        let dnx_du = dda.dnx_du as f32;
973        let dny_du = dda.dny_du as f32;
974        let dd_du = dda.dd_du as f32;
975
976        let v_dnx_du = vdupq_n_f32(dnx_du);
977        let v_dny_du = vdupq_n_f32(dny_du);
978        let v_dd_du = vdupq_n_f32(dd_du);
979        let v_steps_low = vld1q_f32([0.0, 1.0, 2.0, 3.0].as_ptr());
980        let v_steps_high = vld1q_f32([4.0, 5.0, 6.0, 7.0].as_ptr());
981
982        let mut idx = 0;
983        for _y in 0..dim {
984            let mut nx_start = current_nx_row;
985            let mut ny_start = current_ny_row;
986            let mut d_start = current_d_row;
987
988            for _x in (0..dim).step_by(8) {
989                let count = (dim - _x).min(8);
990
991                // NEON perspective divide using vrecpeq_f32 + vrecpsq_f32
992                let mut v_img_x = [0.0f32; 8];
993                let mut v_img_y = [0.0f32; 8];
994
995                for (chunk, v_steps) in [v_steps_low, v_steps_high].into_iter().enumerate() {
996                    let v_nx_c = vfmaq_f32(vdupq_n_f32(nx_start), v_steps, v_dnx_du);
997                    let v_ny_c = vfmaq_f32(vdupq_n_f32(ny_start), v_steps, v_dny_du);
998                    let v_d_c = vfmaq_f32(vdupq_n_f32(d_start), v_steps, v_dd_du);
999
1000                    let v_winv = vrecpeq_f32(v_d_c);
1001                    let v_winv = vmulq_f32(v_winv, vrecpsq_f32(v_d_c, v_winv));
1002
1003                    let img_x = vmulq_f32(v_nx_c, v_winv);
1004                    let img_y = vmulq_f32(v_ny_c, v_winv);
1005
1006                    let offset = chunk * 4;
1007                    vst1q_f32(v_img_x.as_mut_ptr().add(offset), img_x);
1008                    vst1q_f32(v_img_y.as_mut_ptr().add(offset), img_y);
1009                }
1010
1011                let mut sampled = [0.0f32; 8];
1012                sample_bilinear_v8(img, &v_img_x, &v_img_y, &mut sampled);
1013
1014                for i in 0..count {
1015                    intensities[idx] = f64::from(sampled[i]);
1016                    idx += 1;
1017                }
1018
1019                nx_start += 8.0 * dnx_du;
1020                ny_start += 8.0 * dny_du;
1021                d_start += 8.0 * dd_du;
1022            }
1023
1024            current_nx_row += dda.dnx_dv as f32;
1025            current_ny_row += dda.dny_dv as f32;
1026            current_d_row += dda.dd_dv as f32;
1027        }
1028    }
1029
1030    #[cfg(not(any(
1031        all(
1032            target_arch = "x86_64",
1033            target_feature = "avx2",
1034            target_feature = "fma"
1035        ),
1036        all(target_arch = "aarch64", target_feature = "neon")
1037    )))]
1038    return sample_grid_values_optimized(img, h, roi, points, intensities, n);
1039
1040    #[cfg(any(
1041        all(
1042            target_arch = "x86_64",
1043            target_feature = "avx2",
1044            target_feature = "fma"
1045        ),
1046        all(target_arch = "aarch64", target_feature = "neon")
1047    ))]
1048    true
1049}
1050
1051/// Sample values from the image using SIMD-optimized Fast-Math and ROI caching.
1052///
1053/// # Panics
1054/// Panics if the number of sample points exceeds `MAX_BIT_COUNT`.
1055#[multiversion(targets(
1056    "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
1057    "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
1058    "aarch64+neon"
1059))]
1060fn sample_grid_values_optimized(
1061    img: &crate::image::ImageView,
1062    h: &Homography,
1063    roi: &RoiCache,
1064    points: &[(f64, f64)],
1065    intensities: &mut [f64],
1066    n: usize,
1067) -> bool {
1068    let h00 = h.h[(0, 0)] as f32;
1069    let h01 = h.h[(0, 1)] as f32;
1070    let h02 = h.h[(0, 2)] as f32;
1071    let h10 = h.h[(1, 0)] as f32;
1072    let h11 = h.h[(1, 1)] as f32;
1073    let h12 = h.h[(1, 2)] as f32;
1074    let h20 = h.h[(2, 0)] as f32;
1075    let h21 = h.h[(2, 1)] as f32;
1076    let h22 = h.h[(2, 2)] as f32;
1077
1078    let w_limit = (img.width - 1) as f32;
1079    let h_limit = (img.height - 1) as f32;
1080
1081    for (i, &p) in points.iter().take(n).enumerate() {
1082        let px = p.0 as f32;
1083        let py = p.1 as f32;
1084
1085        // Fast-Math Reciprocal
1086        let wz = h20 * px + h21 * py + h22;
1087        let winv = rcp_nr(wz);
1088
1089        let img_x = (h00 * px + h01 * py + h02) * winv - 0.5;
1090        let img_y = (h10 * px + h11 * py + h12) * winv - 0.5;
1091
1092        if img_x < 0.0 || img_x >= w_limit || img_y < 0.0 || img_y >= h_limit {
1093            return false;
1094        }
1095
1096        let ix = img_x.floor() as usize;
1097        let iy = img_y.floor() as usize;
1098
1099        // Sample from ROI cache using fixed-point bilinear
1100        let v00 = roi.get(ix, iy);
1101        let v10 = roi.get(ix + 1, iy);
1102        let v01 = roi.get(ix, iy + 1);
1103        let v11 = roi.get(ix + 1, iy + 1);
1104
1105        intensities[i] = f64::from(bilinear_interpolate_fixed(img_x, img_y, v00, v10, v01, v11));
1106    }
1107    true
1108}
1109
/// Sample the bit grid from the image using the homography and decoder points.
///
/// Pipeline per detection:
/// 1. Build an ROI cache over the detection's axis-aligned bounding box.
/// 2. Compute the square-to-quad homography from the detection corners.
/// 3. Bilinearly sample every decoder grid point (DDA/SIMD path).
/// 4. Hand the raw intensities plus adaptive thresholds to strategy `S`
///    to produce the code.
///
/// Returns `None` if the homography is degenerate or any sample point
/// projects outside the image.
///
/// # Panics
/// Panics if the number of sample points exceeds `MAX_BIT_COUNT`.
#[allow(clippy::cast_sign_loss, clippy::too_many_lines)]
pub fn sample_grid_generic<S: crate::strategy::DecodingStrategy>(
    img: &crate::image::ImageView,
    arena: &Bump,
    detection: &crate::Detection,
    decoder: &(impl TagDecoder + ?Sized),
) -> Option<S::Code> {
    let (min_x, min_y, max_x, max_y) = detection.aabb();
    let roi = RoiCache::new(img, arena, min_x, min_y, max_x, max_y);

    // Degenerate quads (collinear corners) yield no homography -> bail out.
    let homography = Homography::square_to_quad(&detection.corners)?;

    let points = decoder.sample_points();
    // Stack-allocated buffer for up to 64 sample points (covers all standard tag families)
    let mut intensities = [0.0f64; MAX_BIT_COUNT];
    let n = points.len().min(MAX_BIT_COUNT);
    assert!(
        points.len() <= MAX_BIT_COUNT,
        "Tag bit count ({}) exceeds static buffer size ({})",
        points.len(),
        MAX_BIT_COUNT
    );

    // Any out-of-image sample invalidates the whole candidate.
    if !sample_grid_values_dda_simd(img, &roi, &homography, decoder, &mut intensities) {
        return None;
    }

    Some(S::from_intensities(
        &intensities[..n],
        &compute_adaptive_thresholds(&intensities[..n], points),
    ))
}
1156
1157/// Sample the bit grid using Structure of Arrays (SoA) data.
1158///
1159/// # Panics
1160/// Panics if the number of sample points exceeds `MAX_BIT_COUNT`.
1161pub fn sample_grid_soa<S: crate::strategy::DecodingStrategy>(
1162    img: &crate::image::ImageView,
1163    arena: &Bump,
1164    corners: &[Point2f],
1165    homography: &Matrix3x3,
1166    decoder: &(impl TagDecoder + ?Sized),
1167) -> Option<S::Code> {
1168    // Compute AABB for RoiCache
1169    let mut min_x = f32::MAX;
1170    let mut min_y = f32::MAX;
1171    let mut max_x = f32::MIN;
1172    let mut max_y = f32::MIN;
1173    for p in corners {
1174        min_x = min_x.min(p.x);
1175        min_y = min_y.min(p.y);
1176        max_x = max_x.max(p.x);
1177        max_y = max_y.max(p.y);
1178    }
1179
1180    let roi = RoiCache::new(
1181        img,
1182        arena,
1183        (min_x.floor() as i32).max(0) as usize,
1184        (min_y.floor() as i32).max(0) as usize,
1185        (max_x.ceil() as i32).max(0) as usize,
1186        (max_y.ceil() as i32).max(0) as usize,
1187    );
1188
1189    // Convert Matrix3x3 to Homography (internal use).
1190    // Nalgebra stores in column-major order, which matches our data layout.
1191    let mut h_mat = SMatrix::<f64, 3, 3>::identity();
1192    for (i, val) in homography.data.iter().enumerate() {
1193        h_mat.as_mut_slice()[i] = f64::from(*val);
1194    }
1195    let homography_obj = Homography { h: h_mat };
1196
1197    let points = decoder.sample_points();
1198    let mut intensities = [0.0f64; MAX_BIT_COUNT];
1199    let n = points.len().min(MAX_BIT_COUNT);
1200    assert!(
1201        points.len() <= MAX_BIT_COUNT,
1202        "Tag bit count ({}) exceeds static buffer size ({})",
1203        points.len(),
1204        MAX_BIT_COUNT
1205    );
1206
1207    if !sample_grid_values_dda_simd(img, &roi, &homography_obj, decoder, &mut intensities) {
1208        return None;
1209    }
1210
1211    Some(S::from_intensities(
1212        &intensities[..n],
1213        &compute_adaptive_thresholds(&intensities[..n], points),
1214    ))
1215}
1216
1217/// Sample the bit grid using Structure of Arrays (SoA) data and a precomputed ROI cache.
1218///
1219/// # Panics
1220/// Panics if the number of sample points exceeds `MAX_BIT_COUNT`.
1221pub fn sample_grid_soa_precomputed<S: crate::strategy::DecodingStrategy>(
1222    img: &crate::image::ImageView,
1223    roi: &RoiCache,
1224    homography: &Matrix3x3,
1225    decoder: &(impl TagDecoder + ?Sized),
1226) -> Option<S::Code> {
1227    // Convert Matrix3x3 to Homography (internal use).
1228    let mut h_mat = SMatrix::<f64, 3, 3>::identity();
1229    for (i, val) in homography.data.iter().enumerate() {
1230        h_mat.as_mut_slice()[i] = f64::from(*val);
1231    }
1232    let homography_obj = Homography { h: h_mat };
1233
1234    let points = decoder.sample_points();
1235    let mut intensities = [0.0f64; MAX_BIT_COUNT];
1236    let n = points.len().min(MAX_BIT_COUNT);
1237    assert!(
1238        points.len() <= MAX_BIT_COUNT,
1239        "Tag bit count ({}) exceeds static buffer size ({})",
1240        points.len(),
1241        MAX_BIT_COUNT
1242    );
1243
1244    if !sample_grid_values_dda_simd(img, roi, &homography_obj, decoder, &mut intensities) {
1245        return None;
1246    }
1247
1248    Some(S::from_intensities(
1249        &intensities[..n],
1250        &compute_adaptive_thresholds(&intensities[..n], points),
1251    ))
1252}
1253
1254/// Internal helper to compute adaptive thresholds for a grid of intensities.
1255fn compute_adaptive_thresholds(intensities: &[f64], points: &[(f64, f64)]) -> [f64; 64] {
1256    let n = intensities.len();
1257    let global_threshold = compute_otsu_threshold(intensities);
1258
1259    let mut quad_sums = [0.0; 4];
1260    let mut quad_counts = [0; 4];
1261    for (i, p) in points.iter().take(n).enumerate() {
1262        let qi = if p.0 < 0.0 {
1263            usize::from(p.1 >= 0.0)
1264        } else {
1265            2 + usize::from(p.1 >= 0.0)
1266        };
1267        quad_sums[qi] += intensities[i];
1268        quad_counts[qi] += 1;
1269    }
1270
1271    let mut thresholds = [0.0f64; 64];
1272    for (i, p) in points.iter().take(n).enumerate() {
1273        let qi = if p.0 < 0.0 {
1274            usize::from(p.1 >= 0.0)
1275        } else {
1276            2 + usize::from(p.1 >= 0.0)
1277        };
1278        let quad_avg = if quad_counts[qi] > 0 {
1279            quad_sums[qi] / f64::from(quad_counts[qi])
1280        } else {
1281            global_threshold
1282        };
1283
1284        // Blend global Otsu and local mean (0.7 / 0.3 weighting is common for fiducials)
1285        thresholds[i] = 0.7 * global_threshold + 0.3 * quad_avg;
1286    }
1287    thresholds
1288}
1289
/// Sample the bit grid from the image (Legacy/Hard wrapper).
///
/// Delegates to [`sample_grid_generic`] with the hard-decision strategy.
/// `_min_contrast` is accepted for API compatibility but is currently
/// ignored — thresholding is handled adaptively inside the generic path.
#[allow(clippy::cast_sign_loss, clippy::too_many_lines)]
pub fn sample_grid(
    img: &crate::image::ImageView,
    arena: &Bump,
    detection: &crate::Detection,
    decoder: &(impl TagDecoder + ?Sized),
    _min_contrast: f64,
) -> Option<u64> {
    sample_grid_generic::<crate::strategy::HardStrategy>(img, arena, detection, decoder)
}
1301
/// Rotate a square `dim` x `dim` bit grid 90 degrees clockwise.
///
/// Bits are laid out row-major: bit `y * dim + x` holds cell `(x, y)`.
/// The rotation maps cell `(x, y)` to `(dim - 1 - y, x)`. Cost is one pass
/// over all `dim * dim` cells, which is negligible for tag-sized grids.
#[must_use]
pub fn rotate90(bits: u64, dim: usize) -> u64 {
    let mut rotated = 0u64;
    for row in 0..dim {
        for col in 0..dim {
            let src_bit = row * dim + col;
            if (bits >> src_bit) & 1 == 1 {
                // Clockwise: the column becomes the new row, and the
                // vertically mirrored row becomes the new column.
                let dst_bit = col * dim + (dim - 1 - row);
                rotated |= 1 << dst_bit;
            }
        }
    }
    rotated
}
1318
1319/// Decode all active candidates in the batch using the Structure of Arrays (SoA) layout.
1320///
1321/// This phase executes SIMD bilinear interpolation and Hamming error correction.
1322/// If a candidate fails decoding, its `status_mask` is flipped to `FailedDecode`.
1323#[tracing::instrument(skip_all, name = "pipeline::decoding_pass")]
1324pub fn decode_batch_soa(
1325    batch: &mut crate::batch::DetectionBatch,
1326    n: usize,
1327    img: &crate::image::ImageView,
1328    decoders: &[Box<dyn TagDecoder + Send + Sync>],
1329    config: &crate::config::DetectorConfig,
1330) {
1331    match config.decode_mode {
1332        crate::config::DecodeMode::Hard => {
1333            decode_batch_soa_generic::<crate::strategy::HardStrategy>(
1334                batch, n, img, decoders, config,
1335            );
1336        },
1337        crate::config::DecodeMode::Soft => {
1338            decode_batch_soa_generic::<crate::strategy::SoftStrategy>(
1339                batch, n, img, decoders, config,
1340            );
1341        },
1342    }
1343}
1344
1345#[allow(
1346    clippy::too_many_lines,
1347    clippy::cast_possible_wrap,
1348    clippy::collapsible_if,
1349    unused_assignments
1350)]
1351fn decode_batch_soa_generic<S: crate::strategy::DecodingStrategy>(
1352    batch: &mut crate::batch::DetectionBatch,
1353    n: usize,
1354    img: &crate::image::ImageView,
1355    decoders: &[Box<dyn TagDecoder + Send + Sync>],
1356    config: &crate::config::DetectorConfig,
1357) {
1358    use crate::batch::CandidateState;
1359    use rayon::prelude::*;
1360
1361    // We collect results into a temporary Vec to avoid unsafe parallel writes to the batch.
1362    let results: Vec<_> = (0..n)
1363        .into_par_iter()
1364        .map(|i| {
1365            if batch.status_mask[i] != CandidateState::Active {
1366                return (batch.status_mask[i], 0, 0, 0, batch.error_rates[i], None);
1367            }
1368
1369            WORKSPACE_ARENA.with_borrow_mut(|arena| {
1370                    arena.reset();
1371
1372                    let corners = &batch.corners[i];
1373                    let homography = &batch.homographies[i];
1374
1375                    // Compute AABB for RoiCache ONCE per candidate.
1376                    // We expand it slightly (10%) to ensure scaled versions (0.9, 1.1) still fit.
1377                    let mut min_x = f32::MAX;
1378                    let mut min_y = f32::MAX;
1379                    let mut max_x = f32::MIN;
1380                    let mut max_y = f32::MIN;
1381                    for p in corners {
1382                        min_x = min_x.min(p.x);
1383                        min_y = min_y.min(p.y);
1384                        max_x = max_x.max(p.x);
1385                        max_y = max_y.max(p.y);
1386                    }
1387                    let w_aabb = max_x - min_x;
1388                    let h_aabb = max_y - min_y;
1389                    let roi = RoiCache::new(
1390                        img,
1391                        arena,
1392                        ((min_x - w_aabb * 0.1).floor() as i32).max(0) as usize,
1393                        ((min_y - h_aabb * 0.1).floor() as i32).max(0) as usize,
1394                        (((max_x + w_aabb * 0.1).ceil() as i32).min(img.width as i32 - 1)).max(0)
1395                            as usize,
1396                        (((max_y + h_aabb * 0.1).ceil() as i32).min(img.height as i32 - 1)).max(0)
1397                            as usize,
1398                    );
1399
1400
1401                    let mut best_h = u32::MAX;
1402                    let mut best_code = None;
1403                    let mut best_id = 0;
1404                    let mut best_rot = 0;
1405                    let mut best_overall_code = None;
1406
1407                    let scales = [1.0, 0.9, 1.1];
1408                    let center = [
1409                        (corners[0].x + corners[1].x + corners[2].x + corners[3].x) / 4.0,
1410                        (corners[0].y + corners[1].y + corners[2].y + corners[3].y) / 4.0,
1411                    ];
1412
1413                    for scale in scales {
1414                        let mut scaled_corners = [Point2f::default(); 4];
1415                        let mut scaled_h_mat = Matrix3x3 {
1416                            data: [0.0; 9],
1417                            padding: [0.0; 7],
1418                        };
1419
1420                        let current_homography: &Matrix3x3;
1421
1422                        let mut best_h_in_scale = u32::MAX;
1423                        let mut best_match_in_scale: Option<(u32, u32, u8, S::Code, usize)> = None;
1424
1425                        if (scale - 1.0f32).abs() > 1e-4 {
1426                            for j in 0..4 {
1427                                scaled_corners[j].x =
1428                                    center[0] + (corners[j].x - center[0]) * scale;
1429                                scaled_corners[j].y =
1430                                    center[1] + (corners[j].y - center[1]) * scale;
1431                            }
1432
1433                            // Must recompute homography for scaled corners
1434                            let dst = [
1435                                [
1436                                    f64::from(scaled_corners[0].x),
1437                                    f64::from(scaled_corners[0].y),
1438                                ],
1439                                [
1440                                    f64::from(scaled_corners[1].x),
1441                                    f64::from(scaled_corners[1].y),
1442                                ],
1443                                [
1444                                    f64::from(scaled_corners[2].x),
1445                                    f64::from(scaled_corners[2].y),
1446                                ],
1447                                [
1448                                    f64::from(scaled_corners[3].x),
1449                                    f64::from(scaled_corners[3].y),
1450                                ],
1451                            ];
1452
1453                            if let Some(h_new) = Homography::square_to_quad(&dst) {
1454                                for (j, val) in h_new.h.iter().enumerate() {
1455                                    scaled_h_mat.data[j] = *val as f32;
1456                                }
1457                                current_homography = &scaled_h_mat;
1458                            } else {
1459                                // Degenerate scale, skip
1460                                continue;
1461                            }
1462                        } else {
1463                            scaled_corners.copy_from_slice(&corners[..4]);
1464                            current_homography = homography;
1465                        }
1466
1467                        for (decoder_idx, decoder) in decoders.iter().enumerate() {
1468                            if let Some(code) = sample_grid_soa_precomputed::<S>(
1469                                img,
1470                                &roi,
1471                                current_homography,
1472                                decoder.as_ref(),
1473                            ) {
1474                                if let Some((id, hamming, rot)) =
1475                                    S::decode(&code, decoder.as_ref(), 255)
1476                                {
1477                                    if hamming < best_h {
1478                                        best_h = hamming;
1479                                        let _ = code.clone();
1480                                        best_overall_code = Some(code.clone());
1481                                    }
1482
1483                                    if hamming <= config.max_hamming_error
1484                                        && (best_code.is_none() || hamming < best_h_in_scale)
1485                                    {
1486                                        best_h_in_scale = hamming;
1487                                        best_match_in_scale =
1488                                            Some((id, hamming, rot, code, decoder_idx));
1489                                    }
1490                                }
1491                            }
1492                        }
1493                        if let Some((id, hamming, rot, code, decoder_idx)) = best_match_in_scale {
1494                            best_id = id;
1495                            best_rot = rot;
1496                            best_code = Some(code.clone());
1497                            let decoder = decoders[decoder_idx].as_ref();
1498
1499                            // Always perform ERF refinement for finalists if requested
1500                            if config.refinement_mode == crate::config::CornerRefinementMode::Erf {
1501                                // Reassemble corners for ERF (it uses [f64; 2])
1502                                let mut current_corners = [[0.0f64; 2]; 4];
1503                                for j in 0..4 {
1504                                    current_corners[j] =
1505                                        [f64::from(corners[j].x), f64::from(corners[j].y)];
1506                                }
1507
1508                                let refined_corners = refine_corners_erf(
1509                                    arena,
1510                                    img,
1511                                    &current_corners,
1512                                    config.subpixel_refinement_sigma,
1513                                );
1514
1515                                // Verify that refined corners still yield a valid decode
1516                                let mut refined_corners_f32 = [Point2f::default(); 4];
1517                                for j in 0..4 {
1518                                    refined_corners_f32[j] = Point2f {
1519                                        x: refined_corners[j][0] as f32,
1520                                        y: refined_corners[j][1] as f32,
1521                                    };
1522                                }
1523
1524                                // Must recompute homography for refined corners
1525                                let mut ref_h_mat = Matrix3x3 {
1526                                    data: [0.0; 9],
1527                                    padding: [0.0; 7],
1528                                };
1529                                if let Some(h_new) = Homography::square_to_quad(&refined_corners) {
1530                                    for (j, val) in h_new.h.iter().enumerate() {
1531                                        ref_h_mat.data[j] = *val as f32;
1532                                    }
1533                                } else {
1534                                    // Degenerate refinement, reject
1535                                    continue;
1536                                }
1537
1538                                if let Some(code_ref) =
1539                                    sample_grid_soa_precomputed::<S>(img, &roi, &ref_h_mat, decoder)
1540                                {
1541                                    if let Some((id_ref, hamming_ref, _)) =
1542                                        S::decode(&code_ref, decoder, 255)
1543                                    {
1544                                        // Only keep if it's the same tag and hamming is not worse
1545                                        if id_ref == id && hamming_ref <= hamming {
1546                                            best_h = hamming_ref;
1547                                            best_code = Some(code_ref);
1548                                            // Update the actual corners in the batch!
1549                                            if let Some(code_inner) = &best_code {
1550                                                return (
1551                                                    CandidateState::Valid,
1552                                                    best_id,
1553                                                    best_rot,
1554                                                    S::to_debug_bits(code_inner),
1555                                                    best_h as f32,
1556                                                    Some(refined_corners_f32),
1557                                                );
1558                                            }
1559                                        }
1560                                    }
1561                                }
1562                            }
1563
1564                            return (
1565                                CandidateState::Valid,
1566                                best_id,
1567                                best_rot,
1568                                S::to_debug_bits(&code),
1569                                hamming as f32,
1570                                None,
1571                            );
1572                        }
1573
1574                        if best_h == 0 {
1575                            break;
1576                        }
1577                    }
1578
1579                    // Stage 2: Configurable Corner Refinement (Recovery for near-misses)
1580                    let max_h_for_refine = if decoders.iter().any(|d| d.name() == "36h11") {
1581                        10
1582                    } else {
1583                        4
1584                    };
1585
1586                    if best_h > config.max_hamming_error
1587                        && best_h <= max_h_for_refine
1588                        && best_overall_code.is_some()
1589                    {
1590                        match config.refinement_mode {
1591                            crate::config::CornerRefinementMode::None
1592                            | crate::config::CornerRefinementMode::GridFit
1593                            | crate::config::CornerRefinementMode::Gwlf => {
1594                                // GridFit/Gwlf not ported to SoA yet or not implemented.
1595                            }
1596                            crate::config::CornerRefinementMode::Edge
1597                            | crate::config::CornerRefinementMode::Erf => {
1598                                let nudge = 0.2;
1599                                let mut current_corners = [Point2f::default(); 4];
1600                                current_corners.copy_from_slice(corners);
1601
1602                                for _pass in 0..2 {
1603                                    let mut pass_improved = false;
1604                                    for c_idx in 0..4 {
1605                                        for (dx, dy) in [
1606                                            (nudge, 0.0),
1607                                            (-nudge, 0.0),
1608                                            (0.0, nudge),
1609                                            (0.0, -nudge),
1610                                        ] {
1611                                            let mut test_corners = current_corners;
1612                                            test_corners[c_idx].x += dx;
1613                                            test_corners[c_idx].y += dy;
1614
1615                                            // Must recompute homography for the nudged corners
1616                                            let dst = [
1617                                                [
1618                                                    f64::from(test_corners[0].x),
1619                                                    f64::from(test_corners[0].y),
1620                                                ],
1621                                                [
1622                                                    f64::from(test_corners[1].x),
1623                                                    f64::from(test_corners[1].y),
1624                                                ],
1625                                                [
1626                                                    f64::from(test_corners[2].x),
1627                                                    f64::from(test_corners[2].y),
1628                                                ],
1629                                                [
1630                                                    f64::from(test_corners[3].x),
1631                                                    f64::from(test_corners[3].y),
1632                                                ],
1633                                            ];
1634
1635                                            if let Some(h_new) = Homography::square_to_quad(&dst) {
1636                                                let mut h_mat = Matrix3x3 {
1637                                                    data: [0.0; 9],
1638                                                    padding: [0.0; 7],
1639                                                };
1640                                                for (j, val) in h_new.h.iter().enumerate() {
1641                                                    h_mat.data[j] = *val as f32;
1642                                                }
1643
1644                                                for decoder in decoders {
1645                                                    if let Some(code) =
1646                                                        sample_grid_soa_precomputed::<S>(
1647                                                            img,
1648                                                            &roi,
1649                                                            &h_mat,
1650                                                            decoder.as_ref(),
1651                                                        )
1652                                                    {
1653                                                        if let Some((id, hamming, rot)) =
1654                                                            S::decode(&code, decoder.as_ref(), 255)
1655                                                        {
1656                                                            if hamming < best_h {
1657                                                                best_h = hamming;
1658                                                                best_overall_code =
1659                                                                    Some(code.clone());
1660                                                                current_corners = test_corners;
1661                                                                pass_improved = true;
1662
1663                                                                if hamming
1664                                                                    <= config.max_hamming_error
1665                                                                {
1666                                                                    best_id = id;
1667                                                                    best_rot = rot;
1668                                                                    best_code = Some(code.clone());
1669
1670                                                                    return (
1671                                                                        CandidateState::Valid,
1672                                                                        best_id,
1673                                                                        best_rot,
1674                                                                        S::to_debug_bits(&code),
1675                                                                        best_h as f32,
1676                                                                        Some(current_corners),
1677                                                                    );
1678                                                                }
1679                                                            }
1680                                                        }
1681                                                    }
1682                                                }
1683                                            }
1684                                        }
1685                                    }
1686                                    if !pass_improved {
1687                                        break;
1688                                    }
1689                                }
1690                            }
1691                        }
1692                    }
1693
1694                    if let Some(code) = best_code {
1695                        (
1696                            CandidateState::Valid,
1697                            best_id,
1698                            best_rot,
1699                            S::to_debug_bits(&code),
1700                            best_h as f32,
1701                            None,
1702                        )
1703                    } else {
1704                        // Even on failure, return the best hamming distance found for debugging.
1705                        // If no code was sampled at all, best_h will be u32::MAX.
1706                        (
1707                            CandidateState::FailedDecode,
1708                            0,
1709                            0,
1710                            0,
1711                            if best_h == u32::MAX { 0.0 } else { best_h as f32 },
1712                            None,
1713                        )
1714                    }
1715                })
1716        })
1717        .collect();
1718
1719    for (i, (state, id, rot, payload, error_rate, refined_corners)) in
1720        results.into_iter().enumerate()
1721    {
1722        batch.status_mask[i] = state;
1723        batch.ids[i] = id;
1724        batch.payloads[i] = payload;
1725        batch.error_rates[i] = error_rate;
1726
1727        if let Some(refined) = refined_corners {
1728            for (j, corner) in refined.iter().enumerate() {
1729                batch.corners[i][j] = *corner;
1730            }
1731        }
1732
1733        if state == CandidateState::Valid && rot > 0 {
1734            // Reorder corners based on rotation
1735            let mut temp_corners = [Point2f::default(); 4];
1736            for (j, item) in temp_corners.iter_mut().enumerate() {
1737                let src_idx = (j + usize::from(rot)) % 4;
1738                *item = batch.corners[i][src_idx];
1739            }
1740            for (j, item) in temp_corners.iter().enumerate() {
1741                batch.corners[i][j] = *item;
1742            }
1743        }
1744    }
1745}
1746
/// A trait for decoding binary payloads from extracted tags.
///
/// Implementations are tag-family specific (AprilTag 36h11/16h5, ArUco 4x4)
/// and delegate dictionary lookups to the pre-generated tables in
/// `crate::dictionaries`.
pub trait TagDecoder: Send + Sync {
    /// Returns the short name of the decoder family (e.g., "36h11").
    fn name(&self) -> &str;
    /// Returns the dimension of the tag grid (e.g., 6 for 36h11).
    fn dimension(&self) -> usize;
    /// Returns the active number of bits in the tag (e.g., 41 for 41h12).
    fn bit_count(&self) -> usize;
    /// Returns the ideal sample points in canonical coordinates [-1, 1].
    fn sample_points(&self) -> &[(f64, f64)];
    /// Decodes the extracted bits into a tag ID, hamming distance, and rotation count.
    ///
    /// Returns `Some((id, hamming, rotation))` if decoding is successful, `None` otherwise.
    /// `rotation` is 0-3, representing 90-degree CW increments.
    /// The maximum tolerated hamming distance is a family-specific default
    /// (4 for 36h11, 1 for 16h5, 2 for the ArUco 4x4 families); use
    /// [`TagDecoder::decode_full`] to override it.
    fn decode(&self, bits: u64) -> Option<(u32, u32, u8)>; // (id, hamming, rotation)
    /// Decodes with custom maximum hamming distance.
    fn decode_full(&self, bits: u64, max_hamming: u32) -> Option<(u32, u32, u8)>;
    /// Get the original code for a given ID (useful for testing/simulation).
    fn get_code(&self, id: u16) -> Option<u64>;
    /// Returns the total number of codes in the dictionary.
    fn num_codes(&self) -> usize;
    /// Returns all rotated versions of all codes in the dictionary: (bits, id, rotation)
    ///
    /// NOTE: the current implementations all return an empty slice; this hook
    /// is kept only for testing/simulation callers.
    fn rotated_codes(&self) -> &[(u64, u16, u8)];
    /// Executes a callback for each candidate in the dictionary within a given Hamming distance.
    /// This uses Multi-Index Hashing if available for efficiency.
    fn for_each_candidate_within_hamming(
        &self,
        bits: u64,
        max_hamming: u32,
        callback: &mut dyn FnMut(u64, u16, u8),
    );
}
1779
1780/// Decoder for the AprilTag 36h11 family.
1781pub struct AprilTag36h11;
1782
1783impl TagDecoder for AprilTag36h11 {
1784    fn name(&self) -> &'static str {
1785        "36h11"
1786    }
1787    fn dimension(&self) -> usize {
1788        6
1789    } // 6x6 grid of bits (excluding border)
1790    fn bit_count(&self) -> usize {
1791        36
1792    }
1793
1794    fn sample_points(&self) -> &[(f64, f64)] {
1795        crate::dictionaries::POINTS_APRILTAG36H11
1796    }
1797
1798    fn decode(&self, bits: u64) -> Option<(u32, u32, u8)> {
1799        // Use the pre-calculated dictionary with O(1) exact match + cached rotations.
1800        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag36h11)
1801            .decode(bits, 4) // Allow up to 4 bit errors for maximum recall
1802            .map(|(id, hamming, rot)| (u32::from(id), hamming, rot))
1803    }
1804
1805    fn decode_full(&self, bits: u64, max_hamming: u32) -> Option<(u32, u32, u8)> {
1806        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag36h11)
1807            .decode(bits, max_hamming)
1808            .map(|(id, hamming, rot)| (u32::from(id), hamming, rot))
1809    }
1810
1811    fn get_code(&self, id: u16) -> Option<u64> {
1812        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag36h11).get_code(id)
1813    }
1814
1815    fn num_codes(&self) -> usize {
1816        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag36h11).len()
1817    }
1818
1819    fn rotated_codes(&self) -> &[(u64, u16, u8)] {
1820        &[] // Removed from runtime struct, only used by testing/simulation which we will adjust later.
1821    }
1822    fn for_each_candidate_within_hamming(
1823        &self,
1824        bits: u64,
1825        max_hamming: u32,
1826        callback: &mut dyn FnMut(u64, u16, u8),
1827    ) {
1828        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag36h11)
1829            .for_each_candidate_within_hamming(bits, max_hamming, callback);
1830    }
1831}
1832
1833/// Decoder for the AprilTag 16h5 family.
1834pub struct AprilTag16h5;
1835
1836impl TagDecoder for AprilTag16h5 {
1837    fn name(&self) -> &'static str {
1838        "16h5"
1839    }
1840    fn dimension(&self) -> usize {
1841        4
1842    }
1843    fn bit_count(&self) -> usize {
1844        16
1845    }
1846    fn sample_points(&self) -> &[(f64, f64)] {
1847        crate::dictionaries::POINTS_APRILTAG16H5
1848    }
1849    fn decode(&self, bits: u64) -> Option<(u32, u32, u8)> {
1850        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag16h5)
1851            .decode(bits, 1)
1852            .map(|(id, hamming, rot)| (u32::from(id), hamming, rot))
1853    }
1854    fn decode_full(&self, bits: u64, max_hamming: u32) -> Option<(u32, u32, u8)> {
1855        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag16h5)
1856            .decode(bits, max_hamming)
1857            .map(|(id, hamming, rot)| (u32::from(id), hamming, rot))
1858    }
1859    fn get_code(&self, id: u16) -> Option<u64> {
1860        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag16h5).get_code(id)
1861    }
1862    fn num_codes(&self) -> usize {
1863        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag16h5).len()
1864    }
1865    fn rotated_codes(&self) -> &[(u64, u16, u8)] {
1866        &[]
1867    }
1868    fn for_each_candidate_within_hamming(
1869        &self,
1870        bits: u64,
1871        max_hamming: u32,
1872        callback: &mut dyn FnMut(u64, u16, u8),
1873    ) {
1874        crate::dictionaries::get_dictionary(crate::config::TagFamily::AprilTag16h5)
1875            .for_each_candidate_within_hamming(bits, max_hamming, callback);
1876    }
1877}
1878
1879/// Decoder for the ArUco 4x4_50 family.
1880pub struct ArUco4x4_50;
1881
1882impl TagDecoder for ArUco4x4_50 {
1883    fn name(&self) -> &'static str {
1884        "4X4_50"
1885    }
1886    fn dimension(&self) -> usize {
1887        4
1888    }
1889    fn bit_count(&self) -> usize {
1890        16
1891    }
1892
1893    fn sample_points(&self) -> &[(f64, f64)] {
1894        crate::dictionaries::POINTS_ARUCO4X4_50
1895    }
1896
1897    fn decode(&self, bits: u64) -> Option<(u32, u32, u8)> {
1898        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_50)
1899            .decode(bits, 2)
1900            .map(|(id, hamming, rot)| (u32::from(id), hamming, rot))
1901    }
1902
1903    fn decode_full(&self, bits: u64, max_hamming: u32) -> Option<(u32, u32, u8)> {
1904        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_50)
1905            .decode(bits, max_hamming)
1906            .map(|(id, hamming, rot)| (u32::from(id), hamming, rot))
1907    }
1908
1909    fn get_code(&self, id: u16) -> Option<u64> {
1910        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_50).get_code(id)
1911    }
1912
1913    fn num_codes(&self) -> usize {
1914        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_50).len()
1915    }
1916
1917    fn rotated_codes(&self) -> &[(u64, u16, u8)] {
1918        &[]
1919    }
1920    fn for_each_candidate_within_hamming(
1921        &self,
1922        bits: u64,
1923        max_hamming: u32,
1924        callback: &mut dyn FnMut(u64, u16, u8),
1925    ) {
1926        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_50)
1927            .for_each_candidate_within_hamming(bits, max_hamming, callback);
1928    }
1929}
1930
1931/// Decoder for the ArUco 4x4_100 family.
1932pub struct ArUco4x4_100;
1933
1934impl TagDecoder for ArUco4x4_100 {
1935    fn name(&self) -> &'static str {
1936        "4X4_100"
1937    }
1938    fn dimension(&self) -> usize {
1939        4
1940    }
1941    fn bit_count(&self) -> usize {
1942        16
1943    }
1944
1945    fn sample_points(&self) -> &[(f64, f64)] {
1946        crate::dictionaries::POINTS_ARUCO4X4_100
1947    }
1948
1949    fn decode(&self, bits: u64) -> Option<(u32, u32, u8)> {
1950        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_100)
1951            .decode(bits, 2)
1952            .map(|(id, hamming, rot)| (u32::from(id), hamming, rot))
1953    }
1954
1955    fn decode_full(&self, bits: u64, max_hamming: u32) -> Option<(u32, u32, u8)> {
1956        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_100)
1957            .decode(bits, max_hamming)
1958            .map(|(id, hamming, rot)| (u32::from(id), hamming, rot))
1959    }
1960
1961    fn get_code(&self, id: u16) -> Option<u64> {
1962        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_100).get_code(id)
1963    }
1964
1965    fn num_codes(&self) -> usize {
1966        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_100).len()
1967    }
1968
1969    fn rotated_codes(&self) -> &[(u64, u16, u8)] {
1970        &[]
1971    }
1972    fn for_each_candidate_within_hamming(
1973        &self,
1974        bits: u64,
1975        max_hamming: u32,
1976        callback: &mut dyn FnMut(u64, u16, u8),
1977    ) {
1978        crate::dictionaries::get_dictionary(crate::config::TagFamily::ArUco4x4_100)
1979            .for_each_candidate_within_hamming(bits, max_hamming, callback);
1980    }
1981}
1982
1983/// Convert a TagFamily enum to a boxed decoder instance.
1984#[must_use]
1985pub fn family_to_decoder(family: config::TagFamily) -> Box<dyn TagDecoder + Send + Sync> {
1986    match family {
1987        config::TagFamily::AprilTag16h5 => Box::new(AprilTag16h5),
1988        config::TagFamily::AprilTag36h11 => Box::new(AprilTag36h11),
1989        config::TagFamily::ArUco4x4_50 => Box::new(ArUco4x4_50),
1990        config::TagFamily::ArUco4x4_100 => Box::new(ArUco4x4_100),
1991    }
1992}
1993
#[cfg(test)]
#[allow(clippy::unwrap_used)]
mod tests {
    use super::*;
    use proptest::prelude::*;

    proptest! {
        // Four successive 90-degree rotations must reproduce the original
        // payload bits (compared under the dim*dim payload mask).
        #[test]
        fn test_rotation_invariants(bits in 0..u64::MAX) {
            let dim = 6;
            let r1 = rotate90(bits, dim);
            let r2 = rotate90(r1, dim);
            let r3 = rotate90(r2, dim);
            let r4 = rotate90(r3, dim);

            // Mask to dim*dim bits to avoid noise in upper bits
            let mask = (1u64 << (dim * dim)) - 1;
            prop_assert_eq!(bits & mask, r4 & mask);
        }

        // Flipping up to two bits of a (possibly rotated) dictionary code
        // must still decode back to the original ID. When flip1 == flip2 the
        // flips cancel, degenerating to a 0-bit error, which is still valid.
        #[test]
        fn test_hamming_robustness(
            id_idx in 0usize..10,
            rotation in 0..4usize,
            flip1 in 0..36usize,
            flip2 in 0..36usize
        ) {
            let decoder = AprilTag36h11;
            let orig_id = id_idx as u16;
            let dict = crate::dictionaries::get_dictionary(config::TagFamily::AprilTag36h11);

            // Get the correctly geometrically rotated code directly from our generated dictionaries
            let mut test_bits = dict.codes[(id_idx * 4) + rotation];

            // Flip bits
            test_bits ^= 1 << flip1;
            test_bits ^= 1 << flip2;

            let result = decoder.decode(test_bits);
            prop_assert!(result.is_some());
            let (decoded_id, _, _) = result.expect("Should decode valid pattern");
            prop_assert_eq!(decoded_id, u32::from(orig_id));
        }

        #[test]
        fn test_false_positive_resistance(bits in 0..u64::MAX) {
            let decoder = AprilTag36h11;
            // Random bitstreams should rarely match any of the 587 codes
            if let Some((_id, hamming, _rot)) = decoder.decode(bits) {
                // If it decodes, it must have low hamming distance
                // (within the decoder's default budget of 4 bit errors).
                prop_assert!(hamming <= 4);
            }
        }

        // A homography fitted from 4 point pairs must project each source
        // point onto its paired destination point.
        #[test]
        fn prop_homography_projection(
            src in prop::collection::vec((-100.0..100.0, -100.0..100.0), 4),
            dst in prop::collection::vec((0.0..1000.0, 0.0..1000.0), 4)
        ) {
            let src_pts = [
                [src[0].0, src[0].1],
                [src[1].0, src[1].1],
                [src[2].0, src[2].1],
                [src[3].0, src[3].1],
            ];
            let dst_pts = [
                [dst[0].0, dst[0].1],
                [dst[1].0, dst[1].1],
                [dst[2].0, dst[2].1],
                [dst[3].0, dst[3].1],
            ];

            if let Some(h) = Homography::from_pairs(&src_pts, &dst_pts) {
                for i in 0..4 {
                    let p = h.project(src_pts[i]);
                    // Check for reasonable accuracy. The 1e-3 tolerance absorbs
                    // float precision issues in near-singular cases where
                    // from_pairs still returns Some.
                    prop_assert!((p[0] - dst_pts[i][0]).abs() < 1e-3,
                        "Point {}: project({:?}) -> {:?}, expected {:?}", i, src_pts[i], p, dst_pts[i]);
                    prop_assert!((p[1] - dst_pts[i][1]).abs() < 1e-3);
                }
            }
        }
    }

    // Every code in the 36h11 dictionary must round-trip through the decoder
    // to its own ID.
    #[test]
    fn test_all_codes_decode() {
        let decoder = AprilTag36h11;
        for id in 0..587u16 {
            let code = crate::dictionaries::DICT_APRILTAG36H11
                .get_code(id)
                .expect("valid ID");
            let result = decoder.decode(code);
            assert!(result.is_some());
            let (id_out, _, _) = result.unwrap();
            assert_eq!(id_out, u32::from(id));
        }
    }

    // Paint a synthetic 8x8-cell tag into a 64x64 canvas and verify that
    // sample_grid reads back the two hand-placed payload bits correctly.
    #[test]
    fn test_grid_sampling() {
        let width = 64;
        let height = 64;
        let mut data = vec![0u8; width * height];
        // 8x8 grid, 36x36px tag centered at 32,32 => corners [14, 50]
        // TL=(14,14), TR=(50,14), BR=(50,50), BL=(14,50)

        // Border:
        for gy in 0..8 {
            for gx in 0..8 {
                if gx == 0 || gx == 7 || gy == 0 || gy == 7 {
                    for y in 0..4 {
                        for x in 0..4 {
                            let px = 14 + (f64::from(gx) * 4.5) as usize + x;
                            let py = 14 + (f64::from(gy) * 4.5) as usize + y;
                            if px < 64 && py < 64 {
                                data[py * width + px] = 0;
                            }
                        }
                    }
                }
            }
        }
        // Bit 0 (cell 1,1) -> White (canonical p = -0.625, -0.625)
        for y in 0..4 {
            for x in 0..4 {
                let px = 14 + (1.0 * 4.5) as usize + x;
                let py = 14 + (1.0 * 4.5) as usize + y;
                data[py * width + px] = 255;
            }
        }
        // Bit 35 (cell 6,6) -> Black (canonical p = 0.625, 0.625)
        for y in 0..4 {
            for x in 0..4 {
                let px = 14 + (6.0 * 4.5) as usize + x;
                let py = 14 + (6.0 * 4.5) as usize + y;
                data[py * width + px] = 0;
            }
        }

        let img = crate::image::ImageView::new(&data, width, height, width).unwrap();

        let decoder = AprilTag36h11;
        let arena = Bump::new();
        let cand = crate::Detection {
            corners: [[14.0, 14.0], [50.0, 14.0], [50.0, 50.0], [14.0, 50.0]],
            ..Default::default()
        };
        let bits =
            sample_grid(&img, &arena, &cand, &decoder, 20.0).expect("Should sample successfully");

        // bit 0 should be 1 (high intensity)
        assert_eq!(bits & 1, 1, "Bit 0 should be 1");
        // bit 35 should be 0 (low intensity)
        assert_eq!((bits >> 35) & 1, 0, "Bit 35 should be 0");
    }

    // Direct Linear Transform: the fitted homography must reproduce all four
    // correspondences to numerical precision.
    #[test]
    fn test_homography_dlt() {
        let src = [[0.0, 0.0], [1.0, 0.0], [1.0, 1.0], [0.0, 1.0]];
        let dst = [[10.0, 10.0], [20.0, 11.0], [19.0, 21.0], [9.0, 20.0]];

        let h = Homography::from_pairs(&src, &dst).expect("DLT should succeed");
        for i in 0..4 {
            let p = h.project(src[i]);
            assert!((p[0] - dst[i][0]).abs() < 1e-6);
            assert!((p[1] - dst[i][1]).abs() < 1e-6);
        }
    }

    // ========================================================================
    // END-TO-END DECODER ROBUSTNESS TESTS
    // ========================================================================

    use crate::config::TagFamily;
    use crate::image::ImageView;
    use crate::quad::extract_quads_fast;
    use crate::segmentation::label_components_with_stats;
    use crate::test_utils::{TestImageParams, generate_test_image_with_params};
    use crate::threshold::ThresholdEngine;
    use bumpalo::Bump;

    /// Run full pipeline from image to decoded tags.
    ///
    /// Renders a synthetic 36h11 tag, then runs threshold -> segmentation ->
    /// quad extraction -> sampling -> decode, returning every (id, hamming)
    /// pair that decoded successfully.
    fn run_full_pipeline(tag_size: usize, canvas_size: usize, tag_id: u16) -> Vec<(u32, u32)> {
        let params = TestImageParams {
            family: TagFamily::AprilTag36h11,
            id: tag_id,
            tag_size,
            canvas_size,
            ..Default::default()
        };

        let (data, _corners) = generate_test_image_with_params(&params);
        let img = ImageView::new(&data, canvas_size, canvas_size, canvas_size).unwrap();

        let arena = Bump::new();
        let engine = ThresholdEngine::new();
        let stats = engine.compute_tile_stats(&arena, &img);
        let mut binary = vec![0u8; canvas_size * canvas_size];
        engine.apply_threshold(&arena, &img, &stats, &mut binary);
        let label_result =
            label_components_with_stats(&arena, &binary, canvas_size, canvas_size, true);
        let detections = extract_quads_fast(&arena, &img, &label_result);

        let decoder = AprilTag36h11;
        let mut results = Vec::new();

        for quad in &detections {
            if let Some(bits) = sample_grid(&img, &arena, quad, &decoder, 20.0)
                && let Some((id, hamming, _rot)) = decoder.decode(bits)
            {
                results.push((id, hamming));
            }
        }

        results
    }

    /// Test E2E pipeline decodes correctly at varying sizes.
    #[test]
    fn test_e2e_decoding_at_varying_sizes() {
        let canvas_size = 640;
        let tag_sizes = [64, 100, 150, 200, 300];
        let test_id: u16 = 42;

        for tag_size in tag_sizes {
            let decoded = run_full_pipeline(tag_size, canvas_size, test_id);
            let found = decoded.iter().any(|(id, _)| *id == u32::from(test_id));

            // 64px is the smallest size in the sweep, so every entry must decode.
            if tag_size >= 64 {
                assert!(found, "Tag size {tag_size}: ID {test_id} not found");
            }

            if found {
                let (_, hamming) = decoded
                    .iter()
                    .find(|(id, _)| *id == u32::from(test_id))
                    .unwrap();
                println!("Tag size {tag_size:>3}px: ID {test_id} with hamming {hamming}");
            }
        }
    }

    /// Test that multiple tag IDs decode correctly.
    #[test]
    fn test_e2e_multiple_ids() {
        let canvas_size = 400;
        let tag_size = 150;
        let test_ids: [u16; 5] = [0, 42, 100, 200, 500];

        for &test_id in &test_ids {
            let decoded = run_full_pipeline(tag_size, canvas_size, test_id);
            let found = decoded.iter().any(|(id, _)| *id == u32::from(test_id));
            assert!(found, "ID {test_id} not decoded");

            let (_, hamming) = decoded
                .iter()
                .find(|(id, _)| *id == u32::from(test_id))
                .unwrap();
            // A clean synthetic render should decode with zero bit errors.
            assert_eq!(*hamming, 0, "ID {test_id} should have 0 hamming");
            println!("ID {test_id:>3}: Decoded with hamming {hamming}");
        }
    }

    /// Test decoding with edge ID values (first and last of the 587-code family).
    #[test]
    fn test_e2e_edge_ids() {
        let canvas_size = 400;
        let tag_size = 150;
        let edge_ids: [u16; 2] = [0, 586];

        for &test_id in &edge_ids {
            let decoded = run_full_pipeline(tag_size, canvas_size, test_id);
            let found = decoded.iter().any(|(id, _)| *id == u32::from(test_id));
            assert!(found, "Edge ID {test_id} not decoded");
            println!("Edge ID {test_id}: Decoded");
        }
    }
}