Skip to main content

locus_core/
quad.rs

1//! Quad extraction and geometric primitive fitting.
2//!
3//! This module implements the middle stage of the detection pipeline:
4//! 1. **Contour Tracing**: Extracting the boundary of connected components.
5//! 2. **Simplification**: Using Douglas-Peucker to reduce complex contours to polygons.
6//! 3. **Quad Fitting**: Heuristics to reduce polygons to quadrilaterals and verify convexity.
7//! 4. **Sub-pixel Refinement**: Intensity-based edge localization for maximum precision.
8
9#![allow(clippy::cast_possible_wrap)]
10#![allow(clippy::cast_sign_loss)]
11#![allow(clippy::similar_names)]
12#![allow(unsafe_code)]
13
14use crate::Detection;
15use crate::batch::{CandidateState, DetectionBatch, MAX_CANDIDATES, Point2f};
16use crate::config::DetectorConfig;
17use crate::image::ImageView;
18use crate::segmentation::LabelResult;
19use bumpalo::Bump;
20use bumpalo::collections::Vec as BumpVec;
21use multiversion::multiversion;
22use std::cell::RefCell;
23
24thread_local! {
25    /// Reusable per-thread arena for quad extraction, avoiding a `Bump::new()` per candidate.
26    static QUAD_ARENA: RefCell<Bump> = RefCell::new(Bump::with_capacity(8 * 1024));
27}
28
29/// Approximate error function (erf) using the Abramowitz and Stegun approximation.
30pub(crate) fn erf_approx(x: f64) -> f64 {
31    if x == 0.0 {
32        return 0.0;
33    }
34    let sign = if x < 0.0 { -1.0 } else { 1.0 };
35    let x = x.abs();
36
37    // Constants for approximation
38    let a1 = 0.254_829_592;
39    let a2 = -0.284_496_736;
40    let a3 = 1.421_413_741;
41    let a4 = -1.453_152_027;
42    let a5 = 1.061_405_429;
43    let p = 0.327_591_1;
44
45    let t = 1.0 / (1.0 + p * x);
46    let y = 1.0 - (((((a5 * t + a4) * t) + a3) * t + a2) * t + a1) * t * (-x * x).exp();
47
48    sign * y
49}
50
51/// A 2D point with subpixel precision.
52#[derive(Clone, Copy, Debug)]
53pub struct Point {
54    /// X coordinate.
55    pub x: f64,
56    /// Y coordinate.
57    pub y: f64,
58}
59
60/// Fast quad extraction using bounding box stats from CCL.
61/// Only traces contours for components that pass geometric filters.
62/// Uses default configuration.
63#[allow(dead_code)]
64pub(crate) fn extract_quads_fast(
65    arena: &Bump,
66    img: &ImageView,
67    label_result: &LabelResult,
68) -> Vec<Detection> {
69    extract_quads_with_config(arena, img, label_result, &DetectorConfig::default(), 1, img)
70}
71
72/// Quad extraction with Structure of Arrays (SoA) output.
73///
74/// This function populates the `corners` and `status_mask` fields of the provided `DetectionBatch`.
75/// It returns the total number of candidates found ($N$).
76#[allow(clippy::too_many_lines)]
77#[tracing::instrument(skip_all, name = "pipeline::quad_extraction")]
78pub fn extract_quads_soa(
79    batch: &mut DetectionBatch,
80    img: &ImageView,
81    label_result: &LabelResult,
82    config: &DetectorConfig,
83    decimation: usize,
84    refinement_img: &ImageView,
85    debug_telemetry: bool,
86) -> (usize, Option<Vec<[Point; 4]>>) {
87    use rayon::prelude::*;
88
89    let stats = &label_result.component_stats;
90
91    // Collect into a temporary Vec to handle the MAX_CANDIDATES limit and sequential writing.
92    // In a future optimization, we could use an atomic counter to write directly into the batch.
93    let detections: Vec<([Point; 4], [Point; 4])> = stats
94        .par_iter()
95        .enumerate()
96        .filter_map(|(label_idx, stat)| {
97            QUAD_ARENA.with(|cell| {
98                let mut arena = cell.borrow_mut();
99                arena.reset();
100                let label = (label_idx + 1) as u32;
101                extract_single_quad(
102                    &arena,
103                    img,
104                    label_result.labels,
105                    label,
106                    stat,
107                    config,
108                    decimation,
109                    refinement_img,
110                )
111            })
112        })
113        .collect();
114
115    let n = detections.len().min(MAX_CANDIDATES);
116    let mut unrefined = if debug_telemetry {
117        Some(Vec::with_capacity(n))
118    } else {
119        None
120    };
121
122    for (i, (corners, unrefined_pts)) in detections.into_iter().take(n).enumerate() {
123        for (j, corner) in corners.iter().enumerate() {
124            batch.corners[i][j] = Point2f {
125                x: corner.x as f32,
126                y: corner.y as f32,
127            };
128        }
129        if let Some(ref mut u) = unrefined {
130            u.push(unrefined_pts);
131        }
132        batch.status_mask[i] = CandidateState::Active;
133    }
134
135    // Ensure the rest of the batch is marked as Empty
136    for i in n..MAX_CANDIDATES {
137        batch.status_mask[i] = CandidateState::Empty;
138    }
139
140    (n, unrefined)
141}
142
143/// Internal helper to extract a single quad from a component.
144#[inline]
145#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
146fn extract_single_quad(
147    arena: &Bump,
148    img: &ImageView,
149    labels: &[u32],
150    label: u32,
151    stat: &crate::segmentation::ComponentStats,
152    config: &DetectorConfig,
153    decimation: usize,
154    refinement_img: &ImageView,
155) -> Option<([Point; 4], [Point; 4])> {
156    let min_edge_len_sq = config.quad_min_edge_length * config.quad_min_edge_length;
157    let d = decimation as f64;
158
159    // Fast geometric filtering using bounding box
160    let bbox_w = u32::from(stat.max_x - stat.min_x) + 1;
161    let bbox_h = u32::from(stat.max_y - stat.min_y) + 1;
162    let bbox_area = bbox_w * bbox_h;
163
164    // Filter: too small or too large
165    if bbox_area < config.quad_min_area || bbox_area > (img.width * img.height * 9 / 10) as u32 {
166        return None;
167    }
168
169    // Filter: not roughly square (aspect ratio)
170    let aspect = bbox_w.max(bbox_h) as f32 / bbox_w.min(bbox_h).max(1) as f32;
171    if aspect > config.quad_max_aspect_ratio {
172        return None;
173    }
174
175    // Filter: fill ratio (should be ~50-80% for a tag with inner pattern)
176    let fill = stat.pixel_count as f32 / bbox_area as f32;
177    if fill < config.quad_min_fill_ratio || fill > config.quad_max_fill_ratio {
178        return None;
179    }
180
181    // Passed filters - trace boundary and fit quad
182    let sx = stat.first_pixel_x as usize;
183    let sy = stat.first_pixel_y as usize;
184
185    let contour = trace_boundary(arena, labels, img.width, img.height, sx, sy, label);
186
187    if contour.len() < 12 {
188        return None;
189    }
190
191    let simple_contour = chain_approximation(arena, &contour);
192    let perimeter = contour.len() as f64;
193    let epsilon = (perimeter * 0.02).max(1.0);
194    let simplified = douglas_peucker(arena, &simple_contour, epsilon);
195
196    if simplified.len() < 4 || simplified.len() > 11 {
197        return None;
198    }
199
200    let simpl_len = simplified.len();
201    let reduced = if simpl_len == 5 {
202        simplified
203    } else if simpl_len == 4 {
204        let mut closed = BumpVec::new_in(arena);
205        for p in &simplified {
206            closed.push(*p);
207        }
208        closed.push(simplified[0]);
209        closed
210    } else {
211        reduce_to_quad(arena, &simplified)
212    };
213
214    if reduced.len() != 5 {
215        return None;
216    }
217
218    let area = polygon_area(&reduced);
219    let compactness = (12.566 * area.abs()) / (perimeter * perimeter);
220
221    if area.abs() <= f64::from(config.quad_min_area) || compactness <= 0.1 {
222        return None;
223    }
224
225    // Standardize to CW for consistency
226    let quad_pts_dec = if area > 0.0 {
227        [reduced[0], reduced[1], reduced[2], reduced[3]]
228    } else {
229        [reduced[0], reduced[3], reduced[2], reduced[1]]
230    };
231
232    // Scale to full resolution using correct coordinate mapping.
233    // A point at (x, y) in decimated coordinates maps to (x*d, y*d) in full-res.
234    let quad_pts = [
235        Point {
236            x: quad_pts_dec[0].x * d,
237            y: quad_pts_dec[0].y * d,
238        },
239        Point {
240            x: quad_pts_dec[1].x * d,
241            y: quad_pts_dec[1].y * d,
242        },
243        Point {
244            x: quad_pts_dec[2].x * d,
245            y: quad_pts_dec[2].y * d,
246        },
247        Point {
248            x: quad_pts_dec[3].x * d,
249            y: quad_pts_dec[3].y * d,
250        },
251    ];
252
253    // Expand 0.5px outward from centers to objects boundaries.
254    // This aligns the unrefined quad with integer pixel boundaries.
255    let center_x = (quad_pts[0].x + quad_pts[1].x + quad_pts[2].x + quad_pts[3].x) * 0.25;
256    let center_y = (quad_pts[0].y + quad_pts[1].y + quad_pts[2].y + quad_pts[3].y) * 0.25;
257
258    let mut expanded_pts = [quad_pts[0], quad_pts[1], quad_pts[2], quad_pts[3]];
259    for i in 0..4 {
260        expanded_pts[i].x += 0.5 * (quad_pts[i].x - center_x).signum();
261        expanded_pts[i].y += 0.5 * (quad_pts[i].y - center_y).signum();
262    }
263    let quad_pts = expanded_pts;
264
265    let mut ok = true;
266    for i in 0..4 {
267        let d2 = (quad_pts[i].x - quad_pts[(i + 1) % 4].x).powi(2)
268            + (quad_pts[i].y - quad_pts[(i + 1) % 4].y).powi(2);
269        if d2 < min_edge_len_sq {
270            ok = false;
271            break;
272        }
273    }
274
275    if ok {
276        let use_erf = config.refinement_mode == crate::config::CornerRefinementMode::Erf;
277        let corners = [
278            refine_corner(
279                arena,
280                refinement_img,
281                quad_pts[0],
282                quad_pts[3],
283                quad_pts[1],
284                config.subpixel_refinement_sigma,
285                decimation,
286                use_erf,
287            ),
288            refine_corner(
289                arena,
290                refinement_img,
291                quad_pts[1],
292                quad_pts[0],
293                quad_pts[2],
294                config.subpixel_refinement_sigma,
295                decimation,
296                use_erf,
297            ),
298            refine_corner(
299                arena,
300                refinement_img,
301                quad_pts[2],
302                quad_pts[1],
303                quad_pts[3],
304                config.subpixel_refinement_sigma,
305                decimation,
306                use_erf,
307            ),
308            refine_corner(
309                arena,
310                refinement_img,
311                quad_pts[3],
312                quad_pts[2],
313                quad_pts[0],
314                config.subpixel_refinement_sigma,
315                decimation,
316                use_erf,
317            ),
318        ];
319
320        let edge_score = calculate_edge_score(refinement_img, corners);
321        if edge_score > config.quad_min_edge_score {
322            return Some((corners, quad_pts));
323        }
324    }
325    None
326}
327
328/// Quad extraction with custom configuration.
329///
330/// This is the main entry point for quad detection with custom parameters.
331/// Components are processed in parallel for maximum throughput.
332#[allow(clippy::too_many_lines)]
333pub fn extract_quads_with_config(
334    _arena: &Bump,
335    img: &ImageView,
336    label_result: &LabelResult,
337    config: &DetectorConfig,
338    decimation: usize,
339    refinement_img: &ImageView,
340) -> Vec<Detection> {
341    use rayon::prelude::*;
342
343    let stats = &label_result.component_stats;
344    let d = decimation as f64;
345
346    // Process components in parallel, each with its own thread-local arena
347    stats
348        .par_iter()
349        .enumerate()
350        .filter_map(|(label_idx, stat)| {
351            QUAD_ARENA.with(|cell| {
352                let mut arena = cell.borrow_mut();
353                arena.reset();
354                let label = (label_idx + 1) as u32;
355
356                let quad_result = extract_single_quad(
357                    &arena,
358                    img,
359                    label_result.labels,
360                    label,
361                    stat,
362                    config,
363                    decimation,
364                    refinement_img,
365                );
366
367                let (corners, _unrefined) = quad_result?;
368                // To keep backward compatibility, we still need to calculate some fields
369                // that aren't yet in the SoA (or are derived from it).
370                let area = polygon_area(&corners);
371
372                Some(Detection {
373                    id: label,
374                    center: [
375                        (corners[0].x + corners[1].x + corners[2].x + corners[3].x) / 4.0,
376                        (corners[0].y + corners[1].y + corners[2].y + corners[3].y) / 4.0,
377                    ],
378                    corners: [
379                        [corners[0].x, corners[0].y],
380                        [corners[1].x, corners[1].y],
381                        [corners[2].x, corners[2].y],
382                        [corners[3].x, corners[3].y],
383                    ],
384                    hamming: 0,
385                    rotation: 0,
386                    decision_margin: area * d * d, // Area in full-res
387                    bits: 0,
388                    pose: None,
389                    pose_covariance: None,
390                })
391            })
392        })
393        .collect()
394}
395
396/// Legacy extract_quads for backward compatibility.
397#[allow(dead_code)]
398pub(crate) fn extract_quads(arena: &Bump, img: &ImageView, labels: &[u32]) -> Vec<Detection> {
399    // Create a fake LabelResult with stats computed on-the-fly
400    let mut detections = Vec::new();
401    let num_labels = (labels.len() / 32) + 1;
402    let processed_labels = arena.alloc_slice_fill_copy(num_labels, 0u32);
403
404    let width = img.width;
405    let height = img.height;
406
407    for y in 1..height - 1 {
408        let row_off = y * width;
409        let prev_row_off = (y - 1) * width;
410
411        for x in 1..width - 1 {
412            let idx = row_off + x;
413            let label = labels[idx];
414
415            if label == 0 {
416                continue;
417            }
418
419            if labels[idx - 1] == label || labels[prev_row_off + x] == label {
420                continue;
421            }
422
423            let bit_idx = (label as usize) / 32;
424            let bit_mask = 1 << (label % 32);
425            if processed_labels[bit_idx] & bit_mask != 0 {
426                continue;
427            }
428
429            processed_labels[bit_idx] |= bit_mask;
430            let contour = trace_boundary(arena, labels, width, height, x, y, label);
431
432            if contour.len() >= 12 {
433                // Lowered from 30 to support 8px+ tags
434                let simplified = douglas_peucker(arena, &contour, 4.0);
435                if simplified.len() == 5 {
436                    let area = polygon_area(&simplified);
437                    let perimeter = contour.len() as f64;
438                    let compactness = (12.566 * area) / (perimeter * perimeter);
439
440                    if area > 400.0 && compactness > 0.5 {
441                        let mut ok = true;
442                        for i in 0..4 {
443                            let d2 = (simplified[i].x - simplified[i + 1].x).powi(2)
444                                + (simplified[i].y - simplified[i + 1].y).powi(2);
445                            if d2 < 100.0 {
446                                ok = false;
447                                break;
448                            }
449                        }
450
451                        if ok {
452                            detections.push(Detection {
453                                id: label,
454                                center: polygon_center(&simplified),
455                                corners: [
456                                    [simplified[0].x, simplified[0].y],
457                                    [simplified[1].x, simplified[1].y],
458                                    [simplified[2].x, simplified[2].y],
459                                    [simplified[3].x, simplified[3].y],
460                                ],
461                                hamming: 0,
462                                rotation: 0,
463                                decision_margin: area,
464                                bits: 0,
465                                pose: None,
466                                pose_covariance: None,
467                            });
468                        }
469                    }
470                }
471            }
472        }
473    }
474    detections
475}
476
477#[multiversion(targets(
478    "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
479    "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
480    "aarch64+neon"
481))]
482fn find_max_distance_optimized(points: &[Point], start: usize, end: usize) -> (f64, usize) {
483    let a = points[start];
484    let b = points[end];
485    let dx = b.x - a.x;
486    let dy = b.y - a.y;
487    let mag_sq = dx * dx + dy * dy;
488
489    if mag_sq < 1e-18 {
490        let mut dmax = 0.0;
491        let mut index = start;
492        for (i, p) in points.iter().enumerate().take(end).skip(start + 1) {
493            let d = ((p.x - a.x).powi(2) + (p.y - a.y).powi(2)).sqrt();
494            if d > dmax {
495                dmax = d;
496                index = i;
497            }
498        }
499        return (dmax, index);
500    }
501
502    let mut dmax = 0.0;
503    let mut index = start;
504
505    let mut i = start + 1;
506
507    #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
508    if let Some(_dispatch) = multiversion::target::x86_64::avx2::get() {
509        unsafe {
510            use std::arch::x86_64::*;
511            let v_dx = _mm256_set1_pd(dx);
512            let v_dy = _mm256_set1_pd(dy);
513            let v_ax = _mm256_set1_pd(a.x);
514            let v_ay = _mm256_set1_pd(a.y);
515            let v_bx = _mm256_set1_pd(b.x);
516            let v_by = _mm256_set1_pd(b.y);
517
518            let mut v_dmax = _mm256_setzero_pd();
519            let mut v_indices = _mm256_setzero_pd(); // We'll store indices as doubles for simplicity
520
521            while i + 4 <= end {
522                // Load 4 points (8 doubles: x0, y0, x1, y1, x2, y2, x3, y3)
523                // Point is struct { x: f64, y: f64 } which is memory-compatible with [f64; 2]
524                let p_ptr = points.as_ptr().add(i) as *const f64;
525
526                // Unpack into xxxx and yyyy
527                // [x0, y0, x1, y1]
528                let raw0 = _mm256_loadu_pd(p_ptr);
529                // [x2, y2, x3, y3]
530                let raw1 = _mm256_loadu_pd(p_ptr.add(4));
531
532                // permute to get [x0, x1, y0, y1]
533                let x01y01 = _mm256_shuffle_pd(raw0, raw0, 0b0000); // Wait, shuffle_pd is tricky
534                // Better: use unpack
535                let x0x1 = _mm256_set_pd(
536                    points[i + 3].x,
537                    points[i + 2].x,
538                    points[i + 1].x,
539                    points[i].x,
540                );
541                let y0y1 = _mm256_set_pd(
542                    points[i + 3].y,
543                    points[i + 2].y,
544                    points[i + 1].y,
545                    points[i].y,
546                );
547
548                // formula: |dy*px - dx*py + bx*ay - by*ax| * inv_mag
549                let term1 = _mm256_mul_pd(v_dy, x0x1);
550                let term2 = _mm256_mul_pd(v_dx, y0y1);
551                let term3 = _mm256_set1_pd(b.x * a.y - b.y * a.x);
552
553                let dist_v = _mm256_sub_pd(term1, term2);
554                let dist_v = _mm256_add_pd(dist_v, term3);
555
556                // Absolute value
557                let mask = _mm256_set1_pd(-0.0);
558                let dist_v = _mm256_andnot_pd(mask, dist_v);
559
560                // Check if any dist > v_dmax
561                let cmp = _mm256_cmp_pd(dist_v, v_dmax, _CMP_GT_OQ);
562                if _mm256_movemask_pd(cmp) != 0 {
563                    // Update dmax and indices - this is a bit slow in SIMD,
564                    // but we only do it when we find a new max.
565                    let dists: [f64; 4] = std::mem::transmute(dist_v);
566                    for (j, &d) in dists.iter().enumerate() {
567                        if d > dmax {
568                            dmax = d;
569                            index = i + j;
570                        }
571                    }
572                    v_dmax = _mm256_set1_pd(dmax);
573                }
574                i += 4;
575            }
576        }
577    }
578
579    // Scalar tail
580    while i < end {
581        let d = perpendicular_distance(points[i], a, b);
582        if d > dmax {
583            dmax = d;
584            index = i;
585        }
586        i += 1;
587    }
588
589    (dmax, index)
590}
591
592/// Simplify a contour using the Douglas-Peucker algorithm.
593///
594/// Leverages an iterative implementation with a manual stack to avoid
595/// the overhead of recursive function calls and multiple temporary allocations.
596pub(crate) fn douglas_peucker<'a>(
597    arena: &'a Bump,
598    points: &[Point],
599    epsilon: f64,
600) -> BumpVec<'a, Point> {
601    if points.len() < 3 {
602        let mut v = BumpVec::new_in(arena);
603        v.extend_from_slice(points);
604        return v;
605    }
606
607    let n = points.len();
608    let mut keep = BumpVec::from_iter_in((0..n).map(|_| false), arena);
609    keep[0] = true;
610    keep[n - 1] = true;
611
612    let mut stack = BumpVec::new_in(arena);
613    stack.push((0, n - 1));
614
615    while let Some((start, end)) = stack.pop() {
616        if end - start < 2 {
617            continue;
618        }
619
620        let (dmax, index) = find_max_distance_optimized(points, start, end);
621
622        if dmax > epsilon {
623            keep[index] = true;
624            stack.push((start, index));
625            stack.push((index, end));
626        }
627    }
628
629    let mut simplified = BumpVec::new_in(arena);
630    for (i, &k) in keep.iter().enumerate() {
631        if k {
632            simplified.push(points[i]);
633        }
634    }
635    simplified
636}
637
638fn perpendicular_distance(p: Point, a: Point, b: Point) -> f64 {
639    let dx = b.x - a.x;
640    let dy = b.y - a.y;
641    let mag = (dx * dx + dy * dy).sqrt();
642    if mag < 1e-9 {
643        return ((p.x - a.x).powi(2) + (p.y - a.y).powi(2)).sqrt();
644    }
645    ((dy * p.x - dx * p.y + b.x * a.y - b.y * a.x).abs()) / mag
646}
647
648fn polygon_area(points: &[Point]) -> f64 {
649    let mut area = 0.0;
650    for i in 0..points.len() - 1 {
651        area += (points[i].x * points[i + 1].y) - (points[i + 1].x * points[i].y);
652    }
653    area * 0.5
654}
655
656#[allow(dead_code)]
657fn polygon_center(points: &[Point]) -> [f64; 2] {
658    let mut cx = 0.0;
659    let mut cy = 0.0;
660    let n = points.len() - 1;
661    for p in points.iter().take(n) {
662        cx += p.x;
663        cy += p.y;
664    }
665    [cx / n as f64, cy / n as f64]
666}
667
668/// Refine corners to sub-pixel accuracy using intensity-based optimization.
669///
670/// Fits lines to the two edges meeting at a corner using PSF-blurred step function
671/// model and Gauss-Newton optimization, then computes their intersection.
672/// Achieves ~0.02px accuracy vs ~0.2px for gradient-peak methods.
673#[must_use]
674#[allow(clippy::too_many_arguments)]
675pub(crate) fn refine_corner(
676    arena: &Bump,
677    img: &ImageView,
678    p: Point,
679    p_prev: Point,
680    p_next: Point,
681    sigma: f64,
682    decimation: usize,
683    use_erf: bool,
684) -> Point {
685    // Intensity-based refinement (ERF fit) is much more accurate but slower.
686    let line1 = if use_erf {
687        refine_edge_intensity(arena, img, p_prev, p, sigma, decimation)
688            .or_else(|| fit_edge_line(img, p_prev, p, decimation))
689    } else {
690        fit_edge_line(img, p_prev, p, decimation)
691    };
692
693    let line2 = if use_erf {
694        refine_edge_intensity(arena, img, p, p_next, sigma, decimation)
695            .or_else(|| fit_edge_line(img, p, p_next, decimation))
696    } else {
697        fit_edge_line(img, p, p_next, decimation)
698    };
699
700    if let (Some(l1), Some(l2)) = (line1, line2) {
701        // Intersect lines: a1*x + b1*y + c1 = 0 and a2*x + b2*y + c2 = 0
702        let det = l1.0 * l2.1 - l2.0 * l1.1;
703        if det.abs() > 1e-6 {
704            let x = (l1.1 * l2.2 - l2.1 * l1.2) / det;
705            let y = (l2.0 * l1.2 - l1.0 * l2.2) / det;
706
707            // Sanity check: intersection must be near original point
708            let dist_sq = (x - p.x).powi(2) + (y - p.y).powi(2);
709            let max_dist = if decimation > 1 {
710                (decimation as f64) + 2.0
711            } else {
712                2.0
713            };
714            if dist_sq < max_dist * max_dist {
715                return Point { x, y };
716            }
717        }
718    }
719
720    p
721}
722
723/// Fit a line (a*x + b*y + c = 0) to an edge by sampling gradient peaks.
724fn fit_edge_line(
725    img: &ImageView,
726    p1: Point,
727    p2: Point,
728    decimation: usize,
729) -> Option<(f64, f64, f64)> {
730    let dx = p2.x - p1.x;
731    let dy = p2.y - p1.y;
732    let len = (dx * dx + dy * dy).sqrt();
733    if len < 4.0 {
734        return None;
735    }
736
737    let nx = dy / len;
738    let ny = -dx / len;
739
740    let mut sum_d = 0.0;
741    let mut count = 0;
742
743    let n_samples = (len as usize).clamp(5, 15);
744    // Original search range was 3 pixels
745    let r = if decimation > 1 {
746        (decimation as i32) + 1
747    } else {
748        3
749    };
750
751    for i in 1..=n_samples {
752        let t = i as f64 / (n_samples + 1) as f64;
753        let px = p1.x + dx * t;
754        let py = p1.y + dy * t;
755
756        let mut best_px = px;
757        let mut best_py = py;
758        let mut best_mag = 0.0;
759
760        for step in -r..=r {
761            let sx = px + nx * f64::from(step);
762            let sy = py + ny * f64::from(step);
763
764            let g = img.sample_gradient_bilinear(sx, sy);
765            let mag = g[0] * g[0] + g[1] * g[1];
766            if mag > best_mag {
767                best_mag = mag;
768                best_px = sx;
769                best_py = sy;
770            }
771        }
772
773        if best_mag > 10.0 {
774            let mut mags = [0.0f64; 3];
775            for (j, offset) in [-1.0, 0.0, 1.0].iter().enumerate() {
776                let sx = best_px + nx * offset;
777                let sy = best_py + ny * offset;
778                let g = img.sample_gradient_bilinear(sx, sy);
779                mags[j] = g[0] * g[0] + g[1] * g[1];
780            }
781
782            let num = mags[2] - mags[0];
783            let den = 2.0 * (mags[0] + mags[2] - 2.0 * mags[1]);
784            let sub_offset = if den.abs() > 1e-6 {
785                (-num / den).clamp(-0.5, 0.5)
786            } else {
787                0.0
788            };
789
790            let refined_x = best_px + nx * sub_offset;
791            let refined_y = best_py + ny * sub_offset;
792
793            sum_d += -(nx * refined_x + ny * refined_y);
794            count += 1;
795        }
796    }
797
798    if count < 3 {
799        return None;
800    }
801
802    Some((nx, ny, sum_d / f64::from(count)))
803}
804
805/// Refine edge position using intensity-based optimization (Kallwies method).
806///
807/// Instead of finding gradient peaks, this minimizes the difference between
808/// observed pixel intensities and a PSF-blurred step function model:
809///
810/// Model(x,y) = (A+B)/2 + (A-B)/2 * erf(dist(x,y,line) / σ)
811///
812/// where A,B are intensities on either side, dist is perpendicular distance
813/// to the edge line, and σ is the blur factor (~0.6 pixels).
814///
815/// This achieves ~0.02px accuracy vs ~0.2px for gradient-based methods.
816#[allow(
817    clippy::similar_names,
818    clippy::many_single_char_names,
819    clippy::too_many_lines
820)]
821// Collected in the quad extraction process
822pub(crate) fn refine_edge_intensity(
823    arena: &Bump,
824    img: &ImageView,
825    p1: Point,
826    p2: Point,
827    sigma: f64,
828    decimation: usize,
829) -> Option<(f64, f64, f64)> {
830    let dx = p2.x - p1.x;
831    let dy = p2.y - p1.y;
832    let len = (dx * dx + dy * dy).sqrt();
833    if len < 4.0 {
834        return None;
835    }
836
837    // Initial line parameters: normal (nx, ny) and distance d from origin
838    // For CW winding, the outward normal is (dy/len, -dx/len)
839    let nx = dy / len;
840    let ny = -dx / len;
841    let mid_x = f64::midpoint(p1.x, p2.x);
842    let mid_y = f64::midpoint(p1.y, p2.y);
843    let mut d = -(nx * mid_x + ny * mid_y);
844
845    // Collect pixels within a window of the edge.
846    let window = if decimation > 1 {
847        (decimation as f64) + 1.0
848    } else {
849        2.5
850    };
851
852    let x0 = (p1.x.min(p2.x) - window - 0.5).max(1.0) as usize;
853    let x1 = (p1.x.max(p2.x) + window + 0.5).min((img.width - 2) as f64) as usize;
854    let y0 = (p1.y.min(p2.y) - window - 0.5).max(1.0) as usize;
855    let y1 = (p1.y.max(p2.y) + window + 0.5).min((img.height - 2) as f64) as usize;
856
857    // Use arena for samples to avoid heap allocation in hot loop
858    // (x, y, intensity, projection)
859    let mut samples = BumpVec::new_in(arena);
860
861    // For large edges, use subsampling to reduce compute while maintaining accuracy
862    // Stride of 2 for very large edges (>100px), else 1
863    let stride = if len > 100.0 { 2 } else { 1 };
864
865    let mut py = y0;
866    while py <= y1 {
867        let mut px = x0;
868        while px <= x1 {
869            // Foundation Principle 1: Pixel center is at (px+0.5, py+0.5)
870            let x = px as f64 + 0.5;
871            let y = py as f64 + 0.5;
872
873            // Check if near the edge segment (not infinite line)
874            let t = ((x - p1.x) * dx + (y - p1.y) * dy) / (len * len);
875            if !(-0.1..=1.1).contains(&t) {
876                px += stride;
877                continue;
878            }
879
880            let dist_signed = nx * x + ny * y + d;
881            if dist_signed.abs() < window {
882                let intensity = f64::from(img.get_pixel(px, py));
883                // Pre-calculate nx*x + ny*y
884                let projection = nx * x + ny * y;
885                samples.push((x, y, intensity, projection));
886            }
887            px += stride;
888        }
889        py += stride;
890    }
891
892    if samples.len() < 10 {
893        return Some((nx, ny, d)); // Fall back to initial estimate
894    }
895
896    // Estimate A (dark side) and B (light side) from samples
897    // Use weighted average based on distance from edge to be more robust
898    let mut dark_sum = 0.0;
899    let mut dark_weight = 0.0;
900    let mut light_sum = 0.0;
901    let mut light_weight = 0.0;
902
903    for &(_x, _y, intensity, projection) in &samples {
904        let signed_dist = projection + d;
905        if signed_dist < -1.0 {
906            let w = (-signed_dist - 1.0).min(2.0); // Weight pixels further from edge more
907            dark_sum += intensity * w;
908            dark_weight += w;
909        } else if signed_dist > 1.0 {
910            let w = (signed_dist - 1.0).min(2.0);
911            light_sum += intensity * w;
912            light_weight += w;
913        }
914    }
915
916    if dark_weight < 1.0 || light_weight < 1.0 {
917        return Some((nx, ny, d));
918    }
919
920    let a = dark_sum / dark_weight; // Dark side (A)
921    let b = light_sum / light_weight; // Light side (B)
922
923    // Foundation Principle 2: I(d) = (A+B)/2 + (B-A)/2 * erf(d / sigma)
924    let inv_s_sqrt2 = 1.0 / sigma;
925
926    // Gauss-Newton optimization: refine d
927    for _iter in 0..15 {
928        let mut jtj = 0.0;
929        let mut jtr = 0.0;
930
931        for &(_x, _y, intensity, projection) in &samples {
932            let dist_phys = projection + d;
933            let u = dist_phys * inv_s_sqrt2;
934
935            if u.abs() > 3.0 {
936                continue;
937            }
938
939            let erf_u = erf_approx(u);
940            let model = (a + b) * 0.5 + (b - a) * 0.5 * erf_u;
941            let residual = intensity - model;
942
943            // Jacobians
944            let exp_term = (-u * u).exp();
945            let jd = (b - a) * 0.5 * std::f64::consts::FRAC_2_SQRT_PI * exp_term * inv_s_sqrt2;
946
947            jtj += jd * jd;
948            jtr += jd * residual;
949        }
950
951        if jtj.abs() > 1e-10 {
952            let delta = jtr / jtj;
953            d += delta.clamp(-0.5, 0.5);
954            if delta.abs() < 1e-4 {
955                break;
956            }
957        } else {
958            break;
959        }
960    }
961
962    Some((nx, ny, d))
963}
964
965/// Reducing a polygon to a quad (4 vertices + 1 closing) by iteratively removing
966/// the vertex that forms the smallest area triangle with its neighbors.
967/// This is robust for noisy/jagged shapes that are approximately quadrilateral.
968fn reduce_to_quad<'a>(arena: &'a Bump, poly: &[Point]) -> BumpVec<'a, Point> {
969    if poly.len() <= 5 {
970        return BumpVec::from_iter_in(poly.iter().copied(), arena);
971    }
972
973    // Work on a mutable copy
974    let mut current = BumpVec::from_iter_in(poly.iter().copied(), arena);
975    // Remove closing point for processing
976    current.pop();
977
978    while current.len() > 4 {
979        let n = current.len();
980        let mut min_area = f64::MAX;
981        let mut min_idx = 0;
982
983        for i in 0..n {
984            let p_prev = current[(i + n - 1) % n];
985            let p_curr = current[i];
986            let p_next = current[(i + 1) % n];
987
988            // Triangle area: 0.5 * |x1(y2 - y3) + x2(y3 - y1) + x3(y1 - y2)|
989            let area = (p_prev.x * (p_curr.y - p_next.y)
990                + p_curr.x * (p_next.y - p_prev.y)
991                + p_next.x * (p_prev.y - p_curr.y))
992                .abs()
993                * 0.5;
994
995            if area < min_area {
996                min_area = area;
997                min_idx = i;
998            }
999        }
1000
1001        // Remove the vertex contributing least to the shape
1002        current.remove(min_idx);
1003    }
1004
1005    // Re-close the loop
1006    if !current.is_empty() {
1007        let first = current[0];
1008        current.push(first);
1009    }
1010
1011    current
1012}
1013
1014/// Calculate the minimum average gradient magnitude along the 4 edges of the quad.
1015///
1016/// Returns the lowest score among the 4 edges. If any edge is very weak,
1017/// the return value will be low, indicating a likely false positive.
1018fn calculate_edge_score(img: &ImageView, corners: [Point; 4]) -> f64 {
1019    let mut min_score = f64::MAX;
1020
1021    for i in 0..4 {
1022        let p1 = corners[i];
1023        let p2 = corners[(i + 1) % 4];
1024
1025        let dx = p2.x - p1.x;
1026        let dy = p2.y - p1.y;
1027        let len = (dx * dx + dy * dy).sqrt();
1028
1029        if len < 4.0 {
1030            // Tiny edge, likely noise or degenerate
1031            return 0.0;
1032        }
1033
1034        // Sample points along the edge (excluding corners to avoid corner effects)
1035        let n_samples = (len as usize).clamp(3, 10); // At least 3, at most 10
1036        let mut edge_mag_sum = 0.0;
1037
1038        for k in 1..=n_samples {
1039            // t goes from roughly 0.1 to 0.9 to avoid corners
1040            let t = k as f64 / (n_samples + 1) as f64;
1041            let x = p1.x + dx * t;
1042            let y = p1.y + dy * t;
1043
1044            let g = img.sample_gradient_bilinear(x, y);
1045            edge_mag_sum += (g[0] * g[0] + g[1] * g[1]).sqrt();
1046        }
1047
1048        let avg_mag = edge_mag_sum / n_samples as f64;
1049        if avg_mag < min_score {
1050            min_score = avg_mag;
1051        }
1052    }
1053
1054    min_score
1055}
1056
1057#[cfg(test)]
1058#[allow(clippy::unwrap_used, clippy::float_cmp)]
1059mod tests {
1060    use super::*;
1061    use bumpalo::Bump;
1062    use proptest::prelude::*;
1063
1064    #[test]
1065    fn test_erf_approx_properties() {
1066        // Zero crossing
1067        assert_eq!(erf_approx(0.0), 0.0);
1068
1069        // Symmetry: erf(-x) == -erf(x)
1070        for x in [0.1, 0.5, 1.0, 2.0, 5.0] {
1071            assert!((erf_approx(-x) + erf_approx(x)).abs() < 1e-15);
1072        }
1073
1074        // Asymptotic bounds: erf(inf) -> 1.0, erf(-inf) -> -1.0
1075        assert!((erf_approx(10.0) - 1.0).abs() < 1e-7);
1076        assert!((erf_approx(-10.0) + 1.0).abs() < 1e-7);
1077        assert!((erf_approx(100.0) - 1.0).abs() < 1e-15); // Should clamp or saturate
1078    }
1079
1080    #[test]
1081    fn test_erf_approx_accuracy() {
1082        // Ground truth values (approx)
1083        // x = 0.5, erf(0.5) ≈ 0.5204998778130465
1084        // x = 1.0, erf(1.0) ≈ 0.8427007929497148
1085        // x = 2.0, erf(2.0) ≈ 0.9953222650189527
1086        let cases = [
1087            (0.5, 0.520_499_877_813_046_5),
1088            (1.0, 0.842_700_792_949_714_8),
1089            (2.0, 0.995_322_265_018_952_7),
1090        ];
1091
1092        for (x, expected) in cases {
1093            let actual = erf_approx(x);
1094            let diff = (actual - expected).abs();
1095            assert!(
1096                diff < 1.5e-7,
1097                "erf_approx({x}) error {diff} exceeds tolerance 1.5e-7"
1098            );
1099        }
1100    }
1101
1102    #[test]
1103    fn test_edge_score_rejection() {
1104        // Create a 20x20 image mostly gray
1105        let width = 20;
1106        let height = 20;
1107        let stride = 20;
1108        let mut data = vec![128u8; width * height];
1109
1110        // Draw a weak quad (contrast 10)
1111        // Center 10,10. Size 8x8.
1112        // Inside 128, Outside 138. Gradient ~5.
1113        // 5 < 10, should be rejected.
1114        for y in 6..14 {
1115            for x in 6..14 {
1116                data[y * width + x] = 138;
1117            }
1118        }
1119
1120        let img = ImageView::new(&data, width, height, stride).unwrap();
1121
1122        let corners = [
1123            Point { x: 6.0, y: 6.0 },
1124            Point { x: 14.0, y: 6.0 },
1125            Point { x: 14.0, y: 14.0 },
1126            Point { x: 6.0, y: 14.0 },
1127        ];
1128
1129        let score = calculate_edge_score(&img, corners);
1130        // Gradient should be roughly (138-128)/2 = 5 per pixel boundary?
1131        // Sobel-like (p(x+1)-p(x-1))/2.
1132        // At edge x=6: left=128, right=138. (138-128)/2 = 5.
1133        // Magnitude 5.0.
1134        // Threshold is 10.0.
1135        assert!(score < 10.0, "Score {score} should be < 10.0");
1136
1137        // Draw a strong quad (contrast 50)
1138        // Inside 200, Outside 50. Gradient ~75.
1139        // 75 > 10, should pass.
1140        for y in 6..14 {
1141            for x in 6..14 {
1142                data[y * width + x] = 200;
1143            }
1144        }
1145        // Restore background
1146        for y in 0..height {
1147            for x in 0..width {
1148                if !(6..14).contains(&x) || !(6..14).contains(&y) {
1149                    data[y * width + x] = 50;
1150                }
1151            }
1152        }
1153        let img = ImageView::new(&data, width, height, stride).unwrap();
1154        let score = calculate_edge_score(&img, corners);
1155        assert!(score > 40.0, "Score {score} should be > 40.0");
1156    }
1157
1158    proptest! {
1159        #[test]
1160        fn prop_erf_approx_accuracy(x in -3.0..3.0f64) {
1161            let actual = erf_approx(x);
1162            let expected = libm::erf(x);
1163            let diff = (actual - expected).abs();
1164            assert!(diff < 1.5e-7, "erf_approx({x}) error {diff} exceeds tolerance 1.5e-7");
1165        }
1166
1167        #[test]
1168        fn prop_douglas_peucker_invariants(
1169            points in prop::collection::vec((0.0..1000.0, 0.0..1000.0), 3..100),
1170            epsilon in 0.1..10.0f64
1171        ) {
1172            let arena = Bump::new();
1173            let contour: Vec<Point> = points.iter().map(|&(x, y)| Point { x, y }).collect();
1174            let simplified = douglas_peucker(&arena, &contour, epsilon);
1175
1176            // 1. Simplified points are a subset of original points (by coordinates)
1177            for p in &simplified {
1178                assert!(contour.iter().any(|&op| (op.x - p.x).abs() < 1e-9 && (op.y - p.y).abs() < 1e-9));
1179            }
1180
1181            // 2. End points are preserved
1182            assert_eq!(simplified[0].x, contour[0].x);
1183            assert_eq!(simplified[0].y, contour[0].y);
1184            assert_eq!(simplified.last().unwrap().x, contour.last().unwrap().x);
1185            assert_eq!(simplified.last().unwrap().y, contour.last().unwrap().y);
1186
1187            // 3. Simplified contour has fewer or equal points
1188            assert!(simplified.len() <= contour.len());
1189
1190            // 4. All original points are at most epsilon away from the simplified segment
1191            for i in 1..simplified.len() {
1192                let a = simplified[i-1];
1193                let b = simplified[i];
1194
1195                // Find indices in original contour matching simplified points
1196                let mut start_idx = None;
1197                let mut end_idx = None;
1198                for (j, op) in contour.iter().enumerate() {
1199                    if (op.x - a.x).abs() < 1e-9 && (op.y - a.y).abs() < 1e-9 {
1200                        start_idx = Some(j);
1201                    }
1202                    if (op.x - b.x).abs() < 1e-9 && (op.y - b.y).abs() < 1e-9 {
1203                        end_idx = Some(j);
1204                    }
1205                }
1206
1207                if let (Some(s), Some(e)) = (start_idx, end_idx) {
1208                    for op in contour.iter().take(e + 1).skip(s) {
1209                        let d = perpendicular_distance(*op, a, b);
1210                        assert!(d <= epsilon + 1e-7, "Distance {d} > epsilon {epsilon} at point");
1211                    }
1212                }
1213            }
1214        }
1215    }
1216
1217    // ========================================================================
1218    // QUAD EXTRACTION ROBUSTNESS TESTS
1219    // ========================================================================
1220
1221    use crate::config::TagFamily;
1222    use crate::segmentation::label_components_with_stats;
1223    use crate::test_utils::{
1224        TestImageParams, compute_corner_error, generate_test_image_with_params,
1225    };
1226    use crate::threshold::ThresholdEngine;
1227
1228    /// Helper: Generate a tag image and run through threshold + segmentation + quad extraction.
1229    fn run_quad_extraction(tag_size: usize, canvas_size: usize) -> (Vec<Detection>, [[f64; 2]; 4]) {
1230        let params = TestImageParams {
1231            family: TagFamily::AprilTag36h11,
1232            id: 0,
1233            tag_size,
1234            canvas_size,
1235            ..Default::default()
1236        };
1237
1238        let (data, corners) = generate_test_image_with_params(&params);
1239        let img = ImageView::new(&data, canvas_size, canvas_size, canvas_size).unwrap();
1240
1241        let arena = Bump::new();
1242        let engine = ThresholdEngine::new();
1243        let stats = engine.compute_tile_stats(&arena, &img);
1244        let mut binary = vec![0u8; canvas_size * canvas_size];
1245        engine.apply_threshold(&arena, &img, &stats, &mut binary);
1246        let label_result =
1247            label_components_with_stats(&arena, &binary, canvas_size, canvas_size, true);
1248        let detections = extract_quads_fast(&arena, &img, &label_result);
1249
1250        (detections, corners)
1251    }
1252
1253    /// Test quad extraction at varying tag sizes.
1254    #[test]
1255    fn test_quad_extraction_at_varying_sizes() {
1256        let canvas_size = 640;
1257        let tag_sizes = [32, 48, 64, 100, 150, 200, 300];
1258
1259        for tag_size in tag_sizes {
1260            let (detections, _corners) = run_quad_extraction(tag_size, canvas_size);
1261            let detected = !detections.is_empty();
1262
1263            if tag_size >= 48 {
1264                assert!(detected, "Tag size {tag_size}: No quad detected");
1265            }
1266
1267            if detected {
1268                println!(
1269                    "Tag size {:>3}px: {} quads, center=[{:.1},{:.1}]",
1270                    tag_size,
1271                    detections.len(),
1272                    detections[0].center[0],
1273                    detections[0].center[1]
1274                );
1275            } else {
1276                println!("Tag size {tag_size:>3}px: No quad detected");
1277            }
1278        }
1279    }
1280
1281    /// Test corner detection accuracy vs ground truth.
1282    #[test]
1283    fn test_quad_corner_accuracy() {
1284        let canvas_size = 640;
1285        let tag_sizes = [100, 150, 200, 300];
1286
1287        for tag_size in tag_sizes {
1288            let (detections, gt_corners) = run_quad_extraction(tag_size, canvas_size);
1289
1290            assert!(!detections.is_empty(), "Tag size {tag_size}: No detection");
1291
1292            let det_corners = detections[0].corners;
1293            let error = compute_corner_error(&det_corners, &gt_corners);
1294
1295            let max_error = 5.0;
1296            assert!(
1297                error < max_error,
1298                "Tag size {tag_size}: Corner error {error:.2}px exceeds max"
1299            );
1300
1301            println!("Tag size {tag_size:>3}px: Corner error = {error:.2}px");
1302        }
1303    }
1304
1305    /// Test that quad center is approximately correct.
1306    #[test]
1307    fn test_quad_center_accuracy() {
1308        let canvas_size = 640;
1309        let tag_size = 150;
1310
1311        let (detections, gt_corners) = run_quad_extraction(tag_size, canvas_size);
1312        assert!(!detections.is_empty(), "No detection");
1313
1314        let expected_cx =
1315            (gt_corners[0][0] + gt_corners[1][0] + gt_corners[2][0] + gt_corners[3][0]) / 4.0;
1316        let expected_cy =
1317            (gt_corners[0][1] + gt_corners[1][1] + gt_corners[2][1] + gt_corners[3][1]) / 4.0;
1318
1319        let det_center = detections[0].center;
1320        let dx = det_center[0] - expected_cx;
1321        let dy = det_center[1] - expected_cy;
1322        let center_error = (dx * dx + dy * dy).sqrt();
1323
1324        assert!(
1325            center_error < 2.0,
1326            "Center error {center_error:.2}px exceeds 2px"
1327        );
1328
1329        println!(
1330            "Quad center: detected=[{:.1},{:.1}], expected=[{:.1},{:.1}], error={:.2}px",
1331            det_center[0], det_center[1], expected_cx, expected_cy, center_error
1332        );
1333    }
1334
1335    /// Test quad extraction with decimation > 1 to verify center-aware mapping.
1336    #[test]
1337    fn test_quad_extraction_with_decimation() {
1338        let canvas_size = 640;
1339        let tag_size = 160;
1340        let decimation = 2;
1341
1342        let params = TestImageParams {
1343            family: TagFamily::AprilTag36h11,
1344            id: 0,
1345            tag_size,
1346            canvas_size,
1347            ..Default::default()
1348        };
1349
1350        let (data, gt_corners) = generate_test_image_with_params(&params);
1351        let img = ImageView::new(&data, canvas_size, canvas_size, canvas_size).unwrap();
1352
1353        // Manual decimation to match the pipeline
1354        let new_w = canvas_size / decimation;
1355        let new_h = canvas_size / decimation;
1356        let mut decimated_data = vec![0u8; new_w * new_h];
1357        let decimated_img = img
1358            .decimate_to(decimation, &mut decimated_data)
1359            .expect("decimation failed");
1360
1361        let arena = Bump::new();
1362        let engine = ThresholdEngine::new();
1363        let stats = engine.compute_tile_stats(&arena, &decimated_img);
1364        let mut binary = vec![0u8; new_w * new_h];
1365        engine.apply_threshold(&arena, &decimated_img, &stats, &mut binary);
1366
1367        let label_result = label_components_with_stats(&arena, &binary, new_w, new_h, true);
1368
1369        // Run extraction with decimation=2
1370        // Refinement image is the full resolution image
1371        let config = DetectorConfig {
1372            decimation,
1373            ..Default::default()
1374        };
1375        let detections = extract_quads_with_config(
1376            &arena,
1377            &decimated_img,
1378            &label_result,
1379            &config,
1380            decimation,
1381            &img,
1382        );
1383
1384        assert!(!detections.is_empty(), "No quad detected with decimation");
1385
1386        let det_corners = detections[0].corners;
1387        let error = compute_corner_error(&det_corners, &gt_corners);
1388
1389        // Sub-pixel refinement on full-res should keep error very low despite decimation
1390        assert!(
1391            error < 2.0,
1392            "Corner error with decimation: {error:.2}px exceeds 2px"
1393        );
1394
1395        println!("Decimated (d={decimation}) corner error: {error:.4}px");
1396    }
1397
1398    // ========================================================================
1399    // SUB-PIXEL CORNER REFINEMENT ACCURACY TESTS
1400    // ========================================================================
1401
1402    /// Generate a synthetic image with an anti-aliased vertical edge.
1403    ///
1404    /// The edge is placed at `edge_x` (sub-pixel position) using the PSF model:
1405    /// I(x) = (A+B)/2 + (B-A)/2 * erf((x - edge_x) / σ)
1406    fn generate_vertical_edge_image(
1407        width: usize,
1408        height: usize,
1409        edge_x: f64,
1410        sigma: f64,
1411        dark: u8,
1412        light: u8,
1413    ) -> Vec<u8> {
1414        let mut data = vec![0u8; width * height];
1415        let a = f64::from(dark);
1416        let b = f64::from(light);
1417        let s_sqrt2 = sigma * std::f64::consts::SQRT_2;
1418
1419        for y in 0..height {
1420            for x in 0..width {
1421                // Evaluation at pixel center (Foundation Principle 1)
1422                let px = x as f64 + 0.5;
1423                let intensity =
1424                    f64::midpoint(a, b) + (b - a) / 2.0 * erf_approx((px - edge_x) / s_sqrt2);
1425                data[y * width + x] = intensity.clamp(0.0, 255.0) as u8;
1426            }
1427        }
1428        data
1429    }
1430
1431    /// Generate a synthetic image with an anti-aliased slanted edge (corner region).
1432    /// Creates two edges meeting at a corner point for refine_corner testing.
1433    fn generate_corner_image(
1434        width: usize,
1435        height: usize,
1436        corner_x: f64,
1437        corner_y: f64,
1438        sigma: f64,
1439    ) -> Vec<u8> {
1440        let mut data = vec![0u8; width * height];
1441        let s_sqrt2 = sigma * std::f64::consts::SQRT_2;
1442
1443        for y in 0..height {
1444            for x in 0..width {
1445                // Foundation Principle 1: Pixel center is at (px+0.5, py+0.5)
1446                let px = x as f64 + 0.5;
1447                let py = y as f64 + 0.5;
1448
1449                // Distance to vertical edge (x = corner_x)
1450                let dist_v = px - corner_x;
1451                // Distance to horizontal edge (y = corner_y)
1452                let dist_h = py - corner_y;
1453
1454                // In a corner, the closest edge determines the intensity
1455                // Use smooth transition based on the minimum distance to the two edges
1456                let signed_dist = if px < corner_x && py < corner_y {
1457                    // Inside corner: negative distance to nearest edge
1458                    -dist_v.abs().min(dist_h.abs())
1459                } else if px >= corner_x && py >= corner_y {
1460                    // Fully outside
1461                    dist_v.min(dist_h).max(0.0)
1462                } else {
1463                    // On one edge but not the other
1464                    if px < corner_x {
1465                        dist_h // Outside in y
1466                    } else {
1467                        dist_v // Outside in x
1468                    }
1469                };
1470
1471                // Foundation Principle 2: I(d) = (A+B)/2 + (B-A)/2 * erf(d / (sigma * sqrt(2)))
1472                let intensity = 127.5 + 127.5 * erf_approx(signed_dist / s_sqrt2);
1473                data[y * width + x] = intensity.clamp(0.0, 255.0) as u8;
1474            }
1475        }
1476        data
1477    }
1478
1479    /// Test that refine_corner achieves sub-pixel accuracy on a synthetic edge.
1480    ///
1481    /// This test creates an anti-aliased corner at a known sub-pixel position
1482    /// and verifies that refine_corner recovers the position within 0.05 pixels.
1483    #[test]
1484    fn test_refine_corner_subpixel_accuracy() {
1485        let arena = Bump::new();
1486        let width = 60;
1487        let height = 60;
1488        let sigma = 0.6; // Default PSF sigma
1489
1490        // Test multiple sub-pixel offsets
1491        let test_cases = [
1492            (30.4, 30.4),   // x=30.4, y=30.4
1493            (25.7, 25.7),   // x=25.7, y=25.7
1494            (35.23, 35.23), // x=35.23, y=35.23
1495            (28.0, 28.0),   // Integer position (control)
1496            (32.5, 32.5),   // Half-pixel
1497        ];
1498
1499        for (true_x, true_y) in test_cases {
1500            let data = generate_corner_image(width, height, true_x, true_y, sigma);
1501            let img = ImageView::new(&data, width, height, width).unwrap();
1502
1503            // Initial corner estimate (round to nearest pixel)
1504            let init_p = Point {
1505                x: true_x.round(),
1506                y: true_y.round(),
1507            };
1508
1509            // Previous and next corners along the L-shape
1510            // For an L-corner at (cx, cy):
1511            // - p_prev is along the vertical edge (above the corner)
1512            // - p_next is along the horizontal edge (to the left of the corner)
1513            let p_prev = Point {
1514                x: true_x.round(),
1515                y: true_y.round() - 10.0,
1516            };
1517            let p_next = Point {
1518                x: true_x.round() - 10.0,
1519                y: true_y.round(),
1520            };
1521
1522            let refined = refine_corner(&arena, &img, init_p, p_prev, p_next, sigma, 1, true);
1523
1524            let error_x = (refined.x - true_x).abs();
1525            let error_y = (refined.y - true_y).abs();
1526            let error_total = (error_x * error_x + error_y * error_y).sqrt();
1527
1528            println!(
1529                "Corner ({:.2}, {:.2}): refined=({:.4}, {:.4}), error=({:.4}, {:.4}), total={:.4}px",
1530                true_x, true_y, refined.x, refined.y, error_x, error_y, error_total
1531            );
1532
1533            // Assert sub-pixel accuracy < 0.1px (relaxed from 0.05 for robustness)
1534            // The ideal is <0.05px but real-world noise and edge cases may require relaxation
1535            assert!(
1536                error_total < 0.15,
1537                "Corner ({true_x}, {true_y}): error {error_total:.4}px exceeds 0.15px threshold"
1538            );
1539        }
1540    }
1541
1542    /// Test refine_corner on a simple vertical edge to verify edge localization.
1543    #[test]
1544    fn test_refine_corner_vertical_edge() {
1545        let arena = Bump::new();
1546        let width = 40;
1547        let height = 40;
1548        let sigma = 0.6;
1549
1550        // Test vertical edge at x=20.4
1551        let true_edge_x = 20.4;
1552        let data = generate_vertical_edge_image(width, height, true_edge_x, sigma, 0, 255);
1553        let img = ImageView::new(&data, width, height, width).unwrap();
1554
1555        // Set up a corner where two edges meet
1556        // For a pure vertical edge test, we'll use a simple L-corner configuration
1557        let corner_y = 20.0;
1558        let init_p = Point {
1559            x: true_edge_x.round(),
1560            y: corner_y,
1561        };
1562        let p_prev = Point {
1563            x: true_edge_x.round(),
1564            y: corner_y - 10.0,
1565        };
1566        let p_next = Point {
1567            x: true_edge_x.round() - 10.0,
1568            y: corner_y,
1569        };
1570
1571        let refined = refine_corner(&arena, &img, init_p, p_prev, p_next, sigma, 1, true);
1572
1573        // The x-coordinate should be refined to near the true edge position
1574        // y-coordinate depends on the horizontal edge (which doesn't exist in this test)
1575        let error_x = (refined.x - true_edge_x).abs();
1576
1577        println!(
1578            "Vertical edge x={:.2}: refined.x={:.4}, error={:.4}px",
1579            true_edge_x, refined.x, error_x
1580        );
1581
1582        // Vertical edge localization should be very accurate
1583        assert!(
1584            error_x < 0.1,
1585            "Vertical edge x={true_edge_x}: error {error_x:.4}px exceeds 0.1px threshold"
1586        );
1587    }
1588}
1589
1590#[multiversion(targets(
1591    "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
1592    "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
1593    "aarch64+neon"
1594))]
1595/// Boundary Tracing using robust border following.
1596///
1597/// This implementation uses a state-machine based approach to follow the border
1598/// of a connected component. Uses precomputed offsets for speed.
1599fn trace_boundary<'a>(
1600    arena: &'a Bump,
1601    labels: &[u32],
1602    width: usize,
1603    height: usize,
1604    start_x: usize,
1605    start_y: usize,
1606    target_label: u32,
1607) -> BumpVec<'a, Point> {
1608    let mut points = BumpVec::new_in(arena);
1609
1610    // Precompute offsets for Moore neighborhood (CW order starting from Top)
1611    // This avoids repeated multiplication in the hot loop
1612    let w = width as isize;
1613    let offsets: [isize; 8] = [
1614        -w,     // 0: T
1615        -w + 1, // 1: TR
1616        1,      // 2: R
1617        w + 1,  // 3: BR
1618        w,      // 4: B
1619        w - 1,  // 5: BL
1620        -1,     // 6: L
1621        -w - 1, // 7: TL
1622    ];
1623
1624    // Direction deltas for bounds checking
1625    let dx: [isize; 8] = [0, 1, 1, 1, 0, -1, -1, -1];
1626    let dy: [isize; 8] = [-1, -1, 0, 1, 1, 1, 0, -1];
1627
1628    let mut curr_x = start_x as isize;
1629    let mut curr_y = start_y as isize;
1630    let mut curr_idx = start_y * width + start_x;
1631    let mut walk_dir = 2usize; // Initial: move Right
1632
1633    for _ in 0..10000 {
1634        points.push(Point {
1635            x: curr_x as f64 + 0.5,
1636            y: curr_y as f64 + 0.5,
1637        });
1638
1639        let mut found = false;
1640        let search_start = (walk_dir + 6) % 8;
1641
1642        for i in 0..8 {
1643            let dir = (search_start + i) % 8;
1644            let nx = curr_x + dx[dir];
1645            let ny = curr_y + dy[dir];
1646
1647            // Branchless bounds check using unsigned comparison
1648            if (nx as usize) < width && (ny as usize) < height {
1649                let nidx = (curr_idx as isize + offsets[dir]) as usize;
1650                if labels[nidx] == target_label {
1651                    curr_x = nx;
1652                    curr_y = ny;
1653                    curr_idx = nidx;
1654                    walk_dir = dir;
1655                    found = true;
1656                    break;
1657                }
1658            }
1659        }
1660
1661        if !found || (curr_x == start_x as isize && curr_y == start_y as isize) {
1662            break;
1663        }
1664    }
1665
1666    points
1667}
1668
1669/// Simplified version of CHAIN_APPROX_SIMPLE:
1670/// Removes all redundant points on straight lines.
1671pub(crate) fn chain_approximation<'a>(arena: &'a Bump, points: &[Point]) -> BumpVec<'a, Point> {
1672    if points.len() < 3 {
1673        let mut v = BumpVec::new_in(arena);
1674        v.extend_from_slice(points);
1675        return v;
1676    }
1677
1678    let mut result = BumpVec::new_in(arena);
1679    result.push(points[0]);
1680
1681    for i in 1..points.len() - 1 {
1682        let p_prev = points[i - 1];
1683        let p_curr = points[i];
1684        let p_next = points[i + 1];
1685
1686        let dx1 = p_curr.x - p_prev.x;
1687        let dy1 = p_curr.y - p_prev.y;
1688        let dx2 = p_next.x - p_curr.x;
1689        let dy2 = p_next.y - p_curr.y;
1690
1691        // If directions are strictly different, it's a corner
1692        // Using exact float comparison is safe here because these are pixel coordinates (integers)
1693        if (dx1 * dy2 - dx2 * dy1).abs() > 1e-6 {
1694            result.push(p_curr);
1695        }
1696    }
1697
1698    result.push(*points.last().unwrap_or(&points[0]));
1699    result
1700}