Skip to main content

locus_core/
quad.rs

1//! Quad extraction and geometric primitive fitting.
2//!
3//! This module implements the middle stage of the detection pipeline:
4//! 1. **Contour Tracing**: Extracting the boundary of connected components.
5//! 2. **Simplification**: Using Douglas-Peucker to reduce complex contours to polygons.
6//! 3. **Quad Fitting**: Heuristics to reduce polygons to quadrilaterals and verify convexity.
7//! 4. **Sub-pixel Refinement**: Intensity-based edge localization for maximum precision.
8
9#![allow(clippy::cast_possible_wrap)]
10#![allow(clippy::cast_sign_loss)]
11#![allow(clippy::similar_names)]
12#![allow(unsafe_code)]
13
14use crate::Detection;
15use crate::batch::{CandidateState, DetectionBatch, MAX_CANDIDATES, Point2f};
16use crate::config::DetectorConfig;
17use crate::image::ImageView;
18use crate::segmentation::LabelResult;
19use bumpalo::Bump;
20use bumpalo::collections::Vec as BumpVec;
21use multiversion::multiversion;
22
23use crate::workspace::WORKSPACE_ARENA;
24
25/// Per-corner 2×2 covariances as `[[σ_xx, σ_xy, σ_yx, σ_yy]; 4]`.
26pub(crate) type CornerCovariances = [[f32; 4]; 4];
27
28// Re-export erf_approx from its canonical home in the math module.
29pub(crate) use crate::simd::math::erf_approx;
30
31// Re-export the canonical Point type from the crate root.
32pub use crate::Point;
33
34/// Fast quad extraction using bounding box stats from CCL.
35/// Only traces contours for components that pass geometric filters.
36/// Uses default configuration.
37#[allow(dead_code)]
38pub(crate) fn extract_quads_fast(
39    arena: &Bump,
40    img: &ImageView,
41    label_result: &LabelResult,
42) -> Vec<Detection> {
43    extract_quads_with_config(arena, img, label_result, &DetectorConfig::default(), 1, img)
44}
45
46/// Quad extraction with Structure of Arrays (SoA) output.
47///
48/// This function populates the `corners` and `status_mask` fields of the provided `DetectionBatch`.
49/// It returns the total number of candidates found ($N$).
50#[allow(clippy::too_many_lines)]
51#[tracing::instrument(skip_all, name = "pipeline::quad_extraction")]
52pub fn extract_quads_soa(
53    batch: &mut DetectionBatch,
54    img: &ImageView,
55    label_result: &LabelResult,
56    config: &DetectorConfig,
57    decimation: usize,
58    refinement_img: &ImageView,
59    debug_telemetry: bool,
60) -> (usize, Option<Vec<[Point; 4]>>) {
61    use rayon::prelude::*;
62
63    let stats = &label_result.component_stats;
64
65    // Collect into a temporary Vec to handle the MAX_CANDIDATES limit and sequential writing.
66    // In a future optimization, we could use an atomic counter to write directly into the batch.
67    let detections: Vec<([Point; 4], [Point; 4], CornerCovariances)> = stats
68        .par_iter()
69        .enumerate()
70        .filter_map(|(label_idx, stat)| {
71            WORKSPACE_ARENA.with(|cell| {
72                let mut arena = cell.borrow_mut();
73                arena.reset();
74                let label = (label_idx + 1) as u32;
75                extract_single_quad(
76                    &arena,
77                    img,
78                    label_result.labels,
79                    label,
80                    stat,
81                    config,
82                    decimation,
83                    refinement_img,
84                )
85            })
86        })
87        .collect();
88
89    let n = detections.len().min(MAX_CANDIDATES);
90    let mut unrefined = if debug_telemetry {
91        Some(Vec::with_capacity(n))
92    } else {
93        None
94    };
95
96    for (i, (corners, unrefined_pts, covs)) in detections.into_iter().take(n).enumerate() {
97        for (j, corner) in corners.iter().enumerate() {
98            batch.corners[i][j] = Point2f {
99                x: corner.x as f32,
100                y: corner.y as f32,
101            };
102        }
103        // Write per-corner 2×2 covariances (4 floats each, 16 total per candidate).
104        if covs.is_empty() {
105            batch.corner_covariances[i].fill(0.0);
106        } else {
107            for (chunk, cov) in batch.corner_covariances[i]
108                .chunks_exact_mut(4)
109                .zip(covs.iter())
110            {
111                chunk.copy_from_slice(cov);
112            }
113        }
114        if let Some(ref mut u) = unrefined {
115            u.push(unrefined_pts);
116        }
117        batch.status_mask[i] = CandidateState::Active;
118    }
119
120    // Ensure the rest of the batch is marked as Empty
121    for i in n..MAX_CANDIDATES {
122        batch.status_mask[i] = CandidateState::Empty;
123    }
124
125    (n, unrefined)
126}
127
128/// Internal helper to extract a single quad from a component.
129#[inline]
130#[allow(clippy::too_many_arguments, clippy::too_many_lines)]
131fn extract_single_quad(
132    arena: &Bump,
133    img: &ImageView,
134    labels: &[u32],
135    label: u32,
136    stat: &crate::segmentation::ComponentStats,
137    config: &DetectorConfig,
138    decimation: usize,
139    refinement_img: &ImageView,
140) -> Option<([Point; 4], [Point; 4], CornerCovariances)> {
141    let min_edge_len_sq = config.quad_min_edge_length * config.quad_min_edge_length;
142    let d = decimation as f64;
143
144    // Fast geometric filtering using bounding box
145    let bbox_w = u32::from(stat.max_x - stat.min_x) + 1;
146    let bbox_h = u32::from(stat.max_y - stat.min_y) + 1;
147    let bbox_area = bbox_w * bbox_h;
148
149    // Filter: too small or too large
150    if bbox_area < config.quad_min_area || bbox_area > (img.width * img.height * 9 / 10) as u32 {
151        return None;
152    }
153
154    // Filter: not roughly square (aspect ratio)
155    let aspect = bbox_w.max(bbox_h) as f32 / bbox_w.min(bbox_h).max(1) as f32;
156    if aspect > config.quad_max_aspect_ratio {
157        return None;
158    }
159
160    // Filter: fill ratio (should be ~50-80% for a tag with inner pattern)
161    let fill = stat.pixel_count as f32 / bbox_area as f32;
162    if fill < config.quad_min_fill_ratio || fill > config.quad_max_fill_ratio {
163        return None;
164    }
165
166    // Moments-based culling gate: reject elongated or sparse blobs before contour tracing.
167    // Disabled by default (both thresholds are 0.0).
168    if (config.quad_max_elongation > 0.0 || config.quad_min_density > 0.0)
169        && let Some((elongation, density)) = crate::segmentation::compute_moment_shape(stat)
170    {
171        if config.quad_max_elongation > 0.0 && elongation > config.quad_max_elongation {
172            return None;
173        }
174        if config.quad_min_density > 0.0 && density < config.quad_min_density {
175            return None;
176        }
177    }
178
179    // Passed filters — extract rough quad corners using the configured mode.
180    // `gn_covs` carries per-corner 2×2 covariances from the EdLines GN solver;
181    // zero for ContourRdp (no covariance information available).
182    let (quad_pts_dec, gn_covs): ([Point; 4], CornerCovariances) = match config.quad_extraction_mode
183    {
184        crate::config::QuadExtractionMode::EdLines => {
185            let ed_cfg = crate::edlines::EdLinesConfig::from_detector_config(config);
186            crate::edlines::extract_quad_edlines(
187                arena,
188                img,
189                refinement_img,
190                labels,
191                label,
192                stat,
193                &ed_cfg,
194            )?
195        },
196        crate::config::QuadExtractionMode::ContourRdp => {
197            let sx = stat.first_pixel_x as usize;
198            let sy = stat.first_pixel_y as usize;
199
200            let contour = trace_boundary(arena, labels, img.width, img.height, sx, sy, label);
201
202            if contour.len() < 12 {
203                return None;
204            }
205
206            let simple_contour = chain_approximation(arena, &contour);
207            let perimeter = contour.len() as f64;
208            let epsilon = (perimeter * 0.02).max(1.0);
209            let simplified = douglas_peucker(arena, &simple_contour, epsilon);
210
211            if simplified.len() < 4 || simplified.len() > 11 {
212                return None;
213            }
214
215            let simpl_len = simplified.len();
216            let reduced = if simpl_len == 5 {
217                simplified
218            } else if simpl_len == 4 {
219                let mut closed = BumpVec::new_in(arena);
220                for p in &simplified {
221                    closed.push(*p);
222                }
223                closed.push(simplified[0]);
224                closed
225            } else {
226                reduce_to_quad(arena, &simplified)
227            };
228
229            if reduced.len() != 5 {
230                return None;
231            }
232
233            let area = polygon_area(&reduced);
234            let compactness = (12.566 * area.abs()) / (perimeter * perimeter);
235
236            if area.abs() <= f64::from(config.quad_min_area) || compactness <= 0.1 {
237                return None;
238            }
239
240            // Standardize to CW for consistency
241            if area > 0.0 {
242                (
243                    [reduced[0], reduced[1], reduced[2], reduced[3]],
244                    [[0.0; 4]; 4],
245                )
246            } else {
247                (
248                    [reduced[0], reduced[3], reduced[2], reduced[1]],
249                    [[0.0; 4]; 4],
250                )
251            }
252        },
253    };
254
255    // Scale to full resolution using correct coordinate mapping.
256    // A point at (x, y) in decimated coordinates maps to (x*d, y*d) in full-res.
257    let quad_pts = [
258        Point {
259            x: quad_pts_dec[0].x * d,
260            y: quad_pts_dec[0].y * d,
261        },
262        Point {
263            x: quad_pts_dec[1].x * d,
264            y: quad_pts_dec[1].y * d,
265        },
266        Point {
267            x: quad_pts_dec[2].x * d,
268            y: quad_pts_dec[2].y * d,
269        },
270        Point {
271            x: quad_pts_dec[3].x * d,
272            y: quad_pts_dec[3].y * d,
273        },
274    ];
275
276    // Expand 0.5px outward from the centroid to align with pixel boundaries.
277    // This is needed for ContourRdp, whose corners are at integer-coordinate
278    // midpoints of edge segments.  EdLines already produces full-resolution
279    // sub-pixel corners (from its micro-ray parabola + Gauss-Newton pass), so
280    // applying the expansion would move them *away* from the true edge and force
281    // the subsequent refine_corner to fight the artificial offset.
282    let quad_pts = if config.quad_extraction_mode == crate::config::QuadExtractionMode::EdLines {
283        quad_pts // corners already sub-pixel accurate; no expansion needed
284    } else {
285        let center_x = (quad_pts[0].x + quad_pts[1].x + quad_pts[2].x + quad_pts[3].x) * 0.25;
286        let center_y = (quad_pts[0].y + quad_pts[1].y + quad_pts[2].y + quad_pts[3].y) * 0.25;
287        let mut ep = quad_pts;
288        for i in 0..4 {
289            ep[i].x += 0.5 * (quad_pts[i].x - center_x).signum();
290            ep[i].y += 0.5 * (quad_pts[i].y - center_y).signum();
291        }
292        ep
293    };
294
295    let mut ok = true;
296    for i in 0..4 {
297        let d2 = (quad_pts[i].x - quad_pts[(i + 1) % 4].x).powi(2)
298            + (quad_pts[i].y - quad_pts[(i + 1) % 4].y).powi(2);
299        if d2 < min_edge_len_sq {
300            ok = false;
301            break;
302        }
303    }
304
305    if ok {
306        // `CornerRefinementMode::None` passes the quad corners through untouched.
307        // This is appropriate for EdLines, which handles sub-pixel refinement
308        // internally; it is also an explicit opt-out for benchmarking.
309        // All other modes call `refine_corner`.
310        //
311        // Covariances: only propagate GN covariances when no further refinement
312        // is applied (Mode::None).  GWLF computes its own covariances in
313        // detector.rs; ERF has none.  When refinement overwrites corners,
314        // the GN covariances are no longer valid.
315        let (corners, out_covs) =
316            if config.refinement_mode == crate::config::CornerRefinementMode::None {
317                (quad_pts, gn_covs)
318            } else {
319                let use_erf = config.refinement_mode == crate::config::CornerRefinementMode::Erf;
320                (
321                    [
322                        refine_corner(
323                            arena,
324                            refinement_img,
325                            quad_pts[0],
326                            quad_pts[3],
327                            quad_pts[1],
328                            config.subpixel_refinement_sigma,
329                            decimation,
330                            use_erf,
331                        ),
332                        refine_corner(
333                            arena,
334                            refinement_img,
335                            quad_pts[1],
336                            quad_pts[0],
337                            quad_pts[2],
338                            config.subpixel_refinement_sigma,
339                            decimation,
340                            use_erf,
341                        ),
342                        refine_corner(
343                            arena,
344                            refinement_img,
345                            quad_pts[2],
346                            quad_pts[1],
347                            quad_pts[3],
348                            config.subpixel_refinement_sigma,
349                            decimation,
350                            use_erf,
351                        ),
352                        refine_corner(
353                            arena,
354                            refinement_img,
355                            quad_pts[3],
356                            quad_pts[2],
357                            quad_pts[0],
358                            config.subpixel_refinement_sigma,
359                            decimation,
360                            use_erf,
361                        ),
362                    ],
363                    [[0.0; 4]; 4],
364                )
365            };
366
367        let edge_score = calculate_edge_score(refinement_img, corners);
368        if edge_score > config.quad_min_edge_score {
369            return Some((corners, quad_pts, out_covs));
370        }
371    }
372    None
373}
374
375/// Quad extraction with custom configuration.
376///
377/// This is the main entry point for quad detection with custom parameters.
378/// Components are processed in parallel for maximum throughput.
379#[allow(clippy::too_many_lines)]
380pub fn extract_quads_with_config(
381    _arena: &Bump,
382    img: &ImageView,
383    label_result: &LabelResult,
384    config: &DetectorConfig,
385    decimation: usize,
386    refinement_img: &ImageView,
387) -> Vec<Detection> {
388    use rayon::prelude::*;
389
390    let stats = &label_result.component_stats;
391    let d = decimation as f64;
392
393    // Process components in parallel, each with its own thread-local arena
394    stats
395        .par_iter()
396        .enumerate()
397        .filter_map(|(label_idx, stat)| {
398            WORKSPACE_ARENA.with(|cell| {
399                let mut arena = cell.borrow_mut();
400                arena.reset();
401                let label = (label_idx + 1) as u32;
402
403                let quad_result = extract_single_quad(
404                    &arena,
405                    img,
406                    label_result.labels,
407                    label,
408                    stat,
409                    config,
410                    decimation,
411                    refinement_img,
412                );
413
414                let (corners, _unrefined, _covs) = quad_result?;
415                // To keep backward compatibility, we still need to calculate some fields
416                // that aren't yet in the SoA (or are derived from it).
417                let area = polygon_area(&corners);
418
419                Some(Detection {
420                    id: label,
421                    center: [
422                        (corners[0].x + corners[1].x + corners[2].x + corners[3].x) / 4.0,
423                        (corners[0].y + corners[1].y + corners[2].y + corners[3].y) / 4.0,
424                    ],
425                    corners: [
426                        [corners[0].x, corners[0].y],
427                        [corners[1].x, corners[1].y],
428                        [corners[2].x, corners[2].y],
429                        [corners[3].x, corners[3].y],
430                    ],
431                    hamming: 0,
432                    rotation: 0,
433                    decision_margin: area * d * d, // Area in full-res
434                    bits: 0,
435                    pose: None,
436                    pose_covariance: None,
437                })
438            })
439        })
440        .collect()
441}
442
443/// Legacy extract_quads for backward compatibility.
444#[allow(dead_code)]
445pub(crate) fn extract_quads(arena: &Bump, img: &ImageView, labels: &[u32]) -> Vec<Detection> {
446    // Create a fake LabelResult with stats computed on-the-fly
447    let mut detections = Vec::new();
448    let num_labels = (labels.len() / 32) + 1;
449    let processed_labels = arena.alloc_slice_fill_copy(num_labels, 0u32);
450
451    let width = img.width;
452    let height = img.height;
453
454    for y in 1..height - 1 {
455        let row_off = y * width;
456        let prev_row_off = (y - 1) * width;
457
458        for x in 1..width - 1 {
459            let idx = row_off + x;
460            let label = labels[idx];
461
462            if label == 0 {
463                continue;
464            }
465
466            if labels[idx - 1] == label || labels[prev_row_off + x] == label {
467                continue;
468            }
469
470            let bit_idx = (label as usize) / 32;
471            let bit_mask = 1 << (label % 32);
472            if processed_labels[bit_idx] & bit_mask != 0 {
473                continue;
474            }
475
476            processed_labels[bit_idx] |= bit_mask;
477            let contour = trace_boundary(arena, labels, width, height, x, y, label);
478
479            if contour.len() >= 12 {
480                // Lowered from 30 to support 8px+ tags
481                let simplified = douglas_peucker(arena, &contour, 4.0);
482                if simplified.len() == 5 {
483                    let area = polygon_area(&simplified);
484                    let perimeter = contour.len() as f64;
485                    let compactness = (12.566 * area) / (perimeter * perimeter);
486
487                    if area > 400.0 && compactness > 0.5 {
488                        let mut ok = true;
489                        for i in 0..4 {
490                            let d2 = (simplified[i].x - simplified[i + 1].x).powi(2)
491                                + (simplified[i].y - simplified[i + 1].y).powi(2);
492                            if d2 < 100.0 {
493                                ok = false;
494                                break;
495                            }
496                        }
497
498                        if ok {
499                            detections.push(Detection {
500                                id: label,
501                                center: polygon_center(&simplified),
502                                corners: [
503                                    [simplified[0].x, simplified[0].y],
504                                    [simplified[1].x, simplified[1].y],
505                                    [simplified[2].x, simplified[2].y],
506                                    [simplified[3].x, simplified[3].y],
507                                ],
508                                hamming: 0,
509                                rotation: 0,
510                                decision_margin: area,
511                                bits: 0,
512                                pose: None,
513                                pose_covariance: None,
514                            });
515                        }
516                    }
517                }
518            }
519        }
520    }
521    detections
522}
523
524#[multiversion(targets(
525    "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
526    "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
527    "aarch64+neon"
528))]
529fn find_max_distance_optimized(points: &[Point], start: usize, end: usize) -> (f64, usize) {
530    let a = points[start];
531    let b = points[end];
532    let dx = b.x - a.x;
533    let dy = b.y - a.y;
534    let mag_sq = dx * dx + dy * dy;
535
536    if mag_sq < 1e-18 {
537        let mut dmax = 0.0;
538        let mut index = start;
539        for (i, p) in points.iter().enumerate().take(end).skip(start + 1) {
540            let d = ((p.x - a.x).powi(2) + (p.y - a.y).powi(2)).sqrt();
541            if d > dmax {
542                dmax = d;
543                index = i;
544            }
545        }
546        return (dmax, index);
547    }
548
549    let mut dmax = 0.0;
550    let mut index = start;
551
552    let mut i = start + 1;
553
554    #[cfg(all(target_arch = "x86_64", target_feature = "avx2"))]
555    if let Some(_dispatch) = multiversion::target::x86_64::avx2::get() {
556        unsafe {
557            use std::arch::x86_64::*;
558            let v_dx = _mm256_set1_pd(dx);
559            let v_dy = _mm256_set1_pd(dy);
560            let v_ax = _mm256_set1_pd(a.x);
561            let v_ay = _mm256_set1_pd(a.y);
562            let v_bx = _mm256_set1_pd(b.x);
563            let v_by = _mm256_set1_pd(b.y);
564
565            let mut v_dmax = _mm256_setzero_pd();
566            let mut v_indices = _mm256_setzero_pd(); // We'll store indices as doubles for simplicity
567
568            while i + 4 <= end {
569                // Load 4 points (8 doubles: x0, y0, x1, y1, x2, y2, x3, y3)
570                // Point is struct { x: f64, y: f64 } which is memory-compatible with [f64; 2]
571                let p_ptr = points.as_ptr().add(i) as *const f64;
572
573                // Unpack into xxxx and yyyy
574                // [x0, y0, x1, y1]
575                let raw0 = _mm256_loadu_pd(p_ptr);
576                // [x2, y2, x3, y3]
577                let raw1 = _mm256_loadu_pd(p_ptr.add(4));
578
579                // permute to get [x0, x1, y0, y1]
580                let x01y01 = _mm256_shuffle_pd(raw0, raw0, 0b0000); // Wait, shuffle_pd is tricky
581                // Better: use unpack
582                let x0x1 = _mm256_set_pd(
583                    points[i + 3].x,
584                    points[i + 2].x,
585                    points[i + 1].x,
586                    points[i].x,
587                );
588                let y0y1 = _mm256_set_pd(
589                    points[i + 3].y,
590                    points[i + 2].y,
591                    points[i + 1].y,
592                    points[i].y,
593                );
594
595                // formula: |dy*px - dx*py + bx*ay - by*ax| * inv_mag
596                let term1 = _mm256_mul_pd(v_dy, x0x1);
597                let term2 = _mm256_mul_pd(v_dx, y0y1);
598                let term3 = _mm256_set1_pd(b.x * a.y - b.y * a.x);
599
600                let dist_v = _mm256_sub_pd(term1, term2);
601                let dist_v = _mm256_add_pd(dist_v, term3);
602
603                // Absolute value
604                let mask = _mm256_set1_pd(-0.0);
605                let dist_v = _mm256_andnot_pd(mask, dist_v);
606
607                // Check if any dist > v_dmax
608                let cmp = _mm256_cmp_pd(dist_v, v_dmax, _CMP_GT_OQ);
609                if _mm256_movemask_pd(cmp) != 0 {
610                    // Update dmax and indices - this is a bit slow in SIMD,
611                    // but we only do it when we find a new max.
612                    let dists: [f64; 4] = std::mem::transmute(dist_v);
613                    for (j, &d) in dists.iter().enumerate() {
614                        if d > dmax {
615                            dmax = d;
616                            index = i + j;
617                        }
618                    }
619                    v_dmax = _mm256_set1_pd(dmax);
620                }
621                i += 4;
622            }
623        }
624    }
625
626    // Scalar tail
627    while i < end {
628        let d = perpendicular_distance(points[i], a, b);
629        if d > dmax {
630            dmax = d;
631            index = i;
632        }
633        i += 1;
634    }
635
636    (dmax, index)
637}
638
639/// Simplify a contour using the Douglas-Peucker algorithm.
640///
641/// Leverages an iterative implementation with a manual stack to avoid
642/// the overhead of recursive function calls and multiple temporary allocations.
643pub(crate) fn douglas_peucker<'a>(
644    arena: &'a Bump,
645    points: &[Point],
646    epsilon: f64,
647) -> BumpVec<'a, Point> {
648    if points.len() < 3 {
649        let mut v = BumpVec::new_in(arena);
650        v.extend_from_slice(points);
651        return v;
652    }
653
654    let n = points.len();
655    let mut keep = BumpVec::from_iter_in((0..n).map(|_| false), arena);
656    keep[0] = true;
657    keep[n - 1] = true;
658
659    let mut stack = BumpVec::new_in(arena);
660    stack.push((0, n - 1));
661
662    while let Some((start, end)) = stack.pop() {
663        if end - start < 2 {
664            continue;
665        }
666
667        let (dmax, index) = find_max_distance_optimized(points, start, end);
668
669        if dmax > epsilon {
670            keep[index] = true;
671            stack.push((start, index));
672            stack.push((index, end));
673        }
674    }
675
676    let mut simplified = BumpVec::new_in(arena);
677    for (i, &k) in keep.iter().enumerate() {
678        if k {
679            simplified.push(points[i]);
680        }
681    }
682    simplified
683}
684
685fn perpendicular_distance(p: Point, a: Point, b: Point) -> f64 {
686    let dx = b.x - a.x;
687    let dy = b.y - a.y;
688    let mag = (dx * dx + dy * dy).sqrt();
689    if mag < 1e-9 {
690        return ((p.x - a.x).powi(2) + (p.y - a.y).powi(2)).sqrt();
691    }
692    ((dy * p.x - dx * p.y + b.x * a.y - b.y * a.x).abs()) / mag
693}
694
695fn polygon_area(points: &[Point]) -> f64 {
696    let mut area = 0.0;
697    for i in 0..points.len() - 1 {
698        area += (points[i].x * points[i + 1].y) - (points[i + 1].x * points[i].y);
699    }
700    area * 0.5
701}
702
703#[allow(dead_code)]
704fn polygon_center(points: &[Point]) -> [f64; 2] {
705    let mut cx = 0.0;
706    let mut cy = 0.0;
707    let n = points.len() - 1;
708    for p in points.iter().take(n) {
709        cx += p.x;
710        cy += p.y;
711    }
712    [cx / n as f64, cy / n as f64]
713}
714
715/// Refine corners to sub-pixel accuracy using intensity-based optimization.
716///
717/// Fits lines to the two edges meeting at a corner using PSF-blurred step function
718/// model and Gauss-Newton optimization, then computes their intersection.
719/// Achieves ~0.02px accuracy vs ~0.2px for gradient-peak methods.
720#[must_use]
721#[allow(clippy::too_many_arguments)]
722pub(crate) fn refine_corner(
723    arena: &Bump,
724    img: &ImageView,
725    p: Point,
726    p_prev: Point,
727    p_next: Point,
728    sigma: f64,
729    decimation: usize,
730    use_erf: bool,
731) -> Point {
732    // Intensity-based refinement (ERF fit) is much more accurate but slower.
733    let line1 = if use_erf {
734        refine_edge_intensity(arena, img, p_prev, p, sigma, decimation)
735            .or_else(|| fit_edge_line(img, p_prev, p, decimation))
736    } else {
737        fit_edge_line(img, p_prev, p, decimation)
738    };
739
740    let line2 = if use_erf {
741        refine_edge_intensity(arena, img, p, p_next, sigma, decimation)
742            .or_else(|| fit_edge_line(img, p, p_next, decimation))
743    } else {
744        fit_edge_line(img, p, p_next, decimation)
745    };
746
747    if let (Some(l1), Some(l2)) = (line1, line2) {
748        // Intersect lines: a1*x + b1*y + c1 = 0 and a2*x + b2*y + c2 = 0
749        let det = l1.0 * l2.1 - l2.0 * l1.1;
750        if det.abs() > 1e-6 {
751            let x = (l1.1 * l2.2 - l2.1 * l1.2) / det;
752            let y = (l2.0 * l1.2 - l1.0 * l2.2) / det;
753
754            // Sanity check: intersection must be near original point
755            let dist_sq = (x - p.x).powi(2) + (y - p.y).powi(2);
756            let max_dist = if decimation > 1 {
757                (decimation as f64) + 2.0
758            } else {
759                2.0
760            };
761            if dist_sq < max_dist * max_dist {
762                return Point { x, y };
763            }
764        }
765    }
766
767    p
768}
769
770/// Fit a line (a*x + b*y + c = 0) to an edge by sampling gradient peaks.
771fn fit_edge_line(
772    img: &ImageView,
773    p1: Point,
774    p2: Point,
775    decimation: usize,
776) -> Option<(f64, f64, f64)> {
777    let dx = p2.x - p1.x;
778    let dy = p2.y - p1.y;
779    let len = (dx * dx + dy * dy).sqrt();
780    if len < 4.0 {
781        return None;
782    }
783
784    let nx = dy / len;
785    let ny = -dx / len;
786
787    let mut sum_d = 0.0;
788    let mut count = 0;
789
790    let n_samples = (len as usize).clamp(5, 15);
791    // Original search range was 3 pixels
792    let r = if decimation > 1 {
793        (decimation as i32) + 1
794    } else {
795        3
796    };
797
798    for i in 1..=n_samples {
799        let t = i as f64 / (n_samples + 1) as f64;
800        let px = p1.x + dx * t;
801        let py = p1.y + dy * t;
802
803        let mut best_px = px;
804        let mut best_py = py;
805        let mut best_mag = 0.0;
806
807        for step in -r..=r {
808            let sx = px + nx * f64::from(step);
809            let sy = py + ny * f64::from(step);
810
811            let g = img.sample_gradient_bilinear(sx, sy);
812            let mag = g[0] * g[0] + g[1] * g[1];
813            if mag > best_mag {
814                best_mag = mag;
815                best_px = sx;
816                best_py = sy;
817            }
818        }
819
820        if best_mag > 10.0 {
821            let mut mags = [0.0f64; 3];
822            for (j, offset) in [-1.0, 0.0, 1.0].iter().enumerate() {
823                let sx = best_px + nx * offset;
824                let sy = best_py + ny * offset;
825                let g = img.sample_gradient_bilinear(sx, sy);
826                mags[j] = g[0] * g[0] + g[1] * g[1];
827            }
828
829            let num = mags[2] - mags[0];
830            let den = 2.0 * (mags[0] + mags[2] - 2.0 * mags[1]);
831            let sub_offset = if den.abs() > 1e-6 {
832                (-num / den).clamp(-0.5, 0.5)
833            } else {
834                0.0
835            };
836
837            let refined_x = best_px + nx * sub_offset;
838            let refined_y = best_py + ny * sub_offset;
839
840            sum_d += -(nx * refined_x + ny * refined_y);
841            count += 1;
842        }
843    }
844
845    if count < 3 {
846        return None;
847    }
848
849    Some((nx, ny, sum_d / f64::from(count)))
850}
851
852/// Refine edge position using intensity-based optimization (Kallwies method).
853///
854/// Instead of finding gradient peaks, this minimizes the difference between
855/// observed pixel intensities and a PSF-blurred step function model:
856///
857/// Model(x,y) = (A+B)/2 + (A-B)/2 * erf(dist(x,y,line) / σ)
858///
859/// where A,B are intensities on either side, dist is perpendicular distance
860/// to the edge line, and σ is the blur factor (~0.6 pixels).
861///
862/// This achieves ~0.02px accuracy vs ~0.2px for gradient-based methods.
863#[allow(
864    clippy::similar_names,
865    clippy::many_single_char_names,
866    clippy::too_many_lines
867)]
868// Collected in the quad extraction process
869pub(crate) fn refine_edge_intensity(
870    arena: &Bump,
871    img: &ImageView,
872    p1: Point,
873    p2: Point,
874    sigma: f64,
875    decimation: usize,
876) -> Option<(f64, f64, f64)> {
877    let dx = p2.x - p1.x;
878    let dy = p2.y - p1.y;
879    let len = (dx * dx + dy * dy).sqrt();
880    if len < 4.0 {
881        return None;
882    }
883
884    // Initial line parameters: normal (nx, ny) and distance d from origin
885    // For CW winding, the outward normal is (dy/len, -dx/len)
886    let nx = dy / len;
887    let ny = -dx / len;
888    let mid_x = f64::midpoint(p1.x, p2.x);
889    let mid_y = f64::midpoint(p1.y, p2.y);
890    let mut d = -(nx * mid_x + ny * mid_y);
891
892    // Collect pixels within a window of the edge.
893    let window = if decimation > 1 {
894        (decimation as f64) + 1.0
895    } else {
896        2.5
897    };
898
899    let x0 = (p1.x.min(p2.x) - window - 0.5).max(1.0) as usize;
900    let x1 = (p1.x.max(p2.x) + window + 0.5).min((img.width - 2) as f64) as usize;
901    let y0 = (p1.y.min(p2.y) - window - 0.5).max(1.0) as usize;
902    let y1 = (p1.y.max(p2.y) + window + 0.5).min((img.height - 2) as f64) as usize;
903
904    // Use arena for samples to avoid heap allocation in hot loop
905    // (x, y, intensity, projection)
906    let mut samples = BumpVec::new_in(arena);
907
908    // For large edges, use subsampling to reduce compute while maintaining accuracy
909    // Stride of 2 for very large edges (>100px), else 1
910    let stride = if len > 100.0 { 2 } else { 1 };
911
912    let mut py = y0;
913    while py <= y1 {
914        let mut px = x0;
915        while px <= x1 {
916            // Foundation Principle 1: Pixel center is at (px+0.5, py+0.5)
917            let x = px as f64 + 0.5;
918            let y = py as f64 + 0.5;
919
920            // Check if near the edge segment (not infinite line)
921            let t = ((x - p1.x) * dx + (y - p1.y) * dy) / (len * len);
922            if !(-0.1..=1.1).contains(&t) {
923                px += stride;
924                continue;
925            }
926
927            let dist_signed = nx * x + ny * y + d;
928            if dist_signed.abs() < window {
929                let intensity = f64::from(img.get_pixel(px, py));
930                // Pre-calculate nx*x + ny*y
931                let projection = nx * x + ny * y;
932                samples.push((x, y, intensity, projection));
933            }
934            px += stride;
935        }
936        py += stride;
937    }
938
939    if samples.len() < 10 {
940        return Some((nx, ny, d)); // Fall back to initial estimate
941    }
942
943    // Estimate A (dark side) and B (light side) from samples
944    // Use weighted average based on distance from edge to be more robust
945    let mut dark_sum = 0.0;
946    let mut dark_weight = 0.0;
947    let mut light_sum = 0.0;
948    let mut light_weight = 0.0;
949
950    for &(_x, _y, intensity, projection) in &samples {
951        let signed_dist = projection + d;
952        if signed_dist < -1.0 {
953            let w = (-signed_dist - 1.0).min(2.0); // Weight pixels further from edge more
954            dark_sum += intensity * w;
955            dark_weight += w;
956        } else if signed_dist > 1.0 {
957            let w = (signed_dist - 1.0).min(2.0);
958            light_sum += intensity * w;
959            light_weight += w;
960        }
961    }
962
963    if dark_weight < 1.0 || light_weight < 1.0 {
964        return Some((nx, ny, d));
965    }
966
967    let a = dark_sum / dark_weight; // Dark side (A)
968    let b = light_sum / light_weight; // Light side (B)
969
970    // Foundation Principle 2: I(d) = (A+B)/2 + (B-A)/2 * erf(d / sigma)
971    let inv_s_sqrt2 = 1.0 / sigma;
972
973    // Gauss-Newton optimization: refine d
974    for _iter in 0..15 {
975        let mut jtj = 0.0;
976        let mut jtr = 0.0;
977
978        for &(_x, _y, intensity, projection) in &samples {
979            let dist_phys = projection + d;
980            let u = dist_phys * inv_s_sqrt2;
981
982            if u.abs() > 3.0 {
983                continue;
984            }
985
986            let erf_u = erf_approx(u);
987            let model = (a + b) * 0.5 + (b - a) * 0.5 * erf_u;
988            let residual = intensity - model;
989
990            // Jacobians
991            let exp_term = (-u * u).exp();
992            let jd = (b - a) * 0.5 * std::f64::consts::FRAC_2_SQRT_PI * exp_term * inv_s_sqrt2;
993
994            jtj += jd * jd;
995            jtr += jd * residual;
996        }
997
998        if jtj.abs() > 1e-10 {
999            let delta = jtr / jtj;
1000            d += delta.clamp(-0.5, 0.5);
1001            if delta.abs() < 1e-4 {
1002                break;
1003            }
1004        } else {
1005            break;
1006        }
1007    }
1008
1009    Some((nx, ny, d))
1010}
1011
1012/// Reducing a polygon to a quad (4 vertices + 1 closing) by iteratively removing
1013/// the vertex that forms the smallest area triangle with its neighbors.
1014/// This is robust for noisy/jagged shapes that are approximately quadrilateral.
1015fn reduce_to_quad<'a>(arena: &'a Bump, poly: &[Point]) -> BumpVec<'a, Point> {
1016    if poly.len() <= 5 {
1017        return BumpVec::from_iter_in(poly.iter().copied(), arena);
1018    }
1019
1020    // Work on a mutable copy
1021    let mut current = BumpVec::from_iter_in(poly.iter().copied(), arena);
1022    // Remove closing point for processing
1023    current.pop();
1024
1025    while current.len() > 4 {
1026        let n = current.len();
1027        let mut min_area = f64::MAX;
1028        let mut min_idx = 0;
1029
1030        for i in 0..n {
1031            let p_prev = current[(i + n - 1) % n];
1032            let p_curr = current[i];
1033            let p_next = current[(i + 1) % n];
1034
1035            // Triangle area: 0.5 * |x1(y2 - y3) + x2(y3 - y1) + x3(y1 - y2)|
1036            let area = (p_prev.x * (p_curr.y - p_next.y)
1037                + p_curr.x * (p_next.y - p_prev.y)
1038                + p_next.x * (p_prev.y - p_curr.y))
1039                .abs()
1040                * 0.5;
1041
1042            if area < min_area {
1043                min_area = area;
1044                min_idx = i;
1045            }
1046        }
1047
1048        // Remove the vertex contributing least to the shape
1049        current.remove(min_idx);
1050    }
1051
1052    // Re-close the loop
1053    if !current.is_empty() {
1054        let first = current[0];
1055        current.push(first);
1056    }
1057
1058    current
1059}
1060
1061/// Calculate the minimum average gradient magnitude along the 4 edges of the quad.
1062///
1063/// Returns the lowest score among the 4 edges. If any edge is very weak,
1064/// the return value will be low, indicating a likely false positive.
1065fn calculate_edge_score(img: &ImageView, corners: [Point; 4]) -> f64 {
1066    let mut min_score = f64::MAX;
1067
1068    for i in 0..4 {
1069        let p1 = corners[i];
1070        let p2 = corners[(i + 1) % 4];
1071
1072        let dx = p2.x - p1.x;
1073        let dy = p2.y - p1.y;
1074        let len = (dx * dx + dy * dy).sqrt();
1075
1076        if len < 4.0 {
1077            // Tiny edge, likely noise or degenerate
1078            return 0.0;
1079        }
1080
1081        // Sample points along the edge (excluding corners to avoid corner effects)
1082        let n_samples = (len as usize).clamp(3, 10); // At least 3, at most 10
1083        let mut edge_mag_sum = 0.0;
1084
1085        for k in 1..=n_samples {
1086            // t goes from roughly 0.1 to 0.9 to avoid corners
1087            let t = k as f64 / (n_samples + 1) as f64;
1088            let x = p1.x + dx * t;
1089            let y = p1.y + dy * t;
1090
1091            let g = img.sample_gradient_bilinear(x, y);
1092            edge_mag_sum += (g[0] * g[0] + g[1] * g[1]).sqrt();
1093        }
1094
1095        let avg_mag = edge_mag_sum / n_samples as f64;
1096        if avg_mag < min_score {
1097            min_score = avg_mag;
1098        }
1099    }
1100
1101    min_score
1102}
1103
1104#[cfg(test)]
1105#[allow(clippy::unwrap_used, clippy::float_cmp)]
1106mod tests {
1107    use super::*;
1108    use bumpalo::Bump;
1109    use proptest::prelude::*;
1110
1111    #[test]
1112    fn test_edge_score_rejection() {
1113        // Create a 20x20 image mostly gray
1114        let width = 20;
1115        let height = 20;
1116        let stride = 20;
1117        let mut data = vec![128u8; width * height];
1118
1119        // Draw a weak quad (contrast 10)
1120        // Center 10,10. Size 8x8.
1121        // Inside 128, Outside 138. Gradient ~5.
1122        // 5 < 10, should be rejected.
1123        for y in 6..14 {
1124            for x in 6..14 {
1125                data[y * width + x] = 138;
1126            }
1127        }
1128
1129        let img = ImageView::new(&data, width, height, stride).unwrap();
1130
1131        let corners = [
1132            Point { x: 6.0, y: 6.0 },
1133            Point { x: 14.0, y: 6.0 },
1134            Point { x: 14.0, y: 14.0 },
1135            Point { x: 6.0, y: 14.0 },
1136        ];
1137
1138        let score = calculate_edge_score(&img, corners);
1139        // Gradient should be roughly (138-128)/2 = 5 per pixel boundary?
1140        // Sobel-like (p(x+1)-p(x-1))/2.
1141        // At edge x=6: left=128, right=138. (138-128)/2 = 5.
1142        // Magnitude 5.0.
1143        // Threshold is 10.0.
1144        assert!(score < 10.0, "Score {score} should be < 10.0");
1145
1146        // Draw a strong quad (contrast 50)
1147        // Inside 200, Outside 50. Gradient ~75.
1148        // 75 > 10, should pass.
1149        for y in 6..14 {
1150            for x in 6..14 {
1151                data[y * width + x] = 200;
1152            }
1153        }
1154        // Restore background
1155        for y in 0..height {
1156            for x in 0..width {
1157                if !(6..14).contains(&x) || !(6..14).contains(&y) {
1158                    data[y * width + x] = 50;
1159                }
1160            }
1161        }
1162        let img = ImageView::new(&data, width, height, stride).unwrap();
1163        let score = calculate_edge_score(&img, corners);
1164        assert!(score > 40.0, "Score {score} should be > 40.0");
1165    }
1166
1167    proptest! {
1168        #[test]
1169        fn prop_douglas_peucker_invariants(
1170            points in prop::collection::vec((0.0..1000.0, 0.0..1000.0), 3..100),
1171            epsilon in 0.1..10.0f64
1172        ) {
1173            let arena = Bump::new();
1174            let contour: Vec<Point> = points.iter().map(|&(x, y)| Point { x, y }).collect();
1175            let simplified = douglas_peucker(&arena, &contour, epsilon);
1176
1177            // 1. Simplified points are a subset of original points (by coordinates)
1178            for p in &simplified {
1179                assert!(contour.iter().any(|&op| (op.x - p.x).abs() < 1e-9 && (op.y - p.y).abs() < 1e-9));
1180            }
1181
1182            // 2. End points are preserved
1183            assert_eq!(simplified[0].x, contour[0].x);
1184            assert_eq!(simplified[0].y, contour[0].y);
1185            assert_eq!(simplified.last().unwrap().x, contour.last().unwrap().x);
1186            assert_eq!(simplified.last().unwrap().y, contour.last().unwrap().y);
1187
1188            // 3. Simplified contour has fewer or equal points
1189            assert!(simplified.len() <= contour.len());
1190
1191            // 4. All original points are at most epsilon away from the simplified segment
1192            for i in 1..simplified.len() {
1193                let a = simplified[i-1];
1194                let b = simplified[i];
1195
1196                // Find indices in original contour matching simplified points
1197                let mut start_idx = None;
1198                let mut end_idx = None;
1199                for (j, op) in contour.iter().enumerate() {
1200                    if (op.x - a.x).abs() < 1e-9 && (op.y - a.y).abs() < 1e-9 {
1201                        start_idx = Some(j);
1202                    }
1203                    if (op.x - b.x).abs() < 1e-9 && (op.y - b.y).abs() < 1e-9 {
1204                        end_idx = Some(j);
1205                    }
1206                }
1207
1208                if let (Some(s), Some(e)) = (start_idx, end_idx) {
1209                    for op in contour.iter().take(e + 1).skip(s) {
1210                        let d = perpendicular_distance(*op, a, b);
1211                        assert!(d <= epsilon + 1e-7, "Distance {d} > epsilon {epsilon} at point");
1212                    }
1213                }
1214            }
1215        }
1216    }
1217
1218    // ========================================================================
1219    // QUAD EXTRACTION ROBUSTNESS TESTS
1220    // ========================================================================
1221
1222    use crate::config::TagFamily;
1223    use crate::segmentation::label_components_with_stats;
1224    use crate::test_utils::{
1225        TestImageParams, compute_corner_error, generate_test_image_with_params,
1226    };
1227    use crate::threshold::ThresholdEngine;
1228
1229    /// Helper: Generate a tag image and run through threshold + segmentation + quad extraction.
1230    fn run_quad_extraction(tag_size: usize, canvas_size: usize) -> (Vec<Detection>, [[f64; 2]; 4]) {
1231        let params = TestImageParams {
1232            family: TagFamily::AprilTag36h11,
1233            id: 0,
1234            tag_size,
1235            canvas_size,
1236            ..Default::default()
1237        };
1238
1239        let (data, corners) = generate_test_image_with_params(&params);
1240        let img = ImageView::new(&data, canvas_size, canvas_size, canvas_size).unwrap();
1241
1242        let arena = Bump::new();
1243        let engine = ThresholdEngine::new();
1244        let stats = engine.compute_tile_stats(&arena, &img);
1245        let mut binary = vec![0u8; canvas_size * canvas_size];
1246        engine.apply_threshold(&arena, &img, &stats, &mut binary);
1247        let label_result =
1248            label_components_with_stats(&arena, &binary, canvas_size, canvas_size, true);
1249        let detections = extract_quads_fast(&arena, &img, &label_result);
1250
1251        (detections, corners)
1252    }
1253
1254    /// Test quad extraction at varying tag sizes.
1255    #[test]
1256    fn test_quad_extraction_at_varying_sizes() {
1257        let canvas_size = 640;
1258        let tag_sizes = [32, 48, 64, 100, 150, 200, 300];
1259
1260        for tag_size in tag_sizes {
1261            let (detections, _corners) = run_quad_extraction(tag_size, canvas_size);
1262            let detected = !detections.is_empty();
1263
1264            if tag_size >= 48 {
1265                assert!(detected, "Tag size {tag_size}: No quad detected");
1266            }
1267
1268            if detected {
1269                println!(
1270                    "Tag size {:>3}px: {} quads, center=[{:.1},{:.1}]",
1271                    tag_size,
1272                    detections.len(),
1273                    detections[0].center[0],
1274                    detections[0].center[1]
1275                );
1276            } else {
1277                println!("Tag size {tag_size:>3}px: No quad detected");
1278            }
1279        }
1280    }
1281
1282    /// Test corner detection accuracy vs ground truth.
1283    #[test]
1284    fn test_quad_corner_accuracy() {
1285        let canvas_size = 640;
1286        let tag_sizes = [100, 150, 200, 300];
1287
1288        for tag_size in tag_sizes {
1289            let (detections, gt_corners) = run_quad_extraction(tag_size, canvas_size);
1290
1291            assert!(!detections.is_empty(), "Tag size {tag_size}: No detection");
1292
1293            let det_corners = detections[0].corners;
1294            let error = compute_corner_error(&det_corners, &gt_corners);
1295
1296            let max_error = 5.0;
1297            assert!(
1298                error < max_error,
1299                "Tag size {tag_size}: Corner error {error:.2}px exceeds max"
1300            );
1301
1302            println!("Tag size {tag_size:>3}px: Corner error = {error:.2}px");
1303        }
1304    }
1305
1306    /// Test that quad center is approximately correct.
1307    #[test]
1308    fn test_quad_center_accuracy() {
1309        let canvas_size = 640;
1310        let tag_size = 150;
1311
1312        let (detections, gt_corners) = run_quad_extraction(tag_size, canvas_size);
1313        assert!(!detections.is_empty(), "No detection");
1314
1315        let expected_cx =
1316            (gt_corners[0][0] + gt_corners[1][0] + gt_corners[2][0] + gt_corners[3][0]) / 4.0;
1317        let expected_cy =
1318            (gt_corners[0][1] + gt_corners[1][1] + gt_corners[2][1] + gt_corners[3][1]) / 4.0;
1319
1320        let det_center = detections[0].center;
1321        let dx = det_center[0] - expected_cx;
1322        let dy = det_center[1] - expected_cy;
1323        let center_error = (dx * dx + dy * dy).sqrt();
1324
1325        assert!(
1326            center_error < 2.0,
1327            "Center error {center_error:.2}px exceeds 2px"
1328        );
1329
1330        println!(
1331            "Quad center: detected=[{:.1},{:.1}], expected=[{:.1},{:.1}], error={:.2}px",
1332            det_center[0], det_center[1], expected_cx, expected_cy, center_error
1333        );
1334    }
1335
1336    /// Test quad extraction with decimation > 1 to verify center-aware mapping.
1337    #[test]
1338    fn test_quad_extraction_with_decimation() {
1339        let canvas_size = 640;
1340        let tag_size = 160;
1341        let decimation = 2;
1342
1343        let params = TestImageParams {
1344            family: TagFamily::AprilTag36h11,
1345            id: 0,
1346            tag_size,
1347            canvas_size,
1348            ..Default::default()
1349        };
1350
1351        let (data, gt_corners) = generate_test_image_with_params(&params);
1352        let img = ImageView::new(&data, canvas_size, canvas_size, canvas_size).unwrap();
1353
1354        // Manual decimation to match the pipeline
1355        let new_w = canvas_size / decimation;
1356        let new_h = canvas_size / decimation;
1357        let mut decimated_data = vec![0u8; new_w * new_h];
1358        let decimated_img = img
1359            .decimate_to(decimation, &mut decimated_data)
1360            .expect("decimation failed");
1361
1362        let arena = Bump::new();
1363        let engine = ThresholdEngine::new();
1364        let stats = engine.compute_tile_stats(&arena, &decimated_img);
1365        let mut binary = vec![0u8; new_w * new_h];
1366        engine.apply_threshold(&arena, &decimated_img, &stats, &mut binary);
1367
1368        let label_result = label_components_with_stats(&arena, &binary, new_w, new_h, true);
1369
1370        // Run extraction with decimation=2
1371        // Refinement image is the full resolution image
1372        let config = DetectorConfig {
1373            decimation,
1374            ..Default::default()
1375        };
1376        let detections = extract_quads_with_config(
1377            &arena,
1378            &decimated_img,
1379            &label_result,
1380            &config,
1381            decimation,
1382            &img,
1383        );
1384
1385        assert!(!detections.is_empty(), "No quad detected with decimation");
1386
1387        let det_corners = detections[0].corners;
1388        let error = compute_corner_error(&det_corners, &gt_corners);
1389
1390        // Sub-pixel refinement on full-res should keep error very low despite decimation
1391        assert!(
1392            error < 2.0,
1393            "Corner error with decimation: {error:.2}px exceeds 2px"
1394        );
1395
1396        println!("Decimated (d={decimation}) corner error: {error:.4}px");
1397    }
1398
1399    // ========================================================================
1400    // SUB-PIXEL CORNER REFINEMENT ACCURACY TESTS
1401    // ========================================================================
1402
1403    /// Generate a synthetic image with an anti-aliased vertical edge.
1404    ///
1405    /// The edge is placed at `edge_x` (sub-pixel position) using the PSF model:
1406    /// I(x) = (A+B)/2 + (B-A)/2 * erf((x - edge_x) / σ)
1407    fn generate_vertical_edge_image(
1408        width: usize,
1409        height: usize,
1410        edge_x: f64,
1411        sigma: f64,
1412        dark: u8,
1413        light: u8,
1414    ) -> Vec<u8> {
1415        let mut data = vec![0u8; width * height];
1416        let a = f64::from(dark);
1417        let b = f64::from(light);
1418        let s_sqrt2 = sigma * std::f64::consts::SQRT_2;
1419
1420        for y in 0..height {
1421            for x in 0..width {
1422                // Evaluation at pixel center (Foundation Principle 1)
1423                let px = x as f64 + 0.5;
1424                let intensity =
1425                    f64::midpoint(a, b) + (b - a) / 2.0 * erf_approx((px - edge_x) / s_sqrt2);
1426                data[y * width + x] = intensity.clamp(0.0, 255.0) as u8;
1427            }
1428        }
1429        data
1430    }
1431
1432    /// Generate a synthetic image with an anti-aliased slanted edge (corner region).
1433    /// Creates two edges meeting at a corner point for refine_corner testing.
1434    fn generate_corner_image(
1435        width: usize,
1436        height: usize,
1437        corner_x: f64,
1438        corner_y: f64,
1439        sigma: f64,
1440    ) -> Vec<u8> {
1441        let mut data = vec![0u8; width * height];
1442        let s_sqrt2 = sigma * std::f64::consts::SQRT_2;
1443
1444        for y in 0..height {
1445            for x in 0..width {
1446                // Foundation Principle 1: Pixel center is at (px+0.5, py+0.5)
1447                let px = x as f64 + 0.5;
1448                let py = y as f64 + 0.5;
1449
1450                // Distance to vertical edge (x = corner_x)
1451                let dist_v = px - corner_x;
1452                // Distance to horizontal edge (y = corner_y)
1453                let dist_h = py - corner_y;
1454
1455                // In a corner, the closest edge determines the intensity
1456                // Use smooth transition based on the minimum distance to the two edges
1457                let signed_dist = if px < corner_x && py < corner_y {
1458                    // Inside corner: negative distance to nearest edge
1459                    -dist_v.abs().min(dist_h.abs())
1460                } else if px >= corner_x && py >= corner_y {
1461                    // Fully outside
1462                    dist_v.min(dist_h).max(0.0)
1463                } else {
1464                    // On one edge but not the other
1465                    if px < corner_x {
1466                        dist_h // Outside in y
1467                    } else {
1468                        dist_v // Outside in x
1469                    }
1470                };
1471
1472                // Foundation Principle 2: I(d) = (A+B)/2 + (B-A)/2 * erf(d / (sigma * sqrt(2)))
1473                let intensity = 127.5 + 127.5 * erf_approx(signed_dist / s_sqrt2);
1474                data[y * width + x] = intensity.clamp(0.0, 255.0) as u8;
1475            }
1476        }
1477        data
1478    }
1479
1480    /// Test that refine_corner achieves sub-pixel accuracy on a synthetic edge.
1481    ///
1482    /// This test creates an anti-aliased corner at a known sub-pixel position
1483    /// and verifies that refine_corner recovers the position within 0.05 pixels.
1484    #[test]
1485    fn test_refine_corner_subpixel_accuracy() {
1486        let arena = Bump::new();
1487        let width = 60;
1488        let height = 60;
1489        let sigma = 0.6; // Default PSF sigma
1490
1491        // Test multiple sub-pixel offsets
1492        let test_cases = [
1493            (30.4, 30.4),   // x=30.4, y=30.4
1494            (25.7, 25.7),   // x=25.7, y=25.7
1495            (35.23, 35.23), // x=35.23, y=35.23
1496            (28.0, 28.0),   // Integer position (control)
1497            (32.5, 32.5),   // Half-pixel
1498        ];
1499
1500        for (true_x, true_y) in test_cases {
1501            let data = generate_corner_image(width, height, true_x, true_y, sigma);
1502            let img = ImageView::new(&data, width, height, width).unwrap();
1503
1504            // Initial corner estimate (round to nearest pixel)
1505            let init_p = Point {
1506                x: true_x.round(),
1507                y: true_y.round(),
1508            };
1509
1510            // Previous and next corners along the L-shape
1511            // For an L-corner at (cx, cy):
1512            // - p_prev is along the vertical edge (above the corner)
1513            // - p_next is along the horizontal edge (to the left of the corner)
1514            let p_prev = Point {
1515                x: true_x.round(),
1516                y: true_y.round() - 10.0,
1517            };
1518            let p_next = Point {
1519                x: true_x.round() - 10.0,
1520                y: true_y.round(),
1521            };
1522
1523            let refined = refine_corner(&arena, &img, init_p, p_prev, p_next, sigma, 1, true);
1524
1525            let error_x = (refined.x - true_x).abs();
1526            let error_y = (refined.y - true_y).abs();
1527            let error_total = (error_x * error_x + error_y * error_y).sqrt();
1528
1529            println!(
1530                "Corner ({:.2}, {:.2}): refined=({:.4}, {:.4}), error=({:.4}, {:.4}), total={:.4}px",
1531                true_x, true_y, refined.x, refined.y, error_x, error_y, error_total
1532            );
1533
1534            // Assert sub-pixel accuracy < 0.1px (relaxed from 0.05 for robustness)
1535            // The ideal is <0.05px but real-world noise and edge cases may require relaxation
1536            assert!(
1537                error_total < 0.15,
1538                "Corner ({true_x}, {true_y}): error {error_total:.4}px exceeds 0.15px threshold"
1539            );
1540        }
1541    }
1542
1543    /// Test refine_corner on a simple vertical edge to verify edge localization.
1544    #[test]
1545    fn test_refine_corner_vertical_edge() {
1546        let arena = Bump::new();
1547        let width = 40;
1548        let height = 40;
1549        let sigma = 0.6;
1550
1551        // Test vertical edge at x=20.4
1552        let true_edge_x = 20.4;
1553        let data = generate_vertical_edge_image(width, height, true_edge_x, sigma, 0, 255);
1554        let img = ImageView::new(&data, width, height, width).unwrap();
1555
1556        // Set up a corner where two edges meet
1557        // For a pure vertical edge test, we'll use a simple L-corner configuration
1558        let corner_y = 20.0;
1559        let init_p = Point {
1560            x: true_edge_x.round(),
1561            y: corner_y,
1562        };
1563        let p_prev = Point {
1564            x: true_edge_x.round(),
1565            y: corner_y - 10.0,
1566        };
1567        let p_next = Point {
1568            x: true_edge_x.round() - 10.0,
1569            y: corner_y,
1570        };
1571
1572        let refined = refine_corner(&arena, &img, init_p, p_prev, p_next, sigma, 1, true);
1573
1574        // The x-coordinate should be refined to near the true edge position
1575        // y-coordinate depends on the horizontal edge (which doesn't exist in this test)
1576        let error_x = (refined.x - true_edge_x).abs();
1577
1578        println!(
1579            "Vertical edge x={:.2}: refined.x={:.4}, error={:.4}px",
1580            true_edge_x, refined.x, error_x
1581        );
1582
1583        // Vertical edge localization should be very accurate
1584        assert!(
1585            error_x < 0.1,
1586            "Vertical edge x={true_edge_x}: error {error_x:.4}px exceeds 0.1px threshold"
1587        );
1588    }
1589}
1590
1591#[multiversion(targets(
1592    "x86_64+avx2+bmi1+bmi2+popcnt+lzcnt",
1593    "x86_64+avx512f+avx512bw+avx512dq+avx512vl",
1594    "aarch64+neon"
1595))]
1596/// Boundary Tracing using robust border following.
1597///
1598/// This implementation uses a state-machine based approach to follow the border
1599/// of a connected component. Uses precomputed offsets for speed.
1600fn trace_boundary<'a>(
1601    arena: &'a Bump,
1602    labels: &[u32],
1603    width: usize,
1604    height: usize,
1605    start_x: usize,
1606    start_y: usize,
1607    target_label: u32,
1608) -> BumpVec<'a, Point> {
1609    let mut points = BumpVec::new_in(arena);
1610
1611    // Precompute offsets for Moore neighborhood (CW order starting from Top)
1612    // This avoids repeated multiplication in the hot loop
1613    let w = width as isize;
1614    let offsets: [isize; 8] = [
1615        -w,     // 0: T
1616        -w + 1, // 1: TR
1617        1,      // 2: R
1618        w + 1,  // 3: BR
1619        w,      // 4: B
1620        w - 1,  // 5: BL
1621        -1,     // 6: L
1622        -w - 1, // 7: TL
1623    ];
1624
1625    // Direction deltas for bounds checking
1626    let dx: [isize; 8] = [0, 1, 1, 1, 0, -1, -1, -1];
1627    let dy: [isize; 8] = [-1, -1, 0, 1, 1, 1, 0, -1];
1628
1629    let mut curr_x = start_x as isize;
1630    let mut curr_y = start_y as isize;
1631    let mut curr_idx = start_y * width + start_x;
1632    let mut walk_dir = 2usize; // Initial: move Right
1633
1634    for _ in 0..10000 {
1635        points.push(Point {
1636            x: curr_x as f64 + 0.5,
1637            y: curr_y as f64 + 0.5,
1638        });
1639
1640        let mut found = false;
1641        let search_start = (walk_dir + 6) % 8;
1642
1643        for i in 0..8 {
1644            let dir = (search_start + i) % 8;
1645            let nx = curr_x + dx[dir];
1646            let ny = curr_y + dy[dir];
1647
1648            // Branchless bounds check using unsigned comparison
1649            if (nx as usize) < width && (ny as usize) < height {
1650                let nidx = (curr_idx as isize + offsets[dir]) as usize;
1651                if labels[nidx] == target_label {
1652                    curr_x = nx;
1653                    curr_y = ny;
1654                    curr_idx = nidx;
1655                    walk_dir = dir;
1656                    found = true;
1657                    break;
1658                }
1659            }
1660        }
1661
1662        if !found || (curr_x == start_x as isize && curr_y == start_y as isize) {
1663            break;
1664        }
1665    }
1666
1667    points
1668}
1669
1670/// Simplified version of CHAIN_APPROX_SIMPLE:
1671/// Removes all redundant points on straight lines.
1672pub(crate) fn chain_approximation<'a>(arena: &'a Bump, points: &[Point]) -> BumpVec<'a, Point> {
1673    if points.len() < 3 {
1674        let mut v = BumpVec::new_in(arena);
1675        v.extend_from_slice(points);
1676        return v;
1677    }
1678
1679    let mut result = BumpVec::new_in(arena);
1680    result.push(points[0]);
1681
1682    for i in 1..points.len() - 1 {
1683        let p_prev = points[i - 1];
1684        let p_curr = points[i];
1685        let p_next = points[i + 1];
1686
1687        let dx1 = p_curr.x - p_prev.x;
1688        let dy1 = p_curr.y - p_prev.y;
1689        let dx2 = p_next.x - p_curr.x;
1690        let dy2 = p_next.y - p_curr.y;
1691
1692        // If directions are strictly different, it's a corner
1693        // Using exact float comparison is safe here because these are pixel coordinates (integers)
1694        if (dx1 * dy2 - dx2 * dy1).abs() > 1e-6 {
1695            result.push(p_curr);
1696        }
1697    }
1698
1699    result.push(*points.last().unwrap_or(&points[0]));
1700    result
1701}