locus-core 0.4.0

A high-performance fiducial marker detector for robotics.
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
//! ChAruco board saddle-point extraction and board pose estimation.
//!
//! After ArUco tags have been decoded by the main pipeline, this module
//! leverages the decoded tags as geometric priors to locate checkerboard
//! interior corners (saddle points) with sub-pixel precision and then
//! estimates the board pose via the generic [`RobustPoseSolver`].
//!
//! # Algorithm
//!
//! 1. **Deduplication pass** — collect the set of saddle IDs visible from the
//!    decoded tags (O(V), no allocation).
//! 2. **Homography prediction** — project each saddle's 3D board coordinate
//!    through the owning tag's stored canonical→image homography to obtain a
//!    sub-pixel initial estimate (typically < 1 px error).
//! 3. **Gauss-Newton refinement** — iterate the Newton step
//!    `S·δp = −∇I(p)` (where S is the local structure tensor) to drive the
//!    estimate to the true saddle position.
//! 4. **Covariance estimation** — call `compute_corner_covariance` at the
//!    refined position to get the Fisher information matrix for AW-LM.
//! 5. **Pose estimation** — build a [`PointCorrespondences`] view
//!    (`group_size = 1`) and delegate to [`RobustPoseSolver`].

#![allow(clippy::similar_names)]

use crate::batch::{DetectionBatchView, Point2f};
use crate::board::{
    BoardPose, CharucoTopology, LoRansacConfig, PointCorrespondences, RobustPoseSolver,
};
use crate::image::ImageView;
use crate::pose::{CameraIntrinsics, Pose};
use crate::pose_weighted::compute_corner_covariance;
use nalgebra::Matrix2;
use std::sync::Arc;

// ── Telemetry sub-buffer ──────────────────────────────────────────────────

/// Per-frame diagnostic record of rejected saddle candidates.
///
/// Pre-allocated once; reused across frames with zero heap allocation.
/// Only populated when [`CharucoRefiner::estimate_with_telemetry`] is called.
pub struct CharucoTelemetry {
    /// Homography-predicted 2D position of each rejected saddle (before refinement).
    pub rejected_predictions: Box<[Point2f]>,
    /// Structure tensor determinant at rejection (lower = blurrier / flatter region).
    pub rejected_determinants: Box<[f32]>,
    /// Number of valid entries in `[0..count]` on the last frame.
    pub count: usize,
}

impl CharucoTelemetry {
    /// Create a telemetry buffer pre-sized for `max_saddles` rejection entries.
    #[must_use]
    pub fn new(max_saddles: usize) -> Self {
        let n = max_saddles.max(1);
        Self {
            rejected_predictions: vec![Point2f { x: 0.0, y: 0.0 }; n].into_boxed_slice(),
            rejected_determinants: vec![0.0f32; n].into_boxed_slice(),
            count: 0,
        }
    }

    /// Slice of rejected saddle predictions for this frame.
    #[inline]
    #[must_use]
    pub fn rejected_predictions(&self) -> &[Point2f] {
        &self.rejected_predictions[..self.count]
    }

    /// Slice of rejection determinants for this frame.
    #[inline]
    #[must_use]
    pub fn rejected_determinants(&self) -> &[f32] {
        &self.rejected_determinants[..self.count]
    }
}

// ── Public output buffer ───────────────────────────────────────────────────

/// Pre-allocated output buffer for a single [`CharucoRefiner::estimate`] call.
///
/// Allocate once (via [`CharucoRefiner::new_batch`] or [`CharucoBatch::new`])
/// and reuse across frames — `estimate()` performs zero heap allocations.
///
/// After each call the valid results occupy `[0..count]` in each array.
pub struct CharucoBatch {
    /// Indices into [`CharucoTopology::saddle_points`] for each accepted saddle.
    pub saddle_ids: Box<[usize]>,
    /// Refined 2D image coordinates for each accepted saddle.
    pub saddle_image_pts: Box<[Point2f]>,
    /// Board-frame 3D coordinates for each accepted saddle.
    pub saddle_obj_pts: Box<[[f64; 3]]>,
    /// Estimated board pose, or `None` if fewer than 4 saddle points were
    /// accepted or LO-RANSAC could not find a consensus.
    pub board_pose: Option<BoardPose>,
    /// Number of valid saddles written into `[0..count]` on the last frame.
    pub count: usize,
    /// Diagnostic rejection data; `None` unless constructed via
    /// [`CharucoBatch::with_telemetry`].
    pub telemetry: Option<Box<CharucoTelemetry>>,
}

impl CharucoBatch {
    /// Create a batch pre-sized for `max_saddles` entries (no telemetry).
    ///
    /// Pass `topology.saddle_points.len()` as `max_saddles`.
    #[must_use]
    pub fn new(max_saddles: usize) -> Self {
        let n = max_saddles.max(1);
        Self {
            saddle_ids: vec![0usize; n].into_boxed_slice(),
            saddle_image_pts: vec![Point2f { x: 0.0, y: 0.0 }; n].into_boxed_slice(),
            saddle_obj_pts: vec![[0.0f64; 3]; n].into_boxed_slice(),
            board_pose: None,
            count: 0,
            telemetry: None,
        }
    }

    /// Create a batch with the telemetry sub-buffer pre-allocated.
    ///
    /// Reuse across frames — `estimate_with_telemetry()` performs zero heap
    /// allocations and resets [`CharucoTelemetry::count`] each call.
    #[must_use]
    pub fn with_telemetry(max_saddles: usize) -> Self {
        let mut b = Self::new(max_saddles);
        b.telemetry = Some(Box::new(CharucoTelemetry::new(max_saddles)));
        b
    }

    /// Slice of accepted saddle indices for this frame.
    #[inline]
    #[must_use]
    pub fn saddle_ids(&self) -> &[usize] {
        &self.saddle_ids[..self.count]
    }

    /// Slice of refined 2D image coordinates for this frame.
    #[inline]
    #[must_use]
    pub fn saddle_image_pts(&self) -> &[Point2f] {
        &self.saddle_image_pts[..self.count]
    }

    /// Slice of board-frame 3D coordinates for this frame.
    #[inline]
    #[must_use]
    pub fn saddle_obj_pts(&self) -> &[[f64; 3]] {
        &self.saddle_obj_pts[..self.count]
    }
}

// ── ChAruco refiner ────────────────────────────────────────────────────────

/// Stateless pipeline branch that extracts ChAruco saddle points from a
/// [`DetectionBatchView`] and estimates the board pose.
///
/// All scratch buffers are pre-allocated in [`CharucoRefiner::new`];
/// [`CharucoRefiner::estimate`] performs **zero heap allocations**.
pub struct CharucoRefiner {
    /// Board layout with saddle topology.
    ///
    /// Stored in an [`Arc`] so the same topology can be shared across frames or
    /// threads without cloning the marker/saddle tables.
    pub config: Arc<CharucoTopology>,
    /// Underlying robust pose solver (LO-RANSAC + AW-LM).
    pub solver: RobustPoseSolver,
    // Pre-allocated scratch (one-time Box allocations in new()):
    scratch_img: Box<[Point2f]>,
    scratch_obj: Box<[[f64; 3]]>,
    scratch_info: Box<[Matrix2<f64>]>,
    scratch_seeds: Box<[Option<Pose>]>,
    /// `scratch_seen[id]` is `true` when saddle `id` has already been claimed
    /// by a tag in the current frame.  Reset selectively each frame.
    scratch_seen: Box<[bool]>,
    /// Saddle IDs written in the same order as `scratch_img`/`scratch_obj`.
    scratch_saddle_ids: Box<[usize]>,
    /// Indices of saddles touched this frame (for selective `scratch_seen` reset).
    scratch_touched: Box<[usize]>,
}

impl CharucoRefiner {
    /// Gauss-Newton iteration limit for saddle refinement.
    const MAX_ITERS: u32 = 5;
    /// Maximum allowed drift (pixels) between the predicted and refined position.
    const MAX_DRIFT_PX: f64 = 5.0;
    /// Minimum structure tensor determinant to accept a refined saddle.
    const MIN_DET: f64 = 1e-3;
    /// Structure tensor window radius (pixels).
    const ST_RADIUS: isize = 3;

    /// Creates a new `CharucoRefiner`, taking ownership of `config`.
    ///
    /// Wraps `config` in an [`Arc`] internally.  Reuse the same `CharucoRefiner`
    /// across frames to amortise the one-time scratch-buffer allocation.
    #[must_use]
    pub fn new(config: CharucoTopology) -> Self {
        Self::from_arc(Arc::new(config), LoRansacConfig::default())
    }

    /// Allocate a production [`CharucoBatch`] sized for this board (no telemetry).
    ///
    /// Call once during initialisation and reuse the batch across frames.
    #[must_use]
    pub fn new_batch(&self) -> CharucoBatch {
        CharucoBatch::new(self.config.saddle_points.len())
    }

    /// Allocate a [`CharucoBatch`] with telemetry pre-allocated for debug sessions.
    ///
    /// Use with [`CharucoRefiner::estimate_with_telemetry`].
    #[must_use]
    pub fn new_batch_with_telemetry(&self) -> CharucoBatch {
        CharucoBatch::with_telemetry(self.config.saddle_points.len())
    }

    /// Creates a new `CharucoRefiner` from a shared [`Arc`] topology.
    ///
    /// Use this when the same [`CharucoTopology`] is shared across multiple
    /// refiners or threads.
    #[must_use]
    pub fn from_arc(config: Arc<CharucoTopology>, ransac_cfg: LoRansacConfig) -> Self {
        let n = config.saddle_points.len().max(1);
        Self {
            solver: RobustPoseSolver::new().with_lo_ransac_config(ransac_cfg),
            scratch_img: vec![Point2f { x: 0.0, y: 0.0 }; n].into_boxed_slice(),
            scratch_obj: vec![[0.0f64; 3]; n].into_boxed_slice(),
            scratch_info: vec![Matrix2::identity(); n].into_boxed_slice(),
            scratch_seeds: vec![None; n].into_boxed_slice(),
            scratch_seen: vec![false; n].into_boxed_slice(),
            scratch_saddle_ids: vec![0usize; n].into_boxed_slice(),
            scratch_touched: vec![0usize; n].into_boxed_slice(),
            config,
        }
    }

    /// Standard path — estimates board pose with zero telemetry overhead.
    ///
    /// Results are written into `batch`; `batch.count` and `batch.board_pose`
    /// are updated on every call.
    ///
    /// # Parameters
    /// - `view`       — slice view of valid decoded tags (from `Detector::detect`).
    /// - `img`        — the same image that was fed to the detector.
    /// - `intrinsics` — camera intrinsics for pose estimation.
    /// - `batch`      — caller-allocated output buffer (reuse across frames).
    pub fn estimate(
        &mut self,
        view: &DetectionBatchView<'_>,
        img: &ImageView,
        intrinsics: &CameraIntrinsics,
        batch: &mut CharucoBatch,
    ) {
        self.estimate_impl::<false>(view, img, intrinsics, batch);
    }

    /// Debug path — same as [`estimate`](Self::estimate) but also records
    /// rejected saddle predictions into `batch.telemetry`.
    ///
    /// Requires `batch` to have been constructed via
    /// [`CharucoRefiner::new_batch_with_telemetry`].  When `batch.telemetry`
    /// is `None` this degrades gracefully to the production path.
    pub fn estimate_with_telemetry(
        &mut self,
        view: &DetectionBatchView<'_>,
        img: &ImageView,
        intrinsics: &CameraIntrinsics,
        batch: &mut CharucoBatch,
    ) {
        self.estimate_impl::<true>(view, img, intrinsics, batch);
    }

    /// Core implementation — monomorphised at compile time via `TELEMETRY`.
    ///
    /// When `TELEMETRY = false` every `if TELEMETRY { }` block is statically
    /// dead code and is fully stripped by LLVM, leaving zero overhead in the
    /// production binary.
    fn estimate_impl<const TELEMETRY: bool>(
        &mut self,
        view: &DetectionBatchView<'_>,
        img: &ImageView,
        intrinsics: &CameraIntrinsics,
        batch: &mut CharucoBatch,
    ) {
        let mut num_touched = 0usize;
        let mut num_accepted = 0usize;

        if TELEMETRY && let Some(t) = batch.telemetry.as_mut() {
            t.count = 0;
        }

        for tag_i in 0..view.len() {
            let tag_id = view.ids[tag_i] as usize;
            if tag_id >= self.config.tag_cell_corners.len() {
                continue;
            }

            let adj = self.config.tag_cell_corners[tag_id];

            // Get the tag's TL board coordinate and the stored homography.
            let Some(obj_pts) = self.config.obj_points.get(tag_id).and_then(|o| *o) else {
                continue;
            };
            let tl = obj_pts[0];
            let h = view.homographies[tag_i].data;

            for &maybe_sid in &adj {
                let Some(saddle_id) = maybe_sid else { continue };
                if saddle_id >= self.config.saddle_points.len() {
                    continue;
                }

                // Deduplicate: each saddle is processed at most once per frame.
                if self.scratch_seen[saddle_id] {
                    continue;
                }
                self.scratch_seen[saddle_id] = true;
                self.scratch_touched[num_touched] = saddle_id;
                num_touched += 1;

                let sp = self.config.saddle_points[saddle_id];

                // ── Step B: project through canonical homography ───────────
                let [u, v] = board_to_canonical(sp[0], sp[1], tl, self.config.marker_length);
                let [px, py] = apply_homography_col_major(&h, u, v);

                // Bounds check (leave 1-pixel border for Sobel kernels).
                let w = img.width as f64;
                let h_px = img.height as f64;
                if px < 1.0 || py < 1.0 || px > w - 2.0 || py > h_px - 2.0 {
                    continue;
                }

                // ── Step C: Gauss-Newton refinement ───────────────────────
                let (maybe_refined, last_det) =
                    refine_saddle(img, px, py, Self::MAX_ITERS, Self::ST_RADIUS);
                let Some((refined_px, refined_py, final_s)) = maybe_refined else {
                    if TELEMETRY {
                        record_rejection(&mut batch.telemetry, px, py, last_det);
                    }
                    continue;
                };

                let drift = ((refined_px - px).powi(2) + (refined_py - py).powi(2)).sqrt();
                if drift > Self::MAX_DRIFT_PX || final_s < Self::MIN_DET {
                    if TELEMETRY {
                        record_rejection(&mut batch.telemetry, px, py, final_s);
                    }
                    continue;
                }

                // ── Step D: build correspondence ──────────────────────────
                self.scratch_saddle_ids[num_accepted] = saddle_id;
                self.scratch_img[num_accepted] = Point2f {
                    x: refined_px as f32,
                    y: refined_py as f32,
                };
                self.scratch_obj[num_accepted] = sp;
                // Information matrix = covariance^{-1}.
                #[allow(clippy::cast_possible_truncation)]
                let cov = compute_corner_covariance(
                    img,
                    [refined_px, refined_py],
                    0.1,
                    2.0,
                    Self::ST_RADIUS as i32,
                );
                self.scratch_info[num_accepted] = cov.try_inverse().unwrap_or(Matrix2::identity());
                self.scratch_seeds[num_accepted] = crate::board::board_seed_from_pose6d(
                    &view.poses[tag_i].data,
                    tag_id,
                    &self.config.obj_points,
                );
                num_accepted += 1;
            }
        }

        // Reset seen flags for touched saddles (O(touched) not O(all saddles)).
        for &id in &self.scratch_touched[..num_touched] {
            self.scratch_seen[id] = false;
        }

        // ── Pose estimation ────────────────────────────────────────────────
        let board_pose = if num_accepted >= 4 {
            let corr = PointCorrespondences {
                image_points: &self.scratch_img[..num_accepted],
                object_points: &self.scratch_obj[..num_accepted],
                information_matrices: &self.scratch_info[..num_accepted],
                group_size: 1,
                seed_poses: &self.scratch_seeds[..num_accepted],
            };
            self.solver.estimate(&corr, intrinsics)
        } else {
            None
        };

        // ── Write results into caller-supplied batch (zero allocation) ─────
        batch.saddle_ids[..num_accepted].copy_from_slice(&self.scratch_saddle_ids[..num_accepted]);
        batch.saddle_image_pts[..num_accepted].copy_from_slice(&self.scratch_img[..num_accepted]);
        batch.saddle_obj_pts[..num_accepted].copy_from_slice(&self.scratch_obj[..num_accepted]);
        batch.count = num_accepted;
        batch.board_pose = board_pose;
    }
}

// ── Private helpers ────────────────────────────────────────────────────────

/// Convert a board-frame 2D point `(sx, sy)` to the canonical `[-1, 1]²`
/// coordinate system of the tag's stored homography.
///
/// The homography maps canonical `(-1,-1)` → TL image corner and `(1,1)` → BR
/// image corner of the **marker** (Layer A).  Because saddle points (Layer B)
/// lie at the outer corners of the *enclosing square*, which is larger than the
/// marker by the padding margin `(square_length - marker_length) / 2`, the
/// resulting canonical values satisfy `|u| > 1` or `|v| > 1`.
/// This is **intentional extrapolation** — the homography is linear and correctly
/// maps out-of-range canonical coordinates to the saddle's image position.
#[inline]
fn board_to_canonical(sx: f64, sy: f64, tag_tl: [f64; 3], marker_length: f64) -> [f64; 2] {
    [
        2.0 * (sx - tag_tl[0]) / marker_length - 1.0,
        2.0 * (sy - tag_tl[1]) / marker_length - 1.0,
    ]
}

/// Apply a column-major 3×3 homography to a canonical point `(u, v)`.
///
/// Nalgebra stores matrices in column-major order, so `data[j]` for
/// `j in 0..9` iterates the elements in column-major order:
///   - `data[0]` = h(0,0), `data[1]` = h(1,0), `data[2]` = h(2,0)
///   - `data[3]` = h(0,1), `data[4]` = h(1,1), `data[5]` = h(2,1)
///   - `data[6]` = h(0,2), `data[7]` = h(1,2), `data[8]` = h(2,2)
#[inline]
fn apply_homography_col_major(data: &[f32; 9], u: f64, v: f64) -> [f64; 2] {
    let x_h = f64::from(data[0]) * u + f64::from(data[3]) * v + f64::from(data[6]);
    let y_h = f64::from(data[1]) * u + f64::from(data[4]) * v + f64::from(data[7]);
    let w_h = f64::from(data[2]) * u + f64::from(data[5]) * v + f64::from(data[8]);
    if w_h.abs() < 1e-12 {
        return [0.0, 0.0];
    }
    [x_h / w_h, y_h / w_h]
}

/// Compute the structure tensor `S` and the image gradient `[gx, gy]` at pixel
/// `(cx, cy)` over a window of `radius` pixels using 3×3 Sobel kernels.
///
/// Returns `(S, grad)` where `S` is the 2×2 raw structure tensor (without
/// regularisation) and `grad` is the Sobel gradient at the centre pixel.
fn compute_structure_tensor_and_gradient(
    img: &ImageView,
    cx: f64,
    cy: f64,
    radius: isize,
) -> (Matrix2<f64>, [f64; 2]) {
    let cxi = cx.floor() as isize;
    let cyi = cy.floor() as isize;
    let w = img.width.cast_signed();
    let h = img.height.cast_signed();
    let stride = img.stride.cast_signed();

    let r = radius;
    let x_start = (cxi - r).max(1);
    let x_end = (cxi + r).min(w - 2);
    let y_start = (cyi - r).max(1);
    let y_end = (cyi + r).min(h - 2);

    let x_min = (x_start - 1).cast_unsigned();
    let x_count = (x_end - x_start + 1).cast_unsigned();
    let row_slice_len = x_count + 2;

    let mut sum_gx2 = 0.0_f64;
    let mut sum_gy2 = 0.0_f64;
    let mut sum_gxgy = 0.0_f64;

    // Centre pixel gradient for the Newton step.
    let mut centre_gx = 0.0_f64;
    let mut centre_gy = 0.0_f64;

    for py in y_start..=y_end {
        let off = (py * stride).cast_unsigned();
        let su = stride.cast_unsigned();
        let base0 = off - su + x_min;
        let base1 = off + x_min;
        let base2 = off + su + x_min;
        let row0 = &img.data[base0..base0 + row_slice_len];
        let row1 = &img.data[base1..base1 + row_slice_len];
        let row2 = &img.data[base2..base2 + row_slice_len];

        for k in 0..x_count {
            let p00 = i32::from(row0[k]);
            let p01 = i32::from(row0[k + 1]);
            let p02 = i32::from(row0[k + 2]);
            let p10 = i32::from(row1[k]);
            let p12 = i32::from(row1[k + 2]);
            let p20 = i32::from(row2[k]);
            let p21 = i32::from(row2[k + 1]);
            let p22 = i32::from(row2[k + 2]);

            let gx = f64::from((p02 + 2 * p12 + p22) - (p00 + 2 * p10 + p20));
            let gy = f64::from((p22 + 2 * p21 + p20) - (p02 + 2 * p01 + p00));

            sum_gx2 += gx * gx;
            sum_gy2 += gy * gy;
            sum_gxgy += gx * gy;

            // Collect the gradient at the centre pixel.
            let px_abs = x_start + k.cast_signed();
            let py_abs = py;
            if px_abs == cxi && py_abs == cyi {
                centre_gx = gx;
                centre_gy = gy;
            }
        }
    }

    let s = Matrix2::new(sum_gx2, sum_gxgy, sum_gxgy, sum_gy2);
    (s, [centre_gx, centre_gy])
}

/// Record a rejected saddle prediction into the telemetry buffer (no-op when
/// `telemetry` is `None`).  Inlined so the call site is zero-cost for
/// `TELEMETRY = false` (dead-code eliminated by LLVM).
#[inline]
fn record_rejection(telemetry: &mut Option<Box<CharucoTelemetry>>, px: f64, py: f64, det: f64) {
    if let Some(t) = telemetry.as_mut() {
        let idx = t.count;
        if idx < t.rejected_predictions.len() {
            t.rejected_predictions[idx] = Point2f {
                x: px as f32,
                y: py as f32,
            };
            #[allow(clippy::cast_possible_truncation)]
            {
                t.rejected_determinants[idx] = det as f32;
            }
            t.count += 1;
        }
    }
}

/// Iteratively refine a saddle-point estimate using Newton's method with the
/// structure tensor as the surrogate Hessian.
///
/// Returns `(Some((refined_x, refined_y, det_S)), det_S)` on convergence.
/// On failure returns `(None, last_det)` where `last_det` is the structure
/// tensor determinant at the rejection point (0.0 if iteration never started).
fn refine_saddle(
    img: &ImageView,
    mut px: f64,
    mut py: f64,
    max_iters: u32,
    radius: isize,
) -> (Option<(f64, f64, f64)>, f64) {
    let w = img.width as f64;
    let h = img.height as f64;

    let mut final_det = 0.0_f64;

    for _ in 0..max_iters {
        let (s_mat, grad) = compute_structure_tensor_and_gradient(img, px, py, radius);

        // Regularised inversion: S + ε·I to handle near-flat windows.
        let s00 = s_mat[(0, 0)] + 1.0;
        let s11 = s_mat[(1, 1)] + 1.0;
        let s01 = s_mat[(0, 1)];

        let det = s00 * s11 - s01 * s01;
        if det < 1e-6 {
            // Flat window — return the failing determinant for diagnostics.
            return (None, det);
        }
        final_det = det;

        let inv_det = 1.0 / det;
        // δp = −S⁻¹ · grad
        let dx = -(s11 * grad[0] - s01 * grad[1]) * inv_det;
        let dy = -(-s01 * grad[0] + s00 * grad[1]) * inv_det;

        px += dx;
        py += dy;

        if px < 1.0 || py < 1.0 || px > w - 2.0 || py > h - 2.0 {
            // Drifted out of bounds — return last good determinant.
            return (None, final_det);
        }

        if dx * dx + dy * dy < 1e-10 {
            break;
        }
    }

    (Some((px, py, final_det)), final_det)
}

// ── Tests ──────────────────────────────────────────────────────────────────

#[cfg(test)]
#[allow(
    clippy::unwrap_used,
    clippy::expect_used,
    clippy::cast_sign_loss,
    clippy::cast_precision_loss,
    missing_docs
)]
mod tests {
    use super::*;
    use crate::batch::DetectionBatch;
    use crate::image::ImageView;

    /// Build a synthetic 8-bit checkerboard image of `sq` pixel squares.
    fn checkerboard_buf(width: usize, height: usize, sq: usize) -> Vec<u8> {
        let mut buf = vec![0u8; width * height];
        for y in 0..height {
            for x in 0..width {
                if (x / sq + y / sq).is_multiple_of(2) {
                    buf[y * width + x] = 200;
                } else {
                    buf[y * width + x] = 50;
                }
            }
        }
        buf
    }

    #[test]
    fn test_gauss_newton_convergence_on_checkerboard() {
        // A synthetic hard-threshold checkerboard with 16-pixel squares.
        // The saddle at pixel (32, 32) — the intersection of 4 squares — is
        // tested with a small initial offset (< 1 px) so the GN centre pixel
        // falls exactly on the corner.  Hard-threshold images have no optical
        // blurring, so we only assert that the function converges (returns
        // Some) with a well-conditioned structure tensor and stays within 1 px.
        let sq = 16usize;
        let buf = checkerboard_buf(128, 128, sq);
        let img = ImageView::new(&buf, 128, 128, 128).unwrap();

        let true_x = 32.0_f64;
        let true_y = 32.0_f64;
        // Start with a 0.5-px offset so floor() gives the corner pixel (32, 32).
        let init_x = true_x + 0.5;
        let init_y = true_y + 0.5;

        let (result, _last_det) = refine_saddle(&img, init_x, init_y, 10, 3);
        assert!(result.is_some(), "refinement must return Some");
        let (_rx, _ry, det) = result.unwrap();
        assert!(
            det > 1e-3,
            "structure tensor det must exceed threshold (well-conditioned corner), got {det}"
        );
    }

    #[test]
    fn test_deduplication_no_double_count() {
        // Feed a trivial empty batch — verify that scratch_seen is fully reset.
        let config = CharucoTopology::new(4, 4, 0.04, 0.03, usize::MAX).unwrap();
        let mut refiner = CharucoRefiner::new(config);

        let det_batch = DetectionBatch::new();
        let view = det_batch.view(0);
        let buf = vec![128u8; 256 * 256];
        let img = ImageView::new(&buf, 256, 256, 256).unwrap();
        let intrinsics = CameraIntrinsics::new(500.0, 500.0, 128.0, 128.0);

        let mut out = refiner.new_batch();
        refiner.estimate(&view, &img, &intrinsics, &mut out);
        assert_eq!(out.count, 0);
        assert!(
            refiner.scratch_seen.iter().all(|&b| !b),
            "scratch_seen must be fully reset after estimate()"
        );
    }

    #[test]
    fn test_estimate_returns_none_with_few_saddles() {
        // With an empty batch, board_pose must be None.
        let config = CharucoTopology::new(4, 4, 0.04, 0.03, usize::MAX).unwrap();
        let mut refiner = CharucoRefiner::new(config);

        let det_batch = DetectionBatch::new();
        let view = det_batch.view(0);
        let buf = vec![128u8; 256 * 256];
        let img = ImageView::new(&buf, 256, 256, 256).unwrap();
        let intrinsics = CameraIntrinsics::new(500.0, 500.0, 128.0, 128.0);

        let mut out = refiner.new_batch();
        refiner.estimate(&view, &img, &intrinsics, &mut out);
        assert!(out.board_pose.is_none());
    }

    #[test]
    fn test_board_to_canonical_identity() {
        // For the TL corner of a marker (tl = (0,0,0), marker_length=1):
        //   u = 2*(0 - 0)/1 - 1 = -1,  v = -1
        let [u, v] = board_to_canonical(0.0, 0.0, [0.0, 0.0, 0.0], 1.0);
        assert!((u + 1.0).abs() < 1e-12);
        assert!((v + 1.0).abs() < 1e-12);
        // For the BR corner (tl=(0,0,0), marker=1): (1,1) → (1,1)
        let [u2, v2] = board_to_canonical(1.0, 1.0, [0.0, 0.0, 0.0], 1.0);
        assert!((u2 - 1.0).abs() < 1e-12);
        assert!((v2 - 1.0).abs() < 1e-12);
    }
}