vernier_core/
evaluate.rs

1//! Per-image evaluation orchestrator.
2//!
3//! The bridge between the dataset layer ([`crate::CocoDataset`] /
4//! [`crate::CocoDetections`]) and the IoU-type-agnostic spine
5//! ([`crate::matching`] → [`crate::accumulate`]). Pycocotools fuses
6//! these in `evaluate()` (cocoeval.py 174-216); we keep the layers
7//! separate so the spine stays untouchable per ADR-0005.
8//!
//! The pass is generic over [`EvalKernel`] — a subtrait of
//! [`Similarity`] that adds the dataset-bridging methods that turn a
//! `(image, category)` cell into kernel-typed annotations. Bbox and segm
//! reuse the same orchestrator with [`BboxIou`] and [`SegmIou`]
//! respectively; boundary IoU ([`BoundaryIou`]) and OKS
//! ([`OksSimilarity`]) plug in the same way, each through one
//! `impl EvalKernel for FooIou` block — `match_image`, `accumulate`,
//! and `summarize_*` stay untouched.
16//!
17//! ## What this layer does
18//!
19//! For each `(image, category)` cell:
20//!
21//! 1. Gather GTs and DTs from the dataset indices.
22//! 2. Pre-filter DTs to the top `max_dets_per_image` by score (the
23//!    matching engine and accumulator both rely on this cap; smaller
24//!    `max_dets` values are sliced downstream by `accumulate`).
25//! 3. Build the kernel's annotation slices via
26//!    [`EvalKernel::build_gt_anns`] / [`EvalKernel::build_dt_anns`] and
27//!    compute the GT × DT IoU matrix once via [`Similarity::compute`].
28//! 4. For each area range, build the per-call `_ignore` vector
29//!    (quirk **D3**) from the dataset's base ignore (D1) plus the area
30//!    filter (D6/D7), run the [`crate::matching`] engine, apply quirk **B7** by
31//!    flipping `dt_ignore` for unmatched DTs whose area is outside the
32//!    active range, and pack the result as a [`crate::accumulate::PerImageEval`] at
33//!    `[k][a][i]`.
34//!
35//! ## Quirk dispositions handled here
36//!
37//! - **D3** (`aligned`): per-call `_ignore` computed without mutating
38//!   the dataset.
39//! - **D6/D7** (`strict`): area filter uses non-strict `<=` / `>=` on
40//!   both bounds (mirrors `cocoeval.py:251`'s
41//!   `g['area'] < aRng[0] or g['area'] > aRng[1]` exclusion). An
42//!   annotation whose area equals a bucket boundary lands in *both*
43//!   adjacent buckets. Inequality direction matches the eval-time filter
44//!   in pycocotools, *not* `getAnnIds(areaRng=...)`.
45//! - **B7** (`strict`): unmatched DTs whose area is out of range get
46//!   `dt_ignore=true` so they do not contribute to the precision/recall
47//!   curve in this area cell.
48//! - **AA3** (`strict`, ADR-0026): when the dataset carries LVIS
49//!   federated metadata and the current `(image, category)` cell is in
50//!   `not_exhaustive_category_ids[image]`, every unmatched DT in the
51//!   cell has its `dt_ignore` set to `true`. Mirrors lvis-api
52//!   `eval.py:269-278`'s OR into the area-bucket `dt_ig_mask`. The
53//!   matching engine is unchanged: the flag piggybacks on the same
54//!   `dt_ignore` field B7 already drives.
55//! - **AA4** (`strict`, ADR-0026): on a federated dataset and with
56//!   `use_cats=true`, a cell `(image I, category C)` is evaluated only
57//!   when `C ∈ pos[I] ∪ neg[I]`. Cells with no GT (so `C ∉ pos[I]`)
58//!   and no `neg` listing produce no `eval_imgs` entry — the existing
59//!   `Option<PerImageEval>` distinction (`None` vs an empty cell) is
60//!   the same one lvis-api's `eval.py:336` filter relies on.
61//! - **L4** (`aligned`): `use_cats=false` collapses every category onto
62//!   a single virtual `k=0` bucket, with `category_id` carried through
63//!   matching as a no-op.
64//! - **E2 / J4** (`strict`): DTs never carry an `is_crowd` flag — the
65//!   [`crate::dataset::CocoDetection`] type lacks the field. Only GT crowdness
66//!   drives the E1 asymmetry inside the kernel.
67//! - **J3** (`strict`): DT areas are read from
68//!   [`crate::dataset::CocoDetection::area`], which the dataset layer derives
69//!   from the bbox at construction.
70//! - **J2** (`strict`): under [`ParityMode::Strict`], a DT lacking a
71//!   `segmentation` field under `iouType="segm"` has its bbox
72//!   synthesized into a 4-point rectangle polygon
73//!   `[[x1,y1, x1,y2, x2,y2, x2,y1]]` and rasterized — bit-for-bit the
74//!   path `pycocotools/coco.py:341` follows. Under
75//!   [`ParityMode::Corrected`] (the default for net-new users) the
76//!   synthesis is refused with [`EvalError::InvalidAnnotation`]: silent
77//!   coercion of bbox results to rectangle masks is a footgun, and
78//!   users who want strict parity opt in.
79//! - **J6** (`corrected`): per-entry dispatch — every detection is
80//!   inspected independently for the segm/bbox kind. Under
81//!   [`ParityMode::Corrected`] heterogeneous DT lists (some entries
82//!   with `segmentation`, some without) are rejected up-front rather
83//!   than silently routed through the first-entry-decides dispatch
84//!   pycocotools follows at `coco.py:330-363`.
85
86use ndarray::{Array2, ArrayView2, ArrayViewMut2};
87
88use crate::accumulate::PerImageEval;
89use crate::dataset::{
90    Bbox, CategoryId, CocoAnnotation, CocoDataset, CocoDetection, CocoDetections, EvalDataset,
91    ImageId, ImageMeta,
92};
93use crate::error::EvalError;
94use crate::matching::{match_image, MatchResult};
95use crate::parity::ParityMode;
96use crate::segmentation::Segmentation;
97use crate::similarity::{
98    boundary_iou_compute, segm_iou_compute, BboxAnn, BboxIou, BoundaryComputeScratch,
99    BoundaryGtCache, BoundaryIou, OksAnn, OksSimilarity, SegmAnn, SegmComputeScratch, SegmGtCache,
100    SegmIou, Similarity,
101};
102use std::collections::{HashMap, HashSet};
103use std::sync::{Arc, Mutex};
104use vernier_mask::Rle;
105
/// Handle to a per-kernel GT cache that is either borrowed or shared
/// through an `Arc`.
///
/// The borrowed variant feeds the one-shot batch entry points
/// ([`evaluate_boundary_cached`], [`evaluate_segm_cached`]) where the
/// cache's lifetime trivially exceeds the call. The `Arc` variant feeds
/// the streaming substrate ([`crate::stream::StreamingEvaluator`]),
/// where the kernel lives on a worker thread that needs `'static` and
/// the cache is the same `Arc` the FFI [`crate::CocoDataset`] handle
/// holds (ADR-0020).
///
/// `Clone` is implemented by hand rather than derived: the derive would
/// add an implicit `T: Clone` bound (which also forces `T: Sized`), even
/// though cloning only ever copies the reference or bumps the `Arc`
/// refcount. The manual impl makes the handle cloneable for every `T`,
/// including caches that are not `Clone` themselves.
pub enum GtCacheRef<'a, T: ?Sized> {
    /// Caller-owned cache passed by reference; lifetime tied to the
    /// borrow. Used by the batch entry points.
    Borrowed(&'a T),
    /// Atomically refcounted cache, shared with the FFI `CocoDataset`
    /// handle (ADR-0020). Used by streaming so the kernel can be
    /// `'static`.
    Owned(Arc<T>),
}

impl<T: ?Sized> Clone for GtCacheRef<'_, T> {
    /// Cheap handle copy: duplicates the shared reference or bumps the
    /// `Arc` refcount. The cache contents are never cloned.
    fn clone(&self) -> Self {
        match self {
            Self::Borrowed(cache) => Self::Borrowed(*cache),
            Self::Owned(cache) => Self::Owned(Arc::clone(cache)),
        }
    }
}

impl<T: ?Sized> GtCacheRef<'_, T> {
    /// Borrow the underlying cache irrespective of variant.
    pub fn get(&self) -> &T {
        match self {
            Self::Borrowed(cache) => *cache,
            Self::Owned(cache) => cache.as_ref(),
        }
    }
}
135
/// `category_id` reported on every cell when evaluation runs with
/// `use_cats=false` (quirk **L4**). The value matches the
/// `p.catIds = [-1]` collapse pycocotools performs in that mode.
pub const COLLAPSED_CATEGORY_SENTINEL: i64 = -1;
139
/// Upper bound used for area buckets that have no real ceiling. This is
/// the same `1e10` pycocotools uses for its `all` and `large` ranges.
pub const AREA_UNBOUNDED: f64 = 1e10;
143
144/// Closed `[lo, hi]` area bucket — both bounds are inclusive per quirks
145/// **D6/D7**, so an annotation with area exactly equal to a bound lands
146/// in this bucket (and in the adjacent one when the boundary is shared).
147///
148/// `index` is the position on the `Accumulated` A-axis the resulting
149/// [`PerImageEval`] feeds into; matched at summarize time against
150/// [`crate::summarize::AreaRng::index`].
151#[derive(Debug, Clone, Copy, PartialEq, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
152#[rkyv(derive(Debug))]
153pub struct AreaRange {
154    /// A-axis position. `0` is conventionally the `all` bucket, matching
155    /// [`crate::summarize::AreaRng::ALL`].
156    pub index: usize,
157    /// Lower bound (inclusive — quirks D6/D7).
158    pub lo: f64,
159    /// Upper bound (inclusive — quirks D6/D7). Use [`AREA_UNBOUNDED`]
160    /// for "no upper bound".
161    pub hi: f64,
162}
163
164impl AreaRange {
165    /// Pycocotools' default detection grid: `all`, `small`, `medium`,
166    /// `large`. Indices line up with [`crate::summarize::AreaRng`]'s `ALL` /
167    /// `SMALL` / `MEDIUM` / `LARGE` constants.
168    pub fn coco_default() -> [Self; 4] {
169        [
170            Self {
171                index: 0,
172                lo: 0.0,
173                hi: AREA_UNBOUNDED,
174            },
175            Self {
176                index: 1,
177                lo: 0.0,
178                hi: 32.0 * 32.0,
179            },
180            Self {
181                index: 2,
182                lo: 32.0 * 32.0,
183                hi: 96.0 * 96.0,
184            },
185            Self {
186                index: 3,
187                lo: 96.0 * 96.0,
188                hi: AREA_UNBOUNDED,
189            },
190        ]
191    }
192
193    /// Keypoints area grid (per ADR-0012, quirk **D5**): `all`, `medium`,
194    /// `large` — pycocotools drops the `small` bucket for kp eval. The
195    /// A-axis is compressed to 3 entries with indices `0 = all`,
196    /// `1 = medium`, `2 = large`. Pair with
197    /// [`crate::summarize::StatRequest::coco_keypoints_default`] so the
198    /// summarizer's `req.area.index` lookups land on the right slice.
199    pub fn keypoints_default() -> [Self; 3] {
200        [
201            Self {
202                index: 0,
203                lo: 0.0,
204                hi: AREA_UNBOUNDED,
205            },
206            Self {
207                index: 1,
208                lo: 32.0 * 32.0,
209                hi: 96.0 * 96.0,
210            },
211            Self {
212                index: 2,
213                lo: 96.0 * 96.0,
214                hi: AREA_UNBOUNDED,
215            },
216        ]
217    }
218
219    fn contains(&self, area: f64) -> bool {
220        // D6 (strict): pycocotools (cocoeval.py:251) keeps a GT/DT in a
221        // bucket when `not (area < lo or area > hi)`, i.e. non-strict
222        // inclusion on both ends. An area equal to a bucket boundary
223        // (e.g. 32² = 1024) therefore lands in *both* adjacent buckets.
224        area >= self.lo && area <= self.hi
225    }
226}
227
228/// Inputs to [`evaluate_bbox`] / [`evaluate_segm`] / [`evaluate_boundary`] / [`evaluate_with`].
229/// IoU-agnostic — kernel-specific configuration (sigmas, prefilter
230/// thresholds, …) lives on the [`EvalKernel`] passed alongside.
231#[derive(Debug, Clone, Copy)]
232pub struct EvaluateParams<'p> {
233    /// IoU thresholds, length `T`. Use [`crate::parity::iou_thresholds`] for the
234    /// canonical 10-point COCO ladder.
235    pub iou_thresholds: &'p [f64],
236    /// Area ranges. The `index` field of each entry is the A-axis
237    /// position the resulting [`PerImageEval`] is filed under; the
238    /// orchestrator emits exactly `area_ranges.len()` cells per
239    /// `(image, category)`.
240    pub area_ranges: &'p [AreaRange],
241    /// Top-N filter applied to DTs per `(image, category)` cell before
242    /// matching. Should be the largest entry of the eventual
243    /// [`crate::accumulate::AccumulateParams::max_dets`] ladder; smaller caps are
244    /// sliced downstream.
245    pub max_dets_per_image: usize,
246    /// Quirk **L4** (`aligned`): when `false`, every category is
247    /// collapsed onto a single bucket `k=0` and `category_id` is ignored
248    /// for gather purposes.
249    pub use_cats: bool,
250    /// When `true`, [`evaluate_with`] retains the per-`(category,
251    /// image)` IoU matrix on [`EvalGrid::retained_ious`] so the
252    /// `per_pair` / `per_detection` result tables can read it. Default
253    /// `false`; the no-retention path allocates nothing extra and is
254    /// bit-identical to the 0.0.1 release.
255    pub retain_iou: bool,
256}
257
258/// Owned counterpart to [`EvaluateParams`].
259///
260/// The streaming evaluator holds its config across many `update()`
261/// calls and cannot borrow per-call slices the way the batch entry
262/// points do. [`Self::borrow`] reconstructs an [`EvaluateParams`] view
263/// that reuses this struct's storage, so handing the owned form to the
264/// unchanged `evaluate_with` path is zero-cost.
265#[derive(Debug, Clone, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize)]
266#[rkyv(derive(Debug))]
267pub struct OwnedEvaluateParams {
268    /// IoU thresholds, length `T`.
269    pub iou_thresholds: Vec<f64>,
270    /// Area ranges (owned).
271    pub area_ranges: Vec<AreaRange>,
272    /// Top-N filter applied to DTs per `(image, category)` cell before matching.
273    pub max_dets_per_image: usize,
274    /// Quirk **L4** collapse flag.
275    pub use_cats: bool,
276    /// IoU-matrix retention flag — see [`EvaluateParams::retain_iou`].
277    pub retain_iou: bool,
278}
279
280impl OwnedEvaluateParams {
281    /// Borrowed view. Reuses `self`'s storage; no allocation.
282    pub fn borrow(&self) -> EvaluateParams<'_> {
283        EvaluateParams {
284            iou_thresholds: &self.iou_thresholds,
285            area_ranges: &self.area_ranges,
286            max_dets_per_image: self.max_dets_per_image,
287            use_cats: self.use_cats,
288            retain_iou: self.retain_iou,
289        }
290    }
291
292    /// 32-byte BLAKE3 fingerprint of these params. Stable for equal
293    /// values; carried in distributed-eval partial headers (ADR-0031)
294    /// so heterogeneous-config partials are refused at merge time.
295    ///
296    /// The archived rkyv form is deterministic per the field order
297    /// declared on this struct, so the hash is stable as long as the
298    /// struct shape is. Adding fields invalidates the hash — that is
299    /// what bumps the partial format version.
300    ///
301    /// # Errors
302    ///
303    /// [`EvalError::InvalidConfig`] if rkyv refuses to serialize the
304    /// archived form. In practice this can only happen for the same
305    /// reasons `to_bytes` itself fails (allocator OOM); we map it to
306    /// the existing variant rather than introducing a new one.
307    pub fn params_hash(&self) -> Result<[u8; 32], EvalError> {
308        let bytes =
309            rkyv::to_bytes::<rkyv::rancor::Error>(self).map_err(|e| EvalError::InvalidConfig {
310                detail: format!("rkyv serialization of OwnedEvaluateParams failed: {e}"),
311            })?;
312        Ok(*blake3::hash(&bytes).as_bytes())
313    }
314}
315
316/// Discriminator for the four kernel families on the IoU axis (per
317/// ADR-0012's iou-type taxonomy). Carried in distributed-eval partials
318/// (ADR-0031) so a head-rank reconstruction refuses to merge bbox and
319/// segm partials silently.
320///
321/// Variant order is **wire-format load-bearing**: the rkyv archived
322/// discriminant is keyed off it. Adding new kernels appends; never
323/// reorder, never remove. Use a new ADR + format-version bump if the
324/// space ever needs to change.
325#[derive(
326    Debug, Clone, Copy, PartialEq, Eq, Hash, rkyv::Archive, rkyv::Serialize, rkyv::Deserialize,
327)]
328#[rkyv(derive(Debug, PartialEq, Eq))]
329pub enum KernelKind {
330    /// `BboxIou` — axis-aligned box IoU (the default).
331    Bbox,
332    /// `SegmIou` (and `SegmIouCached`) — RLE/polygon mask IoU.
333    Segm,
334    /// `BoundaryIou` (and `BoundaryIouCached`) — boundary-IoU per ADR-0017.
335    Boundary,
336    /// `OksSimilarity` — OKS-based keypoints similarity (ADR-0012).
337    Keypoints,
338}
339
340impl KernelKind {
341    /// `u32` discriminator carried in the wire envelope header
342    /// (ADR-0032). Stable values: `Bbox=0, Segm=1, Boundary=2,
343    /// Keypoints=3` — the same values ADR-0031 wrote as a `u8`.
344    /// Adding new kernels appends; never reorder.
345    pub const fn discriminator(self) -> u32 {
346        match self {
347            Self::Bbox => 0,
348            Self::Segm => 1,
349            Self::Boundary => 2,
350            Self::Keypoints => 3,
351        }
352    }
353}
354
355/// Bridges a [`CocoDataset`] / [`CocoDetections`] cell to a kernel's
356/// annotation type.
357///
358/// Per ADR-0005, the per-image pass is generic over this trait so a new
359/// IoU type plugs in via one `impl EvalKernel for FooIou` block — the
360/// matching engine, accumulator, and summarizer never see the new type.
361///
362/// Implementors do the per-cell rasterization / lookup that a [`Similarity`]
363/// kernel can't (because [`Similarity`] is dataset-agnostic by design).
364/// `image` carries the `(h, w)` segm impls need for [`crate::segmentation::Segmentation::to_rle`].
365pub trait EvalKernel: Similarity {
366    /// Discriminator carried in the distributed-eval wire format
367    /// (ADR-0031) so heterogeneous partials are refused at merge time.
368    /// Required (no default): every kernel must declare its kind.
369    fn kind(&self) -> KernelKind;
370
371    /// Build the kernel's GT annotation slice for one `(image, category)`
372    /// cell. `indices` selects from `gt_anns` in the order the cell
373    /// matcher will see.
374    fn build_gt_anns(
375        &self,
376        gt_anns: &[CocoAnnotation],
377        indices: &[usize],
378        image: &ImageMeta,
379    ) -> Result<Vec<Self::Annotation>, EvalError>;
380
381    /// Build the kernel's DT annotation slice for one `(image, category)`
382    /// cell, in score-descending sorted order matching `dt_indices`.
383    ///
384    /// `parity_mode` is threaded through so kernels with parity-aware
385    /// fallbacks (segm's J2 bbox→polygon synthesis under
386    /// [`ParityMode::Strict`]) can dispatch on it without reaching back
387    /// up the call stack.
388    fn build_dt_anns(
389        &self,
390        dt_anns: &[CocoDetection],
391        indices: &[usize],
392        image: &ImageMeta,
393        parity_mode: ParityMode,
394    ) -> Result<Vec<Self::Annotation>, EvalError>;
395
396    /// Optional kernel-specific GT ignore override. Default `false` (no
397    /// kernel reason to ignore).
398    ///
399    /// The orchestrator OR-s the result with the dataset-level
400    /// [`CocoAnnotation::effective_ignore`] (quirk **D1**) when building
401    /// `gt_base_ignore`. [`OksSimilarity`] overrides this to fold in
402    /// quirk **D2** (`strict`): GT with zero visible keypoints is
403    /// treated as an implicit ignore region, OR-ed with the existing
404    /// ignore. Bbox / segm / boundary kernels keep the default — D2 is
405    /// keypoints-specific and must not bleed across kernels.
406    fn extra_gt_ignore(&self, _ann: &CocoAnnotation) -> bool {
407        false
408    }
409
410    /// Marker: is this kernel the keypoints (OKS) kernel?
411    ///
412    /// The streaming evaluator dispatches its summarizer choice on this
413    /// flag: keypoints kernels resolve to the 10-stat
414    /// [`crate::summarize::StatRequest::coco_keypoints_default`] plan, every other
415    /// kernel resolves to the 12-stat detection plan. Default `false`;
416    /// [`OksSimilarity`] overrides to `true`. Additive trait method —
417    /// existing implementors keep the default.
418    fn is_keypoints(&self) -> bool {
419        false
420    }
421}
422
423impl EvalKernel for BboxIou {
424    fn kind(&self) -> KernelKind {
425        KernelKind::Bbox
426    }
427
428    fn build_gt_anns(
429        &self,
430        gt_anns: &[CocoAnnotation],
431        indices: &[usize],
432        _image: &ImageMeta,
433    ) -> Result<Vec<BboxAnn>, EvalError> {
434        Ok(indices
435            .iter()
436            .map(|&j| BboxAnn {
437                bbox: gt_anns[j].bbox,
438                is_crowd: gt_anns[j].is_crowd,
439            })
440            .collect())
441    }
442
443    fn build_dt_anns(
444        &self,
445        dt_anns: &[CocoDetection],
446        indices: &[usize],
447        _image: &ImageMeta,
448        _parity_mode: ParityMode,
449    ) -> Result<Vec<BboxAnn>, EvalError> {
450        // E2/J4: DT never carries crowd.
451        Ok(indices
452            .iter()
453            .map(|&j| BboxAnn {
454                bbox: dt_anns[j].bbox,
455                is_crowd: false,
456            })
457            .collect())
458    }
459}
460
461impl EvalKernel for SegmIou {
462    fn kind(&self) -> KernelKind {
463        KernelKind::Segm
464    }
465
466    fn build_gt_anns(
467        &self,
468        gt_anns: &[CocoAnnotation],
469        indices: &[usize],
470        image: &ImageMeta,
471    ) -> Result<Vec<SegmAnn>, EvalError> {
472        build_segm_gt_anns(gt_anns, indices, image)
473    }
474
475    fn build_dt_anns(
476        &self,
477        dt_anns: &[CocoDetection],
478        indices: &[usize],
479        image: &ImageMeta,
480        parity_mode: ParityMode,
481    ) -> Result<Vec<SegmAnn>, EvalError> {
482        build_segm_dt_anns(dt_anns, indices, image, parity_mode)
483    }
484}
485
486impl EvalKernel for BoundaryIou {
487    fn kind(&self) -> KernelKind {
488        KernelKind::Boundary
489    }
490
491    fn build_gt_anns(
492        &self,
493        gt_anns: &[CocoAnnotation],
494        indices: &[usize],
495        image: &ImageMeta,
496    ) -> Result<Vec<SegmAnn>, EvalError> {
497        build_segm_gt_anns(gt_anns, indices, image)
498    }
499
500    fn build_dt_anns(
501        &self,
502        dt_anns: &[CocoDetection],
503        indices: &[usize],
504        image: &ImageMeta,
505        parity_mode: ParityMode,
506    ) -> Result<Vec<SegmAnn>, EvalError> {
507        build_segm_dt_anns(dt_anns, indices, image, parity_mode)
508    }
509}
510
511impl EvalKernel for OksSimilarity {
512    fn kind(&self) -> KernelKind {
513        KernelKind::Keypoints
514    }
515
516    fn build_gt_anns(
517        &self,
518        gt_anns: &[CocoAnnotation],
519        indices: &[usize],
520        _image: &ImageMeta,
521    ) -> Result<Vec<OksAnn>, EvalError> {
522        indices
523            .iter()
524            .map(|&j| {
525                let ann = &gt_anns[j];
526                let kps = ann
527                    .keypoints
528                    .as_deref()
529                    .ok_or_else(|| missing_keypoints_err("GT", ann.id.0, ann.image_id.0))?;
530                let num_keypoints = ann
531                    .num_keypoints
532                    .unwrap_or_else(|| count_visible_keypoints(kps));
533                Ok(OksAnn {
534                    category_id: ann.category_id.0,
535                    keypoints: kps.to_vec(),
536                    num_keypoints,
537                    bbox: ann.bbox.into(),
538                    area: ann.area,
539                })
540            })
541            .collect()
542    }
543
544    fn build_dt_anns(
545        &self,
546        dt_anns: &[CocoDetection],
547        indices: &[usize],
548        _image: &ImageMeta,
549        _parity_mode: ParityMode,
550    ) -> Result<Vec<OksAnn>, EvalError> {
551        // E2/J4: DT never carries crowd. There is no parity-mode J2
552        // analog for keypoints — pycocotools has no bbox→keypoint
553        // synthesis path, so a missing `keypoints` field is always an
554        // [`EvalError::InvalidAnnotation`] regardless of mode.
555        indices
556            .iter()
557            .map(|&j| {
558                let dt = &dt_anns[j];
559                let kps = dt
560                    .keypoints
561                    .as_deref()
562                    .ok_or_else(|| missing_keypoints_err("DT", dt.id.0, dt.image_id.0))?;
563                let num_keypoints = dt
564                    .num_keypoints
565                    .unwrap_or_else(|| count_visible_keypoints(kps));
566                Ok(OksAnn {
567                    category_id: dt.category_id.0,
568                    keypoints: kps.to_vec(),
569                    num_keypoints,
570                    bbox: dt.bbox.into(),
571                    area: dt.area,
572                })
573            })
574            .collect()
575    }
576
577    fn extra_gt_ignore(&self, ann: &CocoAnnotation) -> bool {
578        // D2 (`strict`): GT with zero visible keypoints is an implicit
579        // ignore region. Annotations without a `keypoints` field at all
580        // are treated as zero-visible — `build_gt_anns` will reject
581        // them downstream, but this hook runs before that and must
582        // stay total.
583        let visible = ann
584            .num_keypoints
585            .or_else(|| ann.keypoints.as_deref().map(count_visible_keypoints))
586            .unwrap_or(0);
587        visible == 0
588    }
589
590    fn is_keypoints(&self) -> bool {
591        true
592    }
593}
594
/// Number of *visible* keypoints in a flat `[x, y, v, ...]` triplet
/// vector, where visible means `v > 0`. Serves as the fallback when an
/// input omits the `num_keypoints` value pycocotools precomputes.
///
/// Only complete triplets are inspected; trailing values that do not
/// form a full `[x, y, v]` group are ignored.
fn count_visible_keypoints(kps: &[f64]) -> u32 {
    // The visibility flag is the third value of each triplet, i.e. the
    // elements at offsets 2, 5, 8, … — one per complete triplet.
    let visibility_flags = kps.iter().skip(2).step_by(3);
    visibility_flags.filter(|&&v| v > 0.0).count() as u32
}
601
602/// OKS path equivalent of [`missing_segmentation_err`] — names the
603/// offending kind/id/image when a `keypoints` field is required and
604/// absent. Unlike segm there is no parity-mode escape hatch.
605fn missing_keypoints_err(kind: &str, ann_id: i64, image_id: i64) -> EvalError {
606    EvalError::InvalidAnnotation {
607        detail: format!(
608            "{kind} id={ann_id} on image {image_id} has no `keypoints` field; \
609             OKS eval requires keypoints on every entry. There is no \
610             pycocotools-equivalent bbox-synthesis fallback for keypoints \
611             (unlike segm quirk J2)."
612        ),
613    }
614}
615
616fn build_segm_gt_anns(
617    gt_anns: &[CocoAnnotation],
618    indices: &[usize],
619    image: &ImageMeta,
620) -> Result<Vec<SegmAnn>, EvalError> {
621    indices
622        .iter()
623        .map(|&j| {
624            let ann = &gt_anns[j];
625            let seg = ann
626                .segmentation
627                .as_ref()
628                .ok_or_else(|| missing_segmentation_err("GT", ann.id.0, image.id.0))?;
629            Ok(SegmAnn {
630                rle: seg.to_rle(image.height, image.width)?,
631                is_crowd: ann.is_crowd,
632                ann_id: ann.id.0,
633            })
634        })
635        .collect()
636}
637
638fn build_segm_dt_anns(
639    dt_anns: &[CocoDetection],
640    indices: &[usize],
641    image: &ImageMeta,
642    parity_mode: ParityMode,
643) -> Result<Vec<SegmAnn>, EvalError> {
644    indices
645        .iter()
646        .map(|&j| {
647            let dt = &dt_anns[j];
648            let rle = match (&dt.segmentation, parity_mode) {
649                (Some(seg), _) => seg.to_rle(image.height, image.width)?,
650                // J2 (`strict`): pycocotools' coco.py:341 synthesizes
651                // a rectangular polygon `[[x1,y1, x1,y2, x2,y2, x2,y1]]`
652                // from the bbox when a DT under iouType="segm" lacks
653                // a `segmentation` field. We reproduce that path
654                // bit-for-bit so strict-mode parity covers bbox-only
655                // result files.
656                (None, ParityMode::Strict) => {
657                    synthesize_dt_segm_from_bbox(&dt.bbox, image.height, image.width)?
658                }
659                // J2 (`corrected`) + J6 (`corrected`): silent coercion
660                // of bbox results to rectangle masks is a footgun.
661                // Refusing here also turns a heterogeneous DT list
662                // (some entries with segm, some without) under
663                // iouType="segm" into a clean, per-entry-pinpointed
664                // error rather than the first-entry-decides dispatch
665                // pycocotools follows.
666                (None, ParityMode::Corrected) => {
667                    return Err(missing_segmentation_err("DT", dt.id.0, image.id.0));
668                }
669            };
670            Ok(SegmAnn {
671                rle,
672                is_crowd: false,
673                ann_id: dt.id.0,
674            })
675        })
676        .collect()
677}
678
679/// J2 (`strict`): synthesize a 4-point rectangle polygon from a DT bbox
680/// and rasterize it at the image's `(h, w)`. Mirrors
681/// `pycocotools/coco.py:341` exactly:
682/// `[[x1,y1, x1,y2, x2,y2, x2,y1]]` where `(x1, y1)` is the top-left and
683/// `(x2, y2) = (x1 + w, y1 + h)`.
684fn synthesize_dt_segm_from_bbox(bbox: &Bbox, h: u32, w: u32) -> Result<Rle, EvalError> {
685    let x1 = bbox.x;
686    let y1 = bbox.y;
687    let x2 = bbox.x + bbox.w;
688    let y2 = bbox.y + bbox.h;
689    let polygon = vec![x1, y1, x1, y2, x2, y2, x2, y1];
690    let segm = Segmentation::Polygons(vec![polygon]);
691    segm.to_rle(h, w)
692}
693
694/// J2 (`corrected`) / J6 (`corrected`) error path: a DT lacks the
695/// `segmentation` field under `iouType="segm"`. The detail names the
696/// offending kind (`GT` or `DT`), id, and image so a heterogeneous
697/// DT list pinpoints the first entry without segm rather than failing
698/// with a global "wrong shape" error.
699fn missing_segmentation_err(kind: &str, ann_id: i64, image_id: i64) -> EvalError {
700    EvalError::InvalidAnnotation {
701        detail: format!(
702            "{kind} id={ann_id} on image {image_id} has no `segmentation` field; \
703             segm eval in corrected mode requires one on every entry. \
704             pycocotools synthesizes a bbox-rectangle polygon here \
705             (quirks J2/J6); pass `ParityMode::Strict` to opt into that \
706             behavior."
707        ),
708    }
709}
710
711/// Pycocotools-shaped per-cell bookkeeping that the matching engine
712/// strips out when packing [`PerImageEval`]. Surfaced separately so the
713/// accumulator stays narrow per ADR-0005, and FFI / `COCOeval` drop-in
714/// consumers can reconstruct `evalImgs` dicts without re-running eval.
715///
716/// All `dt_*` axes are in score-descending sorted order (stable
717/// mergesort, quirk **A1**); all `gt_*` axes are in ignore-ascending
718/// sorted order (quirk **A4**). `dt_matches` and `gt_matches` carry
719/// pycocotools' value semantics: `i64` annotation ids on a hit, `0` on a
720/// miss (matching `dtm`/`gtm` initialization in `cocoeval.py`).
721#[derive(Debug, Clone)]
722pub struct EvalImageMeta {
723    /// COCO image id for this cell.
724    pub image_id: i64,
725    /// COCO category id, or [`COLLAPSED_CATEGORY_SENTINEL`] when
726    /// `use_cats=false`.
727    pub category_id: i64,
728    /// Active area range as `[lo, hi]`, mirroring pycocotools' `aRng`.
729    pub area_rng: [f64; 2],
730    /// `max_dets_per_image` cap that produced this cell's DT slice.
731    pub max_det: usize,
732    /// DT annotation ids in sorted-DT order, length `D`.
733    pub dt_ids: Vec<i64>,
734    /// GT annotation ids in sorted-GT order, length `G`.
735    pub gt_ids: Vec<i64>,
736    /// Shape `(T, D)`. GT id matched at `(threshold, sorted-DT k)`, or
737    /// `0` if unmatched (pycocotools sentinel; safe because COCO ids are
738    /// `>= 1` per spec, and vernier's auto-id assignment also starts at 1).
739    pub dt_matches: Array2<i64>,
740    /// Shape `(T, G)`. DT id matched at `(threshold, sorted-GT k)`, or
741    /// `0` if unmatched (same `>= 1` invariant as `dt_matches`).
742    pub gt_matches: Array2<i64>,
743}
744
/// Output of [`evaluate_bbox`] / [`evaluate_segm`] / [`evaluate_boundary`]
/// — the flat `(K, A, I)` grid of
/// [`PerImageEval`] cells the accumulator consumes, plus the dimensions
/// needed to construct [`crate::accumulate::AccumulateParams`].
#[derive(Debug, Clone)]
pub struct EvalGrid {
    /// `Some(cell)` per `(k, a, i)` triple where the cell ran; `None`
    /// where pycocotools would emit `None` (image absent from
    /// detections, no GTs and no DTs in the cell) and where a federated
    /// dataset's cell-skip applies (no GTs and the category is not in
    /// the image's negative set). Layout is K-major, then A, then I —
    /// `eval_imgs[k * A * I + a * I + i]`.
    ///
    /// Cells are heap-boxed: `Option<Box<PerImageEval>>` is 8 bytes
    /// (Box's `NonNull` niche absorbs the discriminant), so the dense
    /// `n_categories * n_area_ranges * n_images` grid only pays for a
    /// pointer per slot at zero-init time. On val2017 (1.6M slots,
    /// 14k populated) this drops the upfront alloc from 268 MB to
    /// 12.8 MB and the zero-init from ~120 ms to ~5 ms — see
    /// `docs/engineering/benchmarking/2026-05-bbox-cdf.md`.
    pub eval_imgs: Vec<Option<Box<PerImageEval>>>,
    /// Pycocotools-shaped bookkeeping for each populated cell (same
    /// `[k][a][i]` layout as `eval_imgs`; `None` wherever `eval_imgs` is
    /// `None`). Boxed for the same reason as `eval_imgs`.
    pub eval_imgs_meta: Vec<Option<Box<EvalImageMeta>>>,
    /// `K` axis size: the number of categories used for evaluation, or
    /// `1` when `use_cats=false`.
    pub n_categories: usize,
    /// `A` axis size: equal to `params.area_ranges.len()`.
    pub n_area_ranges: usize,
    /// `I` axis size: number of images iterated over (every image in the
    /// GT dataset, in deterministic id-ascending order).
    pub n_images: usize,
    /// Per-`(category, image)` IoU matrices retained when the caller
    /// passed [`EvaluateParams::retain_iou`] = `true`; `None` on the
    /// default no-retention path. Keyed per `(k, i)` rather than per
    /// area range because the IoU matrix does not depend on the area
    /// range.
    pub retained_ious: Option<crate::tables::RetainedIous>,
}
781
782impl EvalGrid {
783    /// Cell at `(category_index, area_index, image_index)`. Returns
784    /// `None` when the indices are in bounds but no cell ran (image
785    /// absent from detections, or no GTs and no DTs in the cell);
786    /// returns `None` for out-of-bounds indices as well.
787    pub fn cell(&self, k: usize, a: usize, i: usize) -> Option<&PerImageEval> {
788        let idx = self.flat_index(k, a, i)?;
789        self.eval_imgs.get(idx).and_then(Option::as_deref)
790    }
791
792    /// Pycocotools-shaped bookkeeping at `(category_index, area_index,
793    /// image_index)`. `None` exactly when [`EvalGrid::cell`] is `None`.
794    pub fn cell_meta(&self, k: usize, a: usize, i: usize) -> Option<&EvalImageMeta> {
795        let idx = self.flat_index(k, a, i)?;
796        self.eval_imgs_meta.get(idx).and_then(Option::as_deref)
797    }
798
799    fn flat_index(&self, k: usize, a: usize, i: usize) -> Option<usize> {
800        if k >= self.n_categories || a >= self.n_area_ranges || i >= self.n_images {
801            return None;
802        }
803        Some(k * self.n_area_ranges * self.n_images + a * self.n_images + i)
804    }
805}
806
/// Run the per-image evaluation pass with the given [`EvalKernel`].
///
/// Iterates `(image, category)` cells, computes the IoU matrix once per
/// cell via the kernel, runs the [`crate::matching`] engine once per area range,
/// and packs the results into a flat `[k][a][i]`-ordered grid suitable
/// for [`crate::accumulate`].
///
/// Cells that are skipped — no GTs and no DTs, or a federated cell with
/// no GTs whose category is not in the image's negative set — leave
/// their grid slots as `None` for every area range.
///
/// Most callers want [`evaluate_bbox`], [`evaluate_segm`], or
/// [`evaluate_boundary`]; this entry point is exposed for downstream
/// code that ships its own kernel.
///
/// # Errors
///
/// Propagates [`EvalError`] from the underlying [`Similarity`],
/// [`EvalKernel::build_gt_anns`] / [`EvalKernel::build_dt_anns`], and
/// [`crate::matching`] calls.
pub fn evaluate_with<K: EvalKernel>(
    gt: &CocoDataset,
    dt: &CocoDetections,
    params: EvaluateParams<'_>,
    parity_mode: ParityMode,
    kernel: &K,
) -> Result<EvalGrid, EvalError> {
    // Image and category ordering: id-ascending, deterministic across runs.
    let mut images: Vec<&ImageMeta> = gt.images().iter().collect();
    images.sort_unstable_by_key(|im| im.id.0);
    let n_i = images.len();
    let n_a = params.area_ranges.len();

    // L4: collapse to a single virtual bucket when `use_cats=false`.
    let category_buckets: Vec<Option<CategoryId>> = if params.use_cats {
        let mut cats: Vec<_> = gt.categories().iter().map(|c| c.id).collect();
        cats.sort_unstable_by_key(|id| id.0);
        cats.into_iter().map(Some).collect()
    } else {
        vec![None]
    };
    let n_k = category_buckets.len();

    // Dense `(K, A, I)` grids; every slot starts as `None` and only the
    // cells that actually run are filled in below.
    let mut eval_imgs: Vec<Option<Box<PerImageEval>>> = vec![None; n_k * n_a * n_i];
    let mut eval_imgs_meta: Vec<Option<Box<EvalImageMeta>>> = vec![None; n_k * n_a * n_i];
    // Optional IoU retention, keyed by `(k, i)` — IoU is geometry-only,
    // so storing per-area would duplicate ~4× under the COCO grid.
    let mut retained_ious_map: Option<std::collections::HashMap<(usize, usize), Array2<f64>>> =
        if params.retain_iou {
            Some(std::collections::HashMap::new())
        } else {
            None
        };

    // ADR-0026 federated metadata is consumed only when `use_cats=true`;
    // LVIS evaluation is per-category by construction. With
    // `use_cats=false` the federated maps are intentionally ignored
    // and the eval falls back to COCO semantics (the L4 `k=0` collapse
    // never carries federated state). Pre-resolve the per-image
    // (neg, not_exhaustive) set references once — the inner cell
    // loop hits this `n_k` times per image, and the HashMap lookups
    // dominate runtime on long-tail datasets (1203 cats * 19809 images
    // = ~24M redundant probes on full LVIS val).
    //
    // The vector is empty on the non-federated path, so the `.get(i)`
    // probe in the cell loop simply misses there. An image missing
    // either map entry resolves to `None` and is treated as
    // non-federated as well.
    let federated_per_image: Vec<Option<(&HashSet<CategoryId>, &HashSet<CategoryId>)>> =
        match (params.use_cats, gt.federated()) {
            (true, Some(fed)) => images
                .iter()
                .map(|im| {
                    let neg = fed.neg_category_ids.get(&im.id)?;
                    let nel = fed.not_exhaustive_category_ids.get(&im.id)?;
                    Some((neg, nel))
                })
                .collect(),
            _ => Vec::new(),
        };

    // Pre-grown scratch shared across every `(k, i)` cell. `clear()`
    // + `extend()` per cell amortizes the ~14k allocator round-trips
    // val2017 would otherwise pay for these gathers.
    let mut scratch = CellScratch::new();
    let gt_anns = gt.annotations();
    let dt_anns = dt.detections();

    // Quirk **AG6** (strict, ADR-0026): the LVIS oracle's
    // `LVIS.get_ann_ids` applies a strict `area > 0` filter
    // (`lvis/lvis.py:94`) and silently drops GTs whose JSON `area`
    // is zero. Post-filter `img_pl` then drives `_prepare`'s federated
    // DT filter, so on "all-zero-area" `(image, category)` cells the
    // DT is dropped along with the GT, and on "mixed" cells the
    // orphan DTs become FPs. Reproduce in strict mode for federated
    // datasets only — COCO and `Corrected` mode keep zero-area
    // annotations (vernier's default behavior). The filter slots in
    // before the cell-empty short-circuit so the AA4 cell-skip path
    // naturally fires when every GT in a cell is zero-area.
    let strict_lvis_zero_area_filter =
        matches!(parity_mode, ParityMode::Strict) && gt.federated().is_some();

    for (k, cat) in category_buckets.iter().enumerate() {
        // Base offset of category `k`'s `(A, I)` plane in the flat grids.
        let nk = k * n_a * n_i;
        let category_id = cat.map_or(COLLAPSED_CATEGORY_SENTINEL, |c| c.0);
        for (i, image) in images.iter().enumerate() {
            let image_id = image.id;
            let gt_indices_raw = gt_indices_for_cell(gt, image_id, *cat);
            // Only allocate a filtered copy when at least one GT would
            // actually be dropped; otherwise borrow the dataset slice.
            // NOTE(review): the trigger tests `area <= 0.0` while the
            // filter keeps `area > 0.0`; the two disagree only for NaN
            // areas (kept when alone, dropped alongside a non-positive
            // sibling) — confirm NaN areas cannot reach this point.
            let gt_indices_buf: Vec<usize>;
            let gt_indices: &[usize] = if strict_lvis_zero_area_filter
                && gt_indices_raw.iter().any(|&j| gt_anns[j].area <= 0.0)
            {
                gt_indices_buf = gt_indices_raw
                    .iter()
                    .copied()
                    .filter(|&j| gt_anns[j].area > 0.0)
                    .collect();
                &gt_indices_buf
            } else {
                gt_indices_raw
            };
            let raw_dt_indices = raw_dt_indices_for_cell(dt, image_id, *cat);
            // Nothing to evaluate: the slots stay `None` for every area
            // range.
            if gt_indices.is_empty() && raw_dt_indices.is_empty() {
                continue;
            }

            // AA4 cell-skip and AA3 `not_exhaustive` flag. The outer
            // resolution of `federated_per_image` ensures every entry
            // is `Some` exactly when federated semantics apply to
            // this cell.
            let mut not_exhaustive_for_cell = false;
            if let (Some(c), Some(Some((neg_set, nel_set)))) = (cat, federated_per_image.get(i)) {
                // pos[I] is derived from GTs at load: `C ∈ pos[I]`
                // exactly when `gt_indices` is non-empty for this
                // cell. Skip when the cell is outside `pos ∪ neg`.
                if gt_indices.is_empty() && !neg_set.contains(c) {
                    continue;
                }
                not_exhaustive_for_cell = nel_set.contains(c);
            }

            // Top-N DT filter — fills `scratch.dt_indices` in place,
            // reusing `dt_score_buf` and `dt_perm_buf` across cells.
            dt_top_indices_for_cell_into(
                &mut scratch.dt_indices,
                &mut scratch.dt_score_buf,
                &mut scratch.dt_perm_buf,
                dt_anns,
                raw_dt_indices,
                params.max_dets_per_image,
            );

            // Area-invariant per-cell gathers — built once, reused
            // across every area range. All seven Vecs are fields of
            // `scratch`; `clear()` + `extend()` keeps the allocations
            // amortized.
            scratch.gt_areas.clear();
            scratch
                .gt_areas
                .extend(gt_indices.iter().map(|&j| gt_anns[j].area));
            scratch.gt_iscrowd.clear();
            scratch
                .gt_iscrowd
                .extend(gt_indices.iter().map(|&j| gt_anns[j].is_crowd));
            // D1: parity-mode fork lives on the annotation; pass through.
            // Kernel-specific ignore reasons (OKS quirk **D2**) are
            // OR-ed in via [`EvalKernel::extra_gt_ignore`].
            scratch.gt_base_ignore.clear();
            scratch.gt_base_ignore.extend(gt_indices.iter().map(|&j| {
                gt_anns[j].effective_ignore(parity_mode) || kernel.extra_gt_ignore(&gt_anns[j])
            }));
            scratch.gt_ids.clear();
            scratch
                .gt_ids
                .extend(gt_indices.iter().map(|&j| gt_anns[j].id.0));
            scratch.dt_areas.clear();
            scratch
                .dt_areas
                .extend(scratch.dt_indices.iter().map(|&j| dt_anns[j].area));
            scratch.dt_scores.clear();
            scratch
                .dt_scores
                .extend(scratch.dt_indices.iter().map(|&j| dt_anns[j].score));
            scratch.dt_ids.clear();
            scratch
                .dt_ids
                .extend(scratch.dt_indices.iter().map(|&j| dt_anns[j].id.0));

            let gt_kernel = kernel.build_gt_anns(gt_anns, gt_indices, image)?;
            let dt_kernel =
                kernel.build_dt_anns(dt_anns, &scratch.dt_indices, image, parity_mode)?;

            // IoU scratch backing — `Vec<f64>` reused across cells, sized
            // to `g * d` per cell. Zero-fill keeps the empty-side fallback
            // (`g == 0` or `d == 0`) bit-identical to `Array2::zeros`.
            let g = gt_kernel.len();
            let d = dt_kernel.len();
            scratch.iou_buf.clear();
            scratch.iou_buf.resize(g * d, 0.0);
            if g > 0 && d > 0 {
                let mut iou_view = ArrayViewMut2::from_shape((g, d), &mut scratch.iou_buf[..])
                    .map_err(|e| EvalError::DimensionMismatch {
                        detail: format!("iou scratch view: {e}"),
                    })?;
                kernel.compute(&gt_kernel, &dt_kernel, &mut iou_view)?;
            }

            // Read-only `(g, d)` view over the same buffer; shared by
            // every area range of this cell.
            let iou_view = ArrayView2::from_shape((g, d), &scratch.iou_buf[..]).map_err(|e| {
                EvalError::DimensionMismatch {
                    detail: format!("iou scratch view: {e}"),
                }
            })?;
            let buffers = CellBuffers {
                image_id: image_id.0,
                category_id,
                max_det: params.max_dets_per_image,
                gt_areas: &scratch.gt_areas,
                gt_iscrowd: &scratch.gt_iscrowd,
                gt_base_ignore: &scratch.gt_base_ignore,
                gt_ids: &scratch.gt_ids,
                dt_areas: &scratch.dt_areas,
                dt_scores: &scratch.dt_scores,
                dt_ids: &scratch.dt_ids,
                iou: iou_view,
                not_exhaustive: not_exhaustive_for_cell,
            };
            // One matching run per area range; each result lands at
            // flat offset `k * A * I + a * I + i`.
            for (a, area) in params.area_ranges.iter().enumerate() {
                let (cell, meta) = evaluate_cell(
                    &mut scratch.gt_ignore_buf,
                    &buffers,
                    area,
                    params.iou_thresholds,
                    parity_mode,
                )?;
                let flat = nk + a * n_i + i;
                eval_imgs[flat] = Some(Box::new(cell));
                eval_imgs_meta[flat] = Some(Box::new(meta));
            }

            // Retain a clone of the IoU matrix exactly when the caller
            // asked. The check is at end-of-cell so the area-range
            // loop above runs on the borrow `buffers.iou`; cloning
            // here costs O(G*D) f64s, only when retention is active.
            if let Some(map) = retained_ious_map.as_mut() {
                let cloned =
                    Array2::from_shape_vec((g, d), scratch.iou_buf.clone()).map_err(|e| {
                        EvalError::DimensionMismatch {
                            detail: format!("retained iou clone: {e}"),
                        }
                    })?;
                map.insert((k, i), cloned);
            }
        }
    }

    Ok(EvalGrid {
        eval_imgs,
        eval_imgs_meta,
        n_categories: n_k,
        n_area_ranges: n_a,
        n_images: n_i,
        retained_ious: retained_ious_map.map(crate::tables::RetainedIous::from_map),
    })
}
1062
1063/// Run the per-image evaluation pass *and* the cross-class IoU side
1064/// pass (per ADR-0023) in a single call.
1065///
1066/// Returns the standard [`EvalGrid`] alongside a
1067/// [`crate::tables::CrossClassIous`] populated by walking each image's
1068/// un-class-filtered GT and DT lists through the same kernel
1069/// [`evaluate_with`] uses internally. Future TIDE callers consume both
1070/// outputs from one call so they do not pay the matching cost twice.
1071///
1072/// The matching engine is unchanged — the side pass is a separate
1073/// kernel pass at the orchestrator level, preserving the ADR-0005
1074/// invariant that matching is generic over the IoU matrix only. The
1075/// side pass shares `params.max_dets_per_image` with the matching path
1076/// so the DT row indexing across the two passes is consistent.
1077///
1078/// # Errors
1079///
1080/// Propagates [`EvalError`] from either pass.
1081pub(crate) fn evaluate_with_retention<K: EvalKernel>(
1082    gt: &CocoDataset,
1083    dt: &CocoDetections,
1084    params: EvaluateParams<'_>,
1085    parity_mode: ParityMode,
1086    kernel: &K,
1087) -> Result<(EvalGrid, crate::tables::CrossClassIous), EvalError> {
1088    let grid = evaluate_with(gt, dt, params, parity_mode, kernel)?;
1089    let cross_class = crate::tide::compute_cross_class_ious(
1090        gt,
1091        dt,
1092        kernel,
1093        parity_mode,
1094        params.max_dets_per_image,
1095    )?;
1096    Ok((grid, cross_class))
1097}
1098
1099/// Run the per-image bbox evaluation pass. Thin wrapper over
1100/// [`evaluate_with`] with the [`BboxIou`] kernel.
1101///
1102/// # Errors
1103///
1104/// Propagates [`EvalError`] from the underlying kernel and matching
1105/// calls.
1106pub fn evaluate_bbox(
1107    gt: &CocoDataset,
1108    dt: &CocoDetections,
1109    params: EvaluateParams<'_>,
1110    parity_mode: ParityMode,
1111) -> Result<EvalGrid, EvalError> {
1112    evaluate_with(gt, dt, params, parity_mode, &BboxIou)
1113}
1114
1115/// Run the per-image segmentation-mask evaluation pass. Thin wrapper
1116/// over [`evaluate_with`] with the [`SegmIou`] kernel.
1117///
1118/// GTs must carry a `segmentation` field. DT handling is parity-mode
1119/// aware (quirks **J2** / **J6**):
1120///
1121/// - [`ParityMode::Strict`] reproduces `pycocotools/coco.py:341` —
1122///   DTs missing a `segmentation` field have a 4-point rectangle
1123///   polygon synthesized from their bbox and rasterized.
1124/// - [`ParityMode::Corrected`] (the default for net-new users) raises
1125///   [`EvalError::InvalidAnnotation`] instead, which also rejects
1126///   heterogeneous DT lists (some entries with segm, some without)
1127///   per-entry rather than via pycocotools' first-entry-decides
1128///   dispatch.
1129///
1130/// # Errors
1131///
1132/// Propagates [`EvalError`] from the underlying kernel and matching
1133/// calls.
1134pub fn evaluate_segm(
1135    gt: &CocoDataset,
1136    dt: &CocoDetections,
1137    params: EvaluateParams<'_>,
1138    parity_mode: ParityMode,
1139) -> Result<EvalGrid, EvalError> {
1140    evaluate_with(gt, dt, params, parity_mode, &segm_kernel(None))
1141}
1142
1143/// Cached variant of [`evaluate_segm`]: reuses GT bbox + area across
1144/// calls via a caller-owned [`SegmGtCache`].
1145///
1146/// Use this when the same GT dataset is evaluated repeatedly against
1147/// changing detections — e.g. validation passes inside a training
1148/// loop. The first call populates the cache; each subsequent call
1149/// skips the `Rle::bbox` and `Rle::area` walks on the GT side. DT-side
1150/// derivations are always fresh (predictions change per call).
1151///
1152/// The cache is keyed by GT [`crate::dataset::CocoAnnotation::id`].
1153///
1154/// # Errors
1155///
1156/// Propagates [`EvalError`] from the underlying kernel and matching
1157/// calls.
1158pub fn evaluate_segm_cached(
1159    gt: &CocoDataset,
1160    dt: &CocoDetections,
1161    params: EvaluateParams<'_>,
1162    parity_mode: ParityMode,
1163    cache: &SegmGtCache,
1164) -> Result<EvalGrid, EvalError> {
1165    evaluate_with(gt, dt, params, parity_mode, &segm_kernel(Some(cache)))
1166}
1167
1168fn segm_kernel(gt_cache: Option<&SegmGtCache>) -> SegmIouCached<'_> {
1169    SegmIouCached {
1170        scratch: Mutex::new(SegmComputeScratch::new()),
1171        gt_cache: gt_cache.map(GtCacheRef::Borrowed),
1172    }
1173}
1174
/// Kernel used by [`evaluate_segm`] and [`evaluate_segm_cached`] — same
/// semantics as [`SegmIou`] but threads a single `SegmComputeScratch`
/// across every `compute` call (so the dataset-wide pass amortizes
/// per-cell `Vec` allocations across the ~36 k anns of a val2017 pass)
/// and optionally consults a [`SegmGtCache`] for cross-call GT
/// bbox+area reuse.
///
/// The cache reference is generalised through [`GtCacheRef`] so the same
/// kernel feeds both the borrowed batch path (`evaluate_segm_cached`)
/// and the `Arc`-owned streaming path
/// ([`Self::with_arc_cache`] + [`crate::stream::StreamingEvaluator`]).
/// Held by [`Mutex`] to satisfy `Similarity: Send + Sync`; the lock is
/// uncontended in single-threaded use.
pub struct SegmIouCached<'a> {
    /// Compute scratch reused across calls; locked for the duration of
    /// each [`Similarity::compute`] invocation.
    scratch: Mutex<SegmComputeScratch>,
    /// Optional GT cache handle forwarded to the compute routine on
    /// every call; `None` on the uncached [`evaluate_segm`] path.
    gt_cache: Option<GtCacheRef<'a, SegmGtCache>>,
}
1192
1193impl SegmIouCached<'static> {
1194    /// Construct a streaming-friendly kernel that owns its GT cache via
1195    /// [`Arc`] (ADR-0020). The kernel is `'static`, so a
1196    /// [`crate::stream::StreamingEvaluator`] can store it across the
1197    /// worker thread's lifetime; the same `Arc` is held by the FFI
1198    /// `CocoDataset` handle, so derivations populated on one path are
1199    /// visible to the other.
1200    pub fn with_arc_cache(cache: Arc<SegmGtCache>) -> Self {
1201        Self {
1202            scratch: Mutex::new(SegmComputeScratch::new()),
1203            gt_cache: Some(GtCacheRef::Owned(cache)),
1204        }
1205    }
1206}
1207
1208impl Similarity for SegmIouCached<'_> {
1209    type Annotation = SegmAnn;
1210
1211    fn compute(
1212        &self,
1213        gts: &[SegmAnn],
1214        dts: &[SegmAnn],
1215        out: &mut ArrayViewMut2<'_, f64>,
1216    ) -> Result<(), EvalError> {
1217        let mut scratch = self
1218            .scratch
1219            .lock()
1220            .unwrap_or_else(|poisoned| poisoned.into_inner());
1221        segm_iou_compute(
1222            gts,
1223            dts,
1224            out,
1225            &mut scratch,
1226            self.gt_cache.as_ref().map(GtCacheRef::get),
1227        )
1228    }
1229}
1230
impl EvalKernel for SegmIouCached<'_> {
    /// Identifies this kernel as [`KernelKind::Segm`].
    fn kind(&self) -> KernelKind {
        KernelKind::Segm
    }

    /// Builds the GT-side [`SegmAnn`]s for one cell by delegating to
    /// `build_segm_gt_anns`; the kernel's own state is not consulted.
    fn build_gt_anns(
        &self,
        gt_anns: &[CocoAnnotation],
        indices: &[usize],
        image: &ImageMeta,
    ) -> Result<Vec<SegmAnn>, EvalError> {
        build_segm_gt_anns(gt_anns, indices, image)
    }

    /// Builds the DT-side [`SegmAnn`]s for one cell by delegating to
    /// `build_segm_dt_anns`, forwarding `parity_mode` unchanged.
    fn build_dt_anns(
        &self,
        dt_anns: &[CocoDetection],
        indices: &[usize],
        image: &ImageMeta,
        parity_mode: ParityMode,
    ) -> Result<Vec<SegmAnn>, EvalError> {
        build_segm_dt_anns(dt_anns, indices, image, parity_mode)
    }
}
1255
1256/// Run the per-image boundary-IoU evaluation pass (ADR-0010). Thin
1257/// wrapper over [`evaluate_with`] with the [`BoundaryIou`] kernel.
1258///
1259/// `dilation_ratio` controls the boundary band width per ADR-0010 §A2:
1260/// `0.02` is the COCO default and `0.008` is the LVIS variant.
1261///
1262/// GT/DT segmentation handling is identical to [`evaluate_segm`] — same
1263/// J2/J6 parity-mode dispatch on missing DT segmentations, same
1264/// "missing GT segmentation" error.
1265///
1266/// # Errors
1267///
1268/// Propagates [`EvalError`] from the underlying kernel and matching
1269/// calls.
1270pub fn evaluate_boundary(
1271    gt: &CocoDataset,
1272    dt: &CocoDetections,
1273    params: EvaluateParams<'_>,
1274    parity_mode: ParityMode,
1275    dilation_ratio: f64,
1276) -> Result<EvalGrid, EvalError> {
1277    evaluate_with(gt, dt, params, parity_mode, &kernel(dilation_ratio, None))
1278}
1279
1280/// Cached variant of [`evaluate_boundary`]: reuses GT bands across
1281/// calls via a caller-owned [`BoundaryGtCache`].
1282///
1283/// Use this when the same GT dataset is evaluated repeatedly against
1284/// changing detections — e.g. validation passes inside a training
1285/// loop. The first call populates the cache; each subsequent call
1286/// skips GT band derivation. DT bands are always derived fresh
1287/// (predictions change per call).
1288///
1289/// The cache is keyed by GT [`crate::dataset::CocoAnnotation::id`].
1290/// If `dilation_ratio` differs from the previous call's, the cache
1291/// is cleared and re-populated — the bands depend on the ratio.
1292///
1293/// # Errors
1294///
1295/// Propagates [`EvalError`] from the underlying kernel and matching
1296/// calls.
1297pub fn evaluate_boundary_cached(
1298    gt: &CocoDataset,
1299    dt: &CocoDetections,
1300    params: EvaluateParams<'_>,
1301    parity_mode: ParityMode,
1302    dilation_ratio: f64,
1303    cache: &BoundaryGtCache,
1304) -> Result<EvalGrid, EvalError> {
1305    cache.align_ratio(dilation_ratio);
1306    evaluate_with(
1307        gt,
1308        dt,
1309        params,
1310        parity_mode,
1311        &kernel(dilation_ratio, Some(cache)),
1312    )
1313}
1314
1315fn kernel(dilation_ratio: f64, gt_cache: Option<&BoundaryGtCache>) -> BoundaryIouCached<'_> {
1316    BoundaryIouCached {
1317        dilation_ratio,
1318        scratch: Mutex::new(BoundaryComputeScratch::new()),
1319        gt_cache: gt_cache.map(GtCacheRef::Borrowed),
1320    }
1321}
1322
/// Kernel used by [`evaluate_boundary`] and [`evaluate_boundary_cached`]
/// — same semantics as [`BoundaryIou`] but threads a single
/// `BoundaryComputeScratch` across every `compute` call (so the
/// dataset-wide pass amortizes per-mask + per-cell allocations) and
/// optionally consults a [`BoundaryGtCache`] for cross-call GT band
/// reuse.
///
/// The cache reference is generalised through [`GtCacheRef`] so the same
/// kernel feeds both the borrowed batch path
/// (`evaluate_boundary_cached`) and the `Arc`-owned streaming path
/// ([`Self::with_arc_cache`] + [`crate::stream::StreamingEvaluator`]).
/// Held by [`Mutex`] to satisfy `Similarity: Send + Sync`; the lock is
/// uncontended in single-threaded use.
pub struct BoundaryIouCached<'a> {
    /// Boundary band width ratio forwarded to the compute routine on
    /// every call.
    dilation_ratio: f64,
    /// Compute scratch reused across calls; locked for the duration of
    /// each [`Similarity::compute`] invocation.
    scratch: Mutex<BoundaryComputeScratch>,
    /// Optional GT cache handle forwarded to the compute routine on
    /// every call; `None` on the uncached [`evaluate_boundary`] path.
    gt_cache: Option<GtCacheRef<'a, BoundaryGtCache>>,
}
1341
1342impl BoundaryIouCached<'static> {
1343    /// Construct a streaming-friendly kernel that owns its GT cache via
1344    /// [`Arc`] (ADR-0020). The kernel is `'static`, so a
1345    /// [`crate::stream::StreamingEvaluator`] can store it across the
1346    /// worker thread's lifetime; the same `Arc` is held by the FFI
1347    /// `CocoDataset` handle, so derivations populated on one path are
1348    /// visible to the other.
1349    ///
1350    /// Aligns the cache to `dilation_ratio` immediately — mismatched
1351    /// ratio invalidates prior bands, mirroring
1352    /// [`evaluate_boundary_cached`]'s contract.
1353    pub fn with_arc_cache(dilation_ratio: f64, cache: Arc<BoundaryGtCache>) -> Self {
1354        cache.align_ratio(dilation_ratio);
1355        Self {
1356            dilation_ratio,
1357            scratch: Mutex::new(BoundaryComputeScratch::new()),
1358            gt_cache: Some(GtCacheRef::Owned(cache)),
1359        }
1360    }
1361}
1362
1363impl Similarity for BoundaryIouCached<'_> {
1364    type Annotation = SegmAnn;
1365
1366    fn compute(
1367        &self,
1368        gts: &[SegmAnn],
1369        dts: &[SegmAnn],
1370        out: &mut ArrayViewMut2<'_, f64>,
1371    ) -> Result<(), EvalError> {
1372        let mut scratch = self
1373            .scratch
1374            .lock()
1375            .unwrap_or_else(|poisoned| poisoned.into_inner());
1376        boundary_iou_compute(
1377            self.dilation_ratio,
1378            gts,
1379            dts,
1380            out,
1381            &mut scratch,
1382            self.gt_cache.as_ref().map(GtCacheRef::get),
1383        )
1384    }
1385}
1386
impl EvalKernel for BoundaryIouCached<'_> {
    /// Identifies this kernel as [`KernelKind::Boundary`].
    fn kind(&self) -> KernelKind {
        KernelKind::Boundary
    }

    /// Builds the GT-side [`SegmAnn`]s for one cell by delegating to
    /// `build_segm_gt_anns` — the same builder the segm kernel uses.
    fn build_gt_anns(
        &self,
        gt_anns: &[CocoAnnotation],
        indices: &[usize],
        image: &ImageMeta,
    ) -> Result<Vec<SegmAnn>, EvalError> {
        build_segm_gt_anns(gt_anns, indices, image)
    }

    /// Builds the DT-side [`SegmAnn`]s for one cell by delegating to
    /// `build_segm_dt_anns` — the same builder the segm kernel uses —
    /// forwarding `parity_mode` unchanged.
    fn build_dt_anns(
        &self,
        dt_anns: &[CocoDetection],
        indices: &[usize],
        image: &ImageMeta,
        parity_mode: ParityMode,
    ) -> Result<Vec<SegmAnn>, EvalError> {
        build_segm_dt_anns(dt_anns, indices, image, parity_mode)
    }
}
1411
1412/// Run the per-image OKS (`iouType="keypoints"`) evaluation pass per
1413/// ADR-0012. Thin wrapper over [`evaluate_with`] with the
1414/// [`OksSimilarity`] kernel.
1415///
1416/// `sigmas` is the per-category sigma override map consumed by
1417/// [`OksSimilarity::new`]: an empty map means "use
1418/// [`crate::similarity::oks::COCO_PERSON_SIGMAS`] for every category" (quirk **F1**,
1419/// `corrected`). Sigma resolution rules — including the COCO-person
1420/// default and the 17-keypoint length contract — are documented on
1421/// [`OksSimilarity`].
1422///
1423/// ## Caller responsibilities
1424///
1425/// - **Area ranges (quirk D5).** The keypoints-canonical 3-entry grid
1426///   (`all`, `medium`, `large` — pycocotools omits `small`) lives on the
1427///   caller side; pass it through `params.area_ranges`. Reusing the
1428///   detection-canonical 4-entry grid silently introduces an empty
1429///   `small` bucket that diverges from the parity oracle.
1430/// - `params.use_cats=true` is the standard configuration for
1431///   keypoints; per-category sigmas resolve via [`OksSimilarity`]
1432///   regardless.
1433///
1434/// ## Quirks honored here
1435///
1436/// - **D2** (`strict`): GT with zero visible keypoints is treated as an
1437///   implicit ignore region, OR-ed with the dataset-level ignore
1438///   ([`CocoAnnotation::effective_ignore`]) via
1439///   [`EvalKernel::extra_gt_ignore`].
1440/// - **F1**/**F2**/**F3**/**F4**/**F5**: inherited from
1441///   [`OksSimilarity::compute`].
1442///
1443/// GTs and DTs must carry a `keypoints` field; absence raises
1444/// [`EvalError::InvalidAnnotation`]. There is no
1445/// parity-mode-conditional bbox synthesis fallback for keypoints (no
1446/// J2 analog).
1447///
1448/// # Errors
1449///
1450/// Propagates [`EvalError`] from the underlying kernel and matching
1451/// calls.
1452pub fn evaluate_keypoints(
1453    gt: &CocoDataset,
1454    dt: &CocoDetections,
1455    params: EvaluateParams<'_>,
1456    parity_mode: ParityMode,
1457    sigmas: HashMap<i64, Vec<f64>>,
1458) -> Result<EvalGrid, EvalError> {
1459    evaluate_with(gt, dt, params, parity_mode, &OksSimilarity::new(sigmas))
1460}
1461
1462fn gt_indices_for_cell(gt: &CocoDataset, image: ImageId, cat: Option<CategoryId>) -> &[usize] {
1463    match cat {
1464        Some(c) => gt.ann_indices_for(image, c),
1465        None => gt.ann_indices_for_image(image),
1466    }
1467}
1468
1469/// Raw (un-sorted, un-truncated) DT index slice for a cell. The hot
1470/// loop in [`evaluate_with`] uses this to short-circuit empty cells
1471/// before incurring the score gather + sort cost in
1472/// [`dt_top_indices_for_cell_into`].
1473fn raw_dt_indices_for_cell(
1474    dt: &CocoDetections,
1475    image: ImageId,
1476    cat: Option<CategoryId>,
1477) -> &[usize] {
1478    match cat {
1479        Some(c) => dt.indices_for(image, c),
1480        None => dt.indices_for_image(image),
1481    }
1482}
1483
1484pub(crate) fn dt_top_indices_for_cell(
1485    dt: &CocoDetections,
1486    image: ImageId,
1487    cat: Option<CategoryId>,
1488    max_dets: usize,
1489) -> Vec<usize> {
1490    let raw_indices = raw_dt_indices_for_cell(dt, image, cat);
1491    let mut out = Vec::new();
1492    let mut score_buf = Vec::new();
1493    let mut perm_buf = Vec::new();
1494    dt_top_indices_for_cell_into(
1495        &mut out,
1496        &mut score_buf,
1497        &mut perm_buf,
1498        dt.detections(),
1499        raw_indices,
1500        max_dets,
1501    );
1502    out
1503}
1504
1505/// Allocation-free counterpart to [`dt_top_indices_for_cell`]. Fills
1506/// `out` with the top-`max_dets` DT input indices ordered by descending
1507/// score (stable mergesort, quirk **A1**), reusing `score_buf` and
1508/// `perm_buf` across calls. The hot per-cell loop in [`evaluate_with`]
1509/// would otherwise pay three allocator round-trips per `(image,
1510/// category)` cell — across val2017's 14k non-empty cells that
1511/// dominates the score-sort wall time.
1512fn dt_top_indices_for_cell_into(
1513    out: &mut Vec<usize>,
1514    score_buf: &mut Vec<f64>,
1515    perm_buf: &mut Vec<usize>,
1516    dts: &[CocoDetection],
1517    raw_indices: &[usize],
1518    max_dets: usize,
1519) {
1520    score_buf.clear();
1521    score_buf.extend(raw_indices.iter().map(|&i| dts[i].score));
1522    perm_buf.clear();
1523    perm_buf.extend(0..score_buf.len());
1524    // Stable mergesort tiebreak (quirk A1) — must match
1525    // `argsort_score_desc` semantics bit-for-bit.
1526    perm_buf.sort_by(|&a, &b| {
1527        score_buf[b]
1528            .partial_cmp(&score_buf[a])
1529            .unwrap_or(std::cmp::Ordering::Equal)
1530    });
1531    out.clear();
1532    out.extend(perm_buf.iter().take(max_dets).map(|&k| raw_indices[k]));
1533}
1534
/// Reusable scratch storage for the `(image, category)` loop in
/// [`evaluate_with`].
///
/// Every field is a `Vec` that is `clear()`-ed and refilled per cell,
/// so once a buffer has grown to its high-water capacity later cells
/// reuse it instead of going back to the allocator. On val2017
/// (~14k non-empty cells) that removes on the order of 150k allocator
/// round-trips.
#[derive(Default)]
struct CellScratch {
    // --- GT gathers, one entry per GT index in the cell ---
    /// GT areas.
    gt_areas: Vec<f64>,
    /// GT `iscrowd` flags.
    gt_iscrowd: Vec<bool>,
    /// GT base ignore flags, before any area-range filtering.
    gt_base_ignore: Vec<bool>,
    /// GT annotation ids.
    gt_ids: Vec<i64>,

    // --- DT selection and gathers ---
    /// Score-ordered, capped DT input indices written by
    /// [`dt_top_indices_for_cell_into`].
    dt_indices: Vec<usize>,
    /// DT areas, one entry per element of `dt_indices`.
    dt_areas: Vec<f64>,
    /// DT scores, one entry per element of `dt_indices`.
    dt_scores: Vec<f64>,
    /// DT ids, one entry per element of `dt_indices`.
    dt_ids: Vec<i64>,

    // --- Kernel and sort scratch ---
    /// Flat backing store for the `(g, d)` IoU matrix. It is resized
    /// and zeroed for each cell, and the kernel writes into it through
    /// an `ArrayViewMut2` borrowed over this buffer.
    iou_buf: Vec<f64>,
    /// Gathered-score scratch handed to
    /// [`dt_top_indices_for_cell_into`].
    dt_score_buf: Vec<f64>,
    /// Permutation scratch handed to
    /// [`dt_top_indices_for_cell_into`].
    dt_perm_buf: Vec<usize>,
    /// Per-area-range `gt_ignore` mask passed to [`evaluate_cell`].
    /// Keeping it here saves one `Vec<bool>` allocation for every
    /// area-range pass of every cell.
    gt_ignore_buf: Vec<bool>,
}

impl CellScratch {
    /// Creates a scratch set in which every buffer starts empty.
    fn new() -> Self {
        Self::default()
    }
}
1575
1576/// Area-invariant per-cell buffers shared across every area-range pass.
1577struct CellBuffers<'a> {
1578    image_id: i64,
1579    category_id: i64,
1580    max_det: usize,
1581    gt_areas: &'a [f64],
1582    gt_iscrowd: &'a [bool],
1583    gt_base_ignore: &'a [bool],
1584    gt_ids: &'a [i64],
1585    dt_areas: &'a [f64],
1586    dt_scores: &'a [f64],
1587    dt_ids: &'a [i64],
1588    iou: ArrayView2<'a, f64>,
1589    /// LVIS federated AA3: when `true`, the entire `(image, category)`
1590    /// cell is in `not_exhaustive_category_ids[image]`, so every
1591    /// unmatched DT in the cell gets `dt_ignore = true` (mirrors
1592    /// lvis-api `eval.py:278`). `false` outside LVIS evaluation.
1593    not_exhaustive: bool,
1594}
1595
1596fn evaluate_cell(
1597    gt_ignore_buf: &mut Vec<bool>,
1598    buf: &CellBuffers<'_>,
1599    area: &AreaRange,
1600    iou_thresholds: &[f64],
1601    parity_mode: ParityMode,
1602) -> Result<(PerImageEval, EvalImageMeta), EvalError> {
1603    // D3 + D6/D7: per-call ignore = base | out-of-area. Filled into a
1604    // scratch buffer owned by the caller — this Vec is the same length
1605    // every cell-area pair on a given image, so reusing the allocation
1606    // across all 4 area ranges (and across cells of similar shape)
1607    // amortizes ~14k allocator round-trips on val2017.
1608    gt_ignore_buf.clear();
1609    gt_ignore_buf.extend(
1610        buf.gt_base_ignore
1611            .iter()
1612            .zip(buf.gt_areas)
1613            .map(|(&base, &a)| base || !area.contains(a)),
1614    );
1615    let gt_ignore: &[bool] = gt_ignore_buf.as_slice();
1616
1617    let MatchResult {
1618        dt_perm,
1619        gt_perm,
1620        dt_matches: dt_matches_pos,
1621        gt_matches: gt_matches_pos,
1622        mut dt_ignore,
1623    } = match_image(
1624        buf.iou,
1625        gt_ignore,
1626        buf.gt_iscrowd,
1627        buf.dt_scores,
1628        iou_thresholds,
1629        parity_mode,
1630    )?;
1631
1632    let n_t = iou_thresholds.len();
1633    let n_d = buf.dt_scores.len();
1634    let n_g = gt_ignore.len();
1635
1636    let dt_scores_sorted: Vec<f64> = dt_perm.iter().map(|&k| buf.dt_scores[k]).collect();
1637    let gt_ignore_sorted: Vec<bool> = gt_perm.iter().map(|&k| gt_ignore[k]).collect();
1638    let dt_ids_sorted: Vec<i64> = dt_perm.iter().map(|&k| buf.dt_ids[k]).collect();
1639    let gt_ids_sorted: Vec<i64> = gt_perm.iter().map(|&k| buf.gt_ids[k]).collect();
1640
1641    let mut dt_matched = Array2::<bool>::default((n_t, n_d));
1642    let mut dt_matches_id = Array2::<i64>::zeros((n_t, n_d));
1643    let mut gt_matches_id = Array2::<i64>::zeros((n_t, n_g));
1644    // d-outer / t-inner reorders the original loop so the per-d
1645    // `area.contains(buf.dt_areas[dt_perm[d]])` test runs once per
1646    // detection instead of `n_t` times — dropping the prior
1647    // `dt_in_range_sorted: Vec<bool>` allocation entirely. Writes to
1648    // the three result `Array2`s are independent across `(t, d)`, so
1649    // the reorder is bit-equivalent to the original.
1650    for d in 0..n_d {
1651        let in_range = area.contains(buf.dt_areas[dt_perm[d]]);
1652        for t in 0..n_t {
1653            let m = dt_matches_pos[(t, d)];
1654            let matched = m >= 0;
1655            dt_matched[(t, d)] = matched;
1656            if matched {
1657                dt_matches_id[(t, d)] = gt_ids_sorted[m as usize];
1658            }
1659            // B7: unmatched AND out-of-area → ignore.
1660            // AA3 (LVIS): unmatched in a not_exhaustive cell → ignore.
1661            // Both branches share the same `dt_ignore` field; the
1662            // matching engine never sees the LVIS-specific flag.
1663            if !matched && (!in_range || buf.not_exhaustive) {
1664                dt_ignore[(t, d)] = true;
1665            }
1666        }
1667    }
1668    for t in 0..n_t {
1669        for g in 0..n_g {
1670            let p = gt_matches_pos[(t, g)];
1671            if p >= 0 {
1672                gt_matches_id[(t, g)] = dt_ids_sorted[p as usize];
1673            }
1674        }
1675    }
1676
1677    let cell = PerImageEval {
1678        dt_scores: dt_scores_sorted,
1679        dt_matched,
1680        dt_ignore,
1681        gt_ignore: gt_ignore_sorted,
1682    };
1683    let meta = EvalImageMeta {
1684        image_id: buf.image_id,
1685        category_id: buf.category_id,
1686        area_rng: [area.lo, area.hi],
1687        max_det: buf.max_det,
1688        dt_ids: dt_ids_sorted,
1689        gt_ids: gt_ids_sorted,
1690        dt_matches: dt_matches_id,
1691        gt_matches: gt_matches_id,
1692    };
1693    Ok((cell, meta))
1694}
1695
1696#[cfg(test)]
1697mod tests {
1698    use super::*;
1699    use crate::accumulate::{accumulate, AccumulateParams};
1700    use crate::dataset::{AnnId, Bbox, CategoryMeta, CocoAnnotation, DetectionInput, ImageMeta};
1701    use crate::parity::{iou_thresholds, recall_thresholds};
1702    use crate::summarize::summarize_detection;
1703
1704    fn img(id: i64, w: u32, h: u32) -> ImageMeta {
1705        ImageMeta {
1706            id: ImageId(id),
1707            width: w,
1708            height: h,
1709            file_name: None,
1710        }
1711    }
1712
1713    fn cat(id: i64, name: &str) -> CategoryMeta {
1714        CategoryMeta {
1715            id: CategoryId(id),
1716            name: name.into(),
1717            supercategory: None,
1718        }
1719    }
1720
1721    fn ann(id: i64, image: i64, cat: i64, bbox: (f64, f64, f64, f64)) -> CocoAnnotation {
1722        CocoAnnotation {
1723            id: AnnId(id),
1724            image_id: ImageId(image),
1725            category_id: CategoryId(cat),
1726            area: bbox.2 * bbox.3,
1727            is_crowd: false,
1728            ignore_flag: None,
1729            bbox: Bbox {
1730                x: bbox.0,
1731                y: bbox.1,
1732                w: bbox.2,
1733                h: bbox.3,
1734            },
1735            segmentation: None,
1736            keypoints: None,
1737            num_keypoints: None,
1738        }
1739    }
1740
1741    fn dt_input(image: i64, cat: i64, score: f64, bbox: (f64, f64, f64, f64)) -> DetectionInput {
1742        DetectionInput {
1743            id: None,
1744            image_id: ImageId(image),
1745            category_id: CategoryId(cat),
1746            score,
1747            bbox: Bbox {
1748                x: bbox.0,
1749                y: bbox.1,
1750                w: bbox.2,
1751                h: bbox.3,
1752            },
1753            segmentation: None,
1754            keypoints: None,
1755            num_keypoints: None,
1756        }
1757    }
1758
1759    fn perfect_match_grid() -> EvalGrid {
1760        let images = vec![img(1, 100, 100)];
1761        let cats = vec![cat(1, "thing")];
1762        let anns = vec![
1763            ann(1, 1, 1, (0.0, 0.0, 10.0, 10.0)),
1764            ann(2, 1, 1, (50.0, 50.0, 10.0, 10.0)),
1765        ];
1766        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
1767        let dts = CocoDetections::from_inputs(vec![
1768            dt_input(1, 1, 0.9, (0.0, 0.0, 10.0, 10.0)),
1769            dt_input(1, 1, 0.8, (50.0, 50.0, 10.0, 10.0)),
1770        ])
1771        .unwrap();
1772        let area = AreaRange::coco_default();
1773        let params = EvaluateParams {
1774            iou_thresholds: iou_thresholds(),
1775            area_ranges: &area,
1776            max_dets_per_image: 100,
1777            use_cats: true,
1778            retain_iou: false,
1779        };
1780        evaluate_bbox(&gt, &dts, params, ParityMode::Strict).unwrap()
1781    }
1782
1783    #[test]
1784    fn d4_coco_default_area_ranges_pin_literal_values() {
1785        // D4: the four COCO buckets are (0, 1e10), (0, 1024),
1786        // (1024, 9216), (9216, 1e10), labelled "all" / "small" /
1787        // "medium" / "large". Pin the literal numbers — the 1e10 sentinel
1788        // and the 32² / 96² boundaries are the parity contract; bumping
1789        // either silently in source would shift bucket membership
1790        // throughout the suite.
1791        let ranges = AreaRange::coco_default();
1792        assert_eq!(ranges.len(), 4);
1793        assert_eq!(
1794            (ranges[0].lo, ranges[0].hi),
1795            (0.0, 1e10),
1796            "all bucket bounds"
1797        );
1798        assert_eq!(
1799            (ranges[1].lo, ranges[1].hi),
1800            (0.0, 1024.0),
1801            "small bucket bounds"
1802        );
1803        assert_eq!(
1804            (ranges[2].lo, ranges[2].hi),
1805            (1024.0, 9216.0),
1806            "medium bucket bounds"
1807        );
1808        assert_eq!(
1809            (ranges[3].lo, ranges[3].hi),
1810            (9216.0, 1e10),
1811            "large bucket bounds"
1812        );
1813
1814        // A-axis indices line up with crate::AreaRng's labelled
1815        // constants. The summarizer keys on `index`, so this is the
1816        // bridge between the orchestrator and the canonical labels.
1817        use crate::summarize::AreaRng;
1818        assert_eq!(ranges[0].index, AreaRng::ALL.index);
1819        assert_eq!(AreaRng::ALL.label.as_ref(), "all");
1820        assert_eq!(ranges[1].index, AreaRng::SMALL.index);
1821        assert_eq!(AreaRng::SMALL.label.as_ref(), "small");
1822        assert_eq!(ranges[2].index, AreaRng::MEDIUM.index);
1823        assert_eq!(AreaRng::MEDIUM.label.as_ref(), "medium");
1824        assert_eq!(ranges[3].index, AreaRng::LARGE.index);
1825        assert_eq!(AreaRng::LARGE.label.as_ref(), "large");
1826
1827        // The 1e10 upper bound is bit-equal to pycocotools' `1e5 ** 2`.
1828        // Pinning the bit pattern guarantees the strict-mode area filter
1829        // makes the same `>` / `<` decisions the Python reference does.
1830        let pyco_unbounded: f64 = 1e5_f64.powi(2);
1831        assert_eq!(pyco_unbounded.to_bits(), 1e10_f64.to_bits());
1832        assert_eq!(ranges[0].hi.to_bits(), 1e10_f64.to_bits());
1833        assert_eq!(ranges[3].hi.to_bits(), 1e10_f64.to_bits());
1834    }
1835
1836    #[test]
1837    fn perfect_match_produces_one_cell_per_area_range() {
1838        let grid = perfect_match_grid();
1839        assert_eq!(grid.n_categories, 1);
1840        assert_eq!(grid.n_area_ranges, 4);
1841        assert_eq!(grid.n_images, 1);
1842        // Both DTs perfectly overlap their GTs → all four area cells exist.
1843        let cells: Vec<_> = grid.eval_imgs.iter().filter(|c| c.is_some()).collect();
1844        assert_eq!(cells.len(), 4);
1845        // The "all" bucket (a=0) has both DTs matched at every threshold.
1846        let all_cell = grid.cell(0, 0, 0).unwrap();
1847        assert_eq!(all_cell.dt_scores.len(), 2);
1848        assert!(all_cell.dt_matched.iter().all(|&m| m));
1849        assert!(all_cell.dt_ignore.iter().all(|&ig| !ig));
1850    }
1851
1852    #[test]
1853    fn perfect_match_summarizes_to_one() {
1854        let grid = perfect_match_grid();
1855        let max_dets = vec![1usize, 10, 100];
1856        let acc = accumulate(
1857            &grid.eval_imgs,
1858            AccumulateParams {
1859                iou_thresholds: iou_thresholds(),
1860                recall_thresholds: recall_thresholds(),
1861                max_dets: &max_dets,
1862                n_categories: grid.n_categories,
1863                n_area_ranges: grid.n_area_ranges,
1864                n_images: grid.n_images,
1865            },
1866            ParityMode::Strict,
1867        )
1868        .unwrap();
1869        let summary = summarize_detection(&acc, iou_thresholds(), &max_dets).unwrap();
1870        let stats = summary.stats();
1871        // GTs are 10x10 → area 100, which falls inside `small` (< 32²)
1872        // and `all`. `medium` and `large` see no in-range GTs, so AP and
1873        // AR collapse to the -1 sentinel (quirk C5).
1874        assert!((stats[0] - 1.0).abs() < 1e-12, "AP={}", stats[0]);
1875        assert!((stats[3] - 1.0).abs() < 1e-12, "AP_S={}", stats[3]);
1876        assert_eq!(stats[4], -1.0, "AP_M should be -1 with no medium GTs");
1877        assert_eq!(stats[5], -1.0, "AP_L should be -1 with no large GTs");
1878        assert!((stats[8] - 1.0).abs() < 1e-12, "AR@100={}", stats[8]);
1879    }
1880
1881    #[test]
1882    fn b7_unmatched_dt_outside_area_range_is_ignored() {
1883        // GT and DT both 200x200 (40000 area, "large" bucket). The
1884        // small-area cell (a=1, range [0, 32²)) sees the GT as ignored
1885        // (D6/D7) and the unmatched DT as ignored (B7).
1886        let images = vec![img(1, 300, 300)];
1887        let cats = vec![cat(1, "thing")];
1888        let anns = vec![ann(1, 1, 1, (0.0, 0.0, 200.0, 200.0))];
1889        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
1890        let dts =
1891            CocoDetections::from_inputs(vec![dt_input(1, 1, 0.5, (200.0, 200.0, 50.0, 50.0))])
1892                .unwrap();
1893        let area = AreaRange::coco_default();
1894        let params = EvaluateParams {
1895            iou_thresholds: iou_thresholds(),
1896            area_ranges: &area,
1897            max_dets_per_image: 100,
1898            use_cats: true,
1899            retain_iou: false,
1900        };
1901        let grid = evaluate_bbox(&gt, &dts, params, ParityMode::Strict).unwrap();
1902        let small = grid.cell(0, 1, 0).unwrap();
1903        // GT is out-of-area, so gt_ignore=true.
1904        assert_eq!(small.gt_ignore, vec![true]);
1905        // DT is unmatched (no IoU with GT) AND out-of-area → B7 sets ignore.
1906        assert!(small.dt_ignore.iter().all(|&ig| ig));
1907        assert!(small.dt_matched.iter().all(|&m| !m));
1908    }
1909
1910    #[test]
1911    fn d6_boundary_area_lands_in_both_buckets() {
1912        // D6 (strict): pycocotools (cocoeval.py:251) uses non-strict
1913        // inclusion on both ends, so a GT/DT with area exactly equal to a
1914        // bucket boundary (32² = 1024) lands in *both* adjacent buckets.
1915        let images = vec![img(1, 100, 100)];
1916        let cats = vec![cat(1, "thing")];
1917        // 32x32 → area 1024 exactly.
1918        let anns = vec![ann(1, 1, 1, (0.0, 0.0, 32.0, 32.0))];
1919        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
1920        let dts =
1921            CocoDetections::from_inputs(vec![dt_input(1, 1, 0.5, (0.0, 0.0, 32.0, 32.0))]).unwrap();
1922        let area = AreaRange::coco_default();
1923        let params = EvaluateParams {
1924            iou_thresholds: iou_thresholds(),
1925            area_ranges: &area,
1926            max_dets_per_image: 100,
1927            use_cats: true,
1928            retain_iou: false,
1929        };
1930        let grid = evaluate_bbox(&gt, &dts, params, ParityMode::Strict).unwrap();
1931        // small (lo=0, hi=32²=1024): area 1024 == hi → included.
1932        let small = grid.cell(0, 1, 0).unwrap();
1933        assert_eq!(small.gt_ignore, vec![false]);
1934        // medium (lo=1024, hi=96²=9216): area 1024 == lo → included.
1935        let medium = grid.cell(0, 2, 0).unwrap();
1936        assert_eq!(medium.gt_ignore, vec![false]);
1937        // all (lo=0, hi=1e10): area 1024 lies inside.
1938        let all = grid.cell(0, 0, 0).unwrap();
1939        assert_eq!(all.gt_ignore, vec![false]);
1940        // large (lo=96²=9216, hi=1e10): area 1024 < 9216 → ignored.
1941        let large = grid.cell(0, 3, 0).unwrap();
1942        assert_eq!(large.gt_ignore, vec![true]);
1943    }
1944
1945    #[test]
1946    fn l4_use_cats_false_collapses_categories() {
1947        let images = vec![img(1, 100, 100)];
1948        let cats = vec![cat(1, "a"), cat(2, "b")];
1949        let anns = vec![
1950            ann(1, 1, 1, (0.0, 0.0, 10.0, 10.0)),
1951            ann(2, 1, 2, (50.0, 50.0, 10.0, 10.0)),
1952        ];
1953        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
1954        // DT with category=1 overlapping the cat-2 GT — only matches
1955        // when use_cats=false.
1956        let dts = CocoDetections::from_inputs(vec![dt_input(1, 1, 0.9, (50.0, 50.0, 10.0, 10.0))])
1957            .unwrap();
1958        let area = AreaRange::coco_default();
1959        let params = EvaluateParams {
1960            iou_thresholds: iou_thresholds(),
1961            area_ranges: &area,
1962            max_dets_per_image: 100,
1963            use_cats: false,
1964            retain_iou: false,
1965        };
1966        let grid = evaluate_bbox(&gt, &dts, params, ParityMode::Strict).unwrap();
1967        assert_eq!(grid.n_categories, 1);
1968        let all = grid.cell(0, 0, 0).unwrap();
1969        // Both GTs land in the single bucket; the DT matches the second.
1970        assert_eq!(all.gt_ignore.len(), 2);
1971        assert_eq!(all.dt_scores.len(), 1);
1972        assert!(all.dt_matched.iter().all(|&m| m));
1973    }
1974
1975    #[test]
1976    fn max_dets_per_image_caps_top_n_by_score() {
1977        let images = vec![img(1, 100, 100)];
1978        let cats = vec![cat(1, "thing")];
1979        let anns = vec![ann(1, 1, 1, (0.0, 0.0, 10.0, 10.0))];
1980        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
1981        let dts = CocoDetections::from_inputs(vec![
1982            dt_input(1, 1, 0.1, (50.0, 50.0, 5.0, 5.0)),
1983            dt_input(1, 1, 0.9, (0.0, 0.0, 10.0, 10.0)),
1984            dt_input(1, 1, 0.5, (50.0, 50.0, 5.0, 5.0)),
1985        ])
1986        .unwrap();
1987        let area = AreaRange::coco_default();
1988        let params = EvaluateParams {
1989            iou_thresholds: iou_thresholds(),
1990            area_ranges: &area,
1991            max_dets_per_image: 2,
1992            use_cats: true,
1993            retain_iou: false,
1994        };
1995        let grid = evaluate_bbox(&gt, &dts, params, ParityMode::Strict).unwrap();
1996        let all = grid.cell(0, 0, 0).unwrap();
1997        // Only the top-2 by score survive the cap.
1998        assert_eq!(all.dt_scores.len(), 2);
1999        assert_eq!(all.dt_scores[0], 0.9);
2000        assert_eq!(all.dt_scores[1], 0.5);
2001    }
2002
2003    #[test]
2004    fn d1_parity_mode_propagates_to_base_ignore() {
2005        // GT with iscrowd=0 and explicit ignore=1.
2006        // Strict (pycocotools): ignore := iscrowd → false, the GT
2007        // counts and the matching DT scores a TP.
2008        // Corrected: respects user's ignore=1 → true, the GT becomes
2009        // ignored and the DT picks it up via B6 (dt_ignore=true).
2010        const ANN_JSON: &str = r#"{
2011            "images": [{"id": 1, "width": 100, "height": 100}],
2012            "annotations": [
2013                {"id": 1, "image_id": 1, "category_id": 1,
2014                 "bbox": [0, 0, 10, 10], "area": 100,
2015                 "iscrowd": 0, "ignore": 1}
2016            ],
2017            "categories": [{"id": 1, "name": "thing"}]
2018        }"#;
2019        let gt = CocoDataset::from_json_bytes(ANN_JSON.as_bytes()).unwrap();
2020        let dts =
2021            CocoDetections::from_inputs(vec![dt_input(1, 1, 0.9, (0.0, 0.0, 10.0, 10.0))]).unwrap();
2022        let area = AreaRange::coco_default();
2023        let params = EvaluateParams {
2024            iou_thresholds: iou_thresholds(),
2025            area_ranges: &area,
2026            max_dets_per_image: 100,
2027            use_cats: true,
2028            retain_iou: false,
2029        };
2030
2031        let strict = evaluate_bbox(&gt, &dts, params, ParityMode::Strict).unwrap();
2032        let strict_all = strict.cell(0, 0, 0).unwrap();
2033        assert_eq!(strict_all.gt_ignore, vec![false]);
2034        assert!(strict_all.dt_ignore.iter().all(|&ig| !ig));
2035
2036        let corrected = evaluate_bbox(&gt, &dts, params, ParityMode::Corrected).unwrap();
2037        let corrected_all = corrected.cell(0, 0, 0).unwrap();
2038        assert_eq!(corrected_all.gt_ignore, vec![true]);
2039        // DT matched the now-ignored GT → B6 inherits the ignore flag.
2040        assert!(corrected_all.dt_ignore.iter().all(|&ig| ig));
2041    }
2042
2043    #[test]
2044    fn cell_meta_carries_pycocotools_shape() {
2045        let grid = perfect_match_grid();
2046        // The "all" bucket sees both DTs matched.
2047        let meta = grid.cell_meta(0, 0, 0).unwrap();
2048        assert_eq!(meta.image_id, 1);
2049        assert_eq!(meta.category_id, 1);
2050        assert_eq!(meta.area_rng, [0.0, AREA_UNBOUNDED]);
2051        assert_eq!(meta.max_det, 100);
2052        // DTs sorted score-desc: id=1 (score 0.9) before id=2 (score 0.8).
2053        assert_eq!(meta.dt_ids, vec![1, 2]);
2054        // GTs sorted ignore-asc: both non-ignore, stable order preserved.
2055        assert_eq!(meta.gt_ids, vec![1, 2]);
2056        let n_t = iou_thresholds().len();
2057        assert_eq!(meta.dt_matches.shape(), &[n_t, 2]);
2058        assert_eq!(meta.gt_matches.shape(), &[n_t, 2]);
2059        // dt_matches carries the matched GT id (or 0); both DTs perfectly
2060        // overlap their same-position GT at every threshold.
2061        for t in 0..n_t {
2062            assert_eq!(meta.dt_matches[(t, 0)], 1, "dt[0] -> gt[1] at t={t}");
2063            assert_eq!(meta.dt_matches[(t, 1)], 2, "dt[1] -> gt[2] at t={t}");
2064            assert_eq!(meta.gt_matches[(t, 0)], 1, "gt[1] -> dt[1] at t={t}");
2065            assert_eq!(meta.gt_matches[(t, 1)], 2, "gt[2] -> dt[2] at t={t}");
2066        }
2067    }
2068
2069    #[test]
2070    fn cell_meta_unmatched_dt_uses_zero_sentinel() {
2071        // Single GT, single DT with no overlap → unmatched at every threshold.
2072        let images = vec![img(1, 100, 100)];
2073        let cats = vec![cat(1, "thing")];
2074        let anns = vec![ann(7, 1, 1, (0.0, 0.0, 10.0, 10.0))];
2075        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
2076        let dts = CocoDetections::from_inputs(vec![dt_input(1, 1, 0.5, (50.0, 50.0, 10.0, 10.0))])
2077            .unwrap();
2078        let area = AreaRange::coco_default();
2079        let params = EvaluateParams {
2080            iou_thresholds: iou_thresholds(),
2081            area_ranges: &area,
2082            max_dets_per_image: 100,
2083            use_cats: true,
2084            retain_iou: false,
2085        };
2086        let grid = evaluate_bbox(&gt, &dts, params, ParityMode::Strict).unwrap();
2087        let meta = grid.cell_meta(0, 0, 0).unwrap();
2088        assert_eq!(meta.gt_ids, vec![7]);
2089        // Auto-assigned DT id starts at 1 (first detection).
2090        assert_eq!(meta.dt_ids.len(), 1);
2091        assert!(meta.dt_matches.iter().all(|&x| x == 0));
2092        assert!(meta.gt_matches.iter().all(|&x| x == 0));
2093    }
2094
2095    #[test]
2096    fn cell_meta_use_cats_false_emits_sentinel_category() {
2097        let images = vec![img(1, 100, 100)];
2098        let cats = vec![cat(1, "a"), cat(2, "b")];
2099        let anns = vec![ann(1, 1, 1, (0.0, 0.0, 10.0, 10.0))];
2100        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
2101        let dts =
2102            CocoDetections::from_inputs(vec![dt_input(1, 1, 0.9, (0.0, 0.0, 10.0, 10.0))]).unwrap();
2103        let area = AreaRange::coco_default();
2104        let params = EvaluateParams {
2105            iou_thresholds: iou_thresholds(),
2106            area_ranges: &area,
2107            max_dets_per_image: 100,
2108            use_cats: false,
2109            retain_iou: false,
2110        };
2111        let grid = evaluate_bbox(&gt, &dts, params, ParityMode::Strict).unwrap();
2112        let meta = grid.cell_meta(0, 0, 0).unwrap();
2113        assert_eq!(meta.category_id, COLLAPSED_CATEGORY_SENTINEL);
2114    }
2115
2116    #[test]
2117    fn missing_dt_image_yields_none_cells() {
2118        // Pycocotools' `evaluateImg` returns a record (not None) when
2119        // GTs exist but DTs do not — vernier matches that.
2120        let images = vec![img(1, 100, 100), img(2, 100, 100)];
2121        let cats = vec![cat(1, "thing")];
2122        let anns = vec![ann(1, 1, 1, (0.0, 0.0, 10.0, 10.0))];
2123        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
2124        let dts = CocoDetections::from_inputs(vec![]).unwrap();
2125        let area = AreaRange::coco_default();
2126        let params = EvaluateParams {
2127            iou_thresholds: iou_thresholds(),
2128            area_ranges: &area,
2129            max_dets_per_image: 100,
2130            use_cats: true,
2131            retain_iou: false,
2132        };
2133        let grid = evaluate_bbox(&gt, &dts, params, ParityMode::Strict).unwrap();
2134        for a in 0..4 {
2135            assert!(grid.cell(0, a, 0).is_some(), "image 1 area {a}");
2136            assert!(grid.cell(0, a, 1).is_none(), "image 2 area {a}");
2137        }
2138    }
2139
2140    fn square_polygon(x: f64, y: f64, side: f64) -> Segmentation {
2141        Segmentation::Polygons(vec![vec![
2142            x,
2143            y,
2144            x + side,
2145            y,
2146            x + side,
2147            y + side,
2148            x,
2149            y + side,
2150        ]])
2151    }
2152
2153    fn ann_with_segm(
2154        id: i64,
2155        image: i64,
2156        cat: i64,
2157        bbox: (f64, f64, f64, f64),
2158        segm: Segmentation,
2159    ) -> CocoAnnotation {
2160        CocoAnnotation {
2161            id: AnnId(id),
2162            image_id: ImageId(image),
2163            category_id: CategoryId(cat),
2164            area: bbox.2 * bbox.3,
2165            is_crowd: false,
2166            ignore_flag: None,
2167            bbox: Bbox {
2168                x: bbox.0,
2169                y: bbox.1,
2170                w: bbox.2,
2171                h: bbox.3,
2172            },
2173            segmentation: Some(segm),
2174            keypoints: None,
2175            num_keypoints: None,
2176        }
2177    }
2178
2179    fn dt_input_with_segm(
2180        image: i64,
2181        cat: i64,
2182        score: f64,
2183        bbox: (f64, f64, f64, f64),
2184        segm: Segmentation,
2185    ) -> DetectionInput {
2186        DetectionInput {
2187            id: None,
2188            image_id: ImageId(image),
2189            category_id: CategoryId(cat),
2190            score,
2191            bbox: Bbox {
2192                x: bbox.0,
2193                y: bbox.1,
2194                w: bbox.2,
2195                h: bbox.3,
2196            },
2197            segmentation: Some(segm),
2198            keypoints: None,
2199            num_keypoints: None,
2200        }
2201    }
2202
2203    #[test]
2204    fn segm_perfect_overlap_summarizes_to_one() {
2205        let images = vec![img(1, 100, 100)];
2206        let cats = vec![cat(1, "thing")];
2207        let anns = vec![ann_with_segm(
2208            1,
2209            1,
2210            1,
2211            (10.0, 10.0, 20.0, 20.0),
2212            square_polygon(10.0, 10.0, 20.0),
2213        )];
2214        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
2215        let dts = CocoDetections::from_inputs(vec![dt_input_with_segm(
2216            1,
2217            1,
2218            0.9,
2219            (10.0, 10.0, 20.0, 20.0),
2220            square_polygon(10.0, 10.0, 20.0),
2221        )])
2222        .unwrap();
2223        let area = AreaRange::coco_default();
2224        let params = EvaluateParams {
2225            iou_thresholds: iou_thresholds(),
2226            area_ranges: &area,
2227            max_dets_per_image: 100,
2228            use_cats: true,
2229            retain_iou: false,
2230        };
2231        let grid = evaluate_segm(&gt, &dts, params, ParityMode::Strict).unwrap();
2232        let max_dets = vec![1usize, 10, 100];
2233        let acc = accumulate(
2234            &grid.eval_imgs,
2235            AccumulateParams {
2236                iou_thresholds: iou_thresholds(),
2237                recall_thresholds: recall_thresholds(),
2238                max_dets: &max_dets,
2239                n_categories: grid.n_categories,
2240                n_area_ranges: grid.n_area_ranges,
2241                n_images: grid.n_images,
2242            },
2243            ParityMode::Strict,
2244        )
2245        .unwrap();
2246        let summary = summarize_detection(&acc, iou_thresholds(), &max_dets).unwrap();
2247        let stats = summary.stats();
2248        assert!((stats[0] - 1.0).abs() < 1e-12, "AP={}", stats[0]);
2249    }
2250
2251    #[test]
2252    fn segm_disjoint_masks_summarize_to_zero() {
2253        let images = vec![img(1, 100, 100)];
2254        let cats = vec![cat(1, "thing")];
2255        let anns = vec![ann_with_segm(
2256            1,
2257            1,
2258            1,
2259            (0.0, 0.0, 10.0, 10.0),
2260            square_polygon(0.0, 0.0, 10.0),
2261        )];
2262        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
2263        let dts = CocoDetections::from_inputs(vec![dt_input_with_segm(
2264            1,
2265            1,
2266            0.9,
2267            (50.0, 50.0, 10.0, 10.0),
2268            square_polygon(50.0, 50.0, 10.0),
2269        )])
2270        .unwrap();
2271        let area = AreaRange::coco_default();
2272        let params = EvaluateParams {
2273            iou_thresholds: iou_thresholds(),
2274            area_ranges: &area,
2275            max_dets_per_image: 100,
2276            use_cats: true,
2277            retain_iou: false,
2278        };
2279        let grid = evaluate_segm(&gt, &dts, params, ParityMode::Strict).unwrap();
2280        let all = grid.cell(0, 0, 0).unwrap();
2281        // No overlap → no match at any threshold.
2282        assert!(all.dt_matched.iter().all(|&m| !m));
2283    }
2284
2285    #[test]
2286    fn segm_missing_gt_segmentation_surfaces_typed_error() {
2287        // GT has no `segmentation` field; running segm eval against it
2288        // must surface InvalidAnnotation, not silently treat as empty.
2289        let images = vec![img(1, 100, 100)];
2290        let cats = vec![cat(1, "thing")];
2291        let anns = vec![ann(7, 1, 1, (0.0, 0.0, 10.0, 10.0))];
2292        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
2293        let dts = CocoDetections::from_inputs(vec![dt_input_with_segm(
2294            1,
2295            1,
2296            0.9,
2297            (0.0, 0.0, 10.0, 10.0),
2298            square_polygon(0.0, 0.0, 10.0),
2299        )])
2300        .unwrap();
2301        let area = AreaRange::coco_default();
2302        let params = EvaluateParams {
2303            iou_thresholds: iou_thresholds(),
2304            area_ranges: &area,
2305            max_dets_per_image: 100,
2306            use_cats: true,
2307            retain_iou: false,
2308        };
2309        let err = evaluate_segm(&gt, &dts, params, ParityMode::Strict).unwrap_err();
2310        match err {
2311            EvalError::InvalidAnnotation { detail } => {
2312                assert!(detail.contains("GT id=7"), "msg: {detail}");
2313            }
2314            other => panic!("expected InvalidAnnotation, got {other:?}"),
2315        }
2316    }
2317
2318    #[test]
2319    fn j2_bbox_only_dt_under_segm_iou_type_raises_in_corrected_mode() {
2320        // Quirk J2 (`corrected`): vernier refuses to silently coerce a
2321        // bbox-only DT into a rectangle mask under iouType="segm". The
2322        // typed error cites the offending DT id and image.
2323        let images = vec![img(1, 100, 100)];
2324        let cats = vec![cat(1, "thing")];
2325        let anns = vec![ann_with_segm(
2326            1,
2327            1,
2328            1,
2329            (0.0, 0.0, 10.0, 10.0),
2330            square_polygon(0.0, 0.0, 10.0),
2331        )];
2332        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
2333        // DT without a segmentation field — only bbox.
2334        let dts =
2335            CocoDetections::from_inputs(vec![dt_input(1, 1, 0.9, (0.0, 0.0, 10.0, 10.0))]).unwrap();
2336        let area = AreaRange::coco_default();
2337        let params = EvaluateParams {
2338            iou_thresholds: iou_thresholds(),
2339            area_ranges: &area,
2340            max_dets_per_image: 100,
2341            use_cats: true,
2342            retain_iou: false,
2343        };
2344        let err = evaluate_segm(&gt, &dts, params, ParityMode::Corrected).unwrap_err();
2345        match err {
2346            EvalError::InvalidAnnotation { detail } => {
2347                assert!(detail.contains("DT"), "expected DT in msg: {detail}");
2348                assert!(detail.contains("J2"), "expected J2 cite in msg: {detail}");
2349            }
2350            other => panic!("expected InvalidAnnotation, got {other:?}"),
2351        }
2352    }
2353
2354    #[test]
2355    fn j2_bbox_only_dt_under_segm_iou_type_synthesizes_in_strict_mode() {
2356        // Quirk J2 (`strict`): pycocotools/coco.py:341 synthesizes a
2357        // 4-point rectangle polygon `[[x1,y1, x1,y2, x2,y2, x2,y1]]`
2358        // from the DT bbox and rasterizes it. A GT polygon perfectly
2359        // covering the same rectangle therefore IoU=1 against the
2360        // synthesized DT mask.
2361        let images = vec![img(1, 100, 100)];
2362        let cats = vec![cat(1, "thing")];
2363        // GT polygon covers a 10×10 square at (0, 0).
2364        let anns = vec![ann_with_segm(
2365            1,
2366            1,
2367            1,
2368            (0.0, 0.0, 10.0, 10.0),
2369            square_polygon(0.0, 0.0, 10.0),
2370        )];
2371        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
2372        // DT bbox covers the same rectangle but carries no `segmentation`.
2373        let dts =
2374            CocoDetections::from_inputs(vec![dt_input(1, 1, 0.9, (0.0, 0.0, 10.0, 10.0))]).unwrap();
2375        let area = AreaRange::coco_default();
2376        let params = EvaluateParams {
2377            iou_thresholds: iou_thresholds(),
2378            area_ranges: &area,
2379            max_dets_per_image: 100,
2380            use_cats: true,
2381            retain_iou: false,
2382        };
2383        let grid = evaluate_segm(&gt, &dts, params, ParityMode::Strict).unwrap();
2384        let all = grid.cell(0, 0, 0).unwrap();
2385        // Synthesized rectangle exactly covers the GT polygon → match
2386        // at every threshold.
2387        assert!(all.dt_matched.iter().all(|&m| m), "expected matches");
2388    }
2389
2390    #[test]
2391    fn j6_heterogeneous_dt_list_first_with_segm_second_without_raises_in_corrected_mode() {
2392        // Quirk J6 (`corrected`): per-entry dispatch. A heterogeneous DT
2393        // list under iouType="segm" — DT[0] carries a `segmentation`,
2394        // DT[1] does not — is rejected up-front in corrected mode rather
2395        // than silently routed through pycocotools' first-entry-decides
2396        // dispatch (`coco.py:330-363`). Verifies that vernier inspects
2397        // each entry independently rather than dispatching from `anns[0]`.
2398        let images = vec![img(1, 100, 100)];
2399        let cats = vec![cat(1, "thing")];
2400        let anns = vec![ann_with_segm(
2401            1,
2402            1,
2403            1,
2404            (0.0, 0.0, 10.0, 10.0),
2405            square_polygon(0.0, 0.0, 10.0),
2406        )];
2407        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
2408        // DT[0] has segm, DT[1] does not. pycocotools' first-entry
2409        // dispatch would route into the segm path on `anns[0]`, then
2410        // crash on `anns[1]` reading `ann['segmentation']`. vernier
2411        // raises InvalidAnnotation pinpointing the offending entry.
2412        let dts = CocoDetections::from_inputs(vec![
2413            dt_input_with_segm(
2414                1,
2415                1,
2416                0.9,
2417                (0.0, 0.0, 10.0, 10.0),
2418                square_polygon(0.0, 0.0, 10.0),
2419            ),
2420            dt_input(1, 1, 0.8, (50.0, 50.0, 10.0, 10.0)),
2421        ])
2422        .unwrap();
2423        let area = AreaRange::coco_default();
2424        let params = EvaluateParams {
2425            iou_thresholds: iou_thresholds(),
2426            area_ranges: &area,
2427            max_dets_per_image: 100,
2428            use_cats: true,
2429            retain_iou: false,
2430        };
2431        let err = evaluate_segm(&gt, &dts, params, ParityMode::Corrected).unwrap_err();
2432        assert!(matches!(err, EvalError::InvalidAnnotation { .. }));
2433    }
2434
2435    #[test]
2436    fn j6_heterogeneous_dt_list_first_without_segm_second_with_raises_in_corrected_mode() {
2437        // Mirror of the previous test with the order reversed. If the
2438        // dispatch were first-entry-decides (the pycocotools quirk J6
2439        // documents), DT[0] without `segmentation` would route to a
2440        // bbox-synthesis path and DT[1]'s segm would be ignored. Vernier
2441        // inspects every entry: missing segm anywhere in corrected mode
2442        // raises.
2443        let images = vec![img(1, 100, 100)];
2444        let cats = vec![cat(1, "thing")];
2445        let anns = vec![ann_with_segm(
2446            1,
2447            1,
2448            1,
2449            (0.0, 0.0, 10.0, 10.0),
2450            square_polygon(0.0, 0.0, 10.0),
2451        )];
2452        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
2453        let dts = CocoDetections::from_inputs(vec![
2454            dt_input(1, 1, 0.9, (0.0, 0.0, 10.0, 10.0)),
2455            dt_input_with_segm(
2456                1,
2457                1,
2458                0.8,
2459                (50.0, 50.0, 10.0, 10.0),
2460                square_polygon(50.0, 50.0, 10.0),
2461            ),
2462        ])
2463        .unwrap();
2464        let area = AreaRange::coco_default();
2465        let params = EvaluateParams {
2466            iou_thresholds: iou_thresholds(),
2467            area_ranges: &area,
2468            max_dets_per_image: 100,
2469            use_cats: true,
2470            retain_iou: false,
2471        };
2472        let err = evaluate_segm(&gt, &dts, params, ParityMode::Corrected).unwrap_err();
2473        assert!(matches!(err, EvalError::InvalidAnnotation { .. }));
2474    }
2475
2476    #[test]
2477    fn j6_heterogeneous_dt_list_in_strict_mode_synthesizes_per_entry() {
2478        // Quirk J2 (`strict`) layered with J6: per-entry dispatch under
2479        // strict mode means DTs without `segmentation` get the
2480        // bbox→polygon synthesis (matching pycocotools), while DTs with
2481        // a `segmentation` keep theirs. No first-entry-decides
2482        // global dispatch — every entry is handled independently.
2483        let images = vec![img(1, 100, 100)];
2484        let cats = vec![cat(1, "thing")];
2485        let anns = vec![
2486            ann_with_segm(
2487                1,
2488                1,
2489                1,
2490                (0.0, 0.0, 10.0, 10.0),
2491                square_polygon(0.0, 0.0, 10.0),
2492            ),
2493            ann_with_segm(
2494                2,
2495                1,
2496                1,
2497                (50.0, 50.0, 10.0, 10.0),
2498                square_polygon(50.0, 50.0, 10.0),
2499            ),
2500        ];
2501        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
2502        // DT[0] has segm covering GT[0]; DT[1] has only bbox covering GT[1].
2503        let dts = CocoDetections::from_inputs(vec![
2504            dt_input_with_segm(
2505                1,
2506                1,
2507                0.9,
2508                (0.0, 0.0, 10.0, 10.0),
2509                square_polygon(0.0, 0.0, 10.0),
2510            ),
2511            dt_input(1, 1, 0.8, (50.0, 50.0, 10.0, 10.0)),
2512        ])
2513        .unwrap();
2514        let area = AreaRange::coco_default();
2515        let params = EvaluateParams {
2516            iou_thresholds: iou_thresholds(),
2517            area_ranges: &area,
2518            max_dets_per_image: 100,
2519            use_cats: true,
2520            retain_iou: false,
2521        };
2522        let grid = evaluate_segm(&gt, &dts, params, ParityMode::Strict).unwrap();
2523        let all = grid.cell(0, 0, 0).unwrap();
2524        // Both DTs match their respective GTs (DT[1] via synthesized
2525        // rectangle), so every threshold sees both as TPs.
2526        assert_eq!(all.dt_matched.shape(), &[iou_thresholds().len(), 2]);
2527        assert!(all.dt_matched.iter().all(|&m| m));
2528    }
2529
2530    #[test]
2531    fn boundary_perfect_overlap_summarizes_to_one() {
2532        // Pins the wrapper end-to-end (kernel → grid → accumulate →
2533        // summarize) at AP=1; a regression in any stage trips this.
2534        let images = vec![img(1, 100, 100)];
2535        let cats = vec![cat(1, "thing")];
2536        let anns = vec![ann_with_segm(
2537            1,
2538            1,
2539            1,
2540            (10.0, 10.0, 20.0, 20.0),
2541            square_polygon(10.0, 10.0, 20.0),
2542        )];
2543        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
2544        let dts = CocoDetections::from_inputs(vec![dt_input_with_segm(
2545            1,
2546            1,
2547            0.9,
2548            (10.0, 10.0, 20.0, 20.0),
2549            square_polygon(10.0, 10.0, 20.0),
2550        )])
2551        .unwrap();
2552        let area = AreaRange::coco_default();
2553        let params = EvaluateParams {
2554            iou_thresholds: iou_thresholds(),
2555            area_ranges: &area,
2556            max_dets_per_image: 100,
2557            use_cats: true,
2558            retain_iou: false,
2559        };
2560        let grid = evaluate_boundary(&gt, &dts, params, ParityMode::Strict, 0.02).unwrap();
2561        let max_dets = vec![1usize, 10, 100];
2562        let acc = accumulate(
2563            &grid.eval_imgs,
2564            AccumulateParams {
2565                iou_thresholds: iou_thresholds(),
2566                recall_thresholds: recall_thresholds(),
2567                max_dets: &max_dets,
2568                n_categories: grid.n_categories,
2569                n_area_ranges: grid.n_area_ranges,
2570                n_images: grid.n_images,
2571            },
2572            ParityMode::Strict,
2573        )
2574        .unwrap();
2575        let summary = summarize_detection(&acc, iou_thresholds(), &max_dets).unwrap();
2576        let stats = summary.stats();
2577        assert!((stats[0] - 1.0).abs() < 1e-12, "AP={}", stats[0]);
2578    }
2579
2580    #[test]
2581    fn boundary_disjoint_masks_summarize_to_zero() {
2582        // Disjoint masks → bbox prefilter zeros the cell; no match at
2583        // any threshold.
2584        let images = vec![img(1, 100, 100)];
2585        let cats = vec![cat(1, "thing")];
2586        let anns = vec![ann_with_segm(
2587            1,
2588            1,
2589            1,
2590            (0.0, 0.0, 10.0, 10.0),
2591            square_polygon(0.0, 0.0, 10.0),
2592        )];
2593        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
2594        let dts = CocoDetections::from_inputs(vec![dt_input_with_segm(
2595            1,
2596            1,
2597            0.9,
2598            (50.0, 50.0, 10.0, 10.0),
2599            square_polygon(50.0, 50.0, 10.0),
2600        )])
2601        .unwrap();
2602        let area = AreaRange::coco_default();
2603        let params = EvaluateParams {
2604            iou_thresholds: iou_thresholds(),
2605            area_ranges: &area,
2606            max_dets_per_image: 100,
2607            use_cats: true,
2608            retain_iou: false,
2609        };
2610        let grid = evaluate_boundary(&gt, &dts, params, ParityMode::Strict, 0.02).unwrap();
2611        let all = grid.cell(0, 0, 0).unwrap();
2612        assert!(all.dt_matched.iter().all(|&m| !m));
2613    }
2614
2615    /// Two-image, two-category fixture exercised by the cache tests
2616    /// below. Returns gt + two distinct DT sets so a "second eval" is
2617    /// genuinely the same GT against fresh DTs (the training-loop
2618    /// validation pattern the cache is for).
2619    fn boundary_cache_fixture() -> (
2620        CocoDataset,
2621        CocoDetections,
2622        CocoDetections,
2623        OwnedEvaluateParams,
2624    ) {
2625        let images = vec![img(1, 100, 100), img(2, 100, 100)];
2626        let cats = vec![cat(1, "thing"), cat(2, "other")];
2627        let anns = vec![
2628            ann_with_segm(
2629                10,
2630                1,
2631                1,
2632                (10.0, 10.0, 20.0, 20.0),
2633                square_polygon(10.0, 10.0, 20.0),
2634            ),
2635            ann_with_segm(
2636                11,
2637                1,
2638                2,
2639                (50.0, 50.0, 15.0, 15.0),
2640                square_polygon(50.0, 50.0, 15.0),
2641            ),
2642            ann_with_segm(
2643                12,
2644                2,
2645                1,
2646                (5.0, 5.0, 25.0, 25.0),
2647                square_polygon(5.0, 5.0, 25.0),
2648            ),
2649        ];
2650        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
2651        let dts_a = CocoDetections::from_inputs(vec![
2652            dt_input_with_segm(
2653                1,
2654                1,
2655                0.9,
2656                (10.0, 10.0, 20.0, 20.0),
2657                square_polygon(10.0, 10.0, 20.0),
2658            ),
2659            dt_input_with_segm(
2660                2,
2661                1,
2662                0.8,
2663                (5.0, 5.0, 25.0, 25.0),
2664                square_polygon(5.0, 5.0, 25.0),
2665            ),
2666        ])
2667        .unwrap();
2668        // dts_b shifts both predictions a little so the grid changes
2669        // but GT bands don't: this is the regime the cache is for.
2670        let dts_b = CocoDetections::from_inputs(vec![
2671            dt_input_with_segm(
2672                1,
2673                1,
2674                0.7,
2675                (12.0, 12.0, 20.0, 20.0),
2676                square_polygon(12.0, 12.0, 20.0),
2677            ),
2678            dt_input_with_segm(
2679                2,
2680                1,
2681                0.6,
2682                (8.0, 8.0, 25.0, 25.0),
2683                square_polygon(8.0, 8.0, 25.0),
2684            ),
2685        ])
2686        .unwrap();
2687        let params = OwnedEvaluateParams {
2688            iou_thresholds: iou_thresholds().to_vec(),
2689            area_ranges: AreaRange::coco_default().to_vec(),
2690            max_dets_per_image: 100,
2691            use_cats: true,
2692            retain_iou: false,
2693        };
2694        (gt, dts_a, dts_b, params)
2695    }
2696
2697    fn boundary_grid_cells(grid: &EvalGrid) -> Vec<f64> {
2698        grid.eval_imgs
2699            .iter()
2700            .filter_map(|c| c.as_ref())
2701            .flat_map(|c| c.dt_scores.iter().copied())
2702            .collect()
2703    }
2704
2705    #[test]
2706    fn boundary_cached_matches_uncached_bit_exact() {
2707        // Same-GT, same-DT call via the cached entry point must
2708        // produce a grid bit-equal to the uncached entry point — the
2709        // cache is a memoization, never a semantic shift.
2710        let (gt, dts, _, params) = boundary_cache_fixture();
2711        let p = params.borrow();
2712        let baseline = evaluate_boundary(&gt, &dts, p, ParityMode::Strict, 0.02).unwrap();
2713        let cache = BoundaryGtCache::new();
2714        let cached_first =
2715            evaluate_boundary_cached(&gt, &dts, p, ParityMode::Strict, 0.02, &cache).unwrap();
2716        let cached_second =
2717            evaluate_boundary_cached(&gt, &dts, p, ParityMode::Strict, 0.02, &cache).unwrap();
2718
2719        let baseline_scores = boundary_grid_cells(&baseline);
2720        let first_scores = boundary_grid_cells(&cached_first);
2721        let second_scores = boundary_grid_cells(&cached_second);
2722        assert_eq!(baseline_scores.len(), first_scores.len());
2723        for (b, c) in baseline_scores.iter().zip(first_scores.iter()) {
2724            assert_eq!(b.to_bits(), c.to_bits());
2725        }
2726        for (b, c) in baseline_scores.iter().zip(second_scores.iter()) {
2727            assert_eq!(b.to_bits(), c.to_bits());
2728        }
2729    }
2730
2731    #[test]
2732    fn boundary_cache_populates_lazily_per_evaluated_cell() {
2733        // The cache fills as bands are derived, which only happens on
2734        // (image, category) cells that have a non-empty DT side. The
2735        // fixture has 3 GTs but only 2 ever participate under
2736        // `use_cats: true`: GT 11 (cat 2) has no matching DT, so its
2737        // band is never computed. Pinning the count documents the
2738        // lazy-load contract — entries we never need stay out of the
2739        // cache, keeping memory proportional to actual work.
2740        let (gt, dts, _, params) = boundary_cache_fixture();
2741        let cache = BoundaryGtCache::new();
2742        assert!(cache.is_empty());
2743        evaluate_boundary_cached(&gt, &dts, params.borrow(), ParityMode::Strict, 0.02, &cache)
2744            .unwrap();
2745        assert_eq!(cache.len(), 2);
2746    }
2747
2748    #[test]
2749    fn boundary_cache_invalidates_on_ratio_change() {
2750        // Bands depend on dilation_ratio; reusing entries computed at
2751        // ratio R₁ when the call is at ratio R₂ would silently return
2752        // wrong numerics. The cache must drop+repopulate.
2753        let (gt, dts, _, params) = boundary_cache_fixture();
2754        let cache = BoundaryGtCache::new();
2755        evaluate_boundary_cached(&gt, &dts, params.borrow(), ParityMode::Strict, 0.02, &cache)
2756            .unwrap();
2757        let after_first = cache.len();
2758        evaluate_boundary_cached(&gt, &dts, params.borrow(), ParityMode::Strict, 0.05, &cache)
2759            .unwrap();
2760        // Same GT count, but every entry was re-derived at the new
2761        // ratio: parity below proves the entries reflect R=0.05, not
2762        // stale R=0.02 data.
2763        assert_eq!(cache.len(), after_first);
2764        let fresh =
2765            evaluate_boundary(&gt, &dts, params.borrow(), ParityMode::Strict, 0.05).unwrap();
2766        let cached =
2767            evaluate_boundary_cached(&gt, &dts, params.borrow(), ParityMode::Strict, 0.05, &cache)
2768                .unwrap();
2769        let fresh_scores = boundary_grid_cells(&fresh);
2770        let cached_scores = boundary_grid_cells(&cached);
2771        for (f, c) in fresh_scores.iter().zip(cached_scores.iter()) {
2772            assert_eq!(f.to_bits(), c.to_bits());
2773        }
2774    }
2775
2776    #[test]
2777    fn boundary_cache_clear_resets_state() {
2778        let (gt, dts, _, params) = boundary_cache_fixture();
2779        let cache = BoundaryGtCache::new();
2780        evaluate_boundary_cached(&gt, &dts, params.borrow(), ParityMode::Strict, 0.02, &cache)
2781            .unwrap();
2782        assert!(!cache.is_empty());
2783        cache.clear();
2784        assert!(cache.is_empty());
2785        // Post-clear the next call must repopulate from scratch and
2786        // still produce the right answer.
2787        let after =
2788            evaluate_boundary_cached(&gt, &dts, params.borrow(), ParityMode::Strict, 0.02, &cache)
2789                .unwrap();
2790        let baseline =
2791            evaluate_boundary(&gt, &dts, params.borrow(), ParityMode::Strict, 0.02).unwrap();
2792        let after_scores = boundary_grid_cells(&after);
2793        let baseline_scores = boundary_grid_cells(&baseline);
2794        for (a, b) in after_scores.iter().zip(baseline_scores.iter()) {
2795            assert_eq!(a.to_bits(), b.to_bits());
2796        }
2797    }
2798
2799    #[test]
2800    fn boundary_cache_survives_changing_dt() {
2801        // The training-loop pattern: same GT, fresh DT each call.
2802        // Cache size must stay constant across DT swaps (the cache
2803        // only ever holds GT bands), and parity vs uncached must
2804        // hold for both DT sets.
2805        let (gt, dts_a, dts_b, params) = boundary_cache_fixture();
2806        let cache = BoundaryGtCache::new();
2807        let cached_a = evaluate_boundary_cached(
2808            &gt,
2809            &dts_a,
2810            params.borrow(),
2811            ParityMode::Strict,
2812            0.02,
2813            &cache,
2814        )
2815        .unwrap();
2816        let len_after_a = cache.len();
2817        let cached_b = evaluate_boundary_cached(
2818            &gt,
2819            &dts_b,
2820            params.borrow(),
2821            ParityMode::Strict,
2822            0.02,
2823            &cache,
2824        )
2825        .unwrap();
2826        assert_eq!(cache.len(), len_after_a);
2827
2828        let baseline_a =
2829            evaluate_boundary(&gt, &dts_a, params.borrow(), ParityMode::Strict, 0.02).unwrap();
2830        let baseline_b =
2831            evaluate_boundary(&gt, &dts_b, params.borrow(), ParityMode::Strict, 0.02).unwrap();
2832        for (lhs, rhs) in boundary_grid_cells(&cached_a)
2833            .iter()
2834            .zip(boundary_grid_cells(&baseline_a).iter())
2835        {
2836            assert_eq!(lhs.to_bits(), rhs.to_bits());
2837        }
2838        for (lhs, rhs) in boundary_grid_cells(&cached_b)
2839            .iter()
2840            .zip(boundary_grid_cells(&baseline_b).iter())
2841        {
2842            assert_eq!(lhs.to_bits(), rhs.to_bits());
2843        }
2844    }
2845
    // ---------------------------------------------------------------
    // SegmGtCache (mirrors the BoundaryGtCache suite above; the segm
    // tests reuse `boundary_cache_fixture` for their data but live in
    // their own section so the boundary tests stay focused on
    // band-specific behaviour).
    // ---------------------------------------------------------------
2851
2852    #[test]
2853    fn segm_cached_matches_uncached_bit_exact() {
2854        let (gt, dts, _, params) = boundary_cache_fixture();
2855        let p = params.borrow();
2856        let baseline = evaluate_segm(&gt, &dts, p, ParityMode::Strict).unwrap();
2857        let cache = SegmGtCache::new();
2858        let cached_first = evaluate_segm_cached(&gt, &dts, p, ParityMode::Strict, &cache).unwrap();
2859        let cached_second = evaluate_segm_cached(&gt, &dts, p, ParityMode::Strict, &cache).unwrap();
2860
2861        let baseline_scores = boundary_grid_cells(&baseline);
2862        let first_scores = boundary_grid_cells(&cached_first);
2863        let second_scores = boundary_grid_cells(&cached_second);
2864        assert_eq!(baseline_scores.len(), first_scores.len());
2865        for (b, c) in baseline_scores.iter().zip(first_scores.iter()) {
2866            assert_eq!(b.to_bits(), c.to_bits());
2867        }
2868        for (b, c) in baseline_scores.iter().zip(second_scores.iter()) {
2869            assert_eq!(b.to_bits(), c.to_bits());
2870        }
2871    }
2872
2873    #[test]
2874    fn segm_cache_populates_lazily_per_evaluated_cell() {
2875        // Same lazy-load contract as the boundary cache: only GTs
2876        // that participate in an evaluated `(image, category)` cell
2877        // — i.e. one with at least one DT — get cached. The
2878        // boundary fixture has 3 GTs but only 2 such cells under
2879        // `use_cats: true`.
2880        let (gt, dts, _, params) = boundary_cache_fixture();
2881        let cache = SegmGtCache::new();
2882        assert!(cache.is_empty());
2883        evaluate_segm_cached(&gt, &dts, params.borrow(), ParityMode::Strict, &cache).unwrap();
2884        assert_eq!(cache.len(), 2);
2885    }
2886
2887    #[test]
2888    fn segm_cache_clear_resets_state() {
2889        let (gt, dts, _, params) = boundary_cache_fixture();
2890        let cache = SegmGtCache::new();
2891        evaluate_segm_cached(&gt, &dts, params.borrow(), ParityMode::Strict, &cache).unwrap();
2892        assert!(!cache.is_empty());
2893        cache.clear();
2894        assert!(cache.is_empty());
2895        let after =
2896            evaluate_segm_cached(&gt, &dts, params.borrow(), ParityMode::Strict, &cache).unwrap();
2897        let baseline = evaluate_segm(&gt, &dts, params.borrow(), ParityMode::Strict).unwrap();
2898        for (a, b) in boundary_grid_cells(&after)
2899            .iter()
2900            .zip(boundary_grid_cells(&baseline).iter())
2901        {
2902            assert_eq!(a.to_bits(), b.to_bits());
2903        }
2904    }
2905
2906    #[test]
2907    fn segm_cache_survives_changing_dt() {
2908        // Training-loop pattern: same GT, fresh DT each call. Cache
2909        // size must stay constant across DT swaps (the cache only
2910        // holds GT entries) and parity vs uncached must hold for
2911        // both DT sets.
2912        let (gt, dts_a, dts_b, params) = boundary_cache_fixture();
2913        let cache = SegmGtCache::new();
2914        let cached_a =
2915            evaluate_segm_cached(&gt, &dts_a, params.borrow(), ParityMode::Strict, &cache).unwrap();
2916        let len_after_a = cache.len();
2917        let cached_b =
2918            evaluate_segm_cached(&gt, &dts_b, params.borrow(), ParityMode::Strict, &cache).unwrap();
2919        assert_eq!(cache.len(), len_after_a);
2920
2921        let baseline_a = evaluate_segm(&gt, &dts_a, params.borrow(), ParityMode::Strict).unwrap();
2922        let baseline_b = evaluate_segm(&gt, &dts_b, params.borrow(), ParityMode::Strict).unwrap();
2923        for (lhs, rhs) in boundary_grid_cells(&cached_a)
2924            .iter()
2925            .zip(boundary_grid_cells(&baseline_a).iter())
2926        {
2927            assert_eq!(lhs.to_bits(), rhs.to_bits());
2928        }
2929        for (lhs, rhs) in boundary_grid_cells(&cached_b)
2930            .iter()
2931            .zip(boundary_grid_cells(&baseline_b).iter())
2932        {
2933            assert_eq!(lhs.to_bits(), rhs.to_bits());
2934        }
2935    }
2936
2937    // ---------------------------------------------------------------
2938    // Phase 3: keypoints (OKS) eval pipeline (ADR-0012).
2939    // ---------------------------------------------------------------
2940
2941    /// Builds a flat `[x, y, v, ...]` keypoint vector at a single point.
2942    /// `len` controls the per-category sigma length the kernel expects
2943    /// (17 for COCO-person).
2944    fn const_kps_vec(x: f64, y: f64, v: u32, len: usize) -> Vec<f64> {
2945        let mut out = Vec::with_capacity(3 * len);
2946        for _ in 0..len {
2947            out.push(x);
2948            out.push(y);
2949            out.push(f64::from(v));
2950        }
2951        out
2952    }
2953
2954    fn ann_with_kps(
2955        id: i64,
2956        image: i64,
2957        cat: i64,
2958        bbox: (f64, f64, f64, f64),
2959        keypoints: Vec<f64>,
2960        num_keypoints: Option<u32>,
2961    ) -> CocoAnnotation {
2962        CocoAnnotation {
2963            id: AnnId(id),
2964            image_id: ImageId(image),
2965            category_id: CategoryId(cat),
2966            area: bbox.2 * bbox.3,
2967            is_crowd: false,
2968            ignore_flag: None,
2969            bbox: Bbox {
2970                x: bbox.0,
2971                y: bbox.1,
2972                w: bbox.2,
2973                h: bbox.3,
2974            },
2975            segmentation: None,
2976            keypoints: Some(keypoints),
2977            num_keypoints,
2978        }
2979    }
2980
2981    fn dt_input_with_kps(
2982        image: i64,
2983        cat: i64,
2984        score: f64,
2985        bbox: (f64, f64, f64, f64),
2986        keypoints: Vec<f64>,
2987    ) -> DetectionInput {
2988        DetectionInput {
2989            id: None,
2990            image_id: ImageId(image),
2991            category_id: CategoryId(cat),
2992            score,
2993            bbox: Bbox {
2994                x: bbox.0,
2995                y: bbox.1,
2996                w: bbox.2,
2997                h: bbox.3,
2998            },
2999            segmentation: None,
3000            keypoints: Some(keypoints),
3001            num_keypoints: None,
3002        }
3003    }
3004
3005    #[test]
3006    fn test_evaluate_keypoints_perfect_match() {
3007        // 1 image, 1 GT person, 1 DT person matching exactly. Every
3008        // keypoint aligns → OKS = 1.0 → matched at every threshold,
3009        // and the meta gt_matches matrix carries the matched DT id.
3010        let images = vec![img(1, 100, 100)];
3011        let cats = vec![cat(1, "person")];
3012        let kps = const_kps_vec(50.0, 50.0, 2, 17);
3013        let anns = vec![ann_with_kps(
3014            1,
3015            1,
3016            1,
3017            (40.0, 40.0, 20.0, 20.0),
3018            kps.clone(),
3019            None,
3020        )];
3021        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
3022        let dts = CocoDetections::from_inputs(vec![dt_input_with_kps(
3023            1,
3024            1,
3025            0.9,
3026            (40.0, 40.0, 20.0, 20.0),
3027            kps,
3028        )])
3029        .unwrap();
3030        let area = AreaRange::coco_default();
3031        let params = EvaluateParams {
3032            iou_thresholds: iou_thresholds(),
3033            area_ranges: &area,
3034            max_dets_per_image: 100,
3035            use_cats: true,
3036            retain_iou: false,
3037        };
3038        let grid =
3039            evaluate_keypoints(&gt, &dts, params, ParityMode::Strict, HashMap::new()).unwrap();
3040        let cell = grid.cell(0, 0, 0).unwrap();
3041        // gt_ignore is false (visible keypoints), so the GT is in play.
3042        assert_eq!(cell.gt_ignore, vec![false]);
3043        // Every threshold matches the DT at score 0.9.
3044        assert!(cell.dt_matched.iter().all(|&m| m));
3045        // Meta carries the matched DT id at every threshold for this GT.
3046        let meta = grid.cell_meta(0, 0, 0).unwrap();
3047        assert!(
3048            meta.gt_matches.iter().all(|&id| id > 0),
3049            "every threshold should match the DT id (>0)",
3050        );
3051    }
3052
3053    #[test]
3054    fn test_evaluate_keypoints_zero_overlap() {
3055        // 1 GT and 1 DT keypoints far apart (separated by ~1000 px on
3056        // a 10×10 bbox). OKS drops well below 0.5 → no match at any
3057        // threshold ≥ 0.5.
3058        let images = vec![img(1, 2000, 2000)];
3059        let cats = vec![cat(1, "person")];
3060        let gt_kps = const_kps_vec(50.0, 50.0, 2, 17);
3061        let dt_kps = const_kps_vec(1500.0, 1500.0, 2, 17);
3062        let anns = vec![ann_with_kps(
3063            1,
3064            1,
3065            1,
3066            (40.0, 40.0, 20.0, 20.0),
3067            gt_kps,
3068            None,
3069        )];
3070        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
3071        let dts = CocoDetections::from_inputs(vec![dt_input_with_kps(
3072            1,
3073            1,
3074            0.9,
3075            (1490.0, 1490.0, 20.0, 20.0),
3076            dt_kps,
3077        )])
3078        .unwrap();
3079        let area = AreaRange::coco_default();
3080        let params = EvaluateParams {
3081            iou_thresholds: iou_thresholds(),
3082            area_ranges: &area,
3083            max_dets_per_image: 100,
3084            use_cats: true,
3085            retain_iou: false,
3086        };
3087        let grid =
3088            evaluate_keypoints(&gt, &dts, params, ParityMode::Strict, HashMap::new()).unwrap();
3089        let cell = grid.cell(0, 0, 0).unwrap();
3090        assert!(
3091            cell.dt_matched.iter().all(|&m| !m),
3092            "DTs far from GT should not match at any IoU threshold",
3093        );
3094    }
3095
3096    #[test]
3097    fn test_evaluate_keypoints_d2_implicit_ignore() {
3098        // D2 (`strict`): GT with `num_keypoints == 0` is treated as an
3099        // implicit ignore region, OR-ed with the existing ignore. This
3100        // GT carries v=0 on every triplet (so num_keypoints derives to
3101        // 0 even without the precomputed field) and is not is_crowd.
3102        let images = vec![img(1, 100, 100)];
3103        let cats = vec![cat(1, "person")];
3104        let gt_kps = const_kps_vec(50.0, 50.0, 0, 17);
3105        let dt_kps = const_kps_vec(50.0, 50.0, 2, 17);
3106        let anns = vec![ann_with_kps(
3107            1,
3108            1,
3109            1,
3110            (40.0, 40.0, 20.0, 20.0),
3111            gt_kps,
3112            // Explicit Some(0) covers the precomputed-num_keypoints
3113            // path; the kernel treats it identically to deriving from
3114            // visibility flags.
3115            Some(0),
3116        )];
3117        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
3118        let dts = CocoDetections::from_inputs(vec![dt_input_with_kps(
3119            1,
3120            1,
3121            0.9,
3122            (40.0, 40.0, 20.0, 20.0),
3123            dt_kps,
3124        )])
3125        .unwrap();
3126        let area = AreaRange::coco_default();
3127        let params = EvaluateParams {
3128            iou_thresholds: iou_thresholds(),
3129            area_ranges: &area,
3130            max_dets_per_image: 100,
3131            use_cats: true,
3132            retain_iou: false,
3133        };
3134        let grid =
3135            evaluate_keypoints(&gt, &dts, params, ParityMode::Strict, HashMap::new()).unwrap();
3136        let cell = grid.cell(0, 0, 0).unwrap();
3137        assert_eq!(
3138            cell.gt_ignore,
3139            vec![true],
3140            "D2: zero-visible-keypoints GT must be ignored",
3141        );
3142    }
3143
3144    #[test]
3145    fn test_evaluate_keypoints_per_category_sigmas() {
3146        // Two GTs in different categories; sigmas provided per category.
3147        // Each row of the OKS matrix uses the right sigma vector — we
3148        // verify by asserting the cell evaluates without error and that
3149        // both DTs match their same-category GT with the override-tuned
3150        // sigmas. We pick large sigmas (0.5) so a 1-pixel offset still
3151        // OKS≈1, ensuring matches at every threshold.
3152        let images = vec![img(1, 200, 200)];
3153        let cats = vec![cat(1, "person"), cat(2, "dog")];
3154        let gt_kps = const_kps_vec(50.0, 50.0, 2, 17);
3155        let anns = vec![
3156            ann_with_kps(1, 1, 1, (40.0, 40.0, 20.0, 20.0), gt_kps, None),
3157            ann_with_kps(
3158                2,
3159                1,
3160                2,
3161                (140.0, 140.0, 20.0, 20.0),
3162                const_kps_vec(150.0, 150.0, 2, 17),
3163                None,
3164            ),
3165        ];
3166        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
3167        // DT[0] near GT[0] (cat 1), DT[1] near GT[1] (cat 2). Both off
3168        // by 1 pixel.
3169        let dts = CocoDetections::from_inputs(vec![
3170            dt_input_with_kps(
3171                1,
3172                1,
3173                0.9,
3174                (40.0, 40.0, 20.0, 20.0),
3175                const_kps_vec(51.0, 50.0, 2, 17),
3176            ),
3177            dt_input_with_kps(
3178                1,
3179                2,
3180                0.8,
3181                (140.0, 140.0, 20.0, 20.0),
3182                const_kps_vec(151.0, 150.0, 2, 17),
3183            ),
3184        ])
3185        .unwrap();
3186        let mut sigmas: HashMap<i64, Vec<f64>> = HashMap::new();
3187        sigmas.insert(1, vec![0.5_f64; 17]);
3188        sigmas.insert(2, vec![0.5_f64; 17]);
3189        let area = AreaRange::coco_default();
3190        let params = EvaluateParams {
3191            iou_thresholds: iou_thresholds(),
3192            area_ranges: &area,
3193            max_dets_per_image: 100,
3194            use_cats: true,
3195            retain_iou: false,
3196        };
3197        let grid = evaluate_keypoints(&gt, &dts, params, ParityMode::Strict, sigmas).unwrap();
3198        // K-axis is [cat 1, cat 2]; each cell sees one GT and one DT.
3199        let cell_cat1 = grid.cell(0, 0, 0).unwrap();
3200        let cell_cat2 = grid.cell(1, 0, 0).unwrap();
3201        assert!(
3202            cell_cat1.dt_matched.iter().all(|&m| m),
3203            "cat-1 DT should match cat-1 GT under override sigmas",
3204        );
3205        assert!(
3206            cell_cat2.dt_matched.iter().all(|&m| m),
3207            "cat-2 DT should match cat-2 GT under override sigmas",
3208        );
3209    }
3210
3211    #[test]
3212    fn test_evaluate_keypoints_missing_dt_kps_rejected() {
3213        // DT entry without `keypoints` field → the kernel build path
3214        // surfaces InvalidAnnotation. There is no parity-mode J2 analog
3215        // for keypoints (no bbox-synthesis fallback).
3216        let images = vec![img(1, 100, 100)];
3217        let cats = vec![cat(1, "person")];
3218        let gt_kps = const_kps_vec(50.0, 50.0, 2, 17);
3219        let anns = vec![ann_with_kps(
3220            1,
3221            1,
3222            1,
3223            (40.0, 40.0, 20.0, 20.0),
3224            gt_kps,
3225            None,
3226        )];
3227        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
3228        // DT has bbox + score but no keypoints — uses the existing
3229        // bbox-only `dt_input` helper.
3230        let dts = CocoDetections::from_inputs(vec![dt_input(1, 1, 0.9, (40.0, 40.0, 20.0, 20.0))])
3231            .unwrap();
3232        let area = AreaRange::coco_default();
3233        let params = EvaluateParams {
3234            iou_thresholds: iou_thresholds(),
3235            area_ranges: &area,
3236            max_dets_per_image: 100,
3237            use_cats: true,
3238            retain_iou: false,
3239        };
3240        let err =
3241            evaluate_keypoints(&gt, &dts, params, ParityMode::Strict, HashMap::new()).unwrap_err();
3242        match err {
3243            EvalError::InvalidAnnotation { detail } => {
3244                assert!(detail.contains("DT"), "expected DT in msg: {detail}");
3245                assert!(
3246                    detail.contains("keypoints"),
3247                    "expected keypoints in msg: {detail}",
3248                );
3249            }
3250            other => panic!("expected InvalidAnnotation, got {other:?}"),
3251        }
3252    }
3253
3254    #[test]
3255    fn test_keypoints_default_ignore_for_other_kernels() {
3256        // The D2 implicit-ignore clause must not bleed across kernels.
3257        // BboxIou::extra_gt_ignore (default impl) returns false even for
3258        // an annotation with num_keypoints=0; only OksSimilarity
3259        // overrides it.
3260        let ann_zero_kps = ann_with_kps(
3261            1,
3262            1,
3263            1,
3264            (0.0, 0.0, 10.0, 10.0),
3265            const_kps_vec(0.0, 0.0, 0, 17),
3266            Some(0),
3267        );
3268        assert!(
3269            !BboxIou.extra_gt_ignore(&ann_zero_kps),
3270            "BboxIou must keep the default `false` ignore",
3271        );
3272        assert!(
3273            !SegmIou.extra_gt_ignore(&ann_zero_kps),
3274            "SegmIou must keep the default `false` ignore",
3275        );
3276        assert!(
3277            !BoundaryIou {
3278                dilation_ratio: 0.02,
3279            }
3280            .extra_gt_ignore(&ann_zero_kps),
3281            "BoundaryIou must keep the default `false` ignore",
3282        );
3283        // And the OKS kernel does flip it on the same annotation.
3284        assert!(
3285            OksSimilarity::default().extra_gt_ignore(&ann_zero_kps),
3286            "OksSimilarity must flip D2 to true on zero-visible-keypoints GT",
3287        );
3288    }
3289
3290    #[test]
3291    fn boundary_missing_gt_segmentation_surfaces_typed_error() {
3292        // Boundary reuses the segm GT-build path, so missing GT segm
3293        // surfaces the same typed error. Pinned here so a future
3294        // refactor that splits the build paths can't silently regress.
3295        let images = vec![img(1, 100, 100)];
3296        let cats = vec![cat(1, "thing")];
3297        let anns = vec![ann(7, 1, 1, (0.0, 0.0, 10.0, 10.0))];
3298        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
3299        let dts = CocoDetections::from_inputs(vec![dt_input_with_segm(
3300            1,
3301            1,
3302            0.9,
3303            (0.0, 0.0, 10.0, 10.0),
3304            square_polygon(0.0, 0.0, 10.0),
3305        )])
3306        .unwrap();
3307        let area = AreaRange::coco_default();
3308        let params = EvaluateParams {
3309            iou_thresholds: iou_thresholds(),
3310            area_ranges: &area,
3311            max_dets_per_image: 100,
3312            use_cats: true,
3313            retain_iou: false,
3314        };
3315        let err = evaluate_boundary(&gt, &dts, params, ParityMode::Strict, 0.02).unwrap_err();
3316        match err {
3317            EvalError::InvalidAnnotation { detail } => {
3318                assert!(detail.contains("GT id=7"), "msg: {detail}");
3319            }
3320            other => panic!("expected InvalidAnnotation, got {other:?}"),
3321        }
3322    }
3323
3324    // -- ADR-0026: federated cell-skip and dt_ignore extension ---------------
3325
3326    /// Build an LVIS-style GT dataset directly: a `CocoDataset` whose
3327    /// federated metadata sets are populated. Mirrors what
3328    /// `from_lvis_json_bytes` produces, but lets tests pin the maps
3329    /// without round-tripping through JSON.
3330    fn lvis_dataset(
3331        images: &[ImageMeta],
3332        annotations: &[CocoAnnotation],
3333        categories: &[CategoryMeta],
3334        neg: &[(i64, Vec<i64>)],
3335        nel: &[(i64, Vec<i64>)],
3336        freq: &[(i64, crate::dataset::Frequency)],
3337    ) -> CocoDataset {
3338        // Build LVIS JSON bytes through the public loader so the
3339        // resulting dataset uses the same code path the FFI exercises.
3340        // (Constructing through `from_parts` would leave the federated
3341        // fields `None`.)
3342        let images_json: Vec<serde_json::Value> = images
3343            .iter()
3344            .map(|im| {
3345                let neg_for: Vec<i64> = neg
3346                    .iter()
3347                    .find(|(id, _)| *id == im.id.0)
3348                    .map(|(_, v)| v.clone())
3349                    .unwrap_or_default();
3350                let nel_for: Vec<i64> = nel
3351                    .iter()
3352                    .find(|(id, _)| *id == im.id.0)
3353                    .map(|(_, v)| v.clone())
3354                    .unwrap_or_default();
3355                serde_json::json!({
3356                    "id": im.id.0,
3357                    "width": im.width,
3358                    "height": im.height,
3359                    "neg_category_ids": neg_for,
3360                    "not_exhaustive_category_ids": nel_for,
3361                })
3362            })
3363            .collect();
3364        let cats_json: Vec<serde_json::Value> = categories
3365            .iter()
3366            .map(|c| {
3367                let f = freq
3368                    .iter()
3369                    .find(|(id, _)| *id == c.id.0)
3370                    .map(|(_, f)| match f {
3371                        crate::dataset::Frequency::Rare => "r",
3372                        crate::dataset::Frequency::Common => "c",
3373                        crate::dataset::Frequency::Frequent => "f",
3374                    })
3375                    .expect("test fixture must include frequency for every category");
3376                serde_json::json!({
3377                    "id": c.id.0,
3378                    "name": c.name,
3379                    "frequency": f,
3380                })
3381            })
3382            .collect();
3383        let anns_json = serde_json::to_value(annotations).unwrap();
3384        let payload = serde_json::json!({
3385            "images": images_json,
3386            "annotations": anns_json,
3387            "categories": cats_json,
3388        });
3389        let bytes = serde_json::to_vec(&payload).unwrap();
3390        CocoDataset::from_lvis_json_bytes(&bytes).unwrap()
3391    }
3392
3393    #[test]
3394    fn aa4_skips_cells_outside_pos_union_neg() {
3395        // Two images, two categories. Image 1 has GTs of cat 1 only;
3396        // image 2 has GTs of cat 2 only. Neither image lists anything
3397        // in `neg`. The DT set predicts cat 2 on image 1 (a category
3398        // for which image 1 has no GT and no neg listing) — the
3399        // federated cell-skip MUST drop the resulting (image 1,
3400        // cat 2) cell entirely. Without AA4 the DT counts as a FP and
3401        // tanks AP.
3402        let images = vec![img(1, 100, 100), img(2, 100, 100)];
3403        let cats = vec![cat(1, "a"), cat(2, "b")];
3404        let anns = vec![
3405            ann(1, 1, 1, (0.0, 0.0, 10.0, 10.0)),
3406            ann(2, 2, 2, (0.0, 0.0, 10.0, 10.0)),
3407        ];
3408        let gt_lvis = lvis_dataset(
3409            &images,
3410            &anns,
3411            &cats,
3412            &[(1, vec![]), (2, vec![])],
3413            &[(1, vec![]), (2, vec![])],
3414            &[
3415                (1, crate::dataset::Frequency::Frequent),
3416                (2, crate::dataset::Frequency::Frequent),
3417            ],
3418        );
3419        let gt_coco = CocoDataset::from_parts(images, anns, cats).unwrap();
3420        // DT: a "stray" cat 2 prediction on image 1 — federated wants
3421        // it dropped, COCO will score it as a FP.
3422        let dts = CocoDetections::from_inputs(vec![
3423            dt_input(1, 1, 0.9, (0.0, 0.0, 10.0, 10.0)),
3424            dt_input(1, 2, 0.7, (50.0, 50.0, 10.0, 10.0)),
3425            dt_input(2, 2, 0.9, (0.0, 0.0, 10.0, 10.0)),
3426        ])
3427        .unwrap();
3428        let area = AreaRange::coco_default();
3429        let params = EvaluateParams {
3430            iou_thresholds: iou_thresholds(),
3431            area_ranges: &area,
3432            max_dets_per_image: 100,
3433            use_cats: true,
3434            retain_iou: false,
3435        };
3436        let grid_lvis = evaluate_bbox(&gt_lvis, &dts, params, ParityMode::Strict).unwrap();
3437        let grid_coco = evaluate_bbox(&gt_coco, &dts, params, ParityMode::Strict).unwrap();
3438
3439        // Cell layout: K=[cat 1, cat 2], A=[all], I=[image 1, image 2].
3440        // (image 1, cat 2) sits at k=1, a=0, i=0 — federated dataset
3441        // skips it (None), COCO dataset evaluates it (Some).
3442        let lvis_cell = grid_lvis.cell(1, 0, 0);
3443        let coco_cell = grid_coco.cell(1, 0, 0);
3444        assert!(lvis_cell.is_none(), "AA4: federated cell must be skipped");
3445        assert!(
3446            coco_cell.is_some(),
3447            "control: COCO dataset must evaluate the same cell"
3448        );
3449        // The (image 1, cat 1) cell is unaffected — federated and
3450        // COCO must agree there because cat 1 ∈ pos[1].
3451        assert_eq!(
3452            grid_lvis.cell(0, 0, 0).map(|c| c.dt_scores.len()),
3453            grid_coco.cell(0, 0, 0).map(|c| c.dt_scores.len()),
3454        );
3455    }
3456
3457    #[test]
3458    fn aa4_keeps_neg_cells_with_no_gts() {
3459        // Same shape as the previous test, but image 1 lists cat 2 in
3460        // its `neg` set: the cell now stays (so we score recall on a
3461        // verified-absent category) and unmatched DTs become FPs.
3462        let images = vec![img(1, 100, 100)];
3463        let cats = vec![cat(1, "a"), cat(2, "b")];
3464        let anns = vec![ann(1, 1, 1, (0.0, 0.0, 10.0, 10.0))];
3465        let gt = lvis_dataset(
3466            &images,
3467            &anns,
3468            &cats,
3469            &[(1, vec![2])], // cat 2 ∈ neg[1]
3470            &[(1, vec![])],
3471            &[
3472                (1, crate::dataset::Frequency::Frequent),
3473                (2, crate::dataset::Frequency::Frequent),
3474            ],
3475        );
3476        let dts = CocoDetections::from_inputs(vec![
3477            dt_input(1, 1, 0.9, (0.0, 0.0, 10.0, 10.0)),
3478            dt_input(1, 2, 0.7, (50.0, 50.0, 10.0, 10.0)),
3479        ])
3480        .unwrap();
3481        let area = AreaRange::coco_default();
3482        let params = EvaluateParams {
3483            iou_thresholds: iou_thresholds(),
3484            area_ranges: &area,
3485            max_dets_per_image: 100,
3486            use_cats: true,
3487            retain_iou: false,
3488        };
3489        let grid = evaluate_bbox(&gt, &dts, params, ParityMode::Strict).unwrap();
3490        let cell = grid
3491            .cell(1, 0, 0)
3492            .expect("cat 2 ∈ neg[1] must produce an evaluated cell");
3493        // The cell has no GTs and one DT; the DT is an unmatched FP
3494        // (not ignored, because the cell is not in `not_exhaustive`).
3495        assert_eq!(cell.dt_scores.len(), 1);
3496        assert!(cell.dt_ignore.iter().all(|&ig| !ig));
3497    }
3498
3499    #[test]
3500    fn aa3_dt_ignore_extension_in_not_exhaustive_cell() {
3501        // Image 1 has GTs of cat 1 and lists cat 1 in its
3502        // `not_exhaustive` set. The DT set has two predictions for
3503        // cat 1: one matches the GT (TP), the other is unmatched.
3504        // Quirk **AA3** says the unmatched DT must have
3505        // `dt_ignore = true`; the matched DT keeps `dt_ignore =
3506        // false`.
3507        let images = vec![img(1, 100, 100)];
3508        let cats = vec![cat(1, "a")];
3509        let anns = vec![ann(1, 1, 1, (0.0, 0.0, 10.0, 10.0))];
3510        let gt = lvis_dataset(
3511            &images,
3512            &anns,
3513            &cats,
3514            &[(1, vec![])],
3515            &[(1, vec![1])], // cat 1 ∈ not_exhaustive[1]
3516            &[(1, crate::dataset::Frequency::Frequent)],
3517        );
3518        let dts = CocoDetections::from_inputs(vec![
3519            dt_input(1, 1, 0.9, (0.0, 0.0, 10.0, 10.0)),   // TP
3520            dt_input(1, 1, 0.7, (50.0, 50.0, 10.0, 10.0)), // unmatched FP candidate
3521        ])
3522        .unwrap();
3523        let area = AreaRange::coco_default();
3524        let params = EvaluateParams {
3525            iou_thresholds: iou_thresholds(),
3526            area_ranges: &area,
3527            max_dets_per_image: 100,
3528            use_cats: true,
3529            retain_iou: false,
3530        };
3531        let grid = evaluate_bbox(&gt, &dts, params, ParityMode::Strict).unwrap();
3532        let cell = grid.cell(0, 0, 0).expect("cell must evaluate");
3533        let n_t = cell.dt_ignore.shape()[0];
3534        // sorted-DT order is descending by score: [TP, FP]. TP must
3535        // never be `dt_ignore = true` (B6 only flips ignore on
3536        // *unmatched* DTs); FP must be `true` for every IoU
3537        // threshold.
3538        for t in 0..n_t {
3539            assert!(!cell.dt_ignore[(t, 0)], "TP should not be dt_ignore");
3540            assert!(
3541                cell.dt_ignore[(t, 1)],
3542                "AA3: unmatched DT in not_exhaustive cell must be dt_ignore"
3543            );
3544        }
3545    }
3546
3547    #[test]
3548    fn aa3_dt_ignore_only_unmatched() {
3549        // Mirror of the previous test but with `not_exhaustive` empty:
3550        // the same DT pair must produce `dt_ignore = false` on both
3551        // entries (the unmatched DT is now a real FP).
3552        let images = vec![img(1, 100, 100)];
3553        let cats = vec![cat(1, "a")];
3554        let anns = vec![ann(1, 1, 1, (0.0, 0.0, 10.0, 10.0))];
3555        let gt = lvis_dataset(
3556            &images,
3557            &anns,
3558            &cats,
3559            &[(1, vec![])],
3560            &[(1, vec![])],
3561            &[(1, crate::dataset::Frequency::Frequent)],
3562        );
3563        let dts = CocoDetections::from_inputs(vec![
3564            dt_input(1, 1, 0.9, (0.0, 0.0, 10.0, 10.0)),
3565            dt_input(1, 1, 0.7, (50.0, 50.0, 10.0, 10.0)),
3566        ])
3567        .unwrap();
3568        let area = AreaRange::coco_default();
3569        let params = EvaluateParams {
3570            iou_thresholds: iou_thresholds(),
3571            area_ranges: &area,
3572            max_dets_per_image: 100,
3573            use_cats: true,
3574            retain_iou: false,
3575        };
3576        let grid = evaluate_bbox(&gt, &dts, params, ParityMode::Strict).unwrap();
3577        let cell = grid.cell(0, 0, 0).expect("cell must evaluate");
3578        assert!(cell.dt_ignore.iter().all(|&ig| !ig));
3579    }
3580
3581    #[test]
3582    fn federated_dataset_with_use_cats_false_falls_back_to_coco() {
3583        // Federated logic requires `use_cats=true`. With `use_cats=false`
3584        // the L4 collapse merges every category into one bucket; we
3585        // explicitly skip the federated checks so a misconfigured
3586        // caller still sees deterministic COCO-grade output.
3587        let images = vec![img(1, 100, 100), img(2, 100, 100)];
3588        let cats = vec![cat(1, "a"), cat(2, "b")];
3589        let anns = vec![
3590            ann(1, 1, 1, (0.0, 0.0, 10.0, 10.0)),
3591            ann(2, 2, 2, (0.0, 0.0, 10.0, 10.0)),
3592        ];
3593        let gt = lvis_dataset(
3594            &images,
3595            &anns,
3596            &cats,
3597            &[(1, vec![]), (2, vec![])],
3598            &[(1, vec![]), (2, vec![])],
3599            &[
3600                (1, crate::dataset::Frequency::Frequent),
3601                (2, crate::dataset::Frequency::Frequent),
3602            ],
3603        );
3604        let dts = CocoDetections::from_inputs(vec![
3605            dt_input(1, 1, 0.9, (0.0, 0.0, 10.0, 10.0)),
3606            dt_input(1, 2, 0.7, (50.0, 50.0, 10.0, 10.0)),
3607        ])
3608        .unwrap();
3609        let area = AreaRange::coco_default();
3610        let params = EvaluateParams {
3611            iou_thresholds: iou_thresholds(),
3612            area_ranges: &area,
3613            max_dets_per_image: 100,
3614            use_cats: false,
3615            retain_iou: false,
3616        };
3617        // No panic, no skipped cell — the K-axis is collapsed to one
3618        // sentinel category so AA4 cannot apply.
3619        let grid = evaluate_bbox(&gt, &dts, params, ParityMode::Strict).unwrap();
3620        assert_eq!(grid.n_categories, 1);
3621        // (k=0, a=0, i=0) is the only image-1 cell; it must contain
3622        // both DTs (cat 1 and cat 2 collapsed onto k=0).
3623        let cell = grid.cell(0, 0, 0).expect("collapsed cell must evaluate");
3624        assert_eq!(cell.dt_scores.len(), 2);
3625    }
3626
3627    #[test]
3628    fn coco_dataset_unaffected_by_federated_machinery() {
3629        // The federated branches must be no-ops when
3630        // `is_federated()` is false. Pin this with a regression check
3631        // against the perfect_match_grid fixture: the cell shape
3632        // must be byte-identical to what the function returned
3633        // before the AA3/AA4 patch.
3634        let g = perfect_match_grid();
3635        // 1 category, 4 area ranges, 1 image. (k=0, a=0, i=0) holds
3636        // the all-area cell with both DTs matched.
3637        let cell = g.cell(0, 0, 0).expect("perfect_match cell must exist");
3638        assert_eq!(cell.dt_scores.len(), 2);
3639        assert!(cell.dt_ignore.iter().all(|&ig| !ig));
3640    }
3641
3642    // -- Quirk AG6: strict-mode `area > 0` GT filter (ADR-0026) --------------
3643
3644    /// Build a GT annotation with an explicitly-pinned `area`. The
3645    /// general-purpose `ann()` derives area from the bbox (`w * h`),
3646    /// which can't synthesize the "bbox has positive extent but `area`
3647    /// field is 0" case the oracle filters on.
3648    fn ann_with_area(
3649        id: i64,
3650        image: i64,
3651        cat: i64,
3652        bbox: (f64, f64, f64, f64),
3653        area: f64,
3654    ) -> CocoAnnotation {
3655        let mut a = ann(id, image, cat, bbox);
3656        a.area = area;
3657        a
3658    }
3659
3660    #[test]
3661    fn ag6_mixed_cell_drops_zero_area_gt_in_strict_mode() {
3662        // Mixed cell: one area>0 GT and one area==0 GT (both with
3663        // positive-extent bboxes — mirrors the LVIS val data where
3664        // ann_id=31604 has `bbox=[132.86, 347.1, 0.07, 0.08]` and
3665        // `area=0.0` because the segm-derived area is zero). Perfect-DTs
3666        // for both. Strict mode mirrors the oracle: the zero-area GT
3667        // is dropped, leaving its DT as an unmatched FP.
3668        let images = vec![img(1, 100, 100)];
3669        let cats = vec![cat(1, "a")];
3670        let anns = vec![
3671            ann(1, 1, 1, (10.0, 10.0, 20.0, 20.0)),
3672            ann_with_area(2, 1, 1, (50.0, 50.0, 0.1, 0.1), 0.0),
3673        ];
3674        let gt = lvis_dataset(
3675            &images,
3676            &anns,
3677            &cats,
3678            &[(1, vec![])],
3679            &[(1, vec![])],
3680            &[(1, crate::dataset::Frequency::Frequent)],
3681        );
3682        let dts = CocoDetections::from_inputs(vec![
3683            dt_input(1, 1, 0.9, (10.0, 10.0, 20.0, 20.0)),
3684            dt_input(1, 1, 0.8, (50.0, 50.0, 0.1, 0.1)),
3685        ])
3686        .unwrap();
3687        let area = AreaRange::coco_default();
3688        let params = EvaluateParams {
3689            iou_thresholds: iou_thresholds(),
3690            area_ranges: &area,
3691            max_dets_per_image: 100,
3692            use_cats: true,
3693            retain_iou: false,
3694        };
3695
3696        let strict = evaluate_bbox(&gt, &dts, params, ParityMode::Strict).unwrap();
3697        let cell = strict
3698            .cell(0, 0, 0)
3699            .expect("mixed cell must still evaluate in strict mode");
3700        assert_eq!(cell.dt_scores.len(), 2);
3701        // dt_scores sorted desc → [0.9, 0.8]. At t=0 (iou=0.5):
3702        // DT_real matches GT_real; DT_zero finds no GT (filtered out)
3703        // so dt_matches[0,1] == 0.
3704        let strict_meta = strict.cell_meta(0, 0, 0).unwrap();
3705        assert_eq!(strict_meta.dt_matches[(0, 0)], 1, "DT_real → GT id=1");
3706        assert_eq!(
3707            strict_meta.dt_matches[(0, 1)],
3708            0,
3709            "DT_zero must be unmatched after strict filter drops GT id=2"
3710        );
3711
3712        let corrected = evaluate_bbox(&gt, &dts, params, ParityMode::Corrected).unwrap();
3713        let cor_meta = corrected.cell_meta(0, 0, 0).unwrap();
3714        assert_eq!(cor_meta.dt_matches[(0, 0)], 1, "DT_real → GT id=1");
3715        assert_eq!(
3716            cor_meta.dt_matches[(0, 1)],
3717            2,
3718            "Corrected mode keeps the zero-area GT and matches DT_zero → GT id=2"
3719        );
3720    }
3721
3722    #[test]
3723    fn ag6_all_zero_area_cell_skipped_via_aa4_in_strict_mode() {
3724        // Only GT for (image 1, cat 1) is zero-area. Post-filter
3725        // gt_indices is empty; cat 1 is not in neg[1] either, so the
3726        // AA4 cell-skip path fires and the DT is silently dropped —
3727        // mirroring the oracle's behavior on the (image 492990,
3728        // cat 982) cell in LVIS val (the only all-zero-area cell on
3729        // that dataset).
3730        let images = vec![img(1, 100, 100)];
3731        let cats = vec![cat(1, "a")];
3732        let anns = vec![ann_with_area(1, 1, 1, (50.0, 50.0, 0.1, 0.1), 0.0)];
3733        let gt = lvis_dataset(
3734            &images,
3735            &anns,
3736            &cats,
3737            &[(1, vec![])],
3738            &[(1, vec![])],
3739            &[(1, crate::dataset::Frequency::Frequent)],
3740        );
3741        let dts =
3742            CocoDetections::from_inputs(vec![dt_input(1, 1, 0.9, (50.0, 50.0, 0.1, 0.1))]).unwrap();
3743        let area = AreaRange::coco_default();
3744        let params = EvaluateParams {
3745            iou_thresholds: iou_thresholds(),
3746            area_ranges: &area,
3747            max_dets_per_image: 100,
3748            use_cats: true,
3749            retain_iou: false,
3750        };
3751
3752        let strict = evaluate_bbox(&gt, &dts, params, ParityMode::Strict).unwrap();
3753        assert!(
3754            strict.cell(0, 0, 0).is_none(),
3755            "AG6: all-zero-area cell must be skipped via AA4 in strict mode"
3756        );
3757
3758        let corrected = evaluate_bbox(&gt, &dts, params, ParityMode::Corrected).unwrap();
3759        let cell = corrected
3760            .cell(0, 0, 0)
3761            .expect("Corrected mode must keep the zero-area GT");
3762        assert_eq!(cell.dt_scores.len(), 1);
3763    }
3764
3765    #[test]
3766    fn ag6_strict_filter_is_noop_on_coco_dataset() {
3767        // Same input as the mixed-cell test but constructed via
3768        // `from_parts` so `federated()` is `None`. The strict filter
3769        // must NOT apply — COCO eval keeps zero-area GTs (the
3770        // pycocotools oracle doesn't filter at load).
3771        let images = vec![img(1, 100, 100)];
3772        let cats = vec![cat(1, "a")];
3773        let anns = vec![
3774            ann(1, 1, 1, (10.0, 10.0, 20.0, 20.0)),
3775            ann_with_area(2, 1, 1, (50.0, 50.0, 0.1, 0.1), 0.0),
3776        ];
3777        let gt = CocoDataset::from_parts(images, anns, cats).unwrap();
3778        let dts = CocoDetections::from_inputs(vec![
3779            dt_input(1, 1, 0.9, (10.0, 10.0, 20.0, 20.0)),
3780            dt_input(1, 1, 0.8, (50.0, 50.0, 0.1, 0.1)),
3781        ])
3782        .unwrap();
3783        let area = AreaRange::coco_default();
3784        let params = EvaluateParams {
3785            iou_thresholds: iou_thresholds(),
3786            area_ranges: &area,
3787            max_dets_per_image: 100,
3788            use_cats: true,
3789            retain_iou: false,
3790        };
3791        let grid = evaluate_bbox(&gt, &dts, params, ParityMode::Strict).unwrap();
3792        let meta = grid.cell_meta(0, 0, 0).unwrap();
3793        assert_eq!(meta.dt_matches[(0, 0)], 1);
3794        assert_eq!(
3795            meta.dt_matches[(0, 1)],
3796            2,
3797            "COCO strict mode must NOT drop the zero-area GT — AG6 is LVIS-only"
3798        );
3799    }
3800}
vernier_core/evaluate.rs

vernier_core/
evaluate.rs