Skip to main content

vernier_core/
dataset.rs

1//! Dataset abstraction and the COCO ground-truth implementation.
2//!
3//! Per ADR-0005, the matching engine and accumulator are written once
4//! and never edited; they are generic over a dataset trait, never over
5//! a concrete dataset type. Future datasets (custom corpora, Phase 3
6//! keypoint datasets such as CrowdPose) add new `EvalDataset` impls
7//! without touching anything in `matching.rs` or `accumulate.rs`.
8//!
9//! The trait is shaped around two access patterns the matching loop
10//! drives:
11//!
12//! - "Give me the GTs for image `i`." — driven by the per-image
13//!   evaluation outer loop.
14//! - "Give me the GTs for category `k` across all images." — driven
15//!   by the per-category accumulation that happens after matching.
16//!
17//! Both go through index slices (`&[usize]`) into a single flat
18//! storage. The convenience method `ann_iter_for_image` builds an
19//! iterator on top of the slice; callers that want raw indices (e.g.,
20//! to interleave bbox / segm / keypoint lookups) use the slice form.
21//!
22//! ## Quirk dispositions
23//!
24//! The COCO loader honors the dataset-level dispositions ratified in
25//! ADR-0002:
26//!
27//! - **D1** (`corrected`): we store both the JSON `iscrowd` flag and
28//!   the optional `ignore` flag verbatim. The eval-time
29//!   [`CocoAnnotation::effective_ignore`] computes the flag per
30//!   parity mode, instead of overwriting one with the other at load
31//!   time the way pycocotools does.
32//! - **D3** (`aligned`): annotations are not mutated mid-evaluation;
33//!   the per-call `_ignore` (which combines the dataset flag with the
34//!   current area range) is computed at eval time.
35//! - **J3** (`strict`): detection-side area is derived at construction
36//!   from the bbox (`bbox.w * bbox.h`) and never read from JSON.
37//! - **J1** (`aligned`): user-supplied DT ids are preserved verbatim;
38//!   absent ids are auto-assigned sequentially during construction.
39//! - **E2 / J4** (`strict`): detections never carry an `iscrowd` flag
40//!   — the type does not have the field. JSON inputs that include
41//!   `iscrowd=1` are silently dropped, matching pycocotools' overwrite.
42
43use std::collections::{HashMap, HashSet};
44use std::sync::{Arc, OnceLock};
45
46use serde::{Deserialize, Serialize};
47
48use crate::error::EvalError;
49use crate::parity::ParityMode;
50use crate::segmentation::{Segmentation, SegmentationRleCounts};
51
/// Newtype for image ids.
///
/// Sourced from the JSON `id` field and preserved verbatim — e.g. an
/// image record with `id = 1` becomes `ImageId(1)`. `serde(transparent)`
/// keeps the on-disk representation a bare integer, not a one-field map.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(transparent)]
pub struct ImageId(pub i64);
58
/// Newtype for category ids. Preserved verbatim from the JSON `id`
/// field; `serde(transparent)` serializes it as a bare integer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(transparent)]
pub struct CategoryId(pub i64);
63
/// Newtype for annotation ids. Preserved verbatim from the JSON `id`
/// field; `serde(transparent)` serializes it as a bare integer.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Serialize, Deserialize)]
#[serde(transparent)]
pub struct AnnId(pub i64);
68
/// Per-image metadata. We keep only what the eval algorithm reads;
/// fields like `coco_url`, `flickr_url`, `date_captured` are dropped on
/// load (round-trip is via the typed COCO data, not raw JSON).
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct ImageMeta {
    /// Image id.
    pub id: ImageId,
    /// Image width in pixels.
    pub width: u32,
    /// Image height in pixels.
    pub height: u32,
    /// File name as recorded in the dataset JSON; useful for tracing
    /// fixtures back to source images. `None` when the JSON omits the
    /// field, and omitted again on serialization so round trips stay
    /// stable.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub file_name: Option<String>,
}
85
/// Per-category metadata.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct CategoryMeta {
    /// Category id.
    pub id: CategoryId,
    /// Human-readable category name (e.g., `"person"`).
    pub name: String,
    /// Optional supercategory grouping (e.g., `"animal"`). `None` when
    /// the JSON omits the field; omitted again on serialization.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub supercategory: Option<String>,
}
97
/// Axis-aligned bounding box in COCO format `(x, y, w, h)`, where
/// `(x, y)` is the top-left corner in pixels (typically with sub-pixel
/// floats) and `(w, h)` are the width and height.
///
/// Serialized to/from the 4-element JSON array `[x, y, w, h]` via the
/// `serde(from/into)` conversions through `[f64; 4]`.
#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq)]
#[serde(from = "[f64; 4]", into = "[f64; 4]")]
pub struct Bbox {
    /// Top-left x (pixels).
    pub x: f64,
    /// Top-left y (pixels).
    pub y: f64,
    /// Width (pixels).
    pub w: f64,
    /// Height (pixels).
    pub h: f64,
}
113
114impl From<[f64; 4]> for Bbox {
115    fn from([x, y, w, h]: [f64; 4]) -> Self {
116        Self { x, y, w, h }
117    }
118}
119
120impl From<Bbox> for [f64; 4] {
121    fn from(b: Bbox) -> Self {
122        [b.x, b.y, b.w, b.h]
123    }
124}
125
/// A COCO annotation as stored on the dataset side (ground truth).
///
/// Detection annotations follow a separate path — see the future
/// `loadRes`-equivalent — because their `iscrowd` is always 0 (quirk
/// **E2**) and their `area` is auto-derived (quirk **J3**). Conflating
/// the two would let a DT bug silently corrupt GT semantics.
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub struct CocoAnnotation {
    /// Annotation id (preserved verbatim from JSON).
    pub id: AnnId,
    /// Image this annotation belongs to.
    pub image_id: ImageId,
    /// Category this annotation belongs to.
    pub category_id: CategoryId,
    /// Pixel area as recorded in JSON. For GT, COCO stores this
    /// directly; we trust the field.
    pub area: f64,
    /// Crowd flag (the COCO `iscrowd` field). pycocotools coerces this
    /// to bool via truthiness, so 0/1 ints round-trip identically.
    // NOTE(review): `deserialize_bool_int` is defined elsewhere in the
    // crate; presumably it accepts both JSON bools and 0/1 ints — confirm.
    #[serde(rename = "iscrowd", default, deserialize_with = "deserialize_bool_int")]
    pub is_crowd: bool,
    /// Optional explicit `ignore` flag.
    ///
    /// `None` means the JSON had no `ignore` field. pycocotools (quirk
    /// **D1**) silently overwrites whatever was here with `is_crowd`;
    /// vernier preserves it and lets [`Self::effective_ignore`] resolve
    /// the strict vs corrected disposition at eval time.
    #[serde(
        rename = "ignore",
        default,
        deserialize_with = "deserialize_opt_bool_int"
    )]
    pub ignore_flag: Option<bool>,
    /// Bounding box. Required for every COCO ground-truth annotation
    /// (even keypoint-only annotations carry a bbox; the bbox is what
    /// `J3` derives DT-area from). Phase 3 adds `keypoints` as an
    /// additional optional field.
    pub bbox: Bbox,
    /// COCO `segmentation` field, in any of the three shapes
    /// pycocotools accepts (multi-polygon, uncompressed RLE,
    /// compressed RLE). `None` for keypoint-only annotations or
    /// fixtures that omit it. The matching engine normalizes via
    /// [`Segmentation::to_rle`] at eval time.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub segmentation: Option<Segmentation>,
    /// Flat keypoint triplets `[x_1, y_1, v_1, x_2, y_2, v_2, ...]`
    /// (per ADR-0012). `None` for non-keypoint annotations; the eval
    /// pipeline raises [`EvalError::InvalidAnnotation`] when a GT is
    /// missing keypoints under `iouType="keypoints"`.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub keypoints: Option<Vec<f64>>,
    /// COCO `num_keypoints` count of *visible* keypoints (`v > 0`),
    /// per ADR-0012. pycocotools precomputes this on GT (driving the
    /// quirk **D2** implicit-ignore branch); on DT it is not required
    /// and is derived from `keypoints` when needed.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub num_keypoints: Option<u32>,
}
184
185impl CocoAnnotation {
186    /// Resolves the effective ignore flag for this annotation under a
187    /// given parity mode (per ADR-0002 / quirk **D1**).
188    ///
189    /// - `Strict` reproduces pycocotools: the user's `ignore` field is
190    ///   discarded, and `ignore` is set to `is_crowd`.
191    /// - `Corrected` honors the user's explicit `ignore` field when
192    ///   present; falls back to `is_crowd` when absent.
193    pub fn effective_ignore(&self, mode: ParityMode) -> bool {
194        match mode {
195            ParityMode::Strict => self.is_crowd,
196            ParityMode::Corrected => self.ignore_flag.unwrap_or(self.is_crowd),
197        }
198    }
199}
200
/// Common interface every annotation type on every dataset implements.
///
/// The matching engine (per ADR-0005) reads only this trait — it does
/// not see [`CocoAnnotation`] or any future per-dataset annotation type
/// directly.
pub trait Annotation {
    /// Image this annotation belongs to.
    fn image_id(&self) -> ImageId;
    /// Category this annotation belongs to.
    fn category_id(&self) -> CategoryId;
    /// Pixel area.
    fn area(&self) -> f64;
    /// Crowd flag (raw, before parity resolution).
    fn is_crowd(&self) -> bool;
    /// Effective ignore flag under the given parity mode (see quirk
    /// **D1**: strict mirrors the crowd flag, corrected honors an
    /// explicit ignore field).
    fn effective_ignore(&self, mode: ParityMode) -> bool;
}
218
// Trait plumbing: every method is a direct field read except
// `effective_ignore`, which delegates to the inherent method (called
// via `Self::` to make the non-recursive dispatch explicit).
impl Annotation for CocoAnnotation {
    fn image_id(&self) -> ImageId {
        self.image_id
    }
    fn category_id(&self) -> CategoryId {
        self.category_id
    }
    fn area(&self) -> f64 {
        self.area
    }
    fn is_crowd(&self) -> bool {
        self.is_crowd
    }
    fn effective_ignore(&self, mode: ParityMode) -> bool {
        Self::effective_ignore(self, mode)
    }
}
236
237/// Trait every dataset (COCO, CrowdPose, custom) implements.
238///
239/// `Send + Sync` is required by the future `BackgroundEvaluator`
240/// (separate ADR) so the dataset can be shared across worker threads
241/// without copying.
242pub trait EvalDataset: Send + Sync {
243    /// Concrete annotation type. For [`CocoDataset`] this is
244    /// [`CocoAnnotation`]; future datasets may use their own type with
245    /// extra metadata.
246    type Annotation: Annotation;
247
248    /// All images in the dataset, in input order.
249    fn images(&self) -> &[ImageMeta];
250
251    /// All categories in the dataset, in input order.
252    fn categories(&self) -> &[CategoryMeta];
253
254    /// Flat slice of every annotation in the dataset, in input order.
255    fn annotations(&self) -> &[Self::Annotation];
256
257    /// Indices into [`Self::annotations`] for a given image.
258    /// Returns an empty slice when the image is unknown.
259    fn ann_indices_for_image(&self, image_id: ImageId) -> &[usize];
260
261    /// Indices into [`Self::annotations`] for a given category.
262    /// Returns an empty slice when the category is unknown.
263    fn ann_indices_for_category(&self, cat_id: CategoryId) -> &[usize];
264
265    /// Convenience iterator over annotations for a given image.
266    fn ann_iter_for_image(&self, image_id: ImageId) -> AnnotationIter<'_, Self::Annotation> {
267        AnnotationIter {
268            anns: self.annotations(),
269            indices: self.ann_indices_for_image(image_id).iter(),
270        }
271    }
272
273    /// Convenience iterator over annotations for a given category.
274    fn ann_iter_for_category(&self, cat_id: CategoryId) -> AnnotationIter<'_, Self::Annotation> {
275        AnnotationIter {
276            anns: self.annotations(),
277            indices: self.ann_indices_for_category(cat_id).iter(),
278        }
279    }
280}
281
/// Iterator that walks a slice of annotation indices and yields
/// references into the flat annotation storage. Returned by the
/// `*_iter_for_*` methods on [`EvalDataset`].
pub struct AnnotationIter<'a, A> {
    /// Flat annotation storage the indices point into.
    anns: &'a [A],
    /// Remaining indices into `anns`, consumed front to back.
    indices: std::slice::Iter<'a, usize>,
}
289
290impl<'a, A> Iterator for AnnotationIter<'a, A> {
291    type Item = &'a A;
292
293    fn next(&mut self) -> Option<Self::Item> {
294        let idx = *self.indices.next()?;
295        self.anns.get(idx)
296    }
297
298    fn size_hint(&self) -> (usize, Option<usize>) {
299        self.indices.size_hint()
300    }
301}
302
// Exact because `size_hint` delegates to the inner `slice::Iter` over
// the indices, whose remaining length is always known precisely.
impl<'a, A> ExactSizeIterator for AnnotationIter<'a, A> {}
304
/// On-disk shape of a COCO ground-truth JSON file.
///
/// Only the fields vernier reads are typed; unknown top-level fields
/// (`info`, `licenses`, …) are dropped on load. Round-tripping in tests
/// uses the same struct; user JSON that round-trips through vernier
/// will lose those fields. We document this loudly because pycocotools
/// 2.0.11 added a single line preserving the `info` field on `loadRes`.
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct CocoJson {
    /// All images.
    pub images: Vec<ImageMeta>,
    /// All annotations (ground-truth side; see [`CocoAnnotation`]).
    pub annotations: Vec<CocoAnnotation>,
    /// All categories.
    pub categories: Vec<CategoryMeta>,
}
321
/// LVIS category-frequency tier (quirk **AB1** of ADR-0026).
///
/// Each LVIS category is tagged at dataset publication with one of
/// three buckets, keyed by how many *training* images contain at least
/// one annotation of that category:
///
/// - [`Frequency::Rare`]: `< 10` train images
/// - [`Frequency::Common`]: `[10, 100)` train images
/// - [`Frequency::Frequent`]: `≥ 100` train images
///
/// The boundaries are pinned by the upstream eval code at
/// `lvis/eval.py:537-541`; the LVIS paper's prose ("1-10 / 11-100 /
/// `>100`") is loose — a 10-image category is `Common`, not `Rare`.
/// The `frequency` field is precomputed at dataset publication;
/// vernier reads it as-is and never derives it from `image_count`
/// (quirk **AB2**).
///
/// Serializes to/from the single-letter form (`"r"` / `"c"` / `"f"`)
/// the LVIS JSON schema uses; [`Frequency::as_letter`] exposes the
/// same letters without a serde round trip.
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum Frequency {
    /// `< 10` train images.
    #[serde(rename = "r")]
    Rare,
    /// `[10, 100)` train images.
    #[serde(rename = "c")]
    Common,
    /// `≥ 100` train images.
    #[serde(rename = "f")]
    Frequent,
}
353
354impl Frequency {
355    /// LVIS single-letter form (`"r"` / `"c"` / `"f"`). Mirrors the
356    /// `serde(rename = ...)` tags on the variants — same canonical
357    /// form the JSON schema uses, available without going through
358    /// serde for places (FFI, log lines) that just need the string.
359    pub const fn as_letter(self) -> &'static str {
360        match self {
361            Self::Rare => "r",
362            Self::Common => "c",
363            Self::Frequent => "f",
364        }
365    }
366}
367
/// On-disk LVIS image record. Carries the COCO image fields plus the
/// LVIS-specific federated lists. The `pos_category_ids` set is
/// **derived** from GT annotations at load (quirk **AA1**) and is not
/// a JSON field — only `neg` and `not_exhaustive` are explicit.
#[derive(Debug, Clone, Deserialize)]
struct LvisImageRaw {
    id: ImageId,
    width: u32,
    height: u32,
    #[serde(default)]
    file_name: Option<String>,
    /// LVIS-only: categories verified absent from this image. `None`
    /// in the wild means a malformed LVIS JSON; v1 spec requires the
    /// field on every image (possibly empty). The loader tolerates
    /// `None` by treating it as the empty set.
    #[serde(default)]
    neg_category_ids: Option<Vec<CategoryId>>,
    /// LVIS-only: categories whose annotations on this image are not
    /// guaranteed exhaustive. Subset of `pos` by spec; consumed by
    /// quirk **AA3** to extend `dt_ignore` on unmatched DTs in the
    /// cell. Absent is likewise treated as empty by the loader.
    #[serde(default)]
    not_exhaustive_category_ids: Option<Vec<CategoryId>>,
}
391
/// On-disk LVIS category record. Carries the COCO category fields
/// plus the `frequency` tag (quirk **AB1**). `image_count` and
/// `instance_count` are stored on the upstream JSON but **not read**
/// by the eval code (quirk **AB2**); we drop them on load.
#[derive(Debug, Clone, Deserialize)]
struct LvisCategoryRaw {
    id: CategoryId,
    name: String,
    #[serde(default)]
    supercategory: Option<String>,
    /// Required field on every LVIS v1 category. `None` here means the
    /// JSON entry omitted it; collected and surfaced via
    /// [`EvalError::MissingFrequency`] (quirk **AB6** corrected).
    #[serde(default)]
    frequency: Option<Frequency>,
}
408
/// On-disk shape of an LVIS v1 ground-truth JSON file. Structurally
/// COCO JSON (quirk **AG1**) plus the federated extras on per-image
/// and per-category records. Annotations are byte-identical between
/// COCO and LVIS schemas, so [`CocoAnnotation`] is reused.
///
/// Deserialize-only: LVIS inputs are never written back out (see
/// [`CocoDataset::to_json_value`], which targets the COCO schema).
#[derive(Debug, Clone, Deserialize)]
struct LvisJson {
    images: Vec<LvisImageRaw>,
    annotations: Vec<CocoAnnotation>,
    categories: Vec<LvisCategoryRaw>,
}
419
/// LVIS federated metadata bundle (ADR-0026). Carried as a single
/// `Option` on [`CocoDataset`] because the four fields are all
/// populated together by [`CocoDataset::from_lvis_json_bytes`] and
/// all `None` after the COCO loader path. Storing one optional
/// struct (rather than four separate `Option<...>` fields) reflects
/// the all-or-none semantics and lets the orchestrator gate
/// federated branches on a single `is_some()` check.
#[derive(Debug, Clone)]
pub struct FederatedMetadata {
    /// Per-image positive-category set, derived from GT annotations
    /// at load (quirk **AA1**, not a JSON field). Every dataset image
    /// has an entry, possibly empty.
    pub pos_category_ids: HashMap<ImageId, HashSet<CategoryId>>,
    /// Per-image negative-category set, read verbatim from the JSON
    /// (quirk **AA2**).
    pub neg_category_ids: HashMap<ImageId, HashSet<CategoryId>>,
    /// Per-image not-exhaustive-category set, read verbatim from the
    /// JSON (quirk **AA3**).
    pub not_exhaustive_category_ids: HashMap<ImageId, HashSet<CategoryId>>,
    /// Per-category frequency tag (quirk **AB1**). Required on every
    /// category by `from_lvis_json_bytes`; missing entries raise
    /// [`EvalError::MissingFrequency`] at load (quirk **AB6**
    /// corrected).
    pub category_frequency: HashMap<CategoryId, Frequency>,
}
444
/// COCO ground-truth dataset.
///
/// Storage is a single `Arc<Vec<CocoAnnotation>>` plus per-image and
/// per-category index vectors. The `Arc` makes the dataset cheaply
/// shareable across worker threads (the `BackgroundEvaluator` from a
/// future ADR depends on this); the index vectors are owned by the
/// `CocoDataset` because they're cheap to rebuild and rebuild needs
/// to happen exactly when the annotation set changes.
///
/// ## LVIS federated metadata (ADR-0026)
///
/// `federated` is `Some` exactly when the dataset was loaded via
/// [`CocoDataset::from_lvis_json_bytes`]. The orchestrator's
/// federated branches gate on `federated.is_some()`; absence is the
/// COCO default, where the matching engine runs unchanged.
#[derive(Debug, Clone)]
pub struct CocoDataset {
    images: Arc<Vec<ImageMeta>>,
    categories: Arc<Vec<CategoryMeta>>,
    annotations: Arc<Vec<CocoAnnotation>>,
    /// Indices into `annotations`, keyed by image id.
    by_image: HashMap<ImageId, Vec<usize>>,
    /// Indices into `annotations`, keyed by category id.
    by_category: HashMap<CategoryId, Vec<usize>>,
    /// Indices into `annotations`, keyed by `(image, category)` cell.
    by_image_cat: HashMap<(ImageId, CategoryId), Vec<usize>>,
    /// LVIS federated metadata; `Some` only on the LVIS loader path.
    federated: Option<FederatedMetadata>,
    /// 32-byte BLAKE3 fingerprint of the dataset's canonical form.
    /// Cached lazily on first call to [`Self::dataset_hash`]; carried
    /// in distributed-eval partial headers (ADR-0031). Wrapped in
    /// `Arc<OnceLock>` so cheap clones share the same cache, matching
    /// the existing Arc-shared layout for `images` / `categories` /
    /// `annotations`.
    cached_hash: Arc<OnceLock<[u8; 32]>>,
}
477
478impl CocoDataset {
479    /// Loads a dataset from a JSON byte slice.
480    ///
481    /// Validates that every annotation references a known image and a
482    /// known category; missing references raise [`EvalError::InvalidAnnotation`]
483    /// rather than producing a silently-empty dataset.
484    pub fn from_json_bytes(bytes: &[u8]) -> Result<Self, EvalError> {
485        let raw: CocoJson = serde_json::from_slice(bytes)?;
486        Self::from_parts(raw.images, raw.annotations, raw.categories)
487    }
488
489    /// Loads a dataset from already-typed parts.
490    pub fn from_parts(
491        images: Vec<ImageMeta>,
492        annotations: Vec<CocoAnnotation>,
493        categories: Vec<CategoryMeta>,
494    ) -> Result<Self, EvalError> {
495        let known_images: HashSet<ImageId> = images.iter().map(|i| i.id).collect();
496        let known_categories: HashSet<CategoryId> = categories.iter().map(|c| c.id).collect();
497
498        let mut by_image: HashMap<ImageId, Vec<usize>> = HashMap::with_capacity(images.len());
499        let mut by_category: HashMap<CategoryId, Vec<usize>> =
500            HashMap::with_capacity(categories.len());
501        let mut by_image_cat: HashMap<(ImageId, CategoryId), Vec<usize>> = HashMap::new();
502
503        for (idx, ann) in annotations.iter().enumerate() {
504            if !known_images.contains(&ann.image_id) {
505                return Err(EvalError::InvalidAnnotation {
506                    detail: format!(
507                        "annotation id={} references unknown image_id={}",
508                        ann.id.0, ann.image_id.0
509                    ),
510                });
511            }
512            if !known_categories.contains(&ann.category_id) {
513                return Err(EvalError::InvalidAnnotation {
514                    detail: format!(
515                        "annotation id={} references unknown category_id={}",
516                        ann.id.0, ann.category_id.0
517                    ),
518                });
519            }
520            by_image.entry(ann.image_id).or_default().push(idx);
521            by_category.entry(ann.category_id).or_default().push(idx);
522            by_image_cat
523                .entry((ann.image_id, ann.category_id))
524                .or_default()
525                .push(idx);
526        }
527
528        Ok(Self {
529            images: Arc::new(images),
530            categories: Arc::new(categories),
531            annotations: Arc::new(annotations),
532            by_image,
533            by_category,
534            by_image_cat,
535            federated: None,
536            cached_hash: Arc::new(OnceLock::new()),
537        })
538    }
539
540    /// Loads an LVIS v1 ground-truth dataset from a JSON byte slice.
541    ///
542    /// LVIS JSON is structurally COCO JSON plus per-image
543    /// `neg_category_ids` / `not_exhaustive_category_ids` and
544    /// per-category `frequency` (quirk **AG1**). This loader reads the
545    /// extras into the federated metadata fields on the returned
546    /// dataset; the underlying `images` / `annotations` / `categories`
547    /// projections match what [`Self::from_json_bytes`] would produce
548    /// on the same JSON.
549    ///
550    /// ## Validation
551    ///
552    /// - **AA1.** `pos_category_ids[I]` is **derived** from GT
553    ///   annotations: `pos[I] = {ann.category_id for ann in
554    ///   annotations[I]}`. Not a JSON field. A category with zero
555    ///   annotations on `I` is *not* in `pos[I]`.
556    /// - **AA7 (corrected).** Disjointness invariants are enforced at
557    ///   load:
558    ///     - `pos[I] ∩ neg[I] = ∅` — a category with GT on an image
559    ///       cannot also be in `neg[I]`.
560    ///     - `not_exhaustive[I] ⊆ pos[I]` — by spec, not_exhaustive
561    ///       is a subset of pos.
562    ///     - `not_exhaustive[I] ∩ neg[I] = ∅` — equivalent restatement
563    ///       given the prior two.
564    ///
565    ///   The first violation surfaces as
566    ///   [`EvalError::LvisFederatedConflict`] with the offending
567    ///   `(image_id, category_id)`.
568    /// - **AB6 (corrected).** Every category must carry a `frequency`
569    ///   tag. Missing tags are collected across the full categories
570    ///   list and surfaced once via [`EvalError::MissingFrequency`]
571    ///   with a sorted id list — more debuggable than lvis-api's
572    ///   mid-eval `KeyError` on the first miss.
573    ///
574    /// Per-image `neg_category_ids` and `not_exhaustive_category_ids`
575    /// are optional in the JSON: an absent field is treated as an
576    /// empty set, which matches the LVIS v1 semantic ("no negatives /
577    /// nothing flagged non-exhaustive on this image").
578    pub fn from_lvis_json_bytes(bytes: &[u8]) -> Result<Self, EvalError> {
579        let raw: LvisJson = serde_json::from_slice(bytes)?;
580
581        let images: Vec<ImageMeta> = raw
582            .images
583            .iter()
584            .map(|im| ImageMeta {
585                id: im.id,
586                width: im.width,
587                height: im.height,
588                file_name: im.file_name.clone(),
589            })
590            .collect();
591        let categories: Vec<CategoryMeta> = raw
592            .categories
593            .iter()
594            .map(|c| CategoryMeta {
595                id: c.id,
596                name: c.name.clone(),
597                supercategory: c.supercategory.clone(),
598            })
599            .collect();
600
601        // AB6 (corrected): collect all categories missing `frequency`
602        // and raise once with the full list. Sorted ascending for
603        // stable error messages.
604        let mut missing_freq: Vec<i64> = raw
605            .categories
606            .iter()
607            .filter(|c| c.frequency.is_none())
608            .map(|c| c.id.0)
609            .collect();
610        if !missing_freq.is_empty() {
611            missing_freq.sort_unstable();
612            return Err(EvalError::MissingFrequency {
613                category_ids: missing_freq,
614            });
615        }
616        let category_frequency: HashMap<CategoryId, Frequency> = raw
617            .categories
618            .iter()
619            .filter_map(|c| c.frequency.map(|f| (c.id, f)))
620            .collect();
621
622        // Build the dataset spine via the existing constructor — that
623        // gives us the ref-integrity validation (J5 / AG1) for free.
624        let mut dataset = Self::from_parts(images, raw.annotations, categories)?;
625
626        // AA1: derive pos[I] from GTs. Defaults each image to an empty
627        // set so callers can ask without special-casing.
628        let mut pos: HashMap<ImageId, HashSet<CategoryId>> =
629            HashMap::with_capacity(raw.images.len());
630        for im in &raw.images {
631            pos.entry(im.id).or_default();
632        }
633        for ann in dataset.annotations.iter() {
634            pos.entry(ann.image_id).or_default().insert(ann.category_id);
635        }
636
637        // Project explicit `neg` / `not_exhaustive` fields onto sets;
638        // treat absent / empty as the empty set.
639        let mut neg: HashMap<ImageId, HashSet<CategoryId>> =
640            HashMap::with_capacity(raw.images.len());
641        let mut nel: HashMap<ImageId, HashSet<CategoryId>> =
642            HashMap::with_capacity(raw.images.len());
643        for im in &raw.images {
644            let neg_set: HashSet<CategoryId> = im
645                .neg_category_ids
646                .as_deref()
647                .unwrap_or(&[])
648                .iter()
649                .copied()
650                .collect();
651            let nel_set: HashSet<CategoryId> = im
652                .not_exhaustive_category_ids
653                .as_deref()
654                .unwrap_or(&[])
655                .iter()
656                .copied()
657                .collect();
658            neg.insert(im.id, neg_set);
659            nel.insert(im.id, nel_set);
660        }
661
662        // AA7 (corrected): disjointness validation.
663        for im in &raw.images {
664            let image_id = im.id;
665            let pos_i = pos.get(&image_id).map_or_else(HashSet::new, Clone::clone);
666            let neg_i = &neg[&image_id];
667            let nel_i = &nel[&image_id];
668
669            // pos ∩ neg: a category with GT on this image cannot also
670            // be in neg.
671            if let Some(c) = pos_i.intersection(neg_i).next().copied() {
672                return Err(EvalError::LvisFederatedConflict {
673                    image_id: image_id.0,
674                    category_id: c.0,
675                    detail: "category has GT on image but is also in neg_category_ids",
676                });
677            }
678            // not_exhaustive ⊆ pos: by spec.
679            if let Some(c) = nel_i.difference(&pos_i).next().copied() {
680                return Err(EvalError::LvisFederatedConflict {
681                    image_id: image_id.0,
682                    category_id: c.0,
683                    detail:
684                        "category in not_exhaustive_category_ids but not in pos (no GT on image)",
685                });
686            }
687            // not_exhaustive ∩ neg: implied by the first two but
688            // checked explicitly so a malformed JSON gets the most
689            // direct error.
690            if let Some(c) = nel_i.intersection(neg_i).next().copied() {
691                return Err(EvalError::LvisFederatedConflict {
692                    image_id: image_id.0,
693                    category_id: c.0,
694                    detail: "category in both not_exhaustive_category_ids and neg_category_ids",
695                });
696            }
697        }
698
699        dataset.federated = Some(FederatedMetadata {
700            pos_category_ids: pos,
701            neg_category_ids: neg,
702            not_exhaustive_category_ids: nel,
703            category_frequency,
704        });
705        Ok(dataset)
706    }
707
708    /// LVIS federated metadata bundle. `Some` only when the dataset
709    /// was built by [`Self::from_lvis_json_bytes`]; the orchestrator's
710    /// AA3/AA4 branches gate on this.
711    pub fn federated(&self) -> Option<&FederatedMetadata> {
712        self.federated.as_ref()
713    }
714
715    /// Per-image positive-category set, derived from GTs at load time
716    /// (quirk **AA1**). `Some` only when the dataset is federated.
717    pub fn pos_category_ids(&self) -> Option<&HashMap<ImageId, HashSet<CategoryId>>> {
718        self.federated.as_ref().map(|f| &f.pos_category_ids)
719    }
720
721    /// Per-image negative-category set, read verbatim from the LVIS
722    /// JSON (quirk **AA2**). `Some` only when the dataset is federated.
723    pub fn neg_category_ids(&self) -> Option<&HashMap<ImageId, HashSet<CategoryId>>> {
724        self.federated.as_ref().map(|f| &f.neg_category_ids)
725    }
726
727    /// Per-image not-exhaustive-category set, read verbatim from the
728    /// LVIS JSON (quirk **AA3**). `Some` only when the dataset is
729    /// federated.
730    pub fn not_exhaustive_category_ids(&self) -> Option<&HashMap<ImageId, HashSet<CategoryId>>> {
731        self.federated
732            .as_ref()
733            .map(|f| &f.not_exhaustive_category_ids)
734    }
735
736    /// Per-category frequency tag, read verbatim from the LVIS JSON
737    /// (quirk **AB1**). `Some` only when the dataset is federated;
738    /// missing-on-some-categories inputs are rejected at load
739    /// (quirk **AB6**).
740    pub fn category_frequency(&self) -> Option<&HashMap<CategoryId, Frequency>> {
741        self.federated.as_ref().map(|f| &f.category_frequency)
742    }
743
744    /// `true` when the dataset carries LVIS federated metadata.
745    /// Cheap shortcut for orchestration code that gates behaviour on
746    /// the federated flag.
747    pub fn is_federated(&self) -> bool {
748        self.federated.is_some()
749    }
750
751    /// Round-trips the dataset to the on-disk JSON shape, preserving
752    /// every field vernier carries. Useful for fixture authoring and
753    /// for debugging serde mismatches.
754    ///
755    /// LVIS federated metadata is **not** included in the output —
756    /// the round trip targets the COCO schema only. Callers needing
757    /// to round-trip LVIS JSON must use the source bytes directly.
758    pub fn to_json_value(&self) -> CocoJson {
759        CocoJson {
760            images: (*self.images).clone(),
761            annotations: (*self.annotations).clone(),
762            categories: (*self.categories).clone(),
763        }
764    }
765}
766
767impl EvalDataset for CocoDataset {
768    type Annotation = CocoAnnotation;
769
770    fn images(&self) -> &[ImageMeta] {
771        &self.images
772    }
773
774    fn categories(&self) -> &[CategoryMeta] {
775        &self.categories
776    }
777
778    fn annotations(&self) -> &[CocoAnnotation] {
779        &self.annotations
780    }
781
782    fn ann_indices_for_image(&self, image_id: ImageId) -> &[usize] {
783        self.by_image.get(&image_id).map_or(&[][..], Vec::as_slice)
784    }
785
786    fn ann_indices_for_category(&self, cat_id: CategoryId) -> &[usize] {
787        self.by_category.get(&cat_id).map_or(&[][..], Vec::as_slice)
788    }
789}
790
791impl CocoDataset {
792    /// Indices into [`Self::annotations`] for a given `(image, category)`
793    /// cell. Empty when no GT of that category exists on that image.
794    pub fn ann_indices_for(&self, image: ImageId, cat: CategoryId) -> &[usize] {
795        self.by_image_cat
796            .get(&(image, cat))
797            .map_or(&[][..], Vec::as_slice)
798    }
799}
800
801// ---------------------------------------------------------------------------
802// dataset_hash — canonical-form fingerprint for ADR-0031 partials.
803//
804// The hash is the BLAKE3 digest of a deterministic byte stream built
805// from the dataset's images + categories + annotations + federated
806// metadata. Independent of input order: each section is sorted by id
807// before hashing. The canonical form is the load-bearing wire-format
808// invariant that makes "this partial was computed against the same GT
809// I have" a strict, refusable check; format_version bumps when the
810// canonical form changes (per ADR-0031 §"Wire format" backward-compat
811// rules).
812//
813// Domain separators (4-byte ASCII tags) precede each section so a
814// rearranged stream cannot collide with the canonical one.
815// ---------------------------------------------------------------------------
816
/// Domain-separated section tag for the canonical-form stream: the
/// whole-dataset prefix written before the version byte.
const HASH_TAG_DATASET: &[u8; 4] = b"DSET";
/// Section tag preceding the id-sorted image list.
const HASH_TAG_IMAGES: &[u8; 4] = b"IMGS";
/// Section tag preceding the id-sorted category list.
const HASH_TAG_CATEGORIES: &[u8; 4] = b"CATS";
/// Section tag preceding the id-sorted annotation list.
const HASH_TAG_ANNOTATIONS: &[u8; 4] = b"ANNS";
/// Section tag preceding the LVIS federated-metadata payload.
const HASH_TAG_FEDERATED: &[u8; 4] = b"FEDM";

/// Bumped when the canonical-form layout changes. Read into the
/// stream once, before any section, so a v1 hash can never collide
/// with a v2 hash even on identical underlying data.
const HASH_CANONICAL_VERSION: u8 = 1;
828
829#[inline]
830fn hash_u8(h: &mut blake3::Hasher, v: u8) {
831    h.update(&[v]);
832}
833#[inline]
834fn hash_u32(h: &mut blake3::Hasher, v: u32) {
835    h.update(&v.to_le_bytes());
836}
837#[inline]
838fn hash_i64(h: &mut blake3::Hasher, v: i64) {
839    h.update(&v.to_le_bytes());
840}
841#[inline]
842fn hash_u64(h: &mut blake3::Hasher, v: u64) {
843    h.update(&v.to_le_bytes());
844}
845#[inline]
846fn hash_f64(h: &mut blake3::Hasher, v: f64) {
847    // Bit-exact representation; canonical for finite values. NaN
848    // payloads matter (two NaNs with different bits hash differently);
849    // the dataset loader rejects non-finite area / bbox / keypoints
850    // upstream so the surface here is f64s the user actually trusts.
851    h.update(&v.to_bits().to_le_bytes());
852}
853#[inline]
854fn hash_bool(h: &mut blake3::Hasher, v: bool) {
855    hash_u8(h, u8::from(v));
856}
857#[inline]
858fn hash_bytes(h: &mut blake3::Hasher, bytes: &[u8]) {
859    hash_u64(h, bytes.len() as u64);
860    h.update(bytes);
861}
862#[inline]
863fn hash_string(h: &mut blake3::Hasher, s: &str) {
864    hash_bytes(h, s.as_bytes());
865}
866#[inline]
867fn hash_option<T>(
868    h: &mut blake3::Hasher,
869    opt: Option<T>,
870    write: impl FnOnce(&mut blake3::Hasher, T),
871) {
872    match opt {
873        None => hash_u8(h, 0),
874        Some(v) => {
875            hash_u8(h, 1);
876            write(h, v);
877        }
878    }
879}
880
881fn hash_bbox(h: &mut blake3::Hasher, b: &Bbox) {
882    hash_f64(h, b.x);
883    hash_f64(h, b.y);
884    hash_f64(h, b.w);
885    hash_f64(h, b.h);
886}
887
888fn hash_segmentation(h: &mut blake3::Hasher, seg: Option<&Segmentation>) {
889    match seg {
890        None => hash_u8(h, 0),
891        Some(Segmentation::Polygons(polys)) => {
892            hash_u8(h, 1);
893            hash_u64(h, polys.len() as u64);
894            for poly in polys {
895                hash_u64(h, poly.len() as u64);
896                for &v in poly {
897                    hash_f64(h, v);
898                }
899            }
900        }
901        Some(Segmentation::Rle(rle)) => {
902            let [rh, rw] = rle.size;
903            match &rle.counts {
904                SegmentationRleCounts::Compressed(s) => {
905                    hash_u8(h, 2);
906                    hash_u32(h, rh);
907                    hash_u32(h, rw);
908                    hash_string(h, s);
909                }
910                SegmentationRleCounts::Uncompressed(counts) => {
911                    hash_u8(h, 3);
912                    hash_u32(h, rh);
913                    hash_u32(h, rw);
914                    hash_u64(h, counts.len() as u64);
915                    for &c in counts.iter() {
916                        hash_u32(h, c);
917                    }
918                }
919            }
920        }
921    }
922}
923
924/// Walk a slice in id-sorted order, prefixed by a domain tag and the
925/// element count, hashing each element via `write`. The id projection
926/// (`key`) returns the i64 id; sort is by that key, unstable (ids are
927/// unique by construction). Avoids materializing a fresh sorted Vec
928/// of items by sorting an index permutation instead.
929fn hash_id_sorted<T>(
930    h: &mut blake3::Hasher,
931    tag: &[u8; 4],
932    items: &[T],
933    key: impl Fn(&T) -> i64,
934    write: impl Fn(&mut blake3::Hasher, &T),
935) {
936    h.update(tag);
937    let mut order: Vec<usize> = (0..items.len()).collect();
938    order.sort_unstable_by_key(|&i| key(&items[i]));
939    hash_u64(h, order.len() as u64);
940    for &i in &order {
941        write(h, &items[i]);
942    }
943}
944
945fn hash_image_meta(h: &mut blake3::Hasher, im: &ImageMeta) {
946    let ImageMeta {
947        id,
948        width,
949        height,
950        file_name,
951    } = im;
952    hash_i64(h, id.0);
953    hash_u32(h, *width);
954    hash_u32(h, *height);
955    hash_option(h, file_name.as_deref(), hash_string);
956}
957
958fn hash_category_meta(h: &mut blake3::Hasher, c: &CategoryMeta) {
959    let CategoryMeta {
960        id,
961        name,
962        supercategory,
963    } = c;
964    hash_i64(h, id.0);
965    hash_string(h, name);
966    hash_option(h, supercategory.as_deref(), hash_string);
967}
968
/// Writes one annotation's canonical form. The field order below —
/// ids, area, flags, bbox, segmentation, keypoints, num_keypoints —
/// is part of the wire invariant: reordering any call changes every
/// dataset hash (bump `HASH_CANONICAL_VERSION` instead of editing
/// in place).
fn hash_coco_annotation(h: &mut blake3::Hasher, a: &CocoAnnotation) {
    // Exhaustive destructure: adding a field to CocoAnnotation is a
    // compile error here, forcing the canonical form to stay in sync.
    let CocoAnnotation {
        id,
        image_id,
        category_id,
        area,
        is_crowd,
        ignore_flag,
        bbox,
        segmentation,
        keypoints,
        num_keypoints,
    } = a;
    hash_i64(h, id.0);
    hash_i64(h, image_id.0);
    hash_i64(h, category_id.0);
    hash_f64(h, *area);
    hash_bool(h, *is_crowd);
    hash_option(h, *ignore_flag, hash_bool);
    hash_bbox(h, bbox);
    hash_segmentation(h, segmentation.as_ref());
    // Keypoints: length prefix, then bit-exact coordinate triplets.
    hash_option(h, keypoints.as_deref(), |h, kps| {
        hash_u64(h, kps.len() as u64);
        for &v in kps {
            hash_f64(h, v);
        }
    });
    hash_option(h, *num_keypoints, hash_u32);
}
1000
1001fn hash_federated(h: &mut blake3::Hasher, fed: &FederatedMetadata) {
1002    h.update(HASH_TAG_FEDERATED);
1003
1004    // category_frequency: sort by category id, write (id, letter byte).
1005    let mut freq_pairs: Vec<(i64, &Frequency)> = fed
1006        .category_frequency
1007        .iter()
1008        .map(|(k, v)| (k.0, v))
1009        .collect();
1010    freq_pairs.sort_unstable_by_key(|(k, _)| *k);
1011    hash_u64(h, freq_pairs.len() as u64);
1012    for (cid, freq) in freq_pairs {
1013        hash_i64(h, cid);
1014        // `as_letter` is a single ASCII char; one byte is enough.
1015        hash_u8(h, freq.as_letter().as_bytes()[0]);
1016    }
1017
1018    // pos / neg / not_exhaustive: each is HashMap<ImageId, HashSet<CategoryId>>.
1019    // Hash all three sections via the same canonical form: sort by image id,
1020    // then for each image sort the category ids ascending and write count + ids.
1021    type FedSection<'a> = (&'a [u8; 3], &'a HashMap<ImageId, HashSet<CategoryId>>);
1022    let sections: [FedSection<'_>; 3] = [
1023        (b"POS", &fed.pos_category_ids),
1024        (b"NEG", &fed.neg_category_ids),
1025        (b"NEX", &fed.not_exhaustive_category_ids),
1026    ];
1027    for (tag, map) in sections {
1028        h.update(tag);
1029        let mut entries: Vec<(i64, Vec<i64>)> = map
1030            .iter()
1031            .map(|(image_id, cats)| {
1032                let mut cat_ids: Vec<i64> = cats.iter().map(|c| c.0).collect();
1033                cat_ids.sort_unstable();
1034                (image_id.0, cat_ids)
1035            })
1036            .collect();
1037        entries.sort_unstable_by_key(|(image_id, _)| *image_id);
1038        hash_u64(h, entries.len() as u64);
1039        for (image_id, cat_ids) in entries {
1040            hash_i64(h, image_id);
1041            hash_u64(h, cat_ids.len() as u64);
1042            for cid in cat_ids {
1043                hash_i64(h, cid);
1044            }
1045        }
1046    }
1047}
1048
1049impl CocoDataset {
1050    /// 32-byte BLAKE3 fingerprint of this dataset's canonical form.
1051    /// Stable across input orderings: images, categories, annotations
1052    /// are sorted by id before hashing. Lazily cached on first call;
1053    /// shared across [`Clone`]s via the underlying `Arc<OnceLock>`.
1054    ///
1055    /// Carried in distributed-eval partial headers (ADR-0031); a
1056    /// receiving rank refuses to merge partials whose `dataset_hash`
1057    /// disagrees with its live dataset's.
1058    pub fn dataset_hash(&self) -> [u8; 32] {
1059        *self.cached_hash.get_or_init(|| self.compute_dataset_hash())
1060    }
1061
1062    fn compute_dataset_hash(&self) -> [u8; 32] {
1063        let mut h = blake3::Hasher::new();
1064        h.update(HASH_TAG_DATASET);
1065        hash_u8(&mut h, HASH_CANONICAL_VERSION);
1066
1067        hash_id_sorted(
1068            &mut h,
1069            HASH_TAG_IMAGES,
1070            &self.images,
1071            |im| im.id.0,
1072            hash_image_meta,
1073        );
1074        hash_id_sorted(
1075            &mut h,
1076            HASH_TAG_CATEGORIES,
1077            &self.categories,
1078            |c| c.id.0,
1079            hash_category_meta,
1080        );
1081        hash_id_sorted(
1082            &mut h,
1083            HASH_TAG_ANNOTATIONS,
1084            &self.annotations,
1085            |a| a.id.0,
1086            hash_coco_annotation,
1087        );
1088
1089        // Federated metadata, when present (LVIS path).
1090        match self.federated.as_ref() {
1091            None => hash_u8(&mut h, 0),
1092            Some(fed) => {
1093                hash_u8(&mut h, 1);
1094                hash_federated(&mut h, fed);
1095            }
1096        }
1097
1098        *h.finalize().as_bytes()
1099    }
1100}
1101
1102// ---------------------------------------------------------------------------
1103// detections (DT side)
1104// ---------------------------------------------------------------------------
1105
/// One COCO detection record (the DT side, what `loadRes` consumes).
///
/// Per the dispositions in this module's header:
///
/// - `is_crowd` does not exist as a field — quirks **E2 / J4**.
/// - `area` is derived from `bbox` at construction (`bbox.w * bbox.h`) —
///   quirk **J3**.
/// - `id` is honored when the user supplies one and auto-assigned
///   otherwise — quirk **J1** (`aligned`, an opinionated improvement
///   over pycocotools' silent overwrite).
#[derive(Debug, Clone, PartialEq)]
pub struct CocoDetection {
    /// Detection id. Either user-supplied (J1) or auto-assigned by
    /// [`CocoDetections::from_inputs`].
    pub id: AnnId,
    /// Image this detection is on.
    pub image_id: ImageId,
    /// Category this detection predicts.
    pub category_id: CategoryId,
    /// Confidence score. Sort key for the matching engine. Always
    /// finite when built via [`CocoDetections::from_inputs`], which
    /// rejects NaN/inf scores.
    pub score: f64,
    /// Bounding box (`(x, y, w, h)`).
    pub bbox: Bbox,
    /// Pixel area, derived from `bbox` per quirk **J3**.
    pub area: f64,
    /// Segmentation prediction, when the detector emits one. `None`
    /// for bbox-only detectors. Parity dispositions match
    /// [`CocoAnnotation::segmentation`].
    pub segmentation: Option<Segmentation>,
    /// Flat keypoint triplets `[x_1, y_1, v_1, x_2, y_2, v_2, ...]`
    /// (per ADR-0012). `None` for bbox-/segm-only detectors; the eval
    /// pipeline raises [`EvalError::InvalidAnnotation`] when a DT is
    /// missing keypoints under `iouType="keypoints"`.
    pub keypoints: Option<Vec<f64>>,
    /// COCO `num_keypoints` count of *visible* keypoints. On DT this
    /// field is not required (pycocotools never reads it); the OKS
    /// pipeline derives it from `keypoints` when needed. Tracked here
    /// for shape-parity with [`CocoAnnotation::num_keypoints`].
    pub num_keypoints: Option<u32>,
}
1146
1147impl Annotation for CocoDetection {
1148    fn image_id(&self) -> ImageId {
1149        self.image_id
1150    }
1151    fn category_id(&self) -> CategoryId {
1152        self.category_id
1153    }
1154    fn area(&self) -> f64 {
1155        self.area
1156    }
1157    fn is_crowd(&self) -> bool {
1158        false
1159    }
1160    fn effective_ignore(&self, _: ParityMode) -> bool {
1161        false
1162    }
1163}
1164
/// Caller-side input for one detection. Mirrors the shape of a single
/// entry of a COCO results JSON array but uses typed ids;
/// [`CocoDetections::from_json_bytes`] deserializes a `Vec` of these
/// directly from the results array.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
pub struct DetectionInput {
    /// Optional user-supplied id (quirk **J1**). Absent → auto-assigned.
    #[serde(default)]
    pub id: Option<AnnId>,
    /// Image id.
    pub image_id: ImageId,
    /// Category id.
    pub category_id: CategoryId,
    /// Confidence score. Must be finite; `from_inputs` rejects
    /// NaN/inf with [`EvalError::NonFinite`].
    pub score: f64,
    /// Bounding box.
    pub bbox: Bbox,
    /// Optional segmentation prediction. `None` for bbox-only
    /// detectors. Stored verbatim and normalized via
    /// [`Segmentation::to_rle`] at eval time.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub segmentation: Option<Segmentation>,
    /// Optional keypoint prediction (flat `[x, y, v, ...]` triplets,
    /// per ADR-0012). `None` for non-keypoint detectors.
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub keypoints: Option<Vec<f64>>,
    /// Optional `num_keypoints` count. The OKS path derives this from
    /// `keypoints` when absent (DT side does not require it).
    #[serde(default, skip_serializing_if = "Option::is_none")]
    pub num_keypoints: Option<u32>,
}
1194
/// COCO detections collection — flat storage plus `(image, category)`-
/// and per-image indices for the per-cell gather.
#[derive(Debug, Clone)]
pub struct CocoDetections {
    // Flat DT storage; the `Arc` makes `clone` cheap (shared,
    // immutable backing vector).
    detections: Arc<Vec<CocoDetection>>,
    // Indices into `detections`, keyed by (image, category) cell.
    by_image_cat: HashMap<(ImageId, CategoryId), Vec<usize>>,
    // Indices into `detections`, keyed by image only (the
    // `useCats=false` path — see `indices_for_image`).
    by_image: HashMap<ImageId, Vec<usize>>,
}
1203
1204impl CocoDetections {
1205    /// Loads detections from the JSON array shape pycocotools'
1206    /// `loadRes` consumes (a list of objects with `image_id`,
1207    /// `category_id`, `bbox`, `score`, optional `id`).
1208    ///
1209    /// `iscrowd` and `area` fields, if present, are silently dropped:
1210    /// quirks **E2/J4** force `is_crowd=0` and quirk **J3** derives
1211    /// `area` from `bbox`.
1212    pub fn from_json_bytes(bytes: &[u8]) -> Result<Self, EvalError> {
1213        let raw: Vec<DetectionInput> = serde_json::from_slice(bytes)?;
1214        Self::from_inputs(raw)
1215    }
1216
1217    /// Builds a [`CocoDetections`] from typed inputs. Auto-assigns ids
1218    /// (quirk **J1**) for inputs that did not supply one, validates
1219    /// finite scores, and derives areas (quirk **J3**).
1220    pub fn from_inputs(inputs: Vec<DetectionInput>) -> Result<Self, EvalError> {
1221        let mut detections = Vec::with_capacity(inputs.len());
1222        let mut next_auto = 1i64;
1223        for input in inputs {
1224            if !input.score.is_finite() {
1225                return Err(EvalError::NonFinite {
1226                    context: "detection score",
1227                });
1228            }
1229            let id = match input.id {
1230                Some(id) => id,
1231                None => {
1232                    let id = AnnId(next_auto);
1233                    next_auto += 1;
1234                    id
1235                }
1236            };
1237            detections.push(CocoDetection {
1238                id,
1239                image_id: input.image_id,
1240                category_id: input.category_id,
1241                score: input.score,
1242                bbox: input.bbox,
1243                area: input.bbox.w * input.bbox.h,
1244                segmentation: input.segmentation,
1245                keypoints: input.keypoints,
1246                num_keypoints: input.num_keypoints,
1247            });
1248        }
1249
1250        let mut by_image_cat: HashMap<(ImageId, CategoryId), Vec<usize>> = HashMap::new();
1251        let mut by_image: HashMap<ImageId, Vec<usize>> = HashMap::new();
1252        for (idx, dt) in detections.iter().enumerate() {
1253            by_image_cat
1254                .entry((dt.image_id, dt.category_id))
1255                .or_default()
1256                .push(idx);
1257            by_image.entry(dt.image_id).or_default().push(idx);
1258        }
1259
1260        Ok(Self {
1261            detections: Arc::new(detections),
1262            by_image_cat,
1263            by_image,
1264        })
1265    }
1266
1267    /// Build from already-resolved records, preserving their ids and
1268    /// fields verbatim. Used by the streaming evaluator to assemble a
1269    /// `CocoDetections` view across batches at finalize/snapshot time
1270    /// without re-running the auto-id and area-derivation logic in
1271    /// [`Self::from_inputs`].
1272    pub fn from_records(records: Vec<CocoDetection>) -> Self {
1273        let mut by_image_cat: HashMap<(ImageId, CategoryId), Vec<usize>> = HashMap::new();
1274        let mut by_image: HashMap<ImageId, Vec<usize>> = HashMap::new();
1275        for (idx, dt) in records.iter().enumerate() {
1276            by_image_cat
1277                .entry((dt.image_id, dt.category_id))
1278                .or_default()
1279                .push(idx);
1280            by_image.entry(dt.image_id).or_default().push(idx);
1281        }
1282        Self {
1283            detections: Arc::new(records),
1284            by_image_cat,
1285            by_image,
1286        }
1287    }
1288
1289    /// Flat slice of every detection.
1290    pub fn detections(&self) -> &[CocoDetection] {
1291        &self.detections
1292    }
1293
1294    /// Indices into [`Self::detections`] for one `(image, category)`
1295    /// cell. Empty slice when the cell is empty (no detections of that
1296    /// category on that image).
1297    pub fn indices_for(&self, image: ImageId, cat: CategoryId) -> &[usize] {
1298        self.by_image_cat
1299            .get(&(image, cat))
1300            .map_or(&[][..], Vec::as_slice)
1301    }
1302
1303    /// Indices into [`Self::detections`] for every detection on an
1304    /// image, regardless of category. Path used when `useCats=false`
1305    /// (quirk **L4**).
1306    pub fn indices_for_image(&self, image: ImageId) -> &[usize] {
1307        self.by_image.get(&image).map_or(&[][..], Vec::as_slice)
1308    }
1309
1310    /// LVIS per-image top-`max_dets` trim (quirk **AC2** of ADR-0026).
1311    ///
1312    /// Mirrors `LVISResults.limit_dets_per_image` at
1313    /// `lvis/results.py:73-84`: groups detections by `image_id`,
1314    /// sorts each group by score descending (stable — quirk
1315    /// **AC4**), and keeps the top `max_dets` across **all
1316    /// categories combined**. The cross-class consequence (quirk
1317    /// **AC3**): 250 cat-A + 350 cat-B detections on one image trim
1318    /// to **300 total**, not 250 + min(350, 300).
1319    ///
1320    /// `max_dets < 0` (or `i64::MIN`) disables the trim entirely
1321    /// (quirk **AC5**, mirroring the `if max_dets >= 0` guard at
1322    /// `results.py:39-40`). `max_dets == 0` keeps zero detections —
1323    /// edge case the upstream allows but isn't useful in practice.
1324    ///
1325    /// The output preserves DT ids and per-detection fields verbatim;
1326    /// only the membership of the flat detections vector and the
1327    /// per-cell index maps change. The original [`CocoDetections`] is
1328    /// untouched (the inner `Arc<Vec<CocoDetection>>` is *not* shared
1329    /// with the result — the trim copies the surviving entries into a
1330    /// fresh allocation).
1331    ///
1332    /// Within each image's group, ties on `score` resolve in input
1333    /// order: Rust's `slice::sort_by` is stable, matching Python's
1334    /// `sorted(_, reverse=True)` Timsort behavior. The fact that the
1335    /// matching path's `argsort_score_desc` is *also* stable
1336    /// (`np.argsort(-scores, kind="mergesort")`, AC4) is a separate
1337    /// invariant — vernier's parity claim covers both sites.
1338    pub fn lvis_trim(&self, max_dets: i64) -> CocoDetections {
1339        if max_dets < 0 {
1340            // AC5: negative cap disables the trim. Cheap clone — the
1341            // detections `Arc` is shared, only the index maps allocate.
1342            return self.clone();
1343        }
1344        let cap = max_dets as usize;
1345        let mut by_image_groups: HashMap<ImageId, Vec<usize>> = HashMap::new();
1346        for (idx, dt) in self.detections.iter().enumerate() {
1347            by_image_groups.entry(dt.image_id).or_default().push(idx);
1348        }
1349        // Iterate images in id-ascending order so the output's flat
1350        // detections vector is deterministic — the LVIS oracle's
1351        // `LVISResults.dataset['annotations']` is a dict-iteration
1352        // order (image insertion order), which Python's `dict` keeps
1353        // stable since 3.7. Rebuilding the order from id-ascending
1354        // here matches the shape vernier's later FFI consumers
1355        // expect; the per-image trim itself is order-invariant.
1356        let mut image_ids: Vec<ImageId> = by_image_groups.keys().copied().collect();
1357        image_ids.sort_unstable_by_key(|i| i.0);
1358
1359        // Tight upper bound on the post-trim count: input length is
1360        // always an upper bound on the result, and `cap * n_images`
1361        // only beats it when the input is dense enough to hit the
1362        // cap on every image. Take the smaller of the two so we
1363        // never over-allocate by a factor of 5x on typical evals
1364        // (most images carry far fewer than `max_dets` detections).
1365        let upper_bound = self
1366            .detections
1367            .len()
1368            .min(cap.saturating_mul(image_ids.len()));
1369        let mut out: Vec<CocoDetection> = Vec::with_capacity(upper_bound);
1370        for image_id in image_ids {
1371            let mut group = by_image_groups.remove(&image_id).unwrap_or_default();
1372            // Stable sort by score descending. `partial_cmp` returns
1373            // `None` only on NaN; `from_inputs` rejects NaN scores
1374            // upstream (quirk **AD3** corrected), so `Equal` is the
1375            // only fallback we need to consider.
1376            group.sort_by(|&a, &b| {
1377                self.detections[b]
1378                    .score
1379                    .partial_cmp(&self.detections[a].score)
1380                    .unwrap_or(std::cmp::Ordering::Equal)
1381            });
1382            for &idx in group.iter().take(cap) {
1383                out.push(self.detections[idx].clone());
1384            }
1385        }
1386        CocoDetections::from_records(out)
1387    }
1388}
1389
1390// ---------------------------------------------------------------------------
1391// serde glue
1392// ---------------------------------------------------------------------------
1393
/// COCO JSON uses `0`/`1` ints for `iscrowd` / `ignore`, but a
/// permissive reader also accepts bool literals. Shared between the
/// required and optional flag deserializers below.
#[derive(Deserialize)]
#[serde(untagged)]
enum BoolOrInt {
    /// JSON `true` / `false`.
    Bool(bool),
    /// JSON integer; only 0 and 1 convert successfully (see
    /// [`Self::into_bool`]).
    Int(i64),
}
1403
1404impl BoolOrInt {
1405    fn into_bool<E: serde::de::Error>(self) -> Result<bool, E> {
1406        match self {
1407            Self::Bool(b) => Ok(b),
1408            Self::Int(0) => Ok(false),
1409            Self::Int(1) => Ok(true),
1410            Self::Int(other) => Err(E::custom(format!(
1411                "expected 0 or 1 for COCO bool field, got {other}"
1412            ))),
1413        }
1414    }
1415}
1416
1417fn deserialize_bool_int<'de, D>(de: D) -> Result<bool, D::Error>
1418where
1419    D: serde::Deserializer<'de>,
1420{
1421    BoolOrInt::deserialize(de)?.into_bool()
1422}
1423
1424fn deserialize_opt_bool_int<'de, D>(de: D) -> Result<Option<bool>, D::Error>
1425where
1426    D: serde::Deserializer<'de>,
1427{
1428    Option::<BoolOrInt>::deserialize(de)?
1429        .map(BoolOrInt::into_bool)
1430        .transpose()
1431}
1432
1433#[cfg(test)]
1434mod tests {
1435    use super::*;
1436    use proptest::prelude::*;
1437
    // Two-annotation fixture: one ordinary GT (id=1) plus one crowd
    // region (id=2, iscrowd=1) covering the whole 200x200 image.
    const CROWD_REGION_GT: &str = r#"{
        "images": [
            {"id": 1, "width": 200, "height": 200, "file_name": "img1.png"}
        ],
        "annotations": [
            {"id": 1, "image_id": 1, "category_id": 1,
             "bbox": [100, 100, 50, 50], "area": 2500, "iscrowd": 0},
            {"id": 2, "image_id": 1, "category_id": 1,
             "bbox": [0, 0, 200, 200], "area": 40000, "iscrowd": 1}
        ],
        "categories": [
            {"id": 1, "name": "widget", "supercategory": "thing"}
        ]
    }"#;
1452
1453    fn load_crowd_region() -> CocoDataset {
1454        CocoDataset::from_json_bytes(CROWD_REGION_GT.as_bytes()).unwrap()
1455    }
1456
1457    #[test]
1458    fn loads_crowd_region_fixture() {
1459        let ds = load_crowd_region();
1460        assert_eq!(ds.images().len(), 1);
1461        assert_eq!(ds.categories().len(), 1);
1462        assert_eq!(ds.annotations().len(), 2);
1463        assert_eq!(ds.images()[0].file_name.as_deref(), Some("img1.png"));
1464        assert_eq!(ds.categories()[0].name, "widget");
1465    }
1466
1467    #[test]
1468    fn by_image_index_returns_both_anns() {
1469        let ds = load_crowd_region();
1470        let idxs = ds.ann_indices_for_image(ImageId(1));
1471        assert_eq!(idxs.len(), 2);
1472        let anns: Vec<_> = ds.ann_iter_for_image(ImageId(1)).collect();
1473        assert_eq!(anns.len(), 2);
1474        assert_eq!(anns[0].id, AnnId(1));
1475        assert_eq!(anns[1].id, AnnId(2));
1476    }
1477
1478    #[test]
1479    fn by_category_index_returns_both_anns() {
1480        let ds = load_crowd_region();
1481        let idxs = ds.ann_indices_for_category(CategoryId(1));
1482        assert_eq!(idxs.len(), 2);
1483    }
1484
1485    #[test]
1486    fn unknown_image_returns_empty_slice() {
1487        let ds = load_crowd_region();
1488        assert!(ds.ann_indices_for_image(ImageId(999)).is_empty());
1489        assert!(ds.ann_indices_for_category(CategoryId(999)).is_empty());
1490    }
1491
1492    #[test]
1493    fn empty_image_or_category_returns_empty_slice_not_missing() {
1494        // A dataset with an image that has no annotations: the index
1495        // must be present (empty), so the matching loop can ask
1496        // without special-casing.
1497        const ONLY_EMPTY_IMG: &str = r#"{
1498            "images": [{"id": 7, "width": 1, "height": 1}],
1499            "annotations": [],
1500            "categories": [{"id": 3, "name": "thing"}]
1501        }"#;
1502        let ds = CocoDataset::from_json_bytes(ONLY_EMPTY_IMG.as_bytes()).unwrap();
1503        assert!(ds.ann_indices_for_image(ImageId(7)).is_empty());
1504        assert!(ds.ann_indices_for_category(CategoryId(3)).is_empty());
1505    }
1506
1507    #[test]
1508    fn rejects_annotation_referencing_unknown_image() {
1509        const BAD: &str = r#"{
1510            "images": [{"id": 1, "width": 10, "height": 10}],
1511            "annotations": [
1512                {"id": 1, "image_id": 99, "category_id": 1,
1513                 "bbox": [0, 0, 1, 1], "area": 1, "iscrowd": 0}
1514            ],
1515            "categories": [{"id": 1, "name": "thing"}]
1516        }"#;
1517        let err = CocoDataset::from_json_bytes(BAD.as_bytes()).unwrap_err();
1518        match err {
1519            EvalError::InvalidAnnotation { detail } => {
1520                assert!(detail.contains("image_id=99"), "msg: {detail}");
1521            }
1522            other => panic!("expected InvalidAnnotation, got {other:?}"),
1523        }
1524    }
1525
1526    #[test]
1527    fn rejects_annotation_referencing_unknown_category() {
1528        const BAD: &str = r#"{
1529            "images": [{"id": 1, "width": 10, "height": 10}],
1530            "annotations": [
1531                {"id": 1, "image_id": 1, "category_id": 42,
1532                 "bbox": [0, 0, 1, 1], "area": 1, "iscrowd": 0}
1533            ],
1534            "categories": [{"id": 1, "name": "thing"}]
1535        }"#;
1536        let err = CocoDataset::from_json_bytes(BAD.as_bytes()).unwrap_err();
1537        match err {
1538            EvalError::InvalidAnnotation { detail } => {
1539                assert!(detail.contains("category_id=42"), "msg: {detail}");
1540            }
1541            other => panic!("expected InvalidAnnotation, got {other:?}"),
1542        }
1543    }
1544
1545    #[test]
1546    fn round_trips_through_json() {
1547        let ds = load_crowd_region();
1548        let json = serde_json::to_string(&ds.to_json_value()).unwrap();
1549        let again = CocoDataset::from_json_bytes(json.as_bytes()).unwrap();
1550        assert_eq!(ds.images(), again.images());
1551        assert_eq!(ds.categories(), again.categories());
1552        assert_eq!(ds.annotations(), again.annotations());
1553    }
1554
1555    // -- Quirk D1: effective_ignore differs by parity mode ----------------
1556
    #[test]
    fn d1_strict_mode_drops_explicit_ignore_field() {
        // Quirk D1: both `iscrowd` and the optional `ignore` flag are
        // stored verbatim at load time; `effective_ignore` combines them
        // per parity mode at eval time (see module docs).
        // Annotation with iscrowd=0 and explicit ignore=1.
        // Strict (pycocotools): ignore := iscrowd → false.
        // Corrected: respects user's ignore=1 → true.
        const ANN_JSON: &str = r#"{
            "images": [{"id": 1, "width": 10, "height": 10}],
            "annotations": [
                {"id": 1, "image_id": 1, "category_id": 1,
                 "bbox": [0, 0, 1, 1], "area": 1,
                 "iscrowd": 0, "ignore": 1}
            ],
            "categories": [{"id": 1, "name": "thing"}]
        }"#;
        let ds = CocoDataset::from_json_bytes(ANN_JSON.as_bytes()).unwrap();
        let ann = &ds.annotations()[0];
        assert!(!ann.effective_ignore(ParityMode::Strict));
        assert!(ann.effective_ignore(ParityMode::Corrected));
    }
1576
    #[test]
    fn d1_strict_mode_uses_iscrowd_when_ignore_absent() {
        // Quirk D1, agreeing case: with no explicit `ignore` field there
        // is nothing for the modes to disagree about.
        // Annotation with iscrowd=1 and no ignore field.
        // Both modes: ignore = is_crowd = true.
        const ANN_JSON: &str = r#"{
            "images": [{"id": 1, "width": 10, "height": 10}],
            "annotations": [
                {"id": 1, "image_id": 1, "category_id": 1,
                 "bbox": [0, 0, 1, 1], "area": 1, "iscrowd": 1}
            ],
            "categories": [{"id": 1, "name": "thing"}]
        }"#;
        let ds = CocoDataset::from_json_bytes(ANN_JSON.as_bytes()).unwrap();
        let ann = &ds.annotations()[0];
        assert!(ann.effective_ignore(ParityMode::Strict));
        assert!(ann.effective_ignore(ParityMode::Corrected));
    }
1594
1595    // -- Per-cell index ((image, category)) -------------------------------
1596
1597    #[test]
1598    fn ann_indices_for_image_cat_returns_correct_subset() {
1599        const TWO_CATS: &str = r#"{
1600            "images": [{"id": 1, "width": 10, "height": 10}],
1601            "annotations": [
1602                {"id": 1, "image_id": 1, "category_id": 1,
1603                 "bbox": [0, 0, 1, 1], "area": 1, "iscrowd": 0},
1604                {"id": 2, "image_id": 1, "category_id": 2,
1605                 "bbox": [0, 0, 1, 1], "area": 1, "iscrowd": 0},
1606                {"id": 3, "image_id": 1, "category_id": 1,
1607                 "bbox": [0, 0, 1, 1], "area": 1, "iscrowd": 0}
1608            ],
1609            "categories": [
1610                {"id": 1, "name": "a"}, {"id": 2, "name": "b"}
1611            ]
1612        }"#;
1613        let ds = CocoDataset::from_json_bytes(TWO_CATS.as_bytes()).unwrap();
1614        let cat1: Vec<AnnId> = ds
1615            .ann_indices_for(ImageId(1), CategoryId(1))
1616            .iter()
1617            .map(|&i| ds.annotations()[i].id)
1618            .collect();
1619        assert_eq!(cat1, vec![AnnId(1), AnnId(3)]);
1620        let cat2: Vec<AnnId> = ds
1621            .ann_indices_for(ImageId(1), CategoryId(2))
1622            .iter()
1623            .map(|&i| ds.annotations()[i].id)
1624            .collect();
1625        assert_eq!(cat2, vec![AnnId(2)]);
1626        assert!(ds.ann_indices_for(ImageId(1), CategoryId(99)).is_empty());
1627        assert!(ds.ann_indices_for(ImageId(99), CategoryId(1)).is_empty());
1628    }
1629
1630    // -- CocoDetections: J1 (auto-id), J3 (area from bbox), validation ----
1631
1632    fn dt_input(image: i64, cat: i64, score: f64, bbox: (f64, f64, f64, f64)) -> DetectionInput {
1633        DetectionInput {
1634            id: None,
1635            image_id: ImageId(image),
1636            category_id: CategoryId(cat),
1637            score,
1638            bbox: Bbox {
1639                x: bbox.0,
1640                y: bbox.1,
1641                w: bbox.2,
1642                h: bbox.3,
1643            },
1644            segmentation: None,
1645            keypoints: None,
1646            num_keypoints: None,
1647        }
1648    }
1649
1650    #[test]
1651    fn j1_auto_assigns_ids_when_absent() {
1652        let dts = CocoDetections::from_inputs(vec![
1653            dt_input(1, 1, 0.9, (0.0, 0.0, 1.0, 1.0)),
1654            dt_input(1, 1, 0.8, (0.0, 0.0, 1.0, 1.0)),
1655        ])
1656        .unwrap();
1657        let ids: Vec<AnnId> = dts.detections().iter().map(|d| d.id).collect();
1658        assert_eq!(ids, vec![AnnId(1), AnnId(2)]);
1659    }
1660
1661    #[test]
1662    fn j1_preserves_user_supplied_ids() {
1663        let mut a = dt_input(1, 1, 0.9, (0.0, 0.0, 1.0, 1.0));
1664        a.id = Some(AnnId(42));
1665        let mut b = dt_input(1, 1, 0.8, (0.0, 0.0, 1.0, 1.0));
1666        b.id = Some(AnnId(7));
1667        let dts = CocoDetections::from_inputs(vec![a, b]).unwrap();
1668        let ids: Vec<AnnId> = dts.detections().iter().map(|d| d.id).collect();
1669        assert_eq!(ids, vec![AnnId(42), AnnId(7)]);
1670    }
1671
1672    #[test]
1673    fn j3_derives_area_from_bbox() {
1674        let dts =
1675            CocoDetections::from_inputs(vec![dt_input(1, 1, 0.5, (10.0, 10.0, 4.0, 5.0))]).unwrap();
1676        assert_eq!(dts.detections()[0].area, 20.0);
1677    }
1678
1679    #[test]
1680    fn rejects_non_finite_score() {
1681        let err = CocoDetections::from_inputs(vec![dt_input(1, 1, f64::NAN, (0.0, 0.0, 1.0, 1.0))])
1682            .unwrap_err();
1683        assert!(matches!(
1684            err,
1685            EvalError::NonFinite {
1686                context: "detection score"
1687            }
1688        ));
1689    }
1690
1691    #[test]
1692    fn detections_indices_per_image_cat() {
1693        let dts = CocoDetections::from_inputs(vec![
1694            dt_input(1, 1, 0.9, (0.0, 0.0, 1.0, 1.0)),
1695            dt_input(1, 2, 0.8, (0.0, 0.0, 1.0, 1.0)),
1696            dt_input(2, 1, 0.7, (0.0, 0.0, 1.0, 1.0)),
1697        ])
1698        .unwrap();
1699        assert_eq!(dts.indices_for(ImageId(1), CategoryId(1)), &[0]);
1700        assert_eq!(dts.indices_for(ImageId(1), CategoryId(2)), &[1]);
1701        assert_eq!(dts.indices_for(ImageId(2), CategoryId(1)), &[2]);
1702        assert!(dts.indices_for(ImageId(99), CategoryId(1)).is_empty());
1703        // Quirk L4 path: indices_for_image returns every category.
1704        let img1: Vec<usize> = dts.indices_for_image(ImageId(1)).to_vec();
1705        assert_eq!(img1, vec![0, 1]);
1706    }
1707
    #[test]
    fn loads_detections_from_json_array() {
        // End-to-end JSON path: a results array mixing an id-less entry
        // (auto-assigned per J1) with an explicit-id entry, checking the
        // J3 area derivation and the crowd/segmentation defaults.
        const JSON: &str = r#"[
            {"image_id": 1, "category_id": 1, "score": 0.9,
             "bbox": [0, 0, 2, 3]},
            {"id": 7, "image_id": 1, "category_id": 1, "score": 0.5,
             "bbox": [1, 1, 1, 1]}
        ]"#;
        let dts = CocoDetections::from_json_bytes(JSON.as_bytes()).unwrap();
        let ds = dts.detections();
        assert_eq!(ds[0].id, AnnId(1)); // auto-assigned
        assert_eq!(ds[0].area, 6.0); // J3: 2 * 3
        assert_eq!(ds[1].id, AnnId(7)); // user-supplied (J1)
        assert!(!ds[0].is_crowd()); // E2/J4
        assert!(ds[0].segmentation.is_none());
    }
1724
1725    // -- Phase 2: segmentation field on GT and DT -----------------------------
1726
    #[test]
    fn gt_loads_polygon_segmentation() {
        // Polygon-form segmentation (list of flat [x, y, …] rings) must
        // load and rasterize; the 4×4 axis-aligned square covers 16 px.
        const JSON: &str = r#"{
            "images": [{"id": 1, "width": 10, "height": 10}],
            "annotations": [
                {"id": 1, "image_id": 1, "category_id": 1,
                 "bbox": [0, 0, 4, 4], "area": 16, "iscrowd": 0,
                 "segmentation": [[0, 0, 4, 0, 4, 4, 0, 4]]}
            ],
            "categories": [{"id": 1, "name": "thing"}]
        }"#;
        let ds = CocoDataset::from_json_bytes(JSON.as_bytes()).unwrap();
        let seg = ds.annotations()[0].segmentation.as_ref().unwrap();
        let rle = seg.to_rle(10, 10).unwrap();
        assert_eq!(rle.area(), 16);
    }
1743
    #[test]
    fn gt_loads_compressed_rle_segmentation() {
        // Compressed-RLE-form segmentation ({"size", "counts"}) must
        // load. The counts string is generated by vernier_mask so the
        // test doesn't hard-code the compressed encoding; runs [0, 16]
        // describe a fully-set 4×4 mask (0 background px, 16 foreground).
        let counts_str = String::from_utf8(vernier_mask::encode_counts(&[0, 16])).unwrap();
        let json = format!(
            r#"{{
            "images": [{{"id": 1, "width": 4, "height": 4}}],
            "annotations": [
                {{"id": 1, "image_id": 1, "category_id": 1,
                 "bbox": [0, 0, 4, 4], "area": 16, "iscrowd": 1,
                 "segmentation": {{"size": [4, 4], "counts": "{counts_str}"}}}}
            ],
            "categories": [{{"id": 1, "name": "thing"}}]
        }}"#
        );
        let ds = CocoDataset::from_json_bytes(json.as_bytes()).unwrap();
        let seg = ds.annotations()[0].segmentation.as_ref().unwrap();
        let rle = seg.to_rle(4, 4).unwrap();
        assert_eq!((rle.h, rle.w), (4, 4));
        assert_eq!(rle.area(), 16);
    }
1764
    #[test]
    fn gt_segmentation_round_trips_through_to_json_value() {
        // The segmentation field must survive serialize → re-load; the
        // annotation equality check covers the whole record including
        // the polygon payload.
        const JSON: &str = r#"{
            "images": [{"id": 1, "width": 10, "height": 10}],
            "annotations": [
                {"id": 1, "image_id": 1, "category_id": 1,
                 "bbox": [0, 0, 4, 4], "area": 16, "iscrowd": 0,
                 "segmentation": [[0, 0, 4, 0, 4, 4, 0, 4]]}
            ],
            "categories": [{"id": 1, "name": "thing"}]
        }"#;
        let ds = CocoDataset::from_json_bytes(JSON.as_bytes()).unwrap();
        let serialized = serde_json::to_string(&ds.to_json_value()).unwrap();
        let again = CocoDataset::from_json_bytes(serialized.as_bytes()).unwrap();
        assert_eq!(ds.annotations(), again.annotations());
    }
1781
1782    #[test]
1783    fn gt_without_segmentation_field_loads_as_none() {
1784        let ds = load_crowd_region();
1785        assert!(ds.annotations().iter().all(|a| a.segmentation.is_none()));
1786    }
1787
    #[test]
    fn dt_loads_compressed_rle_segmentation() {
        // Detection-side segmentation uses the same compressed-RLE
        // object form as GT. Only presence is asserted here; decoding
        // is covered by the GT tests above.
        const JSON: &str = r#"[
            {"image_id": 1, "category_id": 1, "score": 0.9,
             "bbox": [0, 0, 4, 4],
             "segmentation": {"size": [4, 4], "counts": "04L4"}}
        ]"#;
        let dts = CocoDetections::from_json_bytes(JSON.as_bytes()).unwrap();
        assert!(dts.detections()[0].segmentation.is_some());
    }
1798
    #[test]
    fn dt_without_segmentation_loads_as_none() {
        // A bbox-only detection (no "segmentation" key) loads with
        // segmentation = None — the Phase 1 path stays valid.
        const JSON: &str = r#"[
            {"image_id": 1, "category_id": 1, "score": 0.9, "bbox": [0, 0, 1, 1]}
        ]"#;
        let dts = CocoDetections::from_json_bytes(JSON.as_bytes()).unwrap();
        assert!(dts.detections()[0].segmentation.is_none());
    }
1807
1808    // -- Property: index invariants hold across arbitrary datasets --------
1809
1810    fn arb_image() -> impl Strategy<Value = ImageMeta> {
1811        (1i64..1000, 1u32..2048, 1u32..2048).prop_map(|(id, w, h)| ImageMeta {
1812            id: ImageId(id),
1813            width: w,
1814            height: h,
1815            file_name: None,
1816        })
1817    }
1818
1819    fn arb_category() -> impl Strategy<Value = CategoryMeta> {
1820        (1i64..100, "[a-z]{1,8}").prop_map(|(id, name)| CategoryMeta {
1821            id: CategoryId(id),
1822            name,
1823            supercategory: None,
1824        })
1825    }
1826
1827    /// Minimal `CocoAnnotation` with the required ids set and every
1828    /// optional field defaulted. Tests that only care about identity /
1829    /// canonical-form invariance use this to skip the 10-field literal.
1830    fn make_min_annotation(
1831        id: AnnId,
1832        image_id: ImageId,
1833        category_id: CategoryId,
1834    ) -> CocoAnnotation {
1835        CocoAnnotation {
1836            id,
1837            image_id,
1838            category_id,
1839            area: 25.0,
1840            is_crowd: false,
1841            ignore_flag: None,
1842            bbox: Bbox {
1843                x: 0.0,
1844                y: 0.0,
1845                w: 5.0,
1846                h: 5.0,
1847            },
1848            segmentation: None,
1849            keypoints: None,
1850            num_keypoints: None,
1851        }
1852    }
1853
    proptest! {
        #![proptest_config(ProptestConfig::with_cases(64))]

        #[test]
        fn index_invariants_hold(
            // Generate a small image set, a small category set, and a
            // bag of annotations whose (image_id, category_id) pick
            // from those sets uniformly. The invariant we check: every
            // annotation appears in exactly one by_image bucket and
            // exactly one by_category bucket, and no bucket contains a
            // stray index.
            images in proptest::collection::vec(arb_image(), 1..6),
            categories in proptest::collection::vec(arb_category(), 1..6),
            n_anns in 0usize..40,
            ann_seed in any::<u64>(),
        ) {
            // De-duplicate ids; HashMaps in `from_parts` collapse them
            // anyway and tests should not depend on prop generators
            // accidentally minting collisions.
            let mut images = images;
            images.sort_by_key(|i| i.id);
            images.dedup_by_key(|i| i.id);
            let mut categories = categories;
            categories.sort_by_key(|c| c.id);
            categories.dedup_by_key(|c| c.id);

            // Cheap deterministic PRNG from ann_seed; avoids pulling
            // in `rand` for a single proptest helper. Only determinism
            // matters here, not statistical quality.
            let mut state = ann_seed.wrapping_add(1);
            let mut next = || {
                state = state.wrapping_mul(6364136223846793005)
                             .wrapping_add(1442695040888963407);
                state
            };

            // Annotation ids are 1-based and unique by construction;
            // the modulo picks are safe because both vec strategies
            // have a lower bound of 1 element.
            let mut annotations = Vec::with_capacity(n_anns);
            for ann_idx in 0..n_anns {
                let img = &images[(next() as usize) % images.len()];
                let cat = &categories[(next() as usize) % categories.len()];
                annotations.push(CocoAnnotation {
                    id: AnnId(ann_idx as i64 + 1),
                    image_id: img.id,
                    category_id: cat.id,
                    area: 1.0,
                    is_crowd: false,
                    ignore_flag: None,
                    bbox: Bbox { x: 0.0, y: 0.0, w: 1.0, h: 1.0 },
                    segmentation: None,
                    keypoints: None,
                    num_keypoints: None,
                });
            }

            let ds = CocoDataset::from_parts(
                images.clone(), annotations.clone(), categories.clone()
            ).unwrap();

            // Every annotation index appears exactly once across all
            // by_image buckets and exactly once across all by_category
            // buckets.
            let mut seen_img: Vec<usize> = images.iter()
                .flat_map(|i| ds.ann_indices_for_image(i.id).iter().copied())
                .collect();
            seen_img.sort_unstable();
            let expected: Vec<usize> = (0..annotations.len()).collect();
            prop_assert_eq!(&seen_img, &expected);

            let mut seen_cat: Vec<usize> = categories.iter()
                .flat_map(|c| ds.ann_indices_for_category(c.id).iter().copied())
                .collect();
            seen_cat.sort_unstable();
            prop_assert_eq!(&seen_cat, &expected);

            // Cross-check: every index in by_image[i] has image_id == i.
            for img in &images {
                for &idx in ds.ann_indices_for_image(img.id) {
                    prop_assert_eq!(ds.annotations()[idx].image_id, img.id);
                }
            }
            // ...and symmetrically for the by_category buckets.
            for cat in &categories {
                for &idx in ds.ann_indices_for_category(cat.id) {
                    prop_assert_eq!(ds.annotations()[idx].category_id, cat.id);
                }
            }
        }
    }
1940
1941    // -- ADR-0026: LVIS federated metadata loader -----------------------------
1942
    /// Minimal valid LVIS GT: 2 images, 2 categories with frequencies,
    /// 1 GT on image 1 (cat 1) and 1 GT on image 2 (cat 2). Image 1
    /// has cat 2 in `neg`; image 2 has cat 2 flagged not-exhaustive
    /// (cat 2 is `pos` there via its GT, so AA7 is satisfied).
    /// Used as the base fixture for the AA1 / AA7 / AB6 tests; the
    /// negative tests mutate it to violate one constraint at a time.
    const LVIS_MIN_VALID: &str = r#"{
        "images": [
            {"id": 1, "width": 100, "height": 100,
             "neg_category_ids": [2], "not_exhaustive_category_ids": []},
            {"id": 2, "width": 100, "height": 100,
             "neg_category_ids": [], "not_exhaustive_category_ids": [2]}
        ],
        "annotations": [
            {"id": 1, "image_id": 1, "category_id": 1,
             "bbox": [0, 0, 10, 10], "area": 100, "iscrowd": 0},
            {"id": 2, "image_id": 2, "category_id": 2,
             "bbox": [0, 0, 20, 20], "area": 400, "iscrowd": 0}
        ],
        "categories": [
            {"id": 1, "name": "a", "frequency": "f"},
            {"id": 2, "name": "b", "frequency": "r"}
        ]
    }"#;
1966
    #[test]
    fn lvis_loads_minimal_valid_dataset() {
        // Happy path for the LVIS loader: the COCO spine is unchanged
        // and all four pieces of federated metadata are populated.
        let ds = CocoDataset::from_lvis_json_bytes(LVIS_MIN_VALID.as_bytes()).unwrap();
        // Spine identical to a COCO load.
        assert_eq!(ds.images().len(), 2);
        assert_eq!(ds.categories().len(), 2);
        assert_eq!(ds.annotations().len(), 2);
        // Federated metadata populated.
        assert!(ds.is_federated());
        let pos = ds.pos_category_ids().unwrap();
        let neg = ds.neg_category_ids().unwrap();
        let nel = ds.not_exhaustive_category_ids().unwrap();
        let freq = ds.category_frequency().unwrap();
        // AA1: pos derived from GTs.
        assert_eq!(pos[&ImageId(1)], HashSet::from([CategoryId(1)]));
        assert_eq!(pos[&ImageId(2)], HashSet::from([CategoryId(2)]));
        // AA2: neg read verbatim.
        assert_eq!(neg[&ImageId(1)], HashSet::from([CategoryId(2)]));
        assert_eq!(neg[&ImageId(2)], HashSet::new());
        // AA3: not_exhaustive read verbatim.
        assert_eq!(nel[&ImageId(1)], HashSet::new());
        assert_eq!(nel[&ImageId(2)], HashSet::from([CategoryId(2)]));
        // AB1: frequency tags.
        assert_eq!(freq[&CategoryId(1)], Frequency::Frequent);
        assert_eq!(freq[&CategoryId(2)], Frequency::Rare);
    }
1993
1994    #[test]
1995    fn aa1_pos_derived_from_gts_does_not_include_zero_ann_categories() {
1996        // Cat 2 has a GT only on image 2; pos[image 1] must NOT
1997        // contain cat 2 (it's only in neg there).
1998        let ds = CocoDataset::from_lvis_json_bytes(LVIS_MIN_VALID.as_bytes()).unwrap();
1999        let pos = ds.pos_category_ids().unwrap();
2000        assert!(!pos[&ImageId(1)].contains(&CategoryId(2)));
2001        assert!(!pos[&ImageId(2)].contains(&CategoryId(1)));
2002    }
2003
2004    #[test]
2005    fn from_json_bytes_leaves_federated_metadata_none() {
2006        // The COCO loader on the same JSON shape ignores the LVIS
2007        // extras and leaves federated metadata empty (the orchestrator
2008        // then runs COCO semantics on the cells).
2009        let ds = CocoDataset::from_json_bytes(LVIS_MIN_VALID.as_bytes()).unwrap();
2010        assert!(!ds.is_federated());
2011        assert!(ds.pos_category_ids().is_none());
2012        assert!(ds.neg_category_ids().is_none());
2013        assert!(ds.not_exhaustive_category_ids().is_none());
2014        assert!(ds.category_frequency().is_none());
2015    }
2016
    #[test]
    fn aa7_pos_intersect_neg_rejected() {
        // AA7 consistency check: pos ∩ neg must be empty per image.
        // Cat 1 has a GT on image 1 → it's in pos[1]; the JSON also
        // lists cat 1 in image 1's neg → conflict. The error must
        // carry the exact (image, category) cell.
        const BAD: &str = r#"{
            "images": [
                {"id": 1, "width": 10, "height": 10,
                 "neg_category_ids": [1], "not_exhaustive_category_ids": []}
            ],
            "annotations": [
                {"id": 1, "image_id": 1, "category_id": 1,
                 "bbox": [0, 0, 5, 5], "area": 25, "iscrowd": 0}
            ],
            "categories": [{"id": 1, "name": "a", "frequency": "f"}]
        }"#;
        let err = CocoDataset::from_lvis_json_bytes(BAD.as_bytes()).unwrap_err();
        match err {
            EvalError::LvisFederatedConflict {
                image_id,
                category_id,
                detail,
            } => {
                assert_eq!(image_id, 1);
                assert_eq!(category_id, 1);
                assert!(detail.contains("GT"));
            }
            other => panic!("expected LvisFederatedConflict, got {other:?}"),
        }
    }
2046
    #[test]
    fn aa7_not_exhaustive_outside_pos_rejected() {
        // AA7 consistency check: not_exhaustive ⊆ pos per image.
        // Image 1 lists cat 2 in not_exhaustive but has no GT of cat 2
        // → not_exhaustive ⊄ pos, so the load must fail with the
        // offending (image, category) cell in the error.
        const BAD: &str = r#"{
            "images": [
                {"id": 1, "width": 10, "height": 10,
                 "neg_category_ids": [], "not_exhaustive_category_ids": [2]}
            ],
            "annotations": [
                {"id": 1, "image_id": 1, "category_id": 1,
                 "bbox": [0, 0, 5, 5], "area": 25, "iscrowd": 0}
            ],
            "categories": [
                {"id": 1, "name": "a", "frequency": "f"},
                {"id": 2, "name": "b", "frequency": "r"}
            ]
        }"#;
        let err = CocoDataset::from_lvis_json_bytes(BAD.as_bytes()).unwrap_err();
        match err {
            EvalError::LvisFederatedConflict {
                image_id,
                category_id,
                detail,
            } => {
                assert_eq!(image_id, 1);
                assert_eq!(category_id, 2);
                assert!(detail.contains("not_exhaustive"));
            }
            other => panic!("expected LvisFederatedConflict, got {other:?}"),
        }
    }
2079
    #[test]
    fn ab6_missing_frequency_collects_all_offenders() {
        // Two categories, neither has a frequency. The error must
        // surface both ids in sorted order, not just the first miss.
        // Ids are deliberately listed 7-then-3 to prove the sort.
        const BAD: &str = r#"{
            "images": [
                {"id": 1, "width": 10, "height": 10,
                 "neg_category_ids": [], "not_exhaustive_category_ids": []}
            ],
            "annotations": [],
            "categories": [
                {"id": 7, "name": "g"},
                {"id": 3, "name": "c"}
            ]
        }"#;
        let err = CocoDataset::from_lvis_json_bytes(BAD.as_bytes()).unwrap_err();
        match err {
            EvalError::MissingFrequency { category_ids } => {
                assert_eq!(category_ids, vec![3, 7]);
            }
            other => panic!("expected MissingFrequency, got {other:?}"),
        }
    }
2103
    #[test]
    fn lvis_loader_treats_absent_neg_field_as_empty() {
        // LVIS schema requires neg/not_exhaustive on every image, but a
        // tolerant loader treats absence as empty (matches the LVIS v1
        // semantic where a missing field → no negatives). The maps must
        // still contain an (empty) entry for the image, not omit it.
        const TOLERANT: &str = r#"{
            "images": [{"id": 1, "width": 10, "height": 10}],
            "annotations": [],
            "categories": [{"id": 1, "name": "a", "frequency": "c"}]
        }"#;
        let ds = CocoDataset::from_lvis_json_bytes(TOLERANT.as_bytes()).unwrap();
        let neg = ds.neg_category_ids().unwrap();
        let nel = ds.not_exhaustive_category_ids().unwrap();
        assert!(neg[&ImageId(1)].is_empty());
        assert!(nel[&ImageId(1)].is_empty());
    }
2120
2121    #[test]
2122    fn frequency_round_trips_serde() {
2123        for f in [Frequency::Rare, Frequency::Common, Frequency::Frequent] {
2124            let s = serde_json::to_string(&f).unwrap();
2125            let back: Frequency = serde_json::from_str(&s).unwrap();
2126            assert_eq!(f, back);
2127        }
2128        // Confirm the serde rename targets the LVIS single-letter form.
2129        assert_eq!(serde_json::to_string(&Frequency::Rare).unwrap(), "\"r\"");
2130        assert_eq!(serde_json::to_string(&Frequency::Common).unwrap(), "\"c\"");
2131        assert_eq!(
2132            serde_json::to_string(&Frequency::Frequent).unwrap(),
2133            "\"f\""
2134        );
2135    }
2136
2137    // -- AC2/AC3/AC4/AC5: lvis_trim per-image top-K ---------------------------
2138
2139    #[test]
2140    fn ac2_q1_trims_500_single_category_to_300() {
2141        // ADR-0026 appendix Q1: 500 single-category detections on one
2142        // image must trim to exactly 300, dropping the lowest-score
2143        // 200. Score-descending order is preserved.
2144        let dts = CocoDetections::from_inputs(
2145            (0..500)
2146                .map(|i| {
2147                    let score = 1.0 - (i as f64) / 1000.0; // 1.0, 0.999, …, 0.501
2148                    dt_input(1, 1, score, (0.0, 0.0, 1.0, 1.0))
2149                })
2150                .collect(),
2151        )
2152        .unwrap();
2153        let trimmed = dts.lvis_trim(300);
2154        assert_eq!(trimmed.detections().len(), 300);
2155        // Scores must be descending and start at 1.0.
2156        let scores: Vec<f64> = trimmed.detections().iter().map(|d| d.score).collect();
2157        for w in scores.windows(2) {
2158            assert!(
2159                w[0] >= w[1],
2160                "lvis_trim must preserve score-descending order"
2161            );
2162        }
2163        assert!((scores[0] - 1.0).abs() < 1e-12);
2164        // The lowest score in the trimmed set is the 300th input
2165        // (1.0 - 299/1000 = 0.701).
2166        assert!((scores[299] - 0.701).abs() < 1e-12);
2167    }
2168
    #[test]
    fn ac3_q2_cross_class_crowding_keeps_300_total_across_classes() {
        // ADR-0026 appendix Q2: 250 cat-A + 350 cat-B detections on
        // one image trim to **300 total** (top-300 across both
        // classes by score combined), not 250 + min(350, 300) = 550.
        // Score layouts are interleaved so the trim has to actually
        // sort across classes — a per-class trim would leave cat-A
        // intact and only trim cat-B.
        let mut inputs = Vec::with_capacity(600);
        for i in 0..250 {
            // cat 1 scores: 0.5, 0.498, …, 0.002 (250 values)
            let score = 0.5 - (i as f64) * 0.002;
            inputs.push(dt_input(1, 1, score, (0.0, 0.0, 1.0, 1.0)));
        }
        for i in 0..350 {
            // cat 2 scores: 1.0, 0.998, …, 0.302 (350 values). The two
            // ranges overlap (0.302 < 0.5), so the combined top-300 is
            // a genuine cross-class mix: mostly cat-2, with cat-1's
            // highest-scoring entries also making the cut — asserted
            // below.
            let score = 1.0 - (i as f64) * 0.002;
            inputs.push(dt_input(1, 2, score, (0.0, 0.0, 1.0, 1.0)));
        }
        let dts = CocoDetections::from_inputs(inputs).unwrap();
        let trimmed = dts.lvis_trim(300);
        // AC3: top-300 total — not per-class.
        assert_eq!(trimmed.detections().len(), 300);
        // Counts per category in the trim — cat-2 has higher overall
        // scores so most of the trim is cat-2; cat-1's top entries
        // (score 0.5 ≥ 0.302) also make the cut.
        let n_cat1 = trimmed
            .detections()
            .iter()
            .filter(|d| d.category_id == CategoryId(1))
            .count();
        let n_cat2 = trimmed
            .detections()
            .iter()
            .filter(|d| d.category_id == CategoryId(2))
            .count();
        // cat-2 scores >= 0.5 are i in 0..=250; cat-1 scores >= 0.302
        // are i in 0..=99. The exact mix is determined by the sort
        // of all 600 scores; what we assert is the cross-class total.
        assert_eq!(n_cat1 + n_cat2, 300);
        // Sanity: neither class is fully empty (otherwise the trim
        // would have collapsed to per-class).
        assert!(n_cat1 > 0, "cat 1 must keep at least its top-score entries");
        assert!(n_cat2 > 0, "cat 2 must keep its high-score entries");
    }
2219
2220    #[test]
2221    fn ac5_negative_max_dets_disables_trim() {
2222        // `max_dets < 0` is the upstream `if max_dets >= 0` guard
2223        // disabled. `lvis_trim(-1)` must return every input
2224        // detection unchanged.
2225        let dts = CocoDetections::from_inputs(
2226            (0..50)
2227                .map(|i| dt_input(1, 1, i as f64 / 100.0, (0.0, 0.0, 1.0, 1.0)))
2228                .collect(),
2229        )
2230        .unwrap();
2231        let trimmed = dts.lvis_trim(-1);
2232        assert_eq!(trimmed.detections().len(), 50);
2233        // No reordering — the AC5 path doesn't even sort.
2234        for (i, dt) in trimmed.detections().iter().enumerate() {
2235            assert!((dt.score - (i as f64 / 100.0)).abs() < 1e-12);
2236        }
2237    }
2238
2239    #[test]
2240    fn ac5_max_dets_at_capacity_is_no_op() {
2241        // `max_dets >= n_dts` keeps every detection — but resorts
2242        // them score-descending. (We don't assert order preservation
2243        // because the trim is allowed to reorder; the contract is
2244        // count + membership.)
2245        let dts = CocoDetections::from_inputs(
2246            (0..10)
2247                .map(|i| dt_input(1, 1, i as f64 / 10.0, (0.0, 0.0, 1.0, 1.0)))
2248                .collect(),
2249        )
2250        .unwrap();
2251        let trimmed = dts.lvis_trim(100);
2252        assert_eq!(trimmed.detections().len(), 10);
2253    }
2254
2255    #[test]
2256    fn ac4_stable_sort_preserves_input_order_for_score_ties() {
2257        // Two detections with the exact same score — the trim must
2258        // keep them in input order. Python's `sorted(_,
2259        // reverse=True)` uses Timsort (stable); Rust's `slice::sort_by`
2260        // is also stable. This test pins the cross-language
2261        // invariant.
2262        let mut a = dt_input(1, 1, 0.5, (0.0, 0.0, 1.0, 1.0));
2263        a.id = Some(AnnId(100));
2264        let mut b = dt_input(1, 1, 0.5, (1.0, 0.0, 1.0, 1.0));
2265        b.id = Some(AnnId(200));
2266        let dts = CocoDetections::from_inputs(vec![a, b]).unwrap();
2267        let trimmed = dts.lvis_trim(2);
2268        let ids: Vec<AnnId> = trimmed.detections().iter().map(|d| d.id).collect();
2269        assert_eq!(
2270            ids,
2271            vec![AnnId(100), AnnId(200)],
2272            "AC4: stable sort must preserve input order on score ties"
2273        );
2274    }
2275
2276    #[test]
2277    fn lvis_trim_groups_by_image_id() {
2278        // 3 images, each with 5 detections; trim to 2 per image.
2279        // Verify the group boundaries are honored: image 1 gets its
2280        // top-2 cat-1 entries, image 2 gets its top-2 cat-2 entries,
2281        // etc.
2282        let mut inputs = Vec::with_capacity(15);
2283        for img in 1..=3i64 {
2284            for i in 0..5 {
2285                let score = 1.0 - (img as f64) * 0.01 - (i as f64) * 0.001;
2286                inputs.push(dt_input(img, img, score, (0.0, 0.0, 1.0, 1.0)));
2287            }
2288        }
2289        let dts = CocoDetections::from_inputs(inputs).unwrap();
2290        let trimmed = dts.lvis_trim(2);
2291        assert_eq!(trimmed.detections().len(), 6);
2292        // 2 per image:
2293        for img in 1..=3i64 {
2294            let n = trimmed
2295                .detections()
2296                .iter()
2297                .filter(|d| d.image_id == ImageId(img))
2298                .count();
2299            assert_eq!(n, 2, "image {img} must trim to 2");
2300        }
2301    }
2302
2303    #[test]
2304    fn lvis_trim_zero_max_dets_keeps_nothing() {
2305        let dts = CocoDetections::from_inputs(vec![
2306            dt_input(1, 1, 0.9, (0.0, 0.0, 1.0, 1.0)),
2307            dt_input(1, 1, 0.5, (0.0, 0.0, 1.0, 1.0)),
2308        ])
2309        .unwrap();
2310        let trimmed = dts.lvis_trim(0);
2311        assert!(trimmed.detections().is_empty());
2312    }
2313
2314    #[test]
2315    fn lvis_loader_inherits_invalid_annotation_validation() {
2316        // Annotation references unknown image — the spine validation
2317        // (J5 / AG1) must fire before AA7.
2318        const BAD: &str = r#"{
2319            "images": [
2320                {"id": 1, "width": 10, "height": 10,
2321                 "neg_category_ids": [], "not_exhaustive_category_ids": []}
2322            ],
2323            "annotations": [
2324                {"id": 1, "image_id": 99, "category_id": 1,
2325                 "bbox": [0, 0, 1, 1], "area": 1, "iscrowd": 0}
2326            ],
2327            "categories": [{"id": 1, "name": "a", "frequency": "f"}]
2328        }"#;
2329        let err = CocoDataset::from_lvis_json_bytes(BAD.as_bytes()).unwrap_err();
2330        assert!(matches!(err, EvalError::InvalidAnnotation { .. }));
2331    }
2332
2333    // -----------------------------------------------------------------
2334    // dataset_hash stability tests (ADR-0031)
2335    // -----------------------------------------------------------------
2336
2337    #[test]
2338    fn dataset_hash_is_stable_for_equal_inputs() {
2339        let a = load_crowd_region();
2340        let b = load_crowd_region();
2341        assert_eq!(a.dataset_hash(), b.dataset_hash());
2342    }
2343
2344    #[test]
2345    fn dataset_hash_caches_via_arc_clone() {
2346        // The cache is `Arc<OnceLock>` so a clone shares the slot. The
2347        // first call on either side populates it; the second call on
2348        // the clone should observe the cached value (i.e., equal).
2349        let a = load_crowd_region();
2350        let b = a.clone();
2351        let h1 = a.dataset_hash();
2352        let h2 = b.dataset_hash();
2353        assert_eq!(h1, h2);
2354    }
2355
2356    #[test]
2357    fn dataset_hash_invariant_to_image_order() {
2358        // Two datasets that differ only in image declaration order
2359        // must hash identically.
2360        let order_a = r#"{
2361            "images": [
2362                {"id": 1, "width": 10, "height": 10},
2363                {"id": 2, "width": 20, "height": 20}
2364            ],
2365            "annotations": [
2366                {"id": 1, "image_id": 1, "category_id": 1,
2367                 "bbox": [0, 0, 5, 5], "area": 25, "iscrowd": 0}
2368            ],
2369            "categories": [{"id": 1, "name": "x"}]
2370        }"#;
2371        let order_b = r#"{
2372            "images": [
2373                {"id": 2, "width": 20, "height": 20},
2374                {"id": 1, "width": 10, "height": 10}
2375            ],
2376            "annotations": [
2377                {"id": 1, "image_id": 1, "category_id": 1,
2378                 "bbox": [0, 0, 5, 5], "area": 25, "iscrowd": 0}
2379            ],
2380            "categories": [{"id": 1, "name": "x"}]
2381        }"#;
2382        let a = CocoDataset::from_json_bytes(order_a.as_bytes()).unwrap();
2383        let b = CocoDataset::from_json_bytes(order_b.as_bytes()).unwrap();
2384        assert_eq!(a.dataset_hash(), b.dataset_hash());
2385    }
2386
2387    #[test]
2388    fn dataset_hash_invariant_to_annotation_order() {
2389        let order_a = r#"{
2390            "images": [{"id": 1, "width": 200, "height": 200}],
2391            "annotations": [
2392                {"id": 1, "image_id": 1, "category_id": 1,
2393                 "bbox": [0, 0, 5, 5], "area": 25, "iscrowd": 0},
2394                {"id": 2, "image_id": 1, "category_id": 1,
2395                 "bbox": [10, 10, 5, 5], "area": 25, "iscrowd": 0}
2396            ],
2397            "categories": [{"id": 1, "name": "x"}]
2398        }"#;
2399        let order_b = r#"{
2400            "images": [{"id": 1, "width": 200, "height": 200}],
2401            "annotations": [
2402                {"id": 2, "image_id": 1, "category_id": 1,
2403                 "bbox": [10, 10, 5, 5], "area": 25, "iscrowd": 0},
2404                {"id": 1, "image_id": 1, "category_id": 1,
2405                 "bbox": [0, 0, 5, 5], "area": 25, "iscrowd": 0}
2406            ],
2407            "categories": [{"id": 1, "name": "x"}]
2408        }"#;
2409        let a = CocoDataset::from_json_bytes(order_a.as_bytes()).unwrap();
2410        let b = CocoDataset::from_json_bytes(order_b.as_bytes()).unwrap();
2411        assert_eq!(a.dataset_hash(), b.dataset_hash());
2412    }
2413
2414    #[test]
2415    fn dataset_hash_changes_when_bbox_changes_by_one_pixel() {
2416        let base = r#"{
2417            "images": [{"id": 1, "width": 200, "height": 200}],
2418            "annotations": [
2419                {"id": 1, "image_id": 1, "category_id": 1,
2420                 "bbox": [10, 10, 5, 5], "area": 25, "iscrowd": 0}
2421            ],
2422            "categories": [{"id": 1, "name": "x"}]
2423        }"#;
2424        let shifted = r#"{
2425            "images": [{"id": 1, "width": 200, "height": 200}],
2426            "annotations": [
2427                {"id": 1, "image_id": 1, "category_id": 1,
2428                 "bbox": [11, 10, 5, 5], "area": 25, "iscrowd": 0}
2429            ],
2430            "categories": [{"id": 1, "name": "x"}]
2431        }"#;
2432        let a = CocoDataset::from_json_bytes(base.as_bytes()).unwrap();
2433        let b = CocoDataset::from_json_bytes(shifted.as_bytes()).unwrap();
2434        assert_ne!(a.dataset_hash(), b.dataset_hash());
2435    }
2436
    proptest! {
        #[test]
        fn dataset_hash_invariant_under_id_shuffle(
            mut images in proptest::collection::vec(arb_image(), 1..16),
            categories in proptest::collection::vec(arb_category(), 1..4),
        ) {
            // Dedup images / categories by id — `from_parts` doesn't
            // reject duplicates, but the canonical-form hash is only
            // well-defined over a unique set. NOTE: sort-then-dedup
            // decides *which* duplicate survives (the first in sorted
            // order); both dataset constructions below start from the
            // same surviving set, which is all the invariant needs.
            images.sort_by_key(|im| im.id.0);
            images.dedup_by_key(|im| im.id.0);
            let mut unique_categories = categories;
            unique_categories.sort_by_key(|c| c.id.0);
            unique_categories.dedup_by_key(|c| c.id.0);
            // The `1..16` / `1..4` generators never produce empty vecs
            // and dedup can't empty a non-empty vec — these assumes are
            // belt-and-braces and should never reject.
            prop_assume!(!images.is_empty());
            prop_assume!(!unique_categories.is_empty());

            // One annotation per image, all on the first category — the
            // shape doesn't matter, only that two datasets that differ
            // solely in declaration order should hash identically.
            let cat_id = unique_categories[0].id;
            let annotations: Vec<CocoAnnotation> = images
                .iter()
                .enumerate()
                .map(|(i, im)| make_min_annotation(AnnId((i as i64) + 1), im.id, cat_id))
                .collect();
            // The "shuffle" is a full reversal — enough to exercise
            // order-invariance without pulling in an RNG.
            let mut shuffled = images.clone();
            shuffled.reverse();

            let a = CocoDataset::from_parts(
                images,
                annotations.clone(),
                unique_categories.clone(),
            ).unwrap();
            let b = CocoDataset::from_parts(
                shuffled,
                annotations,
                unique_categories,
            ).unwrap();
            prop_assert_eq!(a.dataset_hash(), b.dataset_hash());
        }
    }
2479
2480    // -----------------------------------------------------------------
2481    // params_hash stability tests (ADR-0031)
2482    // -----------------------------------------------------------------
2483
2484    #[test]
2485    fn params_hash_is_stable_for_equal_inputs() {
2486        use crate::evaluate::OwnedEvaluateParams;
2487        let a = OwnedEvaluateParams {
2488            iou_thresholds: vec![0.5, 0.55, 0.6],
2489            area_ranges: vec![],
2490            max_dets_per_image: 100,
2491            use_cats: true,
2492            retain_iou: false,
2493        };
2494        let b = a.clone();
2495        assert_eq!(a.params_hash().unwrap(), b.params_hash().unwrap());
2496    }
2497
2498    #[test]
2499    fn params_hash_changes_when_thresholds_change() {
2500        use crate::evaluate::OwnedEvaluateParams;
2501        let a = OwnedEvaluateParams {
2502            iou_thresholds: vec![0.5, 0.55, 0.6],
2503            area_ranges: vec![],
2504            max_dets_per_image: 100,
2505            use_cats: true,
2506            retain_iou: false,
2507        };
2508        let mut b = a.clone();
2509        b.iou_thresholds.push(0.65);
2510        assert_ne!(a.params_hash().unwrap(), b.params_hash().unwrap());
2511    }
2512
2513    #[test]
2514    fn params_hash_changes_when_use_cats_toggles() {
2515        use crate::evaluate::OwnedEvaluateParams;
2516        let a = OwnedEvaluateParams {
2517            iou_thresholds: vec![0.5],
2518            area_ranges: vec![],
2519            max_dets_per_image: 100,
2520            use_cats: true,
2521            retain_iou: false,
2522        };
2523        let mut b = a.clone();
2524        b.use_cats = false;
2525        assert_ne!(a.params_hash().unwrap(), b.params_hash().unwrap());
2526    }
2527}