vernier-core 0.0.2

Pure-Rust core for the vernier evaluation library
Documentation
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
//! Confusion matrix sibling capability of TIDE error decomposition.
//!
//! Per ADR-0023, the same cross-class IoU side pass that powers the
//! TIDE Cls / Both bins also funds the confusion-matrix output: per-DT,
//! the best-overlapping GT across all classes is exactly the input the
//! confusion-matrix needs to count `(true_class, predicted_class)`
//! pairs. One pass through [`crate::tide::compute_cross_class_ious`]
//! serves both consumers.
//!
//! Output shape — counts keyed by `(Option<usize>, Option<usize>)` over
//! the **category-index** space (id-ascending; the same coordinate
//! system the cross-class side pass uses, *not* the raw COCO category
//! ids). The `None` sentinel maps to the FFI's `"__none__"` row /
//! column at the Python boundary:
//!
//! - `(Some(gt), Some(dt))` with `gt == dt` — true positives.
//! - `(Some(gt), Some(dt))` with `gt != dt` — classification confusion.
//! - `(None, Some(dt))` — the DT had no GT at IoU ≥ `iou_threshold`
//!   on the same image; counts as a false positive.
//! - `(Some(gt), None)` — the GT was not covered by any DT at the
//!   threshold; counts as a missed GT (false negative).
//!
//! The matching engine (ADR-0005) is unchanged; this module computes
//! its own argmax over the side-pass matrix rather than reusing the
//! same-class matching path. The shape is genuinely different — every
//! DT is compared against every GT on the image regardless of class —
//! and the matching engine's per-cell same-class invariant would have
//! to be peeled apart to model it.
//!
//! ## GT-side ignore handling
//!
//! pycocotools treats `iscrowd=1` (and the optional `ignore` field)
//! GTs as silent: a DT matched to one is dropped from the FP/TP count;
//! an unmatched ignore-GT is **not** a missed GT. Per ADR-0023's
//! recommendation, the side-pass storage [`crate::tables::CrossClassIous`] only
//! carries category indices and an opaque `(D, G)` matrix — no
//! per-column ignore flag — so this module recomputes the per-image
//! GT-annotation indices via [`crate::dataset::EvalDataset::ann_indices_for_image`]
//! and reads the original [`crate::dataset::CocoAnnotation::is_crowd`]
//! / [`crate::dataset::CocoAnnotation::ignore_flag`] to decide. Cheap
//! recompute over a small per-image list; keeps the storage type
//! single-purpose.

use std::collections::{HashMap, HashSet};

use crate::dataset::{CocoDataset, CocoDetections, EvalDataset, ImageMeta};
use crate::error::EvalError;
use crate::evaluate::EvalKernel;
use crate::parity::ParityMode;
use crate::tide::cross_class::compute_cross_class_ious;

/// Dataset-wide confusion-matrix tallies.
///
/// Every index appearing in a key of [`Self::counts`] is a position in
/// [`Self::category_ids`]: rows (GT) and columns (DT) share the
/// id-ascending category-index space the cross-class side pass uses.
/// `None` is the "no class" sentinel — in the GT slot it marks the
/// false-positive row, in the DT slot the missed-GT column. The FFI
/// surfaces the sentinel as the literal string `"__none__"`.
///
/// Equality is by value (`PartialEq` / `Eq`): two results are equal when
/// they hold the same cell counts and the same category-id axis, which
/// lets callers and tests compare whole matrices directly.
#[derive(Debug, Clone, Default, PartialEq, Eq)]
pub struct ConfusionMatrixCounts {
    /// `(gt_category_idx_or_none, dt_category_idx_or_none) -> count`.
    /// A pair that is absent from the map has count zero (the FFI
    /// fills in the dense long-format only for pairs that fired).
    pub counts: HashMap<(Option<usize>, Option<usize>), u64>,
    /// Category ids laid out along the matrix axis:
    /// `category_ids[i]` is the user-visible COCO category id for
    /// matrix index `i`.
    pub category_ids: Vec<i64>,
}

/// Build the dataset-wide confusion matrix from the cross-class IoU
/// side pass.
///
/// Images are visited in image-id-ascending order. On each image the
/// DT rows of the side-pass matrix are walked in row order and every
/// DT greedily picks its highest-IoU GT column among the columns no
/// earlier row has claimed. The pick is then classified:
///
/// - best IoU ≥ `iou_threshold`, GT is not an ignore-GT → the GT is
///   claimed and `(Some(gt_class), Some(dt_class))` is incremented;
/// - best IoU ≥ `iou_threshold`, GT is an ignore-GT → the DT is
///   dropped without touching any count;
/// - anything else (no unclaimed GT, or best IoU below the threshold)
///   → `(None, Some(dt_class))` is incremented (false positive).
///
/// Once every DT row is processed, each GT column that is neither
/// claimed nor an ignore-GT increments `(Some(gt_class), None)`
/// (missed GT).
///
/// `max_dets_per_image` is forwarded to [`compute_cross_class_ious`];
/// it is meant to equal the matching path's per-image cap so the rows
/// read here line up with the post-cap DT slice the matching engine
/// saw on the same dataset.
///
/// # Errors
///
/// Any [`EvalError`] produced by [`compute_cross_class_ious`] —
/// kernel construction failures and category-id-not-found errors —
/// is returned unchanged.
pub fn compute_confusion_matrix<K: EvalKernel>(
    gt: &CocoDataset,
    dt: &CocoDetections,
    kernel: &K,
    iou_threshold: f64,
    max_dets_per_image: usize,
    parity_mode: ParityMode,
) -> Result<ConfusionMatrixCounts, EvalError> {
    let side_pass = compute_cross_class_ious(gt, dt, kernel, parity_mode, max_dets_per_image)?;

    // Matrix axis: COCO category ids in ascending order, so that
    // `category_ids[i]` names the class behind matrix index `i`.
    let mut category_ids: Vec<i64> = gt
        .categories()
        .iter()
        .map(|category| category.id.0)
        .collect();
    category_ids.sort_unstable();

    // The side pass is addressed by position in the id-sorted image
    // list, so rebuild that ordering here.
    let mut ordered_images: Vec<&ImageMeta> = gt.images().iter().collect();
    ordered_images.sort_unstable_by_key(|meta| meta.id.0);

    let annotations = gt.annotations();
    let mut counts: HashMap<(Option<usize>, Option<usize>), u64> = HashMap::new();

    for (slot, meta) in ordered_images.iter().enumerate() {
        // Per-image annotation indices, needed only to look up the
        // ignore flags of the GT columns.
        // NOTE(review): assumes column `g` of the side-pass matrix
        // corresponds to `ann_slots[g]` — confirm against the side pass.
        let ann_slots = gt.ann_indices_for_image(meta.id);

        // All three side-pass pieces must be present for the image;
        // otherwise there is nothing to tally for it.
        let (overlaps, det_classes, truth_classes) = match (
            side_pass.get(slot),
            side_pass.dt_classes(slot),
            side_pass.gt_classes(slot),
        ) {
            (Some(matrix), Some(dts), Some(gts)) => (matrix, dts, gts),
            _ => continue,
        };

        let n_dets = overlaps.shape()[0];
        let n_truths = overlaps.shape()[1];

        // GT columns already claimed by an earlier DT row on this
        // image. Rows are walked in matrix order (documented by the
        // side pass as score-descending), so the first claimant wins.
        let mut claimed: HashSet<usize> = HashSet::new();

        for row in 0..n_dets {
            // Arg-max of this row over the unclaimed columns. Strict
            // `>` keeps the lowest column index on ties. Ignore-GTs
            // are deliberately *not* filtered out here; they are
            // handled after the threshold test below.
            let (best_col, best_overlap) = (0..n_truths)
                .filter(|col| !claimed.contains(col))
                .fold((None, f64::NEG_INFINITY), |(arg, top), col| {
                    let value = overlaps[(row, col)];
                    if value > top {
                        (Some(col), value)
                    } else {
                        (arg, top)
                    }
                });

            let predicted = det_classes[row];

            let cell = match best_col {
                Some(col) if best_overlap >= iou_threshold => {
                    if is_ignore_gt(&annotations[ann_slots[col]]) {
                        // Best match is an ignore-GT: the DT is
                        // dropped — no FP, no matched pair. The GT is
                        // not marked as claimed, so a later DT whose
                        // best overlap is this same ignore-GT is
                        // dropped as well; the missed-GT pass skips
                        // ignore-GTs regardless.
                        continue;
                    }
                    claimed.insert(col);
                    (Some(truth_classes[col]), Some(predicted))
                }
                // No unclaimed GT on the image, or the best overlap
                // did not clear the threshold: false positive in the
                // `__none__` row.
                _ => (None, Some(predicted)),
            };
            *counts.entry(cell).or_default() += 1;
        }

        // Missed-GT pass: every column that stayed unclaimed and is
        // not an ignore-GT lands in the `__none__` column.
        for (col, &truth_class) in truth_classes.iter().enumerate() {
            if !claimed.contains(&col) && !is_ignore_gt(&annotations[ann_slots[col]]) {
                *counts.entry((Some(truth_class), None)).or_default() += 1;
            }
        }
    }

    Ok(ConfusionMatrixCounts {
        counts,
        category_ids,
    })
}

/// Report whether a GT annotation is "silent" for the confusion
/// matrix: `true` when `is_crowd` is set, or when the optional
/// `ignore_flag` is present and `true`.
///
/// This mirrors the `effective_ignore` semantics from
/// `crate::tide::assignment` (D1). It is inlined instead of calling
/// [`crate::dataset::CocoAnnotation::effective_ignore`] because that
/// method takes a [`ParityMode`], whereas the confusion-matrix layer
/// treats strict and corrected identically — both fold iscrowd into
/// ignore.
fn is_ignore_gt(ann: &crate::dataset::CocoAnnotation) -> bool {
    if ann.is_crowd {
        return true;
    }
    // A missing flag (`None`) counts as "not ignored".
    matches!(ann.ignore_flag, Some(true))
}

/// Test-only lookup: the position of `category_id` inside
/// `counts.category_ids` (i.e. its matrix index), or `None` when the
/// id is not on the axis.
#[cfg(test)]
fn category_index_for_id(counts: &ConfusionMatrixCounts, category_id: i64) -> Option<usize> {
    counts
        .category_ids
        .iter()
        .copied()
        .position(|candidate| candidate == category_id)
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::dataset::{
        AnnId, Bbox, CategoryId, CategoryMeta, CocoAnnotation, DetectionInput, ImageId, ImageMeta,
    };
    use crate::similarity::BboxIou;

    /// `(x, y, w, h)` box tuple accepted by the fixture builders.
    type Xywh = (f64, f64, f64, f64);

    /// Expand an `(x, y, w, h)` tuple into a [`Bbox`].
    fn to_bbox((x, y, w, h): Xywh) -> Bbox {
        Bbox { x, y, w, h }
    }

    /// Image fixture with no file name.
    fn image_meta(id: i64, width: u32, height: u32) -> ImageMeta {
        ImageMeta {
            id: ImageId(id),
            width,
            height,
            file_name: None,
        }
    }

    /// Category fixture with no supercategory.
    fn category(id: i64, name: &str) -> CategoryMeta {
        CategoryMeta {
            id: CategoryId(id),
            name: name.into(),
            supercategory: None,
        }
    }

    /// Ground-truth box fixture; `area` is `w * h` and the optional
    /// `ignore` field is left unset.
    fn truth(id: i64, image: i64, class: i64, xywh: Xywh, is_crowd: bool) -> CocoAnnotation {
        let (_, _, w, h) = xywh;
        CocoAnnotation {
            id: AnnId(id),
            image_id: ImageId(image),
            category_id: CategoryId(class),
            area: w * h,
            is_crowd,
            ignore_flag: None,
            bbox: to_bbox(xywh),
            segmentation: None,
            keypoints: None,
            num_keypoints: None,
        }
    }

    /// Detection box fixture without an explicit id.
    fn detection(image: i64, class: i64, score: f64, xywh: Xywh) -> DetectionInput {
        DetectionInput {
            id: None,
            image_id: ImageId(image),
            category_id: CategoryId(class),
            score,
            bbox: to_bbox(xywh),
            segmentation: None,
            keypoints: None,
            num_keypoints: None,
        }
    }

    /// One square image (id 1) holding a class-1 GT at (10, 10) and a
    /// class-2 GT at (100, 100), both 40×40 and non-crowd.
    fn two_class_truth(side: u32) -> CocoDataset {
        CocoDataset::from_parts(
            vec![image_meta(1, side, side)],
            vec![
                truth(1, 1, 1, (10.0, 10.0, 40.0, 40.0), false),
                truth(2, 1, 2, (100.0, 100.0, 40.0, 40.0), false),
            ],
            vec![category(1, "a"), category(2, "b")],
        )
        .expect("dataset builds")
    }

    /// Run the confusion matrix with the settings every test shares:
    /// bbox IoU, threshold 0.5, cap of 100 DTs per image, strict parity.
    fn run(gt: &CocoDataset, inputs: Vec<DetectionInput>) -> ConfusionMatrixCounts {
        let dts = CocoDetections::from_inputs(inputs).expect("detections build");
        compute_confusion_matrix(gt, &dts, &BboxIou, 0.5, 100, ParityMode::Strict)
            .expect("confusion matrix runs")
    }

    /// Count stored for one `(gt, dt)` cell, `None` when the cell never fired.
    fn tally(
        cm: &ConfusionMatrixCounts,
        gt_idx: Option<usize>,
        dt_idx: Option<usize>,
    ) -> Option<u64> {
        cm.counts.get(&(gt_idx, dt_idx)).copied()
    }

    /// Two GTs of distinct classes, each covered exactly by a DT of
    /// the same class: only the two diagonal cells fire.
    #[test]
    fn diagonal_only_when_every_dt_matches_same_class_gt() {
        let gt = two_class_truth(200);
        let cm = run(
            &gt,
            vec![
                detection(1, 1, 0.9, (10.0, 10.0, 40.0, 40.0)),
                detection(1, 2, 0.8, (100.0, 100.0, 40.0, 40.0)),
            ],
        );

        let idx_a = category_index_for_id(&cm, 1).expect("class 1 in matrix");
        let idx_b = category_index_for_id(&cm, 2).expect("class 2 in matrix");
        assert_eq!(tally(&cm, Some(idx_a), Some(idx_a)), Some(1));
        assert_eq!(tally(&cm, Some(idx_b), Some(idx_b)), Some(1));
        // Nothing off the diagonal, no FP row, no missed column.
        assert_eq!(cm.counts.len(), 2);
    }

    /// Two GTs and two DTs; each DT sits exactly on a GT but carries
    /// the other class: only the two off-diagonal cells fire.
    #[test]
    fn off_diagonal_when_every_dt_is_wrong_class() {
        let gt = two_class_truth(200);
        let cm = run(
            &gt,
            vec![
                detection(1, 2, 0.9, (10.0, 10.0, 40.0, 40.0)),
                detection(1, 1, 0.9, (100.0, 100.0, 40.0, 40.0)),
            ],
        );

        let idx_a = category_index_for_id(&cm, 1).expect("class 1");
        let idx_b = category_index_for_id(&cm, 2).expect("class 2");
        // The class-2 DT lands on the class-1 GT, and vice versa.
        assert_eq!(tally(&cm, Some(idx_a), Some(idx_b)), Some(1));
        assert_eq!(tally(&cm, Some(idx_b), Some(idx_a)), Some(1));
        // No diagonal cell, no FP row, no missed column.
        assert_eq!(cm.counts.len(), 2);
    }

    /// Two high-score DTs (0.9) are pure background with no overlap;
    /// two lower-score DTs (0.5) sit exactly on their same-class GTs.
    /// Expects one FP per class, both diagonal cells, and no
    /// missed-GT entries.
    #[test]
    fn fp_row_for_background_dts_and_no_missed_for_covered_gts() {
        let gt = two_class_truth(1000);
        let cm = run(
            &gt,
            vec![
                // Background DTs: high score, far from every GT.
                detection(1, 1, 0.9, (500.0, 500.0, 30.0, 30.0)),
                detection(1, 2, 0.9, (600.0, 500.0, 30.0, 30.0)),
                // Covering DTs: lower score, exactly on the GTs.
                detection(1, 1, 0.5, (10.0, 10.0, 40.0, 40.0)),
                detection(1, 2, 0.5, (100.0, 100.0, 40.0, 40.0)),
            ],
        );

        let idx_a = category_index_for_id(&cm, 1).expect("class 1");
        let idx_b = category_index_for_id(&cm, 2).expect("class 2");
        // One FP per class from the background DTs.
        assert_eq!(tally(&cm, None, Some(idx_a)), Some(1));
        assert_eq!(tally(&cm, None, Some(idx_b)), Some(1));
        // The covering DTs land on the diagonal.
        assert_eq!(tally(&cm, Some(idx_a), Some(idx_a)), Some(1));
        assert_eq!(tally(&cm, Some(idx_b), Some(idx_b)), Some(1));
        // Both GTs are covered, so the missed column never fires.
        assert_eq!(tally(&cm, Some(idx_a), None), None);
        assert_eq!(tally(&cm, Some(idx_b), None), None);
    }

    /// No DT overlaps any GT: the FP row fires for every DT and the
    /// missed column fires for every GT.
    #[test]
    fn fp_and_missed_when_dts_and_gts_dont_overlap_at_all() {
        let gt = two_class_truth(1000);
        let cm = run(
            &gt,
            vec![
                detection(1, 1, 0.9, (500.0, 500.0, 30.0, 30.0)),
                detection(1, 2, 0.9, (600.0, 500.0, 30.0, 30.0)),
            ],
        );

        let idx_a = category_index_for_id(&cm, 1).expect("class 1");
        let idx_b = category_index_for_id(&cm, 2).expect("class 2");
        // FP row.
        assert_eq!(tally(&cm, None, Some(idx_a)), Some(1));
        assert_eq!(tally(&cm, None, Some(idx_b)), Some(1));
        // Missed column.
        assert_eq!(tally(&cm, Some(idx_a), None), Some(1));
        assert_eq!(tally(&cm, Some(idx_b), None), Some(1));
    }

    /// One iscrowd GT on image 1 and one regular GT on image 2. The DT
    /// that lands on the crowd is dropped (neither FP nor TP), the
    /// regular GT is matched, and the crowd is never reported as missed.
    #[test]
    fn ignore_gt_neither_matched_nor_missed() {
        let gt = CocoDataset::from_parts(
            vec![image_meta(1, 1000, 1000), image_meta(2, 1000, 1000)],
            vec![
                truth(1, 1, 1, (10.0, 10.0, 40.0, 40.0), true), // iscrowd
                truth(2, 2, 1, (10.0, 10.0, 40.0, 40.0), false),
            ],
            vec![category(1, "a")],
        )
        .expect("dataset builds");
        let cm = run(
            &gt,
            vec![
                // Sits on the crowd → dropped.
                detection(1, 1, 0.9, (10.0, 10.0, 40.0, 40.0)),
                // Pure FP on image 1.
                detection(1, 1, 0.5, (500.0, 500.0, 30.0, 30.0)),
                // Covers the regular GT on image 2.
                detection(2, 1, 0.9, (10.0, 10.0, 40.0, 40.0)),
            ],
        );

        let idx_a = category_index_for_id(&cm, 1).expect("class 1");
        // Diagonal: the regular GT on image 2 is covered.
        assert_eq!(tally(&cm, Some(idx_a), Some(idx_a)), Some(1));
        // FP: the second DT on image 1.
        assert_eq!(tally(&cm, None, Some(idx_a)), Some(1));
        // The crowd is not reported as a missed GT.
        assert_eq!(tally(&cm, Some(idx_a), None), None);
        // The crowd-matching DT left no trace: only two counts overall.
        assert_eq!(cm.counts.values().copied().sum::<u64>(), 2);
    }
}