Skip to main content

vernier_core/
segmentation.rs

1//! COCO `segmentation` field deserialization and normalization.
2//!
3//! pycocotools accepts three on-disk shapes for the COCO `segmentation`
4//! field on both the GT and DT side:
5//!
6//! 1. `[[x0, y0, x1, y1, …], …]` — list of polygons. One inner list
7//!    per polygon. Multi-polygon GT (e.g., a person split across an
8//!    occluder) is merged into a single mask via union (quirk **K2**,
9//!    `strict`).
10//! 2. `{"size": [h, w], "counts": [u32, …]}` — uncompressed RLE,
11//!    counts as a JSON array of integers.
12//! 3. `{"size": [h, w], "counts": "…"}` — compressed RLE, counts as
13//!    the COCO 6-bit char string. Quirk **K3** (`aligned`): we accept
14//!    `str` only because JSON has no bytes type; the decoder treats
15//!    every wire byte as ASCII regardless.
16//!
//! The matching engine consumes a normalized [`vernier_mask::Rle`].
//! [`Segmentation::to_rle`] performs that normalization on demand: the
//! dataset stores the field verbatim at load time, and nothing is
//! converted to an RLE until a caller asks for one. This matches
//! pycocotools' `annToRLE` — RLE materialization happens at eval
//! time, not at load time.
22//!
23//! ## Quirk dispositions
24//!
25//! - **K1** (`corrected`): degenerate polygon inputs (<3 vertices,
26//!   odd coordinate counts, non-finite values) are rejected by
27//!   [`vernier_mask::Rle::from_polygon`]; the error surfaces here as
28//!   [`EvalError::Mask`]. pycocotools accepts these and silently
29//!   produces malformed RLE.
30//! - **K2** (`strict`): polygon-side normalization unions every
31//!   sub-polygon into a single RLE, matching `mask.merge`.
32//! - **K3** (`aligned`): bytes-vs-str distinction collapses on the
33//!   JSON wire — counts are always a `String` here.
34//! - **H2** (`corrected`): an RLE whose declared `size` disagrees
35//!   with the requested `(h, w)` raises
36//!   [`EvalError::DimensionMismatch`] instead of silently emitting an
37//!   empty `0x0` RLE the way pycocotools does.
38
39use std::sync::Arc;
40
41use serde::{Deserialize, Serialize};
42use vernier_mask::Rle;
43
44use crate::error::EvalError;
45
46/// One COCO `segmentation` field, in any of the three shapes
47/// pycocotools accepts. The dataset stores this verbatim;
48/// [`Self::to_rle`] normalizes to a single [`Rle`] at eval time.
49#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
50#[serde(untagged)]
51pub enum Segmentation {
52    /// Multi-polygon shape (`[[x0, y0, x1, y1, …], …]`). Each inner
53    /// vector is a flat `(x, y)` pair sequence. Merged into a single
54    /// RLE via union per **K2**.
55    Polygons(Vec<Vec<f64>>),
56    /// COCO RLE shape (`{"size": [h, w], "counts": …}`). Counts
57    /// payload may be either the compressed 6-bit char string
58    /// (typical) or an uncompressed JSON array of integers.
59    Rle(SegmentationRle),
60}
61
62/// COCO RLE wrapper carrying the declared `(h, w)` shape alongside
63/// either the compressed string or the uncompressed counts array.
64///
65/// `size` is the COCO `[h, w]` order — this is **not** `(w, h)`.
66#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
67pub struct SegmentationRle {
68    /// `[h, w]` per the COCO spec.
69    pub size: [u32; 2],
70    /// Run lengths in either compressed or uncompressed form.
71    pub counts: SegmentationRleCounts,
72}
73
74/// Counts payload of [`SegmentationRle`]. JSON shapes the variants
75/// disambiguate untagged: a string parses as [`Self::Compressed`], a
76/// number array as [`Self::Uncompressed`].
77#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
78#[serde(untagged)]
79pub enum SegmentationRleCounts {
80    /// Compressed 6-bit char string per ADR-0002 / quirk **G1–G3**.
81    Compressed(String),
82    /// Raw run-length array. Stored as [`Arc<[u32]>`] so the
83    /// dataset-cached value can be cloned cheaply into the per-eval
84    /// [`Rle`] without an O(N) memcpy.
85    Uncompressed(#[serde(with = "arc_u32_serde")] Arc<[u32]>),
86}
87
88mod arc_u32_serde {
89    use std::sync::Arc;
90
91    use serde::{Deserialize, Deserializer, Serialize, Serializer};
92
93    pub(super) fn serialize<S: Serializer>(value: &Arc<[u32]>, ser: S) -> Result<S::Ok, S::Error> {
94        value.as_ref().serialize(ser)
95    }
96
97    pub(super) fn deserialize<'de, D: Deserializer<'de>>(de: D) -> Result<Arc<[u32]>, D::Error> {
98        Vec::<u32>::deserialize(de).map(Arc::from)
99    }
100}
101
102impl Segmentation {
103    /// Normalizes this segmentation into a single [`Rle`] of shape
104    /// `(h, w)`.
105    ///
106    /// - Polygons are rasterized via `Rle::from_polygon` and unioned
107    ///   into one RLE (**K2**).
108    /// - An RLE variant is checked against the requested `(h, w)`;
109    ///   mismatch raises [`EvalError::DimensionMismatch`] (**H2**
110    ///   `corrected`).
111    pub fn to_rle(&self, h: u32, w: u32) -> Result<Rle, EvalError> {
112        match self {
113            Self::Polygons(polys) => Ok(Rle::from_polygons(polys, h, w)?),
114            Self::Rle(rle) => {
115                let [rh, rw] = rle.size;
116                if rh != h || rw != w {
117                    return Err(EvalError::DimensionMismatch {
118                        detail: format!(
119                            "segmentation declares size [{rh}, {rw}] but image is [{h}, {w}]"
120                        ),
121                    });
122                }
123                match &rle.counts {
124                    SegmentationRleCounts::Compressed(s) => {
125                        Ok(Rle::from_string_bytes(s.as_bytes(), h, w)?)
126                    }
127                    SegmentationRleCounts::Uncompressed(counts) => Ok(Rle {
128                        h,
129                        w,
130                        counts: Arc::clone(counts),
131                    }),
132                }
133            }
134        }
135    }
136}
137
#[cfg(test)]
mod tests {
    use super::*;

    /// Deserializes a JSON literal into a [`Segmentation`], panicking
    /// if it does not parse.
    fn parse(json: &str) -> Segmentation {
        serde_json::from_str(json).unwrap()
    }

    /// Returns the payload of the `Rle` variant, panicking on any
    /// other variant.
    fn expect_rle(seg: Segmentation) -> SegmentationRle {
        match seg {
            Segmentation::Rle(rle) => rle,
            other => panic!("expected Rle, got {other:?}"),
        }
    }

    #[test]
    fn parses_polygon_shape() {
        let seg = parse("[[0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 0.0, 2.0]]");
        let rings = match seg {
            Segmentation::Polygons(rings) => rings,
            other => panic!("expected Polygons, got {other:?}"),
        };
        // One polygon of four vertices, i.e. eight flat coordinates.
        assert_eq!(rings.len(), 1);
        assert_eq!(rings[0].len(), 8);
    }

    #[test]
    fn parses_compressed_rle_shape() {
        let rle = expect_rle(parse(r#"{"size": [10, 10], "counts": "PPYo`0"}"#));
        assert_eq!(rle.size, [10, 10]);
        assert!(matches!(rle.counts, SegmentationRleCounts::Compressed(_)));
    }

    #[test]
    fn parses_uncompressed_rle_shape() {
        let rle = expect_rle(parse(r#"{"size": [2, 2], "counts": [0, 4]}"#));
        assert_eq!(rle.size, [2, 2]);
        match rle.counts {
            SegmentationRleCounts::Uncompressed(runs) => {
                assert_eq!(&runs[..], &[0u32, 4][..]);
            }
            other => panic!("expected Uncompressed, got {other:?}"),
        }
    }

    #[test]
    fn polygon_to_rle_rasterizes_and_unions_k2() {
        // Two separate square polygons placed side by side. Under K2
        // they come back as one unioned mask rather than as two
        // distinct objects.
        let json = r#"[
            [0.0, 0.0, 2.0, 0.0, 2.0, 2.0, 0.0, 2.0],
            [3.0, 0.0, 5.0, 0.0, 5.0, 2.0, 3.0, 2.0]
        ]"#;
        let merged = parse(json).to_rle(8, 8).unwrap();
        // Each square contributes 2×2 foreground pixels: 4 + 4 = 8.
        assert_eq!(merged.area(), 8);
    }

    #[test]
    fn compressed_rle_to_rle_round_trips() {
        // Encode a known RLE to its compressed counts string, embed
        // that string in a segmentation object, and check that
        // `to_rle` reproduces the RLE we started from.
        let original = Rle {
            h: 4,
            w: 4,
            counts: vec![0u32, 4, 4, 4, 4].into(),
        };
        let counts = String::from_utf8(original.to_string_bytes()).unwrap();
        let json = format!(r#"{{"size": [4, 4], "counts": "{counts}"}}"#);
        let decoded = parse(&json).to_rle(4, 4).unwrap();
        assert_eq!(decoded, original);
    }

    #[test]
    fn uncompressed_rle_to_rle_uses_counts_verbatim() {
        let rle = parse(r#"{"size": [2, 2], "counts": [0, 4]}"#)
            .to_rle(2, 2)
            .unwrap();
        assert_eq!(rle.h, 2);
        assert_eq!(rle.w, 2);
        assert_eq!(&rle.counts[..], &[0u32, 4][..]);
        assert_eq!(rle.area(), 4);
    }

    #[test]
    fn rle_size_mismatch_errors_h2_corrected() {
        let err = parse(r#"{"size": [10, 10], "counts": [0, 100]}"#)
            .to_rle(20, 20)
            .unwrap_err();
        let detail = match err {
            EvalError::DimensionMismatch { detail } => detail,
            other => panic!("expected DimensionMismatch, got {other:?}"),
        };
        // The message names both the declared and the requested shape.
        assert!(detail.contains("[10, 10]"));
        assert!(detail.contains("[20, 20]"));
    }

    #[test]
    fn empty_polygon_list_yields_all_background_at_requested_shape() {
        // `[]` is degenerate but legal COCO; pycocotools silently
        // emits a 0×0 RLE for it. K2/H2 disposition: return a
        // well-formed all-background RLE at the caller's (h, w).
        let rle = parse("[]").to_rle(4, 4).unwrap();
        assert_eq!(rle.h, 4);
        assert_eq!(rle.w, 4);
        assert_eq!(rle.area(), 0);
    }

    #[test]
    fn polygon_with_too_few_vertices_propagates_k1_error() {
        // Four floats make only two points. K1 corrected: rejected.
        let err = parse("[[0.0, 0.0, 1.0, 1.0]]").to_rle(8, 8).unwrap_err();
        assert!(matches!(err, EvalError::Mask(_)));
    }
}