edgefirst_client/coco/
types.rs

1// SPDX-License-Identifier: Apache-2.0
2// Copyright © 2025 Au-Zone Technologies. All Rights Reserved.
3
4//! COCO JSON data structures for serde serialization/deserialization.
5//!
6//! Supports object detection and instance segmentation annotation types.
7//! Keypoints, captions, and panoptic segmentation are NOT supported in this
8//! version.
9
10use serde::{Deserialize, Serialize};
11use std::collections::HashMap;
12
13/// Top-level COCO dataset structure.
14///
15/// This is the root structure for COCO annotation files like
16/// `instances_train2017.json`.
17#[derive(Debug, Clone, Default, Serialize, Deserialize)]
18pub struct CocoDataset {
19    /// Dataset metadata (optional but commonly present).
20    #[serde(default)]
21    pub info: CocoInfo,
22    /// License information for the images.
23    #[serde(default)]
24    pub licenses: Vec<CocoLicense>,
25    /// List of images in the dataset.
26    pub images: Vec<CocoImage>,
27    /// List of annotations (one per object instance).
28    #[serde(default)]
29    pub annotations: Vec<CocoAnnotation>,
30    /// List of object categories/classes.
31    #[serde(default)]
32    pub categories: Vec<CocoCategory>,
33}
34
35/// Dataset metadata.
36#[derive(Debug, Clone, Default, Serialize, Deserialize)]
37pub struct CocoInfo {
38    /// Year the dataset was created.
39    #[serde(default)]
40    pub year: Option<u32>,
41    /// Version string.
42    #[serde(default)]
43    pub version: Option<String>,
44    /// Dataset description.
45    #[serde(default)]
46    pub description: Option<String>,
47    /// Dataset contributor.
48    #[serde(default)]
49    pub contributor: Option<String>,
50    /// Dataset URL.
51    #[serde(default)]
52    pub url: Option<String>,
53    /// Date the dataset was created.
54    #[serde(default)]
55    pub date_created: Option<String>,
56}
57
58/// License information.
59#[derive(Debug, Clone, Serialize, Deserialize)]
60pub struct CocoLicense {
61    /// Unique license ID.
62    pub id: u32,
63    /// License name.
64    pub name: String,
65    /// License URL.
66    #[serde(default)]
67    pub url: Option<String>,
68}
69
70/// Image metadata.
71///
72/// Each image has a unique ID and associated metadata.
73#[derive(Debug, Clone, Serialize, Deserialize, Default)]
74pub struct CocoImage {
75    /// Unique image ID.
76    pub id: u64,
77    /// Image width in pixels.
78    pub width: u32,
79    /// Image height in pixels.
80    pub height: u32,
81    /// Filename (relative path within the images folder).
82    pub file_name: String,
83    /// License ID (references `CocoLicense.id`).
84    #[serde(default)]
85    pub license: Option<u32>,
86    /// Flickr URL (if from Flickr).
87    #[serde(default)]
88    pub flickr_url: Option<String>,
89    /// COCO download URL.
90    #[serde(default)]
91    pub coco_url: Option<String>,
92    /// Date the image was captured.
93    #[serde(default)]
94    pub date_captured: Option<String>,
95}
96
97/// Category definition.
98///
99/// Categories define the object classes used in the dataset.
100#[derive(Debug, Clone, Serialize, Deserialize, Default)]
101pub struct CocoCategory {
102    /// Unique category ID.
103    pub id: u32,
104    /// Category name (e.g., "person", "car").
105    pub name: String,
106    /// Parent category name (e.g., "human" for "person").
107    #[serde(default)]
108    pub supercategory: Option<String>,
109}
110
111/// Annotation for object detection and instance segmentation.
112///
113/// Each annotation represents a single object instance in an image.
114///
115/// Note: Keypoints, captions, and panoptic fields are NOT supported.
116#[derive(Debug, Clone, Default, Serialize, Deserialize)]
117pub struct CocoAnnotation {
118    /// Unique annotation ID.
119    pub id: u64,
120    /// ID of the image containing this object.
121    pub image_id: u64,
122    /// Category ID of this object.
123    pub category_id: u32,
124    /// Bounding box: `[x, y, width, height]` in pixels (top-left corner).
125    pub bbox: [f64; 4],
126    /// Area of the segmentation mask in pixels².
127    #[serde(default)]
128    pub area: f64,
129    /// Whether this is a crowd annotation (0 = single instance, 1 = crowd).
130    #[serde(default)]
131    pub iscrowd: u8,
132    /// Segmentation mask (polygon or RLE format).
133    #[serde(default, skip_serializing_if = "Option::is_none")]
134    pub segmentation: Option<CocoSegmentation>,
135}
136
137/// Segmentation format: polygon array or RLE.
138///
139/// COCO supports two segmentation formats:
140/// - **Polygon**: For single instances (`iscrowd=0`), uses nested coordinate
141///   arrays
142/// - **RLE**: For crowds (`iscrowd=1`), uses run-length encoding
143#[derive(Debug, Clone, Serialize, Deserialize)]
144#[serde(untagged)]
145pub enum CocoSegmentation {
146    /// Polygon format: `[[x1,y1,x2,y2,...], [x3,y3,...]]`
147    ///
148    /// Multiple polygons represent disjoint regions of the same object.
149    Polygon(Vec<Vec<f64>>),
150    /// Uncompressed RLE format with counts array.
151    Rle(CocoRle),
152    /// Compressed RLE format with LEB128-encoded counts string.
153    CompressedRle(CocoCompressedRle),
154}
155
156/// Uncompressed RLE (Run-Length Encoding) segmentation.
157///
158/// The counts array alternates between background and foreground pixel runs,
159/// starting with background. The encoding is **column-major** (Fortran order).
160#[derive(Debug, Clone, Serialize, Deserialize)]
161pub struct CocoRle {
162    /// Run-length counts: `[bg_run, fg_run, bg_run, fg_run, ...]`
163    pub counts: Vec<u32>,
164    /// Image size as `[height, width]` (NOT `[width, height]`!)
165    pub size: [u32; 2],
166}
167
168/// Compressed RLE segmentation (LEB128 encoded).
169///
170/// Used by pycocotools for more compact storage.
171#[derive(Debug, Clone, Serialize, Deserialize)]
172pub struct CocoCompressedRle {
173    /// LEB128-encoded counts string.
174    pub counts: String,
175    /// Image size as `[height, width]`.
176    pub size: [u32; 2],
177}
178
179/// Lookup tables for efficient COCO data access.
180///
181/// Builds indexes from a `CocoDataset` for O(1) lookups.
182#[derive(Debug, Clone)]
183pub struct CocoIndex {
184    /// `image_id` → `CocoImage`
185    pub images: HashMap<u64, CocoImage>,
186    /// `category_id` → `CocoCategory`
187    pub categories: HashMap<u32, CocoCategory>,
188    /// `category_id` → `label_index` (0-based, alphabetical order by name)
189    pub label_indices: HashMap<u32, u64>,
190    /// `image_id` → `Vec<CocoAnnotation>`
191    pub annotations_by_image: HashMap<u64, Vec<CocoAnnotation>>,
192}
193
194impl CocoIndex {
195    /// Build lookup index from a `CocoDataset`.
196    ///
197    /// Creates efficient lookup tables for accessing images, categories,
198    /// and annotations by their IDs.
199    pub fn from_dataset(dataset: &CocoDataset) -> Self {
200        let images: HashMap<_, _> = dataset
201            .images
202            .iter()
203            .map(|img| (img.id, img.clone()))
204            .collect();
205
206        let categories: HashMap<_, _> = dataset
207            .categories
208            .iter()
209            .map(|cat| (cat.id, cat.clone()))
210            .collect();
211
212        // Build alphabetically-sorted label indices for consistent ordering
213        let mut category_names: Vec<_> = dataset
214            .categories
215            .iter()
216            .map(|c| (c.id, c.name.clone()))
217            .collect();
218        category_names.sort_by(|a, b| a.1.cmp(&b.1));
219        let label_indices: HashMap<_, _> = category_names
220            .iter()
221            .enumerate()
222            .map(|(idx, (cat_id, _))| (*cat_id, idx as u64))
223            .collect();
224
225        let mut annotations_by_image: HashMap<u64, Vec<CocoAnnotation>> = HashMap::new();
226        for ann in &dataset.annotations {
227            annotations_by_image
228                .entry(ann.image_id)
229                .or_default()
230                .push(ann.clone());
231        }
232
233        Self {
234            images,
235            categories,
236            label_indices,
237            annotations_by_image,
238        }
239    }
240
241    /// Get the label name for a category ID.
242    pub fn label_name(&self, category_id: u32) -> Option<&str> {
243        self.categories.get(&category_id).map(|c| c.name.as_str())
244    }
245
246    /// Get the label index for a category ID.
247    pub fn label_index(&self, category_id: u32) -> Option<u64> {
248        self.label_indices.get(&category_id).copied()
249    }
250
251    /// Get annotations for an image.
252    pub fn annotations_for_image(&self, image_id: u64) -> &[CocoAnnotation] {
253        self.annotations_by_image
254            .get(&image_id)
255            .map(|v| v.as_slice())
256            .unwrap_or(&[])
257    }
258}
259
#[cfg(test)]
mod tests {
    use super::*;

    /// A default dataset has no images, annotations or categories.
    #[test]
    fn test_coco_dataset_default() {
        let dataset = CocoDataset::default();
        assert!(dataset.images.is_empty());
        assert!(dataset.annotations.is_empty());
        assert!(dataset.categories.is_empty());
    }

    /// Only the `images` key is required; the other sections default.
    #[test]
    fn test_coco_dataset_deserialize_minimal() {
        let dataset: CocoDataset = serde_json::from_str(r#"{"images": []}"#).unwrap();
        assert!(dataset.images.is_empty());
        assert!(dataset.licenses.is_empty());
        assert!(dataset.annotations.is_empty());
        assert!(dataset.categories.is_empty());
        assert!(dataset.info.year.is_none());

        // `images` has no serde default, so leaving it out is an error.
        assert!(serde_json::from_str::<CocoDataset>("{}").is_err());
    }

    /// The index exposes images, categories, label indices and per-image
    /// annotations, and answers unknown IDs with `None` / an empty slice.
    #[test]
    fn test_coco_index_from_dataset() {
        let dataset = CocoDataset {
            images: vec![
                CocoImage {
                    id: 1,
                    width: 640,
                    height: 480,
                    file_name: "image1.jpg".to_string(),
                    ..Default::default()
                },
                CocoImage {
                    id: 2,
                    width: 800,
                    height: 600,
                    file_name: "image2.jpg".to_string(),
                    ..Default::default()
                },
            ],
            categories: vec![
                CocoCategory {
                    id: 1,
                    name: "person".to_string(),
                    supercategory: Some("human".to_string()),
                },
                CocoCategory {
                    id: 2,
                    name: "car".to_string(),
                    supercategory: Some("vehicle".to_string()),
                },
            ],
            annotations: vec![
                CocoAnnotation {
                    id: 100,
                    image_id: 1,
                    category_id: 1,
                    bbox: [10.0, 20.0, 100.0, 200.0],
                    area: 20000.0,
                    iscrowd: 0,
                    segmentation: None,
                },
                CocoAnnotation {
                    id: 101,
                    image_id: 1,
                    category_id: 2,
                    bbox: [50.0, 60.0, 150.0, 100.0],
                    area: 15000.0,
                    iscrowd: 0,
                    segmentation: None,
                },
            ],
            ..Default::default()
        };

        let index = CocoIndex::from_dataset(&dataset);

        // Check images lookup
        assert_eq!(index.images.len(), 2);
        assert_eq!(index.images.get(&1).unwrap().file_name, "image1.jpg");
        assert_eq!(index.images.get(&2).unwrap().file_name, "image2.jpg");

        // Check categories lookup
        assert_eq!(index.categories.len(), 2);
        assert_eq!(index.label_name(1), Some("person"));
        assert_eq!(index.label_name(2), Some("car"));
        assert_eq!(index.label_name(3), None);

        // Check alphabetical label indices (car=0, person=1)
        assert_eq!(index.label_index(2), Some(0)); // car
        assert_eq!(index.label_index(1), Some(1)); // person
        assert_eq!(index.label_index(3), None);

        // Check annotations by image; dataset order is preserved per image
        let anns = index.annotations_for_image(1);
        assert_eq!(anns.len(), 2);
        assert_eq!(anns[0].id, 100);
        assert_eq!(anns[1].id, 101);

        // An image without annotations and an unknown image both yield an
        // empty slice
        assert!(index.annotations_for_image(2).is_empty());
        assert!(index.annotations_for_image(999).is_empty());
    }

    /// A nested coordinate array is parsed as the polygon variant.
    #[test]
    fn test_coco_segmentation_polygon_deserialize() {
        let json = r#"[[100.0, 200.0, 150.0, 250.0, 100.0, 250.0]]"#;
        let seg: CocoSegmentation = serde_json::from_str(json).unwrap();

        match seg {
            CocoSegmentation::Polygon(polys) => {
                assert_eq!(polys.len(), 1);
                assert_eq!(polys[0].len(), 6);
            }
            other => panic!("Expected polygon segmentation, got {:?}", other),
        }
    }

    /// An object whose `counts` is a number array is parsed as uncompressed
    /// RLE.
    #[test]
    fn test_coco_segmentation_rle_deserialize() {
        let json = r#"{"counts": [10, 20, 30, 40], "size": [100, 200]}"#;
        let seg: CocoSegmentation = serde_json::from_str(json).unwrap();

        match seg {
            CocoSegmentation::Rle(rle) => {
                assert_eq!(rle.counts, vec![10, 20, 30, 40]);
                assert_eq!(rle.size, [100, 200]);
            }
            other => panic!("Expected RLE segmentation, got {:?}", other),
        }
    }

    /// An object whose `counts` is a string is parsed as compressed RLE.
    #[test]
    fn test_coco_segmentation_compressed_rle_deserialize() {
        let json = r#"{"counts": "abc123", "size": [100, 200]}"#;
        let seg: CocoSegmentation = serde_json::from_str(json).unwrap();

        match seg {
            CocoSegmentation::CompressedRle(rle) => {
                assert_eq!(rle.counts, "abc123");
                assert_eq!(rle.size, [100, 200]);
            }
            other => panic!("Expected compressed RLE segmentation, got {:?}", other),
        }
    }

    /// Serializing and re-parsing an annotation preserves every field.
    #[test]
    fn test_coco_annotation_roundtrip() {
        let polygon = vec![vec![
            100.0, 200.0, 150.0, 200.0, 150.0, 280.0, 100.0, 280.0,
        ]];
        let ann = CocoAnnotation {
            id: 12345,
            image_id: 67890,
            category_id: 1,
            bbox: [100.5, 200.5, 50.0, 80.0],
            area: 4000.0,
            iscrowd: 0,
            segmentation: Some(CocoSegmentation::Polygon(polygon.clone())),
        };

        let json = serde_json::to_string(&ann).unwrap();
        let restored: CocoAnnotation = serde_json::from_str(&json).unwrap();

        assert_eq!(restored.id, ann.id);
        assert_eq!(restored.image_id, ann.image_id);
        assert_eq!(restored.category_id, ann.category_id);
        assert_eq!(restored.bbox, ann.bbox);
        assert_eq!(restored.area, ann.area);
        assert_eq!(restored.iscrowd, ann.iscrowd);

        match restored.segmentation {
            Some(CocoSegmentation::Polygon(polys)) => assert_eq!(polys, polygon),
            other => panic!("Expected polygon segmentation, got {:?}", other),
        }
    }

    /// An annotation without a mask omits the `segmentation` key when
    /// serialized and restores it as `None`.
    #[test]
    fn test_coco_annotation_without_segmentation() {
        let ann = CocoAnnotation {
            id: 1,
            image_id: 2,
            category_id: 3,
            bbox: [0.0, 0.0, 1.0, 1.0],
            ..Default::default()
        };

        let json = serde_json::to_string(&ann).unwrap();
        assert!(!json.contains("segmentation"));

        let restored: CocoAnnotation = serde_json::from_str(&json).unwrap();
        assert!(restored.segmentation.is_none());
        assert_eq!(restored.area, 0.0);
        assert_eq!(restored.iscrowd, 0);
    }
}