panlabel 0.7.0

The universal annotation converter
Documentation
//! VGG Image Annotator CSV bbox-only adapter.

use std::collections::BTreeMap;
use std::fs::File;
use std::io::{BufReader, BufWriter, Write};
use std::path::Path;

use serde_json::{json, Value};

use super::io_bbox_adapters_common::{
    dataset_from_raw, image_dimensions_if_found, scalar_to_string, RawAnn, RawImage,
};
use super::model::{Dataset, DatasetInfo};
use super::BBoxXYXY;
use crate::error::PanlabelError;

const HEADER: [&str; 7] = [
    "filename",
    "file_size",
    "file_attributes",
    "region_count",
    "region_id",
    "region_shape_attributes",
    "region_attributes",
];

pub fn read_via_csv(path: &Path) -> Result<Dataset, PanlabelError> {
    let file = File::open(path).map_err(PanlabelError::Io)?;
    let mut rdr = csv::ReaderBuilder::new()
        .has_headers(false)
        .from_reader(BufReader::new(file));
    let base = path.parent().unwrap_or_else(|| Path::new("."));
    let mut images_by_name: BTreeMap<String, RawImage> = BTreeMap::new();
    let mut anns = Vec::new();
    let mut skipped = 0usize;
    for (idx, result) in rdr.records().enumerate() {
        let row_num = idx + 1;
        let rec = result.map_err(|source| PanlabelError::ViaCsvParse {
            path: path.to_path_buf(),
            source,
        })?;
        if row_num == 1 && is_via_csv_header(&rec) {
            continue;
        }
        if rec.len() != 7 {
            return Err(PanlabelError::ViaCsvInvalid {
                path: path.to_path_buf(),
                message: format!("row {row_num}: expected 7 columns, got {}", rec.len()),
            });
        }
        let filename = rec.get(0).unwrap_or("").to_string();
        if filename.is_empty() {
            continue;
        }
        let mut attrs = BTreeMap::new();
        if let Some(size) = rec.get(1).filter(|s| !s.is_empty()) {
            attrs.insert("via_csv_size_bytes".into(), size.to_string());
        }
        if let Ok(file_attrs) = serde_json::from_str::<Value>(rec.get(2).unwrap_or("{}")) {
            if let Some(obj) = file_attrs.as_object() {
                for (k, v) in obj {
                    if let Some(s) = scalar_to_string(v) {
                        attrs.insert(format!("via_csv_file_attr_{k}"), s);
                    }
                }
            }
        }
        images_by_name.entry(filename.clone()).or_insert_with(|| {
            let dims = image_dimensions_if_found(base, &filename).unwrap_or((1, 1));
            RawImage {
                file_name: filename.clone(),
                width: dims.0,
                height: dims.1,
                attributes: attrs,
            }
        });
        let shape_str = rec.get(5).unwrap_or("");
        if shape_str.trim().is_empty() {
            continue;
        }
        let shape: Value =
            serde_json::from_str(shape_str).map_err(|e| PanlabelError::ViaCsvInvalid {
                path: path.to_path_buf(),
                message: format!("row {row_num}: invalid region_shape_attributes JSON: {e}"),
            })?;
        if shape.get("name").and_then(Value::as_str) != Some("rect") {
            skipped += 1;
            continue;
        }
        let x = shape.get("x").and_then(Value::as_f64).unwrap_or(0.0);
        let y = shape.get("y").and_then(Value::as_f64).unwrap_or(0.0);
        let w = shape.get("width").and_then(Value::as_f64).unwrap_or(0.0);
        let h = shape.get("height").and_then(Value::as_f64).unwrap_or(0.0);
        let region_attrs: Value =
            serde_json::from_str(rec.get(6).unwrap_or("{}")).map_err(|e| {
                PanlabelError::ViaCsvInvalid {
                    path: path.to_path_buf(),
                    message: format!("row {row_num}: invalid region_attributes JSON: {e}"),
                }
            })?;
        let label = resolve_label(&region_attrs);
        let mut ann_attrs = BTreeMap::new();
        if let Some(obj) = region_attrs.as_object() {
            for (k, v) in obj {
                if let Some(s) = scalar_to_string(v) {
                    ann_attrs.insert(format!("via_csv_region_attr_{k}"), s);
                }
            }
        }
        anns.push(RawAnn {
            image: filename,
            category: label,
            bbox: BBoxXYXY::from_xywh(x, y, w, h),
            confidence: None,
            attributes: ann_attrs,
        });
    }
    let mut info = DatasetInfo::default();
    if skipped > 0 {
        info.attributes.insert(
            "via_csv_non_rect_regions_skipped".into(),
            skipped.to_string(),
        );
    }
    Ok(dataset_from_raw(
        images_by_name.into_values().collect(),
        anns,
        vec![],
        info,
    ))
}

pub fn write_via_csv(path: &Path, dataset: &Dataset) -> Result<(), PanlabelError> {
    let csv_string = to_via_csv_string(dataset)?;
    let file = File::create(path).map_err(PanlabelError::Io)?;
    let mut writer = BufWriter::new(file);
    writer
        .write_all(csv_string.as_bytes())
        .map_err(PanlabelError::Io)?;
    writer.flush().map_err(PanlabelError::Io)
}

pub fn to_via_csv_string(dataset: &Dataset) -> Result<String, PanlabelError> {
    let cat_lookup: BTreeMap<_, _> = dataset.categories.iter().map(|c| (c.id, c)).collect();
    let anns_by_image = super::io_bbox_adapters_common::annotations_by_image(dataset);
    let mut images: Vec<_> = dataset.images.iter().collect();
    images.sort_by(|a, b| a.file_name.cmp(&b.file_name));
    let mut wtr = csv::WriterBuilder::new()
        .has_headers(false)
        .from_writer(Vec::new());
    wtr.write_record(HEADER)
        .map_err(|source| PanlabelError::ViaCsvWrite {
            path: "<string>".into(),
            source,
        })?;
    for img in images {
        let anns = anns_by_image.get(&img.id).cloned().unwrap_or_default();
        if anns.is_empty() {
            wtr.write_record([&img.file_name, "", "{}", "0", "", "", ""])
                .map_err(|source| PanlabelError::ViaCsvWrite {
                    path: "<string>".into(),
                    source,
                })?;
        } else {
            let count = anns.len().to_string();
            for (idx, ann) in anns.iter().enumerate() {
                let (x, y, bw, bh) = ann.bbox.to_xywh();
                let shape = json!({"name":"rect","x":x,"y":y,"width":bw,"height":bh}).to_string();
                let attrs = json!({"label": cat_lookup.get(&ann.category_id).map(|c| c.name.as_str()).unwrap_or("object")}).to_string();
                wtr.write_record([
                    &img.file_name,
                    "",
                    "{}",
                    &count,
                    &idx.to_string(),
                    &shape,
                    &attrs,
                ])
                .map_err(|source| PanlabelError::ViaCsvWrite {
                    path: "<string>".into(),
                    source,
                })?;
            }
        }
    }
    let bytes = wtr
        .into_inner()
        .map_err(|e| PanlabelError::Io(e.into_error()))?;
    String::from_utf8(bytes).map_err(|e| PanlabelError::ViaCsvInvalid {
        path: "<string>".into(),
        message: e.to_string(),
    })
}

pub(crate) fn is_via_csv_header(record: &csv::StringRecord) -> bool {
    record.len() == 7
        && HEADER
            .iter()
            .enumerate()
            .all(|(i, h)| record.get(i).is_some_and(|c| c.eq_ignore_ascii_case(h)))
}

fn resolve_label(attrs: &Value) -> String {
    let Some(obj) = attrs.as_object() else {
        return "object".into();
    };
    for key in ["label", "class"] {
        if let Some(s) = obj
            .get(key)
            .and_then(Value::as_str)
            .filter(|s| !s.is_empty())
        {
            return s.to_string();
        }
    }
    let scalars: Vec<String> = obj.values().filter_map(scalar_to_string).collect();
    if scalars.len() == 1 && !scalars[0].is_empty() {
        scalars[0].clone()
    } else {
        "object".into()
    }
}