panlabel 0.7.0

The universal annotation converter
Documentation
//! OIDv4 Toolkit-style TXT adapter.

use std::collections::{BTreeMap, BTreeSet};
use std::fs::{self, File};
use std::io::{BufRead, BufReader, BufWriter, Write};
use std::path::{Path, PathBuf};

use super::io_adapter_common::write_images_readme;
use super::io_bbox_adapters_common::{
    dataset_from_raw, image_dimensions_or_error, RawAnn, RawImage,
};
use super::model::{Dataset, DatasetInfo};
use super::BBoxXYXY;
use crate::error::PanlabelError;

/// Reads OIDv4-style TXT annotations found at `path` into a [`Dataset`].
///
/// A directory is handed to the directory reader; every other path is read as
/// a single label file.
///
/// # Errors
/// Returns whatever error the selected reader produces.
pub fn read_oidv4_txt(path: &Path) -> Result<Dataset, PanlabelError> {
    if !path.is_dir() {
        return read_oidv4_file(path);
    }
    read_oidv4_dir(path)
}

/// Writes `dataset` as OIDv4-style TXT annotations at `path`.
///
/// The layout is chosen from the path alone: a path that has an extension is
/// written as one flat label file, a path without an extension is treated as
/// an output directory.
///
/// # Errors
/// Returns whatever error the selected writer produces.
pub fn write_oidv4_txt(path: &Path, dataset: &Dataset) -> Result<(), PanlabelError> {
    match path.extension() {
        Some(_) => write_oidv4_single_file(path, dataset),
        None => write_oidv4_dir(path, dataset),
    }
}

/// Heuristically decides whether `path` looks like an OIDv4-style label file.
///
/// Only the first eight lines are inspected. The first non-blank line among
/// them settles the answer: it must consist of exactly five
/// whitespace-separated tokens, the last four of which parse as `f64`.
/// When none of the inspected lines has content (including an empty file) the
/// result is `true`.
///
/// # Errors
/// Returns [`PanlabelError::Io`] if the file cannot be opened or read.
pub(crate) fn looks_like_oidv4_txt_file(path: &Path) -> Result<bool, PanlabelError> {
    let reader = BufReader::new(File::open(path).map_err(PanlabelError::Io)?);
    for raw in reader.lines().take(8) {
        let text = raw.map_err(PanlabelError::Io)?;
        let mut tokens = text.split_whitespace();
        // A line without any token is blank: keep looking.
        if tokens.next().is_none() {
            continue;
        }
        // The first token is the class label; the remainder must be four numbers.
        let coords: Vec<&str> = tokens.collect();
        let plausible = coords.len() == 4 && coords.iter().all(|t| t.parse::<f64>().is_ok());
        return Ok(plausible);
    }
    Ok(true)
}

/// Reports whether the tree under `path` contains at least one OIDv4 label file.
///
/// A label file is a regular file whose extension is `txt` (compared
/// ASCII-case-insensitively) and whose immediate parent directory is named
/// exactly `Label`. The walk follows symbolic links and stops at the first hit.
///
/// # Errors
/// Returns [`PanlabelError::FormatDetectionFailed`] when the directory walk
/// itself reports an error.
pub(crate) fn dir_has_oidv4_label_files(path: &Path) -> Result<bool, PanlabelError> {
    for walked in walkdir::WalkDir::new(path).follow_links(true) {
        let entry = match walked {
            Ok(entry) => entry,
            Err(source) => {
                return Err(PanlabelError::FormatDetectionFailed {
                    path: path.to_path_buf(),
                    reason: source.to_string(),
                });
            }
        };
        if !entry.file_type().is_file() {
            continue;
        }
        let candidate = entry.path();
        let has_txt_extension = candidate
            .extension()
            .and_then(|e| e.to_str())
            .is_some_and(|e| e.eq_ignore_ascii_case("txt"));
        let parent_is_label = candidate
            .parent()
            .and_then(|p| p.file_name())
            .and_then(|n| n.to_str())
            == Some("Label");
        if has_txt_extension && parent_is_label {
            return Ok(true);
        }
    }
    Ok(false)
}

fn read_oidv4_dir(path: &Path) -> Result<Dataset, PanlabelError> {
    let mut label_files = Vec::new();
    for entry in walkdir::WalkDir::new(path).follow_links(true) {
        let entry = entry.map_err(|source| PanlabelError::FormatDetectionFailed {
            path: path.to_path_buf(),
            reason: source.to_string(),
        })?;
        if entry.file_type().is_file()
            && entry
                .path()
                .extension()
                .and_then(|e| e.to_str())
                .is_some_and(|e| e.eq_ignore_ascii_case("txt"))
            && entry
                .path()
                .parent()
                .and_then(|p| p.file_name())
                .and_then(|n| n.to_str())
                == Some("Label")
        {
            label_files.push(entry.path().to_path_buf());
        }
    }
    label_files.sort();
    oidv4_files_to_ir(path, label_files)
}

/// Reads a single OIDv4 label file into a [`Dataset`].
///
/// The file's parent directory is used as the dataset root; when the path has
/// no parent component at all, `.` is used instead.
fn read_oidv4_file(path: &Path) -> Result<Dataset, PanlabelError> {
    let root = match path.parent() {
        Some(dir) => dir,
        None => Path::new("."),
    };
    let only_file = vec![path.to_path_buf()];
    oidv4_files_to_ir(root, only_file)
}

/// Converts a list of OIDv4 label files into a [`Dataset`].
///
/// For every label file, in the order given:
/// 1. The image name is derived from the label file's stem (`"image"` when the
///    stem is missing or not valid UTF-8) via `resolve_image_name`.
/// 2. The first time an image name is seen, its dimensions are looked up
///    relative to the label file's grandparent directory (falling back to
///    `root`) and the image is recorded.
/// 3. Each non-blank line must hold exactly five whitespace-separated fields,
///    `class xmin ymin xmax ymax`, with the last four parsing as `f64`.
///
/// NOTE(review): a class name containing whitespace yields more than five
/// tokens and is rejected here, while the writers in this file emit category
/// names verbatim — confirm that names are guaranteed to be whitespace-free.
///
/// # Errors
/// * `Oidv4ImageNotFound` / `Oidv4ImageDimensionRead` from the dimension lookup.
/// * `Io` when a label file cannot be opened or read.
/// * `Oidv4TxtParse` (with a 1-based line number) for a wrong field count or a
///   non-numeric coordinate.
fn oidv4_files_to_ir(root: &Path, files: Vec<PathBuf>) -> Result<Dataset, PanlabelError> {
    let mut raw_images = Vec::new();
    let mut raw_anns = Vec::new();
    let mut known_images = BTreeSet::new();

    for label_path in files {
        let stem = match label_path.file_stem().and_then(|s| s.to_str()) {
            Some(stem) => stem.to_string(),
            None => "image".to_string(),
        };
        let image_name = resolve_image_name(root, &label_path, &stem);

        // Register each image only once, even if several label files map to it.
        let first_sighting = known_images.insert(image_name.clone());
        if first_sighting {
            let image_dir = match label_path.parent().and_then(|dir| dir.parent()) {
                Some(dir) => dir,
                None => root,
            };
            let (width, height) = image_dimensions_or_error(
                image_dir,
                &image_name,
                || PanlabelError::Oidv4ImageNotFound {
                    path: label_path.clone(),
                    image_ref: image_name.clone(),
                },
                |p, source| PanlabelError::Oidv4ImageDimensionRead { path: p, source },
            )?;
            raw_images.push(RawImage {
                file_name: image_name.clone(),
                width,
                height,
                attributes: BTreeMap::new(),
            });
        }

        let reader = BufReader::new(File::open(&label_path).map_err(PanlabelError::Io)?);
        for (idx, line) in reader.lines().enumerate() {
            let text = line.map_err(PanlabelError::Io)?;
            let line_no = idx + 1;

            let fields: Vec<&str> = text.split_whitespace().collect();
            if fields.is_empty() {
                // Blank (or whitespace-only) line.
                continue;
            }
            if fields.len() != 5 {
                return Err(PanlabelError::Oidv4TxtParse {
                    path: label_path.clone(),
                    line: line_no,
                    message: "expected class xmin ymin xmax ymax".into(),
                });
            }

            // Fields 2..=5 are the box corners; report the 1-based field on failure.
            let mut coords = [0.0_f64; 4];
            for (offset, token) in fields[1..].iter().enumerate() {
                coords[offset] = token
                    .parse::<f64>()
                    .map_err(|_| PanlabelError::Oidv4TxtParse {
                        path: label_path.clone(),
                        line: line_no,
                        message: format!("invalid numeric field {}", offset + 2),
                    })?;
            }
            let [xmin, ymin, xmax, ymax] = coords;

            raw_anns.push(RawAnn {
                image: image_name.clone(),
                category: fields[0].to_string(),
                bbox: BBoxXYXY::from_xyxy(xmin, ymin, xmax, ymax),
                confidence: None,
                attributes: BTreeMap::new(),
            });
        }
    }

    Ok(dataset_from_raw(
        raw_images,
        raw_anns,
        Vec::new(),
        DatasetInfo::default(),
    ))
}

/// Chooses the image file name that belongs to a label file.
///
/// Each entry of `IMAGE_EXTENSIONS` is appended to `stem` in turn; the first
/// candidate that exists as a file either in the label file's grandparent
/// directory (falling back to `root`) or in `root/images` is returned. Only
/// the bare file name is returned, never the directory it was found in.
/// When no candidate exists, `{stem}.jpg` is returned.
///
/// NOTE(review): extensions are appended verbatim, so they presumably carry
/// their leading dot — confirm against `IMAGE_EXTENSIONS`.
fn resolve_image_name(root: &Path, label_path: &Path, stem: &str) -> String {
    let sibling_dir = match label_path.parent().and_then(|dir| dir.parent()) {
        Some(dir) => dir,
        None => root,
    };
    let images_dir = root.join("images");
    super::io_bbox_adapters_common::IMAGE_EXTENSIONS
        .into_iter()
        .map(|ext| format!("{stem}{ext}"))
        .find(|candidate| {
            sibling_dir.join(candidate).is_file() || images_dir.join(candidate).is_file()
        })
        .unwrap_or_else(|| format!("{stem}.jpg"))
}

/// Writes `dataset` as a directory of per-image OIDv4 label files.
///
/// Creates `path/Label`, asks `write_images_readme` to drop its note under
/// `path`, and then writes one `<stem>.txt` per image into `Label`. Each line
/// is `class xmin ymin xmax ymax`; an annotation whose category id is unknown
/// is written with the class `object`. Images without annotations still get an
/// (empty) label file.
///
/// NOTE(review): the label file name is the image's file stem only, so two
/// images sharing a stem would write to the same file — confirm whether stems
/// are unique upstream.
///
/// # Errors
/// Returns [`PanlabelError::Io`] for directory or file write failures, or the
/// error reported by `write_images_readme`.
fn write_oidv4_dir(path: &Path, dataset: &Dataset) -> Result<(), PanlabelError> {
    let label_dir = path.join("Label");
    fs::create_dir_all(&label_dir).map_err(PanlabelError::Io)?;
    write_images_readme(path, "PanLabel does not copy image binaries.\n")?;

    let class_names: BTreeMap<_, _> = dataset
        .categories
        .iter()
        .map(|category| (category.id, category.name.as_str()))
        .collect();
    let grouped = super::io_bbox_adapters_common::annotations_by_image(dataset);

    for image in &dataset.images {
        // Fall back to the whole file name when no UTF-8 stem can be extracted.
        let stem: &str = match Path::new(&image.file_name)
            .file_stem()
            .and_then(|s| s.to_str())
        {
            Some(stem) => stem,
            None => &image.file_name,
        };

        let mut body = String::new();
        if let Some(image_anns) = grouped.get(&image.id) {
            for ann in image_anns.iter() {
                let class_name = class_names
                    .get(&ann.category_id)
                    .copied()
                    .unwrap_or("object");
                body += &format!(
                    "{} {} {} {} {}\n",
                    class_name,
                    ann.bbox.xmin(),
                    ann.bbox.ymin(),
                    ann.bbox.xmax(),
                    ann.bbox.ymax()
                );
            }
        }

        let target = label_dir.join(format!("{stem}.txt"));
        fs::write(target, body).map_err(PanlabelError::Io)?;
    }
    Ok(())
}

/// Writes every annotation of `dataset` into one flat OIDv4-style TXT file.
///
/// Annotations are emitted in ascending annotation-id order, one per line, as
/// `class xmin ymin xmax ymax`. An annotation whose category id is unknown is
/// written with the class `object`. The lines carry no image reference, so
/// annotations of all images end up in the same file.
///
/// # Errors
/// Returns [`PanlabelError::Io`] if the file cannot be created, a line cannot
/// be written, or the buffered output cannot be flushed to the file.
fn write_oidv4_single_file(path: &Path, dataset: &Dataset) -> Result<(), PanlabelError> {
    let cat_lookup: BTreeMap<_, _> = dataset.categories.iter().map(|c| (c.id, c)).collect();
    let mut anns: Vec<_> = dataset.annotations.iter().collect();
    anns.sort_by_key(|a| a.id);

    let file = File::create(path).map_err(PanlabelError::Io)?;
    let mut w = BufWriter::new(file);
    for ann in anns {
        let class_name = cat_lookup
            .get(&ann.category_id)
            .map(|c| c.name.as_str())
            .unwrap_or("object");
        writeln!(
            w,
            "{} {} {} {} {}",
            class_name,
            ann.bbox.xmin(),
            ann.bbox.ymin(),
            ann.bbox.xmax(),
            ann.bbox.ymax()
        )
        .map_err(PanlabelError::Io)?;
    }
    // Flush explicitly: dropping a `BufWriter` also flushes, but it discards any
    // error, which would let a failed write (e.g. disk full) pass as success.
    w.flush().map_err(PanlabelError::Io)?;
    Ok(())
}