semantic-scene 0.1.1

Rust parser for semantic scene descriptors, currently focused on Habitat-Sim Matterport3D .house files.
Documentation
//! MP3D dataset support.
use std::{
    collections::{HashMap, HashSet},
    io::BufRead,
};

use crate::{
    Dataset, LoadError, Rotation3, SemanticLevel, SemanticObject, SemanticRegion, SemanticScene,
    scene::ElementKind,
};

mod category;
pub(crate) mod parser;
pub(crate) mod raw;

pub use category::{Mp3dCategoryMapping, Mp3dObjectCategory, Mp3dRegionCategory};
use raw::Mp3dRecord;

/// Loader for Habitat-Sim `Matterport3D` `.house` semantic descriptors.
///
/// This is a field-less marker type: all loading behaviour lives in its
/// [`Dataset`] implementation, whose `from_reader` parses a `.house` stream.
/// It derives `Debug`, `Clone`, and `Copy` so it can be logged and passed
/// around freely like any other unit value.
#[derive(Debug, Clone, Copy)]
pub struct Mp3d;

/// Scene type produced by the MP3D loader: a [`SemanticScene`] parameterised
/// with the MP3D object and region category types.
type Mp3dScene = SemanticScene<Mp3dObjectCategory, Mp3dRegionCategory>;

/// Options for loading `Matterport3D` `.house` descriptors.
///
/// The [`Default`] implementation selects [`Rotation3::HABITAT_MP3D`].
#[derive(Debug, Clone)]
pub struct Mp3dOptions {
    /// Rotation to apply while loading.
    ///
    /// Defaults to [`Rotation3::HABITAT_MP3D`] to match Habitat-Sim's MP3D
    /// loader. Use [`Rotation3::IDENTITY`] to keep raw MP3D coordinates.
    ///
    /// The loader applies this rotation to the house, level, and region
    /// bounding boxes, to each object's `obb`, and to level and region
    /// positions.
    pub rotation: Rotation3,
}

impl Default for Mp3dOptions {
    fn default() -> Self {
        Self {
            rotation: Rotation3::HABITAT_MP3D,
        }
    }
}

impl Dataset for Mp3d {
    type Options = Mp3dOptions;
    type Error = LoadError;
    type ObjectCategory = Mp3dObjectCategory;
    type RegionCategory = Mp3dRegionCategory;

    /// Parses an MP3D `.house` descriptor from `reader` into a semantic scene.
    ///
    /// The first line must be exactly `ASCII 1.1` once trailing `\r`/`\n`
    /// characters are stripped. Every following non-blank line is handed to
    /// `parser::parse_record` and collected by record kind; the collected
    /// records are then assembled by `build_scene` using `options.rotation`.
    ///
    /// A later house record replaces an earlier one, and a category record
    /// replaces any earlier category stored under the same index.
    ///
    /// # Errors
    ///
    /// * [`LoadError::BadHeader`] if the input is empty or the first line is
    ///   not `ASCII 1.1`.
    /// * [`LoadError::ParseLine`] if a record line cannot be parsed; the
    ///   error carries the 1-based line number and the offending line.
    /// * [`LoadError::DuplicateSegmentId`] if two segment records share an id.
    /// * Any error returned by `build_scene`, and any read failure
    ///   propagated with `?`.
    fn from_reader<R: BufRead>(
        mut reader: R,
        options: Self::Options,
    ) -> Result<Mp3dScene, Self::Error> {
        let mut first_line = String::new();
        if reader.read_line(&mut first_line)? == 0 {
            return Err(LoadError::BadHeader {
                found: "<empty file>".to_string(),
            });
        }
        let magic = first_line.trim_end_matches(['\r', '\n']);
        if magic != "ASCII 1.1" {
            return Err(LoadError::BadHeader {
                found: magic.to_string(),
            });
        }

        let mut house = None;
        let mut level_records = Vec::<raw::LevelRecord>::new();
        let mut region_records = Vec::<(raw::RegionRecord, usize)>::new();
        let mut category_table = HashMap::<i32, Mp3dObjectCategory>::new();
        let mut object_records = Vec::<(raw::ObjectRecord, usize)>::new();
        let mut segment_owners = HashMap::<i32, (i32, usize)>::new();

        // The header occupied line 1, so the remaining lines are numbered from 2.
        for (line_number, line) in (2usize..).zip(reader.lines()) {
            let line = line?;
            if line.trim().is_empty() {
                continue;
            }

            let parsed = match parser::parse_record(&line) {
                Ok(parsed) => parsed,
                Err(source) => {
                    return Err(LoadError::ParseLine {
                        line_number,
                        line: line.clone(),
                        source,
                    });
                }
            };

            match parsed {
                Mp3dRecord::House(house_record) => house = Some(house_record),
                Mp3dRecord::Level(level) => level_records.push(level),
                Mp3dRecord::Region(region) => region_records.push((region, line_number)),
                Mp3dRecord::Category(entry) => {
                    let key = entry.index;
                    let category = Mp3dObjectCategory::new(
                        key,
                        entry.raw_index,
                        entry.raw_name,
                        entry.mpcat40_index,
                        entry.mpcat40_name,
                    );
                    category_table.insert(key, category);
                }
                Mp3dRecord::Object(object) => object_records.push((object, line_number)),
                Mp3dRecord::Segment(segment) => {
                    // Each segment id may be declared only once.
                    let previous = segment_owners
                        .insert(segment.segment_id, (segment.object_index, line_number));
                    if previous.is_some() {
                        return Err(LoadError::DuplicateSegmentId {
                            line_number,
                            segment_id: segment.segment_id,
                        });
                    }
                }
                Mp3dRecord::Ignored => {}
            }
        }

        build_scene(
            house,
            level_records,
            region_records,
            &category_table,
            object_records,
            &segment_owners,
            options.rotation,
        )
    }
}

/// Assembles the final scene from the records collected while reading a
/// `.house` file.
///
/// Steps, in order: require a house record, validate segment parents, group
/// objects by region, group regions by level, build the levels, and finally
/// combine everything with the house name, label, counts, and rotated
/// bounding box.
///
/// # Errors
///
/// Returns [`LoadError::BadHeader`] when no house record was seen, and
/// forwards any error from segment validation or from grouping objects and
/// regions under their parents.
fn build_scene(
    scene_header: Option<raw::HouseRecord>,
    levels: Vec<raw::LevelRecord>,
    regions: Vec<(raw::RegionRecord, usize)>,
    categories: &HashMap<i32, Mp3dObjectCategory>,
    objects: Vec<(raw::ObjectRecord, usize)>,
    segments: &HashMap<i32, (i32, usize)>,
    rotation: Rotation3,
) -> Result<Mp3dScene, LoadError> {
    let house = match scene_header {
        Some(record) => record,
        None => {
            return Err(LoadError::BadHeader {
                found: "<missing house record>".to_string(),
            });
        }
    };
    validate_segments(&objects, segments)?;

    // Objects are grouped first so that each region can take ownership of its own.
    let known_regions = source_indices(&regions);
    let objects_by_region =
        build_objects_by_region(objects, categories, &known_regions, rotation)?;

    let known_levels: HashSet<_> = levels.iter().map(|level| level.index).collect();
    let regions_by_level =
        build_regions_by_level(regions, objects_by_region, &known_levels, rotation)?;
    let levels = build_levels(levels, regions_by_level, rotation);

    // Read the counts while the house record is still whole, then take it apart.
    let counts = house_counts(&house);
    let bounds = house.aabb.rotated(rotation);
    Ok(SemanticScene::from_parts(
        house.name,
        house.label,
        counts,
        Some(bounds),
        levels,
    ))
}

/// Checks that every segment refers to an object that exists.
///
/// `segments` maps a segment id to `(object_index, line_number)`.
///
/// # Errors
///
/// Returns [`LoadError::MissingParent`] with `kind: "object"` when a segment
/// names an object index that no object record declares. If several segments
/// are dangling, the one on the earliest line is reported, so the result does
/// not depend on the hash map's iteration order.
fn validate_segments(
    objects: &[(raw::ObjectRecord, usize)],
    segments: &HashMap<i32, (i32, usize)>,
) -> Result<(), LoadError> {
    let object_indices = objects
        .iter()
        .map(|(record, _)| record.index)
        .collect::<HashSet<_>>();

    // `HashMap` iteration order is arbitrary, so pick the offender by line
    // number instead of taking whichever one happens to be visited first.
    let first_dangling = segments
        .values()
        .copied()
        .filter(|(object_index, _)| !object_indices.contains(object_index))
        .min_by_key(|&(_, line_number)| line_number);

    match first_dangling {
        Some((object_index, line_number)) => Err(LoadError::MissingParent {
            line_number,
            kind: "object",
            index: object_index,
        }),
        None => Ok(()),
    }
}

/// Collects the `index` of every region record into a set, ignoring the
/// line number stored alongside each record.
fn source_indices(records: &[(raw::RegionRecord, usize)]) -> HashSet<i32> {
    let mut indices = HashSet::new();
    for (region, _line_number) in records {
        indices.insert(region.index);
    }
    indices
}

/// Converts raw object records into [`SemanticObject`]s grouped by the index
/// of their parent region.
///
/// Each object's `obb` is rotated by `rotation`. An object whose category
/// index is negative is built without a category.
///
/// # Errors
///
/// * [`LoadError::MissingParent`] with `kind: "region"` when an object's
///   region index is negative (an orphan object) or is not present in
///   `region_indices`. Malformed input is reported as an error rather than
///   a panic, since the index comes straight from the file being loaded.
/// * [`LoadError::MissingCategory`] when a non-negative category index has
///   no entry in `categories`.
fn build_objects_by_region(
    objects: Vec<(raw::ObjectRecord, usize)>,
    categories: &HashMap<i32, Mp3dObjectCategory>,
    region_indices: &HashSet<i32>,
    rotation: Rotation3,
) -> Result<HashMap<i32, Vec<SemanticObject<Mp3dObjectCategory>>>, LoadError> {
    let mut objects_by_region = HashMap::<i32, Vec<SemanticObject<Mp3dObjectCategory>>>::new();
    for (record, line_number) in objects {
        // A negative index means "no parent"; treat it like any other
        // unresolved parent instead of aborting the process.
        if record.region_index < 0 || !region_indices.contains(&record.region_index) {
            return Err(LoadError::MissingParent {
                line_number,
                kind: "region",
                index: record.region_index,
            });
        }

        let category_index = record.category_index;
        let category = if category_index < 0 {
            None
        } else {
            let found = categories
                .get(&category_index)
                .cloned()
                .ok_or_else(|| LoadError::MissingCategory {
                    line_number,
                    index: category_index,
                })?;
            Some(found)
        };

        objects_by_region
            .entry(record.region_index)
            .or_default()
            .push(SemanticObject::new(
                record.index,
                category,
                record.obb.rotated(rotation),
            ));
    }
    Ok(objects_by_region)
}

/// Converts raw region records into [`SemanticRegion`]s grouped by the index
/// of their parent level.
///
/// Each region takes ownership of the objects stored under its index in
/// `objects_by_region`; a region with no entry gets an empty object list.
/// Because the entry is removed when taken, a later region that reuses an
/// index receives no objects. Positions and bounding boxes are rotated by
/// `rotation`.
///
/// # Errors
///
/// Returns [`LoadError::MissingParent`] with `kind: "level"` when a region's
/// level index is negative (an orphan region) or is not present in
/// `level_indices`. Malformed input is reported as an error rather than a
/// panic, since the index comes straight from the file being loaded.
fn build_regions_by_level(
    regions: Vec<(raw::RegionRecord, usize)>,
    mut objects_by_region: HashMap<i32, Vec<SemanticObject<Mp3dObjectCategory>>>,
    level_indices: &HashSet<i32>,
    rotation: Rotation3,
) -> Result<HashMap<i32, Vec<SemanticRegion<Mp3dObjectCategory, Mp3dRegionCategory>>>, LoadError> {
    let mut regions_by_level =
        HashMap::<i32, Vec<SemanticRegion<Mp3dObjectCategory, Mp3dRegionCategory>>>::new();
    for (record, line_number) in regions {
        // A negative index means "no parent"; treat it like any other
        // unresolved parent instead of aborting the process.
        if record.level_index < 0 || !level_indices.contains(&record.level_index) {
            return Err(LoadError::MissingParent {
                line_number,
                kind: "level",
                index: record.level_index,
            });
        }

        let objects = objects_by_region.remove(&record.index).unwrap_or_default();
        regions_by_level
            .entry(record.level_index)
            .or_default()
            .push(SemanticRegion::new(
                record.index,
                Mp3dRegionCategory::new(record.category_code),
                rotation.transform_vector(record.position),
                record.aabb.rotated(rotation),
                objects,
            ));
    }
    Ok(regions_by_level)
}

/// Converts raw level records into [`SemanticLevel`]s, in input order.
///
/// Each level takes ownership of the regions stored under its index in
/// `regions_by_level`; a level with no entry gets an empty region list.
/// Positions and bounding boxes are rotated by `rotation`.
fn build_levels(
    levels: Vec<raw::LevelRecord>,
    mut regions_by_level: HashMap<i32, Vec<SemanticRegion<Mp3dObjectCategory, Mp3dRegionCategory>>>,
    rotation: Rotation3,
) -> Vec<SemanticLevel<Mp3dObjectCategory, Mp3dRegionCategory>> {
    let mut built = Vec::with_capacity(levels.len());
    for level in levels {
        let regions = regions_by_level.remove(&level.index).unwrap_or_default();
        built.push(SemanticLevel::new(
            level.index,
            level.label,
            rotation.transform_vector(level.position),
            level.aabb.rotated(rotation),
            regions,
        ));
    }
    built
}

/// Maps each [`ElementKind`] name to the matching count field of the house
/// record.
fn house_counts(record: &raw::HouseRecord) -> HashMap<&'static str, usize> {
    let mut counts = HashMap::new();
    counts.insert(ElementKind::Images.as_str(), record.images);
    counts.insert(ElementKind::Panoramas.as_str(), record.panoramas);
    counts.insert(ElementKind::Vertices.as_str(), record.vertices);
    counts.insert(ElementKind::Surfaces.as_str(), record.surfaces);
    counts.insert(ElementKind::Segments.as_str(), record.segments);
    counts.insert(ElementKind::Objects.as_str(), record.objects);
    counts.insert(ElementKind::Categories.as_str(), record.categories);
    counts.insert(ElementKind::Regions.as_str(), record.regions);
    counts.insert(ElementKind::Portals.as_str(), record.portals);
    counts.insert(ElementKind::Levels.as_str(), record.levels);
    counts
}