// umls 0.1.0
//
// A library and command line tool for working with the UMLS Metathesaurus.
// Documentation
mod file_iterator;
mod find_files;
mod schema;

use std::path::{Path, PathBuf};

use ahash::{HashMap, HashMapExt};
use eyre::Result;

pub use schema::*;

use self::{file_iterator::File, find_files::find_data_files};

/// Bookkeeping kept for one base file name tracked by [`Files`].
///
/// The default value has no locations and no columns.
#[derive(Default, Clone)]
struct FileMetadata {
    /// Paths of every directory entry that shares this base name.
    /// `Files::new` sorts this list after scanning the directory.
    locations: Vec<PathBuf>,
    /// Column names for the file; empty until `Files::init_file_columns`
    /// assigns them.
    columns: Vec<String>,
}

/// An index of the regular files found in a data directory.
///
/// Built by `Files::new`; individual files are opened through
/// `Files::get_file_stream`.
pub struct Files {
    /// Metadata keyed by base name, i.e. the part of the on-disk file name
    /// that precedes the first `.`.
    files: HashMap<String, FileMetadata>,
}

impl Files {
    pub fn new(dir: &Path) -> Result<Self> {
        let dir = find_data_files(dir)?;

        let mut files = HashMap::new();

        let entries = std::fs::read_dir(&dir)?;
        for file in entries {
            let file = file?;
            if !file.metadata()?.is_file() {
                continue;
            }

            let name = file.file_name();
            let base_name = name
                .to_string_lossy()
                .split('.')
                .next()
                .unwrap_or_default()
                .to_string();

            files
                .entry(base_name)
                .or_insert_with(FileMetadata::default)
                .locations
                .push(file.path());
        }

        // read_dir may not return the files in order, so sort them.
        for (_, file) in files.iter_mut() {
            file.locations.sort_unstable();
        }

        let mut slf = Self { files };
        slf.init_file_columns()?;

        Ok(slf)
    }

    pub fn get_file_stream(&self, filename: &str) -> Result<File> {
        let locations = self
            .files
            .get(filename)
            .ok_or_else(|| eyre::eyre!("No file named {}", filename,))?;

        File::new(locations)
    }

    fn init_file_columns(&mut self) -> Result<()> {
        let mut mrfiles = self.get_file_stream("MRFILES")?;
        for line in mrfiles.reader.records() {
            let line = line?;
            let filename = line.get(0).unwrap_or_default();
            let basename = filename.split('.').next().unwrap_or_default();
            let columns = line.get(2).unwrap_or_default();

            let columns = columns
                .split(',')
                .map(|s| s.to_string())
                .collect::<Vec<_>>();

            if let Some(f) = self.files.get_mut(basename) {
                f.columns = columns;
            }
        }

        Ok(())
    }
}