umls 0.1.0

A library and command-line tool for working with the UMLS Metathesaurus.
Documentation
use eyre::Result;
use fst::{IntoStreamer, Streamer};
use regex_automata::dense;
use std::{
    io::{BufRead, Read},
    path::Path,
};

pub mod build;

/// In-memory handle for searching a prebuilt string index.
///
/// Both fields are populated by [`Searcher::new`] from files found in a base
/// directory.
pub struct Searcher {
    /// One entry per line of the concepts list file, in file order.
    pub concepts: Vec<String>,
    /// FST map backed by the raw bytes of the strings index file; maps string
    /// keys to `u64` values.
    // NOTE(review): the `u64` values presumably index into `concepts` —
    // confirm against the `build` module.
    pub index: fst::Map<Vec<u8>>,
}

/// File name (joined onto the base directory) of the serialized FST map that
/// `Searcher::new` loads into `Searcher::index`.
const STRINGS_FST_NAME: &str = "umls_search.strings.fst";
/// File name (joined onto the base directory) of the line-oriented concepts
/// list that `Searcher::new` loads into `Searcher::concepts`.
const CONCEPTS_LST_NAME: &str = "umls_search.concepts.lst";

impl Searcher {
    /// Loads a searcher from the index files stored in `base_dir`.
    ///
    /// The concepts list is read line by line into memory, and the whole
    /// strings FST is read into a byte buffer that backs the [`fst::Map`].
    ///
    /// # Errors
    ///
    /// Returns an error if either file cannot be opened or read, if a line of
    /// the concepts list cannot be decoded, or if `fst::Map::new` rejects the
    /// bytes of the strings file.
    pub fn new(base_dir: &Path) -> Result<Searcher> {
        let concepts_lst_path = base_dir.join(CONCEPTS_LST_NAME);

        let concepts_file = std::fs::File::open(concepts_lst_path)?;
        let concepts_reader = std::io::BufReader::new(concepts_file);
        // Stops at the first line that fails to read and propagates that error.
        let concepts = concepts_reader.lines().collect::<Result<Vec<_>, _>>()?;

        let strings_fst_path = base_dir.join(STRINGS_FST_NAME);
        let mut strings = std::fs::File::open(strings_fst_path)?;
        let mut fst_contents = Vec::new();
        strings.read_to_end(&mut fst_contents)?;

        let index = fst::Map::new(fst_contents)?;

        Ok(Self { concepts, index })
    }

    /// Looks up `word` in the index, ignoring case.
    ///
    /// `word` is matched literally: regex metacharacters in it are escaped
    /// before the pattern is compiled, so terms such as `C++` or
    /// `IL-2 (human)` are searched for as written instead of being parsed as
    /// regex syntax.
    ///
    /// Returns the value stored for the first matching key yielded by the
    /// index stream, or `Ok(None)` when no key matches.
    ///
    /// # Errors
    ///
    /// Returns an error if the DFA for the pattern cannot be built.
    pub fn search(&self, word: &str) -> Result<Option<u64>> {
        // The regex is only a vehicle for case-insensitive matching, so the
        // user-supplied term must not be able to inject regex syntax.
        let pattern = format!("(?i){}", Self::escape_regex_literal(word));
        // Note: a fresh DFA is compiled on every call.
        let dfa = dense::Builder::new().anchored(true).build(&pattern)?;
        let result = self.index.search(&dfa).into_stream().next().map(|i| i.1);
        Ok(result)
    }

    /// Streams index entries within `levenshtein` edits of `word`.
    ///
    /// The Levenshtein automaton is built with a limit of 1,000,000 (the
    /// third argument to `Levenshtein::new_with_limit`).
    ///
    /// # Errors
    ///
    /// Returns an error if the Levenshtein automaton cannot be constructed.
    pub fn fuzzy_search(
        &self,
        word: &str,
        levenshtein: u32,
    ) -> Result<fst::map::StreamWithState<'_, fst::automaton::Levenshtein>> {
        let auto = fst::automaton::Levenshtein::new_with_limit(word, levenshtein, 1_000_000)?;
        Ok(self.index.search_with_state(auto).into_stream())
    }

    /// Backslash-escapes every character of `word` that has a special meaning
    /// in regex syntax outside a character class, leaving all others as-is.
    fn escape_regex_literal(word: &str) -> String {
        let mut escaped = String::with_capacity(word.len());
        for c in word.chars() {
            if matches!(
                c,
                '\\' | '.' | '+' | '*' | '?' | '(' | ')' | '|' | '[' | ']' | '{' | '}' | '^' | '$'
            ) {
                escaped.push('\\');
            }
            escaped.push(c);
        }
        escaped
    }
}