// umls 0.1.3
//
// A library and command line tool for working with the UMLS Metathesaurus.
// Documentation
use stringmetrics::jaccard;

/// Scores two strings by feeding their [`TrigramIterator`] gram sequences to
/// `stringmetrics::jaccard` and returning its result unchanged.
///
/// NOTE(review): the name says "distance", but the value is whatever
/// `jaccard` returns for the two gram sequences -- confirm against the
/// `stringmetrics` documentation whether that is a similarity index or a
/// distance.
pub fn jaccard_trigram_distance(one: &str, two: &str) -> f32 {
    let grams_of_one = TrigramIterator::new(one);
    let grams_of_two = TrigramIterator::new(two);
    jaccard(grams_of_one, grams_of_two)
}

/// Position of a [`TrigramIterator`] within its output sequence.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
enum TrigramIteratorState {
    /// The next item is the first character on its own.
    FirstOneGram,
    /// The next item is the first two characters.
    FirstTwoGram,
    /// The next item is the three-character window that starts at this byte
    /// offset of the word (always a character boundary).
    Trigram(usize),
    /// The next item is the last two characters.
    LastTwoGram,
    /// The next item is the last character on its own.
    LastOneGram,
    /// The sequence is exhausted.
    Done,
}

/// Iterator over the character n-grams of a word, including the shortened
/// grams at both edges.
///
/// For `"abcd"` it yields `"a"`, `"ab"`, `"abc"`, `"bcd"`, `"cd"`, `"d"`:
/// the leading one- and two-character prefixes, every three-character window,
/// then the trailing two- and one-character suffixes. Shorter words yield
/// `"a"` for `"a"` and `"a"`, `"ab"`, `"b"` for `"ab"`. An empty word yields
/// nothing.
///
/// Grams are measured in characters (`char`s), not bytes, so every yielded
/// slice starts and ends on a UTF-8 character boundary.
#[derive(Debug, Clone)]
pub struct TrigramIterator<'a> {
    word: &'a str,
    state: TrigramIteratorState,
}

impl<'a> TrigramIterator<'a> {
    /// Creates an iterator over the grams of `word`.
    ///
    /// The yielded slices borrow from `word`; nothing is allocated.
    pub fn new(word: &'a str) -> Self {
        TrigramIterator {
            word,
            state: TrigramIteratorState::FirstOneGram,
        }
    }
}

/// Yields, for each character of `text` in order, the byte offset just past
/// that character.
fn char_end_offsets<'t>(text: &'t str) -> impl Iterator<Item = usize> + 't {
    text.char_indices()
        .map(|(start, ch)| start + ch.len_utf8())
}

impl<'a> Iterator for TrigramIterator<'a> {
    type Item = &'a str;

    /// Returns the next gram, or `None` once the sequence is exhausted.
    ///
    /// Never panics: all slice bounds are derived from `char_indices`, so
    /// they always fall on character boundaries inside the word.
    fn next(&mut self) -> Option<Self::Item> {
        // Copy the `&'a str` out so the returned slices carry the word's
        // lifetime rather than the lifetime of `&mut self`.
        let word = self.word;

        let (next_state, gram) = match self.state {
            TrigramIteratorState::FirstOneGram => match char_end_offsets(word).next() {
                Some(end) => {
                    // Anything left after the first character means there is
                    // at least a two-character prefix to emit next.
                    let next_state = if end < word.len() {
                        TrigramIteratorState::FirstTwoGram
                    } else {
                        TrigramIteratorState::Done
                    };
                    (next_state, &word[..end])
                }
                // Empty word: there are no grams. The state is left as is;
                // every later call lands here again and also returns `None`.
                None => return None,
            },
            TrigramIteratorState::FirstTwoGram => {
                // This state is only entered when a second character exists,
                // so the fallback is purely defensive.
                let end = char_end_offsets(word).nth(1).unwrap_or(word.len());
                let next_state = if end < word.len() {
                    TrigramIteratorState::Trigram(0)
                } else {
                    // Exactly two characters: no full trigram, and the
                    // two-character suffix would repeat the prefix just
                    // emitted, so skip straight to the last character.
                    TrigramIteratorState::LastOneGram
                };
                (next_state, &word[..end])
            }
            TrigramIteratorState::Trigram(start) => {
                let tail = &word[start..];
                let mut ends = char_end_offsets(tail);
                // Byte length of the first character: how far the window
                // slides for the following trigram.
                let first_len = ends.next().unwrap_or(tail.len());
                // `nth(1)` skips the second character's end and returns the
                // third's, i.e. the byte length of the three-character window.
                let window_len = ends.nth(1).unwrap_or(tail.len());
                let end = start + window_len;

                let next_state = if end >= word.len() {
                    TrigramIteratorState::LastTwoGram
                } else {
                    TrigramIteratorState::Trigram(start + first_len)
                };
                (next_state, &word[start..end])
            }
            TrigramIteratorState::LastTwoGram => {
                // Start of the second-to-last character.
                let start = word
                    .char_indices()
                    .rev()
                    .nth(1)
                    .map_or(0, |(offset, _)| offset);
                (TrigramIteratorState::LastOneGram, &word[start..])
            }
            TrigramIteratorState::LastOneGram => {
                // Start of the last character.
                let start = word
                    .char_indices()
                    .next_back()
                    .map_or(0, |(offset, _)| offset);
                (TrigramIteratorState::Done, &word[start..])
            }
            TrigramIteratorState::Done => return None,
        };

        self.state = next_state;
        Some(gram)
    }
}

#[cfg(test)]
mod test {
    use super::TrigramIterator;

    /// Collects every gram produced for `word`, in iteration order.
    fn grams(word: &str) -> Vec<&str> {
        TrigramIterator::new(word).collect()
    }

    /// A longer word: both edge grams plus every interior trigram.
    #[test]
    fn trigram_iterator() {
        assert_eq!(
            grams("abcdef"),
            vec!["a", "ab", "abc", "bcd", "cde", "def", "ef", "f"]
        );
    }

    // NOTE(review): despite its name, this test feeds the single-character
    // word "a", exactly like `one_letter_word` below; the empty string itself
    // is not exercised by this module.
    #[test]
    fn empty() {
        assert_eq!(grams("a"), vec!["a"]);
    }

    /// One character: the word itself is the only gram.
    #[test]
    fn one_letter_word() {
        assert_eq!(grams("a"), vec!["a"]);
    }

    /// Two characters: prefix, whole word, last character.
    #[test]
    fn two_letter_word() {
        assert_eq!(grams("ab"), vec!["a", "ab", "b"]);
    }

    /// Three characters: a single full trigram between the edge grams.
    #[test]
    fn three_letter_word() {
        assert_eq!(grams("abc"), vec!["a", "ab", "abc", "bc", "c"]);
    }

    /// Four characters: two overlapping trigrams between the edge grams.
    #[test]
    fn four_letter_word() {
        assert_eq!(grams("abcd"), vec!["a", "ab", "abc", "bcd", "cd", "d"]);
    }
}