rsmorphy 0.4.0

Morphological analyzer / inflection engine for the Russian language; Ukrainian support is planned (work in progress)
use std::borrow::Cow;
use std::collections::BTreeSet;
use std::iter::FromIterator;

use analyzer::units::abc::AnalyzerUnit;
use analyzer::MorphAnalyzer;
use container::stack::StackSource;
use container::{Initials, InitialsKind};
use container::{Lex, Score};
use container::{ParseResult, Parsed, SeenSet};
use opencorpora::OpencorporaTagReg;

lazy_static! {
    /// Uppercase Cyrillic letters that are accepted as an initial.
    ///
    /// Each element is a one-character string slice. The alphabet literal
    /// below does not contain `Ъ`, `Ы` or `Ь`, so those letters are never
    /// treated as initials.
    #[derive(Debug)]
    pub static ref LETTERS: BTreeSet<&'static str> = {
        const ALPHABET: &str = "АБВГДЕЁЖЗИЙКЛМНОПРСТУФХЦЧШЩЭЮЯ";
        // Cut the literal into per-character slices; Cyrillic letters are
        // multi-byte in UTF-8, so the slice end is derived from `len_utf8`.
        let single_letters = ALPHABET
            .char_indices()
            .map(|(start, ch)| &ALPHABET[start..start + ch.len_utf8()]);
        BTreeSet::from_iter(single_letters)
    };
}

/// Score attached to every parse emitted by `InitialsAnalyzer::parse`.
// NOTE(review): `Score::Fake` presumably marks a heuristic (non-dictionary)
// estimate — confirm against the definition in `container`.
const SCORE: Score = Score::Fake(0.1);

/// Analyzer unit that treats a single uppercase letter as a possible initial.
///
/// A word is accepted only when it is exactly one of the entries of `LETTERS`;
/// for such a word one parse is produced per entry of `tags`.
#[derive(Debug, Clone)]
pub struct InitialsAnalyzer {
    /// Every tag an initial may carry, paired with the kind of initial it
    /// describes. The position of an entry in this vector is what `parse`
    /// stores as `tag_idx` (narrowed to `u8`).
    pub tags: Vec<(OpencorporaTagReg, InitialsKind)>,
}

impl Default for InitialsAnalyzer {
    /// Builds the full tag table: 2 kinds × 2 genders × 6 cases = 24 entries.
    ///
    /// Entries are generated with the kind varying slowest and the case
    /// varying fastest. The order matters because `parse` refers to a tag by
    /// its index in this table.
    fn default() -> Self {
        // Each textual grammeme is paired with the enum value it stands for,
        // so no string needs to be mapped back to a kind afterwards.
        let kinds = [
            ("Name", InitialsKind::FirstName),
            ("Patr", InitialsKind::Patronym),
        ];
        let genders = ["masc", "femn"];
        let cases = ["nomn", "gent", "datv", "accs", "ablt", "loct"];

        let mut tags = Vec::new();
        for &(kind_name, kind) in &kinds {
            for gender in &genders {
                for case in &cases {
                    let grammemes = format!(
                        "NOUN,anim,{gender},Sgtm,{kind},Fixd,Abbr,Init sing,{case}",
                        kind = kind_name,
                        gender = gender,
                        case = case
                    );
                    tags.push((OpencorporaTagReg::new(grammemes), kind));
                }
            }
        }

        InitialsAnalyzer { tags }
    }
}

impl AnalyzerUnit for InitialsAnalyzer {
    /// Appends one parse per known tag when `word` is a single letter from
    /// `LETTERS`; otherwise leaves `result` untouched.
    ///
    /// * `morph` - analyzer handed to `Lex::from_stack` when building each lexeme.
    /// * `result` - output collection the parses are pushed onto.
    /// * `word` - the token as written; this is the value looked up in `LETTERS`.
    /// * `word_lower` - only written to the trace log here.
    /// * `_seen_parses` - unused by this unit.
    fn parse(
        &self,
        morph: &MorphAnalyzer,
        result: &mut ParseResult,
        word: &str,
        word_lower: &str,
        _seen_parses: &mut SeenSet,
    ) {
        // NOTE(review): the label below does not match this type's name
        // (`InitialsAnalyzer`) — looks like a leftover from a rename; confirm
        // before changing, since log consumers may match on it.
        trace!("AbbreviatedFirstNameAnalyzer::parse()");
        trace!(r#" word: "{}", word_lower: "{}" "#, word, word_lower);
        trace!(
            r#" LETTERS: "{:?}" "#,
            LETTERS.iter().cloned().collect::<Vec<&str>>().join(", ")
        );
        trace!(r#" LETTERS contains word: "{}" "#, LETTERS.contains(word));

        // Use the slice stored in the set rather than `word`, so the
        // container does not borrow from the caller's input.
        let letter = match LETTERS.get(word) {
            Some(&found) => found,
            None => return,
        };

        for (position, entry) in self.tags.iter().enumerate() {
            let initials = Initials {
                letter: Cow::from(letter),
                kind: entry.1,
                // The default table holds 24 tags, so the index fits in a `u8`.
                tag_idx: position as u8,
            };
            let lex = Lex::from_stack(morph, StackSource::from(initials));
            result.push(Parsed::new(lex, SCORE));
        }
    }
}