1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
use std::path::Path;
use container::{ParseResult, SeenSet};
use opencorpora::dictionary::Dictionary;
use analyzer::units::*;
use estimator::SingleTagProbabilityEstimator;
/// The full set of analysis units owned by a `MorphAnalyzer`.
///
/// Every unit is default-constructible, so `Units::default()` yields a ready
/// set. The order of the fields here is *not* the order in which the units
/// are consulted; that order is fixed by `MorphAnalyzer::parse`.
#[derive(Clone, Debug, Default)]
pub struct Units {
    /// Dictionary unit.
    pub dictionary: DictionaryAnalyzer,
    /// Initials unit.
    pub initials: InitialsAnalyzer,
    /// Latin unit.
    pub latin: LatinAnalyzer,
    /// Number unit.
    pub number: NumberAnalyzer,
    /// Roman unit.
    pub roman: RomanAnalyzer,
    /// Punctuation unit.
    pub punct: PunctuationAnalyzer,
    /// Hyphen-adverb unit.
    pub ha: HyphenAdverbAnalyzer,
    /// Hyphen-separated-particle unit.
    pub hsp: HyphenSeparatedParticleAnalyzer,
    /// Hyphenated-words unit.
    pub hword: HyphenatedWordsAnalyzer,
    /// Known-prefix unit.
    pub kp: KnownPrefixAnalyzer,
    /// Known-suffix unit.
    pub ks: KnownSuffixAnalyzer,
    /// Unknown-prefix unit.
    pub up: UnknownPrefixAnalyzer,
    /// Unit run last by `MorphAnalyzer::parse`.
    pub unknown: UnknownAnalyzer,
}
/// Analyzer state: a dictionary, a tag-probability estimator and the
/// analysis units that `parse` runs over each word.
#[derive(Clone, Debug)]
pub struct MorphAnalyzer {
    /// Dictionary the analyzer was constructed with.
    pub dict: Dictionary,
    /// Estimator applied to the parses after the units have run.
    pub estimator: SingleTagProbabilityEstimator,
    /// Analysis units consulted by `parse`.
    pub units: Units,
}
impl MorphAnalyzer {
    /// Creates an analyzer backed by `dict`.
    ///
    /// The estimator is a fresh `SingleTagProbabilityEstimator` and the units
    /// come from `Units::default()`.
    pub fn new(dict: Dictionary) -> Self {
        Self {
            dict,
            estimator: SingleTagProbabilityEstimator {},
            units: Units::default(),
        }
    }

    /// Creates an analyzer from the dictionary located at path `p`.
    ///
    /// Loading is delegated to `Dictionary::from_file`; how that call reacts
    /// to a missing or malformed dictionary is not visible from this file.
    pub fn from_file<P>(p: P) -> Self
    where
        P: AsRef<Path>,
    {
        Self::new(Dictionary::from_file(p))
    }

    /// Produces the parses for `word`.
    ///
    /// The units are run in a fixed sequence of stages, each receiving both
    /// the original word and its lowercased form. After every stage the
    /// accumulated result is inspected, and the first stage that leaves it
    /// non-empty ends the search. The estimator's `apply_to_parses` is then
    /// run on that result before it is returned.
    ///
    /// # Panics
    ///
    /// Panics via `unreachable!()` if every stage, including the final
    /// `unknown` unit, leaves the result empty.
    /// NOTE(review): this presumes `unknown` always yields a parse; confirm.
    pub fn parse(&self, word: &str) -> ParseResult {
        let lowered = word.to_lowercase();

        let run_stages = || -> ParseResult {
            let mut parses = ParseResult::new();
            let mut seen = SeenSet::default();

            // One stage: run each listed unit in order against the shared
            // accumulator, then leave the closure if anything was produced.
            macro_rules! stage {
                ($($unit:ident),+) => {{
                    $(
                        self.units.$unit.parse(self, &mut parses, word, &lowered, &mut seen);
                    )+
                    if !parses.is_empty() {
                        return parses;
                    }
                }};
            }

            stage!(dictionary, initials);
            stage!(number);
            stage!(punct);
            stage!(roman, latin);
            stage!(hsp);
            stage!(ha);
            stage!(hword);
            stage!(kp);
            stage!(up, ks);
            stage!(unknown);
            unreachable!();
        };

        let mut parses = run_stages();
        self.estimator
            .apply_to_parses(self, word, &lowered, &mut parses);
        parses
    }
}
#[cfg(test)]
mod tests {
    use env_logger;
    use {rsmorphy_dict_ru, MorphAnalyzer};

    lazy_static! {
        // Analyzer shared by the tests in this module, built from the
        // dictionary path exported by `rsmorphy_dict_ru`.
        static ref RU: MorphAnalyzer = MorphAnalyzer::from_file(rsmorphy_dict_ru::DICT_PATH);
    }

    /// Initialises logging and touches the shared analyzer's dictionary field.
    #[test]
    fn load_ru() {
        env_logger::init();
        let _ = RU.dict;
    }

    /// Checks the number of parses returned for two sample words.
    #[test]
    fn parse() {
        let first = RU.parse("минимальный");
        assert_eq!(first.len(), 2);

        // Differs from the first word by one letter; presumably an
        // out-of-dictionary misspelling (verify against the dictionary).
        let second = RU.parse("менимальный");
        assert_eq!(second.len(), 3);
    }
}