1pub(crate) mod analyzer;
3pub mod errors;
4#[macro_use]
6pub mod morph;
7pub(crate) mod opencorpora;
9pub(crate) mod test_infrastructure;
11
12use allocative::Allocative;
13use analyzer::{InflectWords, Lemmas, LemmasRows, ParseTable, Tag, Tags};
14use errors::{MopsErr, MopsResult};
15use fst::Map;
16use serde::{Deserialize, Serialize};
17use std::path::Path;
18use tracing::info;
19
20use crate::{
21 analyzer::{Dictionary, Vanga},
22 morph::grammemes::Grammem,
23 opencorpora::DictionaryOpenCorpora,
24};
25pub use analyzer::{NormalizedWords, ParsedWords, SMALLLEMMA, SMALLTAG, SMALLVANGA};
26
27#[rustfmt::skip]
28#[derive(Debug, Clone, Default, clap::Parser, clap::ValueEnum, Serialize, Deserialize, Allocative)]
29pub enum Language {
31 #[default]
32 Russian,
33}
34
35#[derive(Debug, Allocative)]
36pub struct MorphAnalyzer {
38 #[allocative(skip)]
39 pub fst: Map<Vec<u8>>,
40 #[allocative(skip)]
41 pub word_parses: ParseTable,
42 #[allocative(skip)]
43 pub tags: Tags,
44 #[allocative(skip)]
45 pub lemmas: Lemmas,
46 pub paradigms: Vec<Vanga>,
47 pub lemmas_rows: LemmasRows,
48}
49
50#[derive(
51 Debug, Clone, derive_more::Display, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize,
52)]
53pub enum Method {
55 Dictionary,
56 #[display(fmt = "{}", _0.display())]
57 Vangovanie(Vangovanie),
58}
59
60#[derive(
61 Debug, Clone, derive_more::Display, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize,
62)]
63pub enum Vangovanie {
65 #[display(fmt = "KnowPrefix({_0})")]
66 KnownPrefix(String),
67 #[display(fmt = "UnknowPrefix({_0})")]
68 UnknownPrefix(String),
69 Postfix,
70}
71
/// Alias for the owned string type used for normal-form fields and getters in this module.
pub type Normalized = String;
73
74#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize, Deserialize)]
75pub struct ParsedWord {
78 word: String,
79 tags: Tag,
80 normal_form: Normalized,
81 method: Method,
82}
83
84impl ParsedWord {
85 pub fn word(&self) -> String {
87 self.word.to_owned()
88 }
89
90 pub fn tag(&self) -> Tag {
92 self.tags.to_owned()
93 }
94
95 pub fn normal_form(&self) -> Normalized {
97 self.normal_form.to_owned()
98 }
99
100 pub fn method(&self) -> Method {
102 self.method.to_owned()
103 }
104}
105
106#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
107pub struct NormalizedWord {
110 normal_word: Normalized,
111 tags: Tag,
112 method: Method,
113}
114
115impl NormalizedWord {
116 pub fn word(&self) -> Normalized {
118 self.normal_word.to_owned()
119 }
120
121 pub fn tag(&self) -> Tag {
123 self.tags.to_owned()
124 }
125
126 pub fn method(&self) -> Method {
128 self.method.to_owned()
129 }
130}
131
132#[derive(Debug, Clone, PartialEq, Eq)]
133pub struct InflectWord {
137 inflect_form: String,
138 tags: Tag,
139 normal_form: Normalized,
140 method: Method,
141}
142
143impl InflectWord {
144 pub fn word(&self) -> String {
146 self.inflect_form.to_owned()
147 }
148
149 pub fn tag(&self) -> Tag {
151 self.tags.to_owned()
152 }
153
154 pub fn method(&self) -> Method {
155 self.method.to_owned()
156 }
157}
158
159impl MorphAnalyzer {
162 pub fn create<P: AsRef<Path>>(
168 dict_path: P,
169 out_dir: P,
170 lang: Language,
171 ) -> MopsResult<Dictionary> {
172 let dictionary = DictionaryOpenCorpora::init_from_path(dict_path)?;
173 let dictionary = Dictionary::init(dictionary, &out_dir, lang)?;
174
175 info!("Dictionary was created");
176 Ok(dictionary)
177 }
178
179 pub fn create_with_reader<P: AsRef<Path>>(
187 dict_path: P,
188 out_dir: P,
189 lang: Language,
190 ) -> MopsResult<Dictionary> {
191 let dictionary = DictionaryOpenCorpora::init_from_path_with_reader(dict_path)?;
192 let dictionary = Dictionary::init(dictionary, out_dir, lang)?;
193
194 info!("Dictionary was created");
195 Ok(dictionary)
196 }
197
198 pub fn init<P: AsRef<Path>>(dictionary: Dictionary, dir: P) -> MopsResult<Self> {
203 let fst = dir.as_ref().join("dict.fst");
204 Self::from_dictionary(dictionary, fst)
205 }
206
207 pub fn open<P: AsRef<Path>>(path: P) -> MopsResult<Self> {
209 let dictionary: Dictionary = Dictionary::open(&path)?;
210 Self::init(dictionary, path)
211 }
212
213 pub fn open_from_reader<P: AsRef<Path>>(path: P) -> MopsResult<Self> {
219 let dictionary: Dictionary = Dictionary::open_from_reader(&path)?;
220 Self::init(dictionary, path)
221 }
222
223 pub fn parse(&self, word: &str) -> MopsResult<ParsedWords> {
228 self.parse_word(word).map_err(MopsErr::Parse)
229 }
230
231 pub fn normalize(&self, word: &str) -> MopsResult<NormalizedWords> {
236 self.normalized_word(word).map_err(MopsErr::Parse)
237 }
238
239 pub fn is_known(&self, word: &str) -> bool {
241 let map = &self.fst;
242 map.get(word).is_some()
243 }
244
245 pub fn parse_get(&self, word: &str, index: usize) -> MopsResult<Option<ParsedWord>> {
247 Ok(self.parse(word)?.0.get(index).map(|w| w.to_owned()))
248 }
249
250 pub fn parse_grammemes(
252 &self,
253 word: &str,
254 grammemes: Vec<Grammem>,
255 ) -> MopsResult<Option<ParsedWord>> {
256 let parsed = self.parse(word)?;
257 Ok(parsed.find(grammemes))
258 }
259
260 pub fn normalize_get(&self, word: &str, index: usize) -> MopsResult<Option<NormalizedWord>> {
262 Ok(self.normalize(word)?.0.get(index).map(|w| w.to_owned()))
263 }
264
265 pub fn normalize_grammemes(
267 &self,
268 word: &str,
269 grammemes: Vec<Grammem>,
270 ) -> MopsResult<Option<NormalizedWord>> {
271 let normalized = self.normalize(word)?;
272
273 Ok(normalized.find(grammemes))
274 }
275
276 pub fn inflect_inizio(&self, word: &str) -> MopsResult<Option<InflectWords>> {
279 self.inflect_word(word, None).map_err(MopsErr::Parse)
280 }
281
282 pub fn inflect_forms(
284 &self,
285 word: &str,
286 grammemes: Vec<Grammem>,
287 ) -> MopsResult<Option<InflectWords>> {
288 self.inflect_word(word, Some(grammemes))
289 .map_err(MopsErr::Parse)
290 }
291
292 pub fn inflect_parsed(
294 &self,
295 parse: ParsedWord,
296 grammemes: Vec<Grammem>,
297 ) -> MopsResult<Option<InflectWords>> {
298 self.inflect_parsed_words(parse, Some(grammemes))
299 .map_err(MopsErr::Parse)
300 }
301
302 pub fn declension(&self, word: &str) -> MopsResult<Vec<InflectWords>> {
308 self.declension_word(word).map_err(MopsErr::Parse)
309 }
310
311 pub fn declension_get(&self, word: &str, index: usize) -> MopsResult<Option<InflectWords>> {
317 self.declension(word)
318 .map(|w| w.get(index).map(|p| p.to_owned()))
319 }
320
321 pub fn declension_parsed(&self, parse: &ParsedWord) -> MopsResult<Option<InflectWords>> {
327 self.declension_parsed_word(parse).map_err(MopsErr::Parse)
328 }
329}