1#[macro_use]
2extern crate include_dir;
3extern crate regex;
4
5use include_dir::Dir;
6use regex::Regex;
7use lazy_static::lazy_static;
8
9use std::collections::HashMap;
10use serde::{Serialize, Deserialize};
11
/// Directory of schema files pulled in from `./iuliia` by the `include_dir!` macro.
/// `Schema::for_name` looks a schema up in here under the file name `<name>.json`.
const SCHEMA_DIR: Dir = include_dir!("./iuliia");
/// Placeholder put before the first and after the last letter of a word so that
/// every real letter has two neighbours; it is removed again before the
/// previous/next-letter lookups are performed.
const DUMMY_SYMBOL: &str = "$";
14
15#[derive(Serialize, Deserialize, Debug)]
17pub struct Schema {
18 name: String,
19 description: String,
20 url: String,
21 mapping: Option<HashMap<String, String>>,
22 prev_mapping: Option<HashMap<String, String>>,
23 next_mapping: Option<HashMap<String, String>>,
24 ending_mapping: Option<HashMap<String, String>>,
25 samples: Option<Vec<Vec<String>>>,
26}
27
28impl Schema {
29 pub fn for_name(s: &str) -> Schema {
31 let schema_file = SCHEMA_DIR.get_file(format!("{}{}", s, ".json"))
32 .expect(&format!("There are no schema with name {}", s));
33 serde_json::from_str(schema_file.contents_utf8().unwrap()).unwrap()
34 }
35
36 pub fn get_pref(&self, s: &str) -> Option<String> {
37 if self.prev_mapping.is_none() {
38 return None;
39 }
40 match self.prev_mapping.as_ref().unwrap().get(&s.replace(DUMMY_SYMBOL.clone(), "").to_lowercase()) {
41 Some(result) => Some(result.clone()),
42 None => None
43 }
44 }
45
46 pub fn get_next(&self, s: &str) -> Option<String> {
47 if self.next_mapping.is_none() {
48 return None;
49 }
50 match self.next_mapping.as_ref().unwrap().get(&s.replace(DUMMY_SYMBOL.clone(), "").to_lowercase()) {
51 Some(result) => Some(result.clone()),
52 None => None
53 }
54 }
55
56 pub fn get_letter(&self, s: &str) -> Option<String> {
57 if self.mapping.is_none() {
58 return None;
59 }
60 match self.mapping.as_ref().unwrap().get(&s.to_lowercase()) {
61 Some(result) => Some(result.clone()),
62 None => None
63 }
64 }
65
66 pub fn get_ending(&self, s: &str) -> Option<String> {
67 if self.ending_mapping.is_none() {
68 return None;
69 }
70 match self.ending_mapping.as_ref().unwrap().get(&s.to_lowercase()) {
71 Some(result) => Some(result.clone()),
72 None => None
73 }
74 }
75}
76
77pub fn parse_by_schema_name(s: &str, schema_name: &str) -> String {
84 let schema = Schema::for_name(schema_name);
85 parse_by_schema(&s, &schema)
86
87}
88
89pub fn parse_by_schema(s: &str, schema: &Schema) -> String {
103 lazy_static! {
104 static ref RE: Regex = Regex::new(r"\b").unwrap();
105 }
106 RE.split(s)
107 .map(|word| parse_word_by_schema(word, schema))
108 .collect()
109}
110
111fn parse_word_by_schema(s: &str, schema: &Schema) -> String {
112 let word_by_letters: Vec<String> = s.chars()
113 .map(|char| char.to_string())
114 .collect::<Vec<_>>();
115
116 let ending = parse_ending(&word_by_letters, schema);
118 let mut parsed_end = String::new();
119 let word_without_ending = match ending {
120 Some(matched) => {
121 parsed_end = matched.translate;
122 word_by_letters[..matched.ending_start].to_vec()
123 }
124 None => word_by_letters
125 };
126
127 let mut word_for_parse: Vec<String> = Vec::with_capacity(word_without_ending.len() + 2);
129 let dummy_string: Vec<String> = vec![String::from(DUMMY_SYMBOL.clone())];
130 word_for_parse.extend(dummy_string.clone());
131 word_for_parse.extend(word_without_ending);
132 word_for_parse.extend(dummy_string);
133
134 let parsed_word: String = word_for_parse
136 .windows(3)
137 .map(|letter_with_neighbors| parse_letter(letter_with_neighbors, schema))
138 .collect();
139
140 format!("{}{}", parsed_word, parsed_end)
142}
143
144fn parse_ending(s: &Vec<String>, schema: &Schema) -> Option<Ending> {
145 let length = s.len();
146 if length < 3 {
147 return None;
148 }
149
150 match schema.get_ending(&s[length - 1..].concat()) {
151 Some(matched) => return Some(Ending {
152 translate: propagate_case_from_source(matched, &s[length - 1..].concat(), false),
153 ending_start: length - 1,
154 }),
155 None => ()
156 };
157 return match schema.get_ending(&s[length - 2..].concat()) {
158 Some(matched) => return Some(Ending {
159 translate: propagate_case_from_source(matched, &s[length - 2..].concat(), false),
160 ending_start: length - 2,
161 }),
162 None => None
163 };
164}
165
/// A word ending recognised by `parse_ending`.
struct Ending {
    /// Transliterated text that replaces the ending.
    translate: String,
    /// Index (in letters) at which the ending starts within the word.
    ending_start: usize,
}
170
171fn parse_letter(letter_with_neighbors: &[String], schema: &Schema) -> String {
177 let prefix: String = letter_with_neighbors[..2].concat();
178 let postfix: String = letter_with_neighbors[1..].concat();
179 let letter: String = letter_with_neighbors[1..2].concat();
180 let mut result = letter.clone();
181 match schema.get_letter(&letter) {
182 Some(matched) => result = matched,
183 None => ()
184 };
185 match schema.get_next(&postfix) {
186 Some(matched) => result = matched,
187 None => ()
188 };
189 match schema.get_pref(&prefix) {
190 Some(matched) => result = matched,
191 None => ()
192 };
193 propagate_case_from_source(result, &letter, true)
194}
195
/// Carries the letter case of `source_letter` over to the transliteration `result`.
///
/// * When `source_letter` contains no uppercase character, `result` is
///   returned untouched (and without copying it).
/// * Otherwise, with `only_first_symbol` set, only the first character of
///   `result` is upper-cased; without it the whole `result` is upper-cased.
fn propagate_case_from_source(result: String, source_letter: &str, only_first_symbol: bool) -> String {
    if !source_letter.chars().any(char::is_uppercase) {
        return result;
    }

    if !only_first_symbol {
        return result.to_uppercase();
    }

    let mut rest = result.chars();
    match rest.next() {
        // Upper-casing one char may yield several chars, hence the iterator chain.
        Some(first) => first.to_uppercase().chain(rest).collect(),
        None => String::new(),
    }
}
214
215
#[cfg(test)]
mod tests {
    use crate::{parse_by_schema, Schema};

    /// Loads the schema called `schema_name` and transliterates every entry of `words` with it.
    fn transliterate_all(schema_name: &str, words: &[&str]) -> Vec<String> {
        let schema = Schema::for_name(schema_name);
        words
            .iter()
            .map(|word| parse_by_schema(word, &schema))
            .collect()
    }

    #[test]
    fn schema_test() {
        let schema = Schema::for_name("ala_lc");
        assert_eq!(schema.name, "ala_lc")
    }

    #[test]
    fn simple_word_test() {
        let actual = transliterate_all("wikipedia", &["б", "пол"]);
        assert_eq!(actual, vec!["b", "pol"])
    }

    #[test]
    fn prefix_word_test() {
        let actual = transliterate_all("wikipedia", &["ель"]);
        assert_eq!(actual, vec!["yel"])
    }

    #[test]
    fn postfix_word_test() {
        let actual = transliterate_all("wikipedia", &["бульон"]);
        assert_eq!(actual, vec!["bulyon"])
    }

    #[test]
    fn test_letter_case() {
        let actual = transliterate_all("wikipedia", &["ноГа", "Рука"]);
        assert_eq!(actual, vec!["noGa", "Ruka"])
    }

    #[test]
    fn test_ending() {
        let actual = transliterate_all("wikipedia", &["хороший"]);
        assert_eq!(actual, vec!["khoroshy"])
    }

    #[test]
    fn test_sentence() {
        let actual = transliterate_all(
            "wikipedia",
            &[
                "Юлия, съешь ещё этих мягких французских булок из Йошкар-Олы, да выпей алтайского чаю",
                "ВЕЛИКИЙ",
            ],
        );
        let expected = vec![
            "Yuliya, syesh yeshchyo etikh myagkikh frantsuzskikh bulok iz Yoshkar-Oly, da vypey altayskogo chayu",
            "VELIKY",
        ];
        assert_eq!(actual, expected)
    }
}