easy_regex/helpers/
persian_pattern.rs

//! Helper methods for Persian letters.
2
/// One row of the Persian letter lookup table.
///
/// All fields are borrowed string slices so that rows can live in a static table.
// `hex` and `name` are reference data only and are not read anywhere in this file,
// which is why the dead-code lint is silenced for this type.
#[allow(dead_code)]
#[derive(Debug, Clone, Copy)]
struct LettersIndices<'a> {
    /// The letter written literally; compared against caller input when searching the table.
    letter: &'a str,
    /// The letter written with `\u{...}` escapes; this is the text appended to generated patterns.
    index: &'a str,
    /// UTF-8 bytes of the letter as space-separated hex (reference only).
    hex: &'a str,
    /// Human-readable name of the letter (reference only).
    name: &'a str,
}
11
12lazy_static! {
13    static ref LETTERS: [LettersIndices<'static>; 43] = [
14        LettersIndices {
15            index: "\u{0621}}",
16            letter: "ء",
17            hex: "d8 a1",
18            name: "ARABIC LETTER HAMZA",
19        },
20        LettersIndices {
21            index: "\u{0622}",
22            letter: "آ",
23            hex: "d8 a2",
24            name: "ARABIC LETTER ALEF WITH MADDA ABOVE",
25        },
26        LettersIndices {
27            index: "\u{0623}",
28            letter: "أ",
29            hex: "d8 a3",
30            name: "ARABIC LETTER ALEF WITH HAMZA ABOVE",
31        },
32        LettersIndices {
33            index: "\u{0624}",
34            letter: "ؤ",
35            hex: "d8 a4",
36            name: "ARABIC LETTER WAW WITH HAMZA ABOVE",
37        },
38        LettersIndices {
39            index: "\u{0625}",
40            letter: "إ",
41            hex: "d8 a5",
42            name: "ARABIC LETTER ALEF WITH HAMZA BELOW",
43        },
44        LettersIndices {
45            index: "\u{0626}",
46            letter: "ئ",
47            hex: "d8 a6",
48            name: "ARABIC LETTER YEH WITH HAMZA ABOVE",
49        },
50        LettersIndices {
51            index: "\u{0627}",
52            letter: "ا",
53            hex: "d8 a7",
54            name: "ARABIC LETTER ALEF",
55        },
56        LettersIndices {
57            index: "\u{0628}",
58            letter: "ب",
59            hex: "d8 a8",
60            name: "ARABIC LETTER BEH",
61        },
62        LettersIndices {
63            index: "\u{067E}",
64            letter: "پ",
65            hex: "d9 be",
66            name: "PERSIAN LETTER PEH",
67        },
68        LettersIndices {
69            index: "\u{062A}",
70            letter: "ت",
71            hex: "d8 aa",
72            name: "ARABIC LETTER TEH",
73        },
74        LettersIndices {
75            index: "\u{062B}",
76            letter: "ث",
77            hex: "d8 ab",
78            name: "ARABIC LETTER THEH",
79        },
80        LettersIndices {
81            index: "\u{062C}",
82            letter: "ج",
83            hex: "d8 ac",
84            name: "ARABIC LETTER JEEM",
85        },
86        LettersIndices {
87            index: "\u{0686}",
88            letter: "چ",
89            hex: "da 86",
90            name: "ARABIC LETTER TCHEH",
91        },
92        LettersIndices {
93            index: "\u{062D}",
94            letter: "ح",
95            hex: "d8 ad",
96            name: "ARABIC LETTER HAH",
97        },
98        LettersIndices {
99            index: "\u{062E}",
100            letter: "خ",
101            hex: "d8 ae",
102            name: "ARABIC LETTER KHAH",
103        },
104        LettersIndices {
105            index: "\u{062F}",
106            letter: "د",
107            hex: "d8 af",
108            name: "ARABIC LETTER DAL",
109        },
110        LettersIndices {
111            index: "\u{0630}",
112            letter: "ذ",
113            hex: "d8 b0",
114            name: "ARABIC LETTER ZAL",
115        },
116        LettersIndices {
117            index: "\u{0631}",
118            letter: "ر",
119            hex: "d8 b1",
120            name: "ARABIC LETTER REH",
121        },
122        LettersIndices {
123            index: "\u{0632}",
124            letter: "ز",
125            hex: "d8 b2",
126            name: "ARABIC LETTER ZAIN",
127        },
128        LettersIndices {
129            index: "\u{0698}",
130            letter: "ژ",
131            hex: "da 98",
132            name: "ARABIC LETTER ZHEH",
133        },
134        LettersIndices {
135            index: "\u{0633}",
136            letter: "س",
137            hex: "d8 b3",
138            name: "ARABIC LETTER SEEN",
139        },
140        LettersIndices {
141            index: "\u{0634}",
142            letter: "ش",
143            hex: "d8 b4",
144            name: "ARABIC LETTER SHEEN",
145        },
146        LettersIndices {
147            index: "\u{0635}",
148            letter: "ص",
149            hex: "d8 b5",
150            name: "ARABIC LETTER SAD",
151        },
152        LettersIndices {
153            index: "\u{0636}",
154            letter: "ض",
155            hex: "d8 b6",
156            name: "ARABIC LETTER ZAD",
157        },
158        LettersIndices {
159            index: "\u{0637}",
160            letter: "ط",
161            hex: "d8 b7",
162            name: "ARABIC LETTER TAH",
163        },
164        LettersIndices {
165            index: "\u{0638}",
166            letter: "ظ",
167            hex: "d8 b8",
168            name: "ARABIC LETTER ZAH",
169        },
170        LettersIndices {
171            index: "\u{0639}",
172            letter: "ع",
173            hex: "d8 b9",
174            name: "ARABIC LETTER AIN",
175        },
176        LettersIndices {
177            index: "\u{063A}",
178            letter: "غ",
179            hex: "d8 ba",
180            name: "ARABIC LETTER GHAIN",
181        },
182        LettersIndices {
183            index: "\u{0641}",
184            letter: "ف",
185            hex: "d9 81",
186            name: "ARABIC LETTER FEH",
187        },
188        LettersIndices {
189            index: "\u{0642}",
190            letter: "ق",
191            hex: "d9 82",
192            name: "ARABIC LETTER QAF",
193        },
194        LettersIndices {
195            index: "\u{06A9}",
196            letter: "ک",
197            hex: "da a9",
198            name: "ARABIC LETTER KEHEH",
199        },
200        LettersIndices {
201            index: "\u{06AF}",
202            letter: "گ",
203            hex: "da af",
204            name: "ARABIC LETTER GAF",
205        },
206        LettersIndices {
207            index: "\u{0644}",
208            letter: "ل",
209            hex: "d9 84",
210            name: "ARABIC LETTER LAM",
211        },
212        LettersIndices {
213            index: "\u{0645}",
214            letter: "م",
215            hex: "d9 85",
216            name: "ARABIC LETTER MEEM",
217        },
218        LettersIndices {
219            index: "\u{0646}",
220            letter: "ن",
221            hex: "d9 86",
222            name: "ARABIC LETTER NOON",
223        },
224        LettersIndices {
225            index: "\u{0648}",
226            letter: "و",
227            hex: "d9 88",
228            name: "ARABIC LETTER WAW",
229        },
230        LettersIndices {
231            index: "\u{0647}\u{06BE}",
232            letter: "ه",
233            hex: "ه: d9 87, ھ: da be",
234            name: "ARABIC LETTER HEH AND HEH DOACHASHMEE",
235        },
236        LettersIndices {
237            index: "\u{06CC}",
238            letter: "ی",
239            hex: "db 8c",
240            name: "ARABIC LETTER FARSI YEH",
241        },
242        LettersIndices {
243            index: "\u{064E}",
244            letter: "َ",
245            hex: "d9 8e",
246            name: "ARABIC FATHA",
247        },
248        LettersIndices {
249            index: "\u{064F}",
250            letter: "ُ",
251            hex: "d9 8f",
252            name: "ARABIC DAMMA",
253        },
254        LettersIndices {
255            index: "\u{0650}",
256            letter: "ِ",
257            hex: "d9 90",
258            name: "ARABIC KASRA",
259        },
260        LettersIndices {
261            index: "\u{0651}",
262            letter: "ّ",
263            hex: "d9 91",
264            name: "ARABIC SHADDA",
265        },
266        LettersIndices {
267            index: "\u{0655}",
268            letter: "ٕ",
269            hex: "d9 95",
270            name: "ARABIC HAMZA BELOW",
271        },
272    ];
273}
274
275/// Creates a list/range of persian letters.
276///
277/// Unicode characters of Persian letters are not defined in the same order as their corresponding letters.
278/// This could result in confusion when creating customized persian word patterns.
279/// To solve that, a helper method is defined here.
280/// 
281/// # Examples
282/// ```
283/// use easy_regex::helpers::persian_pattern::create_persian_pattern;
284/// 
285/// let result = create_persian_pattern(("ا"), ("ر"));
286/// assert_eq!("ابپتثجچحخدذر", result.unwrap());
287/// ```
288pub fn create_persian_pattern<'a>(from: &'a str, to: &'a str) -> Result<String, String> {
289    if let Some(found_from_idx) = LETTERS.iter().position(|l| l.letter == from) {
290        if let Some(found_to_idx) = LETTERS.iter().position(|l| l.letter == to) {
291            if found_from_idx > found_to_idx {
292                let err_msg = format!(
293                    "letter '{}' is after letter '{}', consider swapping them",
294                    &from, &to
295                );
296                Err(err_msg)
297            } else {
298                let found_slice = &LETTERS[found_from_idx..found_to_idx + 1];
299                let mut result = String::new();
300                found_slice.iter().for_each(|l| {
301                    result.push_str(l.index);
302                });
303
304                Ok(result)
305            }
306        } else {
307            let err_msg = format!("letter '{}' is not valid", &to);
308            Err(err_msg)
309        }
310    } else {
311        let err_msg = format!("letter '{}' is not valid", &from);
312        Err(err_msg)
313    }
314}
315
#[cfg(test)]
mod tests {
    use super::create_persian_pattern;

    /// A valid ascending range yields every letter between the two ends, inclusive.
    #[test]
    fn create_persian_pattern_works() {
        let pattern = create_persian_pattern("ا", "د");
        assert_eq!(Ok(String::from("ابپتثجچحخد")), pattern);
    }

    /// A `from` value that is not in the letter table is rejected by name.
    #[test]
    fn create_persian_pattern_wrong_from_input() {
        let outcome = create_persian_pattern("d", "م");
        assert_eq!(Err(String::from("letter 'd' is not valid")), outcome);
    }

    /// A `to` value that is not in the letter table is rejected by name.
    #[test]
    fn create_persian_pattern_wrong_to_input() {
        let outcome = create_persian_pattern("ا", "d");
        assert_eq!(Err(String::from("letter 'd' is not valid")), outcome);
    }

    /// A descending range is rejected with a hint to swap the arguments.
    #[test]
    fn create_persian_pattern_wrong_order() {
        let expected = "letter 'خ' is after letter 'ا', consider swapping them";
        let outcome = create_persian_pattern("خ", "ا");
        assert_eq!(Err(String::from(expected)), outcome);
    }
}