arabic_reshaper/
letters.rs

1// Each letter is of the format:
2//
3//   ('<letter>', <replacement>)
4//
5// And replacement is of the format:
6//
//   ["<isolated>", "<initial>", "<medial>", "<final>"]
8//
9// Where <letter> is the string to replace, and <isolated> is the replacement in
10// case <letter> should be in isolated form, <initial> is the replacement in
11// case <letter> should be in initial form, <medial> is the replacement in case
12// <letter> should be in medial form, and <final> is the replacement in case
// <letter> should be in final form. If the replacement for a form is an empty
// string, that means the letter doesn't support this form.
15use std::ops::Index;
16
/// Form marker with the value 255, outside the 0..=3 range of the form indices below.
/// NOTE(review): presumably flags a letter that has not been shaped yet — confirm against callers.
pub const UNSHAPED: i16 = 255;
/// Position of the isolated form in a letter's four-entry forms array.
pub const ISOLATED: i16 = 0;
/// Position of the initial form in a letter's four-entry forms array.
pub const INITIAL: i16 = 1;
/// Position of the medial form in a letter's four-entry forms array.
pub const MEDIAL: i16 = 2;
/// Position of the final form in a letter's four-entry forms array.
pub const FINAL: i16 = 3;
22
/// ARABIC TATWEEL, U+0640.
pub const TATWEEL: char = '\u{0640}';
/// Zero width joiner (ZWJ), U+200D.
pub const ZWJ: char = '\u{200D}';
25
/// A fixed-size, array-backed association list from keys of type `T` to
/// values of type `U`.
///
/// Entries are stored as `(key, value)` pairs and looked up with a linear
/// scan, so if a key appears more than once the first matching entry wins.
pub struct Map<T, U, const N: usize>([(T, U); N]);

impl<T: PartialEq, U, const N: usize> Index<T> for Map<T, U, N> {
    type Output = U;

    /// Returns a reference to the value stored under `t`.
    ///
    /// # Panics
    ///
    /// Panics if `t` is not a key of the map. Use [`Map::get`] for a
    /// non-panicking lookup.
    fn index(&self, t: T) -> &Self::Output {
        // Indexing with an absent key is a caller bug; say so explicitly
        // instead of surfacing a bare `unwrap` failure.
        self.get(&t).expect("key not found in Map")
    }
}

impl<T: PartialEq, U, const N: usize> Map<T, U, N> {
    /// Returns `true` if some entry's key equals `key`.
    pub fn contains_key(&self, key: &T) -> bool {
        self.get(key).is_some()
    }

    /// Returns a reference to the value of the first entry whose key equals
    /// `key`, or `None` if there is no such entry.
    pub fn get(&self, key: &T) -> Option<&U> {
        self.0
            .iter()
            .find(|(candidate, _)| candidate == key)
            .map(|(_, value)| value)
    }
}
47
/// Shaping table: maps each supported character to its four contextual
/// replacement strings, stored in the order isolated, initial, medial, final
/// (the positions named by `ISOLATED`, `INITIAL`, `MEDIAL` and `FINAL`).
///
/// An empty string marks a form the character does not have. `TATWEEL` and
/// `ZWJ` map to themselves in every position.
pub static LETTERS: Map<char, [&'static str; 4], 78> = Map([
    // ARABIC LETTER HAMZA
    ('\u{0621}', ["\u{FE80}", "", "", ""]),
    // ARABIC LETTER ALEF WITH MADDA ABOVE
    ('\u{0622}', ["\u{FE81}", "", "", "\u{FE82}"]),
    // ARABIC LETTER ALEF WITH HAMZA ABOVE
    ('\u{0623}', ["\u{FE83}", "", "", "\u{FE84}"]),
    // ARABIC LETTER WAW WITH HAMZA ABOVE
    ('\u{0624}', ["\u{FE85}", "", "", "\u{FE86}"]),
    // ARABIC LETTER ALEF WITH HAMZA BELOW
    ('\u{0625}', ["\u{FE87}", "", "", "\u{FE88}"]),
    // ARABIC LETTER YEH WITH HAMZA ABOVE
    ('\u{0626}', ["\u{FE89}", "\u{FE8B}", "\u{FE8C}", "\u{FE8A}"]),
    // ARABIC LETTER ALEF
    ('\u{0627}', ["\u{FE8D}", "", "", "\u{FE8E}"]),
    // ARABIC LETTER BEH
    ('\u{0628}', ["\u{FE8F}", "\u{FE91}", "\u{FE92}", "\u{FE90}"]),
    // ARABIC LETTER TEH MARBUTA
    ('\u{0629}', ["\u{FE93}", "", "", "\u{FE94}"]),
    // ARABIC LETTER TEH
    ('\u{062A}', ["\u{FE95}", "\u{FE97}", "\u{FE98}", "\u{FE96}"]),
    // ARABIC LETTER THEH
    ('\u{062B}', ["\u{FE99}", "\u{FE9B}", "\u{FE9C}", "\u{FE9A}"]),
    // ARABIC LETTER JEEM
    ('\u{062C}', ["\u{FE9D}", "\u{FE9F}", "\u{FEA0}", "\u{FE9E}"]),
    // ARABIC LETTER HAH
    ('\u{062D}', ["\u{FEA1}", "\u{FEA3}", "\u{FEA4}", "\u{FEA2}"]),
    // ARABIC LETTER KHAH
    ('\u{062E}', ["\u{FEA5}", "\u{FEA7}", "\u{FEA8}", "\u{FEA6}"]),
    // ARABIC LETTER DAL
    ('\u{062F}', ["\u{FEA9}", "", "", "\u{FEAA}"]),
    // ARABIC LETTER THAL
    ('\u{0630}', ["\u{FEAB}", "", "", "\u{FEAC}"]),
    // ARABIC LETTER REH
    ('\u{0631}', ["\u{FEAD}", "", "", "\u{FEAE}"]),
    // ARABIC LETTER ZAIN
    ('\u{0632}', ["\u{FEAF}", "", "", "\u{FEB0}"]),
    // ARABIC LETTER SEEN
    ('\u{0633}', ["\u{FEB1}", "\u{FEB3}", "\u{FEB4}", "\u{FEB2}"]),
    // ARABIC LETTER SHEEN
    ('\u{0634}', ["\u{FEB5}", "\u{FEB7}", "\u{FEB8}", "\u{FEB6}"]),
    // ARABIC LETTER SAD
    ('\u{0635}', ["\u{FEB9}", "\u{FEBB}", "\u{FEBC}", "\u{FEBA}"]),
    // ARABIC LETTER DAD
    ('\u{0636}', ["\u{FEBD}", "\u{FEBF}", "\u{FEC0}", "\u{FEBE}"]),
    // ARABIC LETTER TAH
    ('\u{0637}', ["\u{FEC1}", "\u{FEC3}", "\u{FEC4}", "\u{FEC2}"]),
    // ARABIC LETTER ZAH
    ('\u{0638}', ["\u{FEC5}", "\u{FEC7}", "\u{FEC8}", "\u{FEC6}"]),
    // ARABIC LETTER AIN
    ('\u{0639}', ["\u{FEC9}", "\u{FECB}", "\u{FECC}", "\u{FECA}"]),
    // ARABIC LETTER GHAIN
    ('\u{063A}', ["\u{FECD}", "\u{FECF}", "\u{FED0}", "\u{FECE}"]),
    // ARABIC TATWEEL
    (TATWEEL, ["\u{0640}", "\u{0640}", "\u{0640}", "\u{0640}"]),
    // ARABIC LETTER FEH
    ('\u{0641}', ["\u{FED1}", "\u{FED3}", "\u{FED4}", "\u{FED2}"]),
    // ARABIC LETTER QAF
    ('\u{0642}', ["\u{FED5}", "\u{FED7}", "\u{FED8}", "\u{FED6}"]),
    // ARABIC LETTER KAF
    ('\u{0643}', ["\u{FED9}", "\u{FEDB}", "\u{FEDC}", "\u{FEDA}"]),
    // ARABIC LETTER LAM
    ('\u{0644}', ["\u{FEDD}", "\u{FEDF}", "\u{FEE0}", "\u{FEDE}"]),
    // ARABIC LETTER MEEM
    ('\u{0645}', ["\u{FEE1}", "\u{FEE3}", "\u{FEE4}", "\u{FEE2}"]),
    // ARABIC LETTER NOON
    ('\u{0646}', ["\u{FEE5}", "\u{FEE7}", "\u{FEE8}", "\u{FEE6}"]),
    // ARABIC LETTER HEH
    ('\u{0647}', ["\u{FEE9}", "\u{FEEB}", "\u{FEEC}", "\u{FEEA}"]),
    // ARABIC LETTER WAW
    ('\u{0648}', ["\u{FEED}", "", "", "\u{FEEE}"]),
    // ARABIC LETTER (UIGHUR KAZAKH KIRGHIZ)? ALEF MAKSURA
    ('\u{0649}', ["\u{FEEF}", "\u{FBE8}", "\u{FBE9}", "\u{FEF0}"]),
    // ARABIC LETTER YEH
    ('\u{064A}', ["\u{FEF1}", "\u{FEF3}", "\u{FEF4}", "\u{FEF2}"]),
    // ARABIC LETTER ALEF WASLA
    ('\u{0671}', ["\u{FB50}", "", "", "\u{FB51}"]),
    // ARABIC LETTER U WITH HAMZA ABOVE
    ('\u{0677}', ["\u{FBDD}", "", "", ""]),
    // ARABIC LETTER TTEH
    ('\u{0679}', ["\u{FB66}", "\u{FB68}", "\u{FB69}", "\u{FB67}"]),
    // ARABIC LETTER TTEHEH
    ('\u{067A}', ["\u{FB5E}", "\u{FB60}", "\u{FB61}", "\u{FB5F}"]),
    // ARABIC LETTER BEEH
    ('\u{067B}', ["\u{FB52}", "\u{FB54}", "\u{FB55}", "\u{FB53}"]),
    // ARABIC LETTER PEH
    ('\u{067E}', ["\u{FB56}", "\u{FB58}", "\u{FB59}", "\u{FB57}"]),
    // ARABIC LETTER TEHEH
    ('\u{067F}', ["\u{FB62}", "\u{FB64}", "\u{FB65}", "\u{FB63}"]),
    // ARABIC LETTER BEHEH
    ('\u{0680}', ["\u{FB5A}", "\u{FB5C}", "\u{FB5D}", "\u{FB5B}"]),
    // ARABIC LETTER NYEH
    ('\u{0683}', ["\u{FB76}", "\u{FB78}", "\u{FB79}", "\u{FB77}"]),
    // ARABIC LETTER DYEH
    ('\u{0684}', ["\u{FB72}", "\u{FB74}", "\u{FB75}", "\u{FB73}"]),
    // ARABIC LETTER TCHEH
    ('\u{0686}', ["\u{FB7A}", "\u{FB7C}", "\u{FB7D}", "\u{FB7B}"]),
    // ARABIC LETTER TCHEHEH
    ('\u{0687}', ["\u{FB7E}", "\u{FB80}", "\u{FB81}", "\u{FB7F}"]),
    // ARABIC LETTER DDAL
    ('\u{0688}', ["\u{FB88}", "", "", "\u{FB89}"]),
    // ARABIC LETTER DAHAL
    ('\u{068C}', ["\u{FB84}", "", "", "\u{FB85}"]),
    // ARABIC LETTER DDAHAL
    ('\u{068D}', ["\u{FB82}", "", "", "\u{FB83}"]),
    // ARABIC LETTER DUL
    ('\u{068E}', ["\u{FB86}", "", "", "\u{FB87}"]),
    // ARABIC LETTER RREH
    ('\u{0691}', ["\u{FB8C}", "", "", "\u{FB8D}"]),
    // ARABIC LETTER JEH
    ('\u{0698}', ["\u{FB8A}", "", "", "\u{FB8B}"]),
    // ARABIC LETTER VEH
    ('\u{06A4}', ["\u{FB6A}", "\u{FB6C}", "\u{FB6D}", "\u{FB6B}"]),
    // ARABIC LETTER PEHEH
    ('\u{06A6}', ["\u{FB6E}", "\u{FB70}", "\u{FB71}", "\u{FB6F}"]),
    // ARABIC LETTER KEHEH
    ('\u{06A9}', ["\u{FB8E}", "\u{FB90}", "\u{FB91}", "\u{FB8F}"]),
    // ARABIC LETTER NG
    ('\u{06AD}', ["\u{FBD3}", "\u{FBD5}", "\u{FBD6}", "\u{FBD4}"]),
    // ARABIC LETTER GAF
    ('\u{06AF}', ["\u{FB92}", "\u{FB94}", "\u{FB95}", "\u{FB93}"]),
    // ARABIC LETTER NGOEH
    ('\u{06B1}', ["\u{FB9A}", "\u{FB9C}", "\u{FB9D}", "\u{FB9B}"]),
    // ARABIC LETTER GUEH
    ('\u{06B3}', ["\u{FB96}", "\u{FB98}", "\u{FB99}", "\u{FB97}"]),
    // ARABIC LETTER NOON GHUNNA
    ('\u{06BA}', ["\u{FB9E}", "", "", "\u{FB9F}"]),
    // ARABIC LETTER RNOON
    ('\u{06BB}', ["\u{FBA0}", "\u{FBA2}", "\u{FBA3}", "\u{FBA1}"]),
    // ARABIC LETTER HEH DOACHASHMEE
    ('\u{06BE}', ["\u{FBAA}", "\u{FBAC}", "\u{FBAD}", "\u{FBAB}"]),
    // ARABIC LETTER HEH WITH YEH ABOVE
    ('\u{06C0}', ["\u{FBA4}", "", "", "\u{FBA5}"]),
    // ARABIC LETTER HEH GOAL
    ('\u{06C1}', ["\u{FBA6}", "\u{FBA8}", "\u{FBA9}", "\u{FBA7}"]),
    // ARABIC LETTER KIRGHIZ OE
    ('\u{06C5}', ["\u{FBE0}", "", "", "\u{FBE1}"]),
    // ARABIC LETTER OE
    ('\u{06C6}', ["\u{FBD9}", "", "", "\u{FBDA}"]),
    // ARABIC LETTER U
    ('\u{06C7}', ["\u{FBD7}", "", "", "\u{FBD8}"]),
    // ARABIC LETTER YU
    ('\u{06C8}', ["\u{FBDB}", "", "", "\u{FBDC}"]),
    // ARABIC LETTER KIRGHIZ YU
    ('\u{06C9}', ["\u{FBE2}", "", "", "\u{FBE3}"]),
    // ARABIC LETTER VE
    ('\u{06CB}', ["\u{FBDE}", "", "", "\u{FBDF}"]),
    // ARABIC LETTER FARSI YEH
    ('\u{06CC}', ["\u{FBFC}", "\u{FBFE}", "\u{FBFF}", "\u{FBFD}"]),
    // ARABIC LETTER E
    ('\u{06D0}', ["\u{FBE4}", "\u{FBE6}", "\u{FBE7}", "\u{FBE5}"]),
    // ARABIC LETTER YEH BARREE
    ('\u{06D2}', ["\u{FBAE}", "", "", "\u{FBAF}"]),
    // ARABIC LETTER YEH BARREE WITH HAMZA ABOVE
    ('\u{06D3}', ["\u{FBB0}", "", "", "\u{FBB1}"]),
    // ZWJ
    (ZWJ, ["\u{200D}", "\u{200D}", "\u{200D}", "\u{200D}"]),
]);
206
207pub fn connects_with_letter_before(letter: char) -> bool {
208    if let Some(forms) = LETTERS.get(&letter) {
209        if !forms[FINAL as usize].is_empty() || !forms[MEDIAL as usize].is_empty() {
210            return true;
211        }
212    }
213
214    false
215}
216
217pub fn connects_with_letter_after(letter: char) -> bool {
218    if let Some(forms) = LETTERS.get(&letter) {
219        if !forms[INITIAL as usize].is_empty() || !forms[MEDIAL as usize].is_empty() {
220            return true;
221        }
222    }
223
224    false
225}
226
227pub fn connects_with_letters_before_and_after(letter: char) -> bool {
228    if let Some(forms) = LETTERS.get(&letter) {
229        if !forms[MEDIAL as usize].is_empty() {
230            return true;
231        }
232    }
233
234    false
235}